mirror of https://github.com/apache/lucene.git
LUCENE-2413: consolidate remaining concrete core analyzers to modules/analysis
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@948195 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent 04c8590b80
commit 71b59ca566

lucene/build.xml | 153
@@ -23,21 +23,11 @@
 <import file="common-build.xml"/>
 
 <property name="build.demo.template" value="src/demo/demo-build.template"/>
 
-<property name="demo.name" value="lucene-demos-${version}"/>
-<property name="demo.war.name" value="luceneweb"/>
-
 <!-- Build classpath -->
 <path id="classpath">
   <pathelement location="${build.dir}/classes/java"/>
 </path>
-
-<path id="demo.classpath">
-  <path refid="classpath"/>
-  <pathelement location="${build.dir}/classes/demo"/>
-</path>
 
 <path id="test.classpath">
   <path refid="classpath"/>
   <path refid="junit-path"/>
@@ -57,10 +47,10 @@
   excludes="contrib/db/*/lib/,contrib/*/ext-libs/,src/site/build/,contrib/benchmark/temp/,contrib/benchmark/work/"
 />
 <patternset id="binary.build.dist.patterns"
-  includes="${final.name}.jar,${demo.war.name}.war,${demo.name}.jar,docs/,contrib/*/*.jar,contrib/*/*.war, contrib/*/*/*.jar"
+  includes="${final.name}.jar,docs/,contrib/*/*.jar,contrib/*/*.war, contrib/*/*/*.jar"
 />
 <patternset id="binary.root.dist.patterns"
-  includes="src/demo/,src/jsp/,docs/,*.txt,contrib/*/README*,**/CHANGES.txt,lib/servlet-api-*.jar"
+  includes="docs/,*.txt,contrib/*/README*,**/CHANGES.txt"
   excludes="${build.demo.template}"
 />
@@ -177,71 +167,8 @@ The source distribution does not contain sources of the previous Lucene Java ver
 <!-- -->
 <!-- ================================================================== -->
 
-<target name="jar-demo" depends="compile-demo">
-  <sequential>
-    <build-manifest title="Lucene Search Engine: demos"/>
-    <jar
-      destfile="${build.dir}/${demo.name}.jar"
-      basedir="${build.dir}/classes/demo"
-      excludes="**/*.java"
-      manifest="${manifest.file}">
-      <metainf dir="${common.dir}">
-        <include name="LICENSE.txt"/>
-        <include name="NOTICE.txt"/>
-      </metainf>
-    </jar>
-  </sequential>
-</target>
-
-<target name="jar-demo-src" depends="compile-demo">
-  <sequential>
-    <build-manifest title="Lucene Search Engine: demos"/>
-    <jar
-      destfile="${build.dir}/${demo.name}-src.jar"
-      basedir="src/demo"
-      manifest="${manifest.file}">
-      <metainf dir="${common.dir}">
-        <include name="LICENSE.txt"/>
-        <include name="NOTICE.txt"/>
-      </metainf>
-    </jar>
-  </sequential>
-</target>
-
-<target name="war-demo" depends="jar-core,jar-demo">
-  <sequential>
-    <build-manifest title="Lucene Search Engine: demos"/>
-    <war destfile="${build.dir}/${demo.war.name}.war"
-      webxml="src/jsp/WEB-INF/web.xml"
-      manifest="${manifest.file}">
-      <fileset dir="src/jsp" excludes="WEB-INF/web.xml"/>
-      <lib dir="${build.dir}" includes="${demo.name}.jar"/>
-      <lib dir="${build.dir}" includes="${final.name}.jar"/>
-      <metainf dir="${common.dir}">
-        <include name="LICENSE.txt"/>
-        <include name="NOTICE.txt"/>
-      </metainf>
-    </war>
-  </sequential>
-</target>
-
 <target name="compile-core" depends="jflex-notice, javacc-notice, common.compile-core"/>
 
-<!-- ================================================================== -->
-<!-- B U I L D   D E M O -->
-<!-- ================================================================== -->
-<!-- -->
-<!-- ================================================================== -->
-<target name="compile-demo" depends="compile-core">
-  <mkdir dir="${build.dir}/classes/demo"/>
-
-  <compile
-    srcdir="src/demo"
-    destdir="${build.dir}/classes/demo">
-    <classpath refid="demo.classpath"/>
-  </compile>
-</target>
-
 <!-- ================================================================== -->
 <!-- D O C U M E N T A T I O N -->
 <!-- ================================================================== -->
@@ -252,7 +179,7 @@ The source distribution does not contain sources of the previous Lucene Java ver
 </target>
 
 <target name="javadocs" description="Generate javadoc"
-  depends="javadocs-all, javadocs-core, javadocs-demo, javadocs-contrib">
+  depends="javadocs-all, javadocs-core, javadocs-contrib">
   <echo file="${javadoc.dir}/index.html" append="false">
   <![CDATA[<html><head><title>${Name} ${version} Javadoc Index</title></head>
   <body>
@@ -266,7 +193,6 @@ The source distribution does not contain sources of the previous Lucene Java ver
 <contrib-crawl target="javadocs-index.html" failonerror="false"/>
 <echo file="${javadoc.dir}/index.html" append="true"><![CDATA[
 </ul>
-<li><a href="demo/index.html">Demo</a></li>
 </ul></body>]]></echo>
 </target>
@@ -285,27 +211,12 @@ The source distribution does not contain sources of the previous Lucene Java ver
   </sequential>
 </target>
 
-<target name="javadocs-demo" description="Generate javadoc for demo classes">
-  <sequential>
-    <mkdir dir="${javadoc.dir}/demo"/>
-    <invoke-javadoc
-      destdir="${javadoc.dir}/demo"
-      title="${Name} ${version} demo API">
-      <sources>
-        <packageset dir="src/demo"/>
-        <link href=""/>
-      </sources>
-    </invoke-javadoc>
-    <jarify basedir="${javadoc.dir}/demo" destfile="${build.dir}/${demo.name}-javadoc.jar"/>
-  </sequential>
-</target>
-
 <target name="javadocs-contrib" description="Generate javadoc for contrib classes">
   <contrib-crawl target="javadocs"
                  failonerror="false"/>
 </target>
 
-<target name="javadocs-all" description="Generate javadoc for core, demo and contrib classes" depends="build-contrib">
+<target name="javadocs-all" description="Generate javadoc for core and contrib classes" depends="build-contrib">
   <sequential>
     <mkdir dir="${javadoc.dir}/all"/>
     <invoke-javadoc
@@ -314,8 +225,6 @@ The source distribution does not contain sources of the previous Lucene Java ver
 <!-- TODO: find a dynamic way to do include multiple source roots -->
 <packageset dir="src/java"/>
-
-<packageset dir="src/demo"/>
 
 <!-- please keep this list up to date, and in alpha order... -->
 
 <!-- ie: `find contrib/* -path \*src/java | sort` -->
@@ -349,10 +258,9 @@ The source distribution does not contain sources of the previous Lucene Java ver
 <group title="Core" packages="org.apache.*:org.apache.lucene.analysis:org.apache.lucene.analysis.standard*:org.apache.lucene.analysis.tokenattributes*"/>
-<group title="Demo" packages="org.apache.lucene.demo*"/>
 <group title="contrib: Ant" packages="org.apache.lucene.ant*"/>
 <group title="contrib: Benchmark" packages="org.apache.lucene.benchmark*"/>
+<group title="contrib: Demo" packages="org.apache.lucene.demo*"/>
 <group title="contrib: ICU" packages="org.apache.lucene.collation*"/>
 <group title="contrib: DB" packages="org.apache.lucene.store.db*:org.apache.lucene.store.je*:com.sleepycat*"/>
 <group title="contrib: Highlighter" packages="org.apache.lucene.search.highlight:*org.apache.lucene.search.vectorhighlight*"/>
@@ -379,7 +287,7 @@ The source distribution does not contain sources of the previous Lucene Java ver
 <!-- ================================================================== -->
 <!-- -->
 <!-- ================================================================== -->
-<target name="package" depends="jar-core, javadocs, war-demo, build-contrib, init-dist, changes-to-html">
+<target name="package" depends="jar-core, javadocs, build-contrib, init-dist, changes-to-html">
   <copy file="${build.demo.template}" tofile="${build.dir}/build-demo.xml">
     <filterset begintoken="@PLACEHOLDER_" endtoken="@">
       <filter token="version" value="${version}"/>
@@ -518,7 +426,7 @@ The source distribution does not contain sources of the previous Lucene Java ver
 
 <target name="dist-all" depends="dist, dist-src"/>
 
-<target name="generate-maven-artifacts" depends="maven.ant.tasks-check, package, jar-src, jar-demo-src, javadocs">
+<target name="generate-maven-artifacts" depends="maven.ant.tasks-check, package, jar-src, javadocs">
   <sequential>
     <m2-deploy pom.xml="lucene-parent-pom.xml.template"/>
     <m2-deploy pom.xml="lucene-core-pom.xml.template">
@@ -530,15 +438,6 @@ The source distribution does not contain sources of the previous Lucene Java ver
     </artifact-attachments>
     </m2-deploy>
 
-    <m2-deploy pom.xml="lucene-demos-pom.xml.template">
-      <artifact-attachments>
-        <attach file="${build.dir}/${demo.name}-src.jar"
-                classifier="sources"/>
-        <attach file="${build.dir}/${demo.name}-javadoc.jar"
-                classifier="javadoc"/>
-      </artifact-attachments>
-    </m2-deploy>
-
     <m2-deploy pom.xml="lucene-contrib-pom.xml.template"/>
     <contrib-crawl target="dist-maven"/>
   </sequential>
@@ -604,13 +503,10 @@ The source distribution does not contain sources of the previous Lucene Java ver
 <fileset dir="contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser" includes="*.java">
   <containsregexp expression="Generated.*By.*JavaCC"/>
 </fileset>
-<fileset dir="src/demo/org/apache/lucene/demo/html" includes="*.java">
-  <containsregexp expression="Generated.*By.*JavaCC"/>
-</fileset>
 </delete>
 </target>
 
-<target name="javacc" depends="init,javacc-check,clean-javacc,javacc-QueryParser,javacc-HTMLParser,javacc-contrib-queryparser"/>
+<target name="javacc" depends="init,javacc-check,clean-javacc,javacc-QueryParser,javacc-contrib-queryparser"/>
 
 <target name="javacc-QueryParser" depends="init,javacc-check" if="javacc.present">
   <sequential>
@@ -630,12 +526,6 @@ The source distribution does not contain sources of the previous Lucene Java ver
   </sequential>
 </target>
 
-<target name="javacc-HTMLParser" depends="init,javacc-check" if="javacc.present">
-  <invoke-javacc target="src/demo/org/apache/lucene/demo/html/HTMLParser.jj"
-                 outputDir="src/demo/org/apache/lucene/demo/html"
-  />
-</target>
-
 <target name="javacc-contrib-queryparser" depends="init,javacc-check" if="javacc.present">
   <ant target="javacc"
        dir="contrib/queryparser"
@@ -643,33 +533,6 @@ The source distribution does not contain sources of the previous Lucene Java ver
   />
 </target>
 
-<!-- ================================================================== -->
-<!-- Build the JFlex files into the source tree -->
-<!-- ================================================================== -->
-
-<target name="jflex" depends="jflex-check, clean-jflex,jflex-StandardAnalyzer" />
-
-<target name="jflex-StandardAnalyzer" depends="init,jflex-check" if="jflex.present">
-  <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
-    <classpath refid="jflex.classpath"/>
-  </taskdef>
-
-  <jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.jflex"
-         outdir="src/java/org/apache/lucene/analysis/standard"
-         nobak="on" />
-  <jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.jflex"
-         outdir="src/java/org/apache/lucene/analysis/standard"
-         nobak="on" />
-</target>
-
-<target name="clean-jflex">
-  <delete>
-    <fileset dir="src/java/org/apache/lucene/analysis/standard" includes="*.java">
-      <containsregexp expression="generated.*by.*JFlex"/>
-    </fileset>
-  </delete>
-</target>
-
 <macrodef name="createLevAutomaton">
   <attribute name="n"/>
   <sequential>

@@ -2,6 +2,11 @@ Lucene contrib change Log
 
 ======================= Trunk (not yet released) =======================
 
+Build
+
+ * LUCENE-2413: Moved the demo out of lucene core and into contrib/demo.
+   (Robert Muir)
+
 ======================= Lucene 3.x (not yet released) =======================
 
 Changes in backwards compatibility policy

@@ -34,4 +34,21 @@
 />
 
 <import file="../contrib-build.xml"/>
+
+<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
+                 property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
+
+<path id="classpath">
+  <pathelement path="${analyzers-common.jar}"/>
+  <path refid="base.classpath"/>
+</path>
+
+<target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
+
+<target name="compile-analyzers-common" unless="analyzers-common.uptodate">
+  <subant target="default">
+    <fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
+  </subant>
+</target>
 
 </project>

@@ -30,9 +30,9 @@ import java.util.Vector;
 import java.lang.reflect.Constructor;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.SimpleAnalyzer;
-import org.apache.lucene.analysis.StopAnalyzer;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.core.SimpleAnalyzer;
+import org.apache.lucene.analysis.core.StopAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.DateTools;
 import org.apache.lucene.document.Document;

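Note: the hunk above is the recurring migration this commit imposes on callers — the concrete core analyzers moved from org.apache.lucene.analysis to org.apache.lucene.analysis.core, shipped in the analyzers-common module. A minimal sketch of what downstream code changes to (the Version-taking constructor is taken from elsewhere in this diff):

import org.apache.lucene.analysis.Analyzer;
// Moved by LUCENE-2413 from org.apache.lucene.analysis.* to
// org.apache.lucene.analysis.core.* (lucene-analyzers-common jar):
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.util.Version;

public class ImportMigration {
  // Same class, new package; only the import line changes.
  public static Analyzer whitespace() {
    return new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
  }
}
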
@@ -21,7 +21,7 @@ import java.io.File;
 import java.io.IOException;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.StopAnalyzer;
+import org.apache.lucene.analysis.core.StopAnalyzer;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;

@@ -18,6 +18,7 @@
 <module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
                  property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
 <contrib-uptodate name="memory" property="memory.uptodate" classpath.property="memory.jar"/>
+<contrib-uptodate name="demo" property="demo.uptodate" classpath.property="demo.jar"/>
 
 <target name="check-files">
   <available file="temp/news20.tar.gz" property="news20.exists"/>
@@ -139,8 +140,8 @@
 <pathelement path="${memory.jar}"/>
 <pathelement path="${highlighter.jar}"/>
 <pathelement path="${analyzers-common.jar}"/>
+<pathelement path="${demo.jar}"/>
 <path refid="base.classpath"/>
-<pathelement path="${common.dir}/build/classes/demo"/>
 <fileset dir="lib">
   <include name="**/*.jar"/>
 </fileset>
@@ -228,9 +229,9 @@
 <echo>Benchmark output in JIRA table format is in file: ${shingle.jira.output.file}</echo>
 </target>
 
-<target name="compile-demo">
-  <subant target="compile-demo">
-    <fileset dir="${common.dir}" includes="build.xml"/>
+<target name="compile-demo" unless="demo.uptodate">
+  <subant target="default">
+    <fileset dir="${common.dir}/contrib/demo" includes="build.xml"/>
   </subant>
 </target>
 <target name="compile-highlighter" unless="highlighter.uptodate">

@@ -0,0 +1,78 @@
+<?xml version="1.0"?>
+
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<project name="demo" default="default">
+
+  <description>
+    Lucene Demo
+  </description>
+
+  <property name="build.demo.template" value="src/java/demo-build.template"/>
+
+  <property name="demo.name" value="lucene-demos-${version}"/>
+  <property name="demo.war.name" value="luceneweb"/>
+
+  <import file="../contrib-build.xml"/>
+
+  <module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
+                   property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
+
+  <path id="classpath">
+    <pathelement path="${analyzers-common.jar}"/>
+    <path refid="base.classpath"/>
+  </path>
+
+  <target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
+
+  <target name="compile-analyzers-common" unless="analyzers-common.uptodate">
+    <subant target="default">
+      <fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
+    </subant>
+  </target>
+
+  <target name="war-demo" depends="jar-core">
+    <sequential>
+      <build-manifest title="Lucene Search Engine: demos"/>
+      <war destfile="${build.dir}/${demo.war.name}.war"
+           webxml="src/jsp/WEB-INF/web.xml"
+           manifest="${manifest.file}">
+        <fileset dir="src/jsp" excludes="WEB-INF/web.xml"/>
+        <lib dir="${build.dir}/../.." includes="lucene-core-${version}.jar"/>
+        <lib dir="${common.dir}/../modules/analysis/build/common" includes="lucene-analyzers-common-${version}.jar"/>
+        <lib dir="${build.dir}" includes="${final.name}.jar"/>
+        <metainf dir="${common.dir}">
+          <include name="LICENSE.txt"/>
+          <include name="NOTICE.txt"/>
+        </metainf>
+      </war>
+    </sequential>
+  </target>
+
+  <target name="clean-javacc">
+    <fileset dir="src/demo/org/apache/lucene/demo/html" includes="*.java">
+      <containsregexp expression="Generated.*By.*JavaCC"/>
+    </fileset>
+  </target>
+
+  <target name="javacc" depends="init,javacc-check" if="javacc.present">
+    <invoke-javacc target="src/demo/org/apache/lucene/demo/html/HTMLParser.jj"
+                   outputDir="src/demo/org/apache/lucene/demo/html"
+    />
+  </target>
+</project>

@@ -21,24 +21,10 @@ import java.util.Collections;
 import java.util.LinkedList;
 import java.util.Set;
 
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.Index;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.Field.TermVector;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.TermFreqVector;
 import org.apache.lucene.index.TermPositionVector;
 import org.apache.lucene.index.TermVectorOffsetInfo;
-import org.apache.lucene.queryParser.QueryParser;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.util.Version;
 
 /**
  * <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
@@ -49,24 +35,24 @@ public class FieldTermStack {
   private final String fieldName;
   LinkedList<TermInfo> termList = new LinkedList<TermInfo>();
 
-  public static void main( String[] args ) throws Exception {
-    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
-    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "f", analyzer );
-    Query query = parser.parse( "a x:b" );
-    FieldQuery fieldQuery = new FieldQuery( query, true, false );
+  //public static void main( String[] args ) throws Exception {
+  //  Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
+  //  QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "f", analyzer );
+  //  Query query = parser.parse( "a x:b" );
+  //  FieldQuery fieldQuery = new FieldQuery( query, true, false );
 
-    Directory dir = new RAMDirectory();
-    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
-    Document doc = new Document();
-    doc.add( new Field( "f", "a a a b b c a b b c d e f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
-    doc.add( new Field( "f", "b a b a f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
-    writer.addDocument( doc );
-    writer.close();
+  //  Directory dir = new RAMDirectory();
+  //  IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
+  //  Document doc = new Document();
+  //  doc.add( new Field( "f", "a a a b b c a b b c d e f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
+  //  doc.add( new Field( "f", "b a b a f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
+  //  writer.addDocument( doc );
+  //  writer.close();
 
-    IndexReader reader = IndexReader.open( dir, true );
-    new FieldTermStack( reader, 0, "f", fieldQuery );
-    reader.close();
-  }
+  //  IndexReader reader = IndexReader.open( dir, true );
+  //  new FieldTermStack( reader, 0, "f", fieldQuery );
+  //  reader.close();
+  //}
 
   /**
    * a constructor.

@@ -38,6 +38,22 @@
 
 <import file="../contrib-build.xml"/>
 
+<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
+                 property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
+
+<path id="classpath">
+  <pathelement path="${analyzers-common.jar}"/>
+  <path refid="base.classpath"/>
+</path>
+
+<target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
+
+<target name="compile-analyzers-common" unless="analyzers-common.uptodate">
+  <subant target="default">
+    <fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
+  </subant>
+</target>
+
 <target name="jar" depends="compile" description="Create JAR">
   <jarify>
     <manifest-attributes>

@@ -27,13 +27,9 @@ import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.KeywordAnalyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.SimpleAnalyzer;
-import org.apache.lucene.analysis.StopAnalyzer;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexWriter;

@@ -27,4 +27,19 @@
 
 <import file="../contrib-build.xml"/>
+
+<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
+                 property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
+
+<path id="classpath">
+  <pathelement path="${analyzers-common.jar}"/>
+  <path refid="base.classpath"/>
+</path>
+
+<target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
+
+<target name="compile-analyzers-common" unless="analyzers-common.uptodate">
+  <subant target="default">
+    <fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
+  </subant>
+</target>
 </project>

@@ -21,7 +21,7 @@ import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;

@@ -16,7 +16,7 @@ package org.apache.lucene.misc;
  * limitations under the License.
  */
 
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;

@@ -32,7 +32,6 @@ import java.util.Set;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.IndexReader;
@@ -49,7 +48,6 @@ import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.PriorityQueue;
-import org.apache.lucene.util.Version;
 
 /**
@@ -158,13 +156,6 @@ public final class MoreLikeThis {
    */
  public static final int DEFAULT_MAX_NUM_TOKENS_PARSED=5000;
 
-  /**
-   * Default analyzer to parse source doc with.
-   * @see #getAnalyzer
-   */
-  public static final Analyzer DEFAULT_ANALYZER = new StandardAnalyzer(Version.LUCENE_CURRENT);
-
  /**
   * Ignore terms with less than this frequency in the source doc.
   * @see #getMinTermFreq
@@ -240,7 +231,7 @@ public final class MoreLikeThis {
  /**
   * Analyzer that will be used to parse the doc.
   */
-  private Analyzer analyzer = DEFAULT_ANALYZER;
+  private Analyzer analyzer = null;
 
  /**
   * Ignore words less frequent that this.
@@ -343,10 +334,9 @@ public final class MoreLikeThis {
 
  /**
   * Returns an analyzer that will be used to parse source doc with. The default analyzer
-   * is the {@link #DEFAULT_ANALYZER}.
+   * is not set.
   *
   * @return the analyzer that will be used to parse source doc with.
-   * @see #DEFAULT_ANALYZER
   */
  public Analyzer getAnalyzer() {
    return analyzer;
@@ -887,6 +877,10 @@ public final class MoreLikeThis {
  private void addTermFrequencies(Reader r, Map<String,Int> termFreqMap, String fieldName)
    throws IOException
  {
+    if (analyzer == null) {
+      throw new UnsupportedOperationException("To use MoreLikeThis without " +
+          "term vectors, you must provide an Analyzer");
+    }
    TokenStream ts = analyzer.tokenStream(fieldName, r);
    int tokenCount=0;
    // for every token

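Note: with the change above, MoreLikeThis no longer falls back to a default StandardAnalyzer; callers that extract terms from text (rather than from stored term vectors) must supply an analyzer themselves. A hedged sketch of the new calling pattern (class and method names are those visible in this diff; the index setup around it is assumed):

import java.io.StringReader;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.similar.MoreLikeThis;
import org.apache.lucene.util.Version;

public class MoreLikeThisUsage {
  public static Query similarTo(IndexReader reader, String text) throws Exception {
    MoreLikeThis mlt = new MoreLikeThis(reader);
    // Required after this commit when the field has no term vectors:
    // addTermFrequencies() now throws UnsupportedOperationException
    // if no Analyzer has been set.
    mlt.setAnalyzer(new StandardAnalyzer(Version.LUCENE_CURRENT));
    mlt.setFieldNames(new String[] { "contents" });
    return mlt.like(new StringReader(text));
  }
}
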
@@ -21,7 +21,6 @@ import java.io.IOException;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;

@@ -24,6 +24,7 @@ import java.util.List;
 import java.util.Map;
 
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
@@ -76,6 +77,7 @@ public class TestMoreLikeThis extends LuceneTestCase {
 
     MoreLikeThis mlt = new MoreLikeThis(
         reader);
+    mlt.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
     mlt.setMinDocFreq(1);
     mlt.setMinTermFreq(1);
     mlt.setMinWordLen(1);
@@ -110,6 +112,7 @@ public class TestMoreLikeThis extends LuceneTestCase {
   private Map<String,Float> getOriginalValues() throws IOException {
     Map<String,Float> originalValues = new HashMap<String,Float>();
     MoreLikeThis mlt = new MoreLikeThis(reader);
+    mlt.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
     mlt.setMinDocFreq(1);
     mlt.setMinTermFreq(1);
     mlt.setMinWordLen(1);

@@ -577,22 +577,6 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
     return sb.toString();
   }
 
-  /**
-   * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
-   * Usage:<br>
-   * <code>java org.apache.lucene.queryParser.QueryParser <input></code>
-   */
-  public static void main(String[] args) throws Exception {
-    if (args.length == 0) {
-      System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
-      System.exit(0);
-    }
-    PrecedenceQueryParser qp = new PrecedenceQueryParser("field",
-        new org.apache.lucene.analysis.SimpleAnalyzer());
-    Query q = qp.parse(args[0]);
-    System.out.println(q.toString("field"));
-  }
-
 // * Query  ::= ( Clause )*
 // * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
   final public int Conjunction() throws ParseException {
@@ -1290,4 +1274,19 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
     JJCalls next;
   }
 
+  /**
+   * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
+   * Usage:<br>
+   * <code>java org.apache.lucene.queryParser.QueryParser <input></code>
+   */
+  // public static void main(String[] args) throws Exception {
+  //   if (args.length == 0) {
+  //     System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
+  //     System.exit(0);
+  //   }
+  //   PrecedenceQueryParser qp = new PrecedenceQueryParser("field",
+  //       new org.apache.lucene.analysis.SimpleAnalyzer());
+  //   Query q = qp.parse(args[0]);
+  //   System.out.println(q.toString("field"));
+  // }
 }

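Note: the commented-out main() above removed the parser's last compile-time dependency on the relocated SimpleAnalyzer. The same check can still be run from user code; a hedged sketch under the assumption that the contrib parser lives in org.apache.lucene.queryParser.precedence and that the test-only MockAnalyzer (used throughout this commit) is on the classpath:

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.queryParser.precedence.PrecedenceQueryParser;
import org.apache.lucene.search.Query;

public class ParseDemo {
  public static void main(String[] args) throws Exception {
    // MockAnalyzer stands in for the removed SimpleAnalyzer dependency.
    PrecedenceQueryParser qp = new PrecedenceQueryParser("field", new MockAnalyzer());
    Query q = qp.parse(args.length > 0 ? args[0] : "a AND b");
    System.out.println(q.toString("field"));
  }
}
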
@@ -606,16 +606,16 @@ public class PrecedenceQueryParser {
    * Usage:<br>
    * <code>java org.apache.lucene.queryParser.QueryParser <input></code>
    */
-  public static void main(String[] args) throws Exception {
-    if (args.length == 0) {
-      System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
-      System.exit(0);
-    }
-    PrecedenceQueryParser qp = new PrecedenceQueryParser("field",
-        new org.apache.lucene.analysis.SimpleAnalyzer());
-    Query q = qp.parse(args[0]);
-    System.out.println(q.toString("field"));
-  }
+  // public static void main(String[] args) throws Exception {
+  //   if (args.length == 0) {
+  //     System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
+  //     System.exit(0);
+  //   }
+  //   PrecedenceQueryParser qp = new PrecedenceQueryParser("field",
+  //       new org.apache.lucene.analysis.SimpleAnalyzer());
+  //   Query q = qp.parse(args[0]);
+  //   System.out.println(q.toString("field"));
+  // }
 }
 
 PARSER_END(PrecedenceQueryParser)

@@ -20,11 +20,9 @@ package org.apache.lucene.queryParser.standard;
 import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;

@@ -32,7 +32,6 @@ import java.util.Locale;
 import java.util.Map;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.KeywordAnalyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
@@ -341,8 +340,9 @@ public class TestQPHelper extends LocalizedTestCase {
         "türm term term");
     assertQueryEquals("ümlaut", new MockAnalyzer(MockTokenizer.WHITESPACE, false), "ümlaut");
 
-    assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
-    assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
+    // FIXME: change MockAnalyzer to not extend CharTokenizer for this test
+    //assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
+    //assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
 
     assertQueryEquals("a AND b", null, "+a +b");
     assertQueryEquals("(a AND b)", null, "+a +b");

@@ -30,7 +30,6 @@ import java.util.List;
 import java.util.Locale;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.KeywordAnalyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
@@ -333,8 +332,9 @@ public class TestQueryParserWrapper extends LocalizedTestCase {
         "türm term term");
     assertQueryEquals("ümlaut", new MockAnalyzer(MockTokenizer.WHITESPACE, false), "ümlaut");
 
-    assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
-    assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
+    //FIXME: Change MockAnalyzer to not extend CharTokenizer for this test
+    //assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
+    //assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
 
     assertQueryEquals("a AND b", null, "+a +b");
     assertQueryEquals("(a AND b)", null, "+a +b");

@@ -24,4 +24,20 @@
 </description>
 
 <import file="../contrib-build.xml"/>
+
+<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
+                 property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
+
+<path id="classpath">
+  <pathelement path="${analyzers-common.jar}"/>
+  <path refid="base.classpath"/>
+</path>
+
+<target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
+
+<target name="compile-analyzers-common" unless="analyzers-common.uptodate">
+  <subant target="default">
+    <fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
+  </subant>
+</target>
 </project>

@@ -20,7 +20,7 @@ package org.apache.lucene.search.spell;
 import java.io.IOException;
 import java.util.Iterator;
 
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;

@@ -22,7 +22,7 @@ import java.util.Iterator;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;

@@ -25,6 +25,22 @@
 
 <import file="../contrib-build.xml"/>
 
+<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
+                 property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
+
+<path id="classpath">
+  <pathelement path="${analyzers-common.jar}"/>
+  <path refid="base.classpath"/>
+</path>
+
+<target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
+
+<target name="compile-analyzers-common" unless="analyzers-common.uptodate">
+  <subant target="default">
+    <fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
+  </subant>
+</target>
+
 <target name="list-demo" depends="compile">
   <java classname="org.apache.lucene.swing.models.ListSearcherSimulator"
         fork="yes" spawn="yes"

@@ -25,7 +25,7 @@ import javax.swing.event.ListDataEvent;
 import javax.swing.event.ListDataListener;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Fieldable;

@@ -24,7 +24,7 @@ import javax.swing.table.AbstractTableModel;
 import javax.swing.table.TableModel;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Fieldable;

@@ -30,6 +30,22 @@
 
 <import file="../contrib-build.xml"/>
 
+<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
+                 property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
+
+<path id="classpath">
+  <pathelement path="${analyzers-common.jar}"/>
+  <path refid="base.classpath"/>
+</path>
+
+<target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
+
+<target name="compile-analyzers-common" unless="analyzers-common.uptodate">
+  <subant target="default">
+    <fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
+  </subant>
+</target>
+
 <target name="index" depends="compile" description="Build WordNet index">
   <fail if="synindex.exists">
     Index already exists - must remove first.

@@ -23,12 +23,12 @@ import java.io.IOException;
 import java.io.Reader;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.WhitespaceTokenizer;
 
 public class TestSynonymTokenFilter extends BaseTokenStreamTestCase {
   final String testFile = "testSynonyms.txt";

@@ -156,7 +156,7 @@ public abstract class TokenStream extends AttributeSource implements Closeable {
  * This method can be used to perform any end-of-stream operations, such as
  * setting the final offset of a stream. The final offset of a stream might
  * differ from the offset of the last token eg in case one or more whitespaces
- * followed after the last token, but a {@link WhitespaceTokenizer} was used.
+ * followed after the last token, but a WhitespaceTokenizer was used.
  *
  * @throws IOException
  */

@@ -1082,22 +1082,6 @@ public class QueryParser implements QueryParserConstants {
     return sb.toString();
   }
 
-  /**
-   * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
-   * Usage:<br>
-   * <code>java org.apache.lucene.queryParser.QueryParser <input></code>
-   */
-  public static void main(String[] args) throws Exception {
-    if (args.length == 0) {
-      System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
-      System.exit(0);
-    }
-    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
-        new org.apache.lucene.analysis.SimpleAnalyzer());
-    Query q = qp.parse(args[0]);
-    System.out.println(q.toString("field"));
-  }
-
 // * Query  ::= ( Clause )*
 // * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
   final public int Conjunction() throws ParseException {
@@ -1802,4 +1786,19 @@ public class QueryParser implements QueryParserConstants {
     JJCalls next;
   }
 
+  /**
+   * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
+   * Usage:<br>
+   * <code>java org.apache.lucene.queryParser.QueryParser <input></code>
+   */
+  // public static void main(String[] args) throws Exception {
+  //   if (args.length == 0) {
+  //     System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
+  //     System.exit(0);
+  //   }
+  //   QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
+  //       new org.apache.lucene.analysis.SimpleAnalyzer());
+  //   Query q = qp.parse(args[0]);
+  //   System.out.println(q.toString("field"));
+  // }
 }

@@ -1111,16 +1111,16 @@ public class QueryParser {
    * Usage:<br>
    * <code>java org.apache.lucene.queryParser.QueryParser <input></code>
    */
-  public static void main(String[] args) throws Exception {
-    if (args.length == 0) {
-      System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
-      System.exit(0);
-    }
-    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
-        new org.apache.lucene.analysis.SimpleAnalyzer());
-    Query q = qp.parse(args[0]);
-    System.out.println(q.toString("field"));
-  }
+  // public static void main(String[] args) throws Exception {
+  //   if (args.length == 0) {
+  //     System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
+  //     System.exit(0);
+  //   }
+  //   QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
+  //       new org.apache.lucene.analysis.SimpleAnalyzer());
+  //   Query q = qp.parse(args[0]);
+  //   System.out.println(q.toString("field"));
+  // }
 }
 
 PARSER_END(QueryParser)

@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.io.Reader;
 
+import org.apache.lucene.util.Version;
 import org.apache.lucene.util.AttributeSource.AttributeFactory;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.RegExp;
 
@@ -45,6 +46,13 @@ public class MockTokenizer extends CharTokenizer {
   private final boolean lowerCase;
   private int state;
 
+  public MockTokenizer(AttributeFactory factory, Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
+    super(Version.LUCENE_CURRENT, factory, input);
+    this.runAutomaton = runAutomaton;
+    this.lowerCase = lowerCase;
+    this.state = runAutomaton.getInitialState();
+  }
+
   public MockTokenizer(Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
     super(Version.LUCENE_CURRENT, input);
     this.runAutomaton = runAutomaton;

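Note: the new constructor above lets tests supply a custom AttributeFactory, which is exactly what the TestToken change later in this commit relies on. A hedged sketch of how a test might use it (MockTokenizer.WHITESPACE and Token.TOKEN_ATTRIBUTE_FACTORY are taken from elsewhere in this diff):

import java.io.StringReader;

import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;

public class MockTokenizerUsage {
  public static TokenStream whitespaceStream(String text) {
    // AttributeFactory-aware constructor added in this commit;
    // WHITESPACE splits on whitespace, false disables lowercasing.
    return new MockTokenizer(Token.TOKEN_ATTRIBUTE_FACTORY,
        new StringReader(text), MockTokenizer.WHITESPACE, false);
  }
}
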
@@ -46,8 +46,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
     }
     // internal buffer size is 1024 make sure we have a surrogate pair right at the border
     builder.insert(1023, "\ud801\udc1c");
-    LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(
-        TEST_VERSION_CURRENT, new StringReader(builder.toString()));
+    MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString()), MockTokenizer.SIMPLE, true);
     assertTokenStreamContents(tokenizer, builder.toString().toLowerCase().split(" "));
   }
 
@@ -64,8 +63,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
       builder.append("a");
     }
     builder.append("\ud801\udc1cabc");
-    LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(
-        TEST_VERSION_CURRENT, new StringReader(builder.toString()));
+    MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString()), MockTokenizer.SIMPLE, true);
     assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase()});
   }
 }
@@ -79,8 +77,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
     for (int i = 0; i < 255; i++) {
       builder.append("A");
     }
-    LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(
-        TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
+    MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString() + builder.toString()), MockTokenizer.SIMPLE, true);
     assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
   }
 
@@ -94,42 +91,10 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
       builder.append("A");
     }
     builder.append("\ud801\udc1c");
-    LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(
-        TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
+    MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString() + builder.toString()), MockTokenizer.SIMPLE, true);
     assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
   }
 
-  public void testLowerCaseTokenizer() throws IOException {
-    StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
-    LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT,
-        reader);
-    assertTokenStreamContents(tokenizer, new String[] { "tokenizer",
-        "\ud801\udc44test" });
-  }
-
-  public void testLowerCaseTokenizerBWCompat() throws IOException {
-    StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
-    LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(Version.LUCENE_30,
-        reader);
-    assertTokenStreamContents(tokenizer, new String[] { "tokenizer", "test" });
-  }
-
-  public void testWhitespaceTokenizer() throws IOException {
-    StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
-    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
-        reader);
-    assertTokenStreamContents(tokenizer, new String[] { "Tokenizer",
-        "\ud801\udc1ctest" });
-  }
-
-  public void testWhitespaceTokenizerBWCompat() throws IOException {
-    StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
-    WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_30,
-        reader);
-    assertTokenStreamContents(tokenizer, new String[] { "Tokenizer",
-        "\ud801\udc1ctest" });
-  }
-
   public void testIsTokenCharCharInSubclass() {
     new TestingCharTokenizer(Version.LUCENE_30, new StringReader(""));
     try {

@@ -239,7 +239,7 @@ public class TestToken extends LuceneTestCase {
   }
 
   public void testTokenAttributeFactory() throws Exception {
-    TokenStream ts = new WhitespaceTokenizer(Token.TOKEN_ATTRIBUTE_FACTORY, new StringReader("foo bar"));
+    TokenStream ts = new MockTokenizer(Token.TOKEN_ATTRIBUTE_FACTORY, new StringReader("foo bar"), MockTokenizer.WHITESPACE, false);
 
     assertTrue("TypeAttribute is not implemented by SenselessAttributeImpl",
         ts.addAttribute(SenselessAttribute.class) instanceof SenselessAttributeImpl);

@@ -25,8 +25,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

@@ -42,7 +42,6 @@ import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.document.Document;
@@ -538,67 +537,6 @@ public class TestIndexWriter extends LuceneTestCase {
     }
   }
 
-  /**
-   * Make sure we skip wicked long terms.
-   */
-  public void testWickedLongTerm() throws IOException {
-    RAMDirectory dir = new RAMDirectory();
-    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
-
-    char[] chars = new char[DocumentsWriter.MAX_TERM_LENGTH_UTF8];
-    Arrays.fill(chars, 'x');
-    Document doc = new Document();
-    final String bigTerm = new String(chars);
-
-    // This produces a too-long term:
-    String contents = "abc xyz x" + bigTerm + " another term";
-    doc.add(new Field("content", contents, Field.Store.NO, Field.Index.ANALYZED));
-    writer.addDocument(doc);
-
-    // Make sure we can add another normal document
-    doc = new Document();
-    doc.add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.ANALYZED));
-    writer.addDocument(doc);
-    writer.close();
-
-    IndexReader reader = IndexReader.open(dir, true);
-
-    // Make sure all terms < max size were indexed
-    assertEquals(2, reader.docFreq(new Term("content", "abc")));
-    assertEquals(1, reader.docFreq(new Term("content", "bbb")));
-    assertEquals(1, reader.docFreq(new Term("content", "term")));
-    assertEquals(1, reader.docFreq(new Term("content", "another")));
-
-    // Make sure position is still incremented when
-    // massive term is skipped:
-    TermPositions tps = reader.termPositions(new Term("content", "another"));
-    assertTrue(tps.next());
-    assertEquals(1, tps.freq());
-    assertEquals(3, tps.nextPosition());
-
-    // Make sure the doc that has the massive term is in
-    // the index:
-    assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs());
-
-    reader.close();
-
-    // Make sure we can add a document with exactly the
-    // maximum length term, and search on that term:
-    doc = new Document();
-    doc.add(new Field("content", bigTerm, Field.Store.NO, Field.Index.ANALYZED));
-    StandardAnalyzer sa = new StandardAnalyzer(TEST_VERSION_CURRENT);
-    sa.setMaxTokenLength(100000);
-    writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
-    writer.addDocument(doc);
-    writer.close();
-    reader = IndexReader.open(dir, true);
-    assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
-    reader.close();
-
-    dir.close();
-  }
-
   public void testOptimizeMaxNumSegments() throws IOException {
 
     MockRAMDirectory dir = new MockRAMDirectory();

@@ -32,7 +32,6 @@ import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.document.Document;

@@ -29,7 +29,6 @@ import java.util.HashSet;
 import java.util.Locale;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.KeywordAnalyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenFilter;
 import org.apache.lucene.analysis.MockTokenizer;
@@ -255,8 +254,10 @@ public class TestQueryParser extends LocalizedTestCase {
     assertQueryEquals("türm term term", new MockAnalyzer(), "türm term term");
     assertQueryEquals("ümlaut", new MockAnalyzer(), "ümlaut");
 
-    assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
-    assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
+    // FIXME: enhance MockAnalyzer to be able to support this
+    // it must no longer extend CharTokenizer
+    //assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
+    //assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
 
     assertQueryEquals("a AND b", null, "+a +b");
     assertQueryEquals("(a AND b)", null, "+a +b");

@@ -19,8 +19,6 @@ package org.apache.lucene.search;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.SimpleAnalyzer;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;

@@ -21,19 +21,15 @@ import java.io.Reader;
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.Collection;
-import java.util.Collections;
-import java.util.Iterator;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexWriter;
@@ -44,7 +40,6 @@ import org.apache.lucene.index.TermPositions;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.store.MockRAMDirectory;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.analysis.LowerCaseTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.index.Payload;
 import org.apache.lucene.search.payloads.PayloadSpanUtil;
@@ -52,9 +47,7 @@ import org.apache.lucene.search.spans.SpanNearQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.search.spans.Spans;
-import org.apache.lucene.util.Version;
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.automaton.BasicAutomata;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
 import org.apache.lucene.util.automaton.RegExp;
 

@@ -20,7 +20,8 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Random;

import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;

@@ -50,7 +51,7 @@ public class TestRegexpRandom2 extends LuceneTestCase {
    super.setUp();
    random = newRandom();
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new KeywordAnalyzer(),
    IndexWriter writer = new IndexWriter(dir, new MockAnalyzer(MockTokenizer.KEYWORD, false),
        IndexWriter.MaxFieldLength.UNLIMITED);

    Document doc = new Document();
@@ -20,7 +20,6 @@ package org.apache.lucene.search;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
@@ -20,7 +20,6 @@ package org.apache.lucene.search;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.*;
@@ -34,6 +34,16 @@ New Features
  (... in progress)

* LUCENE-2413: Consolidated all Lucene analyzers into modules/analysis/common.
  - o.a.l.analysis.KeywordAnalyzer -> o.a.l.analysis.core.KeywordAnalyzer
  - o.a.l.analysis.KeywordTokenizer -> o.a.l.analysis.core.KeywordTokenizer
  - o.a.l.analysis.LetterTokenizer -> o.a.l.analysis.core.LetterTokenizer
  - o.a.l.analysis.LowerCaseFilter -> o.a.l.analysis.core.LowerCaseFilter
  - o.a.l.analysis.LowerCaseTokenizer -> o.a.l.analysis.core.LowerCaseTokenizer
  - o.a.l.analysis.SimpleAnalyzer -> o.a.l.analysis.core.SimpleAnalyzer
  - o.a.l.analysis.StopAnalyzer -> o.a.l.analysis.core.StopAnalyzer
  - o.a.l.analysis.StopFilter -> o.a.l.analysis.core.StopFilter
  - o.a.l.analysis.WhitespaceAnalyzer -> o.a.l.analysis.core.WhitespaceAnalyzer
  - o.a.l.analysis.WhitespaceTokenizer -> o.a.l.analysis.core.WhitespaceTokenizer
  - o.a.l.analysis.PorterStemFilter -> o.a.l.analysis.en.PorterStemFilter
  - o.a.l.analysis.ASCIIFoldingFilter -> o.a.l.analysis.miscellaneous.ASCIIFoldingFilter
  - o.a.l.analysis.ISOLatin1AccentFilter -> o.a.l.analysis.miscellaneous.ISOLatin1AccentFilter

@@ -44,6 +54,9 @@ New Features
  - o.a.l.analysis.BaseCharFilter -> o.a.l.analysis.charfilter.BaseCharFilter
  - o.a.l.analysis.MappingCharFilter -> o.a.l.analysis.charfilter.MappingCharFilter
  - o.a.l.analysis.NormalizeCharMap -> o.a.l.analysis.charfilter.NormalizeCharMap
  - o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
  - o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
  - o.a.l.analysis.WordlistLoader -> o.a.l.analysis.util.WordlistLoader
  ... (in progress)

Build
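To make the renames above concrete, here is a minimal, hypothetical migration sketch (not part of this commit). The class name and its new package are taken from the list above; the surrounding application code and the exact constructor signatures are assumptions based on the trunk API of the time. The classes keep their names and behavior; only the import line moves:

// Before this change:  import org.apache.lucene.analysis.WhitespaceAnalyzer;
// After this change the same class lives in o.a.l.analysis.core:
import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

public class AnalyzerMigrationSketch {
  public static void main(String[] args) throws IOException {
    // Same API as before the move; only the import above changed.
    WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_31);
    TokenStream ts = analyzer.tokenStream("content", new StringReader("hello world"));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      System.out.println(term.toString()); // prints "hello", then "world"
    }
    ts.end();
    ts.close();
  }
}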
@@ -38,7 +38,7 @@

  <target name="compile-core" depends="jflex-notice, common.compile-core"/>

  <target name="jflex" depends="jflex-check,clean-jflex,jflex-wiki-tokenizer"/>
  <target name="jflex" depends="jflex-check,clean-jflex,jflex-StandardAnalyzer,jflex-wiki-tokenizer"/>

  <target name="jflex-wiki-tokenizer" depends="init,jflex-check" if="jflex.present">
    <taskdef classname="jflex.anttask.JFlexTask" name="jflex">

@@ -49,11 +49,27 @@
           nobak="on"/>
  </target>

  <target name="jflex-StandardAnalyzer" depends="init,jflex-check" if="jflex.present">
    <taskdef classname="jflex.anttask.JFlexTask" name="jflex">
      <classpath refid="jflex.classpath"/>
    </taskdef>

    <jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.jflex"
           outdir="src/java/org/apache/lucene/analysis/standard"
           nobak="on" />
    <jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.jflex"
           outdir="src/java/org/apache/lucene/analysis/standard"
           nobak="on" />
  </target>

  <target name="clean-jflex">
    <delete>
      <fileset dir="src/java/org/apache/lucene/analysis/wikipedia" includes="*.java">
        <containsregexp expression="generated.*by.*JFlex"/>
      </fileset>
      <fileset dir="src/java/org/apache/lucene/analysis/standard" includes="*.java">
        <containsregexp expression="generated.*by.*JFlex"/>
      </fileset>
    </delete>
  </target>
</project>
@@ -24,14 +24,14 @@ import java.util.Hashtable;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.util.Version;

/**

@@ -163,10 +163,10 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {

  /**
   * Creates
   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   * used to tokenize all the text in the provided {@link Reader}.
   *
   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   *         built from an {@link ArabicLetterTokenizer} filtered with
   *         {@link LowerCaseFilter}, {@link StopFilter},
   *         {@link ArabicNormalizationFilter}, {@link KeywordMarkerFilter}
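The javadoc pattern above recurs throughout this commit, so it is worth seeing next to code once. Below is a hypothetical analyzer, not part of this commit, sketching how a ReusableAnalyzerBase subclass builds its TokenStreamComponents from a tokenizer wrapped by a filter chain; the class name is invented, and the constructor signatures assume the trunk API of the time:

import java.io.Reader;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.util.Version;

public final class PipelineSketchAnalyzer extends ReusableAnalyzerBase {
  private final Version matchVersion;

  public PipelineSketchAnalyzer(Version matchVersion) {
    this.matchVersion = matchVersion;
  }

  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    // The tokenizer is the source of tokens...
    final Tokenizer source = new StandardTokenizer(matchVersion, reader);
    // ...and the filters wrap it in order; the components pair is what the
    // analyzer reuses across calls, which is the point of this base class.
    return new TokenStreamComponents(source,
        new LowerCaseFilter(matchVersion, new StandardFilter(source)));
  }
}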
@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.ar;
import java.io.Reader;

import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.LetterTokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;

@@ -120,7 +120,7 @@ public class ArabicLetterTokenizer extends LetterTokenizer {

  /**
   * Allows for Letter category or NonspacingMark category
   * @see org.apache.lucene.analysis.LetterTokenizer#isTokenChar(int)
   * @see org.apache.lucene.analysis.core.LetterTokenizer#isTokenChar(int)
   */
  @Override
  protected boolean isTokenChar(int c) {
@@ -23,16 +23,16 @@ import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;

/**

@@ -119,11 +119,11 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {

  /**
   * Creates a
   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   * which tokenizes all the text in the provided {@link Reader}.
   *
   * @return A
   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   *         {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   *         built from an {@link StandardTokenizer} filtered with
   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
   *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -28,16 +28,16 @@ import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;

/**

@@ -193,10 +193,10 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {

  /**
   * Creates
   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   * used to tokenize all the text in the provided {@link Reader}.
   *
   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   *         built from a {@link StandardTokenizer} filtered with
   *         {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}
   *         , and {@link BrazilianStemFilter}.
@@ -19,9 +19,9 @@ package org.apache.lucene.analysis.cjk;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;

import java.io.Reader;
@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.cn;

import java.io.Reader;

import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.standard.StandardAnalyzer; // javadoc @link
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;

@@ -35,10 +35,10 @@ public final class ChineseAnalyzer extends ReusableAnalyzerBase {

  /**
   * Creates
   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   * used to tokenize all the text in the provided {@link Reader}.
   *
   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   *         built from a {@link ChineseTokenizer} filtered with
   *         {@link ChineseFilter}
   */
@@ -23,7 +23,7 @@ import java.util.Arrays;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;
@@ -1,4 +1,4 @@
package org.apache.lucene.analysis;
package org.apache.lucene.analysis.core;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -19,6 +19,8 @@ package org.apache.lucene.analysis;

import java.io.Reader;

import org.apache.lucene.analysis.util.ReusableAnalyzerBase;

/**
 * "Tokenizes" the entire stream as a single token. This is useful
 * for data like zip codes, ids, and some product names.
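As a quick, hypothetical illustration of the javadoc above (not part of this commit; the class name and field value are invented, and the import reflects the package move in this diff): KeywordAnalyzer hands back the entire field value as a single token, which is why it suits identifiers.

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.KeywordAnalyzer; // new home after this commit
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class KeywordAnalyzerSketch {
  public static void main(String[] args) throws IOException {
    KeywordAnalyzer analyzer = new KeywordAnalyzer();
    TokenStream ts = analyzer.tokenStream("sku", new StringReader("AB-1234 rev 7"));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();
    while (ts.incrementToken()) {
      // Exactly one token comes back: the untouched value "AB-1234 rev 7".
      System.out.println(term.toString());
    }
    ts.end();
    ts.close();
  }
}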
@@ -1,4 +1,4 @@
package org.apache.lucene.analysis;
package org.apache.lucene.analysis.core;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -20,6 +20,7 @@ package org.apache.lucene.analysis;
import java.io.IOException;
import java.io.Reader;

import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.AttributeSource;
@@ -1,4 +1,4 @@
package org.apache.lucene.analysis;
package org.apache.lucene.analysis.core;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -19,6 +19,8 @@ package org.apache.lucene.analysis;

import java.io.Reader;

import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
@@ -1,4 +1,4 @@
package org.apache.lucene.analysis;
package org.apache.lucene.analysis.core;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -19,6 +19,8 @@ package org.apache.lucene.analysis;

import java.io.IOException;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.CharacterUtils;
import org.apache.lucene.util.Version;
@@ -1,4 +1,4 @@
package org.apache.lucene.analysis;
package org.apache.lucene.analysis.core;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -19,6 +19,8 @@ package org.apache.lucene.analysis;

import java.io.Reader;

import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
@@ -1,4 +1,4 @@
package org.apache.lucene.analysis;
package org.apache.lucene.analysis.core;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -19,6 +19,10 @@ package org.apache.lucene.analysis;

import java.io.Reader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents;
import org.apache.lucene.util.Version;

/** An {@link Analyzer} that filters {@link LetterTokenizer}
@@ -1,4 +1,4 @@
package org.apache.lucene.analysis;
package org.apache.lucene.analysis.core;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -24,6 +24,12 @@ import java.util.Arrays;
import java.util.Set;
import java.util.List;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents;
import org.apache.lucene.util.Version;

/** Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.

@@ -91,10 +97,10 @@ public final class StopAnalyzer extends StopwordAnalyzerBase {

  /**
   * Creates
   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   * used to tokenize all the text in the provided {@link Reader}.
   *
   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   *         built from a {@link LowerCaseTokenizer} filtered with
   *         {@link StopFilter}
   */
@@ -1,4 +1,4 @@
package org.apache.lucene.analysis;
package org.apache.lucene.analysis.core;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -22,6 +22,9 @@ import java.util.Arrays;
import java.util.Set;
import java.util.List;

import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.queryParser.QueryParser; // for javadoc
@@ -1,4 +1,4 @@
package org.apache.lucene.analysis;
package org.apache.lucene.analysis.core;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -19,6 +19,9 @@ package org.apache.lucene.analysis;

import java.io.Reader;

import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents;
import org.apache.lucene.util.Version;

/**
@@ -1,4 +1,4 @@
package org.apache.lucene.analysis;
package org.apache.lucene.analysis.core;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -19,6 +19,8 @@ package org.apache.lucene.analysis;

import java.io.Reader;

import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
@@ -17,17 +17,17 @@ package org.apache.lucene.analysis.cz;
 * limitations under the License.
 */

import org.apache.lucene.analysis.ReusableAnalyzerBase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;

import java.io.*;

@@ -218,10 +218,10 @@ public final class CzechAnalyzer extends ReusableAnalyzerBase {

  /**
   * Creates
   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   * used to tokenize all the text in the provided {@link Reader}.
   *
   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   *         built from a {@link StandardTokenizer} filtered with
   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
   *         , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If
@@ -23,16 +23,16 @@ import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.DanishStemmer;

@@ -106,11 +106,11 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {

  /**
   * Creates a
   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   * which tokenizes all the text in the provided {@link Reader}.
   *
   * @return A
   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   *         {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   *         built from an {@link StandardTokenizer} filtered with
   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
   *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -28,17 +28,17 @@ import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.German2Stemmer;

@@ -224,10 +224,10 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {

  /**
   * Creates
   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   * used to tokenize all the text in the provided {@link Reader}.
   *
   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   *         built from a {@link StandardTokenizer} filtered with
   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
   *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -17,13 +17,13 @@ package org.apache.lucene.analysis.el;
 */

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;

import java.io.IOException;

@@ -121,10 +121,10 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {

  /**
   * Creates
   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   * used to tokenize all the text in the provided {@link Reader}.
   *
   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   *         built from a {@link StandardTokenizer} filtered with
   *         {@link GreekLowerCaseFilter}, {@link StandardFilter},
   *         {@link StopFilter}, and {@link GreekStemFilter}
@@ -22,15 +22,15 @@ import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.util.Version;

/**

@@ -89,11 +89,11 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {

  /**
   * Creates a
   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   * which tokenizes all the text in the provided {@link Reader}.
   *
   * @return A
   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   *         {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   *         built from an {@link StandardTokenizer} filtered with
   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
   *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -23,16 +23,16 @@ import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.SpanishStemmer;

@@ -106,11 +106,11 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {

  /**
   * Creates a
   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   * which tokenizes all the text in the provided {@link Reader}.
   *
   * @return A
   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   *         {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   *         built from an {@link StandardTokenizer} filtered with
   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
   *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -24,14 +24,14 @@ import java.util.Hashtable;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;

/**

@@ -136,10 +136,10 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {

  /**
   * Creates
   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   * used to tokenize all the text in the provided {@link Reader}.
   *
   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   *         built from a {@link ArabicLetterTokenizer} filtered with
   *         {@link LowerCaseFilter}, {@link ArabicNormalizationFilter},
   *         {@link PersianNormalizationFilter} and Persian Stop words
@@ -23,16 +23,16 @@ import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;
import org.tartarus.snowball.ext.FinnishStemmer;

@@ -106,11 +106,11 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {

  /**
   * Creates a
   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   * which tokenizes all the text in the provided {@link Reader}.
   *
   * @return A
   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   *         {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   *         built from an {@link StandardTokenizer} filtered with
   *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
   *         , {@link KeywordMarkerFilter} if a stem exclusion set is
@@ -19,17 +19,17 @@ package org.apache.lucene.analysis.fr;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.snowball.SnowballFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.util.Version;

import java.io.File;

@@ -225,10 +225,10 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {

  /**
   * Creates
   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   * used to tokenize all the text in the provided {@link Reader}.
   *
   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   *         built from a {@link StandardTokenizer} filtered with
   *         {@link StandardFilter}, {@link ElisionFilter},
   *         {@link LowerCaseFilter}, {@link StopFilter},
@@ -21,13 +21,13 @@ import java.io.IOException;
import java.io.Reader;
import java.util.Set;

import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.in.IndicNormalizationFilter;
import org.apache.lucene.analysis.in.IndicTokenizer;
import org.apache.lucene.util.Version;

@@ -106,10 +106,10 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {

  /**
   * Creates
   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   * used to tokenize all the text in the provided {@link Reader}.
   *
   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
   * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
   *         built from a {@link IndicTokenizer} filtered with
   *         {@link LowerCaseFilter}, {@link IndicNormalizationFilter},
   *         {@link HindiNormalizationFilter}, {@link KeywordMarkerFilter}
Some files were not shown because too many files have changed in this diff.