LUCENE-2413: consolidate remaining concrete core analyzers to modules/analysis

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@948195 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Robert Muir 2010-05-25 20:16:44 +00:00
parent 04c8590b80
commit 71b59ca566
262 changed files with 859 additions and 726 deletions

View File

@ -23,21 +23,11 @@
<import file="common-build.xml"/>
<property name="build.demo.template" value="src/demo/demo-build.template"/>
<property name="demo.name" value="lucene-demos-${version}"/>
<property name="demo.war.name" value="luceneweb"/>
<!-- Build classpath -->
<path id="classpath">
<pathelement location="${build.dir}/classes/java"/>
</path>
<path id="demo.classpath">
<path refid="classpath"/>
<pathelement location="${build.dir}/classes/demo"/>
</path>
<path id="test.classpath">
<path refid="classpath"/>
<path refid="junit-path"/>
@ -57,10 +47,10 @@
excludes="contrib/db/*/lib/,contrib/*/ext-libs/,src/site/build/,contrib/benchmark/temp/,contrib/benchmark/work/"
/>
<patternset id="binary.build.dist.patterns"
includes="${final.name}.jar,${demo.war.name}.war,${demo.name}.jar,docs/,contrib/*/*.jar,contrib/*/*.war, contrib/*/*/*.jar"
includes="${final.name}.jar,docs/,contrib/*/*.jar,contrib/*/*.war, contrib/*/*/*.jar"
/>
<patternset id="binary.root.dist.patterns"
includes="src/demo/,src/jsp/,docs/,*.txt,contrib/*/README*,**/CHANGES.txt,lib/servlet-api-*.jar"
includes="docs/,*.txt,contrib/*/README*,**/CHANGES.txt"
excludes="${build.demo.template}"
/>
@ -177,70 +167,7 @@ The source distribution does not contain sources of the previous Lucene Java ver
<!-- -->
<!-- ================================================================== -->
<target name="jar-demo" depends="compile-demo">
<sequential>
<build-manifest title="Lucene Search Engine: demos"/>
<jar
destfile="${build.dir}/${demo.name}.jar"
basedir="${build.dir}/classes/demo"
excludes="**/*.java"
manifest="${manifest.file}">
<metainf dir="${common.dir}">
<include name="LICENSE.txt"/>
<include name="NOTICE.txt"/>
</metainf>
</jar>
</sequential>
</target>
<target name="jar-demo-src" depends="compile-demo">
<sequential>
<build-manifest title="Lucene Search Engine: demos"/>
<jar
destfile="${build.dir}/${demo.name}-src.jar"
basedir="src/demo"
manifest="${manifest.file}">
<metainf dir="${common.dir}">
<include name="LICENSE.txt"/>
<include name="NOTICE.txt"/>
</metainf>
</jar>
</sequential>
</target>
<target name="war-demo" depends="jar-core,jar-demo">
<sequential>
<build-manifest title="Lucene Search Engine: demos"/>
<war destfile="${build.dir}/${demo.war.name}.war"
webxml="src/jsp/WEB-INF/web.xml"
manifest="${manifest.file}">
<fileset dir="src/jsp" excludes="WEB-INF/web.xml"/>
<lib dir="${build.dir}" includes="${demo.name}.jar"/>
<lib dir="${build.dir}" includes="${final.name}.jar"/>
<metainf dir="${common.dir}">
<include name="LICENSE.txt"/>
<include name="NOTICE.txt"/>
</metainf>
</war>
</sequential>
</target>
<target name="compile-core" depends="jflex-notice, javacc-notice, common.compile-core"/>
<!-- ================================================================== -->
<!-- B U I L D D E M O -->
<!-- ================================================================== -->
<!-- -->
<!-- ================================================================== -->
<target name="compile-demo" depends="compile-core">
<mkdir dir="${build.dir}/classes/demo"/>
<compile
srcdir="src/demo"
destdir="${build.dir}/classes/demo">
<classpath refid="demo.classpath"/>
</compile>
</target>
<!-- ================================================================== -->
<!-- D O C U M E N T A T I O N -->
@ -252,7 +179,7 @@ The source distribution does not contain sources of the previous Lucene Java ver
</target>
<target name="javadocs" description="Generate javadoc"
depends="javadocs-all, javadocs-core, javadocs-demo, javadocs-contrib">
depends="javadocs-all, javadocs-core, javadocs-contrib">
<echo file="${javadoc.dir}/index.html" append="false">
<![CDATA[<html><head><title>${Name} ${version} Javadoc Index</title></head>
<body>
@ -266,7 +193,6 @@ The source distribution does not contain sources of the previous Lucene Java ver
<contrib-crawl target="javadocs-index.html" failonerror="false"/>
<echo file="${javadoc.dir}/index.html" append="true"><![CDATA[
</ul>
<li><a href="demo/index.html">Demo</a></li>
</ul></body>]]></echo>
</target>
@ -285,27 +211,12 @@ The source distribution does not contain sources of the previous Lucene Java ver
</sequential>
</target>
<target name="javadocs-demo" description="Generate javadoc for demo classes">
<sequential>
<mkdir dir="${javadoc.dir}/demo"/>
<invoke-javadoc
destdir="${javadoc.dir}/demo"
title="${Name} ${version} demo API">
<sources>
<packageset dir="src/demo"/>
<link href=""/>
</sources>
</invoke-javadoc>
<jarify basedir="${javadoc.dir}/demo" destfile="${build.dir}/${demo.name}-javadoc.jar"/>
</sequential>
</target>
<target name="javadocs-contrib" description="Generate javadoc for contrib classes">
<contrib-crawl target="javadocs"
failonerror="false"/>
</target>
<target name="javadocs-all" description="Generate javadoc for core, demo and contrib classes" depends="build-contrib">
<target name="javadocs-all" description="Generate javadoc for core and contrib classes" depends="build-contrib">
<sequential>
<mkdir dir="${javadoc.dir}/all"/>
<invoke-javadoc
@ -314,8 +225,6 @@ The source distribution does not contain sources of the previous Lucene Java ver
<!-- TODO: find a dynamic way to include multiple source roots -->
<packageset dir="src/java"/>
<packageset dir="src/demo"/>
<!-- please keep this list up to date, and in alpha order... -->
<!-- ie: `find contrib/* -path \*src/java | sort` -->
@ -348,11 +257,10 @@ The source distribution does not contain sources of the previous Lucene Java ver
<!-- packages are not being matched by any of these rules -->
<group title="Core" packages="org.apache.*:org.apache.lucene.analysis:org.apache.lucene.analysis.standard*:org.apache.lucene.analysis.tokenattributes*"/>
<group title="Demo" packages="org.apache.lucene.demo*"/>
<group title="contrib: Ant" packages="org.apache.lucene.ant*"/>
<group title="contrib: Benchmark" packages="org.apache.lucene.benchmark*"/>
<group title="contrib: Demo" packages="org.apache.lucene.demo*"/>
<group title="contrib: ICU" packages="org.apache.lucene.collation*"/>
<group title="contrib: DB" packages="org.apache.lucene.store.db*:org.apache.lucene.store.je*:com.sleepycat*"/>
<group title="contrib: Highlighter" packages="org.apache.lucene.search.highlight:*org.apache.lucene.search.vectorhighlight*"/>
@ -379,7 +287,7 @@ The source distribution does not contain sources of the previous Lucene Java ver
<!-- ================================================================== -->
<!-- -->
<!-- ================================================================== -->
<target name="package" depends="jar-core, javadocs, war-demo, build-contrib, init-dist, changes-to-html">
<target name="package" depends="jar-core, javadocs, build-contrib, init-dist, changes-to-html">
<copy file="${build.demo.template}" tofile="${build.dir}/build-demo.xml">
<filterset begintoken="@PLACEHOLDER_" endtoken="@">
<filter token="version" value="${version}"/>
@ -518,7 +426,7 @@ The source distribution does not contain sources of the previous Lucene Java ver
<target name="dist-all" depends="dist, dist-src"/>
<target name="generate-maven-artifacts" depends="maven.ant.tasks-check, package, jar-src, jar-demo-src, javadocs">
<target name="generate-maven-artifacts" depends="maven.ant.tasks-check, package, jar-src, javadocs">
<sequential>
<m2-deploy pom.xml="lucene-parent-pom.xml.template"/>
<m2-deploy pom.xml="lucene-core-pom.xml.template">
@ -529,16 +437,7 @@ The source distribution does not contain sources of the previous Lucene Java ver
classifier="javadoc"/>
</artifact-attachments>
</m2-deploy>
<m2-deploy pom.xml="lucene-demos-pom.xml.template">
<artifact-attachments>
<attach file="${build.dir}/${demo.name}-src.jar"
classifier="sources"/>
<attach file="${build.dir}/${demo.name}-javadoc.jar"
classifier="javadoc"/>
</artifact-attachments>
</m2-deploy>
<m2-deploy pom.xml="lucene-contrib-pom.xml.template"/>
<contrib-crawl target="dist-maven"/>
</sequential>
@ -604,13 +503,10 @@ The source distribution does not contain sources of the previous Lucene Java ver
<fileset dir="contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/parser" includes="*.java">
<containsregexp expression="Generated.*By.*JavaCC"/>
</fileset>
<fileset dir="src/demo/org/apache/lucene/demo/html" includes="*.java">
<containsregexp expression="Generated.*By.*JavaCC"/>
</fileset>
</delete>
</target>
<target name="javacc" depends="init,javacc-check,clean-javacc,javacc-QueryParser,javacc-HTMLParser,javacc-contrib-queryparser"/>
<target name="javacc" depends="init,javacc-check,clean-javacc,javacc-QueryParser,javacc-contrib-queryparser"/>
<target name="javacc-QueryParser" depends="init,javacc-check" if="javacc.present">
<sequential>
@ -629,12 +525,6 @@ The source distribution does not contain sources of the previous Lucene Java ver
</sequential>
</target>
<target name="javacc-HTMLParser" depends="init,javacc-check" if="javacc.present">
<invoke-javacc target="src/demo/org/apache/lucene/demo/html/HTMLParser.jj"
outputDir="src/demo/org/apache/lucene/demo/html"
/>
</target>
<target name="javacc-contrib-queryparser" depends="init,javacc-check" if="javacc.present">
<ant target="javacc"
@ -642,33 +532,6 @@ The source distribution does not contain sources of the previous Lucene Java ver
antfile="build.xml"
/>
</target>
<!-- ================================================================== -->
<!-- Build the JFlex files into the source tree -->
<!-- ================================================================== -->
<target name="jflex" depends="jflex-check, clean-jflex,jflex-StandardAnalyzer" />
<target name="jflex-StandardAnalyzer" depends="init,jflex-check" if="jflex.present">
<taskdef classname="jflex.anttask.JFlexTask" name="jflex">
<classpath refid="jflex.classpath"/>
</taskdef>
<jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.jflex"
outdir="src/java/org/apache/lucene/analysis/standard"
nobak="on" />
<jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.jflex"
outdir="src/java/org/apache/lucene/analysis/standard"
nobak="on" />
</target>
<target name="clean-jflex">
<delete>
<fileset dir="src/java/org/apache/lucene/analysis/standard" includes="*.java">
<containsregexp expression="generated.*by.*JFlex"/>
</fileset>
</delete>
</target>
<macrodef name="createLevAutomaton">
<attribute name="n"/>

View File

@ -2,6 +2,11 @@ Lucene contrib change Log
======================= Trunk (not yet released) =======================
Build
* LUCENE-2413: Moved the demo out of lucene core and into contrib/demo.
(Robert Muir)
======================= Lucene 3.x (not yet released) =======================
Changes in backwards compatibility policy

View File

@ -34,4 +34,21 @@
/>
<import file="../contrib-build.xml"/>
<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
<path id="classpath">
<pathelement path="${analyzers-common.jar}"/>
<path refid="base.classpath"/>
</path>
<target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
<target name="compile-analyzers-common" unless="analyzers-common.uptodate">
<subant target="default">
<fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
</subant>
</target>
</project>

View File

@ -30,9 +30,9 @@ import java.util.Vector;
import java.lang.reflect.Constructor;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;

View File

@ -21,7 +21,7 @@ import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

View File

@ -18,6 +18,7 @@
<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
<contrib-uptodate name="memory" property="memory.uptodate" classpath.property="memory.jar"/>
<contrib-uptodate name="demo" property="demo.uptodate" classpath.property="demo.jar"/>
<target name="check-files">
<available file="temp/news20.tar.gz" property="news20.exists"/>
@ -139,8 +140,8 @@
<pathelement path="${memory.jar}"/>
<pathelement path="${highlighter.jar}"/>
<pathelement path="${analyzers-common.jar}"/>
<pathelement path="${demo.jar}"/>
<path refid="base.classpath"/>
<pathelement path="${common.dir}/build/classes/demo"/>
<fileset dir="lib">
<include name="**/*.jar"/>
</fileset>
@ -228,9 +229,9 @@
<echo>Benchmark output in JIRA table format is in file: ${shingle.jira.output.file}</echo>
</target>
<target name="compile-demo">
<subant target="compile-demo">
<fileset dir="${common.dir}" includes="build.xml"/>
<target name="compile-demo" unless="demo.uptodate">
<subant target="default">
<fileset dir="${common.dir}/contrib/demo" includes="build.xml"/>
</subant>
</target>
<target name="compile-highlighter" unless="highlighter.uptodate">

View File

@ -0,0 +1,78 @@
<?xml version="1.0"?>
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<project name="demo" default="default">
<description>
Lucene Demo
</description>
<property name="build.demo.template" value="src/java/demo-build.template"/>
<property name="demo.name" value="lucene-demos-${version}"/>
<property name="demo.war.name" value="luceneweb"/>
<import file="../contrib-build.xml"/>
<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
<path id="classpath">
<pathelement path="${analyzers-common.jar}"/>
<path refid="base.classpath"/>
</path>
<target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
<target name="compile-analyzers-common" unless="analyzers-common.uptodate">
<subant target="default">
<fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
</subant>
</target>
<target name="war-demo" depends="jar-core">
<sequential>
<build-manifest title="Lucene Search Engine: demos"/>
<war destfile="${build.dir}/${demo.war.name}.war"
webxml="src/jsp/WEB-INF/web.xml"
manifest="${manifest.file}">
<fileset dir="src/jsp" excludes="WEB-INF/web.xml"/>
<lib dir="${build.dir}/../.." includes="lucene-core-${version}.jar"/>
<lib dir="${common.dir}/../modules/analysis/build/common" includes="lucene-analyzers-common-${version}.jar"/>
<lib dir="${build.dir}" includes="${final.name}.jar"/>
<metainf dir="${common.dir}">
<include name="LICENSE.txt"/>
<include name="NOTICE.txt"/>
</metainf>
</war>
</sequential>
</target>
<target name="clean-javacc">
<fileset dir="src/demo/org/apache/lucene/demo/html" includes="*.java">
<containsregexp expression="Generated.*By.*JavaCC"/>
</fileset>
</target>
<target name="javacc" depends="init,javacc-check" if="javacc.present">
<invoke-javacc target="src/demo/org/apache/lucene/demo/html/HTMLParser.jj"
outputDir="src/demo/org/apache/lucene/demo/html"
/>
</target>
</project>

View File

@ -21,24 +21,10 @@ import java.util.Collections;
import java.util.LinkedList;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.index.TermVectorOffsetInfo;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
/**
* <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
@ -49,24 +35,24 @@ public class FieldTermStack {
private final String fieldName;
LinkedList<TermInfo> termList = new LinkedList<TermInfo>();
public static void main( String[] args ) throws Exception {
Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "f", analyzer );
Query query = parser.parse( "a x:b" );
FieldQuery fieldQuery = new FieldQuery( query, true, false );
//public static void main( String[] args ) throws Exception {
// Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT);
// QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "f", analyzer );
// Query query = parser.parse( "a x:b" );
// FieldQuery fieldQuery = new FieldQuery( query, true, false );
Directory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
Document doc = new Document();
doc.add( new Field( "f", "a a a b b c a b b c d e f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
doc.add( new Field( "f", "b a b a f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
writer.addDocument( doc );
writer.close();
// Directory dir = new RAMDirectory();
// IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer));
// Document doc = new Document();
// doc.add( new Field( "f", "a a a b b c a b b c d e f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
// doc.add( new Field( "f", "b a b a f", Store.YES, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS ) );
// writer.addDocument( doc );
// writer.close();
IndexReader reader = IndexReader.open( dir, true );
new FieldTermStack( reader, 0, "f", fieldQuery );
reader.close();
}
// IndexReader reader = IndexReader.open( dir, true );
// new FieldTermStack( reader, 0, "f", fieldQuery );
// reader.close();
//}
/**
* a constructor.

View File

@ -38,6 +38,22 @@
<import file="../contrib-build.xml"/>
<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
<path id="classpath">
<pathelement path="${analyzers-common.jar}"/>
<path refid="base.classpath"/>
</path>
<target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
<target name="compile-analyzers-common" unless="analyzers-common.uptodate">
<subant target="default">
<fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
</subant>
</target>
<target name="jar" depends="compile" description="Create JAR">
<jarify>
<manifest-attributes>

View File

@ -27,13 +27,9 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

View File

@ -27,4 +27,19 @@
<import file="../contrib-build.xml"/>
<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
<path id="classpath">
<pathelement path="${analyzers-common.jar}"/>
<path refid="base.classpath"/>
</path>
<target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
<target name="compile-analyzers-common" unless="analyzers-common.uptodate">
<subant target="default">
<fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
</subant>
</target>
</project>

View File

@ -21,7 +21,7 @@ import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

View File

@ -16,7 +16,7 @@ package org.apache.lucene.misc;
* limitations under the License.
*/
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;

View File

@ -32,7 +32,6 @@ import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
@ -49,7 +48,6 @@ import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.Version;
/**
@ -158,13 +156,6 @@ public final class MoreLikeThis {
*/
public static final int DEFAULT_MAX_NUM_TOKENS_PARSED=5000;
/**
* Default analyzer to parse source doc with.
* @see #getAnalyzer
*/
public static final Analyzer DEFAULT_ANALYZER = new StandardAnalyzer(Version.LUCENE_CURRENT);
/**
* Ignore terms with less than this frequency in the source doc.
* @see #getMinTermFreq
@ -240,7 +231,7 @@ public final class MoreLikeThis {
/**
* Analyzer that will be used to parse the doc.
*/
private Analyzer analyzer = DEFAULT_ANALYZER;
private Analyzer analyzer = null;
/**
* Ignore words less frequent that this.
@ -343,10 +334,9 @@ public final class MoreLikeThis {
/**
* Returns an analyzer that will be used to parse source doc with. The default analyzer
* is the {@link #DEFAULT_ANALYZER}.
* is not set.
*
* @return the analyzer that will be used to parse source doc with.
* @see #DEFAULT_ANALYZER
*/
public Analyzer getAnalyzer() {
return analyzer;
@ -887,6 +877,10 @@ public final class MoreLikeThis {
private void addTermFrequencies(Reader r, Map<String,Int> termFreqMap, String fieldName)
throws IOException
{
if (analyzer == null) {
throw new UnsupportedOperationException("To use MoreLikeThis without " +
"term vectors, you must provide an Analyzer");
}
TokenStream ts = analyzer.tokenStream(fieldName, r);
int tokenCount=0;
// for every token
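Note: with DEFAULT_ANALYZER gone, MoreLikeThis only re-analyzes stored text when the caller supplies an analyzer; fields without term vectors now require an explicit setAnalyzer() call, as the guard above enforces. A minimal sketch of the new calling convention (reader and docId are assumed to come from the surrounding application):
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.similar.MoreLikeThis;
import org.apache.lucene.util.Version;
MoreLikeThis mlt = new MoreLikeThis(reader);
// required when the target fields have no term vectors; otherwise
// addTermFrequencies() now throws UnsupportedOperationException
mlt.setAnalyzer(new StandardAnalyzer(Version.LUCENE_CURRENT));
Query like = mlt.like(docId);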

View File

@ -21,7 +21,6 @@ import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;

View File

@ -24,6 +24,7 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
@ -76,6 +77,7 @@ public class TestMoreLikeThis extends LuceneTestCase {
MoreLikeThis mlt = new MoreLikeThis(
reader);
mlt.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
mlt.setMinDocFreq(1);
mlt.setMinTermFreq(1);
mlt.setMinWordLen(1);
@ -110,6 +112,7 @@ public class TestMoreLikeThis extends LuceneTestCase {
private Map<String,Float> getOriginalValues() throws IOException {
Map<String,Float> originalValues = new HashMap<String,Float>();
MoreLikeThis mlt = new MoreLikeThis(reader);
mlt.setAnalyzer(new MockAnalyzer(MockTokenizer.WHITESPACE, false));
mlt.setMinDocFreq(1);
mlt.setMinTermFreq(1);
mlt.setMinWordLen(1);

View File

@ -577,22 +577,6 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
return sb.toString();
}
/**
* Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
* Usage:<br>
* <code>java org.apache.lucene.queryParser.QueryParser &lt;input&gt;</code>
*/
public static void main(String[] args) throws Exception {
if (args.length == 0) {
System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
System.exit(0);
}
PrecedenceQueryParser qp = new PrecedenceQueryParser("field",
new org.apache.lucene.analysis.SimpleAnalyzer());
Query q = qp.parse(args[0]);
System.out.println(q.toString("field"));
}
// * Query ::= ( Clause )*
// * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
final public int Conjunction() throws ParseException {
@ -1290,4 +1274,19 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
JJCalls next;
}
/**
* Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
* Usage:<br>
* <code>java org.apache.lucene.queryParser.QueryParser &lt;input&gt;</code>
*/
// public static void main(String[] args) throws Exception {
// if (args.length == 0) {
// System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
// System.exit(0);
// }
// PrecedenceQueryParser qp = new PrecedenceQueryParser("field",
// new org.apache.lucene.analysis.SimpleAnalyzer());
// Query q = qp.parse(args[0]);
// System.out.println(q.toString("field"));
// }
}

View File

@ -606,16 +606,16 @@ public class PrecedenceQueryParser {
* Usage:<br>
* <code>java org.apache.lucene.queryParser.QueryParser &lt;input&gt;</code>
*/
public static void main(String[] args) throws Exception {
if (args.length == 0) {
System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
System.exit(0);
}
PrecedenceQueryParser qp = new PrecedenceQueryParser("field",
new org.apache.lucene.analysis.SimpleAnalyzer());
Query q = qp.parse(args[0]);
System.out.println(q.toString("field"));
}
// public static void main(String[] args) throws Exception {
// if (args.length == 0) {
// System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
// System.exit(0);
// }
// PrecedenceQueryParser qp = new PrecedenceQueryParser("field",
// new org.apache.lucene.analysis.SimpleAnalyzer());
// Query q = qp.parse(args[0]);
// System.out.println(q.toString("field"));
// }
}
PARSER_END(PrecedenceQueryParser)

View File

@ -20,11 +20,9 @@ package org.apache.lucene.queryParser.standard;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;

View File

@ -32,7 +32,6 @@ import java.util.Locale;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
@ -341,8 +340,9 @@ public class TestQPHelper extends LocalizedTestCase {
"t<EFBFBD>rm term term");
assertQueryEquals("<EFBFBD>mlaut", new MockAnalyzer(MockTokenizer.WHITESPACE, false), "<EFBFBD>mlaut");
assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
// FIXME: change MockAnalyzer to not extend CharTokenizer for this test
//assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
//assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
assertQueryEquals("a AND b", null, "+a +b");
assertQueryEquals("(a AND b)", null, "+a +b");

View File

@ -30,7 +30,6 @@ import java.util.List;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
@ -333,8 +332,9 @@ public class TestQueryParserWrapper extends LocalizedTestCase {
"t<EFBFBD>rm term term");
assertQueryEquals("<EFBFBD>mlaut", new MockAnalyzer(MockTokenizer.WHITESPACE, false), "<EFBFBD>mlaut");
assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
//FIXME: Change MockAnalyzer to not extend CharTokenizer for this test
//assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
//assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
assertQueryEquals("a AND b", null, "+a +b");
assertQueryEquals("(a AND b)", null, "+a +b");

View File

@ -24,4 +24,20 @@
</description>
<import file="../contrib-build.xml"/>
<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
<path id="classpath">
<pathelement path="${analyzers-common.jar}"/>
<path refid="base.classpath"/>
</path>
<target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
<target name="compile-analyzers-common" unless="analyzers-common.uptodate">
<subant target="default">
<fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
</subant>
</target>
</project>

View File

@ -20,7 +20,7 @@ package org.apache.lucene.search.spell;
import java.io.IOException;
import java.util.Iterator;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;

View File

@ -22,7 +22,7 @@ import java.util.Iterator;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;

View File

@ -25,6 +25,22 @@
<import file="../contrib-build.xml"/>
<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
<path id="classpath">
<pathelement path="${analyzers-common.jar}"/>
<path refid="base.classpath"/>
</path>
<target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
<target name="compile-analyzers-common" unless="analyzers-common.uptodate">
<subant target="default">
<fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
</subant>
</target>
<target name="list-demo" depends="compile">
<java classname="org.apache.lucene.swing.models.ListSearcherSimulator"
fork="yes" spawn="yes"

View File

@ -25,7 +25,7 @@ import javax.swing.event.ListDataEvent;
import javax.swing.event.ListDataListener;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;

View File

@ -24,7 +24,7 @@ import javax.swing.table.AbstractTableModel;
import javax.swing.table.TableModel;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;

View File

@ -30,6 +30,22 @@
<import file="../contrib-build.xml"/>
<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
<path id="classpath">
<pathelement path="${analyzers-common.jar}"/>
<path refid="base.classpath"/>
</path>
<target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
<target name="compile-analyzers-common" unless="analyzers-common.uptodate">
<subant target="default">
<fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
</subant>
</target>
<target name="index" depends="compile" description="Build WordNet index">
<fail if="synindex.exists">
Index already exists - must remove first.

View File

@ -23,12 +23,12 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
public class TestSynonymTokenFilter extends BaseTokenStreamTestCase {
final String testFile = "testSynonyms.txt";

View File

@ -156,7 +156,7 @@ public abstract class TokenStream extends AttributeSource implements Closeable {
* This method can be used to perform any end-of-stream operations, such as
* setting the final offset of a stream. The final offset of a stream might
* differ from the offset of the last token, e.g. in case one or more whitespace characters
* followed the last token, but a {@link WhitespaceTokenizer} was used.
* followed the last token, but a WhitespaceTokenizer was used.
*
* @throws IOException
*/
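As an illustration of the contract this javadoc describes, a hedged sketch of a Tokenizer overriding end() (offsetAtt is assumed to be an OffsetAttribute obtained via addAttribute(), and finalOffset a counter the concrete tokenizer maintains itself):
@Override
public void end() throws IOException {
  super.end();
  // report an offset past trailing characters that produced no token,
  // e.g. whitespace after the last word
  offsetAtt.setOffset(finalOffset, finalOffset);
}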

View File

@ -1082,22 +1082,6 @@ public class QueryParser implements QueryParserConstants {
return sb.toString();
}
/**
* Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
* Usage:<br>
* <code>java org.apache.lucene.queryParser.QueryParser &lt;input&gt;</code>
*/
public static void main(String[] args) throws Exception {
if (args.length == 0) {
System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
System.exit(0);
}
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
new org.apache.lucene.analysis.SimpleAnalyzer());
Query q = qp.parse(args[0]);
System.out.println(q.toString("field"));
}
// * Query ::= ( Clause )*
// * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
final public int Conjunction() throws ParseException {
@ -1802,4 +1786,19 @@ public class QueryParser implements QueryParserConstants {
JJCalls next;
}
/**
* Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
* Usage:<br>
* <code>java org.apache.lucene.queryParser.QueryParser &lt;input&gt;</code>
*/
// public static void main(String[] args) throws Exception {
// if (args.length == 0) {
// System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
// System.exit(0);
// }
// QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
// new org.apache.lucene.analysis.SimpleAnalyzer());
// Query q = qp.parse(args[0]);
// System.out.println(q.toString("field"));
// }
}

View File

@ -1111,16 +1111,16 @@ public class QueryParser {
* Usage:<br>
* <code>java org.apache.lucene.queryParser.QueryParser &lt;input&gt;</code>
*/
public static void main(String[] args) throws Exception {
if (args.length == 0) {
System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
System.exit(0);
}
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
new org.apache.lucene.analysis.SimpleAnalyzer());
Query q = qp.parse(args[0]);
System.out.println(q.toString("field"));
}
// public static void main(String[] args) throws Exception {
// if (args.length == 0) {
// System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
// System.exit(0);
// }
// QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
// new org.apache.lucene.analysis.SimpleAnalyzer());
// Query q = qp.parse(args[0]);
// System.out.println(q.toString("field"));
// }
}
PARSER_END(QueryParser)
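Since SimpleAnalyzer now lives in the analyzers-common module, which core's query parser can no longer reference, the removed command-line tool can be recreated in application code that has the module jar on its classpath. A rough sketch, assuming the relocated org.apache.lucene.analysis.core.SimpleAnalyzer and its Version-taking constructor:
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
public class ParseDemo {
  public static void main(String[] args) throws Exception {
    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
        new SimpleAnalyzer(Version.LUCENE_CURRENT));
    Query q = qp.parse(args[0]);
    System.out.println(q.toString("field"));
  }
}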

View File

@ -21,6 +21,7 @@ import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.AttributeSource.AttributeFactory;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;
@ -45,6 +46,13 @@ public class MockTokenizer extends CharTokenizer {
private final boolean lowerCase;
private int state;
public MockTokenizer(AttributeFactory factory, Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
super(Version.LUCENE_CURRENT, factory, input);
this.runAutomaton = runAutomaton;
this.lowerCase = lowerCase;
this.state = runAutomaton.getInitialState();
}
public MockTokenizer(Reader input, CharacterRunAutomaton runAutomaton, boolean lowerCase) {
super(Version.LUCENE_CURRENT, input);
this.runAutomaton = runAutomaton;
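The new factory-taking constructor mirrors the CharTokenizer superclass and lets tests plug in custom attribute implementations; TestToken (further below) now exercises it. A short sketch of both constructors (the analyzed text is arbitrary):
import java.io.StringReader;
import org.apache.lucene.analysis.Token;
MockTokenizer plain = new MockTokenizer(new StringReader("foo bar"),
    MockTokenizer.WHITESPACE, false);
MockTokenizer custom = new MockTokenizer(Token.TOKEN_ATTRIBUTE_FACTORY,
    new StringReader("foo bar"), MockTokenizer.WHITESPACE, false);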

View File

@ -46,8 +46,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
}
// internal buffer size is 1024 make sure we have a surrogate pair right at the border
builder.insert(1023, "\ud801\udc1c");
LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(
TEST_VERSION_CURRENT, new StringReader(builder.toString()));
MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString()), MockTokenizer.SIMPLE, true);
assertTokenStreamContents(tokenizer, builder.toString().toLowerCase().split(" "));
}
@ -64,8 +63,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
builder.append("a");
}
builder.append("\ud801\udc1cabc");
LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(
TEST_VERSION_CURRENT, new StringReader(builder.toString()));
MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString()), MockTokenizer.SIMPLE, true);
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase()});
}
}
@ -79,8 +77,7 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
for (int i = 0; i < 255; i++) {
builder.append("A");
}
LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(
TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString() + builder.toString()), MockTokenizer.SIMPLE, true);
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
}
@ -94,42 +91,10 @@ public class TestCharTokenizers extends BaseTokenStreamTestCase {
builder.append("A");
}
builder.append("\ud801\udc1c");
LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(
TEST_VERSION_CURRENT, new StringReader(builder.toString() + builder.toString()));
MockTokenizer tokenizer = new MockTokenizer(new StringReader(builder.toString() + builder.toString()), MockTokenizer.SIMPLE, true);
assertTokenStreamContents(tokenizer, new String[] {builder.toString().toLowerCase(), builder.toString().toLowerCase()});
}
public void testLowerCaseTokenizer() throws IOException {
StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(TEST_VERSION_CURRENT,
reader);
assertTokenStreamContents(tokenizer, new String[] { "tokenizer",
"\ud801\udc44test" });
}
public void testLowerCaseTokenizerBWCompat() throws IOException {
StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
LowerCaseTokenizer tokenizer = new LowerCaseTokenizer(Version.LUCENE_30,
reader);
assertTokenStreamContents(tokenizer, new String[] { "tokenizer", "test" });
}
public void testWhitespaceTokenizer() throws IOException {
StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT,
reader);
assertTokenStreamContents(tokenizer, new String[] { "Tokenizer",
"\ud801\udc1ctest" });
}
public void testWhitespaceTokenizerBWCompat() throws IOException {
StringReader reader = new StringReader("Tokenizer \ud801\udc1ctest");
WhitespaceTokenizer tokenizer = new WhitespaceTokenizer(Version.LUCENE_30,
reader);
assertTokenStreamContents(tokenizer, new String[] { "Tokenizer",
"\ud801\udc1ctest" });
}
public void testIsTokenCharCharInSubclass() {
new TestingCharTokenizer(Version.LUCENE_30, new StringReader(""));
try {

View File

@ -239,7 +239,7 @@ public class TestToken extends LuceneTestCase {
}
public void testTokenAttributeFactory() throws Exception {
TokenStream ts = new WhitespaceTokenizer(Token.TOKEN_ATTRIBUTE_FACTORY, new StringReader("foo bar"));
TokenStream ts = new MockTokenizer(Token.TOKEN_ATTRIBUTE_FACTORY, new StringReader("foo bar"), MockTokenizer.WHITESPACE, false);
assertTrue("TypeAttribute is not implemented by SenselessAttributeImpl",
ts.addAttribute(SenselessAttribute.class) instanceof SenselessAttributeImpl);

View File

@ -25,8 +25,6 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

View File

@ -42,7 +42,6 @@ import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
@ -538,67 +537,6 @@ public class TestIndexWriter extends LuceneTestCase {
}
}
/**
* Make sure we skip wicked long terms.
*/
public void testWickedLongTerm() throws IOException {
RAMDirectory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
TEST_VERSION_CURRENT, new StandardAnalyzer(TEST_VERSION_CURRENT)));
char[] chars = new char[DocumentsWriter.MAX_TERM_LENGTH_UTF8];
Arrays.fill(chars, 'x');
Document doc = new Document();
final String bigTerm = new String(chars);
// This produces a too-long term:
String contents = "abc xyz x" + bigTerm + " another term";
doc.add(new Field("content", contents, Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
// Make sure we can add another normal document
doc = new Document();
doc.add(new Field("content", "abc bbb ccc", Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
writer.close();
IndexReader reader = IndexReader.open(dir, true);
// Make sure all terms < max size were indexed
assertEquals(2, reader.docFreq(new Term("content", "abc")));
assertEquals(1, reader.docFreq(new Term("content", "bbb")));
assertEquals(1, reader.docFreq(new Term("content", "term")));
assertEquals(1, reader.docFreq(new Term("content", "another")));
// Make sure position is still incremented when
// massive term is skipped:
TermPositions tps = reader.termPositions(new Term("content", "another"));
assertTrue(tps.next());
assertEquals(1, tps.freq());
assertEquals(3, tps.nextPosition());
// Make sure the doc that has the massive term is in
// the index:
assertEquals("document with wicked long term should is not in the index!", 2, reader.numDocs());
reader.close();
// Make sure we can add a document with exactly the
// maximum length term, and search on that term:
doc = new Document();
doc.add(new Field("content", bigTerm, Field.Store.NO, Field.Index.ANALYZED));
StandardAnalyzer sa = new StandardAnalyzer(TEST_VERSION_CURRENT);
sa.setMaxTokenLength(100000);
writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, sa));
writer.addDocument(doc);
writer.close();
reader = IndexReader.open(dir, true);
assertEquals(1, reader.docFreq(new Term("content", bigTerm)));
reader.close();
dir.close();
}
public void testOptimizeMaxNumSegments() throws IOException {
MockRAMDirectory dir = new MockRAMDirectory();

View File

@ -32,7 +32,6 @@ import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.document.Document;

View File

@ -29,7 +29,6 @@ import java.util.HashSet;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
@ -255,8 +254,10 @@ public class TestQueryParser extends LocalizedTestCase {
assertQueryEquals("türm term term", new MockAnalyzer(), "türm term term");
assertQueryEquals("ümlaut", new MockAnalyzer(), "ümlaut");
assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
// FIXME: enhance MockAnalyzer to be able to support this
// it must no longer extend CharTokenizer
//assertQueryEquals("\"\"", new KeywordAnalyzer(), "");
//assertQueryEquals("foo:\"\"", new KeywordAnalyzer(), "foo:");
assertQueryEquals("a AND b", null, "+a +b");
assertQueryEquals("(a AND b)", null, "+a +b");

View File

@ -19,8 +19,6 @@ package org.apache.lucene.search;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;

View File

@ -21,19 +21,15 @@ import java.io.Reader;
import java.io.IOException;
import java.io.StringReader;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
@ -44,7 +40,6 @@ import org.apache.lucene.index.TermPositions;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.store.MockRAMDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.index.Payload;
import org.apache.lucene.search.payloads.PayloadSpanUtil;
@ -52,9 +47,7 @@ import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;

View File

@ -20,7 +20,8 @@ package org.apache.lucene.search;
import java.io.IOException;
import java.util.Random;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
@ -50,7 +51,7 @@ public class TestRegexpRandom2 extends LuceneTestCase {
super.setUp();
random = newRandom();
RAMDirectory dir = new RAMDirectory();
IndexWriter writer = new IndexWriter(dir, new KeywordAnalyzer(),
IndexWriter writer = new IndexWriter(dir, new MockAnalyzer(MockTokenizer.KEYWORD, false),
IndexWriter.MaxFieldLength.UNLIMITED);
Document doc = new Document();

View File

@ -20,7 +20,6 @@ package org.apache.lucene.search;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

View File

@ -20,7 +20,6 @@ package org.apache.lucene.search;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.*;

View File

@ -34,6 +34,16 @@ New Features
(... in progress)
* LUCENE-2413: Consolidated all Lucene analyzers into common.
- o.a.l.analysis.KeywordAnalyzer -> o.a.l.analysis.core.KeywordAnalyzer
- o.a.l.analysis.KeywordTokenizer -> o.a.l.analysis.core.KeywordTokenizer
- o.a.l.analysis.LetterTokenizer -> o.a.l.analysis.core.LetterTokenizer
- o.a.l.analysis.LowerCaseFilter -> o.a.l.analysis.core.LowerCaseFilter
- o.a.l.analysis.LowerCaseTokenizer -> o.a.l.analysis.core.LowerCaseTokenizer
- o.a.l.analysis.SimpleAnalyzer -> o.a.l.analysis.core.SimpleAnalyzer
- o.a.l.analysis.StopAnalyzer -> o.a.l.analysis.core.StopAnalyzer
- o.a.l.analysis.StopFilter -> o.a.l.analysis.core.StopFilter
- o.a.l.analysis.WhitespaceAnalyzer -> o.a.l.analysis.core.WhitespaceAnalyzer
- o.a.l.analysis.WhitespaceTokenizer -> o.a.l.analysis.core.WhitespaceTokenizer
- o.a.l.analysis.PorterStemFilter -> o.a.l.analysis.en.PorterStemFilter
- o.a.l.analysis.ASCIIFoldingFilter -> o.a.l.analysis.miscellaneous.ASCIIFoldingFilter
- o.a.l.analysis.ISOLatin1AccentFilter -> o.a.l.analysis.miscellaneous.ISOLatin1AccentFilter
@ -44,6 +54,9 @@ New Features
- o.a.l.analysis.BaseCharFilter -> o.a.l.analysis.charfilter.BaseCharFilter
- o.a.l.analysis.MappingCharFilter -> o.a.l.analysis.charfilter.MappingCharFilter
- o.a.l.analysis.NormalizeCharMap -> o.a.l.analysis.charfilter.NormalizeCharMap
- o.a.l.analysis.ReusableAnalyzerBase -> o.a.l.analysis.util.ReusableAnalyzerBase
- o.a.l.analysis.StopwordAnalyzerBase -> o.a.l.analysis.util.StopwordAnalyzerBase
- o.a.l.analysis.WordlistLoader -> o.a.l.analysis.util.WordlistLoader
... (in progress)
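For application code the migration is import-level: the diffs in this commit change only package names, and the list above is the authoritative mapping. A hypothetical before/after for one of the moved classes:
import org.apache.lucene.analysis.Analyzer;
// before: import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.util.Version;
Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_CURRENT); // same class, new package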
Build

View File

@ -38,7 +38,7 @@
<target name="compile-core" depends="jflex-notice, common.compile-core"/>
<target name="jflex" depends="jflex-check,clean-jflex,jflex-wiki-tokenizer"/>
<target name="jflex" depends="jflex-check,clean-jflex,jflex-StandardAnalyzer,jflex-wiki-tokenizer"/>
<target name="jflex-wiki-tokenizer" depends="init,jflex-check" if="jflex.present">
<taskdef classname="jflex.anttask.JFlexTask" name="jflex">
@ -49,11 +49,27 @@
nobak="on"/>
</target>
<target name="jflex-StandardAnalyzer" depends="init,jflex-check" if="jflex.present">
<taskdef classname="jflex.anttask.JFlexTask" name="jflex">
<classpath refid="jflex.classpath"/>
</taskdef>
<jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImplOrig.jflex"
outdir="src/java/org/apache/lucene/analysis/standard"
nobak="on" />
<jflex file="src/java/org/apache/lucene/analysis/standard/StandardTokenizerImpl31.jflex"
outdir="src/java/org/apache/lucene/analysis/standard"
nobak="on" />
</target>
<target name="clean-jflex">
<delete>
<fileset dir="src/java/org/apache/lucene/analysis/wikipedia" includes="*.java">
<containsregexp expression="generated.*by.*JFlex"/>
</fileset>
<fileset dir="src/java/org/apache/lucene/analysis/standard" includes="*.java">
<containsregexp expression="generated.*by.*JFlex"/>
</fileset>
</delete>
</target>
</project>

View File

@ -24,14 +24,14 @@ import java.util.Hashtable;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
import org.apache.lucene.analysis.util.WordlistLoader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.util.Version;
/**
@ -163,10 +163,10 @@ public final class ArabicAnalyzer extends StopwordAnalyzerBase {
/**
* Creates
* {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
* used to tokenize all the text in the provided {@link Reader}.
*
* @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
* @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
* built from an {@link ArabicLetterTokenizer} filtered with
* {@link LowerCaseFilter}, {@link StopFilter},
* {@link ArabicNormalizationFilter}, {@link KeywordMarkerFilter}

View File

@ -19,7 +19,7 @@ package org.apache.lucene.analysis.ar;
import java.io.Reader;
import org.apache.lucene.analysis.CharTokenizer;
import org.apache.lucene.analysis.LetterTokenizer;
import org.apache.lucene.analysis.core.LetterTokenizer;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
@ -120,7 +120,7 @@ public class ArabicLetterTokenizer extends LetterTokenizer {
/**
* Allows for Letter category or NonspacingMark category
* @see org.apache.lucene.analysis.LetterTokenizer#isTokenChar(int)
* @see org.apache.lucene.analysis.core.LetterTokenizer#isTokenChar(int)
*/
@Override
protected boolean isTokenChar(int c) {

View File

@@ -23,16 +23,16 @@ import java.io.Reader;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.Version;
 /**
@@ -119,11 +119,11 @@ public final class BulgarianAnalyzer extends StopwordAnalyzerBase {
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@@ -28,16 +28,16 @@ import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.Version;
 /**
@@ -193,10 +193,10 @@ public final class BrazilianAnalyzer extends StopwordAnalyzerBase {
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
    *         {@link LowerCaseFilter}, {@link StandardFilter}, {@link StopFilter}
    *         , and {@link BrazilianStemFilter}.

View File

@@ -19,9 +19,9 @@ package org.apache.lucene.analysis.cjk;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.util.Version;
 import java.io.Reader;

View File

@@ -19,8 +19,8 @@ package org.apache.lucene.analysis.cn;
 import java.io.Reader;
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.standard.StandardAnalyzer; // javadoc @link
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.Tokenizer;
@@ -35,10 +35,10 @@ public final class ChineseAnalyzer extends ReusableAnalyzerBase {
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    *         built from a {@link ChineseTokenizer} filtered with
    *         {@link ChineseFilter}
    */

View File

@@ -23,7 +23,7 @@ import java.util.Arrays;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.StopFilter;
+import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.util.Version;

View File

@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.core;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -19,6 +19,8 @@ package org.apache.lucene.analysis;
 import java.io.Reader;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
 /**
  * "Tokenizes" the entire stream as a single token. This is useful
  * for data like zip codes, ids, and some product names.
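
Editor's note: a small usage sketch of the analyzer documented here, under its new org.apache.lucene.analysis.core home; the field name, input, and class name KeywordAnalyzerDemo are made up.

    import java.io.IOException;
    import java.io.StringReader;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.KeywordAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class KeywordAnalyzerDemo {
      public static void main(String[] args) throws IOException {
        TokenStream ts = new KeywordAnalyzer().tokenStream("zip", new StringReader("H2B 1A0"));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        while (ts.incrementToken()) {
          System.out.println(term.toString()); // the whole input as one token: "H2B 1A0"
        }
      }
    }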

View File

@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.core;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -20,6 +20,7 @@ package org.apache.lucene.analysis;
 import java.io.IOException;
 import java.io.Reader;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.AttributeSource;

View File

@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.core;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -19,6 +19,8 @@ package org.apache.lucene.analysis;
 import java.io.Reader;
+import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Version;

View File

@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.core;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -19,6 +19,8 @@ package org.apache.lucene.analysis;
 import java.io.IOException;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.util.CharacterUtils;
 import org.apache.lucene.util.Version;

View File

@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.core;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -19,6 +19,8 @@ package org.apache.lucene.analysis;
 import java.io.Reader;
+import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Version;

View File

@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.core;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -19,6 +19,10 @@ package org.apache.lucene.analysis;
 import java.io.Reader;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents;
 import org.apache.lucene.util.Version;
 /** An {@link Analyzer} that filters {@link LetterTokenizer}

View File

@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.core;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -24,6 +24,12 @@ import java.util.Arrays;
 import java.util.Set;
 import java.util.List;
+import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents;
 import org.apache.lucene.util.Version;
 /** Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
@@ -91,10 +97,10 @@ public final class StopAnalyzer extends StopwordAnalyzerBase {
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    *         built from a {@link LowerCaseTokenizer} filtered with
    *         {@link StopFilter}
    */
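
Editor's note: a usage sketch for the analyzer above with a caller-supplied stop set (the words and the class name StopAnalyzerDemo are arbitrary); lower-casing happens in the tokenizer before the stop check.

    import java.io.IOException;
    import java.io.StringReader;
    import java.util.Arrays;
    import java.util.HashSet;
    import java.util.Set;

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.core.StopAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.util.Version;

    public class StopAnalyzerDemo {
      public static void main(String[] args) throws IOException {
        Set<String> stopWords = new HashSet<String>(Arrays.asList("the", "a", "an"));
        StopAnalyzer analyzer = new StopAnalyzer(Version.LUCENE_31, stopWords);
        TokenStream ts = analyzer.tokenStream("body", new StringReader("The quick fox"));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        while (ts.incrementToken()) {
          System.out.println(term.toString()); // prints "quick" and "fox"; "the" is dropped
        }
      }
    }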

View File

@ -1,4 +1,4 @@
package org.apache.lucene.analysis;
package org.apache.lucene.analysis.core;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -22,6 +22,9 @@ import java.util.Arrays;
import java.util.Set;
import java.util.List;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.queryParser.QueryParser; // for javadoc

View File

@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.core;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -19,6 +19,9 @@ package org.apache.lucene.analysis;
 import java.io.Reader;
+import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents;
 import org.apache.lucene.util.Version;
 /**

View File

@@ -1,4 +1,4 @@
-package org.apache.lucene.analysis;
+package org.apache.lucene.analysis.core;
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -19,6 +19,8 @@ package org.apache.lucene.analysis;
 import java.io.Reader;
+import org.apache.lucene.analysis.CharTokenizer;
+import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.Version;

View File

@@ -17,17 +17,17 @@ package org.apache.lucene.analysis.cz;
  * limitations under the License.
  */
-import org.apache.lucene.analysis.ReusableAnalyzerBase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.ReusableAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.Version;
 import java.io.*;
@@ -218,10 +218,10 @@ public final class CzechAnalyzer extends ReusableAnalyzerBase {
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , and {@link CzechStemFilter} (only if version is >= LUCENE_31). If

View File

@@ -23,16 +23,16 @@ import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.DanishStemmer;
@@ -106,11 +106,11 @@ public final class DanishAnalyzer extends StopwordAnalyzerBase {
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@@ -28,17 +28,17 @@ import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.German2Stemmer;
@@ -224,10 +224,10 @@ public final class GermanAnalyzer extends StopwordAnalyzerBase {
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is
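
Editor's note: several analyzers in this commit take the stem-exclusion set mentioned here; terms in the set are flagged by KeywordMarkerFilter so the stemmer leaves them intact. A short sketch of the idea (the excluded word is arbitrary, and StemExclusionDemo is a made-up class):

    import java.util.Collections;
    import java.util.Set;

    import org.apache.lucene.analysis.de.GermanAnalyzer;
    import org.apache.lucene.util.Version;

    public class StemExclusionDemo {
      public static void main(String[] args) {
        // lower-cased here, since lower-casing typically runs earlier
        // in the chain than the keyword marking
        Set<String> exclusions = Collections.singleton("lucene");
        GermanAnalyzer analyzer = new GermanAnalyzer(
            Version.LUCENE_31, GermanAnalyzer.getDefaultStopSet(), exclusions);
        System.out.println(analyzer.getClass().getName());
      }
    }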

View File

@@ -17,13 +17,13 @@ package org.apache.lucene.analysis.el;
  */
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.util.Version;
 import java.io.IOException;
@@ -121,10 +121,10 @@ public final class GreekAnalyzer extends StopwordAnalyzerBase {
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
    *         {@link GreekLowerCaseFilter}, {@link StandardFilter},
    *         {@link StopFilter}, and {@link GreekStemFilter}

View File

@@ -22,15 +22,15 @@ import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.util.Version;
 /**
@@ -89,11 +89,11 @@ public final class EnglishAnalyzer extends StopwordAnalyzerBase {
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@@ -23,16 +23,16 @@ import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.SpanishStemmer;
@@ -106,11 +106,11 @@ public final class SpanishAnalyzer extends StopwordAnalyzerBase {
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@@ -24,14 +24,14 @@ import java.util.Hashtable;
 import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
 import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.Version;
 /**
@@ -136,10 +136,10 @@ public final class PersianAnalyzer extends StopwordAnalyzerBase {
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    *         built from a {@link ArabicLetterTokenizer} filtered with
    *         {@link LowerCaseFilter}, {@link ArabicNormalizationFilter},
    *         {@link PersianNormalizationFilter} and Persian Stop words

View File

@@ -23,16 +23,16 @@ import java.util.Set;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.Version;
 import org.tartarus.snowball.ext.FinnishStemmer;
@@ -106,11 +106,11 @@ public final class FinnishAnalyzer extends StopwordAnalyzerBase {
   /**
    * Creates a
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    * which tokenizes all the text in the provided {@link Reader}.
    *
    * @return A
-   *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   *         {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    *         built from an {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter}
    *         , {@link KeywordMarkerFilter} if a stem exclusion set is

View File

@@ -19,17 +19,17 @@ package org.apache.lucene.analysis.fr;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.CharArraySet;
-import org.apache.lucene.analysis.LowerCaseFilter;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.snowball.SnowballFilter;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer; // for javadoc
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.util.WordlistLoader;
 import org.apache.lucene.util.Version;
 import java.io.File;
@@ -225,10 +225,10 @@ public final class FrenchAnalyzer extends StopwordAnalyzerBase {
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    *         built from a {@link StandardTokenizer} filtered with
    *         {@link StandardFilter}, {@link ElisionFilter},
    *         {@link LowerCaseFilter}, {@link StopFilter},

View File

@@ -21,13 +21,13 @@ import java.io.IOException;
 import java.io.Reader;
 import java.util.Set;
-import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.miscellaneous.KeywordMarkerFilter;
-import org.apache.lucene.analysis.StopFilter;
-import org.apache.lucene.analysis.StopwordAnalyzerBase;
+import org.apache.lucene.analysis.util.StopwordAnalyzerBase;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.core.StopFilter;
 import org.apache.lucene.analysis.in.IndicNormalizationFilter;
 import org.apache.lucene.analysis.in.IndicTokenizer;
 import org.apache.lucene.util.Version;
@@ -106,10 +106,10 @@ public final class HindiAnalyzer extends StopwordAnalyzerBase {
   /**
    * Creates
-   * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    * used to tokenize all the text in the provided {@link Reader}.
    *
-   * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
+   * @return {@link org.apache.lucene.analysis.util.ReusableAnalyzerBase.TokenStreamComponents}
    *         built from a {@link IndicTokenizer} filtered with
    *         {@link LowerCaseFilter}, {@link IndicNormalizationFilter},
    *         {@link HindiNormalizationFilter}, {@link KeywordMarkerFilter}

Some files were not shown because too many files have changed in this diff.