mirror of https://github.com/apache/lucene.git
LUCENE-1684: add matchVersion to StandardAnalyzer, and improve defaults if version is 2.9+
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@784984 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6ed703e655
commit
757795bffe
|
@ -18,6 +18,7 @@ package org.apache.lucene.analysis.standard;
|
||||||
*/
|
*/
|
||||||
|
|
||||||
import org.apache.lucene.analysis.*;
|
import org.apache.lucene.analysis.*;
|
||||||
|
import org.apache.lucene.util.Version;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -26,12 +27,24 @@ import java.util.Set;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
|
* Filters {@link StandardTokenizer} with {@link StandardFilter}, {@link
|
||||||
* LowerCaseFilter} and {@link StopFilter}, using a list of English stop words.
|
* LowerCaseFilter} and {@link StopFilter}, using a list of
|
||||||
|
* English stop words.
|
||||||
|
*
|
||||||
|
* <a name="version"/>
|
||||||
|
* <p>You must specify the required {@link Version}
|
||||||
|
* compatibility when creating StandardAnalyzer:
|
||||||
|
* <ul>
|
||||||
|
* <li> As of 2.9, StopFilter preserves position
|
||||||
|
* increments by default
|
||||||
|
* <li> As of 2.9, Tokens incorrectly identified as acronyms
|
||||||
|
* are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
|
||||||
|
* </ul>
|
||||||
*
|
*
|
||||||
* @version $Id$
|
* @version $Id$
|
||||||
*/
|
*/
|
||||||
public class StandardAnalyzer extends Analyzer {
|
public class StandardAnalyzer extends Analyzer {
|
||||||
private Set stopSet;
|
private Set stopSet;
|
||||||
|
private Version matchVersion;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Specifies whether deprecated acronyms should be replaced with HOST type.
|
* Specifies whether deprecated acronyms should be replaced with HOST type.
|
||||||
|
@ -94,87 +107,92 @@ public class StandardAnalyzer extends Analyzer {
|
||||||
|
|
||||||
/** Builds an analyzer with the default stop words ({@link
|
/** Builds an analyzer with the default stop words ({@link
|
||||||
* #STOP_WORDS}).
|
* #STOP_WORDS}).
|
||||||
* @deprecated Use {@link #StandardAnalyzer(boolean, String[])},
|
* @deprecated Use {@link #StandardAnalyzer(Version)},
|
||||||
* passing in null for the stop words, instead */
|
* instead. */
|
||||||
public StandardAnalyzer() {
|
public StandardAnalyzer() {
|
||||||
this(STOP_WORDS);
|
this(Version.LUCENE_24, STOP_WORDS);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Builds an analyzer with the default stop words ({@link
|
||||||
|
* #STOP_WORDS}).
|
||||||
|
* @param matchVersion Lucene version to match. See
|
||||||
|
* <a href="#version">above</a>.
|
||||||
|
*/
|
||||||
|
public StandardAnalyzer(Version matchVersion) {
|
||||||
|
this(matchVersion, STOP_WORDS);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Builds an analyzer with the given stop words.
|
/** Builds an analyzer with the given stop words.
|
||||||
* @deprecated Use {@link #StandardAnalyzer(boolean, Set)}
|
* @deprecated Use {@link #StandardAnalyzer(Version, Set)}
|
||||||
* instead */
|
* instead */
|
||||||
public StandardAnalyzer(Set stopWords) {
|
public StandardAnalyzer(Set stopWords) {
|
||||||
stopSet = stopWords;
|
this(Version.LUCENE_24, stopWords);
|
||||||
useDefaultStopPositionIncrements = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Builds an analyzer with the given stop words.
|
/** Builds an analyzer with the given stop words.
|
||||||
* @param enableStopPositionIncrements See {@link
|
* @param matchVersion Lucene version to match. See
|
||||||
* StopFilter#setEnablePositionIncrements}
|
* <a href="#version">above</a>.
|
||||||
* @param stopWords stop words */
|
* @param stopWords stop words */
|
||||||
public StandardAnalyzer(boolean enableStopPositionIncrements, Set stopWords) {
|
public StandardAnalyzer(Version matchVersion, Set stopWords) {
|
||||||
stopSet = stopWords;
|
stopSet = stopWords;
|
||||||
this.enableStopPositionIncrements = enableStopPositionIncrements;
|
init(matchVersion);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Builds an analyzer with the given stop words.
|
/** Builds an analyzer with the given stop words.
|
||||||
* @deprecated Use {@link #StandardAnalyzer(boolean,
|
* @deprecated Use {@link #StandardAnalyzer(Version,
|
||||||
* String[])} instead */
|
* String[])} instead */
|
||||||
public StandardAnalyzer(String[] stopWords) {
|
public StandardAnalyzer(String[] stopWords) {
|
||||||
|
this(Version.LUCENE_24, stopWords);
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Builds an analyzer with the given stop words.
|
||||||
|
* @param matchVersion Lucene version to match. See
|
||||||
|
* <a href="#version">above</a>.
|
||||||
|
* @param stopWords Array of stop words */
|
||||||
|
public StandardAnalyzer(Version matchVersion, String[] stopWords) {
|
||||||
if (stopWords == null) {
|
if (stopWords == null) {
|
||||||
stopWords = STOP_WORDS;
|
stopWords = STOP_WORDS;
|
||||||
}
|
}
|
||||||
stopSet = StopFilter.makeStopSet(stopWords);
|
stopSet = StopFilter.makeStopSet(stopWords);
|
||||||
useDefaultStopPositionIncrements = true;
|
init(matchVersion);
|
||||||
}
|
|
||||||
|
|
||||||
/** Builds an analyzer with the given stop words.
|
|
||||||
* @param enableStopPositionIncrements See {@link
|
|
||||||
* StopFilter#setEnablePositionIncrements}
|
|
||||||
* @param stopWords Array of stop words */
|
|
||||||
public StandardAnalyzer(boolean enableStopPositionIncrements, String[] stopWords) {
|
|
||||||
stopSet = StopFilter.makeStopSet(stopWords);
|
|
||||||
this.enableStopPositionIncrements = enableStopPositionIncrements;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Builds an analyzer with the stop words from the given file.
|
/** Builds an analyzer with the stop words from the given file.
|
||||||
* @see WordlistLoader#getWordSet(File)
|
* @see WordlistLoader#getWordSet(File)
|
||||||
* @deprecated Use {@link #StandardAnalyzer(boolean, File)}
|
* @deprecated Use {@link #StandardAnalyzer(Version, File)}
|
||||||
* instead
|
* instead
|
||||||
*/
|
*/
|
||||||
public StandardAnalyzer(File stopwords) throws IOException {
|
public StandardAnalyzer(File stopwords) throws IOException {
|
||||||
stopSet = WordlistLoader.getWordSet(stopwords);
|
this(Version.LUCENE_24, stopwords);
|
||||||
useDefaultStopPositionIncrements = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Builds an analyzer with the stop words from the given file.
|
/** Builds an analyzer with the stop words from the given file.
|
||||||
* @see WordlistLoader#getWordSet(File)
|
* @see WordlistLoader#getWordSet(File)
|
||||||
* @param enableStopPositionIncrements See {@link
|
* @param matchVersion Lucene version to match. See
|
||||||
* StopFilter#setEnablePositionIncrements}
|
* <a href="#version">above</a>.
|
||||||
* @param stopwords File to read stop words from */
|
* @param stopwords File to read stop words from */
|
||||||
public StandardAnalyzer(boolean enableStopPositionIncrements, File stopwords) throws IOException {
|
public StandardAnalyzer(Version matchVersion, File stopwords) throws IOException {
|
||||||
stopSet = WordlistLoader.getWordSet(stopwords);
|
stopSet = WordlistLoader.getWordSet(stopwords);
|
||||||
this.enableStopPositionIncrements = enableStopPositionIncrements;
|
init(matchVersion);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Builds an analyzer with the stop words from the given reader.
|
/** Builds an analyzer with the stop words from the given reader.
|
||||||
* @see WordlistLoader#getWordSet(Reader)
|
* @see WordlistLoader#getWordSet(Reader)
|
||||||
* @deprecated Use {@link #StandardAnalyzer(boolean, Reader)}
|
* @deprecated Use {@link #StandardAnalyzer(Version, Reader)}
|
||||||
* instead
|
* instead
|
||||||
*/
|
*/
|
||||||
public StandardAnalyzer(Reader stopwords) throws IOException {
|
public StandardAnalyzer(Reader stopwords) throws IOException {
|
||||||
stopSet = WordlistLoader.getWordSet(stopwords);
|
this(Version.LUCENE_24, stopwords);
|
||||||
useDefaultStopPositionIncrements = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Builds an analyzer with the stop words from the given reader.
|
/** Builds an analyzer with the stop words from the given reader.
|
||||||
* @see WordlistLoader#getWordSet(Reader)
|
* @see WordlistLoader#getWordSet(Reader)
|
||||||
* @param enableStopPositionIncrements See {@link
|
* @param matchVersion Lucene version to match. See
|
||||||
* StopFilter#setEnablePositionIncrements}
|
* <a href="#version">above</a>.
|
||||||
* @param stopwords Reader to read stop words from */
|
* @param stopwords Reader to read stop words from */
|
||||||
public StandardAnalyzer(boolean enableStopPositionIncrements, Reader stopwords) throws IOException {
|
public StandardAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
|
||||||
stopSet = WordlistLoader.getWordSet(stopwords);
|
stopSet = WordlistLoader.getWordSet(stopwords);
|
||||||
this.enableStopPositionIncrements = enableStopPositionIncrements;
|
init(matchVersion);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -186,9 +204,8 @@ public class StandardAnalyzer extends Analyzer {
|
||||||
* @deprecated Remove in 3.X and make true the only valid value
|
* @deprecated Remove in 3.X and make true the only valid value
|
||||||
*/
|
*/
|
||||||
public StandardAnalyzer(boolean replaceInvalidAcronym) {
|
public StandardAnalyzer(boolean replaceInvalidAcronym) {
|
||||||
this(STOP_WORDS);
|
this(Version.LUCENE_24, STOP_WORDS);
|
||||||
this.replaceInvalidAcronym = replaceInvalidAcronym;
|
this.replaceInvalidAcronym = replaceInvalidAcronym;
|
||||||
useDefaultStopPositionIncrements = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -200,9 +217,8 @@ public class StandardAnalyzer extends Analyzer {
|
||||||
* @deprecated Remove in 3.X and make true the only valid value
|
* @deprecated Remove in 3.X and make true the only valid value
|
||||||
*/
|
*/
|
||||||
public StandardAnalyzer(Reader stopwords, boolean replaceInvalidAcronym) throws IOException{
|
public StandardAnalyzer(Reader stopwords, boolean replaceInvalidAcronym) throws IOException{
|
||||||
this(stopwords);
|
this(Version.LUCENE_24, stopwords);
|
||||||
this.replaceInvalidAcronym = replaceInvalidAcronym;
|
this.replaceInvalidAcronym = replaceInvalidAcronym;
|
||||||
useDefaultStopPositionIncrements = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -214,9 +230,8 @@ public class StandardAnalyzer extends Analyzer {
|
||||||
* @deprecated Remove in 3.X and make true the only valid value
|
* @deprecated Remove in 3.X and make true the only valid value
|
||||||
*/
|
*/
|
||||||
public StandardAnalyzer(File stopwords, boolean replaceInvalidAcronym) throws IOException{
|
public StandardAnalyzer(File stopwords, boolean replaceInvalidAcronym) throws IOException{
|
||||||
this(stopwords);
|
this(Version.LUCENE_24, stopwords);
|
||||||
this.replaceInvalidAcronym = replaceInvalidAcronym;
|
this.replaceInvalidAcronym = replaceInvalidAcronym;
|
||||||
useDefaultStopPositionIncrements = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -229,9 +244,8 @@ public class StandardAnalyzer extends Analyzer {
|
||||||
* @deprecated Remove in 3.X and make true the only valid value
|
* @deprecated Remove in 3.X and make true the only valid value
|
||||||
*/
|
*/
|
||||||
public StandardAnalyzer(String [] stopwords, boolean replaceInvalidAcronym) throws IOException{
|
public StandardAnalyzer(String [] stopwords, boolean replaceInvalidAcronym) throws IOException{
|
||||||
this(stopwords);
|
this(Version.LUCENE_24, stopwords);
|
||||||
this.replaceInvalidAcronym = replaceInvalidAcronym;
|
this.replaceInvalidAcronym = replaceInvalidAcronym;
|
||||||
useDefaultStopPositionIncrements = true;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -243,9 +257,17 @@ public class StandardAnalyzer extends Analyzer {
|
||||||
* @deprecated Remove in 3.X and make true the only valid value
|
* @deprecated Remove in 3.X and make true the only valid value
|
||||||
*/
|
*/
|
||||||
public StandardAnalyzer(Set stopwords, boolean replaceInvalidAcronym) throws IOException{
|
public StandardAnalyzer(Set stopwords, boolean replaceInvalidAcronym) throws IOException{
|
||||||
this(stopwords);
|
this(Version.LUCENE_24, stopwords);
|
||||||
this.replaceInvalidAcronym = replaceInvalidAcronym;
|
this.replaceInvalidAcronym = replaceInvalidAcronym;
|
||||||
useDefaultStopPositionIncrements = true;
|
}
|
||||||
|
|
||||||
|
private final void init(Version matchVersion) {
|
||||||
|
this.matchVersion = matchVersion;
|
||||||
|
if (matchVersion.onOrAfter(Version.LUCENE_29)) {
|
||||||
|
enableStopPositionIncrements = true;
|
||||||
|
} else {
|
||||||
|
useDefaultStopPositionIncrements = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Constructs a {@link StandardTokenizer} filtered by a {@link
|
/** Constructs a {@link StandardTokenizer} filtered by a {@link
|
||||||
|
@ -289,7 +311,8 @@ public class StandardAnalyzer extends Analyzer {
|
||||||
public int getMaxTokenLength() {
|
public int getMaxTokenLength() {
|
||||||
return maxTokenLength;
|
return maxTokenLength;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @deprecated Use {@link #tokenStream} instead */
|
||||||
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
|
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
|
||||||
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
|
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
|
||||||
if (streams == null) {
|
if (streams == null) {
|
||||||
|
|
|
@ -0,0 +1,58 @@
|
||||||
|
package org.apache.lucene.util;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Used by certain classes to match version compatibility
|
||||||
|
* across releases of Lucene.
|
||||||
|
*/
|
||||||
|
public final class Version extends Parameter implements Serializable {
|
||||||
|
|
||||||
|
/** Use this to get the latest and greatest settings, bug
|
||||||
|
* fixes, etc, for Lucene.
|
||||||
|
*
|
||||||
|
* <p><b>WARNING</b>: if you use this setting, and then
|
||||||
|
* upgrade to a newer release of Lucene, sizable changes
|
||||||
|
* may happen. If precise back compatibility is important
|
||||||
|
* then you should instead explicitly specify an actual
|
||||||
|
* version.
|
||||||
|
*/
|
||||||
|
public static final Version LUCENE_CURRENT = new Version("LUCENE_CURRENT", 0);
|
||||||
|
|
||||||
|
/** Match settings and bugs in Lucene's 2.4 release.
|
||||||
|
* @deprecated This will be removed in 3.0 */
|
||||||
|
public static final Version LUCENE_24 = new Version("LUCENE_24", 2400);
|
||||||
|
|
||||||
|
/** Match settings and bugs in Lucene's 2.9 release.
|
||||||
|
* @deprecated This will be removed in 3.0 */
|
||||||
|
public static final Version LUCENE_29 = new Version("LUCENE_29", 2900);
|
||||||
|
|
||||||
|
private final int v;
|
||||||
|
|
||||||
|
public Version(String name, int v) {
|
||||||
|
super(name);
|
||||||
|
this.v = v;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean onOrAfter(Version other) {
|
||||||
|
return v == 0 || v >= other.v;
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue