LUCENE-2002: add Version to QueryParser & contrib analyzers

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@829206 13f79535-47bb-0310-9956-ffa450edef68
Michael McCandless 2009-10-23 20:25:17 +00:00
parent 0557d2ce5a
commit aaddac8992
96 changed files with 760 additions and 464 deletions

View File: CHANGES.txt

@ -137,6 +137,11 @@ Optimizations
* LUCENE-1183: Optimize Levenshtein Distance computation in
FuzzyQuery. (Cédrik Lime via Mike McCandless)
* LUCENE-2002: Add required Version matchVersion argument when
constructing QueryParser or MultiFieldQueryParser and default (as
of 2.9) enablePositionIncrements to true to match
StandardAnalyzer's 2.9 default (Uwe Schindler, Mike McCandless)
Documentation
Build
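For reference, the required-Version call pattern described in the LUCENE-2002 entry above looks like the following minimal sketch (field name and query text are illustrative; Version.LUCENE_CURRENT is the constant this commit's own tests use):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;

public class VersionedQueryParserSketch {
  public static void main(String[] args) throws Exception {
    // matchVersion is now a required leading argument; it selects
    // version-dependent defaults such as enablePositionIncrements
    // (true as of 2.9, matching StandardAnalyzer's 2.9 default).
    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "body",
        new StandardAnalyzer(Version.LUCENE_CURRENT));
    Query q = qp.parse("quick AND fox");
    System.out.println(q);
  }
}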

View File: build.xml

@ -580,9 +580,21 @@
<target name="javacc" depends="clean-javacc,javacc-QueryParser,javacc-HTMLParser,javacc-contrib-queryparser, javacc-contrib-surround, javacc-contrib-precedence"/>
<target name="javacc-QueryParser" depends="init,javacc-check" if="javacc.present">
<invoke-javacc target="src/java/org/apache/lucene/queryParser/QueryParser.jj"
outputDir="src/java/org/apache/lucene/queryParser"
/>
<sequential>
<invoke-javacc target="src/java/org/apache/lucene/queryParser/QueryParser.jj"
outputDir="src/java/org/apache/lucene/queryParser"/>
<!-- Change the incorrect public ctors for QueryParser to be protected instead -->
<replaceregexp file="src/java/org/apache/lucene/queryParser/QueryParser.java"
byline="true"
match="public QueryParser\(CharStream "
replace="protected QueryParser(CharStream "/>
<replaceregexp file="src/java/org/apache/lucene/queryParser/QueryParser.java"
byline="true"
match="public QueryParser\(QueryParserTokenManager "
replace="protected QueryParser(QueryParserTokenManager "/>
</sequential>
</target>
<target name="javacc-HTMLParser" depends="init,javacc-check" if="javacc.present">

View File: common-build.xml

@ -42,7 +42,7 @@
<property name="Name" value="Lucene"/>
<property name="dev.version" value="3.0-dev"/>
<property name="version" value="${dev.version}"/>
<property name="compatibility.tag" value="lucene_2_9_back_compat_tests_20091023"/>
<property name="compatibility.tag" value="lucene_2_9_back_compat_tests_20091023a"/>
<property name="spec.version" value="${version}"/>
<property name="year" value="2000-${current.year}"/>
<property name="final.name" value="lucene-${name}-${version}"/>

View File: contrib/CHANGES.txt

@ -25,6 +25,12 @@ API Changes
text exactly the same as LowerCaseFilter. Please use LowerCaseFilter
instead, which has the same functionality. (Robert Muir)
* LUCENE-2002: Add required Version matchVersion argument when
constructing ComplexPhraseQueryParser and default (as of 2.9)
enablePositionIncrements to true to match StandardAnalyzer's
default. Also added required matchVersion to most of the analyzers
(Uwe Schindler, Mike McCandless)
Bug fixes
* LUCENE-1781: Fixed various issues with the lat/lng bounding box
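A migration sketch for the analyzer constructors mentioned in the LUCENE-2002 entry above (GermanAnalyzer is an arbitrary choice; the same one-argument change applies to every analyzer diff below):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.de.GermanAnalyzer;
import org.apache.lucene.util.Version;

public class AnalyzerMigrationSketch {
  public static void main(String[] args) {
    // Before this commit: new GermanAnalyzer();
    // After: matchVersion is required, and 2.9+ enables
    // position increments in the internal StopFilter.
    Analyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
    System.out.println(a.getClass().getSimpleName());
  }
}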

View File: org/apache/lucene/analysis/ar/ArabicAnalyzer.java

@ -33,6 +33,7 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Arabic.
@ -109,32 +110,38 @@ public final class ArabicAnalyzer extends Analyzer {
}
}
private final Version matchVersion;
/**
* Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
*/
public ArabicAnalyzer() {
public ArabicAnalyzer(Version matchVersion) {
this.matchVersion = matchVersion;
stoptable = DefaultSetHolder.DEFAULT_STOP_SET;
}
/**
* Builds an analyzer with the given stop words.
*/
public ArabicAnalyzer( String... stopwords ) {
public ArabicAnalyzer( Version matchVersion, String... stopwords ) {
stoptable = StopFilter.makeStopSet( stopwords );
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
*/
public ArabicAnalyzer( Hashtable<?,?> stopwords ) {
stoptable = new HashSet( stopwords.keySet() );
public ArabicAnalyzer( Version matchVersion, Hashtable<?,?> stopwords ) {
stoptable = new HashSet(stopwords.keySet());
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words. Lines can be commented out using {@link #STOPWORDS_COMMENT}
*/
public ArabicAnalyzer( File stopwords ) throws IOException {
public ArabicAnalyzer( Version matchVersion, File stopwords ) throws IOException {
stoptable = WordlistLoader.getWordSet( stopwords, STOPWORDS_COMMENT);
this.matchVersion = matchVersion;
}
@ -149,7 +156,8 @@ public final class ArabicAnalyzer extends Analyzer {
TokenStream result = new ArabicLetterTokenizer( reader );
result = new LowerCaseFilter(result);
// the order here is important: the stopword list is not normalized!
result = new StopFilter(false, result, stoptable );
result = new StopFilter( StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
result, stoptable );
result = new ArabicNormalizationFilter( result );
result = new ArabicStemFilter( result );
@ -177,7 +185,8 @@ public final class ArabicAnalyzer extends Analyzer {
streams.source = new ArabicLetterTokenizer(reader);
streams.result = new LowerCaseFilter(streams.source);
// the order here is important: the stopword list is not normalized!
streams.result = new StopFilter(false, streams.result, stoptable);
streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
streams.result, stoptable);
streams.result = new ArabicNormalizationFilter(streams.result);
streams.result = new ArabicStemFilter(streams.result);
setPreviousTokenStream(streams);
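The StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion) call introduced here, and repeated in every analyzer below, replaces the hard-coded false with a default derived from the supplied Version. A sketch of what such a helper plausibly does, assuming it simply gates on the 2.9 cutoff (the real body lives in org.apache.lucene.analysis.StopFilter):

public static boolean getEnablePositionIncrementsVersionDefault(Version matchVersion) {
  // As of 2.9, position increments are preserved by default;
  // older matchVersions keep the previous behavior (false).
  return matchVersion.onOrAfter(Version.LUCENE_29);
}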

View File: org/apache/lucene/analysis/br/BrazilianAnalyzer.java

@ -33,6 +33,7 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Brazilian Portuguese language.
@ -41,6 +42,9 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
* will not be indexed at all) and an external list of exclusions (words that will
* not be stemmed, but indexed).
* </p>
*
* <p><b>NOTE</b>: This class uses the same {@link Version}
* dependent settings as {@link StandardAnalyzer}.</p>
*/
public final class BrazilianAnalyzer extends Analyzer {
@ -78,33 +82,38 @@ public final class BrazilianAnalyzer extends Analyzer {
* Contains words that should be indexed but not stemmed.
*/
private Set excltable = Collections.emptySet();
private final Version matchVersion;
/**
* Builds an analyzer with the default stop words ({@link #BRAZILIAN_STOP_WORDS}).
*/
public BrazilianAnalyzer() {
stoptable = StopFilter.makeStopSet( BRAZILIAN_STOP_WORDS );
public BrazilianAnalyzer(Version matchVersion) {
stoptable = StopFilter.makeStopSet( BRAZILIAN_STOP_WORDS );
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
*/
public BrazilianAnalyzer( String... stopwords ) {
stoptable = StopFilter.makeStopSet( stopwords );
public BrazilianAnalyzer( Version matchVersion, String... stopwords ) {
stoptable = StopFilter.makeStopSet( stopwords );
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
*/
public BrazilianAnalyzer( Map stopwords ) {
stoptable = new HashSet(stopwords.keySet());
public BrazilianAnalyzer( Version matchVersion, Map stopwords ) {
stoptable = new HashSet(stopwords.keySet());
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
*/
public BrazilianAnalyzer( File stopwords ) throws IOException {
stoptable = WordlistLoader.getWordSet( stopwords );
public BrazilianAnalyzer( Version matchVersion, File stopwords ) throws IOException {
stoptable = WordlistLoader.getWordSet( stopwords );
this.matchVersion = matchVersion;
}
/**
@ -137,10 +146,11 @@ public final class BrazilianAnalyzer extends Analyzer {
* {@link BrazilianStemFilter}.
*/
public final TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer( reader );
TokenStream result = new StandardTokenizer( matchVersion, reader );
result = new LowerCaseFilter( result );
result = new StandardFilter( result );
result = new StopFilter( false, result, stoptable );
result = new StopFilter( StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
result, stoptable );
result = new BrazilianStemFilter( result, excltable );
return result;
}
@ -163,10 +173,11 @@ public final class BrazilianAnalyzer extends Analyzer {
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
if (streams == null) {
streams = new SavedStreams();
streams.source = new StandardTokenizer(reader);
streams.source = new StandardTokenizer(matchVersion, reader);
streams.result = new LowerCaseFilter(streams.source);
streams.result = new StandardFilter(streams.result);
streams.result = new StopFilter(false, streams.result, stoptable);
streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
streams.result, stoptable);
streams.result = new BrazilianStemFilter(streams.result, excltable);
setPreviousTokenStream(streams);
} else {

View File: org/apache/lucene/analysis/cjk/CJKAnalyzer.java

@ -21,6 +21,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Reader;
@ -56,14 +57,16 @@ public class CJKAnalyzer extends Analyzer {
* stop word list
*/
private final Set stopTable;
private final Version matchVersion;
//~ Constructors -----------------------------------------------------------
/**
* Builds an analyzer which removes words in {@link #STOP_WORDS}.
*/
public CJKAnalyzer() {
public CJKAnalyzer(Version matchVersion) {
stopTable = StopFilter.makeStopSet(STOP_WORDS);
this.matchVersion = matchVersion;
}
/**
@ -71,8 +74,9 @@ public class CJKAnalyzer extends Analyzer {
*
* @param stopWords stop word array
*/
public CJKAnalyzer(String... stopWords) {
public CJKAnalyzer(Version matchVersion, String... stopWords) {
stopTable = StopFilter.makeStopSet(stopWords);
this.matchVersion = matchVersion;
}
//~ Methods ----------------------------------------------------------------
@ -86,7 +90,8 @@ public class CJKAnalyzer extends Analyzer {
* {@link StopFilter}
*/
public final TokenStream tokenStream(String fieldName, Reader reader) {
return new StopFilter(false, new CJKTokenizer(reader), stopTable);
return new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
new CJKTokenizer(reader), stopTable);
}
private class SavedStreams {
@ -109,7 +114,8 @@ public class CJKAnalyzer extends Analyzer {
if (streams == null) {
streams = new SavedStreams();
streams.source = new CJKTokenizer(reader);
streams.result = new StopFilter(false, streams.source, stopTable);
streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
streams.source, stopTable);
setPreviousTokenStream(streams);
} else {
streams.source.reset(reader);

View File: org/apache/lucene/analysis/cz/CzechAnalyzer.java

@ -25,6 +25,7 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
import java.io.*;
import java.util.HashSet;
@ -38,6 +39,9 @@ import java.util.Collections;
* will not be indexed at all).
* A default set of stopwords is used unless an alternative list is specified.
* </p>
*
* <p><b>NOTE</b>: This class uses the same {@link Version}
* dependent settings as {@link StandardAnalyzer}.</p>
*/
public final class CzechAnalyzer extends Analyzer {
@ -69,30 +73,35 @@ public final class CzechAnalyzer extends Analyzer {
* Contains the stopwords used with the {@link StopFilter}.
*/
private Set stoptable;
private final Version matchVersion;
/**
* Builds an analyzer with the default stop words ({@link #CZECH_STOP_WORDS}).
*/
public CzechAnalyzer() {
stoptable = StopFilter.makeStopSet( CZECH_STOP_WORDS );
public CzechAnalyzer(Version matchVersion) {
stoptable = StopFilter.makeStopSet( CZECH_STOP_WORDS );
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
*/
public CzechAnalyzer( String... stopwords ) {
stoptable = StopFilter.makeStopSet( stopwords );
public CzechAnalyzer(Version matchVersion, String... stopwords) {
stoptable = StopFilter.makeStopSet( stopwords );
this.matchVersion = matchVersion;
}
public CzechAnalyzer( HashSet stopwords ) {
stoptable = stopwords;
public CzechAnalyzer(Version matchVersion, HashSet stopwords) {
stoptable = stopwords;
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
*/
public CzechAnalyzer( File stopwords ) throws IOException {
stoptable = WordlistLoader.getWordSet( stopwords );
public CzechAnalyzer(Version matchVersion, File stopwords ) throws IOException {
stoptable = WordlistLoader.getWordSet( stopwords );
this.matchVersion = matchVersion;
}
/**
@ -131,10 +140,11 @@ public final class CzechAnalyzer extends Analyzer {
* {@link StandardFilter}, {@link LowerCaseFilter}, and {@link StopFilter}
*/
public final TokenStream tokenStream( String fieldName, Reader reader ) {
TokenStream result = new StandardTokenizer( reader );
TokenStream result = new StandardTokenizer( matchVersion, reader );
result = new StandardFilter( result );
result = new LowerCaseFilter( result );
result = new StopFilter(false, result, stoptable );
result = new StopFilter( StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
result, stoptable );
return result;
}
@ -155,10 +165,11 @@ public final class CzechAnalyzer extends Analyzer {
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
if (streams == null) {
streams = new SavedStreams();
streams.source = new StandardTokenizer(reader);
streams.source = new StandardTokenizer(matchVersion, reader);
streams.result = new StandardFilter(streams.source);
streams.result = new LowerCaseFilter(streams.result);
streams.result = new StopFilter(false, streams.result, stoptable);
streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
streams.result, stoptable);
setPreviousTokenStream(streams);
} else {
streams.source.reset(reader);

View File: org/apache/lucene/analysis/de/GermanAnalyzer.java

@ -33,6 +33,7 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for German language.
@ -43,6 +44,9 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
* A default set of stopwords is used unless an alternative list is specified, but the
* exclusion list is empty by default.
* </p>
*
* <p><b>NOTE</b>: This class uses the same {@link Version}
* dependent settings as {@link StandardAnalyzer}.</p>
*/
public class GermanAnalyzer extends Analyzer {
@ -74,37 +78,43 @@ public class GermanAnalyzer extends Analyzer {
*/
private Set exclusionSet = new HashSet();
private final Version matchVersion;
/**
* Builds an analyzer with the default stop words:
* {@link #GERMAN_STOP_WORDS}.
*/
public GermanAnalyzer() {
public GermanAnalyzer(Version matchVersion) {
stopSet = StopFilter.makeStopSet(GERMAN_STOP_WORDS);
setOverridesTokenStreamMethod(GermanAnalyzer.class);
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
*/
public GermanAnalyzer(String... stopwords) {
public GermanAnalyzer(Version matchVersion, String... stopwords) {
stopSet = StopFilter.makeStopSet(stopwords);
setOverridesTokenStreamMethod(GermanAnalyzer.class);
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
*/
public GermanAnalyzer(Map stopwords) {
public GermanAnalyzer(Version matchVersion, Map stopwords) {
stopSet = new HashSet(stopwords.keySet());
setOverridesTokenStreamMethod(GermanAnalyzer.class);
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
*/
public GermanAnalyzer(File stopwords) throws IOException {
public GermanAnalyzer(Version matchVersion, File stopwords) throws IOException {
stopSet = WordlistLoader.getWordSet(stopwords);
setOverridesTokenStreamMethod(GermanAnalyzer.class);
this.matchVersion = matchVersion;
}
/**
@ -139,10 +149,11 @@ public class GermanAnalyzer extends Analyzer {
* {@link GermanStemFilter}
*/
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer(reader);
TokenStream result = new StandardTokenizer(matchVersion, reader);
result = new StandardFilter(result);
result = new LowerCaseFilter(result);
result = new StopFilter(false, result, stopSet);
result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
result, stopSet);
result = new GermanStemFilter(result, exclusionSet);
return result;
}
@ -171,10 +182,11 @@ public class GermanAnalyzer extends Analyzer {
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
if (streams == null) {
streams = new SavedStreams();
streams.source = new StandardTokenizer(reader);
streams.source = new StandardTokenizer(matchVersion, reader);
streams.result = new StandardFilter(streams.source);
streams.result = new LowerCaseFilter(streams.result);
streams.result = new StopFilter(false, streams.result, stopSet);
streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
streams.result, stopSet);
streams.result = new GermanStemFilter(streams.result, exclusionSet);
setPreviousTokenStream(streams);
} else {

View File: org/apache/lucene/analysis/el/GreekAnalyzer.java

@ -22,6 +22,7 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Reader;
@ -36,6 +37,9 @@ import java.util.Set;
* that will not be indexed at all).
* A default set of stopwords is used unless an alternative list is specified.
* </p>
*
* <p><b>NOTE</b>: This class uses the same {@link Version}
* dependent settings as {@link StandardAnalyzer}.</p>
*/
public final class GreekAnalyzer extends Analyzer
{
@ -59,27 +63,33 @@ public final class GreekAnalyzer extends Analyzer
*/
private Set stopSet = new HashSet();
public GreekAnalyzer() {
this(GREEK_STOP_WORDS);
private final Version matchVersion;
public GreekAnalyzer(Version matchVersion) {
super();
stopSet = StopFilter.makeStopSet(GREEK_STOP_WORDS);
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
* @param stopwords Array of stopwords to use.
*/
public GreekAnalyzer(String... stopwords)
public GreekAnalyzer(Version matchVersion, String... stopwords)
{
super();
stopSet = StopFilter.makeStopSet(stopwords);
super();
stopSet = StopFilter.makeStopSet(stopwords);
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
*/
public GreekAnalyzer(Map stopwords)
public GreekAnalyzer(Version matchVersion, Map stopwords)
{
super();
stopSet = new HashSet(stopwords.keySet());
super();
stopSet = new HashSet(stopwords.keySet());
this.matchVersion = matchVersion;
}
/**
@ -90,9 +100,10 @@ public final class GreekAnalyzer extends Analyzer
*/
public TokenStream tokenStream(String fieldName, Reader reader)
{
TokenStream result = new StandardTokenizer(reader);
TokenStream result = new StandardTokenizer(matchVersion, reader);
result = new GreekLowerCaseFilter(result);
result = new StopFilter(false, result, stopSet);
result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
result, stopSet);
return result;
}
@ -113,9 +124,10 @@ public final class GreekAnalyzer extends Analyzer
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
if (streams == null) {
streams = new SavedStreams();
streams.source = new StandardTokenizer(reader);
streams.source = new StandardTokenizer(matchVersion, reader);
streams.result = new GreekLowerCaseFilter(streams.source);
streams.result = new StopFilter(false, streams.result, stopSet);
streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
streams.result, stopSet);
setPreviousTokenStream(streams);
} else {
streams.source.reset(reader);

View File: org/apache/lucene/analysis/fa/PersianAnalyzer.java

@ -35,6 +35,7 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Persian.
@ -106,36 +107,40 @@ public final class PersianAnalyzer extends Analyzer {
}
}
private final Version matchVersion;
/**
* Builds an analyzer with the default stop words:
* {@link #DEFAULT_STOPWORD_FILE}.
*/
public PersianAnalyzer() {
public PersianAnalyzer(Version matchVersion) {
stoptable = DefaultSetHolder.DEFAULT_STOP_SET;
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
*/
public PersianAnalyzer(String[] stopwords) {
public PersianAnalyzer(Version matchVersion, String[] stopwords) {
stoptable = StopFilter.makeStopSet(stopwords);
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
*/
public PersianAnalyzer(Hashtable stopwords) {
public PersianAnalyzer(Version matchVersion, Hashtable stopwords) {
stoptable = new HashSet(stopwords.keySet());
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words. Lines can be commented out
* using {@link #STOPWORDS_COMMENT}
*/
public PersianAnalyzer(File stopwords) throws IOException {
public PersianAnalyzer(Version matchVersion, File stopwords) throws IOException {
stoptable = WordlistLoader.getWordSet(stopwords, STOPWORDS_COMMENT);
this.matchVersion = matchVersion;
}
/**
@ -157,8 +162,8 @@ public final class PersianAnalyzer extends Analyzer {
* the order here is important: the stopword list is normalized with the
* above!
*/
result = new StopFilter(false, result, stoptable);
result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
result, stoptable);
return result;
}
@ -190,7 +195,8 @@ public final class PersianAnalyzer extends Analyzer {
* the order here is important: the stopword list is normalized with the
* above!
*/
streams.result = new StopFilter(false, streams.result, stoptable);
streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
streams.result, stoptable);
setPreviousTokenStream(streams);
} else {
streams.source.reset(reader);

View File: org/apache/lucene/analysis/fr/FrenchAnalyzer.java

@ -25,6 +25,7 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
@ -42,6 +43,17 @@ import java.util.Set;
* A default set of stopwords is used unless an alternative list is specified, but the
* exclusion list is empty by default.
* </p>
*
* <a name="version"/>
* <p>You must specify the required {@link Version}
* compatibility when creating FrenchAnalyzer:
* <ul>
* <li> As of 2.9, StopFilter preserves position
* increments
* </ul>
*
* <p><b>NOTE</b>: This class uses the same {@link Version}
* dependent settings as {@link StandardAnalyzer}.</p>
*/
public final class FrenchAnalyzer extends Analyzer {
@ -82,26 +94,31 @@ public final class FrenchAnalyzer extends Analyzer {
*/
private Set excltable = new HashSet();
private final Version matchVersion;
/**
* Builds an analyzer with the default stop words ({@link #FRENCH_STOP_WORDS}).
*/
public FrenchAnalyzer() {
public FrenchAnalyzer(Version matchVersion) {
stoptable = StopFilter.makeStopSet(FRENCH_STOP_WORDS);
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
*/
public FrenchAnalyzer(String... stopwords) {
public FrenchAnalyzer(Version matchVersion, String... stopwords) {
stoptable = StopFilter.makeStopSet(stopwords);
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
* @throws IOException
*/
public FrenchAnalyzer(File stopwords) throws IOException {
public FrenchAnalyzer(Version matchVersion, File stopwords) throws IOException {
stoptable = new HashSet(WordlistLoader.getWordSet(stopwords));
this.matchVersion = matchVersion;
}
/**
@ -138,9 +155,10 @@ public final class FrenchAnalyzer extends Analyzer {
* {@link FrenchStemFilter} and {@link LowerCaseFilter}
*/
public final TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer(reader);
TokenStream result = new StandardTokenizer(matchVersion, reader);
result = new StandardFilter(result);
result = new StopFilter(false, result, stoptable);
result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
result, stoptable);
result = new FrenchStemFilter(result, excltable);
// Convert to lowercase after stemming!
result = new LowerCaseFilter(result);
@ -165,9 +183,10 @@ public final class FrenchAnalyzer extends Analyzer {
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
if (streams == null) {
streams = new SavedStreams();
streams.source = new StandardTokenizer(reader);
streams.source = new StandardTokenizer(matchVersion, reader);
streams.result = new StandardFilter(streams.source);
streams.result = new StopFilter(false, streams.result, stoptable);
streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
streams.result, stoptable);
streams.result = new FrenchStemFilter(streams.result, excltable);
// Convert to lowercase after stemming!
streams.result = new LowerCaseFilter(streams.result);

View File: org/apache/lucene/analysis/nl/DutchAnalyzer.java

@ -23,6 +23,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
import java.io.File;
import java.io.IOException;
@ -42,6 +43,9 @@ import java.util.Map;
* A default set of stopwords is used unless an alternative list is specified, but the
* exclusion list is empty by default.
* </p>
*
* <p><b>NOTE</b>: This class uses the same {@link Version}
* dependent settings as {@link StandardAnalyzer}.</p>
*/
public class DutchAnalyzer extends Analyzer {
/**
@ -73,30 +77,33 @@ public class DutchAnalyzer extends Analyzer {
private Set excltable = new HashSet();
private Map stemdict = new HashMap();
private final Version matchVersion;
/**
* Builds an analyzer with the default stop words ({@link #DUTCH_STOP_WORDS})
* and a few default entries for the stem exclusion table.
*
*/
public DutchAnalyzer() {
public DutchAnalyzer(Version matchVersion) {
setOverridesTokenStreamMethod(DutchAnalyzer.class);
stoptable = StopFilter.makeStopSet(DUTCH_STOP_WORDS);
stemdict.put("fiets", "fiets"); //otherwise fiet
stemdict.put("bromfiets", "bromfiets"); //otherwise bromfiet
stemdict.put("ei", "eier");
stemdict.put("kind", "kinder");
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
*
* @param matchVersion
* @param stopwords
*/
public DutchAnalyzer(String... stopwords) {
public DutchAnalyzer(Version matchVersion, String... stopwords) {
setOverridesTokenStreamMethod(DutchAnalyzer.class);
stoptable = StopFilter.makeStopSet(stopwords);
this.matchVersion = matchVersion;
}
/**
@ -104,9 +111,10 @@ public class DutchAnalyzer extends Analyzer {
*
* @param stopwords
*/
public DutchAnalyzer(HashSet stopwords) {
public DutchAnalyzer(Version matchVersion, HashSet stopwords) {
setOverridesTokenStreamMethod(DutchAnalyzer.class);
stoptable = stopwords;
this.matchVersion = matchVersion;
}
/**
@ -114,7 +122,7 @@ public class DutchAnalyzer extends Analyzer {
*
* @param stopwords
*/
public DutchAnalyzer(File stopwords) {
public DutchAnalyzer(Version matchVersion, File stopwords) {
setOverridesTokenStreamMethod(DutchAnalyzer.class);
try {
stoptable = org.apache.lucene.analysis.WordlistLoader.getWordSet(stopwords);
@ -122,6 +130,7 @@ public class DutchAnalyzer extends Analyzer {
// TODO: throw IOException
throw new RuntimeException(e);
}
this.matchVersion = matchVersion;
}
/**
@ -179,9 +188,10 @@ public class DutchAnalyzer extends Analyzer {
* and {@link DutchStemFilter}
*/
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer(reader);
TokenStream result = new StandardTokenizer(matchVersion, reader);
result = new StandardFilter(result);
result = new StopFilter(false, result, stoptable);
result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
result, stoptable);
result = new DutchStemFilter(result, excltable, stemdict);
return result;
}
@ -211,9 +221,10 @@ public class DutchAnalyzer extends Analyzer {
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
if (streams == null) {
streams = new SavedStreams();
streams.source = new StandardTokenizer(reader);
streams.source = new StandardTokenizer(matchVersion, reader);
streams.result = new StandardFilter(streams.source);
streams.result = new StopFilter(false, streams.result, stoptable);
streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
streams.result, stoptable);
streams.result = new DutchStemFilter(streams.result, excltable, stemdict);
setPreviousTokenStream(streams);
} else {

View File: org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java

@ -23,6 +23,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Reader;
@ -48,15 +49,17 @@ public class QueryAutoStopWordAnalyzer extends Analyzer {
//The default maximum percentage (40%) of index documents which
//can contain a term, after which the term is considered to be a stop word.
public static final float defaultMaxDocFreqPercent = 0.4f;
private final Version matchVersion;
/**
* Initializes this analyzer with the Analyzer object that actually produces the tokens
*
* @param delegate The choice of {@link Analyzer} that is used to produce the token stream which needs filtering
*/
public QueryAutoStopWordAnalyzer(Analyzer delegate) {
public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer delegate) {
this.delegate = delegate;
setOverridesTokenStreamMethod(QueryAutoStopWordAnalyzer.class);
this.matchVersion = matchVersion;
}
/**
@ -175,7 +178,8 @@ public class QueryAutoStopWordAnalyzer extends Analyzer {
}
HashSet stopWords = (HashSet) stopWordsPerField.get(fieldName);
if (stopWords != null) {
result = new StopFilter(false, result, stopWords);
result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
result, stopWords);
}
return result;
}
@ -217,7 +221,8 @@ public class QueryAutoStopWordAnalyzer extends Analyzer {
/* if there are any stopwords for the field, save the stopfilter */
HashSet stopWords = (HashSet) stopWordsPerField.get(fieldName);
if (stopWords != null)
streams.withStopFilter = new StopFilter(false, streams.wrapped, stopWords);
streams.withStopFilter = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
streams.wrapped, stopWords);
else
streams.withStopFilter = streams.wrapped;
@ -238,7 +243,8 @@ public class QueryAutoStopWordAnalyzer extends Analyzer {
streams.wrapped = result;
HashSet stopWords = (HashSet) stopWordsPerField.get(fieldName);
if (stopWords != null)
streams.withStopFilter = new StopFilter(false, streams.wrapped, stopWords);
streams.withStopFilter = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
streams.wrapped, stopWords);
else
streams.withStopFilter = streams.wrapped;
}

View File: org/apache/lucene/analysis/ru/RussianAnalyzer.java

@ -28,6 +28,7 @@ import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Russian language.
@ -60,27 +61,31 @@ public final class RussianAnalyzer extends Analyzer
*/
private Set stopSet = new HashSet();
public RussianAnalyzer() {
this(RUSSIAN_STOP_WORDS);
private final Version matchVersion;
public RussianAnalyzer(Version matchVersion) {
this(matchVersion, RUSSIAN_STOP_WORDS);
}
/**
* Builds an analyzer with the given stop words.
*/
public RussianAnalyzer(String... stopwords)
public RussianAnalyzer(Version matchVersion, String... stopwords)
{
super();
stopSet = StopFilter.makeStopSet(stopwords);
super();
stopSet = StopFilter.makeStopSet(stopwords);
this.matchVersion = matchVersion;
}
/**
* Builds an analyzer with the given stop words.
* TODO: create a Set version of this ctor
*/
public RussianAnalyzer(Map stopwords)
public RussianAnalyzer(Version matchVersion, Map stopwords)
{
super();
stopSet = new HashSet(stopwords.keySet());
super();
stopSet = new HashSet(stopwords.keySet());
this.matchVersion = matchVersion;
}
/**
@ -96,7 +101,8 @@ public final class RussianAnalyzer extends Analyzer
{
TokenStream result = new RussianLetterTokenizer(reader);
result = new LowerCaseFilter(result);
result = new StopFilter(false, result, stopSet);
result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
result, stopSet);
result = new RussianStemFilter(result);
return result;
}
@ -122,7 +128,8 @@ public final class RussianAnalyzer extends Analyzer
streams = new SavedStreams();
streams.source = new RussianLetterTokenizer(reader);
streams.result = new LowerCaseFilter(streams.source);
streams.result = new StopFilter(false, streams.result, stopSet);
streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
streams.result, stopSet);
streams.result = new RussianStemFilter(streams.result);
setPreviousTokenStream(streams);
} else {

View File: org/apache/lucene/analysis/ru/RussianLowerCaseFilter.java (deleted)

@ -1,56 +0,0 @@
package org.apache.lucene.analysis.ru;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.analysis.LowerCaseFilter; // for javadoc
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
/**
* Normalizes token text to lower case.
* @deprecated Use {@link LowerCaseFilter} instead, which has the same
* functionality. This filter will be removed in Lucene 3.1
*/
public final class RussianLowerCaseFilter extends TokenFilter
{
private TermAttribute termAtt;
public RussianLowerCaseFilter(TokenStream in)
{
super(in);
termAtt = addAttribute(TermAttribute.class);
}
public final boolean incrementToken() throws IOException
{
if (input.incrementToken()) {
char[] chArray = termAtt.termBuffer();
int chLen = termAtt.termLength();
for (int i = 0; i < chLen; i++)
{
chArray[i] = Character.toLowerCase(chArray[i]);
}
return true;
} else {
return false;
}
}
}
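Since the deleted filter is documented as functionally identical to LowerCaseFilter, migrating callers is mechanical; a minimal sketch, with the tokenizer choice purely illustrative:

import java.io.Reader;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;

public class LowerCaseMigrationSketch {
  public static TokenStream build(Reader reader) {
    // Drop-in replacement for the removed RussianLowerCaseFilter:
    // LowerCaseFilter applies the same per-character lowercasing.
    return new LowerCaseFilter(new WhitespaceTokenizer(reader));
  }
}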

View File: org/apache/lucene/analysis/th/ThaiAnalyzer.java

@ -25,22 +25,29 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
/**
* {@link Analyzer} for Thai language. It uses {@link java.text.BreakIterator} to break words.
* @version 0.2
*
* <p><b>NOTE</b>: This class uses the same {@link Version}
* dependent settings as {@link StandardAnalyzer}.</p>
*/
public class ThaiAnalyzer extends Analyzer {
public ThaiAnalyzer() {
private final Version matchVersion;
public ThaiAnalyzer(Version matchVersion) {
setOverridesTokenStreamMethod(ThaiAnalyzer.class);
this.matchVersion = matchVersion;
}
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream ts = new StandardTokenizer(reader);
TokenStream ts = new StandardTokenizer(matchVersion, reader);
ts = new StandardFilter(ts);
ts = new ThaiWordFilter(ts);
ts = new StopFilter(false, ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
ts = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
return ts;
}
@ -60,10 +67,11 @@ public class ThaiAnalyzer extends Analyzer {
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
if (streams == null) {
streams = new SavedStreams();
streams.source = new StandardTokenizer(reader);
streams.source = new StandardTokenizer(matchVersion, reader);
streams.result = new StandardFilter(streams.source);
streams.result = new ThaiWordFilter(streams.result);
streams.result = new StopFilter(false, streams.result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
streams.result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
setPreviousTokenStream(streams);
} else {
streams.source.reset(reader);

View File: org/apache/lucene/analysis/ar/TestArabicAnalyzer.java

@ -22,6 +22,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
/**
* Test the Arabic Analyzer
@ -32,14 +33,14 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
/** This test fails with NPE when the
* stopwords file is missing in classpath */
public void testResourcesAvailable() {
new ArabicAnalyzer();
new ArabicAnalyzer(Version.LUCENE_CURRENT);
}
/**
* Some simple tests showing some features of the analyzer, how some regular forms will conflate
*/
public void testBasicFeatures() throws Exception {
ArabicAnalyzer a = new ArabicAnalyzer();
ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT);
assertAnalyzesTo(a, "كبير", new String[] { "كبير" });
assertAnalyzesTo(a, "كبيرة", new String[] { "كبير" }); // feminine marker
@ -60,7 +61,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
* Simple tests to show things are getting reset correctly, etc.
*/
public void testReusableTokenStream() throws Exception {
ArabicAnalyzer a = new ArabicAnalyzer();
ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT);
assertAnalyzesToReuse(a, "كبير", new String[] { "كبير" });
assertAnalyzesToReuse(a, "كبيرة", new String[] { "كبير" }); // feminine marker
}
@ -69,7 +70,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
* Non-arabic text gets treated in a similar way as SimpleAnalyzer.
*/
public void testEnglishInput() throws Exception {
assertAnalyzesTo(new ArabicAnalyzer(), "English text.", new String[] {
assertAnalyzesTo(new ArabicAnalyzer(Version.LUCENE_CURRENT), "English text.", new String[] {
"english", "text" });
}
@ -77,7 +78,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
* Test that custom stopwords work, and are not case-sensitive.
*/
public void testCustomStopwords() throws Exception {
ArabicAnalyzer a = new ArabicAnalyzer(new String[] { "the", "and", "a" });
ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT, new String[] { "the", "and", "a" });
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" });
}

View File: org/apache/lucene/analysis/br/TestBrazilianStemmer.java

@ -21,6 +21,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;
/**
* Test the Brazilian Stem Filter, which only modifies the term text.
@ -123,7 +124,7 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
}
public void testReusableTokenStream() throws Exception {
Analyzer a = new BrazilianAnalyzer();
Analyzer a = new BrazilianAnalyzer(Version.LUCENE_CURRENT);
checkReuse(a, "boa", "boa");
checkReuse(a, "boainain", "boainain");
checkReuse(a, "boas", "boas");
@ -131,7 +132,7 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
}
public void testStemExclusionTable() throws Exception {
BrazilianAnalyzer a = new BrazilianAnalyzer();
BrazilianAnalyzer a = new BrazilianAnalyzer(Version.LUCENE_CURRENT);
a.setStemExclusionTable(new String[] { "quintessência" });
checkReuse(a, "quintessência", "quintessência"); // excluded words will be completely unchanged.
}
@ -141,14 +142,14 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
* when using reusable token streams.
*/
public void testExclusionTableReuse() throws Exception {
BrazilianAnalyzer a = new BrazilianAnalyzer();
BrazilianAnalyzer a = new BrazilianAnalyzer(Version.LUCENE_CURRENT);
checkReuse(a, "quintessência", "quintessente");
a.setStemExclusionTable(new String[] { "quintessência" });
checkReuse(a, "quintessência", "quintessência");
}
private void check(final String input, final String expected) throws Exception {
checkOneTerm(new BrazilianAnalyzer(), input, expected);
checkOneTerm(new BrazilianAnalyzer(Version.LUCENE_CURRENT), input, expected);
}
private void checkReuse(Analyzer a, String input, String expected) throws Exception {

View File: org/apache/lucene/analysis/cjk/TestCJKTokenizer.java

@ -26,7 +26,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;
public class TestCJKTokenizer extends BaseTokenStreamTestCase {
@ -218,7 +218,7 @@ public class TestCJKTokenizer extends BaseTokenStreamTestCase {
}
public void testTokenStream() throws Exception {
Analyzer analyzer = new CJKAnalyzer();
Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
TokenStream ts = analyzer.tokenStream("dummy", new StringReader("\u4e00\u4e01\u4e02"));
TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
assertTrue(ts.incrementToken());
@ -229,7 +229,7 @@ public class TestCJKTokenizer extends BaseTokenStreamTestCase {
}
public void testReusableTokenStream() throws Exception {
Analyzer analyzer = new CJKAnalyzer();
Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
String str = "\u3042\u3044\u3046\u3048\u304aabc\u304b\u304d\u304f\u3051\u3053";
TestToken[] out_tokens = {

View File: org/apache/lucene/analysis/cz/TestCzechAnalyzer.java

@ -25,6 +25,7 @@ import java.io.InputStream;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;
/**
* Test the CzechAnalyzer
@ -37,11 +38,11 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
File customStopFile = new File(dataDir, "org/apache/lucene/analysis/cz/customStopWordFile.txt");
public void testStopWord() throws Exception {
assertAnalyzesTo(new CzechAnalyzer(), "Pokud mluvime o volnem", new String[] { "mluvime", "volnem" });
assertAnalyzesTo(new CzechAnalyzer(Version.LUCENE_CURRENT), "Pokud mluvime o volnem", new String[] { "mluvime", "volnem" });
}
public void testReusableTokenStream() throws Exception {
Analyzer analyzer = new CzechAnalyzer();
Analyzer analyzer = new CzechAnalyzer(Version.LUCENE_CURRENT);
assertAnalyzesToReuse(analyzer, "Pokud mluvime o volnem", new String[] { "mluvime", "volnem" });
assertAnalyzesToReuse(analyzer, "Česká Republika", new String[] { "česká", "republika" });
}
@ -61,7 +62,7 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
* this would cause a NPE when it is time to create the StopFilter.
*/
public void testInvalidStopWordFile() throws Exception {
CzechAnalyzer cz = new CzechAnalyzer();
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
cz.loadStopWords(new UnreliableInputStream(), "UTF-8");
assertAnalyzesTo(cz, "Pokud mluvime o volnem",
new String[] { "pokud", "mluvime", "o", "volnem" });
@ -72,7 +73,7 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
* when using reusable token streams.
*/
public void testStopWordFileReuse() throws Exception {
CzechAnalyzer cz = new CzechAnalyzer();
CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
assertAnalyzesToReuse(cz, "Česká Republika",
new String[] { "česká", "republika" });

View File: org/apache/lucene/analysis/de/TestGermanStemFilter.java

@ -28,6 +28,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
/**
* Test the German stemmer. The stemming algorithm is known to work less
@ -61,7 +62,7 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase {
}
public void testReusableTokenStream() throws Exception {
Analyzer a = new GermanAnalyzer();
Analyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
checkReuse(a, "Tisch", "tisch");
checkReuse(a, "Tische", "tisch");
checkReuse(a, "Tischen", "tisch");
@ -71,13 +72,17 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase {
* subclass that acts just like whitespace analyzer for testing
*/
private class GermanSubclassAnalyzer extends GermanAnalyzer {
public GermanSubclassAnalyzer(Version matchVersion) {
super(matchVersion);
}
public TokenStream tokenStream(String fieldName, Reader reader) {
return new WhitespaceTokenizer(reader);
}
}
public void testLUCENE1678BWComp() throws Exception {
checkReuse(new GermanSubclassAnalyzer(), "Tischen", "Tischen");
checkReuse(new GermanSubclassAnalyzer(Version.LUCENE_CURRENT), "Tischen", "Tischen");
}
/*
@ -85,14 +90,14 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase {
* when using reusable token streams.
*/
public void testExclusionTableReuse() throws Exception {
GermanAnalyzer a = new GermanAnalyzer();
GermanAnalyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
checkReuse(a, "tischen", "tisch");
a.setStemExclusionTable(new String[] { "tischen" });
checkReuse(a, "tischen", "tischen");
}
private void check(final String input, final String expected) throws Exception {
checkOneTerm(new GermanAnalyzer(), input, expected);
checkOneTerm(new GermanAnalyzer(Version.LUCENE_CURRENT), input, expected);
}
private void checkReuse(Analyzer a, String input, String expected) throws Exception {

View File: org/apache/lucene/analysis/el/GreekAnalyzerTest.java

@ -19,7 +19,7 @@ package org.apache.lucene.analysis.el;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;
/**
* A unit test class for verifying the correct operation of the GreekAnalyzer.
@ -33,7 +33,7 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
* @throws Exception in case an error occurs
*/
public void testAnalyzer() throws Exception {
Analyzer a = new GreekAnalyzer();
Analyzer a = new GreekAnalyzer(Version.LUCENE_CURRENT);
// Verify the correct analysis of capitals and small accented letters
assertAnalyzesTo(a, "\u039c\u03af\u03b1 \u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03ac \u03ba\u03b1\u03bb\u03ae \u03ba\u03b1\u03b9 \u03c0\u03bb\u03bf\u03cd\u03c3\u03b9\u03b1 \u03c3\u03b5\u03b9\u03c1\u03ac \u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03ae\u03c1\u03c9\u03bd \u03c4\u03b7\u03c2 \u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2",
new String[] { "\u03bc\u03b9\u03b1", "\u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03b1", "\u03ba\u03b1\u03bb\u03b7", "\u03c0\u03bb\u03bf\u03c5\u03c3\u03b9\u03b1", "\u03c3\u03b5\u03b9\u03c1\u03b1", "\u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03b7\u03c1\u03c9\u03bd",
@ -49,7 +49,7 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
}
public void testReusableTokenStream() throws Exception {
Analyzer a = new GreekAnalyzer();
Analyzer a = new GreekAnalyzer(Version.LUCENE_CURRENT);
// Verify the correct analysis of capitals and small accented letters
assertAnalyzesToReuse(a, "\u039c\u03af\u03b1 \u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03ac \u03ba\u03b1\u03bb\u03ae \u03ba\u03b1\u03b9 \u03c0\u03bb\u03bf\u03cd\u03c3\u03b9\u03b1 \u03c3\u03b5\u03b9\u03c1\u03ac \u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03ae\u03c1\u03c9\u03bd \u03c4\u03b7\u03c2 \u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2",
new String[] { "\u03bc\u03b9\u03b1", "\u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03b1", "\u03ba\u03b1\u03bb\u03b7", "\u03c0\u03bb\u03bf\u03c5\u03c3\u03b9\u03b1", "\u03c3\u03b5\u03b9\u03c1\u03b1", "\u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03b7\u03c1\u03c9\u03bd",

View File: org/apache/lucene/analysis/fa/TestPersianAnalyzer.java

@ -22,6 +22,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;
/**
* Test the Persian Analyzer
@ -33,7 +34,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* This test fails with NPE when the stopwords file is missing in classpath
*/
public void testResourcesAvailable() {
new PersianAnalyzer();
new PersianAnalyzer(Version.LUCENE_CURRENT);
}
/**
@ -44,7 +45,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar
*/
public void testBehaviorVerbs() throws Exception {
Analyzer a = new PersianAnalyzer();
Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
// active present indicative
assertAnalyzesTo(a, "می‌خورد", new String[] { "خورد" });
// active preterite indicative
@ -120,7 +121,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar
*/
public void testBehaviorVerbsDefective() throws Exception {
Analyzer a = new PersianAnalyzer();
Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
// active present indicative
assertAnalyzesTo(a, "مي خورد", new String[] { "خورد" });
// active preterite indicative
@ -191,7 +192,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* nouns, removing the plural -ha.
*/
public void testBehaviorNouns() throws Exception {
Analyzer a = new PersianAnalyzer();
Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
assertAnalyzesTo(a, "برگ ها", new String[] { "برگ" });
assertAnalyzesTo(a, "برگ‌ها", new String[] { "برگ" });
}
@ -201,7 +202,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* (lowercased, etc)
*/
public void testBehaviorNonPersian() throws Exception {
Analyzer a = new PersianAnalyzer();
Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
assertAnalyzesTo(a, "English test.", new String[] { "english", "test" });
}
@ -209,7 +210,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* Basic test ensuring that reusableTokenStream works correctly.
*/
public void testReusableTokenStream() throws Exception {
Analyzer a = new PersianAnalyzer();
Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
assertAnalyzesToReuse(a, "خورده مي شده بوده باشد", new String[] { "خورده" });
assertAnalyzesToReuse(a, "برگ‌ها", new String[] { "برگ" });
}
@ -218,7 +219,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
* Test that custom stopwords work, and are not case-sensitive.
*/
public void testCustomStopwords() throws Exception {
PersianAnalyzer a = new PersianAnalyzer(new String[] { "the", "and", "a" });
PersianAnalyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT, new String[] { "the", "and", "a" });
assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
"brown", "fox" });
}

View File: org/apache/lucene/analysis/fr/TestElision.java

@ -29,6 +29,7 @@ import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;
/**
*
@ -37,7 +38,7 @@ public class TestElision extends BaseTokenStreamTestCase {
public void testElision() throws Exception {
String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
Tokenizer tokenizer = new StandardTokenizer(new StringReader(test));
Tokenizer tokenizer = new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(test));
Set articles = new HashSet();
articles.add("l");
articles.add("M");

View File: org/apache/lucene/analysis/fr/TestFrenchAnalyzer.java

@ -22,6 +22,7 @@ import java.io.StringReader;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;
/**
* Test case for FrenchAnalyzer.
@ -32,7 +33,7 @@ import org.apache.lucene.analysis.TokenStream;
public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
public void testAnalyzer() throws Exception {
FrenchAnalyzer fa = new FrenchAnalyzer();
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
assertAnalyzesTo(fa, "", new String[] {
});
@ -116,7 +117,7 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
}
public void testReusableTokenStream() throws Exception {
FrenchAnalyzer fa = new FrenchAnalyzer();
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
// stopwords
assertAnalyzesToReuse(
fa,
@ -141,7 +142,7 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
* when using reusable token streams.
*/
public void testExclusionTableReuse() throws Exception {
FrenchAnalyzer fa = new FrenchAnalyzer();
FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
assertAnalyzesToReuse(fa, "habitable", new String[] { "habit" });
fa.setStemExclusionTable(new String[] { "habitable" });
assertAnalyzesToReuse(fa, "habitable", new String[] { "habitable" });

View File: org/apache/lucene/analysis/nl/TestDutchStemmer.java

@ -24,6 +24,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;
/**
* Test the Dutch Stem Filter, which only modifies the term text.
@ -119,7 +120,7 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
}
public void testReusableTokenStream() throws Exception {
Analyzer a = new DutchAnalyzer();
Analyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
checkOneTermReuse(a, "lichaamsziek", "lichaamsziek");
checkOneTermReuse(a, "lichamelijk", "licham");
checkOneTermReuse(a, "lichamelijke", "licham");
@ -130,13 +131,16 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
* subclass that acts just like whitespace analyzer for testing
*/
private class DutchSubclassAnalyzer extends DutchAnalyzer {
public DutchSubclassAnalyzer(Version matchVersion) {
super(matchVersion);
}
public TokenStream tokenStream(String fieldName, Reader reader) {
return new WhitespaceTokenizer(reader);
}
}
public void testLUCENE1678BWComp() throws Exception {
Analyzer a = new DutchSubclassAnalyzer();
Analyzer a = new DutchSubclassAnalyzer(Version.LUCENE_CURRENT);
checkOneTermReuse(a, "lichaamsziek", "lichaamsziek");
checkOneTermReuse(a, "lichamelijk", "lichamelijk");
checkOneTermReuse(a, "lichamelijke", "lichamelijke");
@ -148,7 +152,7 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
* when using reusable token streams.
*/
public void testExclusionTableReuse() throws Exception {
DutchAnalyzer a = new DutchAnalyzer();
DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
checkOneTermReuse(a, "lichamelijk", "licham");
a.setStemExclusionTable(new String[] { "lichamelijk" });
checkOneTermReuse(a, "lichamelijk", "lichamelijk");
@ -159,14 +163,14 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
* when using reusable token streams.
*/
public void testStemDictionaryReuse() throws Exception {
DutchAnalyzer a = new DutchAnalyzer();
DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
checkOneTermReuse(a, "lichamelijk", "licham");
a.setStemDictionary(customDictFile);
checkOneTermReuse(a, "lichamelijk", "somethingentirelydifferent");
}
private void check(final String input, final String expected) throws Exception {
checkOneTerm(new DutchAnalyzer(), input, expected);
checkOneTerm(new DutchAnalyzer(Version.LUCENE_CURRENT), input, expected);
}
}

View File

@ -37,6 +37,7 @@ import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
String variedFieldValues[] = {"the", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "boring", "dog"};
@ -62,7 +63,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
}
writer.close();
reader = IndexReader.open(dir, true);
protectedAnalyzer = new QueryAutoStopWordAnalyzer(appAnalyzer);
protectedAnalyzer = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, appAnalyzer);
}
protected void tearDown() throws Exception {
@ -72,7 +73,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
//Helper method to query
private int search(Analyzer a, String queryString) throws IOException, ParseException {
QueryParser qp = new QueryParser("repetitiveField", a);
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "repetitiveField", a);
Query q = qp.parse(queryString);
return new IndexSearcher(reader).search(q, null, 1000).totalHits;
}
@ -149,8 +150,8 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
* subclass that acts just like whitespace analyzer for testing
*/
private class QueryAutoStopWordSubclassAnalyzer extends QueryAutoStopWordAnalyzer {
public QueryAutoStopWordSubclassAnalyzer() {
super(new WhitespaceAnalyzer());
public QueryAutoStopWordSubclassAnalyzer(Version matchVersion) {
super(matchVersion, new WhitespaceAnalyzer());
}
public TokenStream tokenStream(String fieldName, Reader reader) {
@ -159,7 +160,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
}
public void testLUCENE1678BWComp() throws Exception {
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordSubclassAnalyzer();
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordSubclassAnalyzer(Version.LUCENE_CURRENT);
a.addStopWords(reader, "repetitiveField", 10);
int numHits = search(a, "repetitiveField:boring");
assertFalse(numHits == 0);
@ -180,7 +181,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
}
public void testWrappingNonReusableAnalyzer() throws Exception {
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(new NonreusableAnalyzer());
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, new NonreusableAnalyzer());
a.addStopWords(reader, 10);
int numHits = search(a, "repetitiveField:boring");
assertTrue(numHits == 0);
@ -189,7 +190,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
}
public void testTokenStream() throws Exception {
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(new WhitespaceAnalyzer());
QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, new WhitespaceAnalyzer());
a.addStopWords(reader, 10);
TokenStream ts = a.tokenStream("repetitiveField", new StringReader("this boring"));
TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
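
To make the new signature concrete, a minimal sketch of QueryAutoStopWordAnalyzer usage outside the test harness (the reader and threshold are illustrative, not from this patch):

  import java.io.IOException;
  import org.apache.lucene.analysis.WhitespaceAnalyzer;
  import org.apache.lucene.analysis.query.QueryAutoStopWordAnalyzer;
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.util.Version;

  public class AutoStopWordExample {
    public static QueryAutoStopWordAnalyzer wrap(IndexReader reader) throws IOException {
      // matchVersion is now the first argument, as in the tests above
      QueryAutoStopWordAnalyzer a =
          new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, new WhitespaceAnalyzer());
      a.addStopWords(reader, 10); // derive stopwords from index statistics
      return a;
    }
  }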

View File

@ -28,6 +28,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;
/**
* Test case for RussianAnalyzer.
@ -49,7 +50,7 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
public void testUnicode() throws IOException
{
RussianAnalyzer ra = new RussianAnalyzer();
RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_CURRENT);
inWords =
new InputStreamReader(
new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testUTF8.txt")),
@ -90,7 +91,7 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
public void testDigitsInRussianCharset()
{
Reader reader = new StringReader("text 1000");
RussianAnalyzer ra = new RussianAnalyzer();
RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_CURRENT);
TokenStream stream = ra.tokenStream("", reader);
TermAttribute termText = stream.getAttribute(TermAttribute.class);
@ -108,7 +109,7 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
}
public void testReusableTokenStream() throws Exception {
Analyzer a = new RussianAnalyzer();
Analyzer a = new RussianAnalyzer(Version.LUCENE_CURRENT);
assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представлен" });
assertAnalyzesToReuse(a, "Но знание это хранилось в тайне",

View File

@ -42,6 +42,7 @@ import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
/**
* A test class for ShingleAnalyzerWrapper as regards queries and scoring.
@ -85,7 +86,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
protected ScoreDoc[] queryParsingTest(Analyzer analyzer, String qs) throws Exception {
searcher = setUpSearcher(analyzer);
QueryParser qp = new QueryParser("content", analyzer);
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "content", analyzer);
Query q = qp.parse(qs);

View File

@ -23,6 +23,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.util.Version;
/**
* Test case for ThaiAnalyzer, modified from TestFrenchAnalyzer
@ -36,7 +37,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
* testcase for offsets
*/
public void testOffsets() throws Exception {
assertAnalyzesTo(new ThaiAnalyzer(), "เดอะนิวยอร์กไทมส์",
assertAnalyzesTo(new ThaiAnalyzer(Version.LUCENE_CURRENT), "เดอะนิวยอร์กไทมส์",
new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์"},
new int[] { 0, 2, 7, 9, 12 },
new int[] { 2, 7, 9, 12, 17});
@ -54,7 +55,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
* Instead, allow the definition of alphanum to include relevant categories like nonspacing marks!
*/
public void testBuggyTokenType() throws Exception {
assertAnalyzesTo(new ThaiAnalyzer(), "เดอะนิวยอร์กไทมส์ ๑๒๓",
assertAnalyzesTo(new ThaiAnalyzer(Version.LUCENE_CURRENT), "เดอะนิวยอร์กไทมส์ ๑๒๓",
new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์", "๑๒๓" },
new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
}
@ -68,7 +69,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
*/
public void testAnalyzer() throws Exception {
ThaiAnalyzer analyzer = new ThaiAnalyzer();
ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT);
assertAnalyzesTo(analyzer, "", new String[] {});
@ -90,7 +91,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
}
public void testReusableTokenStream() throws Exception {
ThaiAnalyzer analyzer = new ThaiAnalyzer();
ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT);
assertAnalyzesToReuse(analyzer, "", new String[] {});
assertAnalyzesToReuse(
@ -108,13 +109,16 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
* subclass that acts just like whitespace analyzer for testing
*/
private class ThaiSubclassAnalyzer extends ThaiAnalyzer {
public ThaiSubclassAnalyzer(Version matchVersion) {
super(matchVersion);
}
public TokenStream tokenStream(String fieldName, Reader reader) {
return new WhitespaceTokenizer(reader);
}
}
public void testLUCENE1678BWComp() throws Exception {
ThaiSubclassAnalyzer a = new ThaiSubclassAnalyzer();
ThaiSubclassAnalyzer a = new ThaiSubclassAnalyzer(Version.LUCENE_CURRENT);
assertAnalyzesToReuse(a, "การที่ได้ต้องแสดงว่างานดี", new String[] { "การที่ได้ต้องแสดงว่างานดี" });
}
}

View File

@ -32,6 +32,7 @@ import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.WordlistLoader;
import org.apache.lucene.analysis.cn.smart.SentenceTokenizer;
import org.apache.lucene.analysis.cn.smart.WordTokenFilter;
import org.apache.lucene.util.Version;
/**
* <p>
@ -103,11 +104,13 @@ public class SmartChineseAnalyzer extends Analyzer {
}
}
private final Version matchVersion;
/**
* Create a new SmartChineseAnalyzer, using the default stopword list.
*/
public SmartChineseAnalyzer() {
this(true);
public SmartChineseAnalyzer(Version matchVersion) {
this(matchVersion, true);
}
/**
@ -121,9 +124,10 @@ public class SmartChineseAnalyzer extends Analyzer {
*
* @param useDefaultStopWords true to use the default stopword list.
*/
public SmartChineseAnalyzer(boolean useDefaultStopWords) {
public SmartChineseAnalyzer(Version matchVersion, boolean useDefaultStopWords) {
stopWords = useDefaultStopWords ? DefaultSetHolder.DEFAULT_STOP_SET
: Collections.EMPTY_SET;
: Collections.EMPTY_SET;
this.matchVersion = matchVersion;
}
/**
@ -135,8 +139,9 @@ public class SmartChineseAnalyzer extends Analyzer {
* </p>
* @param stopWords {@link Set} of stopwords to use.
*/
public SmartChineseAnalyzer(Set stopWords) {
public SmartChineseAnalyzer(Version matchVersion, Set stopWords) {
this.stopWords = stopWords==null?Collections.EMPTY_SET:stopWords;
this.matchVersion = matchVersion;
}
public TokenStream tokenStream(String fieldName, Reader reader) {
@ -147,7 +152,8 @@ public class SmartChineseAnalyzer extends Analyzer {
// The porter stemming is too strict, this is not a bug, this is a feature:)
result = new PorterStemFilter(result);
if (!stopWords.isEmpty()) {
result = new StopFilter(false,result, stopWords, false);
result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
result, stopWords, false);
}
return result;
}
@ -167,7 +173,8 @@ public class SmartChineseAnalyzer extends Analyzer {
streams.filteredTokenStream = new WordTokenFilter(streams.tokenStream);
streams.filteredTokenStream = new PorterStemFilter(streams.filteredTokenStream);
if (!stopWords.isEmpty()) {
streams.filteredTokenStream = new StopFilter(false, streams.filteredTokenStream, stopWords, false);
streams.filteredTokenStream = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
streams.filteredTokenStream, stopWords, false);
}
} else {
streams.tokenStream.reset(reader);
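
A sketch of how the three constructors above are invoked after this change (package per the imports in this file; Version.LUCENE_CURRENT as in the tests that follow):

  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.analysis.cn.SmartChineseAnalyzer;
  import org.apache.lucene.util.Version;

  public class SmartChineseExample {
    public static void main(String[] args) {
      // load the default stopword list (the tests below show it filters punctuation)
      Analyzer a1 = new SmartChineseAnalyzer(Version.LUCENE_CURRENT);
      // skip the default stopword list entirely
      Analyzer a2 = new SmartChineseAnalyzer(Version.LUCENE_CURRENT, false);
      // caller-supplied stopword Set (null is treated as the empty set)
      Analyzer a3 = new SmartChineseAnalyzer(Version.LUCENE_CURRENT,
          SmartChineseAnalyzer.getDefaultStopSet());
    }
  }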

View File

@ -26,16 +26,17 @@ import java.util.Date;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;
public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
public void testChineseStopWordsDefault() throws Exception {
Analyzer ca = new SmartChineseAnalyzer(); /* will load stopwords */
Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT); /* will load stopwords */
String sentence = "我购买了道具和服装。";
String result[] = { "", "购买", "", "道具", "", "服装" };
assertAnalyzesTo(ca, sentence, result);
// set stop-words from the outer world - must yield same behavior
ca = new SmartChineseAnalyzer(SmartChineseAnalyzer.getDefaultStopSet());
ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT, SmartChineseAnalyzer.getDefaultStopSet());
assertAnalyzesTo(ca, sentence, result);
}
@ -44,7 +45,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
* This tests to ensure the SentenceTokenizer->WordTokenFilter chain works correctly.
*/
public void testChineseStopWordsDefaultTwoPhrases() throws Exception {
Analyzer ca = new SmartChineseAnalyzer(); /* will load stopwords */
Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT); /* will load stopwords */
String sentence = "我购买了道具和服装。 我购买了道具和服装。";
String result[] = { "", "购买", "", "道具", "", "服装", "", "购买", "", "道具", "", "服装" };
assertAnalyzesTo(ca, sentence, result);
@ -55,7 +56,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
* This tests to ensure the stopwords are working correctly.
*/
public void testChineseStopWordsDefaultTwoPhrasesIdeoSpace() throws Exception {
Analyzer ca = new SmartChineseAnalyzer(); /* will load stopwords */
Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT); /* will load stopwords */
String sentence = "我购买了道具和服装 我购买了道具和服装。";
String result[] = { "", "购买", "", "道具", "", "服装", "", "购买", "", "道具", "", "服装" };
assertAnalyzesTo(ca, sentence, result);
@ -69,8 +70,8 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
*/
public void testChineseStopWordsOff() throws Exception {
Analyzer[] analyzers = new Analyzer[] {
new SmartChineseAnalyzer(false),/* doesn't load stopwords */
new SmartChineseAnalyzer(null) /* sets stopwords to empty set */};
new SmartChineseAnalyzer(Version.LUCENE_CURRENT, false),/* doesn't load stopwords */
new SmartChineseAnalyzer(Version.LUCENE_CURRENT, null) /* sets stopwords to empty set */};
String sentence = "我购买了道具和服装。";
String result[] = { "", "购买", "", "道具", "", "服装", "," };
for (Analyzer analyzer : analyzers) {
@ -80,7 +81,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
}
public void testChineseAnalyzer() throws Exception {
Analyzer ca = new SmartChineseAnalyzer(true);
Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true);
String sentence = "我购买了道具和服装。";
String[] result = { "", "购买", "", "道具", "", "服装" };
assertAnalyzesTo(ca, sentence, result);
@ -90,7 +91,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
* English words are lowercased and porter-stemmed.
*/
public void testMixedLatinChinese() throws Exception {
assertAnalyzesTo(new SmartChineseAnalyzer(true), "我购买 Tests 了道具和服装",
assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "我购买 Tests 了道具和服装",
new String[] { "", "购买", "test", "", "道具", "", "服装"});
}
@ -98,7 +99,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
* Numerics are parsed as their own tokens
*/
public void testNumerics() throws Exception {
assertAnalyzesTo(new SmartChineseAnalyzer(true), "我购买 Tests 了道具和服装1234",
assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "我购买 Tests 了道具和服装1234",
new String[] { "", "购买", "test", "", "道具", "", "服装", "1234"});
}
@ -106,7 +107,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
* Full width alphas and numerics are folded to half-width
*/
public void testFullWidth() throws Exception {
assertAnalyzesTo(new SmartChineseAnalyzer(true), "我购买 了道具和服装1234",
assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "我购买 了道具和服装1234",
new String[] { "", "购买", "test", "", "道具", "", "服装", "1234"});
}
@ -114,7 +115,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
* Presentation form delimiters are removed
*/
public void testDelimiters() throws Exception {
assertAnalyzesTo(new SmartChineseAnalyzer(true), "我购买︱ Tests 了道具和服装",
assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "我购买︱ Tests 了道具和服装",
new String[] { "", "购买", "test", "", "道具", "", "服装"});
}
@ -123,7 +124,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
* (regardless of Unicode category)
*/
public void testNonChinese() throws Exception {
assertAnalyzesTo(new SmartChineseAnalyzer(true), "我购买 روبرتTests 了道具和服装",
assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "我购买 روبرتTests 了道具和服装",
new String[] { "", "购买", "ر", "و", "ب", "ر", "ت", "test", "", "道具", "", "服装"});
}
@ -133,22 +134,22 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
* Currently it is being analyzed into single characters...
*/
public void testOOV() throws Exception {
assertAnalyzesTo(new SmartChineseAnalyzer(true), "优素福·拉扎·吉拉尼",
assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "优素福·拉扎·吉拉尼",
new String[] { "", "", "", "", "", "", "", "" });
assertAnalyzesTo(new SmartChineseAnalyzer(true), "优素福拉扎吉拉尼",
assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "优素福拉扎吉拉尼",
new String[] { "", "", "", "", "", "", "", "" });
}
public void testOffsets() throws Exception {
assertAnalyzesTo(new SmartChineseAnalyzer(true), "我购买了道具和服装",
assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "我购买了道具和服装",
new String[] { "", "购买", "", "道具", "", "服装" },
new int[] { 0, 1, 3, 4, 6, 7 },
new int[] { 1, 3, 4, 6, 7, 9 });
}
public void testReusableTokenStream() throws Exception {
Analyzer a = new SmartChineseAnalyzer();
Analyzer a = new SmartChineseAnalyzer(Version.LUCENE_CURRENT);
assertAnalyzesToReuse(a, "我购买 Tests 了道具和服装",
new String[] { "", "购买", "test", "", "道具", "", "服装"},
new int[] { 0, 1, 4, 10, 11, 13, 14 },

View File

@ -31,6 +31,7 @@ import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.FSDirectory;
import org.apache.tools.ant.Project;
import org.apache.tools.ant.types.FileSet;
import org.apache.lucene.util.Version;
/**
* Test cases for index task
@ -69,12 +70,12 @@ public class IndexTaskTest extends TestCase {
dir = FSDirectory.open(indexDir);
searcher = new IndexSearcher(dir, true);
analyzer = new StopAnalyzer(false);
analyzer = new StopAnalyzer(Version.LUCENE_CURRENT);
}
public void testSearch() throws Exception {
Query query = new QueryParser("contents",analyzer).parse("test");
Query query = new QueryParser(Version.LUCENE_CURRENT, "contents",analyzer).parse("test");
int numHits = searcher.search(query, null, 1000).totalHits;

View File

@ -33,6 +33,7 @@ import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
import org.apache.lucene.util.Version;
/**
* A QueryMaker that uses common and uncommon actual Wikipedia queries for
@ -92,7 +93,7 @@ public class EnwikiQueryMaker extends AbstractQueryMaker implements
* @return array of Lucene queries
*/
private static Query[] createQueries(List qs, Analyzer a) {
QueryParser qp = new QueryParser(DocMaker.BODY_FIELD, a);
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD, a);
List queries = new ArrayList();
for (int i = 0; i < qs.size(); i++) {
try {

View File

@ -5,6 +5,7 @@ import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Query;
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
import org.apache.lucene.util.Version;
import java.io.*;
import java.util.ArrayList;
@ -48,7 +49,7 @@ public class FileBasedQueryMaker extends AbstractQueryMaker implements QueryMake
Analyzer anlzr = NewAnalyzerTask.createAnalyzer(config.get("analyzer",
"org.apache.lucene.analysis.standard.StandardAnalyzer"));
String defaultField = config.get("file.query.maker.default.field", DocMaker.BODY_FIELD);
QueryParser qp = new QueryParser(defaultField, anlzr);
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, defaultField, anlzr);
List qq = new ArrayList();
String fileName = config.get("file.query.maker.file", null);

View File

@ -27,6 +27,7 @@ import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
import org.apache.lucene.util.Version;
import java.util.ArrayList;
import java.util.Arrays;
@ -72,7 +73,7 @@ public class ReutersQueryMaker extends AbstractQueryMaker implements QueryMaker
* @return array of Lucene queries
*/
private static Query[] createQueries(List qs, Analyzer a) {
QueryParser qp = new QueryParser(DocMaker.BODY_FIELD, a);
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD, a);
List queries = new ArrayList();
for (int i = 0; i < qs.size(); i++) {
try {

View File

@ -25,6 +25,7 @@ import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
import org.apache.lucene.util.Version;
import java.util.ArrayList;
@ -46,7 +47,7 @@ public class SimpleQueryMaker extends AbstractQueryMaker implements QueryMaker {
Analyzer anlzr= NewAnalyzerTask.createAnalyzer(config.get("analyzer",
"org.apache.lucene.analysis.standard.StandardAnalyzer"));
QueryParser qp = new QueryParser(DocMaker.BODY_FIELD,anlzr);
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD,anlzr);
ArrayList qq = new ArrayList();
Query q1 = new TermQuery(new Term(DocMaker.ID_FIELD,"doc2"));
qq.add(q1);

View File

@ -50,7 +50,7 @@ public class SimpleQQParser implements QualityQueryParser {
public Query parse(QualityQuery qq) throws ParseException {
QueryParser qp = queryParser.get();
if (qp==null) {
qp = new QueryParser(indexField, new StandardAnalyzer(Version.LUCENE_CURRENT));
qp = new QueryParser(Version.LUCENE_CURRENT, indexField, new StandardAnalyzer(Version.LUCENE_CURRENT));
queryParser.set(qp);
}
return qp.parse(qq.getValue(qqName));

View File

@ -39,6 +39,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.Document;
import org.apache.lucene.util.IndexableBinaryStringTools;
import org.apache.lucene.queryParser.analyzing.AnalyzingQueryParser;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.nio.CharBuffer;
@ -83,7 +84,7 @@ public class CollationTestBase extends TestCase {
writer.close();
IndexSearcher is = new IndexSearcher(ramDir, true);
AnalyzingQueryParser aqp = new AnalyzingQueryParser("content", analyzer);
AnalyzingQueryParser aqp = new AnalyzingQueryParser(Version.LUCENE_CURRENT, "content", analyzer);
aqp.setLowercaseExpandedTerms(false);
// Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi

View File

@ -38,6 +38,7 @@ import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
/**
* <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
@ -50,7 +51,7 @@ public class FieldTermStack {
public static void main( String[] args ) throws Exception {
Analyzer analyzer = new WhitespaceAnalyzer();
QueryParser parser = new QueryParser( "f", analyzer );
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "f", analyzer );
Query query = parser.parse( "a x:b" );
FieldQuery fieldQuery = new FieldQuery( query, true, false );

View File

@ -45,6 +45,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
public abstract class AbstractTestCase extends TestCase {
@ -78,8 +79,8 @@ public abstract class AbstractTestCase extends TestCase {
protected void setUp() throws Exception {
analyzerW = new WhitespaceAnalyzer();
analyzerB = new BigramAnalyzer();
paW = new QueryParser( F, analyzerW );
paB = new QueryParser( F, analyzerB );
paW = new QueryParser(Version.LUCENE_CURRENT, F, analyzerW );
paB = new QueryParser(Version.LUCENE_CURRENT, F, analyzerB );
dir = new RAMDirectory();
}

View File

@ -113,7 +113,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
public void testQueryScorerHits() throws Exception {
Analyzer analyzer = new SimpleAnalyzer();
QueryParser qp = new QueryParser(FIELD_NAME, analyzer);
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, analyzer);
query = qp.parse("\"very long\"");
searcher = new IndexSearcher(ramDir, true);
TopDocs hits = searcher.search(query, 10);
@ -143,7 +143,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
String s1 = "I call our world Flatland, not because we call it so,";
QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer(TEST_VERSION));
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, new StandardAnalyzer(TEST_VERSION));
// Verify that a query against the default field results in text being
// highlighted
@ -221,7 +221,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
String q = "(" + f1c + ph1 + " OR " + f2c + ph1 + ") AND (" + f1c + ph2
+ " OR " + f2c + ph2 + ")";
Analyzer analyzer = new WhitespaceAnalyzer();
QueryParser qp = new QueryParser(f1, analyzer);
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, f1, analyzer);
Query query = qp.parse(q);
QueryScorer scorer = new QueryScorer(query, f1);
@ -590,7 +590,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
// Need to explicitly set the QueryParser property to use TermRangeQuery
// rather
// than RangeFilters
QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, analyzer);
parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
query = parser.parse(queryString);
doSearching(query);
@ -930,7 +930,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
String srchkey = "football";
String s = "football-soccer in the euro 2004 footie competition";
QueryParser parser = new QueryParser("bookid", analyzer);
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "bookid", analyzer);
Query query = parser.parse(srchkey);
TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(s));
@ -1111,7 +1111,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
searcher = new IndexSearcher(ramDir, true);
Analyzer analyzer = new StandardAnalyzer(TEST_VERSION);
QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, analyzer);
Query query = parser.parse("JF? or Kenned*");
System.out.println("Searching with primitive query");
// forget to set this and...
@ -1245,7 +1245,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
searchers[0] = new IndexSearcher(ramDir1, true);
searchers[1] = new IndexSearcher(ramDir2, true);
MultiSearcher multiSearcher = new MultiSearcher(searchers);
QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer(TEST_VERSION));
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, new StandardAnalyzer(TEST_VERSION));
parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
query = parser.parse("multi*");
System.out.println("Searching for: " + query.toString(FIELD_NAME));
@ -1278,7 +1278,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
public void run() throws Exception {
String docMainText = "fred is one of the people";
QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, analyzer);
Query query = parser.parse("fred category:people");
// highlighting respects fieldnames used in query
@ -1419,64 +1419,64 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
Highlighter highlighter;
String result;
query = new QueryParser("text", new WhitespaceAnalyzer()).parse("foo");
query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("foo");
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("Hi-Speed10 <B>foo</B>", result);
query = new QueryParser("text", new WhitespaceAnalyzer()).parse("10");
query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("10");
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("Hi-Speed<B>10</B> foo", result);
query = new QueryParser("text", new WhitespaceAnalyzer()).parse("hi");
query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("hi");
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("<B>Hi</B>-Speed10 foo", result);
query = new QueryParser("text", new WhitespaceAnalyzer()).parse("speed");
query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("speed");
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("Hi-<B>Speed</B>10 foo", result);
query = new QueryParser("text", new WhitespaceAnalyzer()).parse("hispeed");
query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("hispeed");
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("<B>Hi-Speed</B>10 foo", result);
query = new QueryParser("text", new WhitespaceAnalyzer()).parse("hi speed");
query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("hi speed");
highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2(), s, 3, "...");
assertEquals("<B>Hi-Speed</B>10 foo", result);
// ///////////////// same tests, just put the bigger overlapping token
// first
query = new QueryParser("text", new WhitespaceAnalyzer()).parse("foo");
query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("foo");
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("Hi-Speed10 <B>foo</B>", result);
query = new QueryParser("text", new WhitespaceAnalyzer()).parse("10");
query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("10");
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("Hi-Speed<B>10</B> foo", result);
query = new QueryParser("text", new WhitespaceAnalyzer()).parse("hi");
query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("hi");
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("<B>Hi</B>-Speed10 foo", result);
query = new QueryParser("text", new WhitespaceAnalyzer()).parse("speed");
query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("speed");
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("Hi-<B>Speed</B>10 foo", result);
query = new QueryParser("text", new WhitespaceAnalyzer()).parse("hispeed");
query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("hispeed");
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("<B>Hi-Speed</B>10 foo", result);
query = new QueryParser("text", new WhitespaceAnalyzer()).parse("hi speed");
query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("hi speed");
highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
assertEquals("<B>Hi-Speed</B>10 foo", result);
@ -1521,7 +1521,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
private void searchIndex() throws IOException, ParseException, InvalidTokenOffsetsException {
String q = "t_text1:random";
QueryParser parser = new QueryParser( "t_text1", a );
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "t_text1", a );
Query query = parser.parse( q );
IndexSearcher searcher = new IndexSearcher( dir, true );
// This scorer can return negative idf -> null fragment
@ -1575,7 +1575,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
}
public void doSearching(String queryString) throws Exception {
QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, analyzer);
parser.setEnablePositionIncrements(true);
parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
query = parser.parse(queryString);

View File

@ -195,7 +195,7 @@ class LuceneMethods {
for (int ii = 0; ii < arraySize; ii++) {
indexedArray[ii] = (String) indexedFields.get(ii);
}
MultiFieldQueryParser parser = new MultiFieldQueryParser(indexedArray, analyzer);
MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT, indexedArray, analyzer);
query = parser.parse(queryString);
System.out.println("Searching for: " + query.toString());
return (query);
@ -216,7 +216,7 @@ class LuceneMethods {
for (int ii = 0; ii < arraySize; ii++) {
fieldsArray[ii] = (String) fields.get(ii);
}
MultiFieldQueryParser parser = new MultiFieldQueryParser(fieldsArray, analyzer);
MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fieldsArray, analyzer);
query = parser.parse(queryString);
System.out.println("Searching for: " + query.toString());
}
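
To show the new MultiFieldQueryParser signature in isolation, a minimal sketch (the field names here are hypothetical):

  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.queryParser.MultiFieldQueryParser;
  import org.apache.lucene.queryParser.ParseException;
  import org.apache.lucene.search.Query;
  import org.apache.lucene.util.Version;

  public class MultiFieldExample {
    public static Query parse(String userInput) throws ParseException {
      String[] fields = { "title", "contents" }; // hypothetical field names
      MultiFieldQueryParser parser = new MultiFieldQueryParser(
          Version.LUCENE_CURRENT, fields, new StandardAnalyzer(Version.LUCENE_CURRENT));
      return parser.parse(userInput);
    }
  }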

View File

@ -33,6 +33,7 @@ import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;
/**
* Efficient Lucene analyzer/tokenizer that preferably operates on a String rather than a
@ -124,7 +125,7 @@ public class PatternAnalyzer extends Analyzer {
* freely across threads without harm); global per class loader.
*/
public static final PatternAnalyzer DEFAULT_ANALYZER = new PatternAnalyzer(
NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
/**
* A lower-casing word analyzer with <b>extended </b> English stop words
@ -134,15 +135,18 @@ public class PatternAnalyzer extends Analyzer {
* http://thomas.loc.gov/home/all.about.inquery.html
*/
public static final PatternAnalyzer EXTENDED_ANALYZER = new PatternAnalyzer(
NON_WORD_PATTERN, true, EXTENDED_ENGLISH_STOP_WORDS);
Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, EXTENDED_ENGLISH_STOP_WORDS);
private final Pattern pattern;
private final boolean toLowerCase;
private final Set stopWords;
private final Version matchVersion;
/**
* Constructs a new instance with the given parameters.
*
* @param matchVersion If >= {@link Version#LUCENE_29}, StopFilter.enablePositionIncrement is set to true
* @param pattern
* a regular expression delimiting tokens
* @param toLowerCase
@ -158,7 +162,7 @@ public class PatternAnalyzer extends Analyzer {
* or <a href="http://www.unine.ch/info/clef/">other stop words
* lists </a>.
*/
public PatternAnalyzer(Pattern pattern, boolean toLowerCase, Set stopWords) {
public PatternAnalyzer(Version matchVersion, Pattern pattern, boolean toLowerCase, Set stopWords) {
if (pattern == null)
throw new IllegalArgumentException("pattern must not be null");
@ -170,6 +174,7 @@ public class PatternAnalyzer extends Analyzer {
this.pattern = pattern;
this.toLowerCase = toLowerCase;
this.stopWords = stopWords;
this.matchVersion = matchVersion;
}
/**
@ -197,7 +202,7 @@ public class PatternAnalyzer extends Analyzer {
}
else {
stream = new PatternTokenizer(text, pattern, toLowerCase);
if (stopWords != null) stream = new StopFilter(false, stream, stopWords);
if (stopWords != null) stream = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), stream, stopWords);
}
return stream;
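
A usage sketch of the constructor with its new leading matchVersion argument, mirroring the tests below:

  import java.util.regex.Pattern;
  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.analysis.StopAnalyzer;
  import org.apache.lucene.index.memory.PatternAnalyzer;
  import org.apache.lucene.util.Version;

  public class PatternAnalyzerExample {
    public static Analyzer commaSplitting() {
      // split on commas, lowercase, remove English stopwords;
      // with LUCENE_29 or later the embedded StopFilter preserves position increments
      return new PatternAnalyzer(Version.LUCENE_CURRENT,
          Pattern.compile(","), true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    }
  }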

View File

@ -53,6 +53,7 @@ import org.apache.lucene.search.Searcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.util.Version;
/**
Verifies that Lucene MemoryIndex and RAMDirectory have the same behaviour,
@ -277,7 +278,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
Analyzer[] analyzers = new Analyzer[] {
new SimpleAnalyzer(),
new StopAnalyzer(true),
new StopAnalyzer(Version.LUCENE_CURRENT),
new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT),
PatternAnalyzer.DEFAULT_ANALYZER,
// new WhitespaceAnalyzer(),
@ -480,7 +481,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
}
private Query parseQuery(String expression) throws ParseException {
QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, analyzer);
// parser.setPhraseSlop(0);
return parser.parse(expression);
}

View File

@ -24,6 +24,7 @@ import java.util.regex.Pattern;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;
/**
* Verifies the behavior of PatternAnalyzer.
@ -36,13 +37,13 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
*/
public void testNonWordPattern() throws IOException {
// Split on non-letter pattern, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(PatternAnalyzer.NON_WORD_PATTERN,
PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
false, null);
check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
"The", "quick", "brown", "Fox", "the", "abcd", "dc" });
// split on non-letter pattern, lowercase, english stopwords
PatternAnalyzer b = new PatternAnalyzer(PatternAnalyzer.NON_WORD_PATTERN,
PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
"quick", "brown", "fox", "abcd", "dc" });
@ -54,13 +55,13 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
*/
public void testWhitespacePattern() throws IOException {
// Split on whitespace patterns, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(PatternAnalyzer.WHITESPACE_PATTERN,
PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
false, null);
check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
"The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." });
// Split on whitespace patterns, lowercase, english stopwords
PatternAnalyzer b = new PatternAnalyzer(PatternAnalyzer.WHITESPACE_PATTERN,
PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
"quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." });
@ -72,12 +73,12 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
*/
public void testCustomPattern() throws IOException {
// Split on comma, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(Pattern.compile(","), false, null);
PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, Pattern.compile(","), false, null);
check(a, "Here,Are,some,Comma,separated,words,", new String[] { "Here",
"Are", "some", "Comma", "separated", "words" });
// split on comma, lowercase, english stopwords
PatternAnalyzer b = new PatternAnalyzer(Pattern.compile(","), true,
PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, Pattern.compile(","), true,
StopAnalyzer.ENGLISH_STOP_WORDS_SET);
check(b, "Here,Are,some,Comma,separated,words,", new String[] { "here",
"some", "comma", "separated", "words" });
@ -102,7 +103,7 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
document.append(largeWord2);
// Split on whitespace patterns, do not lowercase, no stopwords
PatternAnalyzer a = new PatternAnalyzer(PatternAnalyzer.WHITESPACE_PATTERN,
PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
false, null);
check(a, document.toString(), new String[] { new String(largeWord),
new String(largeWord2) });

View File

@ -28,6 +28,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
/**
* Overrides Lucene's default QueryParser so that Fuzzy-, Prefix-, Range-, and WildcardQuerys
@ -49,8 +50,8 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
* @param field the default field for query terms.
* @param analyzer used to find terms in the query text.
*/
public AnalyzingQueryParser(String field, Analyzer analyzer) {
super(field, analyzer);
public AnalyzingQueryParser(Version matchVersion, String field, Analyzer analyzer) {
super(matchVersion, field, analyzer);
}
/**
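
The updated constructor in use, as a sketch (field name and analyzer choice are arbitrary):

  import org.apache.lucene.analysis.standard.StandardAnalyzer;
  import org.apache.lucene.queryParser.ParseException;
  import org.apache.lucene.queryParser.analyzing.AnalyzingQueryParser;
  import org.apache.lucene.search.Query;
  import org.apache.lucene.util.Version;

  public class AnalyzingExample {
    public static Query parse(String input) throws ParseException {
      AnalyzingQueryParser qp = new AnalyzingQueryParser(Version.LUCENE_CURRENT,
          "content", new StandardAnalyzer(Version.LUCENE_CURRENT));
      return qp.parse(input); // wildcard/fuzzy/prefix/range terms are analyzed too
    }
  }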

View File

@ -38,6 +38,7 @@ import org.apache.lucene.search.spans.SpanNotQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.Version;
/**
* QueryParser which permits complex phrase query syntax eg "(john jon
@ -67,8 +68,8 @@ public class ComplexPhraseQueryParser extends QueryParser {
private ComplexPhraseQuery currentPhraseQuery = null;
public ComplexPhraseQueryParser(String f, Analyzer a) {
super(f, a);
public ComplexPhraseQueryParser(Version matchVersion, String f, Analyzer a) {
super(matchVersion, f, a);
}
protected Query getFieldQuery(String field, String queryText, int slop) {
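
A sketch of the new constructor, using the phrase syntax quoted in the class javadoc above:

  import org.apache.lucene.analysis.WhitespaceAnalyzer;
  import org.apache.lucene.queryParser.ParseException;
  import org.apache.lucene.queryParser.complexPhrase.ComplexPhraseQueryParser;
  import org.apache.lucene.search.Query;
  import org.apache.lucene.util.Version;

  public class ComplexPhraseExample {
    public static Query parse() throws ParseException {
      ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(
          Version.LUCENE_CURRENT, "name", new WhitespaceAnalyzer());
      return qp.parse("\"(john jon jonathan~) smith*\"");
    }
  }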

View File

@ -28,6 +28,7 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.util.Version;
/**
* @version $Revision$, $Date$
@ -97,7 +98,7 @@ public class TestAnalyzingQueryParser extends TestCase {
}
private String parseWithAnalyzingQueryParser(String s, Analyzer a) throws ParseException {
AnalyzingQueryParser qp = new AnalyzingQueryParser("field", a);
AnalyzingQueryParser qp = new AnalyzingQueryParser(Version.LUCENE_CURRENT, "field", a);
org.apache.lucene.search.Query q = qp.parse(s);
return q.toString("field");
}
@ -109,7 +110,7 @@ class ASCIIAnalyzer extends org.apache.lucene.analysis.Analyzer {
}
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer(reader);
TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
result = new StandardFilter(result);
result = new ASCIIFoldingFilter(result);
result = new LowerCaseFilter(result);

View File

@ -33,6 +33,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
public class TestComplexPhraseQuery extends TestCase {
@ -71,7 +72,7 @@ public class TestComplexPhraseQuery extends TestCase {
}
private void checkBadQuery(String qString) {
QueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, analyzer);
QueryParser qp = new ComplexPhraseQueryParser(Version.LUCENE_CURRENT, defaultFieldName, analyzer);
Throwable expected = null;
try {
qp.parse(qString);
@ -84,7 +85,7 @@ public class TestComplexPhraseQuery extends TestCase {
private void checkMatches(String qString, String expectedVals)
throws Exception {
QueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, analyzer);
QueryParser qp = new ComplexPhraseQueryParser(Version.LUCENE_CURRENT, defaultFieldName, analyzer);
qp.setFuzzyPrefixLength(1); // usually a good idea
Query q = qp.parse(qString);

View File

@ -34,6 +34,7 @@ import org.apache.lucene.queryParser.core.QueryNodeException;
import org.apache.lucene.queryParser.standard.StandardQueryParser;
import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute.Operator;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
/**
* This test case is a copy of the core Lucene query parser test, it was adapted
@ -154,7 +155,7 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {
}
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer(reader);
TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
result = new TestFilter(result);
result = new LowerCaseFilter(result);
return result;
@ -222,7 +223,7 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {
}
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer(reader);
TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
result = new TestPosIncrementFilter(result);
result = new LowerCaseFilter(result);
return result;

View File

@ -33,6 +33,7 @@ import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.standard.QueryParserWrapper;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
/**
* This test case is a copy of the core Lucene query parser test, it was adapted
@ -148,7 +149,7 @@ public class TestMultiAnalyzerWrapper extends LuceneTestCase {
}
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer(reader);
TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
result = new TestFilter(result);
result = new LowerCaseFilter(result);
return result;
@ -216,7 +217,7 @@ public class TestMultiAnalyzerWrapper extends LuceneTestCase {
}
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer(reader);
TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
result = new TestPosIncrementFilter(result);
result = new LowerCaseFilter(result);
return result;

View File

@ -1070,7 +1070,7 @@ public class TestQPHelper extends LocalizedTestCase {
public void testStopwords() throws Exception {
StandardQueryParser qp = new StandardQueryParser();
qp.setAnalyzer(
new StopAnalyzer(StopFilter.makeStopSet("the", "foo" ), true));
new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "foo" )));
Query result = qp.parse("a:the OR a:foo", "a");
assertNotNull("result is null and it shouldn't be", result);
@ -1093,7 +1093,7 @@ public class TestQPHelper extends LocalizedTestCase {
public void testPositionIncrement() throws Exception {
StandardQueryParser qp = new StandardQueryParser();
qp.setAnalyzer(
new StopAnalyzer(StopFilter.makeStopSet("the", "in", "are", "this" ), true));
new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "in", "are", "this" )));
qp.setEnablePositionIncrements(true);

View File

@ -1048,7 +1048,7 @@ public class TestQueryParserWrapper extends LocalizedTestCase {
}
public void testStopwords() throws Exception {
QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(StopFilter.makeStopSet("the", "foo"), false));
QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "foo")));
Query result = qp.parse("a:the OR a:foo");
assertNotNull("result is null and it shouldn't be", result);
assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
@ -1067,7 +1067,7 @@ public class TestQueryParserWrapper extends LocalizedTestCase {
}
public void testPositionIncrement() throws Exception {
QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(StopFilter.makeStopSet("the", "in", "are", "this"), true));
QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "in", "are", "this")));
qp.setEnablePositionIncrements(true);
String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
// 0 2 5 7 8

View File

@ -19,6 +19,7 @@ package org.apache.lucene.analysis.snowball;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.standard.*;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Reader;
@ -30,20 +31,25 @@ import java.util.Set;
* Available stemmers are listed in org.tartarus.snowball.ext. The name of a
* stemmer is the part of the class name before "Stemmer", e.g., the stemmer in
* {@link org.tartarus.snowball.ext.EnglishStemmer} is named "English".
*
* <p><b>NOTE</b>: This class uses the same {@link Version}
* dependent settings as {@link StandardAnalyzer}.</p>
*/
public class SnowballAnalyzer extends Analyzer {
private String name;
private Set stopSet;
private final Version matchVersion;
/** Builds the named analyzer with no stop words. */
public SnowballAnalyzer(String name) {
public SnowballAnalyzer(Version matchVersion, String name) {
this.name = name;
setOverridesTokenStreamMethod(SnowballAnalyzer.class);
this.matchVersion = matchVersion;
}
/** Builds the named analyzer with the given stop words. */
public SnowballAnalyzer(String name, String[] stopWords) {
this(name);
public SnowballAnalyzer(Version matchVersion, String name, String[] stopWords) {
this(matchVersion, name);
stopSet = StopFilter.makeStopSet(stopWords);
}
@ -51,11 +57,12 @@ public class SnowballAnalyzer extends Analyzer {
StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter},
and a {@link SnowballFilter} */
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer(reader);
TokenStream result = new StandardTokenizer(matchVersion, reader);
result = new StandardFilter(result);
result = new LowerCaseFilter(result);
if (stopSet != null)
result = new StopFilter(false, result, stopSet);
result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
result, stopSet);
result = new SnowballFilter(result, name);
return result;
}
@ -80,11 +87,12 @@ public class SnowballAnalyzer extends Analyzer {
SavedStreams streams = (SavedStreams) getPreviousTokenStream();
if (streams == null) {
streams = new SavedStreams();
streams.source = new StandardTokenizer(reader);
streams.source = new StandardTokenizer(matchVersion, reader);
streams.result = new StandardFilter(streams.source);
streams.result = new LowerCaseFilter(streams.result);
if (stopSet != null)
streams.result = new StopFilter(false, streams.result, stopSet);
streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
streams.result, stopSet);
streams.result = new SnowballFilter(streams.result, name);
setPreviousTokenStream(streams);
} else {
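
Building the analyzer with matchVersion, as a sketch (the stopword list here is a hypothetical example, not from this patch):

  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
  import org.apache.lucene.util.Version;

  public class SnowballExample {
    public static Analyzer english() {
      // with LUCENE_29 or later the embedded StopFilter preserves position increments
      return new SnowballAnalyzer(Version.LUCENE_CURRENT, "English",
          new String[] { "the", "a", "an" }); // hypothetical stopword list
    }
  }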

View File

@ -31,17 +31,18 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;
public class TestSnowball extends BaseTokenStreamTestCase {
public void testEnglish() throws Exception {
Analyzer a = new SnowballAnalyzer("English");
Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English");
assertAnalyzesTo(a, "he abhorred accents",
new String[]{"he", "abhor", "accent"});
}
public void testReusableTokenStream() throws Exception {
Analyzer a = new SnowballAnalyzer("English");
Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English");
assertAnalyzesToReuse(a, "he abhorred accents",
new String[]{"he", "abhor", "accent"});
assertAnalyzesToReuse(a, "she abhorred him",
@ -53,7 +54,7 @@ public class TestSnowball extends BaseTokenStreamTestCase {
*/
private class SnowballSubclassAnalyzer extends SnowballAnalyzer {
public SnowballSubclassAnalyzer(String name) {
super(name);
super(Version.LUCENE_CURRENT, name);
}
public TokenStream tokenStream(String fieldName, Reader reader) {

View File

@ -38,6 +38,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
/**
* See table searcher explanation.
@ -167,7 +168,7 @@ public class ListSearcher extends AbstractListModel {
//build a query based on the fields, searchString and cached analyzer
//NOTE: This is an area for improvement since the MultiFieldQueryParser
// has some weirdness.
MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer);
MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, analyzer);
Query query =parser.parse(searchString);
//reset this list model with the new results
resetSearchResults(is, query);

View File

@ -35,7 +35,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.swing.models.ListSearcher.CountingCollector;
import org.apache.lucene.util.Version;
/**
* This is a TableModel that encapsulates Lucene
@ -244,7 +244,7 @@ public class TableSearcher extends AbstractTableModel {
//build a query based on the fields, searchString and cached analyzer
//NOTE: This is an area for improvement since the MultiFieldQueryParser
// has some weirdness.
MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer);
MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, analyzer);
Query query = parser.parse(searchString);
//reset this table model with the new results
resetSearchResults(is, query);

View File

@ -8,6 +8,7 @@ import org.apache.lucene.xmlparser.DOMUtils;
import org.apache.lucene.xmlparser.ParserException;
import org.apache.lucene.xmlparser.QueryBuilder;
import org.w3c.dom.Element;
import org.apache.lucene.util.Version;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -88,7 +89,7 @@ public class UserInputQueryBuilder implements QueryBuilder {
*/
protected QueryParser createQueryParser(String fieldName, Analyzer analyzer)
{
return new QueryParser(fieldName,analyzer);
return new QueryParser(Version.LUCENE_CURRENT, fieldName,analyzer);
}
}
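
Since createQueryParser is the documented extension point, a hypothetical subclass could pin an older matchVersion instead of LUCENE_CURRENT (this assumes UserInputQueryBuilder's (String, Analyzer) constructor; illustrative only):

  import org.apache.lucene.analysis.Analyzer;
  import org.apache.lucene.queryParser.QueryParser;
  import org.apache.lucene.util.Version;
  import org.apache.lucene.xmlparser.builders.UserInputQueryBuilder;

  public class PinnedVersionQueryBuilder extends UserInputQueryBuilder {
    public PinnedVersionQueryBuilder(String defaultField, Analyzer analyzer) {
      super(defaultField, analyzer);
    }
    protected QueryParser createQueryParser(String fieldName, Analyzer analyzer) {
      return new QueryParser(Version.LUCENE_24, fieldName, analyzer); // pre-2.9 semantics
    }
  }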

View File

@ -127,7 +127,7 @@ public class SearchFiles {
} else {
in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
}
QueryParser parser = new QueryParser(field, analyzer);
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, field, analyzer);
while (true) {
if (queries == null) // prompt the user
System.out.println("Enter query: ");

View File

@ -24,7 +24,17 @@ import java.util.Arrays;
import java.util.Set;
import java.util.List;
/** Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}. */
import org.apache.lucene.util.Version;
/** Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
*
* <a name="version"/>
* <p>You must specify the required {@link Version}
* compatibility when creating StopAnalyzer:
* <ul>
* <li> As of 2.9, position increments are preserved
* </ul>
*/
public final class StopAnalyzer extends Analyzer {
private final Set<?> stopWords;
@ -49,40 +59,39 @@ public final class StopAnalyzer extends Analyzer {
/** Builds an analyzer which removes words in
* {@link #ENGLISH_STOP_WORDS}.
* @param enablePositionIncrements See {@link
* StopFilter#setEnablePositionIncrements} */
public StopAnalyzer(boolean enablePositionIncrements) {
* @param matchVersion See <a href="#version">above</a>
*/
public StopAnalyzer(Version matchVersion) {
stopWords = ENGLISH_STOP_WORDS_SET;
this.enablePositionIncrements = enablePositionIncrements;
enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
}
/** Builds an analyzer with the stop words from the given set.
* @param matchVersion See <a href="#version">above</a>
* @param stopWords Set of stop words
* @param enablePositionIncrements See {@link
* StopFilter#setEnablePositionIncrements} */
public StopAnalyzer(Set<?> stopWords, boolean enablePositionIncrements) {
public StopAnalyzer(Version matchVersion, Set<?> stopWords) {
this.stopWords = stopWords;
this.enablePositionIncrements = enablePositionIncrements;
enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
}
/** Builds an analyzer with the stop words from the given file.
* @see WordlistLoader#getWordSet(File)
* @param stopwordsFile File to load stop words from
* @param enablePositionIncrements See {@link
* StopFilter#setEnablePositionIncrements} */
public StopAnalyzer(File stopwordsFile, boolean enablePositionIncrements) throws IOException {
* @param matchVersion See <a href="#version">above</a>
* @param stopwordsFile File to load stop words from */
public StopAnalyzer(Version matchVersion, File stopwordsFile) throws IOException {
stopWords = WordlistLoader.getWordSet(stopwordsFile);
this.enablePositionIncrements = enablePositionIncrements;
this.enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
}
/** Builds an analyzer with the stop words from the given reader.
* @see WordlistLoader#getWordSet(Reader)
* @param stopwords Reader to load stop words from
* @param enablePositionIncrements See {@link
* StopFilter#setEnablePositionIncrements} */
public StopAnalyzer(Reader stopwords, boolean enablePositionIncrements) throws IOException {
* @param matchVersion See <a href="#version">above</a>
* @param stopwords Reader to load stop words from */
public StopAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
stopWords = WordlistLoader.getWordSet(stopwords);
this.enablePositionIncrements = enablePositionIncrements;
this.enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
}
/** Filters LowerCaseTokenizer with StopFilter. */
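
Taken together, the new constructors look like this from client code; a sketch, with an assumed custom stop set (the words are illustrative only):

import java.util.Set;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.util.Version;

public class StopAnalyzerSketch {
  public static void main(String[] args) {
    // Default English stop words; increments preserved under LUCENE_CURRENT:
    StopAnalyzer current = new StopAnalyzer(Version.LUCENE_CURRENT);

    // Custom stop set; increments dropped because 2.4 predates the 2.9 default:
    Set<?> words = StopFilter.makeStopSet("the", "foo");   // illustrative words
    StopAnalyzer legacy = new StopAnalyzer(Version.LUCENE_24, words);
  }
}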

View File

@ -25,6 +25,7 @@ import java.util.List;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.queryParser.QueryParser; // for javadoc
import org.apache.lucene.util.Version;
/**
* Removes stop words from a token stream.
@ -150,6 +151,21 @@ public final class StopFilter extends TokenFilter {
return false;
}
/**
* Returns version-dependent default for
* enablePositionIncrements. Analyzers that embed
* StopFilter use this method when creating the
* StopFilter. Prior to 2.9, this returns false. On 2.9
* or later, it returns true.
*/
public static boolean getEnablePositionIncrementsVersionDefault(Version matchVersion) {
if (matchVersion.onOrAfter(Version.LUCENE_29)) {
return true;
} else {
return false;
}
}
/**
* @see #setEnablePositionIncrements(boolean).
*/
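
Analyzers outside core that embed StopFilter can pick up the same behavior through this helper; a sketch of the call pattern, where the analyzer class and tokenizer choice are illustrative rather than part of this commit:

import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.util.Version;

// Hypothetical analyzer that defers the enablePositionIncrements
// default to the Version the caller asked for:
public final class VersionAwareStopAnalyzer extends Analyzer {
  private final Version matchVersion;

  public VersionAwareStopAnalyzer(Version matchVersion) {
    this.matchVersion = matchVersion;
  }

  public TokenStream tokenStream(String fieldName, Reader reader) {
    return new StopFilter(
        StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
        new LowerCaseTokenizer(reader),
        StopAnalyzer.ENGLISH_STOP_WORDS_SET);
  }
}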

View File

@ -35,7 +35,7 @@ import java.util.Set;
* compatibility when creating StandardAnalyzer:
* <ul>
* <li> As of 2.9, StopFilter preserves position
* increments by default
* increments
* <li> As of 2.4, Tokens incorrectly identified as acronyms
* are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
* </ul>
@ -52,6 +52,7 @@ public class StandardAnalyzer extends Analyzer {
/** An unmodifiable set containing some common English words that are usually not
useful for searching. */
public static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
private final Version matchVersion;
/** Builds an analyzer with the default stop words ({@link
* #STOP_WORDS_SET}).
@ -71,6 +72,7 @@ public class StandardAnalyzer extends Analyzer {
setOverridesTokenStreamMethod(StandardAnalyzer.class);
enableStopPositionIncrements = matchVersion.onOrAfter(Version.LUCENE_29);
replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
this.matchVersion = matchVersion;
}
/** Builds an analyzer with the stop words from the given file.
@ -94,11 +96,12 @@ public class StandardAnalyzer extends Analyzer {
/** Constructs a {@link StandardTokenizer} filtered by a {@link
StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */
public TokenStream tokenStream(String fieldName, Reader reader) {
StandardTokenizer tokenStream = new StandardTokenizer(reader, replaceInvalidAcronym);
StandardTokenizer tokenStream = new StandardTokenizer(matchVersion, reader);
tokenStream.setMaxTokenLength(maxTokenLength);
TokenStream result = new StandardFilter(tokenStream);
result = new LowerCaseFilter(result);
result = new StopFilter(enableStopPositionIncrements, result, stopSet);
result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
result, stopSet);
return result;
}
@ -140,10 +143,11 @@ public class StandardAnalyzer extends Analyzer {
if (streams == null) {
streams = new SavedStreams();
setPreviousTokenStream(streams);
streams.tokenStream = new StandardTokenizer(reader);
streams.tokenStream = new StandardTokenizer(matchVersion, reader);
streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements, streams.filteredTokenStream, stopSet);
streams.filteredTokenStream = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
streams.filteredTokenStream, stopSet);
} else {
streams.tokenStream.reset(reader);
}
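
The observable difference: with a 2.9+ matchVersion a removed stop word now leaves a position gap. A sketch that walks the stream (input text and attributes follow the tests in this commit):

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

public class StopPositionSketch {
  public static void main(String[] args) throws Exception {
    StandardAnalyzer a = new StandardAnalyzer(Version.LUCENE_CURRENT);
    TokenStream ts = a.tokenStream("f", new StringReader("the quick fox"));
    TermAttribute term = ts.addAttribute(TermAttribute.class);
    PositionIncrementAttribute posIncr =
        ts.addAttribute(PositionIncrementAttribute.class);
    while (ts.incrementToken()) {
      // "quick" reports increment 2 (the hole left by the stop word "the");
      // with Version.LUCENE_24 both tokens would report 1.
      System.out.println(term.term() + " +" + posIncr.getPositionIncrement());
    }
  }
}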

View File

@ -27,6 +27,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.Version;
/** A grammar-based tokenizer constructed with JFlex
*
@ -43,6 +44,14 @@ import org.apache.lucene.util.AttributeSource;
* <p>Many applications have specific tokenizer needs. If this tokenizer does
* not suit your application, please consider copying this source code
* directory to your project and maintaining your own grammar-based tokenizer.
*
* <a name="version"/>
* <p>You must specify the required {@link Version}
* compatibility when creating StandardTokenizer:
* <ul>
* <li> As of 2.4, Tokens incorrectly identified as acronyms
* are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1068</a>)
* </ul>
*/
public final class StandardTokenizer extends Tokenizer {
@ -104,56 +113,51 @@ public final class StandardTokenizer extends Tokenizer {
return maxTokenLength;
}
/**
* Creates a new instance of the {@link StandardTokenizer}. Attaches the
* <code>input</code> to a newly created JFlex scanner.
*/
public StandardTokenizer(Reader input) {
this(input, false);
}
/**
* Creates a new instance of the {@link org.apache.lucene.analysis.standard.StandardTokenizer}. Attaches
* the <code>input</code> to the newly created JFlex scanner.
*
* @param input The input reader
* @param replaceInvalidAcronym Set to true to replace mischaracterized acronyms with HOST.
*
* See http://issues.apache.org/jira/browse/LUCENE-1068
*/
public StandardTokenizer(Reader input, boolean replaceInvalidAcronym) {
public StandardTokenizer(Version matchVersion, Reader input) {
super();
this.scanner = new StandardTokenizerImpl(input);
init(input, replaceInvalidAcronym);
init(input, matchVersion);
}
/**
* Creates a new StandardTokenizer with a given {@link AttributeSource}.
*/
public StandardTokenizer(AttributeSource source, Reader input, boolean replaceInvalidAcronym) {
public StandardTokenizer(Version matchVersion, AttributeSource source, Reader input) {
super(source);
this.scanner = new StandardTokenizerImpl(input);
init(input, replaceInvalidAcronym);
init(input, matchVersion);
}
/**
* Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}
*/
public StandardTokenizer(AttributeFactory factory, Reader input, boolean replaceInvalidAcronym) {
public StandardTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {
super(factory);
this.scanner = new StandardTokenizerImpl(input);
init(input, replaceInvalidAcronym);
init(input, matchVersion);
}
private void init(Reader input, boolean replaceInvalidAcronym) {
this.replaceInvalidAcronym = replaceInvalidAcronym;
private void init(Reader input, Version matchVersion) {
if (matchVersion.onOrAfter(Version.LUCENE_24)) {
replaceInvalidAcronym = true;
} else {
replaceInvalidAcronym = false;
}
this.input = input;
termAtt = addAttribute(TermAttribute.class);
offsetAtt = addAttribute(OffsetAttribute.class);
posIncrAtt = addAttribute(PositionIncrementAttribute.class);
typeAtt = addAttribute(TypeAttribute.class);
}
// this tokenizer generates three attributes:
// offset, positionIncrement and type
private TermAttribute termAtt;
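
For the acronym side of the version switch, the tokenizer can be exercised directly; a sketch using the same input as the TestStandardAnalyzer change below:

import java.io.StringReader;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;

public class AcronymSketch {
  public static void main(String[] args) throws Exception {
    StandardTokenizer tok = new StandardTokenizer(
        Version.LUCENE_CURRENT, new StringReader("www.nutch.org."));
    TermAttribute term = tok.addAttribute(TermAttribute.class);
    TypeAttribute type = tok.addAttribute(TypeAttribute.class);
    while (tok.incrementToken()) {
      // With 2.4+ the mischaracterized acronym is fixed up in the tokenizer:
      // prints "www.nutch.org <HOST>". Under Version.LUCENE_23 it stays typed
      // <ACRONYM>, which StandardFilter then collapses to "wwwnutchorg" (see
      // the TestStandardAnalyzer change below).
      System.out.println(term.term() + " " + type.type());
    }
  }
}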

View File

@ -27,6 +27,7 @@ import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
/**
* A QueryParser which constructs queries to search multiple fields.
@ -65,8 +66,8 @@ public class MultiFieldQueryParser extends QueryParser
* <p>In other words, all the query's terms must appear, but it doesn't matter in
* what fields they appear.</p>
*/
public MultiFieldQueryParser(String[] fields, Analyzer analyzer, Map<String,Float> boosts) {
this(fields,analyzer);
public MultiFieldQueryParser(Version matchVersion, String[] fields, Analyzer analyzer, Map<String,Float> boosts) {
this(matchVersion, fields, analyzer);
this.boosts = boosts;
}
@ -90,8 +91,8 @@ public class MultiFieldQueryParser extends QueryParser
* <p>In other words, all the query's terms must appear, but it doesn't matter in
* what fields they appear.</p>
*/
public MultiFieldQueryParser(String[] fields, Analyzer analyzer) {
super(null, analyzer);
public MultiFieldQueryParser(Version matchVersion, String[] fields, Analyzer analyzer) {
super(matchVersion, null, analyzer);
this.fields = fields;
}
@ -196,6 +197,7 @@ public class MultiFieldQueryParser extends QueryParser
* (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
* </code>
* </pre>
* @param matchVersion Lucene version to match; this is passed through to QueryParser.
* @param queries Queries strings to parse
* @param fields Fields to search on
* @param analyzer Analyzer to use
@ -203,7 +205,7 @@ public class MultiFieldQueryParser extends QueryParser
* @throws IllegalArgumentException if the length of the queries array differs
* from the length of the fields array
*/
public static Query parse(String[] queries, String[] fields,
public static Query parse(Version matchVersion, String[] queries, String[] fields,
Analyzer analyzer) throws ParseException
{
if (queries.length != fields.length)
@ -211,7 +213,7 @@ public class MultiFieldQueryParser extends QueryParser
BooleanQuery bQuery = new BooleanQuery();
for (int i = 0; i < fields.length; i++)
{
QueryParser qp = new QueryParser(fields[i], analyzer);
QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer);
Query q = qp.parse(queries[i]);
if (q!=null && // q never null, just being defensive
(!(q instanceof BooleanQuery) || ((BooleanQuery)q).getClauses().length>0)) {
@ -243,6 +245,7 @@ public class MultiFieldQueryParser extends QueryParser
* </code>
* </pre>
*
* @param matchVersion Lucene version to match; this is passed through to QueryParser.
* @param query Query string to parse
* @param fields Fields to search on
* @param flags Flags describing the fields
@ -251,13 +254,13 @@ public class MultiFieldQueryParser extends QueryParser
* @throws IllegalArgumentException if the length of the fields array differs
* from the length of the flags array
*/
public static Query parse(String query, String[] fields,
public static Query parse(Version matchVersion, String query, String[] fields,
BooleanClause.Occur[] flags, Analyzer analyzer) throws ParseException {
if (fields.length != flags.length)
throw new IllegalArgumentException("fields.length != flags.length");
BooleanQuery bQuery = new BooleanQuery();
for (int i = 0; i < fields.length; i++) {
QueryParser qp = new QueryParser(fields[i], analyzer);
QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer);
Query q = qp.parse(query);
if (q!=null && // q never null, just being defensive
(!(q instanceof BooleanQuery) || ((BooleanQuery)q).getClauses().length>0)) {
@ -290,6 +293,7 @@ public class MultiFieldQueryParser extends QueryParser
* </code>
* </pre>
*
* @param matchVersion Lucene version to match; this is passed through to QueryParser.
* @param queries Queries string to parse
* @param fields Fields to search on
* @param flags Flags describing the fields
@ -298,7 +302,7 @@ public class MultiFieldQueryParser extends QueryParser
* @throws IllegalArgumentException if the length of the queries, fields,
* and flags array differ
*/
public static Query parse(String[] queries, String[] fields, BooleanClause.Occur[] flags,
public static Query parse(Version matchVersion, String[] queries, String[] fields, BooleanClause.Occur[] flags,
Analyzer analyzer) throws ParseException
{
if (!(queries.length == fields.length && queries.length == flags.length))
@ -306,7 +310,7 @@ public class MultiFieldQueryParser extends QueryParser
BooleanQuery bQuery = new BooleanQuery();
for (int i = 0; i < fields.length; i++)
{
QueryParser qp = new QueryParser(fields[i], analyzer);
QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer);
Query q = qp.parse(queries[i]);
if (q!=null && // q never null, just being defensive
(!(q instanceof BooleanQuery) || ((BooleanQuery)q).getClauses().length>0)) {

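The static parse variants take the version the same way; a sketch mirroring the values exercised in TestMultiFieldQueryParser further down:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;

public class StaticParseSketch {
  public static void main(String[] args) throws Exception {
    String[] queries = {"one", "two"};
    String[] fields = {"b", "t"};
    BooleanClause.Occur[] flags =
        {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT};
    // One query string per field, each with its own occurrence flag:
    Query q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT,
        queries, fields, flags, new StandardAnalyzer(Version.LUCENE_CURRENT));
    System.out.println(q);   // +b:one -t:two
  }
}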
View File

@ -33,6 +33,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Version;
/**
* This class is generated by JavaCC. The most important method is
@ -99,6 +100,14 @@ import org.apache.lucene.search.WildcardQuery;
* <p><b>NOTE</b>: there is a new QueryParser in contrib, which matches
* the same syntax as this class, but is more modular,
* enabling substantial customization to how a query is created.
*
* <a name="version"/>
* <p><b>NOTE</b>: You must specify the required {@link Version}
* compatibility when creating QueryParser:
* <ul>
* <li> As of 2.9, {@link #setEnablePositionIncrements} is true by
* default.
* </ul>
*/
public class QueryParser implements QueryParserConstants {
@ -123,7 +132,7 @@ public class QueryParser implements QueryParserConstants {
boolean lowercaseExpandedTerms = true;
MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
boolean allowLeadingWildcard = false;
boolean enablePositionIncrements = false;
boolean enablePositionIncrements = true;
Analyzer analyzer;
String field;
@ -147,13 +156,19 @@ public class QueryParser implements QueryParserConstants {
static public enum Operator { OR, AND }
/** Constructs a query parser.
* @param matchVersion Lucene version to match. See <a href="#version">above</a>
* @param f the default field for query terms.
* @param a used to find terms in the query text.
*/
public QueryParser(String f, Analyzer a) {
public QueryParser(Version matchVersion, String f, Analyzer a) {
this(new FastCharStream(new StringReader("")));
analyzer = a;
field = f;
if (matchVersion.onOrAfter(Version.LUCENE_29)) {
enablePositionIncrements = true;
} else {
enablePositionIncrements = false;
}
}
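
In client code, the constructor change plus the new 2.9 default means a phrase query over a stop-filtered field keeps its holes; a sketch (field name and text are illustrative, modeled on the new testPositionIncrements below):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;

public class QueryParserSketch {
  public static void main(String[] args) throws Exception {
    StandardAnalyzer a = new StandardAnalyzer(Version.LUCENE_CURRENT);
    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "f", a);
    // "of" is a stop word; with enablePositionIncrements now true by
    // default, the resulting PhraseQuery keeps the gap, matching what
    // StandardAnalyzer produced at index time.
    Query q = qp.parse("\"wizard of ozzy\"");
    System.out.println(q.toString("f"));
  }
}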
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
@ -1077,7 +1092,7 @@ public class QueryParser implements QueryParserConstants {
System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
System.exit(0);
}
QueryParser qp = new QueryParser("field",
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
new org.apache.lucene.analysis.SimpleAnalyzer());
Query q = qp.parse(args[0]);
System.out.println(q.toString("field"));
@ -1513,12 +1528,6 @@ public class QueryParser implements QueryParserConstants {
finally { jj_save(0, xla); }
}
private boolean jj_3R_3() {
if (jj_scan_token(STAR)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}
private boolean jj_3R_2() {
if (jj_scan_token(TERM)) return true;
if (jj_scan_token(COLON)) return true;
@ -1535,6 +1544,12 @@ public class QueryParser implements QueryParserConstants {
return false;
}
private boolean jj_3R_3() {
if (jj_scan_token(STAR)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}
/** Generated Token Manager. */
public QueryParserTokenManager token_source;
/** Current token. */
@ -1563,7 +1578,7 @@ public class QueryParser implements QueryParserConstants {
private int jj_gc = 0;
/** Constructor with user supplied CharStream. */
public QueryParser(CharStream stream) {
protected QueryParser(CharStream stream) {
token_source = new QueryParserTokenManager(stream);
token = new Token();
jj_ntk = -1;
@ -1583,7 +1598,7 @@ public class QueryParser implements QueryParserConstants {
}
/** Constructor with generated Token Manager. */
public QueryParser(QueryParserTokenManager tm) {
protected QueryParser(QueryParserTokenManager tm) {
token_source = tm;
token = new Token();
jj_ntk = -1;

View File

@ -57,6 +57,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Version;
/**
* This class is generated by JavaCC. The most important method is
@ -123,6 +124,14 @@ import org.apache.lucene.search.WildcardQuery;
* <p><b>NOTE</b>: there is a new QueryParser in contrib, which matches
* the same syntax as this class, but is more modular,
* enabling substantial customization to how a query is created.
*
* <a name="version"/>
* <p><b>NOTE</b>: You must specify the required {@link Version}
* compatibility when creating QueryParser:
* <ul>
* <li> As of 2.9, {@link #setEnablePositionIncrements} is true by
* default.
* </ul>
*/
public class QueryParser {
@ -147,7 +156,7 @@ public class QueryParser {
boolean lowercaseExpandedTerms = true;
MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
boolean allowLeadingWildcard = false;
boolean enablePositionIncrements = false;
boolean enablePositionIncrements = true;
Analyzer analyzer;
String field;
@ -171,13 +180,19 @@ public class QueryParser {
static public enum Operator { OR, AND }
/** Constructs a query parser.
* @param matchVersion Lucene version to match. See <a href="#version">above</a>
* @param f the default field for query terms.
* @param a used to find terms in the query text.
*/
public QueryParser(String f, Analyzer a) {
public QueryParser(Version matchVersion, String f, Analyzer a) {
this(new FastCharStream(new StringReader("")));
analyzer = a;
field = f;
if (matchVersion.onOrAfter(Version.LUCENE_29)) {
enablePositionIncrements = true;
} else {
enablePositionIncrements = false;
}
}
/** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
@ -1101,7 +1116,7 @@ public class QueryParser {
System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
System.exit(0);
}
QueryParser qp = new QueryParser("field",
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
new org.apache.lucene.analysis.SimpleAnalyzer());
Query q = qp.parse(args[0]);
System.out.println(q.toString("field"));

View File

@ -32,6 +32,7 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.Version;
/** Token Manager. */
public class QueryParserTokenManager implements QueryParserConstants

View File

@ -63,7 +63,7 @@ public class TestDemo extends LuceneTestCase {
// Now search the index:
IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
// Parse a simple query that searches for "text":
QueryParser parser = new QueryParser("fieldname", analyzer);
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "fieldname", analyzer);
Query query = parser.parse("text");
ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
assertEquals(1, hits.length);

View File

@ -22,6 +22,7 @@ import java.io.PrintWriter;
import java.io.StringWriter;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
import junit.framework.TestSuite;
import junit.textui.TestRunner;
@ -107,7 +108,7 @@ public class TestSearch extends LuceneTestCase {
};
ScoreDoc[] hits = null;
QueryParser parser = new QueryParser("contents", analyzer);
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "contents", analyzer);
parser.setPhraseSlop(4);
for (int j = 0; j < queries.length; j++) {
Query query = parser.parse(queries[j]);

View File

@ -27,8 +27,10 @@ import org.apache.lucene.analysis.*;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.queryParser.*;
import org.apache.lucene.util.Version;
import org.apache.lucene.util.LuceneTestCase;
import junit.framework.TestSuite;
import junit.textui.TestRunner;
@ -97,7 +99,7 @@ public class TestSearchForDuplicates extends LuceneTestCase {
// try a search without OR
Searcher searcher = new IndexSearcher(directory, true);
QueryParser parser = new QueryParser(PRIORITY_FIELD, analyzer);
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, PRIORITY_FIELD, analyzer);
Query query = parser.parse(HIGH_PRIORITY);
out.println("Query: " + query.toString(PRIORITY_FIELD));
@ -112,7 +114,7 @@ public class TestSearchForDuplicates extends LuceneTestCase {
searcher = new IndexSearcher(directory, true);
hits = null;
parser = new QueryParser(PRIORITY_FIELD, analyzer);
parser = new QueryParser(Version.LUCENE_CURRENT, PRIORITY_FIELD, analyzer);
query = parser.parse(HIGH_PRIORITY + " OR " + MED_PRIORITY);
out.println("Query: " + query.toString(PRIORITY_FIELD));

View File

@ -26,6 +26,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.Payload;
import org.apache.lucene.util.Version;
public class TestAnalyzers extends BaseTokenStreamTestCase {
@ -74,7 +75,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
}
public void testStop() throws Exception {
Analyzer a = new StopAnalyzer(true);
Analyzer a = new StopAnalyzer(Version.LUCENE_CURRENT);
assertAnalyzesTo(a, "foo bar FOO BAR",
new String[] { "foo", "bar", "foo", "bar" });
assertAnalyzesTo(a, "foo a bar such FOO THESE BAR",

View File

@ -31,6 +31,7 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
@ -58,7 +59,7 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
analyzer.addAnalyzer("partnum", new KeywordAnalyzer());
QueryParser queryParser = new QueryParser("description", analyzer);
QueryParser queryParser = new QueryParser(Version.LUCENE_CURRENT, "description", analyzer);
Query query = queryParser.parse("partnum:Q36 AND SPACE");
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;

View File

@ -5,6 +5,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.util.Version;
import java.io.StringReader;
@ -108,15 +109,22 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
}
public void testDomainNames() throws Exception {
// Don't reuse a because we alter its state (setReplaceInvalidAcronym)
// Current lucene should not show the bug
StandardAnalyzer a2 = new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT);
// domain names
assertAnalyzesTo(a2, "www.nutch.org", new String[]{"www.nutch.org"});
//Notice the trailing . See https://issues.apache.org/jira/browse/LUCENE-1068.
// the following should be recognized as HOST:
assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
// 2.3 should show the bug
a2 = new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_23);
assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "wwwnutchorg" }, new String[] { "<ACRONYM>" });
// 2.4 should not show the bug
a2 = new StandardAnalyzer(Version.LUCENE_24);
assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
}
public void testEMailAddresses() throws Exception {

View File

@ -19,6 +19,7 @@ package org.apache.lucene.analysis;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;
import java.io.StringReader;
import java.io.IOException;
@ -28,7 +29,7 @@ import java.util.HashSet;
public class TestStopAnalyzer extends BaseTokenStreamTestCase {
private StopAnalyzer stop = new StopAnalyzer(false);
private StopAnalyzer stop = new StopAnalyzer(Version.LUCENE_CURRENT);
private Set inValidTokens = new HashSet();
public TestStopAnalyzer(String s) {
@ -61,7 +62,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
stopWordsSet.add("good");
stopWordsSet.add("test");
stopWordsSet.add("analyzer");
StopAnalyzer newStop = new StopAnalyzer(stopWordsSet, false);
StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_24, stopWordsSet);
StringReader reader = new StringReader("This is a good test of the english stop analyzer");
TokenStream stream = newStop.tokenStream("test", reader);
assertNotNull(stream);
@ -71,7 +72,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
while (stream.incrementToken()) {
String text = termAtt.term();
assertFalse(stopWordsSet.contains(text));
assertEquals(1,posIncrAtt.getPositionIncrement()); // by default stop tokenizer does not apply increments.
assertEquals(1,posIncrAtt.getPositionIncrement()); // in 2.4 stop tokenizer does not apply increments.
}
}
@ -80,7 +81,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
stopWordsSet.add("good");
stopWordsSet.add("test");
stopWordsSet.add("analyzer");
StopAnalyzer newStop = new StopAnalyzer(stopWordsSet, true);
StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_CURRENT, stopWordsSet);
StringReader reader = new StringReader("This is a good test of the english stop analyzer with positions");
int expectedIncr[] = { 1, 1, 1, 3, 1, 1, 1, 2, 1};
TokenStream stream = newStop.tokenStream("test", reader);

View File

@ -22,6 +22,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.English;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.StringReader;
@ -167,10 +168,10 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
buffer.append(English.intToEnglish(i).toUpperCase()).append(' ');
}
//make sure we produce the same tokens
TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))));
TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(buffer.toString()))));
TokenStream sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(100));
teeStream.consumeAllTokens();
TokenStream stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))), 100);
TokenStream stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(buffer.toString()))), 100);
TermAttribute tfTok = stream.addAttribute(TermAttribute.class);
TermAttribute sinkTok = sink.addAttribute(TermAttribute.class);
for (int i=0; stream.incrementToken(); i++) {
@ -183,12 +184,12 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
int tfPos = 0;
long start = System.currentTimeMillis();
for (int i = 0; i < 20; i++) {
stream = new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString())));
stream = new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(buffer.toString())));
PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
while (stream.incrementToken()) {
tfPos += posIncrAtt.getPositionIncrement();
}
stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))), modCounts[j]);
stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(buffer.toString()))), modCounts[j]);
posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
while (stream.incrementToken()) {
tfPos += posIncrAtt.getPositionIncrement();
@ -200,7 +201,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
//simulate one field with one sink
start = System.currentTimeMillis();
for (int i = 0; i < 20; i++) {
teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))));
teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(buffer.toString()))));
sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(modCounts[j]));
PositionIncrementAttribute posIncrAtt = teeStream.getAttribute(PositionIncrementAttribute.class);
while (teeStream.incrementToken()) {

View File

@ -66,6 +66,7 @@ import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.SingleInstanceLockFactory;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.Version;
public class TestIndexWriter extends BaseTokenStreamTestCase {
public TestIndexWriter(String name) {
@ -1701,7 +1702,7 @@ public class TestIndexWriter extends BaseTokenStreamTestCase {
IndexWriter writer = new IndexWriter(dir, new Analyzer() {
public TokenStream tokenStream(String fieldName, Reader reader) {
return new TokenFilter(new StandardTokenizer(reader)) {
return new TokenFilter(new StandardTokenizer(Version.LUCENE_CURRENT, reader)) {
private int count = 0;
public boolean incrementToken() throws IOException {
@ -4167,7 +4168,7 @@ public class TestIndexWriter extends BaseTokenStreamTestCase {
// LUCENE-1448
public void testEndOffsetPositionStopFilter() throws Exception {
MockRAMDirectory dir = new MockRAMDirectory();
IndexWriter w = new IndexWriter(dir, new StopAnalyzer(true), IndexWriter.MaxFieldLength.LIMITED);
IndexWriter w = new IndexWriter(dir, new StopAnalyzer(Version.LUCENE_CURRENT), IndexWriter.MaxFieldLength.LIMITED);
Document doc = new Document();
Field f = new Field("field", "abcd the", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
doc.add(f);

View File

@ -31,6 +31,7 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
import org.apache.lucene.search.Query;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
/**
* Test QueryParser's ability to deal with Analyzers that return more
@ -44,7 +45,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
public void testMultiAnalyzer() throws ParseException {
QueryParser qp = new QueryParser("", new MultiAnalyzer());
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "", new MultiAnalyzer());
// trivial, no multiple tokens:
assertEquals("foo", qp.parse("foo").toString());
@ -117,7 +118,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
}
public void testPosIncrementAnalyzer() throws ParseException {
QueryParser qp = new QueryParser("", new PosIncrementAnalyzer());
QueryParser qp = new QueryParser(Version.LUCENE_24, "", new PosIncrementAnalyzer());
assertEquals("quick brown", qp.parse("the quick brown").toString());
assertEquals("\"quick brown\"", qp.parse("\"the quick brown\"").toString());
assertEquals("quick brown fox", qp.parse("the quick brown fox").toString());
@ -134,7 +135,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
}
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer(reader);
TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
result = new TestFilter(result);
result = new LowerCaseFilter(result);
return result;
@ -200,7 +201,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
}
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream result = new StandardTokenizer(reader);
TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
result = new TestPosIncrementFilter(result);
result = new LowerCaseFilter(result);
return result;
@ -238,7 +239,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
private final static class DumbQueryParser extends QueryParser {
public DumbQueryParser(String f, Analyzer a) {
super(f, a);
super(Version.LUCENE_CURRENT, f, a);
}
/** expose super's version */

View File

@ -36,6 +36,7 @@ import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.util.Version;
/**
* Tests QueryParser.
@ -59,18 +60,18 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {
String[] fields = {"b", "t"};
Occur occur[] = {Occur.SHOULD, Occur.SHOULD};
TestQueryParser.QPTestAnalyzer a = new TestQueryParser.QPTestAnalyzer();
MultiFieldQueryParser mfqp = new MultiFieldQueryParser(fields, a);
MultiFieldQueryParser mfqp = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, a);
Query q = mfqp.parse(qtxt);
assertEquals(expectedRes, q.toString());
q = MultiFieldQueryParser.parse(qtxt, fields, occur, a);
q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, qtxt, fields, occur, a);
assertEquals(expectedRes, q.toString());
}
public void testSimple() throws Exception {
String[] fields = {"b", "t"};
MultiFieldQueryParser mfqp = new MultiFieldQueryParser(fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
MultiFieldQueryParser mfqp = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
Query q = mfqp.parse("one");
assertEquals("b:one t:one", q.toString());
@ -133,7 +134,7 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {
boosts.put("b", Float.valueOf(5));
boosts.put("t", Float.valueOf(10));
String[] fields = {"b", "t"};
MultiFieldQueryParser mfqp = new MultiFieldQueryParser(fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), boosts);
MultiFieldQueryParser mfqp = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), boosts);
//Check for simple
@ -159,24 +160,24 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {
public void testStaticMethod1() throws ParseException {
String[] fields = {"b", "t"};
String[] queries = {"one", "two"};
Query q = MultiFieldQueryParser.parse(queries, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
Query q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
assertEquals("b:one t:two", q.toString());
String[] queries2 = {"+one", "+two"};
q = MultiFieldQueryParser.parse(queries2, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries2, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
assertEquals("(+b:one) (+t:two)", q.toString());
String[] queries3 = {"one", "+two"};
q = MultiFieldQueryParser.parse(queries3, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries3, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
assertEquals("b:one (+t:two)", q.toString());
String[] queries4 = {"one +more", "+two"};
q = MultiFieldQueryParser.parse(queries4, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries4, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
assertEquals("(b:one +b:more) (+t:two)", q.toString());
String[] queries5 = {"blah"};
try {
q = MultiFieldQueryParser.parse(queries5, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries5, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
fail();
} catch(IllegalArgumentException e) {
// expected exception, array length differs
@ -186,11 +187,11 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {
TestQueryParser.QPTestAnalyzer stopA = new TestQueryParser.QPTestAnalyzer();
String[] queries6 = {"((+stop))", "+((stop))"};
q = MultiFieldQueryParser.parse(queries6, fields, stopA);
q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries6, fields, stopA);
assertEquals("", q.toString());
String[] queries7 = {"one ((+stop)) +more", "+((stop)) +two"};
q = MultiFieldQueryParser.parse(queries7, fields, stopA);
q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries7, fields, stopA);
assertEquals("(b:one +b:more) (+t:two)", q.toString());
}
@ -198,15 +199,15 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {
public void testStaticMethod2() throws ParseException {
String[] fields = {"b", "t"};
BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT};
Query q = MultiFieldQueryParser.parse("one", fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
Query q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, "one", fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
assertEquals("+b:one -t:one", q.toString());
q = MultiFieldQueryParser.parse("one two", fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, "one two", fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
assertEquals("+(b:one b:two) -(t:one t:two)", q.toString());
try {
BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST};
q = MultiFieldQueryParser.parse("blah", fields, flags2, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, "blah", fields, flags2, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
fail();
} catch(IllegalArgumentException e) {
// expected exception, array length differs
@ -217,17 +218,17 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {
String[] fields = {"b", "t"};
//int[] flags = {MultiFieldQueryParser.REQUIRED_FIELD, MultiFieldQueryParser.PROHIBITED_FIELD};
BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT};
MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
Query q = MultiFieldQueryParser.parse("one", fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));//, fields, flags, new StandardAnalyzer());
Query q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, "one", fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));//, fields, flags, new StandardAnalyzer());
assertEquals("+b:one -t:one", q.toString());
q = MultiFieldQueryParser.parse("one two", fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, "one two", fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
assertEquals("+(b:one b:two) -(t:one t:two)", q.toString());
try {
BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST};
q = MultiFieldQueryParser.parse("blah", fields, flags2, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, "blah", fields, flags2, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
fail();
} catch(IllegalArgumentException e) {
// expected exception, array length differs
@ -239,12 +240,12 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {
String[] fields = {"f1", "f2", "f3"};
BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST,
BooleanClause.Occur.MUST_NOT, BooleanClause.Occur.SHOULD};
Query q = MultiFieldQueryParser.parse(queries, fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
Query q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries, fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
assertEquals("+f1:one -f2:two f3:three", q.toString());
try {
BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST};
q = MultiFieldQueryParser.parse(queries, fields, flags2, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries, fields, flags2, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
fail();
} catch(IllegalArgumentException e) {
// expected exception, array length differs
@ -255,12 +256,12 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {
String[] queries = {"one", "two"};
String[] fields = {"b", "t"};
BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT};
Query q = MultiFieldQueryParser.parse(queries, fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
Query q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries, fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
assertEquals("+b:one -t:two", q.toString());
try {
BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST};
q = MultiFieldQueryParser.parse(queries, fields, flags2, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries, fields, flags2, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
fail();
} catch(IllegalArgumentException e) {
// expected exception, array length differs
@ -269,7 +270,7 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {
public void testAnalyzerReturningNull() throws ParseException {
String[] fields = new String[] { "f1", "f2", "f3" };
MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new AnalyzerReturningNull());
MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, new AnalyzerReturningNull());
Query q = parser.parse("bla AND blo");
assertEquals("+(f2:bla f3:bla) +(f2:blo f3:blo)", q.toString());
// the following queries are not affected as their terms are not analyzed anyway:
@ -291,7 +292,7 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {
iw.close();
MultiFieldQueryParser mfqp =
new MultiFieldQueryParser(new String[] {"body"}, analyzer);
new MultiFieldQueryParser(Version.LUCENE_CURRENT, new String[] {"body"}, analyzer);
mfqp.setDefaultOperator(QueryParser.Operator.AND);
Query q = mfqp.parse("the footest");
IndexSearcher is = new IndexSearcher(ramDir, true);

View File

@ -47,6 +47,7 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.FuzzyQuery;
@ -60,7 +61,10 @@ import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockRAMDirectory;
import org.apache.lucene.util.LocalizedTestCase;
import org.apache.lucene.util.Version;
/**
* Tests QueryParser.
@ -127,7 +131,7 @@ public class TestQueryParser extends LocalizedTestCase {
public static class QPTestParser extends QueryParser {
public QPTestParser(String f, Analyzer a) {
super(f, a);
super(Version.LUCENE_CURRENT, f, a);
}
protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException {
@ -149,7 +153,7 @@ public class TestQueryParser extends LocalizedTestCase {
public QueryParser getParser(Analyzer a) throws Exception {
if (a == null)
a = new SimpleAnalyzer();
QueryParser qp = new QueryParser("field", a);
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", a);
qp.setDefaultOperator(QueryParser.OR_OPERATOR);
return qp;
}
@ -219,7 +223,7 @@ public class TestQueryParser extends LocalizedTestCase {
throws Exception {
if (a == null)
a = new SimpleAnalyzer();
QueryParser qp = new QueryParser("field", a);
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", a);
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
return qp.parse(query);
}
@ -291,7 +295,7 @@ public class TestQueryParser extends LocalizedTestCase {
assertQueryEquals("+title:(dog OR cat) -author:\"bob dole\"", null,
"+(title:dog title:cat) -author:\"bob dole\"");
QueryParser qp = new QueryParser("field", new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
// make sure OR is the default:
assertEquals(QueryParser.OR_OPERATOR, qp.getDefaultOperator());
qp.setDefaultOperator(QueryParser.AND_OPERATOR);
@ -446,7 +450,7 @@ public class TestQueryParser extends LocalizedTestCase {
assertQueryEquals("[ a TO z]", null, "[a TO z]");
assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((TermRangeQuery)getQuery("[ a TO z]", null)).getRewriteMethod());
QueryParser qp = new QueryParser("field", new SimpleAnalyzer());
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", new SimpleAnalyzer());
qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,((TermRangeQuery)qp.parse("[ a TO z]")).getRewriteMethod());
@ -472,7 +476,7 @@ public class TestQueryParser extends LocalizedTestCase {
iw.close();
IndexSearcher is = new IndexSearcher(ramDir, true);
QueryParser qp = new QueryParser("content", new WhitespaceAnalyzer());
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "content", new WhitespaceAnalyzer());
// Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
// RuleBasedCollator. However, the Arabic Locale seems to order the Farsi
@ -569,7 +573,7 @@ public class TestQueryParser extends LocalizedTestCase {
final String defaultField = "default";
final String monthField = "month";
final String hourField = "hour";
QueryParser qp = new QueryParser("field", new SimpleAnalyzer());
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", new SimpleAnalyzer());
// Don't set any date resolution and verify if DateField is used
assertDateRangeQueryEquals(qp, defaultField, startDate, endDate,
@ -792,7 +796,7 @@ public class TestQueryParser extends LocalizedTestCase {
Set stopWords = new HashSet(1);
stopWords.add("on");
StandardAnalyzer oneStopAnalyzer = new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT, stopWords);
QueryParser qp = new QueryParser("field", oneStopAnalyzer);
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", oneStopAnalyzer);
Query q = qp.parse("on^1.0");
assertNotNull(q);
q = qp.parse("\"hello\"^2.0");
@ -804,7 +808,7 @@ public class TestQueryParser extends LocalizedTestCase {
q = qp.parse("\"on\"^1.0");
assertNotNull(q);
QueryParser qp2 = new QueryParser("field", new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
QueryParser qp2 = new QueryParser(Version.LUCENE_CURRENT, "field", new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
q = qp2.parse("the^3");
// "the" is a stop word so the result is an empty query:
assertNotNull(q);
@ -852,7 +856,7 @@ public class TestQueryParser extends LocalizedTestCase {
public void testBooleanQuery() throws Exception {
BooleanQuery.setMaxClauseCount(2);
try {
QueryParser qp = new QueryParser("field", new WhitespaceAnalyzer());
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", new WhitespaceAnalyzer());
qp.parse("one two three");
fail("ParseException expected due to too many boolean clauses");
} catch (ParseException expected) {
@ -864,7 +868,7 @@ public class TestQueryParser extends LocalizedTestCase {
* This test differs from TestPrecedenceQueryParser
*/
public void testPrecedence() throws Exception {
QueryParser qp = new QueryParser("field", new WhitespaceAnalyzer());
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", new WhitespaceAnalyzer());
Query query1 = qp.parse("A AND B OR C AND D");
Query query2 = qp.parse("+A +B +C +D");
assertEquals(query1, query2);
@ -888,7 +892,7 @@ public class TestQueryParser extends LocalizedTestCase {
public void testStarParsing() throws Exception {
final int[] type = new int[1];
QueryParser qp = new QueryParser("field", new WhitespaceAnalyzer()) {
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", new WhitespaceAnalyzer()) {
protected Query getWildcardQuery(String field, String termStr) throws ParseException {
// override error checking of superclass
type[0]=1;
@ -944,7 +948,7 @@ public class TestQueryParser extends LocalizedTestCase {
}
public void testStopwords() throws Exception {
QueryParser qp = new QueryParser("a", new StopAnalyzer(StopFilter.makeStopSet("the", "foo"), true));
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "foo")));
Query result = qp.parse("a:the OR a:foo");
assertNotNull("result is null and it shouldn't be", result);
assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
@ -960,7 +964,7 @@ public class TestQueryParser extends LocalizedTestCase {
}
public void testPositionIncrement() throws Exception {
QueryParser qp = new QueryParser("a", new StopAnalyzer(StopFilter.makeStopSet("the", "in", "are", "this"), true));
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "in", "are", "this")));
qp.setEnablePositionIncrements(true);
String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
// 0 2 5 7 8
@ -977,7 +981,7 @@ public class TestQueryParser extends LocalizedTestCase {
}
public void testMatchAllDocs() throws Exception {
QueryParser qp = new QueryParser("field", new WhitespaceAnalyzer());
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", new WhitespaceAnalyzer());
assertEquals(new MatchAllDocsQuery(), qp.parse("*:*"));
assertEquals(new MatchAllDocsQuery(), qp.parse("(*:*)"));
BooleanQuery bq = (BooleanQuery)qp.parse("+*:* -*:*");
@ -986,7 +990,7 @@ public class TestQueryParser extends LocalizedTestCase {
}
private void assertHits(int expected, String query, IndexSearcher is) throws ParseException, IOException {
QueryParser qp = new QueryParser("date", new WhitespaceAnalyzer());
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "date", new WhitespaceAnalyzer());
qp.setLocale(Locale.ENGLISH);
Query q = qp.parse(query);
ScoreDoc[] hits = is.search(q, null, 1000).scoreDocs;
@ -1008,4 +1012,49 @@ public class TestQueryParser extends LocalizedTestCase {
BooleanQuery.setMaxClauseCount(originalMaxClauses);
}
// LUCENE-2002: make sure defaults for StandardAnalyzer's
// enableStopPositionIncr & QueryParser's enablePosIncr
// "match"
public void testPositionIncrements() throws Exception {
Directory dir = new MockRAMDirectory();
Analyzer a = new StandardAnalyzer(Version.LUCENE_CURRENT);
IndexWriter w = new IndexWriter(dir, a, IndexWriter.MaxFieldLength.UNLIMITED);
Document doc = new Document();
doc.add(new Field("f", "the wizard of ozzy", Field.Store.NO, Field.Index.ANALYZED));
w.addDocument(doc);
IndexReader r = w.getReader();
w.close();
IndexSearcher s = new IndexSearcher(r);
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "f", a);
Query q = qp.parse("\"wizard of ozzy\"");
assertEquals(1, s.search(q, 1).totalHits);
r.close();
dir.close();
}
// LUCENE-2002: when we run javacc to regen QueryParser,
// we also run a replaceregexp step to fix 2 of the public
// ctors (change them to protected):
//
// protected QueryParser(CharStream stream)
//
// protected QueryParser(QueryParserTokenManager tm)
//
// This test is here as a safety, in case that ant step
// doesn't work for some reason.
public void testProtectedCtors() throws Exception {
try {
QueryParser.class.getConstructor(new Class[] {CharStream.class});
fail("please switch public QueryParser(CharStream) to be protected");
} catch (NoSuchMethodException nsme) {
// expected
}
try {
QueryParser.class.getConstructor(new Class[] {QueryParserTokenManager.class});
fail("please switch public QueryParser(QueryParserTokenManager) to be protected");
} catch (NoSuchMethodException nsme) {
// expected
}
}
}

View File

@ -32,6 +32,7 @@ import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.MockRAMDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
/** Test BooleanQuery2 against BooleanQuery by overriding the standard query parser.
* This also tests the scoring order of BooleanQuery.
@ -104,7 +105,7 @@ public class TestBoolean2 extends LuceneTestCase {
};
public Query makeQuery(String queryText) throws ParseException {
Query q = (new QueryParser(field, new WhitespaceAnalyzer())).parse(queryText);
Query q = (new QueryParser(Version.LUCENE_CURRENT, field, new WhitespaceAnalyzer())).parse(queryText);
return q;
}

View File

@ -33,6 +33,7 @@ import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
/**
* Test date sorting, i.e. auto-sorting of fields with type "long".
@ -74,7 +75,7 @@ public class TestDateSort extends LuceneTestCase {
Sort sort = new Sort(new SortField(DATE_TIME_FIELD, SortField.STRING, true));
QueryParser queryParser = new QueryParser(TEXT_FIELD, new WhitespaceAnalyzer());
QueryParser queryParser = new QueryParser(Version.LUCENE_CURRENT, TEXT_FIELD, new WhitespaceAnalyzer());
Query query = queryParser.parse("Document");
// Execute the search and process the search results.

View File

@ -32,6 +32,7 @@ import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
/**
* Tests primitive queries (ie: that rewrite to themselves) to
@ -51,7 +52,7 @@ public class TestExplanations extends LuceneTestCase {
public static final String KEY = "KEY";
public static final String FIELD = "field";
public static final QueryParser qp =
new QueryParser(FIELD, new WhitespaceAnalyzer());
new QueryParser(Version.LUCENE_CURRENT, FIELD, new WhitespaceAnalyzer());
public void tearDown() throws Exception {
super.tearDown();

View File

@ -31,6 +31,7 @@ import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockRAMDirectory;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.util.Version;
/**
* Tests {@link FuzzyQuery}.
@ -313,7 +314,7 @@ public class TestFuzzyQuery extends LuceneTestCase {
IndexReader r = w.getReader();
w.close();
Query q = new QueryParser("field", analyzer).parse( "giga~0.9" );
Query q = new QueryParser(Version.LUCENE_CURRENT, "field", analyzer).parse( "giga~0.9" );
// 3. search
IndexSearcher searcher = new IndexSearcher(r);

View File

@ -100,7 +100,7 @@ public class TestMatchAllDocsQuery extends LuceneTestCase {
assertEquals(2, hits.length);
// test parsable toString()
QueryParser qp = new QueryParser("key", analyzer);
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "key", analyzer);
hits = is.search(qp.parse(new MatchAllDocsQuery().toString()), null, 1000).scoreDocs;
assertEquals(2, hits.length);
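
The "parsable toString()" check relies on MatchAllDocsQuery printing as
"*:*", a token the parser maps straight back to a MatchAllDocsQuery. A
minimal round-trip sketch (analyzer as in the test):

    Query all = new MatchAllDocsQuery();
    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "key", analyzer);
    Query roundTripped = qp.parse(all.toString()); // parses "*:*"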

View File

@ -30,6 +30,7 @@ import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.store.MockRAMDirectory;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.util.Collections;
@ -101,7 +102,7 @@ public class TestMultiSearcher extends LuceneTestCase
writerB.close();
// creating the query
QueryParser parser = new QueryParser("fulltext", new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "fulltext", new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
Query query = parser.parse("handle:1");
// building the searchables
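
Note that Version now appears twice, once for the parser and once for
the analyzer. A hypothetical cleanup, not part of this commit, that pins
both to one constant so their back-compat behaviors cannot drift apart:

    final Version v = Version.LUCENE_CURRENT;
    QueryParser parser = new QueryParser(v, "fulltext", new StandardAnalyzer(v));
    Query query = parser.parse("handle:1");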

View File

@ -26,6 +26,7 @@ import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import java.io.IOException;
@ -87,7 +88,7 @@ public class TestMultiSearcherRanking extends LuceneTestCase {
private void checkQuery(String queryStr) throws IOException, ParseException {
// check result hit ranking
if(verbose) System.out.println("Query: " + queryStr);
QueryParser queryParser = new QueryParser(FIELD_NAME, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
QueryParser queryParser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
Query query = queryParser.parse(queryStr);
ScoreDoc[] multiSearcherHits = multiSearcher.search(query, null, 1000).scoreDocs;
ScoreDoc[] singleSearcherHits = singleSearcher.search(query, null, 1000).scoreDocs;

View File

@ -25,6 +25,7 @@ import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.util.Version;
/** Similarity unit test.
*
@ -48,7 +49,7 @@ public class TestNot extends LuceneTestCase {
writer.close();
Searcher searcher = new IndexSearcher(store, true);
QueryParser parser = new QueryParser("field", new SimpleAnalyzer());
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "field", new SimpleAnalyzer());
Query query = parser.parse("a NOT b");
//System.out.println(query);
ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
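
With the default OR operator, "a NOT b" parses into a BooleanQuery whose
first clause is optional and whose second is prohibited. A sketch of
inspecting that structure (reusing the parser above):

    BooleanQuery bq = (BooleanQuery) parser.parse("a NOT b");
    for (BooleanClause c : bq.getClauses()) {
      System.out.println(c.getOccur() + " " + c.getQuery());
    }
    // yields SHOULD(field:a) and MUST_NOT(field:b)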

View File

@ -25,6 +25,7 @@ import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import java.io.IOException;
import java.io.Reader;
@ -201,7 +202,7 @@ public class TestPhraseQuery extends LuceneTestCase {
public void testPhraseQueryWithStopAnalyzer() throws Exception {
RAMDirectory directory = new RAMDirectory();
StopAnalyzer stopAnalyzer = new StopAnalyzer(false);
StopAnalyzer stopAnalyzer = new StopAnalyzer(Version.LUCENE_24);
IndexWriter writer = new IndexWriter(directory, stopAnalyzer, true,
IndexWriter.MaxFieldLength.LIMITED);
Document doc = new Document();
@ -220,7 +221,7 @@ public class TestPhraseQuery extends LuceneTestCase {
QueryUtils.check(query,searcher);
// currently StopAnalyzer does not leave "holes", so this matches.
// StopAnalyzer as of 2.4 does not leave "holes", so this matches.
query = new PhraseQuery();
query.add(new Term("field", "words"));
query.add(new Term("field", "here"));
@ -357,8 +358,8 @@ public class TestPhraseQuery extends LuceneTestCase {
}
public void testToString() throws Exception {
StopAnalyzer analyzer = new StopAnalyzer(true);
QueryParser qp = new QueryParser("field", analyzer);
StopAnalyzer analyzer = new StopAnalyzer(Version.LUCENE_CURRENT);
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", analyzer);
qp.setEnablePositionIncrements(true);
PhraseQuery q = (PhraseQuery)qp.parse("\"this hi this is a test is\"");
assertEquals("field:\"? hi ? ? ? test\"", q.toString());

View File

@ -51,6 +51,7 @@ import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.util.Version;
/**
* Term position unit test.
@ -188,7 +189,7 @@ public class TestPositionIncrement extends BaseTokenStreamTestCase {
assertEquals(0, hits.length);
// should not find "1 2" because there is a gap of 1 in the index
QueryParser qp = new QueryParser("field",
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
new StopWhitespaceAnalyzer(false));
q = (PhraseQuery) qp.parse("\"1 2\"");
hits = searcher.search(q, null, 1000).scoreDocs;
@ -212,7 +213,7 @@ public class TestPositionIncrement extends BaseTokenStreamTestCase {
assertEquals(0, hits.length);
// when both qp and stopFilter propagate increments, we should find the doc.
qp = new QueryParser("field",
qp = new QueryParser(Version.LUCENE_CURRENT, "field",
new StopWhitespaceAnalyzer(true));
qp.setEnablePositionIncrements(true);
q = (PhraseQuery) qp.parse("\"1 stop 2\"");
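
Both sides must cooperate here: the analyzer's StopFilter has to record
the hole and the parser has to honor it. StopWhitespaceAnalyzer is
test-local; an approximate, hypothetical reconstruction, assuming the
2.9-era StopFilter ctor that takes an enablePositionIncrements flag:

    import java.io.Reader;
    import java.util.Collections;
    import org.apache.lucene.analysis.*;

    class StopWhitespaceAnalyzer extends Analyzer {
      private final boolean enablePositionIncrements;

      StopWhitespaceAnalyzer(boolean enablePositionIncrements) {
        this.enablePositionIncrements = enablePositionIncrements;
      }

      public TokenStream tokenStream(String fieldName, Reader reader) {
        // true here means each removed "stop" token leaves a positional gap:
        return new StopFilter(enablePositionIncrements,
            new WhitespaceTokenizer(reader),
            Collections.singleton("stop"));
      }
    }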

View File

@ -28,6 +28,7 @@ import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MockRAMDirectory;
import org.apache.lucene.util.Version;
/**
@ -327,7 +328,7 @@ public class TestSimpleExplanations extends TestExplanations {
writerB.addDocument(lDoc3);
writerB.close();
QueryParser parser = new QueryParser("fulltext", new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "fulltext", new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
Query query = parser.parse("handle:1");
Searcher[] searchers = new Searcher[2];

View File

@ -31,6 +31,7 @@ import org.apache.lucene.search.TimeLimitingCollector.TimeExceededException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
/**
* Tests the {@link TimeLimitingCollector}. This test checks (1) search
@ -85,7 +86,7 @@ public class TestTimeLimitingCollector extends LuceneTestCase {
for (int i = 0; i < docText.length; i++) {
qtxt += ' ' + docText[i]; // large query so that search will be longer
}
QueryParser queryParser = new QueryParser(FIELD_NAME, new WhitespaceAnalyzer());
QueryParser queryParser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, new WhitespaceAnalyzer());
query = queryParser.parse(qtxt);
// warm the searcher

View File

@ -28,6 +28,7 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import java.io.IOException;
@ -238,7 +239,7 @@ public class TestWildcard
public void testParsingAndSearching() throws Exception {
String field = "content";
boolean dbg = false;
QueryParser qp = new QueryParser(field, new WhitespaceAnalyzer());
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, field, new WhitespaceAnalyzer());
qp.setAllowLeadingWildcard(true);
String docs[] = {
"\\ abcdefg1",

View File

@ -28,6 +28,7 @@ import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.Version;
/**
* Test CustomScoreQuery search.
@ -139,7 +140,7 @@ public class TestCustomScoreQuery extends FunctionTestSetup {
float boost = (float) dboost;
IndexSearcher s = new IndexSearcher(dir, true);
FieldScoreQuery qValSrc = new FieldScoreQuery(field,tp); // a query that would score by the field
QueryParser qp = new QueryParser(TEXT_FIELD,anlzr);
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, TEXT_FIELD,anlzr);
String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.
// regular (boolean) query.

View File

@ -30,13 +30,14 @@ import org.apache.lucene.search.Weight;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.Version;
public class TestNearSpansOrdered extends LuceneTestCase {
protected IndexSearcher searcher;
public static final String FIELD = "field";
public static final QueryParser qp =
new QueryParser(FIELD, new WhitespaceAnalyzer());
new QueryParser(Version.LUCENE_CURRENT, FIELD, new WhitespaceAnalyzer());
public void tearDown() throws Exception {
super.tearDown();