mirror of https://github.com/apache/lucene.git

LUCENE-2002: add Version to QueryParser & contrib analyzers

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@829206 13f79535-47bb-0310-9956-ffa450edef68

parent 0557d2ce5a
commit aaddac8992
CHANGES.txt

@@ -137,6 +137,11 @@ Optimizations
 * LUCENE-1183: Optimize Levenshtein Distance computation in
   FuzzyQuery.  (Cédrik Lime via Mike McCandless)

+* LUCENE-2002: Add required Version matchVersion argument when
+  constructing QueryParser or MultiFieldQueryParser and, default (as
+  of 2.9) enablePositionIncrements to true to match
+  StandardAnalyzer's 2.9 default (Uwe Schindler, Mike McCandless)
+
 Documentation

 Build
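The changelog entry above translates into a one-argument change at every parser construction site. A minimal sketch of the new calling convention, assuming Version.LUCENE_CURRENT and an illustrative field name "body":

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.util.Version;

    // matchVersion controls version-dependent defaults such as
    // enablePositionIncrements, which is true as of 2.9.
    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "body",
        new StandardAnalyzer(Version.LUCENE_CURRENT));
    Query q = parser.parse("quick AND fox");

Passing the Version an existing index was built with, rather than LUCENE_CURRENT, preserves the old parsing behavior for that index.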
build.xml (18 lines changed)

@@ -580,9 +580,21 @@
 <target name="javacc" depends="clean-javacc,javacc-QueryParser,javacc-HTMLParser,javacc-contrib-queryparser, javacc-contrib-surround, javacc-contrib-precedence"/>

 <target name="javacc-QueryParser" depends="init,javacc-check" if="javacc.present">
-  <invoke-javacc target="src/java/org/apache/lucene/queryParser/QueryParser.jj"
-                 outputDir="src/java/org/apache/lucene/queryParser"
-  />
+  <sequential>
+    <invoke-javacc target="src/java/org/apache/lucene/queryParser/QueryParser.jj"
+                   outputDir="src/java/org/apache/lucene/queryParser"/>
+
+    <!-- Change the incorrect public ctors for QueryParser to be protected instead -->
+    <replaceregexp file="src/java/org/apache/lucene/queryParser/QueryParser.java"
+                   byline="true"
+                   match="public QueryParser\(CharStream "
+                   replace="protected QueryParser(CharStream "/>
+    <replaceregexp file="src/java/org/apache/lucene/queryParser/QueryParser.java"
+                   byline="true"
+                   match="public QueryParser\(QueryParserTokenManager "
+                   replace="protected QueryParser(QueryParserTokenManager "/>
+
+  </sequential>
 </target>

 <target name="javacc-HTMLParser" depends="init,javacc-check" if="javacc.present">
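For context (an editor's sketch, not part of the commit): JavaCC regenerates QueryParser.java with public constructors on every run, so the replaceregexp steps above rewrite the two generated ctors after each regeneration, steering callers toward the hand-written constructors that take a Version. The intended effect, with bodies elided and signatures taken from the match/replace patterns above:

    // As emitted by JavaCC on each regeneration:
    public QueryParser(CharStream stream) { /* generated */ }
    public QueryParser(QueryParserTokenManager tm) { /* generated */ }

    // After the build's replaceregexp pass:
    protected QueryParser(CharStream stream) { /* generated */ }
    protected QueryParser(QueryParserTokenManager tm) { /* generated */ }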
common-build.xml

@@ -42,7 +42,7 @@
 <property name="Name" value="Lucene"/>
 <property name="dev.version" value="3.0-dev"/>
 <property name="version" value="${dev.version}"/>
-<property name="compatibility.tag" value="lucene_2_9_back_compat_tests_20091023"/>
+<property name="compatibility.tag" value="lucene_2_9_back_compat_tests_20091023a"/>
 <property name="spec.version" value="${version}"/>
 <property name="year" value="2000-${current.year}"/>
 <property name="final.name" value="lucene-${name}-${version}"/>
contrib/CHANGES.txt

@@ -25,6 +25,12 @@ API Changes
   text exactly the same as LowerCaseFilter. Please use LowerCaseFilter
   instead, which has the same functionality.  (Robert Muir)

+* LUCENE-2002: Add required Version matchVersion argument when
+  constructing ComplexPhraseQueryParser and default (as of 2.9)
+  enablePositionIncrements to true to match StandardAnalyzer's
+  default.  Also added required matchVersion to most of the analyzers
+  (Uwe Schindler, Mike McCandless)
+
 Bug fixes

 * LUCENE-1781: Fixed various issues with the lat/lng bounding box
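Since most contrib analyzers now require a matchVersion, existing call sites need the same one-argument migration. A hedged before/after sketch (GermanAnalyzer is just one example; LUCENE_24 stands in for whatever version an existing index was built with):

    import org.apache.lucene.analysis.de.GermanAnalyzer;
    import org.apache.lucene.util.Version;

    // Before this commit:
    //   GermanAnalyzer a = new GermanAnalyzer();
    // After it:
    GermanAnalyzer current = new GermanAnalyzer(Version.LUCENE_CURRENT); // new 2.9 defaults
    GermanAnalyzer pinned  = new GermanAnalyzer(Version.LUCENE_24);      // pre-2.9 StopFilter behavior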
ArabicAnalyzer.java

@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
+import org.apache.lucene.util.Version;

 /**
  * {@link Analyzer} for Arabic.

@@ -109,32 +110,38 @@ public final class ArabicAnalyzer extends Analyzer {
     }
   }

+  private final Version matchVersion;
+
   /**
    * Builds an analyzer with the default stop words: {@link #DEFAULT_STOPWORD_FILE}.
    */
-  public ArabicAnalyzer() {
+  public ArabicAnalyzer(Version matchVersion) {
+    this.matchVersion = matchVersion;
     stoptable = DefaultSetHolder.DEFAULT_STOP_SET;
   }

   /**
    * Builds an analyzer with the given stop words.
    */
-  public ArabicAnalyzer( String... stopwords ) {
+  public ArabicAnalyzer( Version matchVersion, String... stopwords ) {
     stoptable = StopFilter.makeStopSet( stopwords );
+    this.matchVersion = matchVersion;
   }

   /**
    * Builds an analyzer with the given stop words.
    */
-  public ArabicAnalyzer( Hashtable<?,?> stopwords ) {
-    stoptable = new HashSet( stopwords.keySet() );
+  public ArabicAnalyzer( Version matchVersion, Hashtable<?,?> stopwords ) {
+    stoptable = new HashSet(stopwords.keySet());
+    this.matchVersion = matchVersion;
   }

   /**
    * Builds an analyzer with the given stop words. Lines can be commented out using {@link #STOPWORDS_COMMENT}
    */
-  public ArabicAnalyzer( File stopwords ) throws IOException {
+  public ArabicAnalyzer( Version matchVersion, File stopwords ) throws IOException {
     stoptable = WordlistLoader.getWordSet( stopwords, STOPWORDS_COMMENT);
+    this.matchVersion = matchVersion;
   }

@@ -149,7 +156,8 @@ public final class ArabicAnalyzer extends Analyzer {
     TokenStream result = new ArabicLetterTokenizer( reader );
     result = new LowerCaseFilter(result);
     // the order here is important: the stopword list is not normalized!
-    result = new StopFilter(false, result, stoptable );
+    result = new StopFilter( StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                             result, stoptable );
     result = new ArabicNormalizationFilter( result );
     result = new ArabicStemFilter( result );

@@ -177,7 +185,8 @@ public final class ArabicAnalyzer extends Analyzer {
       streams.source = new ArabicLetterTokenizer(reader);
       streams.result = new LowerCaseFilter(streams.source);
       // the order here is important: the stopword list is not normalized!
-      streams.result = new StopFilter(false, streams.result, stoptable);
+      streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                                      streams.result, stoptable);
       streams.result = new ArabicNormalizationFilter(streams.result);
       streams.result = new ArabicStemFilter(streams.result);
       setPreviousTokenStream(streams);
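Every analyzer in this commit swaps a hard-coded false for StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion). The helper's body is not shown in this diff; presumably it keys off the 2.9 cutoff described in the changelog, roughly like this assumed sketch:

    // Assumed implementation, not quoted from the commit: position
    // increments are preserved by default for 2.9 and later.
    public static boolean getEnablePositionIncrementsVersionDefault(Version matchVersion) {
      return matchVersion.onOrAfter(Version.LUCENE_29);
    }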
BrazilianAnalyzer.java

@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;

 /**
  * {@link Analyzer} for Brazilian Portuguese language.

@@ -41,6 +42,9 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
  * will not be indexed at all) and an external list of exclusions (words that will
  * not be stemmed, but indexed).
  * </p>
+ *
+ * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * dependent settings as {@link StandardAnalyzer}.</p>
  */
 public final class BrazilianAnalyzer extends Analyzer {

@@ -78,33 +82,38 @@ public final class BrazilianAnalyzer extends Analyzer {
   * Contains words that should be indexed but not stemmed.
   */
  private Set excltable = Collections.emptySet();
+ private final Version matchVersion;

  /**
   * Builds an analyzer with the default stop words ({@link #BRAZILIAN_STOP_WORDS}).
   */
- public BrazilianAnalyzer() {
-   stoptable = StopFilter.makeStopSet( BRAZILIAN_STOP_WORDS );
+ public BrazilianAnalyzer(Version matchVersion) {
+   stoptable = StopFilter.makeStopSet( BRAZILIAN_STOP_WORDS );
+   this.matchVersion = matchVersion;
  }

  /**
   * Builds an analyzer with the given stop words.
   */
- public BrazilianAnalyzer( String... stopwords ) {
-   stoptable = StopFilter.makeStopSet( stopwords );
+ public BrazilianAnalyzer( Version matchVersion, String... stopwords ) {
+   stoptable = StopFilter.makeStopSet( stopwords );
+   this.matchVersion = matchVersion;
  }

  /**
   * Builds an analyzer with the given stop words.
   */
- public BrazilianAnalyzer( Map stopwords ) {
-   stoptable = new HashSet(stopwords.keySet());
+ public BrazilianAnalyzer( Version matchVersion, Map stopwords ) {
+   stoptable = new HashSet(stopwords.keySet());
+   this.matchVersion = matchVersion;
  }

  /**
   * Builds an analyzer with the given stop words.
   */
- public BrazilianAnalyzer( File stopwords ) throws IOException {
-   stoptable = WordlistLoader.getWordSet( stopwords );
+ public BrazilianAnalyzer( Version matchVersion, File stopwords ) throws IOException {
+   stoptable = WordlistLoader.getWordSet( stopwords );
+   this.matchVersion = matchVersion;
  }

  /**

@@ -137,10 +146,11 @@ public final class BrazilianAnalyzer extends Analyzer {
   * {@link BrazilianStemFilter}.
   */
  public final TokenStream tokenStream(String fieldName, Reader reader) {
-   TokenStream result = new StandardTokenizer( reader );
+   TokenStream result = new StandardTokenizer( matchVersion, reader );
    result = new LowerCaseFilter( result );
    result = new StandardFilter( result );
-   result = new StopFilter( false, result, stoptable );
+   result = new StopFilter( StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                            result, stoptable );
    result = new BrazilianStemFilter( result, excltable );
    return result;
  }

@@ -163,10 +173,11 @@ public final class BrazilianAnalyzer extends Analyzer {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
-     streams.source = new StandardTokenizer(reader);
+     streams.source = new StandardTokenizer(matchVersion, reader);
      streams.result = new LowerCaseFilter(streams.source);
      streams.result = new StandardFilter(streams.result);
-     streams.result = new StopFilter(false, streams.result, stoptable);
+     streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                                     streams.result, stoptable);
      streams.result = new BrazilianStemFilter(streams.result, excltable);
      setPreviousTokenStream(streams);
    } else {
CJKAnalyzer.java

@@ -21,6 +21,7 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.util.Version;

 import java.io.IOException;
 import java.io.Reader;

@@ -56,14 +57,16 @@ public class CJKAnalyzer extends Analyzer {
   * stop word list
   */
  private final Set stopTable;
+ private final Version matchVersion;

  //~ Constructors -----------------------------------------------------------

  /**
   * Builds an analyzer which removes words in {@link #STOP_WORDS}.
   */
- public CJKAnalyzer() {
+ public CJKAnalyzer(Version matchVersion) {
    stopTable = StopFilter.makeStopSet(STOP_WORDS);
+   this.matchVersion = matchVersion;
  }

  /**

@@ -71,8 +74,9 @@ public class CJKAnalyzer extends Analyzer {
   *
   * @param stopWords stop word array
   */
- public CJKAnalyzer(String... stopWords) {
+ public CJKAnalyzer(Version matchVersion, String... stopWords) {
    stopTable = StopFilter.makeStopSet(stopWords);
+   this.matchVersion = matchVersion;
  }

  //~ Methods ----------------------------------------------------------------

@@ -86,7 +90,8 @@ public class CJKAnalyzer extends Analyzer {
   * {@link StopFilter}
   */
  public final TokenStream tokenStream(String fieldName, Reader reader) {
-   return new StopFilter(false, new CJKTokenizer(reader), stopTable);
+   return new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                         new CJKTokenizer(reader), stopTable);
  }

  private class SavedStreams {

@@ -109,7 +114,8 @@ public class CJKAnalyzer extends Analyzer {
    if (streams == null) {
      streams = new SavedStreams();
      streams.source = new CJKTokenizer(reader);
-     streams.result = new StopFilter(false, streams.source, stopTable);
+     streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                                     streams.source, stopTable);
      setPreviousTokenStream(streams);
    } else {
      streams.source.reset(reader);
CzechAnalyzer.java

@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;

 import java.io.*;
 import java.util.HashSet;

@@ -38,6 +39,9 @@ import java.util.Collections;
  * will not be indexed at all).
  * A default set of stopwords is used unless an alternative list is specified.
  * </p>
+ *
+ * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * dependent settings as {@link StandardAnalyzer}.</p>
  */
 public final class CzechAnalyzer extends Analyzer {

@@ -69,30 +73,35 @@ public final class CzechAnalyzer extends Analyzer {
   * Contains the stopwords used with the {@link StopFilter}.
   */
  private Set stoptable;
+ private final Version matchVersion;

  /**
   * Builds an analyzer with the default stop words ({@link #CZECH_STOP_WORDS}).
   */
- public CzechAnalyzer() {
-   stoptable = StopFilter.makeStopSet( CZECH_STOP_WORDS );
+ public CzechAnalyzer(Version matchVersion) {
+   stoptable = StopFilter.makeStopSet( CZECH_STOP_WORDS );
+   this.matchVersion = matchVersion;
  }

  /**
   * Builds an analyzer with the given stop words.
   */
- public CzechAnalyzer( String... stopwords ) {
-   stoptable = StopFilter.makeStopSet( stopwords );
+ public CzechAnalyzer(Version matchVersion, String... stopwords) {
+   stoptable = StopFilter.makeStopSet( stopwords );
+   this.matchVersion = matchVersion;
  }

- public CzechAnalyzer( HashSet stopwords ) {
-   stoptable = stopwords;
+ public CzechAnalyzer(Version matchVersion, HashSet stopwords) {
+   stoptable = stopwords;
+   this.matchVersion = matchVersion;
  }

  /**
   * Builds an analyzer with the given stop words.
   */
- public CzechAnalyzer( File stopwords ) throws IOException {
-   stoptable = WordlistLoader.getWordSet( stopwords );
+ public CzechAnalyzer(Version matchVersion, File stopwords ) throws IOException {
+   stoptable = WordlistLoader.getWordSet( stopwords );
+   this.matchVersion = matchVersion;
  }

  /**

@@ -131,10 +140,11 @@ public final class CzechAnalyzer extends Analyzer {
   * {@link StandardFilter}, {@link LowerCaseFilter}, and {@link StopFilter}
   */
  public final TokenStream tokenStream( String fieldName, Reader reader ) {
-   TokenStream result = new StandardTokenizer( reader );
+   TokenStream result = new StandardTokenizer( matchVersion, reader );
    result = new StandardFilter( result );
    result = new LowerCaseFilter( result );
-   result = new StopFilter(false, result, stoptable );
+   result = new StopFilter( StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                            result, stoptable );
    return result;
  }

@@ -155,10 +165,11 @@ public final class CzechAnalyzer extends Analyzer {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
-     streams.source = new StandardTokenizer(reader);
+     streams.source = new StandardTokenizer(matchVersion, reader);
      streams.result = new StandardFilter(streams.source);
      streams.result = new LowerCaseFilter(streams.result);
-     streams.result = new StopFilter(false, streams.result, stoptable);
+     streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                                     streams.result, stoptable);
      setPreviousTokenStream(streams);
    } else {
      streams.source.reset(reader);
GermanAnalyzer.java

@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;

 /**
  * {@link Analyzer} for German language.

@@ -43,6 +44,9 @@ import org.apache.lucene.analysis.standard.StandardTokenizer;
  * A default set of stopwords is used unless an alternative list is specified, but the
  * exclusion list is empty by default.
  * </p>
+ *
+ * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * dependent settings as {@link StandardAnalyzer}.</p>
  */
 public class GermanAnalyzer extends Analyzer {

@@ -74,37 +78,43 @@ public class GermanAnalyzer extends Analyzer {
   */
  private Set exclusionSet = new HashSet();

+ private final Version matchVersion;
+
  /**
   * Builds an analyzer with the default stop words:
   * {@link #GERMAN_STOP_WORDS}.
   */
- public GermanAnalyzer() {
+ public GermanAnalyzer(Version matchVersion) {
    stopSet = StopFilter.makeStopSet(GERMAN_STOP_WORDS);
    setOverridesTokenStreamMethod(GermanAnalyzer.class);
+   this.matchVersion = matchVersion;
  }

  /**
   * Builds an analyzer with the given stop words.
   */
- public GermanAnalyzer(String... stopwords) {
+ public GermanAnalyzer(Version matchVersion, String... stopwords) {
    stopSet = StopFilter.makeStopSet(stopwords);
    setOverridesTokenStreamMethod(GermanAnalyzer.class);
+   this.matchVersion = matchVersion;
  }

  /**
   * Builds an analyzer with the given stop words.
   */
- public GermanAnalyzer(Map stopwords) {
+ public GermanAnalyzer(Version matchVersion, Map stopwords) {
    stopSet = new HashSet(stopwords.keySet());
    setOverridesTokenStreamMethod(GermanAnalyzer.class);
+   this.matchVersion = matchVersion;
  }

  /**
   * Builds an analyzer with the given stop words.
   */
- public GermanAnalyzer(File stopwords) throws IOException {
+ public GermanAnalyzer(Version matchVersion, File stopwords) throws IOException {
    stopSet = WordlistLoader.getWordSet(stopwords);
    setOverridesTokenStreamMethod(GermanAnalyzer.class);
+   this.matchVersion = matchVersion;
  }

  /**

@@ -139,10 +149,11 @@ public class GermanAnalyzer extends Analyzer {
   * {@link GermanStemFilter}
   */
  public TokenStream tokenStream(String fieldName, Reader reader) {
-   TokenStream result = new StandardTokenizer(reader);
+   TokenStream result = new StandardTokenizer(matchVersion, reader);
    result = new StandardFilter(result);
    result = new LowerCaseFilter(result);
-   result = new StopFilter(false, result, stopSet);
+   result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                           result, stopSet);
    result = new GermanStemFilter(result, exclusionSet);
    return result;
  }

@@ -171,10 +182,11 @@ public class GermanAnalyzer extends Analyzer {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
-     streams.source = new StandardTokenizer(reader);
+     streams.source = new StandardTokenizer(matchVersion, reader);
      streams.result = new StandardFilter(streams.source);
      streams.result = new LowerCaseFilter(streams.result);
-     streams.result = new StopFilter(false, streams.result, stopSet);
+     streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                                     streams.result, stopSet);
      streams.result = new GermanStemFilter(streams.result, exclusionSet);
      setPreviousTokenStream(streams);
    } else {
GreekAnalyzer.java

@@ -22,6 +22,7 @@ import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;

 import java.io.IOException;
 import java.io.Reader;

@@ -36,6 +37,9 @@ import java.util.Set;
  * that will not be indexed at all).
  * A default set of stopwords is used unless an alternative list is specified.
  * </p>
+ *
+ * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * dependent settings as {@link StandardAnalyzer}.</p>
  */
 public final class GreekAnalyzer extends Analyzer
 {

@@ -59,27 +63,33 @@ public final class GreekAnalyzer extends Analyzer
   */
  private Set stopSet = new HashSet();

- public GreekAnalyzer() {
-   this(GREEK_STOP_WORDS);
+ private final Version matchVersion;
+
+ public GreekAnalyzer(Version matchVersion) {
+   super();
+   stopSet = StopFilter.makeStopSet(GREEK_STOP_WORDS);
+   this.matchVersion = matchVersion;
  }

  /**
   * Builds an analyzer with the given stop words.
   * @param stopwords Array of stopwords to use.
   */
- public GreekAnalyzer(String... stopwords)
+ public GreekAnalyzer(Version matchVersion, String... stopwords)
  {
-   super();
-   stopSet = StopFilter.makeStopSet(stopwords);
+   super();
+   stopSet = StopFilter.makeStopSet(stopwords);
+   this.matchVersion = matchVersion;
  }

  /**
   * Builds an analyzer with the given stop words.
   */
- public GreekAnalyzer(Map stopwords)
+ public GreekAnalyzer(Version matchVersion, Map stopwords)
  {
-   super();
-   stopSet = new HashSet(stopwords.keySet());
+   super();
+   stopSet = new HashSet(stopwords.keySet());
+   this.matchVersion = matchVersion;
  }

  /**

@@ -90,9 +100,10 @@ public final class GreekAnalyzer extends Analyzer
   */
  public TokenStream tokenStream(String fieldName, Reader reader)
  {
-   TokenStream result = new StandardTokenizer(reader);
+   TokenStream result = new StandardTokenizer(matchVersion, reader);
    result = new GreekLowerCaseFilter(result);
-   result = new StopFilter(false, result, stopSet);
+   result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                           result, stopSet);
    return result;
  }

@@ -113,9 +124,10 @@ public final class GreekAnalyzer extends Analyzer
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
-     streams.source = new StandardTokenizer(reader);
+     streams.source = new StandardTokenizer(matchVersion, reader);
      streams.result = new GreekLowerCaseFilter(streams.source);
-     streams.result = new StopFilter(false, streams.result, stopSet);
+     streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                                     streams.result, stopSet);
      setPreviousTokenStream(streams);
    } else {
      streams.source.reset(reader);
PersianAnalyzer.java

@@ -35,6 +35,7 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.ar.ArabicLetterTokenizer;
 import org.apache.lucene.analysis.ar.ArabicNormalizationFilter;
+import org.apache.lucene.util.Version;

 /**
  * {@link Analyzer} for Persian.

@@ -106,36 +107,40 @@ public final class PersianAnalyzer extends Analyzer {
    }
  }

+ private final Version matchVersion;
+
  /**
   * Builds an analyzer with the default stop words:
   * {@link #DEFAULT_STOPWORD_FILE}.
   */
- public PersianAnalyzer() {
+ public PersianAnalyzer(Version matchVersion) {
    stoptable = DefaultSetHolder.DEFAULT_STOP_SET;
+   this.matchVersion = matchVersion;
  }

  /**
   * Builds an analyzer with the given stop words.
   */
- public PersianAnalyzer(String[] stopwords) {
+ public PersianAnalyzer(Version matchVersion, String[] stopwords) {
    stoptable = StopFilter.makeStopSet(stopwords);
+   this.matchVersion = matchVersion;
  }

  /**
   * Builds an analyzer with the given stop words.
   */
- public PersianAnalyzer(Hashtable stopwords) {
+ public PersianAnalyzer(Version matchVersion, Hashtable stopwords) {
    stoptable = new HashSet(stopwords.keySet());
+   this.matchVersion = matchVersion;
  }

  /**
   * Builds an analyzer with the given stop words. Lines can be commented out
   * using {@link #STOPWORDS_COMMENT}
   */
- public PersianAnalyzer(File stopwords) throws IOException {
+ public PersianAnalyzer(Version matchVersion, File stopwords) throws IOException {
    stoptable = WordlistLoader.getWordSet(stopwords, STOPWORDS_COMMENT);
+   this.matchVersion = matchVersion;
  }

  /**

@@ -157,8 +162,8 @@ public final class PersianAnalyzer extends Analyzer {
   * the order here is important: the stopword list is normalized with the
   * above!
   */
- result = new StopFilter(false, result, stoptable);
-
+ result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                         result, stoptable);
  return result;
 }

@@ -190,7 +195,8 @@ public final class PersianAnalyzer extends Analyzer {
   * the order here is important: the stopword list is normalized with the
   * above!
   */
- streams.result = new StopFilter(false, streams.result, stoptable);
+ streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                                 streams.result, stoptable);
  setPreviousTokenStream(streams);
 } else {
   streams.source.reset(reader);
FrenchAnalyzer.java

@@ -25,6 +25,7 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;

 import java.io.File;
 import java.io.IOException;

@@ -42,6 +43,17 @@ import java.util.Set;
  * A default set of stopwords is used unless an alternative list is specified, but the
  * exclusion list is empty by default.
  * </p>
+ *
+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating FrenchAnalyzer:
+ * <ul>
+ *   <li> As of 2.9, StopFilter preserves position
+ *        increments
+ * </ul>
+ *
+ * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * dependent settings as {@link StandardAnalyzer}.</p>
  */
 public final class FrenchAnalyzer extends Analyzer {

@@ -82,26 +94,31 @@ public final class FrenchAnalyzer extends Analyzer {
   */
  private Set excltable = new HashSet();

+ private final Version matchVersion;
+
  /**
   * Builds an analyzer with the default stop words ({@link #FRENCH_STOP_WORDS}).
   */
- public FrenchAnalyzer() {
+ public FrenchAnalyzer(Version matchVersion) {
    stoptable = StopFilter.makeStopSet(FRENCH_STOP_WORDS);
+   this.matchVersion = matchVersion;
  }

  /**
   * Builds an analyzer with the given stop words.
   */
- public FrenchAnalyzer(String... stopwords) {
+ public FrenchAnalyzer(Version matchVersion, String... stopwords) {
    stoptable = StopFilter.makeStopSet(stopwords);
+   this.matchVersion = matchVersion;
  }

  /**
   * Builds an analyzer with the given stop words.
   * @throws IOException
   */
- public FrenchAnalyzer(File stopwords) throws IOException {
+ public FrenchAnalyzer(Version matchVersion, File stopwords) throws IOException {
    stoptable = new HashSet(WordlistLoader.getWordSet(stopwords));
+   this.matchVersion = matchVersion;
  }

  /**

@@ -138,9 +155,10 @@ public final class FrenchAnalyzer extends Analyzer {
   * {@link FrenchStemFilter} and {@link LowerCaseFilter}
   */
  public final TokenStream tokenStream(String fieldName, Reader reader) {
-   TokenStream result = new StandardTokenizer(reader);
+   TokenStream result = new StandardTokenizer(matchVersion, reader);
    result = new StandardFilter(result);
-   result = new StopFilter(false, result, stoptable);
+   result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                           result, stoptable);
    result = new FrenchStemFilter(result, excltable);
    // Convert to lowercase after stemming!
    result = new LowerCaseFilter(result);

@@ -165,9 +183,10 @@ public final class FrenchAnalyzer extends Analyzer {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
-     streams.source = new StandardTokenizer(reader);
+     streams.source = new StandardTokenizer(matchVersion, reader);
      streams.result = new StandardFilter(streams.source);
-     streams.result = new StopFilter(false, streams.result, stoptable);
+     streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                                     streams.result, stoptable);
      streams.result = new FrenchStemFilter(streams.result, excltable);
      // Convert to lowercase after stemming!
      streams.result = new LowerCaseFilter(streams.result);
DutchAnalyzer.java

@@ -23,6 +23,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;

 import java.io.File;
 import java.io.IOException;

@@ -42,6 +43,9 @@ import java.util.Map;
  * A default set of stopwords is used unless an alternative list is specified, but the
  * exclusion list is empty by default.
  * </p>
+ *
+ * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * dependent settings as {@link StandardAnalyzer}.</p>
  */
 public class DutchAnalyzer extends Analyzer {
   /**

@@ -73,30 +77,33 @@ public class DutchAnalyzer extends Analyzer {
  private Set excltable = new HashSet();

  private Map stemdict = new HashMap();

+ private final Version matchVersion;
+
  /**
   * Builds an analyzer with the default stop words ({@link #DUTCH_STOP_WORDS})
   * and a few default entries for the stem exclusion table.
   *
   */
- public DutchAnalyzer() {
+ public DutchAnalyzer(Version matchVersion) {
    setOverridesTokenStreamMethod(DutchAnalyzer.class);
    stoptable = StopFilter.makeStopSet(DUTCH_STOP_WORDS);
    stemdict.put("fiets", "fiets"); //otherwise fiet
    stemdict.put("bromfiets", "bromfiets"); //otherwise bromfiet
    stemdict.put("ei", "eier");
    stemdict.put("kind", "kinder");
+   this.matchVersion = matchVersion;
  }

  /**
   * Builds an analyzer with the given stop words.
   *
+  * @param matchVersion
   * @param stopwords
   */
- public DutchAnalyzer(String... stopwords) {
+ public DutchAnalyzer(Version matchVersion, String... stopwords) {
    setOverridesTokenStreamMethod(DutchAnalyzer.class);
    stoptable = StopFilter.makeStopSet(stopwords);
+   this.matchVersion = matchVersion;
  }

  /**

@@ -104,9 +111,10 @@ public class DutchAnalyzer extends Analyzer {
   *
   * @param stopwords
   */
- public DutchAnalyzer(HashSet stopwords) {
+ public DutchAnalyzer(Version matchVersion, HashSet stopwords) {
    setOverridesTokenStreamMethod(DutchAnalyzer.class);
    stoptable = stopwords;
+   this.matchVersion = matchVersion;
  }

  /**

@@ -114,7 +122,7 @@ public class DutchAnalyzer extends Analyzer {
   *
   * @param stopwords
   */
- public DutchAnalyzer(File stopwords) {
+ public DutchAnalyzer(Version matchVersion, File stopwords) {
    setOverridesTokenStreamMethod(DutchAnalyzer.class);
    try {
      stoptable = org.apache.lucene.analysis.WordlistLoader.getWordSet(stopwords);

@@ -122,6 +130,7 @@ public class DutchAnalyzer extends Analyzer {
      // TODO: throw IOException
      throw new RuntimeException(e);
    }
+   this.matchVersion = matchVersion;
  }

  /**

@@ -179,9 +188,10 @@ public class DutchAnalyzer extends Analyzer {
   * and {@link DutchStemFilter}
   */
  public TokenStream tokenStream(String fieldName, Reader reader) {
-   TokenStream result = new StandardTokenizer(reader);
+   TokenStream result = new StandardTokenizer(matchVersion, reader);
    result = new StandardFilter(result);
-   result = new StopFilter(false, result, stoptable);
+   result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                           result, stoptable);
    result = new DutchStemFilter(result, excltable, stemdict);
    return result;
  }

@@ -211,9 +221,10 @@ public class DutchAnalyzer extends Analyzer {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
-     streams.source = new StandardTokenizer(reader);
+     streams.source = new StandardTokenizer(matchVersion, reader);
      streams.result = new StandardFilter(streams.source);
-     streams.result = new StopFilter(false, streams.result, stoptable);
+     streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                                     streams.result, stoptable);
      streams.result = new DutchStemFilter(streams.result, excltable, stemdict);
      setPreviousTokenStream(streams);
    } else {
QueryAutoStopWordAnalyzer.java

@@ -23,6 +23,7 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.Version;

 import java.io.IOException;
 import java.io.Reader;

@@ -48,15 +49,17 @@ public class QueryAutoStopWordAnalyzer extends Analyzer {
  //The default maximum percentage (40%) of index documents which
  //can contain a term, after which the term is considered to be a stop word.
  public static final float defaultMaxDocFreqPercent = 0.4f;
+ private final Version matchVersion;

  /**
   * Initializes this analyzer with the Analyzer object that actually produces the tokens
   *
   * @param delegate The choice of {@link Analyzer} that is used to produce the token stream which needs filtering
   */
- public QueryAutoStopWordAnalyzer(Analyzer delegate) {
+ public QueryAutoStopWordAnalyzer(Version matchVersion, Analyzer delegate) {
    this.delegate = delegate;
    setOverridesTokenStreamMethod(QueryAutoStopWordAnalyzer.class);
+   this.matchVersion = matchVersion;
  }

  /**

@@ -175,7 +178,8 @@ public class QueryAutoStopWordAnalyzer extends Analyzer {
    }
    HashSet stopWords = (HashSet) stopWordsPerField.get(fieldName);
    if (stopWords != null) {
-     result = new StopFilter(false, result, stopWords);
+     result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                             result, stopWords);
    }
    return result;
  }

@@ -217,7 +221,8 @@ public class QueryAutoStopWordAnalyzer extends Analyzer {
    /* if there are any stopwords for the field, save the stopfilter */
    HashSet stopWords = (HashSet) stopWordsPerField.get(fieldName);
    if (stopWords != null)
-     streams.withStopFilter = new StopFilter(false, streams.wrapped, stopWords);
+     streams.withStopFilter = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                                             streams.wrapped, stopWords);
    else
      streams.withStopFilter = streams.wrapped;

@@ -238,7 +243,8 @@ public class QueryAutoStopWordAnalyzer extends Analyzer {
    streams.wrapped = result;
    HashSet stopWords = (HashSet) stopWordsPerField.get(fieldName);
    if (stopWords != null)
-     streams.withStopFilter = new StopFilter(false, streams.wrapped, stopWords);
+     streams.withStopFilter = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                                             streams.wrapped, stopWords);
    else
      streams.withStopFilter = streams.wrapped;
  }
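A hedged usage sketch for the updated constructor. The addStopWords call matches this class's existing API; the IndexReader setup is illustrative:

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.util.Version;

    IndexReader reader = IndexReader.open(dir, true);  // dir: an existing Directory
    QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(
        Version.LUCENE_CURRENT, new StandardAnalyzer(Version.LUCENE_CURRENT));
    a.addStopWords(reader);  // mark terms in >40% of docs as per-field stop words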
RussianAnalyzer.java

@@ -28,6 +28,7 @@ import org.apache.lucene.analysis.LowerCaseFilter;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.util.Version;

 /**
  * {@link Analyzer} for Russian language.

@@ -60,27 +61,31 @@ public final class RussianAnalyzer extends Analyzer
   */
  private Set stopSet = new HashSet();

- public RussianAnalyzer() {
-   this(RUSSIAN_STOP_WORDS);
+ private final Version matchVersion;
+
+ public RussianAnalyzer(Version matchVersion) {
+   this(matchVersion, RUSSIAN_STOP_WORDS);
  }

  /**
   * Builds an analyzer with the given stop words.
   */
- public RussianAnalyzer(String... stopwords)
+ public RussianAnalyzer(Version matchVersion, String... stopwords)
  {
-   super();
-   stopSet = StopFilter.makeStopSet(stopwords);
+   super();
+   stopSet = StopFilter.makeStopSet(stopwords);
+   this.matchVersion = matchVersion;
  }

  /**
   * Builds an analyzer with the given stop words.
   * TODO: create a Set version of this ctor
   */
- public RussianAnalyzer(Map stopwords)
+ public RussianAnalyzer(Version matchVersion, Map stopwords)
  {
-   super();
-   stopSet = new HashSet(stopwords.keySet());
+   super();
+   stopSet = new HashSet(stopwords.keySet());
+   this.matchVersion = matchVersion;
  }

  /**

@@ -96,7 +101,8 @@ public final class RussianAnalyzer extends Analyzer
  {
    TokenStream result = new RussianLetterTokenizer(reader);
    result = new LowerCaseFilter(result);
-   result = new StopFilter(false, result, stopSet);
+   result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                           result, stopSet);
    result = new RussianStemFilter(result);
    return result;
  }

@@ -122,7 +128,8 @@ public final class RussianAnalyzer extends Analyzer
      streams = new SavedStreams();
      streams.source = new RussianLetterTokenizer(reader);
      streams.result = new LowerCaseFilter(streams.source);
-     streams.result = new StopFilter(false, streams.result, stopSet);
+     streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                                     streams.result, stopSet);
      streams.result = new RussianStemFilter(streams.result);
      setPreviousTokenStream(streams);
    } else {
RussianLowerCaseFilter.java (deleted)

@@ -1,56 +0,0 @@
-package org.apache.lucene.analysis.ru;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.io.IOException;
-
-import org.apache.lucene.analysis.LowerCaseFilter; // for javadoc
-import org.apache.lucene.analysis.TokenFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
-
-/**
- * Normalizes token text to lower case.
- * @deprecated Use {@link LowerCaseFilter} instead, which has the same
- *  functionality. This filter will be removed in Lucene 3.1
- */
-public final class RussianLowerCaseFilter extends TokenFilter
-{
-  private TermAttribute termAtt;
-
-  public RussianLowerCaseFilter(TokenStream in)
-  {
-    super(in);
-    termAtt = addAttribute(TermAttribute.class);
-  }
-
-  public final boolean incrementToken() throws IOException
-  {
-    if (input.incrementToken()) {
-      char[] chArray = termAtt.termBuffer();
-      int chLen = termAtt.termLength();
-      for (int i = 0; i < chLen; i++)
-      {
-        chArray[i] = Character.toLowerCase(chArray[i]);
-      }
-      return true;
-    } else {
-      return false;
-    }
-  }
-}
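The deleted filter's own javadoc names the replacement, and the loop above is the same per-character Character.toLowerCase pass over the term buffer that LowerCaseFilter performs, so migration is a drop-in swap:

    // Before: ts = new RussianLowerCaseFilter(ts);
    ts = new LowerCaseFilter(ts);  // same behavior, per the @deprecated note above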
ThaiAnalyzer.java

@@ -25,22 +25,29 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;

 /**
  * {@link Analyzer} for Thai language. It uses {@link java.text.BreakIterator} to break words.
  * @version 0.2
+ *
+ * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * dependent settings as {@link StandardAnalyzer}.</p>
  */
 public class ThaiAnalyzer extends Analyzer {

- public ThaiAnalyzer() {
+ private final Version matchVersion;
+
+ public ThaiAnalyzer(Version matchVersion) {
    setOverridesTokenStreamMethod(ThaiAnalyzer.class);
+   this.matchVersion = matchVersion;
  }

  public TokenStream tokenStream(String fieldName, Reader reader) {
-   TokenStream ts = new StandardTokenizer(reader);
+   TokenStream ts = new StandardTokenizer(matchVersion, reader);
    ts = new StandardFilter(ts);
    ts = new ThaiWordFilter(ts);
-   ts = new StopFilter(false, ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+   ts = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                       ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    return ts;
  }

@@ -60,10 +67,11 @@ public class ThaiAnalyzer extends Analyzer {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
      streams = new SavedStreams();
-     streams.source = new StandardTokenizer(reader);
+     streams.source = new StandardTokenizer(matchVersion, reader);
      streams.result = new StandardFilter(streams.source);
      streams.result = new ThaiWordFilter(streams.result);
-     streams.result = new StopFilter(false, streams.result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+     streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                                     streams.result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
      setPreviousTokenStream(streams);
    } else {
      streams.source.reset(reader);
TestArabicAnalyzer.java

@@ -22,6 +22,7 @@ import java.io.StringReader;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;

 /**
  * Test the Arabic Analyzer

@@ -32,14 +33,14 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
  /** This test fails with NPE when the
   * stopwords file is missing in classpath */
  public void testResourcesAvailable() {
-   new ArabicAnalyzer();
+   new ArabicAnalyzer(Version.LUCENE_CURRENT);
  }

  /**
   * Some simple tests showing some features of the analyzer, how some regular forms will conflate
   */
  public void testBasicFeatures() throws Exception {
-   ArabicAnalyzer a = new ArabicAnalyzer();
+   ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT);
    assertAnalyzesTo(a, "كبير", new String[] { "كبير" });
    assertAnalyzesTo(a, "كبيرة", new String[] { "كبير" }); // feminine marker

@@ -60,7 +61,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
   * Simple tests to show things are getting reset correctly, etc.
   */
  public void testReusableTokenStream() throws Exception {
-   ArabicAnalyzer a = new ArabicAnalyzer();
+   ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT);
    assertAnalyzesToReuse(a, "كبير", new String[] { "كبير" });
    assertAnalyzesToReuse(a, "كبيرة", new String[] { "كبير" }); // feminine marker
  }

@@ -69,7 +70,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
   * Non-arabic text gets treated in a similar way as SimpleAnalyzer.
   */
  public void testEnglishInput() throws Exception {
-   assertAnalyzesTo(new ArabicAnalyzer(), "English text.", new String[] {
+   assertAnalyzesTo(new ArabicAnalyzer(Version.LUCENE_CURRENT), "English text.", new String[] {
        "english", "text" });
  }

@@ -77,7 +78,7 @@ public class TestArabicAnalyzer extends BaseTokenStreamTestCase {
   * Test that custom stopwords work, and are not case-sensitive.
   */
  public void testCustomStopwords() throws Exception {
-   ArabicAnalyzer a = new ArabicAnalyzer(new String[] { "the", "and", "a" });
+   ArabicAnalyzer a = new ArabicAnalyzer(Version.LUCENE_CURRENT, new String[] { "the", "and", "a" });
    assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
        "brown", "fox" });
  }
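These tests all pass Version.LUCENE_CURRENT, exercising the new 2.9 defaults. A test pinning an older constant would presumably keep the pre-2.9 StopFilter semantics instead (illustrative, not part of the commit):

    ArabicAnalyzer current = new ArabicAnalyzer(Version.LUCENE_CURRENT); // positions preserved
    ArabicAnalyzer legacy  = new ArabicAnalyzer(Version.LUCENE_24);      // old increment behavior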
TestBrazilianStemmer.java

@@ -21,6 +21,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.util.Version;

 /**
  * Test the Brazilian Stem Filter, which only modifies the term text.

@@ -123,7 +124,7 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
  }

  public void testReusableTokenStream() throws Exception {
-   Analyzer a = new BrazilianAnalyzer();
+   Analyzer a = new BrazilianAnalyzer(Version.LUCENE_CURRENT);
    checkReuse(a, "boa", "boa");
    checkReuse(a, "boainain", "boainain");
    checkReuse(a, "boas", "boas");

@@ -131,7 +132,7 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
  }

  public void testStemExclusionTable() throws Exception {
-   BrazilianAnalyzer a = new BrazilianAnalyzer();
+   BrazilianAnalyzer a = new BrazilianAnalyzer(Version.LUCENE_CURRENT);
    a.setStemExclusionTable(new String[] { "quintessência" });
    checkReuse(a, "quintessência", "quintessência"); // excluded words will be completely unchanged.
  }

@@ -141,14 +142,14 @@ public class TestBrazilianStemmer extends BaseTokenStreamTestCase {
   * when using reusable token streams.
   */
  public void testExclusionTableReuse() throws Exception {
-   BrazilianAnalyzer a = new BrazilianAnalyzer();
+   BrazilianAnalyzer a = new BrazilianAnalyzer(Version.LUCENE_CURRENT);
    checkReuse(a, "quintessência", "quintessente");
    a.setStemExclusionTable(new String[] { "quintessência" });
    checkReuse(a, "quintessência", "quintessência");
  }

  private void check(final String input, final String expected) throws Exception {
-   checkOneTerm(new BrazilianAnalyzer(), input, expected);
+   checkOneTerm(new BrazilianAnalyzer(Version.LUCENE_CURRENT), input, expected);
  }

  private void checkReuse(Analyzer a, String input, String expected) throws Exception {
TestCJKTokenizer.java

@@ -26,7 +26,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
-
+import org.apache.lucene.util.Version;

 public class TestCJKTokenizer extends BaseTokenStreamTestCase {

@@ -218,7 +218,7 @@ public class TestCJKTokenizer extends BaseTokenStreamTestCase {
  }

  public void testTokenStream() throws Exception {
-   Analyzer analyzer = new CJKAnalyzer();
+   Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
    TokenStream ts = analyzer.tokenStream("dummy", new StringReader("\u4e00\u4e01\u4e02"));
    TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
    assertTrue(ts.incrementToken());

@@ -229,7 +229,7 @@ public class TestCJKTokenizer extends BaseTokenStreamTestCase {
  }

  public void testReusableTokenStream() throws Exception {
-   Analyzer analyzer = new CJKAnalyzer();
+   Analyzer analyzer = new CJKAnalyzer(Version.LUCENE_CURRENT);
    String str = "\u3042\u3044\u3046\u3048\u304aabc\u304b\u304d\u304f\u3051\u3053";

    TestToken[] out_tokens = {
TestCzechAnalyzer.java

@@ -25,6 +25,7 @@ import java.io.InputStream;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.util.Version;

 /**
  * Test the CzechAnalyzer

@@ -37,11 +38,11 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
  File customStopFile = new File(dataDir, "org/apache/lucene/analysis/cz/customStopWordFile.txt");

  public void testStopWord() throws Exception {
-   assertAnalyzesTo(new CzechAnalyzer(), "Pokud mluvime o volnem", new String[] { "mluvime", "volnem" });
+   assertAnalyzesTo(new CzechAnalyzer(Version.LUCENE_CURRENT), "Pokud mluvime o volnem", new String[] { "mluvime", "volnem" });
  }

  public void testReusableTokenStream() throws Exception {
-   Analyzer analyzer = new CzechAnalyzer();
+   Analyzer analyzer = new CzechAnalyzer(Version.LUCENE_CURRENT);
    assertAnalyzesToReuse(analyzer, "Pokud mluvime o volnem", new String[] { "mluvime", "volnem" });
    assertAnalyzesToReuse(analyzer, "Česká Republika", new String[] { "česká", "republika" });
  }

@@ -61,7 +62,7 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
   * this would cause a NPE when it is time to create the StopFilter.
   */
  public void testInvalidStopWordFile() throws Exception {
-   CzechAnalyzer cz = new CzechAnalyzer();
+   CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
    cz.loadStopWords(new UnreliableInputStream(), "UTF-8");
    assertAnalyzesTo(cz, "Pokud mluvime o volnem",
        new String[] { "pokud", "mluvime", "o", "volnem" });

@@ -72,7 +73,7 @@ public class TestCzechAnalyzer extends BaseTokenStreamTestCase {
   * when using reusable token streams.
   */
  public void testStopWordFileReuse() throws Exception {
-   CzechAnalyzer cz = new CzechAnalyzer();
+   CzechAnalyzer cz = new CzechAnalyzer(Version.LUCENE_CURRENT);
    assertAnalyzesToReuse(cz, "Česká Republika",
        new String[] { "česká", "republika" });
TestGermanStemFilter.java

@@ -28,6 +28,7 @@ import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.apache.lucene.util.Version;

 /**
  * Test the German stemmer. The stemming algorithm is known to work less

@@ -61,7 +62,7 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase {
  }

  public void testReusableTokenStream() throws Exception {
-   Analyzer a = new GermanAnalyzer();
+   Analyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
    checkReuse(a, "Tisch", "tisch");
    checkReuse(a, "Tische", "tisch");
    checkReuse(a, "Tischen", "tisch");

@@ -71,13 +72,17 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase {
   * subclass that acts just like whitespace analyzer for testing
   */
  private class GermanSubclassAnalyzer extends GermanAnalyzer {
+   public GermanSubclassAnalyzer(Version matchVersion) {
+     super(matchVersion);
+   }
+
    public TokenStream tokenStream(String fieldName, Reader reader) {
      return new WhitespaceTokenizer(reader);
    }
  }

  public void testLUCENE1678BWComp() throws Exception {
-   checkReuse(new GermanSubclassAnalyzer(), "Tischen", "Tischen");
+   checkReuse(new GermanSubclassAnalyzer(Version.LUCENE_CURRENT), "Tischen", "Tischen");
  }

  /*

@@ -85,14 +90,14 @@ public class TestGermanStemFilter extends BaseTokenStreamTestCase {
   * when using reusable token streams.
   */
  public void testExclusionTableReuse() throws Exception {
-   GermanAnalyzer a = new GermanAnalyzer();
+   GermanAnalyzer a = new GermanAnalyzer(Version.LUCENE_CURRENT);
    checkReuse(a, "tischen", "tisch");
    a.setStemExclusionTable(new String[] { "tischen" });
    checkReuse(a, "tischen", "tischen");
  }

  private void check(final String input, final String expected) throws Exception {
-   checkOneTerm(new GermanAnalyzer(), input, expected);
+   checkOneTerm(new GermanAnalyzer(Version.LUCENE_CURRENT), input, expected);
  }

  private void checkReuse(Analyzer a, String input, String expected) throws Exception {
GreekAnalyzerTest.java

@@ -19,7 +19,7 @@ package org.apache.lucene.analysis.el;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
-
+import org.apache.lucene.util.Version;

 /**
  * A unit test class for verifying the correct operation of the GreekAnalyzer.

@@ -33,7 +33,7 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
   * @throws Exception in case an error occurs
   */
  public void testAnalyzer() throws Exception {
-   Analyzer a = new GreekAnalyzer();
+   Analyzer a = new GreekAnalyzer(Version.LUCENE_CURRENT);
    // Verify the correct analysis of capitals and small accented letters
    assertAnalyzesTo(a, "\u039c\u03af\u03b1 \u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03ac \u03ba\u03b1\u03bb\u03ae \u03ba\u03b1\u03b9 \u03c0\u03bb\u03bf\u03cd\u03c3\u03b9\u03b1 \u03c3\u03b5\u03b9\u03c1\u03ac \u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03ae\u03c1\u03c9\u03bd \u03c4\u03b7\u03c2 \u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2",
        new String[] { "\u03bc\u03b9\u03b1", "\u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03b1", "\u03ba\u03b1\u03bb\u03b7", "\u03c0\u03bb\u03bf\u03c5\u03c3\u03b9\u03b1", "\u03c3\u03b5\u03b9\u03c1\u03b1", "\u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03b7\u03c1\u03c9\u03bd",

@@ -49,7 +49,7 @@ public class GreekAnalyzerTest extends BaseTokenStreamTestCase {
  }

  public void testReusableTokenStream() throws Exception {
-   Analyzer a = new GreekAnalyzer();
+   Analyzer a = new GreekAnalyzer(Version.LUCENE_CURRENT);
    // Verify the correct analysis of capitals and small accented letters
    assertAnalyzesToReuse(a, "\u039c\u03af\u03b1 \u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03ac \u03ba\u03b1\u03bb\u03ae \u03ba\u03b1\u03b9 \u03c0\u03bb\u03bf\u03cd\u03c3\u03b9\u03b1 \u03c3\u03b5\u03b9\u03c1\u03ac \u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03ae\u03c1\u03c9\u03bd \u03c4\u03b7\u03c2 \u0395\u03bb\u03bb\u03b7\u03bd\u03b9\u03ba\u03ae\u03c2 \u03b3\u03bb\u03ce\u03c3\u03c3\u03b1\u03c2",
        new String[] { "\u03bc\u03b9\u03b1", "\u03b5\u03be\u03b1\u03b9\u03c1\u03b5\u03c4\u03b9\u03ba\u03b1", "\u03ba\u03b1\u03bb\u03b7", "\u03c0\u03bb\u03bf\u03c5\u03c3\u03b9\u03b1", "\u03c3\u03b5\u03b9\u03c1\u03b1", "\u03c7\u03b1\u03c1\u03b1\u03ba\u03c4\u03b7\u03c1\u03c9\u03bd",
|
|
@@ -22,6 +22,7 @@ import java.io.StringReader;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.util.Version;
 
 /**
  * Test the Persian Analyzer
@@ -33,7 +34,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * This test fails with NPE when the stopwords file is missing in classpath
    */
   public void testResourcesAvailable() {
-    new PersianAnalyzer();
+    new PersianAnalyzer(Version.LUCENE_CURRENT);
   }
 
   /**
@@ -44,7 +45,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar
    */
   public void testBehaviorVerbs() throws Exception {
-    Analyzer a = new PersianAnalyzer();
+    Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
     // active present indicative
     assertAnalyzesTo(a, "میخورد", new String[] { "خورد" });
     // active preterite indicative
@@ -120,7 +121,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * These verb forms are from http://en.wikipedia.org/wiki/Persian_grammar
    */
   public void testBehaviorVerbsDefective() throws Exception {
-    Analyzer a = new PersianAnalyzer();
+    Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
     // active present indicative
     assertAnalyzesTo(a, "مي خورد", new String[] { "خورد" });
     // active preterite indicative
@@ -191,7 +192,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * nouns, removing the plural -ha.
    */
   public void testBehaviorNouns() throws Exception {
-    Analyzer a = new PersianAnalyzer();
+    Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
     assertAnalyzesTo(a, "برگ ها", new String[] { "برگ" });
     assertAnalyzesTo(a, "برگها", new String[] { "برگ" });
   }
@@ -201,7 +202,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * (lowercased, etc)
    */
   public void testBehaviorNonPersian() throws Exception {
-    Analyzer a = new PersianAnalyzer();
+    Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
     assertAnalyzesTo(a, "English test.", new String[] { "english", "test" });
   }
 
@@ -209,7 +210,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * Basic test ensuring that reusableTokenStream works correctly.
    */
   public void testReusableTokenStream() throws Exception {
-    Analyzer a = new PersianAnalyzer();
+    Analyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT);
     assertAnalyzesToReuse(a, "خورده مي شده بوده باشد", new String[] { "خورده" });
     assertAnalyzesToReuse(a, "برگها", new String[] { "برگ" });
   }
@@ -218,7 +219,7 @@ public class TestPersianAnalyzer extends BaseTokenStreamTestCase {
    * Test that custom stopwords work, and are not case-sensitive.
    */
   public void testCustomStopwords() throws Exception {
-    PersianAnalyzer a = new PersianAnalyzer(new String[] { "the", "and", "a" });
+    PersianAnalyzer a = new PersianAnalyzer(Version.LUCENE_CURRENT, new String[] { "the", "and", "a" });
     assertAnalyzesTo(a, "The quick brown fox.", new String[] { "quick",
         "brown", "fox" });
   }
@@ -29,6 +29,7 @@ import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.util.Version;
 
 /**
  *
@@ -37,7 +38,7 @@ public class TestElision extends BaseTokenStreamTestCase {
 
   public void testElision() throws Exception {
     String test = "Plop, juste pour voir l'embrouille avec O'brian. M'enfin.";
-    Tokenizer tokenizer = new StandardTokenizer(new StringReader(test));
+    Tokenizer tokenizer = new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(test));
     Set articles = new HashSet();
     articles.add("l");
     articles.add("M");
@@ -22,6 +22,7 @@ import java.io.StringReader;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.util.Version;
 
 /**
  * Test case for FrenchAnalyzer.
@@ -32,7 +33,7 @@ import org.apache.lucene.analysis.TokenStream;
 public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
 
   public void testAnalyzer() throws Exception {
-    FrenchAnalyzer fa = new FrenchAnalyzer();
+    FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
 
     assertAnalyzesTo(fa, "", new String[] {
     });
@@ -116,7 +117,7 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
   }
 
   public void testReusableTokenStream() throws Exception {
-    FrenchAnalyzer fa = new FrenchAnalyzer();
+    FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
     // stopwords
     assertAnalyzesToReuse(
         fa,
@@ -141,7 +142,7 @@ public class TestFrenchAnalyzer extends BaseTokenStreamTestCase {
    * when using reusable token streams.
    */
   public void testExclusionTableReuse() throws Exception {
-    FrenchAnalyzer fa = new FrenchAnalyzer();
+    FrenchAnalyzer fa = new FrenchAnalyzer(Version.LUCENE_CURRENT);
     assertAnalyzesToReuse(fa, "habitable", new String[] { "habit" });
     fa.setStemExclusionTable(new String[] { "habitable" });
     assertAnalyzesToReuse(fa, "habitable", new String[] { "habitable" });
@@ -24,6 +24,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.util.Version;
 
 /**
  * Test the Dutch Stem Filter, which only modifies the term text.
@@ -119,7 +120,7 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
   }
 
   public void testReusableTokenStream() throws Exception {
-    Analyzer a = new DutchAnalyzer();
+    Analyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
     checkOneTermReuse(a, "lichaamsziek", "lichaamsziek");
     checkOneTermReuse(a, "lichamelijk", "licham");
     checkOneTermReuse(a, "lichamelijke", "licham");
@@ -130,13 +131,16 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
    * subclass that acts just like whitespace analyzer for testing
    */
   private class DutchSubclassAnalyzer extends DutchAnalyzer {
+    public DutchSubclassAnalyzer(Version matchVersion) {
+      super(matchVersion);
+    }
     public TokenStream tokenStream(String fieldName, Reader reader) {
       return new WhitespaceTokenizer(reader);
     }
   }
 
   public void testLUCENE1678BWComp() throws Exception {
-    Analyzer a = new DutchSubclassAnalyzer();
+    Analyzer a = new DutchSubclassAnalyzer(Version.LUCENE_CURRENT);
     checkOneTermReuse(a, "lichaamsziek", "lichaamsziek");
     checkOneTermReuse(a, "lichamelijk", "lichamelijk");
     checkOneTermReuse(a, "lichamelijke", "lichamelijke");
@@ -148,7 +152,7 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
    * when using reusable token streams.
    */
   public void testExclusionTableReuse() throws Exception {
-    DutchAnalyzer a = new DutchAnalyzer();
+    DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
     checkOneTermReuse(a, "lichamelijk", "licham");
     a.setStemExclusionTable(new String[] { "lichamelijk" });
     checkOneTermReuse(a, "lichamelijk", "lichamelijk");
@@ -159,14 +163,14 @@ public class TestDutchStemmer extends BaseTokenStreamTestCase {
    * when using reusable token streams.
    */
   public void testStemDictionaryReuse() throws Exception {
-    DutchAnalyzer a = new DutchAnalyzer();
+    DutchAnalyzer a = new DutchAnalyzer(Version.LUCENE_CURRENT);
     checkOneTermReuse(a, "lichamelijk", "licham");
     a.setStemDictionary(customDictFile);
     checkOneTermReuse(a, "lichamelijk", "somethingentirelydifferent");
   }
 
   private void check(final String input, final String expected) throws Exception {
-    checkOneTerm(new DutchAnalyzer(), input, expected);
+    checkOneTerm(new DutchAnalyzer(Version.LUCENE_CURRENT), input, expected);
   }
 
 }
@@ -37,6 +37,7 @@ import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.Version;
 
 public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
   String variedFieldValues[] = {"the", "quick", "brown", "fox", "jumped", "over", "the", "lazy", "boring", "dog"};
@@ -62,7 +63,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
     }
     writer.close();
     reader = IndexReader.open(dir, true);
-    protectedAnalyzer = new QueryAutoStopWordAnalyzer(appAnalyzer);
+    protectedAnalyzer = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, appAnalyzer);
   }
 
   protected void tearDown() throws Exception {
@@ -72,7 +73,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
 
   //Helper method to query
   private int search(Analyzer a, String queryString) throws IOException, ParseException {
-    QueryParser qp = new QueryParser("repetitiveField", a);
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "repetitiveField", a);
     Query q = qp.parse(queryString);
     return new IndexSearcher(reader).search(q, null, 1000).totalHits;
   }
@@ -149,8 +150,8 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
    * subclass that acts just like whitespace analyzer for testing
    */
   private class QueryAutoStopWordSubclassAnalyzer extends QueryAutoStopWordAnalyzer {
-    public QueryAutoStopWordSubclassAnalyzer() {
-      super(new WhitespaceAnalyzer());
+    public QueryAutoStopWordSubclassAnalyzer(Version matchVersion) {
+      super(matchVersion, new WhitespaceAnalyzer());
     }
 
     public TokenStream tokenStream(String fieldName, Reader reader) {
@@ -159,7 +160,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
   }
 
   public void testLUCENE1678BWComp() throws Exception {
-    QueryAutoStopWordAnalyzer a = new QueryAutoStopWordSubclassAnalyzer();
+    QueryAutoStopWordAnalyzer a = new QueryAutoStopWordSubclassAnalyzer(Version.LUCENE_CURRENT);
     a.addStopWords(reader, "repetitiveField", 10);
     int numHits = search(a, "repetitiveField:boring");
    assertFalse(numHits == 0);
@@ -180,7 +181,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
   }
 
   public void testWrappingNonReusableAnalyzer() throws Exception {
-    QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(new NonreusableAnalyzer());
+    QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, new NonreusableAnalyzer());
     a.addStopWords(reader, 10);
     int numHits = search(a, "repetitiveField:boring");
     assertTrue(numHits == 0);
@@ -189,7 +190,7 @@ public class QueryAutoStopWordAnalyzerTest extends BaseTokenStreamTestCase {
   }
 
   public void testTokenStream() throws Exception {
-    QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(new WhitespaceAnalyzer());
+    QueryAutoStopWordAnalyzer a = new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, new WhitespaceAnalyzer());
     a.addStopWords(reader, 10);
     TokenStream ts = a.tokenStream("repetitiveField", new StringReader("this boring"));
     TermAttribute termAtt = ts.getAttribute(TermAttribute.class);
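The same convention applies to the wrapping analyzer exercised above. A sketch of caller code under the new signature (the IndexReader is assumed to be already open; the document-frequency threshold of 10 is taken from the tests):

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.WhitespaceAnalyzer;
    import org.apache.lucene.analysis.query.QueryAutoStopWordAnalyzer;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.util.Version;

    class StopWordWrapping {
      // 'reader' is assumed to be an open IndexReader over the target index.
      static Analyzer wrap(IndexReader reader) throws java.io.IOException {
        // The Version argument now leads, before the delegate analyzer.
        QueryAutoStopWordAnalyzer a =
            new QueryAutoStopWordAnalyzer(Version.LUCENE_CURRENT, new WhitespaceAnalyzer());
        a.addStopWords(reader, 10); // derive stopwords from frequent index terms
        return a;
      }
    }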
@@ -28,6 +28,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.util.Version;
 
 /**
  * Test case for RussianAnalyzer.
@@ -49,7 +50,7 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
 
     public void testUnicode() throws IOException
     {
-        RussianAnalyzer ra = new RussianAnalyzer();
+        RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_CURRENT);
         inWords =
             new InputStreamReader(
                 new FileInputStream(new File(dataDir, "/org/apache/lucene/analysis/ru/testUTF8.txt")),
@@ -90,7 +91,7 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
     public void testDigitsInRussianCharset()
     {
         Reader reader = new StringReader("text 1000");
-        RussianAnalyzer ra = new RussianAnalyzer();
+        RussianAnalyzer ra = new RussianAnalyzer(Version.LUCENE_CURRENT);
         TokenStream stream = ra.tokenStream("", reader);
 
         TermAttribute termText = stream.getAttribute(TermAttribute.class);
@@ -108,7 +109,7 @@ public class TestRussianAnalyzer extends BaseTokenStreamTestCase
     }
 
     public void testReusableTokenStream() throws Exception {
-      Analyzer a = new RussianAnalyzer();
+      Analyzer a = new RussianAnalyzer(Version.LUCENE_CURRENT);
       assertAnalyzesToReuse(a, "Вместе с тем о силе электромагнитной энергии имели представление еще",
           new String[] { "вмест", "сил", "электромагнитн", "энерг", "имел", "представлен" });
       assertAnalyzesToReuse(a, "Но знание это хранилось в тайне",
@@ -42,6 +42,7 @@ import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.Version;
 
 /**
  * A test class for ShingleAnalyzerWrapper as regards queries and scoring.
@@ -85,7 +86,7 @@ public class ShingleAnalyzerWrapperTest extends BaseTokenStreamTestCase {
   protected ScoreDoc[] queryParsingTest(Analyzer analyzer, String qs) throws Exception {
     searcher = setUpSearcher(analyzer);
 
-    QueryParser qp = new QueryParser("content", analyzer);
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "content", analyzer);
 
     Query q = qp.parse(qs);
 
@@ -23,6 +23,7 @@ import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceTokenizer;
+import org.apache.lucene.util.Version;
 
 /**
  * Test case for ThaiAnalyzer, modified from TestFrenchAnalyzer
@@ -36,7 +37,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
    * testcase for offsets
    */
   public void testOffsets() throws Exception {
-    assertAnalyzesTo(new ThaiAnalyzer(), "เดอะนิวยอร์กไทมส์",
+    assertAnalyzesTo(new ThaiAnalyzer(Version.LUCENE_CURRENT), "เดอะนิวยอร์กไทมส์",
         new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์"},
         new int[] { 0, 2, 7, 9, 12 },
         new int[] { 2, 7, 9, 12, 17});
@@ -54,7 +55,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
    * Instead, allow the definition of alphanum to include relevant categories like nonspacing marks!
    */
   public void testBuggyTokenType() throws Exception {
-    assertAnalyzesTo(new ThaiAnalyzer(), "เดอะนิวยอร์กไทมส์ ๑๒๓",
+    assertAnalyzesTo(new ThaiAnalyzer(Version.LUCENE_CURRENT), "เดอะนิวยอร์กไทมส์ ๑๒๓",
         new String[] { "เด", "อะนิว", "ยอ", "ร์ก", "ไทมส์", "๑๒๓" },
        new String[] { "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>", "<ALPHANUM>" });
   }
@@ -68,7 +69,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
    */
 
   public void testAnalyzer() throws Exception {
-    ThaiAnalyzer analyzer = new ThaiAnalyzer();
+    ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT);
 
     assertAnalyzesTo(analyzer, "", new String[] {});
 
@@ -90,7 +91,7 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
   }
 
   public void testReusableTokenStream() throws Exception {
-    ThaiAnalyzer analyzer = new ThaiAnalyzer();
+    ThaiAnalyzer analyzer = new ThaiAnalyzer(Version.LUCENE_CURRENT);
     assertAnalyzesToReuse(analyzer, "", new String[] {});
 
     assertAnalyzesToReuse(
@@ -108,13 +109,16 @@ public class TestThaiAnalyzer extends BaseTokenStreamTestCase {
    * subclass that acts just like whitespace analyzer for testing
    */
   private class ThaiSubclassAnalyzer extends ThaiAnalyzer {
+    public ThaiSubclassAnalyzer(Version matchVersion) {
+      super(matchVersion);
+    }
     public TokenStream tokenStream(String fieldName, Reader reader) {
       return new WhitespaceTokenizer(reader);
     }
   }
 
   public void testLUCENE1678BWComp() throws Exception {
-    ThaiSubclassAnalyzer a = new ThaiSubclassAnalyzer();
+    ThaiSubclassAnalyzer a = new ThaiSubclassAnalyzer(Version.LUCENE_CURRENT);
     assertAnalyzesToReuse(a, "การที่ได้ต้องแสดงว่างานดี", new String[] { "การที่ได้ต้องแสดงว่างานดี" });
   }
 }
@@ -32,6 +32,7 @@ import org.apache.lucene.analysis.Tokenizer;
 import org.apache.lucene.analysis.WordlistLoader;
 import org.apache.lucene.analysis.cn.smart.SentenceTokenizer;
 import org.apache.lucene.analysis.cn.smart.WordTokenFilter;
+import org.apache.lucene.util.Version;
 
 /**
  * <p>
@@ -103,11 +104,13 @@ public class SmartChineseAnalyzer extends Analyzer {
     }
   }
 
+  private final Version matchVersion;
+
   /**
   * Create a new SmartChineseAnalyzer, using the default stopword list.
   */
-  public SmartChineseAnalyzer() {
-    this(true);
+  public SmartChineseAnalyzer(Version matchVersion) {
+    this(matchVersion, true);
   }
 
   /**
@@ -121,9 +124,10 @@ public class SmartChineseAnalyzer extends Analyzer {
   *
   * @param useDefaultStopWords true to use the default stopword list.
   */
-  public SmartChineseAnalyzer(boolean useDefaultStopWords) {
+  public SmartChineseAnalyzer(Version matchVersion, boolean useDefaultStopWords) {
     stopWords = useDefaultStopWords ? DefaultSetHolder.DEFAULT_STOP_SET
-        : Collections.EMPTY_SET;
+      : Collections.EMPTY_SET;
+    this.matchVersion = matchVersion;
   }
 
   /**
@@ -135,8 +139,9 @@ public class SmartChineseAnalyzer extends Analyzer {
   * </p>
   * @param stopWords {@link Set} of stopwords to use.
   */
-  public SmartChineseAnalyzer(Set stopWords) {
+  public SmartChineseAnalyzer(Version matchVersion, Set stopWords) {
     this.stopWords = stopWords==null?Collections.EMPTY_SET:stopWords;
+    this.matchVersion = matchVersion;
   }
 
   public TokenStream tokenStream(String fieldName, Reader reader) {
@@ -147,7 +152,8 @@ public class SmartChineseAnalyzer extends Analyzer {
     // The porter stemming is too strict, this is not a bug, this is a feature:)
     result = new PorterStemFilter(result);
     if (!stopWords.isEmpty()) {
-      result = new StopFilter(false,result, stopWords, false);
+      result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                              result, stopWords, false);
     }
     return result;
   }
@@ -167,7 +173,8 @@ public class SmartChineseAnalyzer extends Analyzer {
       streams.filteredTokenStream = new WordTokenFilter(streams.tokenStream);
       streams.filteredTokenStream = new PorterStemFilter(streams.filteredTokenStream);
       if (!stopWords.isEmpty()) {
-        streams.filteredTokenStream = new StopFilter(false, streams.filteredTokenStream, stopWords, false);
+        streams.filteredTokenStream = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                                                     streams.filteredTokenStream, stopWords, false);
       }
     } else {
      streams.tokenStream.reset(reader);
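SmartChineseAnalyzer now records the matchVersion at construction time and consults it only where behavior is version-dependent: StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion) turns position increments on for 2.9+. A minimal usage sketch, assuming the smartcn contrib jar (with its bundled stopwords) is on the classpath; the field name and sample text are illustrative:

    import java.io.StringReader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.cn.SmartChineseAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.TermAttribute;
    import org.apache.lucene.util.Version;

    public class SmartChineseUsage {
      public static void main(String[] args) throws Exception {
        // The Version argument is now required; LUCENE_CURRENT selects the
        // newest defaults (enablePositionIncrements=true as of 2.9).
        Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT);
        TokenStream ts = ca.tokenStream("body", new StringReader("我购买了道具和服装。"));
        TermAttribute term = ts.getAttribute(TermAttribute.class);
        while (ts.incrementToken()) {   // print each token the chain produces
          System.out.println(term.term());
        }
      }
    }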
@@ -26,16 +26,17 @@ import java.util.Date;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.util.Version;
 
 public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
 
   public void testChineseStopWordsDefault() throws Exception {
-    Analyzer ca = new SmartChineseAnalyzer(); /* will load stopwords */
+    Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT); /* will load stopwords */
     String sentence = "我购买了道具和服装。";
     String result[] = { "我", "购买", "了", "道具", "和", "服装" };
     assertAnalyzesTo(ca, sentence, result);
     // set stop-words from the outer world - must yield same behavior
-    ca = new SmartChineseAnalyzer(SmartChineseAnalyzer.getDefaultStopSet());
+    ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT, SmartChineseAnalyzer.getDefaultStopSet());
     assertAnalyzesTo(ca, sentence, result);
   }
 
@@ -44,7 +45,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
    * This tests to ensure the SentenceTokenizer->WordTokenFilter chain works correctly.
    */
   public void testChineseStopWordsDefaultTwoPhrases() throws Exception {
-    Analyzer ca = new SmartChineseAnalyzer(); /* will load stopwords */
+    Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT); /* will load stopwords */
     String sentence = "我购买了道具和服装。 我购买了道具和服装。";
     String result[] = { "我", "购买", "了", "道具", "和", "服装", "我", "购买", "了", "道具", "和", "服装" };
     assertAnalyzesTo(ca, sentence, result);
@@ -55,7 +56,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
    * This tests to ensure the stopwords are working correctly.
    */
   public void testChineseStopWordsDefaultTwoPhrasesIdeoSpace() throws Exception {
-    Analyzer ca = new SmartChineseAnalyzer(); /* will load stopwords */
+    Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT); /* will load stopwords */
     String sentence = "我购买了道具和服装　我购买了道具和服装。";
     String result[] = { "我", "购买", "了", "道具", "和", "服装", "我", "购买", "了", "道具", "和", "服装" };
     assertAnalyzesTo(ca, sentence, result);
@@ -69,8 +70,8 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
    */
   public void testChineseStopWordsOff() throws Exception {
     Analyzer[] analyzers = new Analyzer[] {
-      new SmartChineseAnalyzer(false),/* doesn't load stopwords */
-      new SmartChineseAnalyzer(null) /* sets stopwords to empty set */};
+      new SmartChineseAnalyzer(Version.LUCENE_CURRENT, false),/* doesn't load stopwords */
+      new SmartChineseAnalyzer(Version.LUCENE_CURRENT, null) /* sets stopwords to empty set */};
     String sentence = "我购买了道具和服装。";
     String result[] = { "我", "购买", "了", "道具", "和", "服装", "," };
     for (Analyzer analyzer : analyzers) {
@@ -80,7 +81,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
   }
 
   public void testChineseAnalyzer() throws Exception {
-    Analyzer ca = new SmartChineseAnalyzer(true);
+    Analyzer ca = new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true);
     String sentence = "我购买了道具和服装。";
     String[] result = { "我", "购买", "了", "道具", "和", "服装" };
     assertAnalyzesTo(ca, sentence, result);
@@ -90,7 +91,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
    * English words are lowercased and porter-stemmed.
    */
   public void testMixedLatinChinese() throws Exception {
-    assertAnalyzesTo(new SmartChineseAnalyzer(true), "我购买 Tests 了道具和服装",
+    assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "我购买 Tests 了道具和服装",
         new String[] { "我", "购买", "test", "了", "道具", "和", "服装"});
   }
 
@@ -98,7 +99,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
    * Numerics are parsed as their own tokens
   */
   public void testNumerics() throws Exception {
-    assertAnalyzesTo(new SmartChineseAnalyzer(true), "我购买 Tests 了道具和服装1234",
+    assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "我购买 Tests 了道具和服装1234",
        new String[] { "我", "购买", "test", "了", "道具", "和", "服装", "1234"});
   }
 
@@ -106,7 +107,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
    * Full width alphas and numerics are folded to half-width
   */
   public void testFullWidth() throws Exception {
-    assertAnalyzesTo(new SmartChineseAnalyzer(true), "我购买 Ｔｅｓｔｓ 了道具和服装１２３４",
+    assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "我购买 Ｔｅｓｔｓ 了道具和服装１２３４",
        new String[] { "我", "购买", "test", "了", "道具", "和", "服装", "1234"});
   }
 
@@ -114,7 +115,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
    * Presentation form delimiters are removed
   */
   public void testDelimiters() throws Exception {
-    assertAnalyzesTo(new SmartChineseAnalyzer(true), "我购买︱ Tests 了道具和服装",
+    assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "我购买︱ Tests 了道具和服装",
        new String[] { "我", "购买", "test", "了", "道具", "和", "服装"});
   }
 
@@ -123,7 +124,7 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
    * (regardless of Unicode category)
   */
   public void testNonChinese() throws Exception {
-    assertAnalyzesTo(new SmartChineseAnalyzer(true), "我购买 روبرتTests 了道具和服装",
+    assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "我购买 روبرتTests 了道具和服装",
        new String[] { "我", "购买", "ر", "و", "ب", "ر", "ت", "test", "了", "道具", "和", "服装"});
   }
 
@@ -133,22 +134,22 @@ public class TestSmartChineseAnalyzer extends BaseTokenStreamTestCase {
    * Currently it is being analyzed into single characters...
   */
   public void testOOV() throws Exception {
-    assertAnalyzesTo(new SmartChineseAnalyzer(true), "优素福·拉扎·吉拉尼",
+    assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "优素福·拉扎·吉拉尼",
       new String[] { "优", "素", "福", "拉", "扎", "吉", "拉", "尼" });
 
-    assertAnalyzesTo(new SmartChineseAnalyzer(true), "优素福拉扎吉拉尼",
+    assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "优素福拉扎吉拉尼",
       new String[] { "优", "素", "福", "拉", "扎", "吉", "拉", "尼" });
   }
 
   public void testOffsets() throws Exception {
-    assertAnalyzesTo(new SmartChineseAnalyzer(true), "我购买了道具和服装",
+    assertAnalyzesTo(new SmartChineseAnalyzer(Version.LUCENE_CURRENT, true), "我购买了道具和服装",
         new String[] { "我", "购买", "了", "道具", "和", "服装" },
         new int[] { 0, 1, 3, 4, 6, 7 },
         new int[] { 1, 3, 4, 6, 7, 9 });
   }
 
   public void testReusableTokenStream() throws Exception {
-    Analyzer a = new SmartChineseAnalyzer();
+    Analyzer a = new SmartChineseAnalyzer(Version.LUCENE_CURRENT);
     assertAnalyzesToReuse(a, "我购买 Tests 了道具和服装",
         new String[] { "我", "购买", "test", "了", "道具", "和", "服装"},
         new int[] { 0, 1, 4, 10, 11, 13, 14 },
@@ -31,6 +31,7 @@ import org.apache.lucene.search.Searcher;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.tools.ant.Project;
 import org.apache.tools.ant.types.FileSet;
+import org.apache.lucene.util.Version;
 
 /**
  * Test cases for index task
@@ -69,12 +70,12 @@ public class IndexTaskTest extends TestCase {
 
     dir = FSDirectory.open(indexDir);
     searcher = new IndexSearcher(dir, true);
-    analyzer = new StopAnalyzer(false);
+    analyzer = new StopAnalyzer(Version.LUCENE_CURRENT);
   }
 
 
   public void testSearch() throws Exception {
-    Query query = new QueryParser("contents",analyzer).parse("test");
+    Query query = new QueryParser(Version.LUCENE_CURRENT, "contents",analyzer).parse("test");
 
     int numHits = searcher.search(query, null, 1000).totalHits;
 
@@ -33,6 +33,7 @@ import org.apache.lucene.search.spans.SpanNearQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
+import org.apache.lucene.util.Version;
 
 /**
  * A QueryMaker that uses common and uncommon actual Wikipedia queries for
@@ -92,7 +93,7 @@ public class EnwikiQueryMaker extends AbstractQueryMaker implements
   * @return array of Lucene queries
   */
  private static Query[] createQueries(List qs, Analyzer a) {
-    QueryParser qp = new QueryParser(DocMaker.BODY_FIELD, a);
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD, a);
    List queries = new ArrayList();
    for (int i = 0; i < qs.size(); i++) {
      try {
@@ -5,6 +5,7 @@ import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
+import org.apache.lucene.util.Version;
 
 import java.io.*;
 import java.util.ArrayList;
@@ -48,7 +49,7 @@ public class FileBasedQueryMaker extends AbstractQueryMaker implements QueryMake
     Analyzer anlzr = NewAnalyzerTask.createAnalyzer(config.get("analyzer",
             "org.apache.lucene.analysis.standard.StandardAnalyzer"));
     String defaultField = config.get("file.query.maker.default.field", DocMaker.BODY_FIELD);
-    QueryParser qp = new QueryParser(defaultField, anlzr);
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, defaultField, anlzr);
 
     List qq = new ArrayList();
     String fileName = config.get("file.query.maker.file", null);
@@ -27,6 +27,7 @@ import org.apache.lucene.search.spans.SpanNearQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
+import org.apache.lucene.util.Version;
 
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -72,7 +73,7 @@ public class ReutersQueryMaker extends AbstractQueryMaker implements QueryMaker
   * @return array of Lucene queries
   */
  private static Query[] createQueries(List qs, Analyzer a) {
-    QueryParser qp = new QueryParser(DocMaker.BODY_FIELD, a);
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD, a);
    List queries = new ArrayList();
    for (int i = 0; i < qs.size(); i++) {
      try {
@@ -25,6 +25,7 @@ import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
+import org.apache.lucene.util.Version;
 
 import java.util.ArrayList;
 
@@ -46,7 +47,7 @@ public class SimpleQueryMaker extends AbstractQueryMaker implements QueryMaker {
     Analyzer anlzr= NewAnalyzerTask.createAnalyzer(config.get("analyzer",
         "org.apache.lucene.analysis.standard.StandardAnalyzer"));
 
-    QueryParser qp = new QueryParser(DocMaker.BODY_FIELD,anlzr);
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD,anlzr);
     ArrayList qq = new ArrayList();
     Query q1 = new TermQuery(new Term(DocMaker.ID_FIELD,"doc2"));
     qq.add(q1);
@@ -50,7 +50,7 @@ public class SimpleQQParser implements QualityQueryParser {
   public Query parse(QualityQuery qq) throws ParseException {
     QueryParser qp = queryParser.get();
     if (qp==null) {
-      qp = new QueryParser(indexField, new StandardAnalyzer(Version.LUCENE_CURRENT));
+      qp = new QueryParser(Version.LUCENE_CURRENT, indexField, new StandardAnalyzer(Version.LUCENE_CURRENT));
       queryParser.set(qp);
     }
     return qp.parse(qq.getValue(qqName));
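SimpleQQParser condenses the pattern repeated throughout this patch: the match version is now the first QueryParser argument. A self-contained sketch of the new calling convention (the field name and query string are illustrative):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.util.Version;

    public class QueryParserUsage {
      public static void main(String[] args) throws Exception {
        // Before: new QueryParser("contents", analyzer)
        // After: a required Version leads, and (as of 2.9) the parser's
        // enablePositionIncrements defaults to true to match StandardAnalyzer.
        QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "contents",
            new StandardAnalyzer(Version.LUCENE_CURRENT));
        Query q = qp.parse("test");
        System.out.println(q); // prints the rewritten query, e.g. contents:test
      }
    }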
@@ -39,6 +39,7 @@ import org.apache.lucene.document.Field;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.util.IndexableBinaryStringTools;
 import org.apache.lucene.queryParser.analyzing.AnalyzingQueryParser;
+import org.apache.lucene.util.Version;
 
 import java.io.IOException;
 import java.nio.CharBuffer;
@@ -83,7 +84,7 @@ public class CollationTestBase extends TestCase {
     writer.close();
     IndexSearcher is = new IndexSearcher(ramDir, true);
 
-    AnalyzingQueryParser aqp = new AnalyzingQueryParser("content", analyzer);
+    AnalyzingQueryParser aqp = new AnalyzingQueryParser(Version.LUCENE_CURRENT, "content", analyzer);
     aqp.setLowercaseExpandedTerms(false);
 
     // Unicode order would include U+0633 in [ U+062F - U+0698 ], but Farsi
@@ -38,6 +38,7 @@ import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.Version;
 
 /**
  * <code>FieldTermStack</code> is a stack that keeps query terms in the specified field
@@ -50,7 +51,7 @@ public class FieldTermStack {
 
   public static void main( String[] args ) throws Exception {
     Analyzer analyzer = new WhitespaceAnalyzer();
-    QueryParser parser = new QueryParser( "f", analyzer );
+    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "f", analyzer );
     Query query = parser.parse( "a x:b" );
     FieldQuery fieldQuery = new FieldQuery( query, true, false );
 
@@ -45,6 +45,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.Version;
 
 public abstract class AbstractTestCase extends TestCase {
 
@@ -78,8 +79,8 @@ public abstract class AbstractTestCase extends TestCase {
   protected void setUp() throws Exception {
     analyzerW = new WhitespaceAnalyzer();
     analyzerB = new BigramAnalyzer();
-    paW = new QueryParser( F, analyzerW );
-    paB = new QueryParser( F, analyzerB );
+    paW = new QueryParser(Version.LUCENE_CURRENT, F, analyzerW );
+    paB = new QueryParser(Version.LUCENE_CURRENT, F, analyzerB );
     dir = new RAMDirectory();
   }
 
@@ -113,7 +113,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
   public void testQueryScorerHits() throws Exception {
     Analyzer analyzer = new SimpleAnalyzer();
-    QueryParser qp = new QueryParser(FIELD_NAME, analyzer);
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, analyzer);
     query = qp.parse("\"very long\"");
     searcher = new IndexSearcher(ramDir, true);
     TopDocs hits = searcher.search(query, 10);
@@ -143,7 +143,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
     String s1 = "I call our world Flatland, not because we call it so,";
 
-    QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer(TEST_VERSION));
+    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, new StandardAnalyzer(TEST_VERSION));
 
     // Verify that a query against the default field results in text being
     // highlighted
@@ -221,7 +221,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     String q = "(" + f1c + ph1 + " OR " + f2c + ph1 + ") AND (" + f1c + ph2
         + " OR " + f2c + ph2 + ")";
     Analyzer analyzer = new WhitespaceAnalyzer();
-    QueryParser qp = new QueryParser(f1, analyzer);
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, f1, analyzer);
     Query query = qp.parse(q);
 
     QueryScorer scorer = new QueryScorer(query, f1);
@@ -590,7 +590,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     // Need to explicitly set the QueryParser property to use TermRangeQuery
     // rather
     // than RangeFilters
-    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
+    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, analyzer);
     parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
     query = parser.parse(queryString);
     doSearching(query);
@@ -930,7 +930,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     String srchkey = "football";
 
     String s = "football-soccer in the euro 2004 footie competition";
-    QueryParser parser = new QueryParser("bookid", analyzer);
+    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "bookid", analyzer);
     Query query = parser.parse(srchkey);
 
     TokenStream tokenStream = analyzer.tokenStream(null, new StringReader(s));
@@ -1111,7 +1111,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     searcher = new IndexSearcher(ramDir, true);
     Analyzer analyzer = new StandardAnalyzer(TEST_VERSION);
 
-    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
+    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, analyzer);
     Query query = parser.parse("JF? or Kenned*");
     System.out.println("Searching with primitive query");
     // forget to set this and...
@@ -1245,7 +1245,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     searchers[0] = new IndexSearcher(ramDir1, true);
     searchers[1] = new IndexSearcher(ramDir2, true);
     MultiSearcher multiSearcher = new MultiSearcher(searchers);
-    QueryParser parser = new QueryParser(FIELD_NAME, new StandardAnalyzer(TEST_VERSION));
+    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, new StandardAnalyzer(TEST_VERSION));
     parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
     query = parser.parse("multi*");
     System.out.println("Searching for: " + query.toString(FIELD_NAME));
@@ -1278,7 +1278,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
     public void run() throws Exception {
       String docMainText = "fred is one of the people";
-      QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
+      QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, analyzer);
       Query query = parser.parse("fred category:people");
 
       // highlighting respects fieldnames used in query
@@ -1419,64 +1419,64 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
     Highlighter highlighter;
     String result;
 
-    query = new QueryParser("text", new WhitespaceAnalyzer()).parse("foo");
+    query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("foo");
     highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2(), s, 3, "...");
     assertEquals("Hi-Speed10 <B>foo</B>", result);
 
-    query = new QueryParser("text", new WhitespaceAnalyzer()).parse("10");
+    query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("10");
     highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2(), s, 3, "...");
     assertEquals("Hi-Speed<B>10</B> foo", result);
 
-    query = new QueryParser("text", new WhitespaceAnalyzer()).parse("hi");
+    query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("hi");
     highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2(), s, 3, "...");
     assertEquals("<B>Hi</B>-Speed10 foo", result);
 
-    query = new QueryParser("text", new WhitespaceAnalyzer()).parse("speed");
+    query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("speed");
     highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2(), s, 3, "...");
     assertEquals("Hi-<B>Speed</B>10 foo", result);
 
-    query = new QueryParser("text", new WhitespaceAnalyzer()).parse("hispeed");
+    query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("hispeed");
     highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2(), s, 3, "...");
     assertEquals("<B>Hi-Speed</B>10 foo", result);
 
-    query = new QueryParser("text", new WhitespaceAnalyzer()).parse("hi speed");
+    query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("hi speed");
     highlighter = getHighlighter(query, "text", getTS2(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2(), s, 3, "...");
     assertEquals("<B>Hi-Speed</B>10 foo", result);
 
     // ///////////////// same tests, just put the bigger overlapping token
     // first
-    query = new QueryParser("text", new WhitespaceAnalyzer()).parse("foo");
+    query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("foo");
     highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
     assertEquals("Hi-Speed10 <B>foo</B>", result);
 
-    query = new QueryParser("text", new WhitespaceAnalyzer()).parse("10");
+    query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("10");
     highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
     assertEquals("Hi-Speed<B>10</B> foo", result);
 
-    query = new QueryParser("text", new WhitespaceAnalyzer()).parse("hi");
+    query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("hi");
     highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
     assertEquals("<B>Hi</B>-Speed10 foo", result);
 
-    query = new QueryParser("text", new WhitespaceAnalyzer()).parse("speed");
+    query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("speed");
     highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
     assertEquals("Hi-<B>Speed</B>10 foo", result);
 
-    query = new QueryParser("text", new WhitespaceAnalyzer()).parse("hispeed");
+    query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("hispeed");
     highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
     assertEquals("<B>Hi-Speed</B>10 foo", result);
 
-    query = new QueryParser("text", new WhitespaceAnalyzer()).parse("hi speed");
+    query = new QueryParser(Version.LUCENE_CURRENT, "text", new WhitespaceAnalyzer()).parse("hi speed");
     highlighter = getHighlighter(query, "text", getTS2a(), HighlighterTest.this);
     result = highlighter.getBestFragments(getTS2a(), s, 3, "...");
     assertEquals("<B>Hi-Speed</B>10 foo", result);
@@ -1521,7 +1521,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
 
   private void searchIndex() throws IOException, ParseException, InvalidTokenOffsetsException {
     String q = "t_text1:random";
-    QueryParser parser = new QueryParser( "t_text1", a );
+    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "t_text1", a );
     Query query = parser.parse( q );
     IndexSearcher searcher = new IndexSearcher( dir, true );
     // This scorer can return negative idf -> null fragment
@@ -1575,7 +1575,7 @@ public class HighlighterTest extends BaseTokenStreamTestCase implements Formatte
   }
 
   public void doSearching(String queryString) throws Exception {
-    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
+    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, analyzer);
     parser.setEnablePositionIncrements(true);
     parser.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
     query = parser.parse(queryString);
@@ -195,7 +195,7 @@ class LuceneMethods {
     for (int ii = 0; ii < arraySize; ii++) {
       indexedArray[ii] = (String) indexedFields.get(ii);
     }
-    MultiFieldQueryParser parser = new MultiFieldQueryParser(indexedArray, analyzer);
+    MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT, indexedArray, analyzer);
     query = parser.parse(queryString);
     System.out.println("Searching for: " + query.toString());
     return (query);
@@ -216,7 +216,7 @@ class LuceneMethods {
     for (int ii = 0; ii < arraySize; ii++) {
       fieldsArray[ii] = (String) fields.get(ii);
     }
-    MultiFieldQueryParser parser = new MultiFieldQueryParser(fieldsArray, analyzer);
+    MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fieldsArray, analyzer);
     query = parser.parse(queryString);
     System.out.println("Searching for: " + query.toString());
   }
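MultiFieldQueryParser picks up the same leading Version argument. A sketch mirroring the string-array form used in the lucli code above (the field names are hypothetical):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.queryParser.MultiFieldQueryParser;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.util.Version;

    public class MultiFieldUsage {
      public static void main(String[] args) throws Exception {
        String[] fields = { "title", "contents" }; // hypothetical field names
        // As with QueryParser, the required Version comes first.
        MultiFieldQueryParser parser = new MultiFieldQueryParser(
            Version.LUCENE_CURRENT, fields,
            new StandardAnalyzer(Version.LUCENE_CURRENT));
        Query query = parser.parse("lucene query");
        System.out.println(query); // expands the terms across both fields
      }
    }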
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.util.Version;
 
 /**
  * Efficient Lucene analyzer/tokenizer that preferably operates on a String rather than a
@@ -124,7 +125,7 @@ public class PatternAnalyzer extends Analyzer {
   * freely across threads without harm); global per class loader.
   */
  public static final PatternAnalyzer DEFAULT_ANALYZER = new PatternAnalyzer(
-    NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
+    Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
 
  /**
   * A lower-casing word analyzer with <b>extended </b> English stop words
@@ -134,15 +135,18 @@ public class PatternAnalyzer extends Analyzer {
   * http://thomas.loc.gov/home/all.about.inquery.html
   */
  public static final PatternAnalyzer EXTENDED_ANALYZER = new PatternAnalyzer(
-    NON_WORD_PATTERN, true, EXTENDED_ENGLISH_STOP_WORDS);
+    Version.LUCENE_CURRENT, NON_WORD_PATTERN, true, EXTENDED_ENGLISH_STOP_WORDS);
 
  private final Pattern pattern;
  private final boolean toLowerCase;
  private final Set stopWords;
 
+ private final Version matchVersion;
+
  /**
   * Constructs a new instance with the given parameters.
   *
+  * @param matchVersion If >= {@link Version#LUCENE_29}, StopFilter.enablePositionIncrement is set to true
   * @param pattern
   *            a regular expression delimiting tokens
   * @param toLowerCase
@@ -158,7 +162,7 @@ public class PatternAnalyzer extends Analyzer {
   *            or <a href="http://www.unine.ch/info/clef/">other stop words
   *            lists </a>.
   */
- public PatternAnalyzer(Pattern pattern, boolean toLowerCase, Set stopWords) {
+ public PatternAnalyzer(Version matchVersion, Pattern pattern, boolean toLowerCase, Set stopWords) {
    if (pattern == null)
      throw new IllegalArgumentException("pattern must not be null");
 
@@ -170,6 +174,7 @@ public class PatternAnalyzer extends Analyzer {
    this.pattern = pattern;
    this.toLowerCase = toLowerCase;
    this.stopWords = stopWords;
+   this.matchVersion = matchVersion;
  }
 
  /**
@@ -197,7 +202,7 @@ public class PatternAnalyzer extends Analyzer {
    }
    else {
      stream = new PatternTokenizer(text, pattern, toLowerCase);
-      if (stopWords != null) stream = new StopFilter(false, stream, stopWords);
+      if (stopWords != null) stream = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion), stream, stopWords);
    }
 
    return stream;
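PatternAnalyzer shows the full shape of the analyzer-side change: the matchVersion is stored in a final field and consulted only where behavior is version-dependent (the StopFilter construction). A usage sketch built from constants that appear in the patch; the analyzed text is illustrative:

    import java.io.StringReader;

    import org.apache.lucene.analysis.StopAnalyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.TermAttribute;
    import org.apache.lucene.index.memory.PatternAnalyzer;
    import org.apache.lucene.util.Version;

    public class PatternAnalyzerUsage {
      public static void main(String[] args) throws Exception {
        // Equivalent configuration to PatternAnalyzer.DEFAULT_ANALYZER:
        // split on non-word characters, lowercase, English stopwords removed.
        PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT,
            PatternAnalyzer.NON_WORD_PATTERN, true,
            StopAnalyzer.ENGLISH_STOP_WORDS_SET);
        TokenStream ts = a.tokenStream("content", new StringReader("The quick brown fox"));
        TermAttribute term = ts.getAttribute(TermAttribute.class);
        while (ts.incrementToken()) {
          System.out.println(term.term()); // quick, brown, fox
        }
      }
    }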
@@ -53,6 +53,7 @@ import org.apache.lucene.search.Searcher;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.index.TermDocs;
+import org.apache.lucene.util.Version;
 
 /**
 Verifies that Lucene MemoryIndex and RAMDirectory have the same behaviour,
@@ -277,7 +278,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
 
     Analyzer[] analyzers = new Analyzer[] {
         new SimpleAnalyzer(),
-        new StopAnalyzer(true),
+        new StopAnalyzer(Version.LUCENE_CURRENT),
         new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT),
         PatternAnalyzer.DEFAULT_ANALYZER,
         // new WhitespaceAnalyzer(),
@@ -480,7 +481,7 @@ public class MemoryIndexTest extends BaseTokenStreamTestCase {
   }
 
   private Query parseQuery(String expression) throws ParseException {
-    QueryParser parser = new QueryParser(FIELD_NAME, analyzer);
+    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, analyzer);
     // parser.setPhraseSlop(0);
     return parser.parse(expression);
   }
@@ -24,6 +24,7 @@ import java.util.regex.Pattern;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
 import org.apache.lucene.analysis.StopAnalyzer;
 import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.util.Version;
 
 /**
  * Verifies the behavior of PatternAnalyzer.
@@ -36,13 +37,13 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
   */
  public void testNonWordPattern() throws IOException {
    // Split on non-letter pattern, do not lowercase, no stopwords
-    PatternAnalyzer a = new PatternAnalyzer(PatternAnalyzer.NON_WORD_PATTERN,
+    PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
        false, null);
    check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
        "The", "quick", "brown", "Fox", "the", "abcd", "dc" });
 
    // split on non-letter pattern, lowercase, english stopwords
-    PatternAnalyzer b = new PatternAnalyzer(PatternAnalyzer.NON_WORD_PATTERN,
+    PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.NON_WORD_PATTERN,
        true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
        "quick", "brown", "fox", "abcd", "dc" });
@@ -54,13 +55,13 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
   */
  public void testWhitespacePattern() throws IOException {
    // Split on whitespace patterns, do not lowercase, no stopwords
-    PatternAnalyzer a = new PatternAnalyzer(PatternAnalyzer.WHITESPACE_PATTERN,
+    PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
        false, null);
    check(a, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
        "The", "quick", "brown", "Fox,the", "abcd1234", "(56.78)", "dc." });
 
    // Split on whitespace patterns, lowercase, english stopwords
-    PatternAnalyzer b = new PatternAnalyzer(PatternAnalyzer.WHITESPACE_PATTERN,
+    PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
        true, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    check(b, "The quick brown Fox,the abcd1234 (56.78) dc.", new String[] {
        "quick", "brown", "fox,the", "abcd1234", "(56.78)", "dc." });
@@ -72,12 +73,12 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
   */
  public void testCustomPattern() throws IOException {
    // Split on comma, do not lowercase, no stopwords
-    PatternAnalyzer a = new PatternAnalyzer(Pattern.compile(","), false, null);
+    PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, Pattern.compile(","), false, null);
    check(a, "Here,Are,some,Comma,separated,words,", new String[] { "Here",
        "Are", "some", "Comma", "separated", "words" });
 
    // split on comma, lowercase, english stopwords
-    PatternAnalyzer b = new PatternAnalyzer(Pattern.compile(","), true,
+    PatternAnalyzer b = new PatternAnalyzer(Version.LUCENE_CURRENT, Pattern.compile(","), true,
        StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    check(b, "Here,Are,some,Comma,separated,words,", new String[] { "here",
        "some", "comma", "separated", "words" });
@@ -102,7 +103,7 @@ public class PatternAnalyzerTest extends BaseTokenStreamTestCase {
    document.append(largeWord2);
 
    // Split on whitespace patterns, do not lowercase, no stopwords
-    PatternAnalyzer a = new PatternAnalyzer(PatternAnalyzer.WHITESPACE_PATTERN,
+    PatternAnalyzer a = new PatternAnalyzer(Version.LUCENE_CURRENT, PatternAnalyzer.WHITESPACE_PATTERN,
        false, null);
    check(a, document.toString(), new String[] { new String(largeWord),
        new String(largeWord2) });
@@ -28,6 +28,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.util.Version;
 
 /**
  * Overrides Lucene's default QueryParser so that Fuzzy-, Prefix-, Range-, and WildcardQuerys
@@ -49,8 +50,8 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
    * @param field    the default field for query terms.
    * @param analyzer used to find terms in the query text.
    */
-  public AnalyzingQueryParser(String field, Analyzer analyzer) {
-    super(field, analyzer);
+  public AnalyzingQueryParser(Version matchVersion, String field, Analyzer analyzer) {
+    super(matchVersion, field, analyzer);
   }
 
  /**
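[editor's note] A minimal usage sketch of the new constructor above. This is illustrative only, not part of the commit; the field name "body" and the query string are invented:

    // matchVersion is now the first argument (LUCENE-2002)
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    AnalyzingQueryParser qp =
        new AnalyzingQueryParser(Version.LUCENE_CURRENT, "body", analyzer);
    Query q = qp.parse("smith~ jo*");  // fuzzy/wildcard terms are analyzed before expansion
    System.out.println(q.toString("body"));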
@@ -38,6 +38,7 @@ import org.apache.lucene.search.spans.SpanNotQuery;
 import org.apache.lucene.search.spans.SpanOrQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
+import org.apache.lucene.util.Version;
 
 /**
  * QueryParser which permits complex phrase query syntax eg "(john jon
@@ -67,8 +68,8 @@ public class ComplexPhraseQueryParser extends QueryParser {
 
   private ComplexPhraseQuery currentPhraseQuery = null;
 
-  public ComplexPhraseQueryParser(String f, Analyzer a) {
-    super(f, a);
+  public ComplexPhraseQueryParser(Version matchVersion, String f, Analyzer a) {
+    super(matchVersion, f, a);
   }
 
   protected Query getFieldQuery(String field, String queryText, int slop) {
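[editor's note] A sketch of the updated call site, mirroring the complex-phrase syntax quoted in the javadoc above. Illustrative only; the field name "name" and the phrase are assumptions:

    ComplexPhraseQueryParser qp = new ComplexPhraseQueryParser(
        Version.LUCENE_CURRENT, "name", new StandardAnalyzer(Version.LUCENE_CURRENT));
    // fuzzy and wildcard terms are allowed inside the quoted phrase
    Query q = qp.parse("\"(john jon jonathan~) smith*\"");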
@@ -28,6 +28,7 @@ import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.standard.StandardFilter;
 import org.apache.lucene.analysis.standard.StandardTokenizer;
 import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.util.Version;
 
 /**
  * @version $Revision$, $Date$
@@ -97,7 +98,7 @@ public class TestAnalyzingQueryParser extends TestCase {
   }
 
   private String parseWithAnalyzingQueryParser(String s, Analyzer a) throws ParseException {
-    AnalyzingQueryParser qp = new AnalyzingQueryParser("field", a);
+    AnalyzingQueryParser qp = new AnalyzingQueryParser(Version.LUCENE_CURRENT, "field", a);
     org.apache.lucene.search.Query q = qp.parse(s);
     return q.toString("field");
   }
 
@@ -109,7 +110,7 @@ class ASCIIAnalyzer extends org.apache.lucene.analysis.Analyzer {
   }
 
   public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result = new StandardTokenizer(reader);
+    TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
     result = new StandardFilter(result);
     result = new ASCIIFoldingFilter(result);
     result = new LowerCaseFilter(result);
@@ -33,6 +33,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.Version;
 
 public class TestComplexPhraseQuery extends TestCase {
 
@@ -71,7 +72,7 @@ public class TestComplexPhraseQuery extends TestCase {
   }
 
   private void checkBadQuery(String qString) {
-    QueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, analyzer);
+    QueryParser qp = new ComplexPhraseQueryParser(Version.LUCENE_CURRENT, defaultFieldName, analyzer);
     Throwable expected = null;
     try {
       qp.parse(qString);
@@ -84,7 +85,7 @@ public class TestComplexPhraseQuery extends TestCase {
 
   private void checkMatches(String qString, String expectedVals)
       throws Exception {
-    QueryParser qp = new ComplexPhraseQueryParser(defaultFieldName, analyzer);
+    QueryParser qp = new ComplexPhraseQueryParser(Version.LUCENE_CURRENT, defaultFieldName, analyzer);
     qp.setFuzzyPrefixLength(1); // usually a good idea
 
     Query q = qp.parse(qString);
@@ -34,6 +34,7 @@ import org.apache.lucene.queryParser.core.QueryNodeException;
 import org.apache.lucene.queryParser.standard.StandardQueryParser;
 import org.apache.lucene.queryParser.standard.config.DefaultOperatorAttribute.Operator;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
 
 /**
  * This test case is a copy of the core Lucene query parser test, it was adapted
@@ -154,7 +155,7 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {
     }
 
     public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new StandardTokenizer(reader);
+      TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
       result = new TestFilter(result);
       result = new LowerCaseFilter(result);
       return result;
@@ -222,7 +223,7 @@ public class TestMultiAnalyzerQPHelper extends LuceneTestCase {
     }
 
     public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new StandardTokenizer(reader);
+      TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
       result = new TestPosIncrementFilter(result);
       result = new LowerCaseFilter(result);
       return result;
@@ -33,6 +33,7 @@ import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.queryParser.standard.QueryParserWrapper;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
 
 /**
  * This test case is a copy of the core Lucene query parser test, it was adapted
@@ -148,7 +149,7 @@ public class TestMultiAnalyzerWrapper extends LuceneTestCase {
     }
 
     public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new StandardTokenizer(reader);
+      TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
       result = new TestFilter(result);
       result = new LowerCaseFilter(result);
       return result;
@@ -216,7 +217,7 @@ public class TestMultiAnalyzerWrapper extends LuceneTestCase {
     }
 
     public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new StandardTokenizer(reader);
+      TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
       result = new TestPosIncrementFilter(result);
       result = new LowerCaseFilter(result);
       return result;
@@ -1070,7 +1070,7 @@ public class TestQPHelper extends LocalizedTestCase {
   public void testStopwords() throws Exception {
     StandardQueryParser qp = new StandardQueryParser();
     qp.setAnalyzer(
-        new StopAnalyzer(StopFilter.makeStopSet("the", "foo" ), true));
+        new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "foo" )));
 
     Query result = qp.parse("a:the OR a:foo", "a");
     assertNotNull("result is null and it shouldn't be", result);
@@ -1093,7 +1093,7 @@ public class TestQPHelper extends LocalizedTestCase {
   public void testPositionIncrement() throws Exception {
     StandardQueryParser qp = new StandardQueryParser();
     qp.setAnalyzer(
-        new StopAnalyzer(StopFilter.makeStopSet("the", "in", "are", "this" ), true));
+        new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "in", "are", "this" )));
 
     qp.setEnablePositionIncrements(true);
 
@@ -1048,7 +1048,7 @@ public class TestQueryParserWrapper extends LocalizedTestCase {
   }
 
   public void testStopwords() throws Exception {
-    QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(StopFilter.makeStopSet("the", "foo"), false));
+    QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "foo")));
     Query result = qp.parse("a:the OR a:foo");
     assertNotNull("result is null and it shouldn't be", result);
     assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
@@ -1067,7 +1067,7 @@ public class TestQueryParserWrapper extends LocalizedTestCase {
   }
 
   public void testPositionIncrement() throws Exception {
-    QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(StopFilter.makeStopSet("the", "in", "are", "this"), true));
+    QueryParserWrapper qp = new QueryParserWrapper("a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "in", "are", "this")));
     qp.setEnablePositionIncrements(true);
     String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
     // 0 2 5 7 8
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis.snowball;
 
 import org.apache.lucene.analysis.*;
 import org.apache.lucene.analysis.standard.*;
+import org.apache.lucene.util.Version;
 
 import java.io.IOException;
 import java.io.Reader;
@@ -30,20 +31,25 @@ import java.util.Set;
  * Available stemmers are listed in org.tartarus.snowball.ext.  The name of a
  * stemmer is the part of the class name before "Stemmer", e.g., the stemmer in
  * {@link org.tartarus.snowball.ext.EnglishStemmer} is named "English".
+ *
+ * <p><b>NOTE</b>: This class uses the same {@link Version}
+ * dependent settings as {@link StandardAnalyzer}.</p>
  */
 public class SnowballAnalyzer extends Analyzer {
   private String name;
   private Set stopSet;
+  private final Version matchVersion;
 
   /** Builds the named analyzer with no stop words. */
-  public SnowballAnalyzer(String name) {
+  public SnowballAnalyzer(Version matchVersion, String name) {
     this.name = name;
     setOverridesTokenStreamMethod(SnowballAnalyzer.class);
+    this.matchVersion = matchVersion;
   }
 
   /** Builds the named analyzer with the given stop words. */
-  public SnowballAnalyzer(String name, String[] stopWords) {
-    this(name);
+  public SnowballAnalyzer(Version matchVersion, String name, String[] stopWords) {
+    this(matchVersion, name);
     stopSet = StopFilter.makeStopSet(stopWords);
   }
 
@@ -51,11 +57,12 @@ public class SnowballAnalyzer extends Analyzer {
       StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter},
       and a {@link SnowballFilter} */
   public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result = new StandardTokenizer(reader);
+    TokenStream result = new StandardTokenizer(matchVersion, reader);
     result = new StandardFilter(result);
     result = new LowerCaseFilter(result);
     if (stopSet != null)
-      result = new StopFilter(false, result, stopSet);
+      result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                              result, stopSet);
     result = new SnowballFilter(result, name);
     return result;
   }
@@ -80,11 +87,12 @@ public class SnowballAnalyzer extends Analyzer {
     SavedStreams streams = (SavedStreams) getPreviousTokenStream();
     if (streams == null) {
       streams = new SavedStreams();
-      streams.source = new StandardTokenizer(reader);
+      streams.source = new StandardTokenizer(matchVersion, reader);
       streams.result = new StandardFilter(streams.source);
       streams.result = new LowerCaseFilter(streams.result);
      if (stopSet != null)
-        streams.result = new StopFilter(false, streams.result, stopSet);
+        streams.result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                                        streams.result, stopSet);
       streams.result = new SnowballFilter(streams.result, name);
       setPreviousTokenStream(streams);
     } else {
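[editor's note] A minimal sketch of the new versioned SnowballAnalyzer. Illustrative only; the stop word list, field name, and input text are assumptions:

    Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English",
        new String[] { "the", "of" });
    TokenStream ts = a.tokenStream("f", new StringReader("the quick brown foxes"));
    TermAttribute term = ts.addAttribute(TermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.term());  // stemmed, stop-filtered tokens, e.g. "foxes" -> "fox"
    }

With a 2.9+ matchVersion the embedded StopFilter also leaves position holes where "the" was removed, per getEnablePositionIncrementsVersionDefault above.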
@@ -31,17 +31,18 @@ import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.Version;
 
 public class TestSnowball extends BaseTokenStreamTestCase {
 
   public void testEnglish() throws Exception {
-    Analyzer a = new SnowballAnalyzer("English");
+    Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English");
     assertAnalyzesTo(a, "he abhorred accents",
         new String[]{"he", "abhor", "accent"});
   }
 
   public void testReusableTokenStream() throws Exception {
-    Analyzer a = new SnowballAnalyzer("English");
+    Analyzer a = new SnowballAnalyzer(Version.LUCENE_CURRENT, "English");
     assertAnalyzesToReuse(a, "he abhorred accents",
         new String[]{"he", "abhor", "accent"});
     assertAnalyzesToReuse(a, "she abhorred him",
@@ -53,7 +54,7 @@ public class TestSnowball extends BaseTokenStreamTestCase {
    */
   private class SnowballSubclassAnalyzer extends SnowballAnalyzer {
     public SnowballSubclassAnalyzer(String name) {
-      super(name);
+      super(Version.LUCENE_CURRENT, name);
     }
 
     public TokenStream tokenStream(String fieldName, Reader reader) {
@@ -38,6 +38,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.Version;
 
 /**
  * See table searcher explanation.
@@ -167,7 +168,7 @@ public class ListSearcher extends AbstractListModel {
             //build a query based on the fields, searchString and cached analyzer
             //NOTE: This is an area for improvement since the MultiFieldQueryParser
             // has some weirdness.
-            MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer);
+            MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, analyzer);
             Query query =parser.parse(searchString);
             //reset this list model with the new results
             resetSearchResults(is, query);
@@ -35,7 +35,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.swing.models.ListSearcher.CountingCollector;
+import org.apache.lucene.util.Version;
 
 /**
  * This is a TableModel that encapsulates Lucene
@@ -244,7 +244,7 @@ public class TableSearcher extends AbstractTableModel {
             //build a query based on the fields, searchString and cached analyzer
             //NOTE: This is an area for improvement since the MultiFieldQueryParser
             // has some weirdness.
-            MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer);
+            MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, analyzer);
             Query query = parser.parse(searchString);
             //reset this table model with the new results
             resetSearchResults(is, query);
@@ -8,6 +8,7 @@ import org.apache.lucene.xmlparser.DOMUtils;
 import org.apache.lucene.xmlparser.ParserException;
 import org.apache.lucene.xmlparser.QueryBuilder;
 import org.w3c.dom.Element;
+import org.apache.lucene.util.Version;
 
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -88,7 +89,7 @@ public class UserInputQueryBuilder implements QueryBuilder {
 	 */
 	protected QueryParser createQueryParser(String fieldName, Analyzer analyzer)
 	{
-		return new QueryParser(fieldName,analyzer);
+		return new QueryParser(Version.LUCENE_CURRENT, fieldName,analyzer);
 	}
 
 }
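[editor's note] Since createQueryParser is protected, an application that needs a pinned compatibility version rather than LUCENE_CURRENT can override it. A hypothetical sketch, assuming the (defaultField, analyzer) constructor of UserInputQueryBuilder; the version choice is illustrative:

    UserInputQueryBuilder builder = new UserInputQueryBuilder("contents", analyzer) {
      protected QueryParser createQueryParser(String fieldName, Analyzer a) {
        // pin pre-2.9 position-increment behavior instead of tracking LUCENE_CURRENT
        return new QueryParser(Version.LUCENE_24, fieldName, a);
      }
    };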
@@ -127,7 +127,7 @@ public class SearchFiles {
     } else {
       in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
     }
-    QueryParser parser = new QueryParser(field, analyzer);
+    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, field, analyzer);
     while (true) {
       if (queries == null)                        // prompt the user
         System.out.println("Enter query: ");
@@ -24,7 +24,17 @@ import java.util.Arrays;
 import java.util.Set;
 import java.util.List;
 
-/** Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}. */
+import org.apache.lucene.util.Version;
+
+/** Filters {@link LetterTokenizer} with {@link LowerCaseFilter} and {@link StopFilter}.
+ *
+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating StopAnalyzer:
+ * <ul>
+ *   <li> As of 2.9, position increments are preserved
+ * </ul>
+ */
 
 public final class StopAnalyzer extends Analyzer {
   private final Set<?> stopWords;
@@ -49,40 +59,39 @@ public final class StopAnalyzer extends Analyzer {
 
   /** Builds an analyzer which removes words in
    *  {@link #ENGLISH_STOP_WORDS}.
-   * @param enablePositionIncrements See {@link
-   * StopFilter#setEnablePositionIncrements} */
-  public StopAnalyzer(boolean enablePositionIncrements) {
+   * @param matchVersion See <a href="#version">above</a>
+   */
+  public StopAnalyzer(Version matchVersion) {
     stopWords = ENGLISH_STOP_WORDS_SET;
-    this.enablePositionIncrements = enablePositionIncrements;
+    enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
   }
 
   /** Builds an analyzer with the stop words from the given set.
+   * @param matchVersion See <a href="#version">above</a>
    * @param stopWords Set of stop words
-   * @param enablePositionIncrements See {@link
-   * StopFilter#setEnablePositionIncrements} */
-  public StopAnalyzer(Set<?> stopWords, boolean enablePositionIncrements) {
+   */
+  public StopAnalyzer(Version matchVersion, Set<?> stopWords) {
     this.stopWords = stopWords;
-    this.enablePositionIncrements = enablePositionIncrements;
+    enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
   }
 
   /** Builds an analyzer with the stop words from the given file.
    * @see WordlistLoader#getWordSet(File)
-   * @param stopwordsFile File to load stop words from
-   * @param enablePositionIncrements See {@link
-   * StopFilter#setEnablePositionIncrements} */
-  public StopAnalyzer(File stopwordsFile, boolean enablePositionIncrements) throws IOException {
+   * @param matchVersion See <a href="#version">above</a>
+   * @param stopwordsFile File to load stop words from */
+  public StopAnalyzer(Version matchVersion, File stopwordsFile) throws IOException {
     stopWords = WordlistLoader.getWordSet(stopwordsFile);
-    this.enablePositionIncrements = enablePositionIncrements;
+    this.enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
  }
 
   /** Builds an analyzer with the stop words from the given reader.
    * @see WordlistLoader#getWordSet(Reader)
-   * @param stopwords Reader to load stop words from
-   * @param enablePositionIncrements See {@link
-   * StopFilter#setEnablePositionIncrements} */
-  public StopAnalyzer(Reader stopwords, boolean enablePositionIncrements) throws IOException {
+   * @param matchVersion See <a href="#version">above</a>
+   * @param stopwords Reader to load stop words from */
+  public StopAnalyzer(Version matchVersion, Reader stopwords) throws IOException {
     stopWords = WordlistLoader.getWordSet(stopwords);
-    this.enablePositionIncrements = enablePositionIncrements;
+    this.enablePositionIncrements = StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion);
   }
 
   /** Filters LowerCaseTokenizer with StopFilter. */
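[editor's note] A minimal sketch of the behavioral difference the matchVersion argument selects (the two constructions below come straight from the new API; the comments state the version-default behavior documented above):

    StopAnalyzer legacy  = new StopAnalyzer(Version.LUCENE_24);      // stop words removed, no position holes
    StopAnalyzer current = new StopAnalyzer(Version.LUCENE_CURRENT); // position increments preserved (2.9 default)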
@@ -25,6 +25,7 @@ import java.util.List;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.queryParser.QueryParser; // for javadoc
+import org.apache.lucene.util.Version;
 
 /**
  * Removes stop words from a token stream.
@@ -150,6 +151,21 @@ public final class StopFilter extends TokenFilter {
     return false;
   }
 
+  /**
+   * Returns version-dependent default for
+   * enablePositionIncrements.  Analyzers that embed
+   * StopFilter use this method when creating the
+   * StopFilter.  Prior to 2.9, this returns false.  On 2.9
+   * or later, it returns true.
+   */
+  public static boolean getEnablePositionIncrementsVersionDefault(Version matchVersion) {
+    if (matchVersion.onOrAfter(Version.LUCENE_29)) {
+      return true;
+    } else {
+      return false;
+    }
+  }
+
   /**
    * @see #setEnablePositionIncrements(boolean).
    */
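[editor's note] A sketch of the new helper in use; the tokenizer choice and input text are illustrative, the StopFilter (boolean, TokenStream, Set) constructor and ENGLISH_STOP_WORDS_SET are as used elsewhere in this commit:

    StopFilter.getEnablePositionIncrementsVersionDefault(Version.LUCENE_24);  // false
    StopFilter.getEnablePositionIncrementsVersionDefault(Version.LUCENE_29);  // true

    TokenStream ts = new WhitespaceTokenizer(new StringReader("the quick fox"));
    ts = new StopFilter(
        StopFilter.getEnablePositionIncrementsVersionDefault(Version.LUCENE_CURRENT),
        ts, StopAnalyzer.ENGLISH_STOP_WORDS_SET);  // "quick" keeps a position gap where "the" was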
@@ -35,7 +35,7 @@ import java.util.Set;
  * compatibility when creating StandardAnalyzer:
  * <ul>
  *   <li> As of 2.9, StopFilter preserves position
- *        increments by default
+ *        increments
  *   <li> As of 2.4, Tokens incorrectly identified as acronyms
  *        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1608</a>
  * </ul>
@@ -52,6 +52,7 @@ public class StandardAnalyzer extends Analyzer {
   /** An unmodifiable set containing some common English words that are usually not
   useful for searching. */
   public static final Set<?> STOP_WORDS_SET = StopAnalyzer.ENGLISH_STOP_WORDS_SET;
+  private final Version matchVersion;
 
   /** Builds an analyzer with the default stop words ({@link
    * #STOP_WORDS_SET}).
@@ -71,6 +72,7 @@ public class StandardAnalyzer extends Analyzer {
     setOverridesTokenStreamMethod(StandardAnalyzer.class);
     enableStopPositionIncrements = matchVersion.onOrAfter(Version.LUCENE_29);
     replaceInvalidAcronym = matchVersion.onOrAfter(Version.LUCENE_24);
+    this.matchVersion = matchVersion;
   }
 
   /** Builds an analyzer with the stop words from the given file.
@@ -94,11 +96,12 @@ public class StandardAnalyzer extends Analyzer {
   /** Constructs a {@link StandardTokenizer} filtered by a {@link
   StandardFilter}, a {@link LowerCaseFilter} and a {@link StopFilter}. */
   public TokenStream tokenStream(String fieldName, Reader reader) {
-    StandardTokenizer tokenStream = new StandardTokenizer(reader, replaceInvalidAcronym);
+    StandardTokenizer tokenStream = new StandardTokenizer(matchVersion, reader);
     tokenStream.setMaxTokenLength(maxTokenLength);
     TokenStream result = new StandardFilter(tokenStream);
     result = new LowerCaseFilter(result);
-    result = new StopFilter(enableStopPositionIncrements, result, stopSet);
+    result = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                            result, stopSet);
     return result;
   }
 
@@ -140,10 +143,11 @@ public class StandardAnalyzer extends Analyzer {
     if (streams == null) {
       streams = new SavedStreams();
       setPreviousTokenStream(streams);
-      streams.tokenStream = new StandardTokenizer(reader);
+      streams.tokenStream = new StandardTokenizer(matchVersion, reader);
       streams.filteredTokenStream = new StandardFilter(streams.tokenStream);
       streams.filteredTokenStream = new LowerCaseFilter(streams.filteredTokenStream);
-      streams.filteredTokenStream = new StopFilter(enableStopPositionIncrements, streams.filteredTokenStream, stopSet);
+      streams.filteredTokenStream = new StopFilter(StopFilter.getEnablePositionIncrementsVersionDefault(matchVersion),
+                                                   streams.filteredTokenStream, stopSet);
     } else {
       streams.tokenStream.reset(reader);
     }
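[editor's note] For reference, the chain StandardAnalyzer now builds can be written out by hand exactly as the tokenStream method above does; this restates the hunk, with an invented reader:

    Reader reader = new StringReader("The Quick Brown Fox");
    TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
    result = new StandardFilter(result);
    result = new LowerCaseFilter(result);
    result = new StopFilter(
        StopFilter.getEnablePositionIncrementsVersionDefault(Version.LUCENE_CURRENT),
        result, StandardAnalyzer.STOP_WORDS_SET);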
@@ -27,6 +27,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.util.AttributeSource;
+import org.apache.lucene.util.Version;
 
 /** A grammar-based tokenizer constructed with JFlex
  *
@@ -43,6 +44,14 @@ import org.apache.lucene.util.AttributeSource;
  * <p>Many applications have specific tokenizer needs.  If this tokenizer does
  * not suit your application, please consider copying this source code
  * directory to your project and maintaining your own grammar-based tokenizer.
+ *
+ * <a name="version"/>
+ * <p>You must specify the required {@link Version}
+ * compatibility when creating StandardAnalyzer:
+ * <ul>
+ *   <li> As of 2.4, Tokens incorrectly identified as acronyms
+ *        are corrected (see <a href="https://issues.apache.org/jira/browse/LUCENE-1068">LUCENE-1608</a>
+ * </ul>
  */
 
 public final class StandardTokenizer extends Tokenizer {
@@ -104,56 +113,51 @@ public final class StandardTokenizer extends Tokenizer {
     return maxTokenLength;
   }
 
-  /**
-   * Creates a new instance of the {@link StandardTokenizer}. Attaches the
-   * <code>input</code> to a newly created JFlex scanner.
-   */
-  public StandardTokenizer(Reader input) {
-    this(input, false);
-  }
-
   /**
    * Creates a new instance of the {@link org.apache.lucene.analysis.standard.StandardTokenizer}.  Attaches
    * the <code>input</code> to the newly created JFlex scanner.
    *
    * @param input The input reader
-   * @param replaceInvalidAcronym Set to true to replace mischaracterized acronyms with HOST.
    *
    * See http://issues.apache.org/jira/browse/LUCENE-1068
    */
-  public StandardTokenizer(Reader input, boolean replaceInvalidAcronym) {
+  public StandardTokenizer(Version matchVersion, Reader input) {
     super();
     this.scanner = new StandardTokenizerImpl(input);
-    init(input, replaceInvalidAcronym);
+    init(input, matchVersion);
   }
 
   /**
   * Creates a new StandardTokenizer with a given {@link AttributeSource}.
   */
-  public StandardTokenizer(AttributeSource source, Reader input, boolean replaceInvalidAcronym) {
+  public StandardTokenizer(Version matchVersion, AttributeSource source, Reader input) {
    super(source);
     this.scanner = new StandardTokenizerImpl(input);
-    init(input, replaceInvalidAcronym);
+    init(input, matchVersion);
   }
 
   /**
   * Creates a new StandardTokenizer with a given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}
   */
-  public StandardTokenizer(AttributeFactory factory, Reader input, boolean replaceInvalidAcronym) {
+  public StandardTokenizer(Version matchVersion, AttributeFactory factory, Reader input) {
     super(factory);
     this.scanner = new StandardTokenizerImpl(input);
-    init(input, replaceInvalidAcronym);
+    init(input, matchVersion);
   }
 
-  private void init(Reader input, boolean replaceInvalidAcronym) {
-    this.replaceInvalidAcronym = replaceInvalidAcronym;
+  private void init(Reader input, Version matchVersion) {
+    if (matchVersion.onOrAfter(Version.LUCENE_24)) {
+      replaceInvalidAcronym = true;
+    } else {
+      replaceInvalidAcronym = false;
+    }
     this.input = input;
     termAtt = addAttribute(TermAttribute.class);
     offsetAtt = addAttribute(OffsetAttribute.class);
     posIncrAtt = addAttribute(PositionIncrementAttribute.class);
     typeAtt = addAttribute(TypeAttribute.class);
   }
 
 
   // this tokenizer generates three attributes:
   // offset, positionIncrement and type
   private TermAttribute termAtt;
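[editor's note] A sketch of the version-dependent acronym handling the new init method selects; the input string matches the TestStandardAnalyzer case later in this commit, the rest is illustrative:

    // throws IOException; wrap in try/catch or a test method as needed
    StandardTokenizer tok =
        new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader("www.nutch.org."));
    TypeAttribute type = tok.addAttribute(TypeAttribute.class);
    while (tok.incrementToken()) {
      System.out.println(type.type()); // <HOST> for 2.4+; Version.LUCENE_23 yields <ACRONYM>
    }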
@@ -27,6 +27,7 @@ import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.MultiPhraseQuery;
 import org.apache.lucene.search.PhraseQuery;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.util.Version;
 
 /**
  * A QueryParser which constructs queries to search multiple fields.
@@ -65,8 +66,8 @@ public class MultiFieldQueryParser extends QueryParser
    * <p>In other words, all the query's terms must appear, but it doesn't matter in
    * what fields they appear.</p>
    */
-  public MultiFieldQueryParser(String[] fields, Analyzer analyzer, Map<String,Float> boosts) {
-    this(fields,analyzer);
+  public MultiFieldQueryParser(Version matchVersion, String[] fields, Analyzer analyzer, Map boosts) {
+    this(matchVersion, fields, analyzer);
     this.boosts = boosts;
   }
 
@@ -90,8 +91,8 @@ public class MultiFieldQueryParser extends QueryParser
    * <p>In other words, all the query's terms must appear, but it doesn't matter in
    * what fields they appear.</p>
    */
-  public MultiFieldQueryParser(String[] fields, Analyzer analyzer) {
-    super(null, analyzer);
+  public MultiFieldQueryParser(Version matchVersion, String[] fields, Analyzer analyzer) {
+    super(matchVersion, null, analyzer);
    this.fields = fields;
   }
 
@@ -196,6 +197,7 @@ public class MultiFieldQueryParser extends QueryParser
    * (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
    * </code>
    * </pre>
+   * @param matchVersion Lucene version to match; this is passed through to QueryParser.
    * @param queries Queries strings to parse
    * @param fields Fields to search on
    * @param analyzer Analyzer to use
@@ -203,7 +205,7 @@ public class MultiFieldQueryParser extends QueryParser
    * @throws IllegalArgumentException if the length of the queries array differs
    *  from the length of the fields array
    */
-  public static Query parse(String[] queries, String[] fields,
+  public static Query parse(Version matchVersion, String[] queries, String[] fields,
       Analyzer analyzer) throws ParseException
   {
     if (queries.length != fields.length)
@@ -211,7 +213,7 @@ public class MultiFieldQueryParser extends QueryParser
     BooleanQuery bQuery = new BooleanQuery();
     for (int i = 0; i < fields.length; i++)
     {
-      QueryParser qp = new QueryParser(fields[i], analyzer);
+      QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer);
       Query q = qp.parse(queries[i]);
       if (q!=null && // q never null, just being defensive
           (!(q instanceof BooleanQuery) || ((BooleanQuery)q).getClauses().length>0)) {
@@ -243,6 +245,7 @@ public class MultiFieldQueryParser extends QueryParser
    * </code>
    * </pre>
    *
+   * @param matchVersion Lucene version to match; this is passed through to QueryParser.
    * @param query Query string to parse
    * @param fields Fields to search on
    * @param flags Flags describing the fields
@@ -251,13 +254,13 @@ public class MultiFieldQueryParser extends QueryParser
    * @throws IllegalArgumentException if the length of the fields array differs
    *  from the length of the flags array
    */
-  public static Query parse(String query, String[] fields,
+  public static Query parse(Version matchVersion, String query, String[] fields,
       BooleanClause.Occur[] flags, Analyzer analyzer) throws ParseException {
     if (fields.length != flags.length)
       throw new IllegalArgumentException("fields.length != flags.length");
     BooleanQuery bQuery = new BooleanQuery();
     for (int i = 0; i < fields.length; i++) {
-      QueryParser qp = new QueryParser(fields[i], analyzer);
+      QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer);
       Query q = qp.parse(query);
       if (q!=null && // q never null, just being defensive
           (!(q instanceof BooleanQuery) || ((BooleanQuery)q).getClauses().length>0)) {
@@ -290,6 +293,7 @@ public class MultiFieldQueryParser extends QueryParser
    * </code>
    * </pre>
    *
+   * @param matchVersion Lucene version to match; this is passed through to QueryParser.
    * @param queries Queries string to parse
    * @param fields Fields to search on
    * @param flags Flags describing the fields
@@ -298,7 +302,7 @@ public class MultiFieldQueryParser extends QueryParser
    * @throws IllegalArgumentException if the length of the queries, fields,
    *  and flags array differ
    */
-  public static Query parse(String[] queries, String[] fields, BooleanClause.Occur[] flags,
+  public static Query parse(Version matchVersion, String[] queries, String[] fields, BooleanClause.Occur[] flags,
       Analyzer analyzer) throws ParseException
   {
     if (!(queries.length == fields.length && queries.length == flags.length))
@@ -306,7 +310,7 @@ public class MultiFieldQueryParser extends QueryParser
     BooleanQuery bQuery = new BooleanQuery();
     for (int i = 0; i < fields.length; i++)
     {
-      QueryParser qp = new QueryParser(fields[i], analyzer);
+      QueryParser qp = new QueryParser(matchVersion, fields[i], analyzer);
       Query q = qp.parse(queries[i]);
       if (q!=null && // q never null, just being defensive
           (!(q instanceof BooleanQuery) || ((BooleanQuery)q).getClauses().length>0)) {
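[editor's note] A sketch of the updated static entry point; field and query strings are invented for illustration:

    String[] fields  = { "title", "body" };
    String[] queries = { "lucene", "query parser" };
    // matchVersion leads the argument list and is passed through to each per-field QueryParser
    Query q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries, fields,
        new StandardAnalyzer(Version.LUCENE_CURRENT));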
@@ -33,6 +33,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.util.Version;
 
 /**
  * This class is generated by JavaCC.  The most important method is
@@ -99,6 +100,14 @@ import org.apache.lucene.search.WildcardQuery;
  * <p><b>NOTE</b>: there is a new QueryParser in contrib, which matches
  * the same syntax as this class, but is more modular,
  * enabling substantial customization to how a query is created.
+ *
+ * <a name="version"/>
+ * <p><b>NOTE</b>: You must specify the required {@link Version}
+ * compatibility when creating QueryParser:
+ * <ul>
+ *   <li> As of 2.9, {@link #setEnablePositionIncrements} is true by
+ *        default.
+ * </ul>
  */
 public class QueryParser implements QueryParserConstants {
 
@@ -123,7 +132,7 @@ public class QueryParser implements QueryParserConstants {
   boolean lowercaseExpandedTerms = true;
   MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
   boolean allowLeadingWildcard = false;
-  boolean enablePositionIncrements = false;
+  boolean enablePositionIncrements = true;
 
   Analyzer analyzer;
   String field;
@@ -147,13 +156,19 @@ public class QueryParser implements QueryParserConstants {
   static public enum Operator { OR, AND }
 
   /** Constructs a query parser.
+   *  @param matchVersion  Lucene version to match. See <a href="#version">above</a>)
    *  @param f  the default field for query terms.
    *  @param a   used to find terms in the query text.
    */
-  public QueryParser(String f, Analyzer a) {
+  public QueryParser(Version matchVersion, String f, Analyzer a) {
     this(new FastCharStream(new StringReader("")));
     analyzer = a;
     field = f;
+    if (matchVersion.onOrAfter(Version.LUCENE_29)) {
+      enablePositionIncrements = true;
+    } else {
+      enablePositionIncrements = false;
+    }
   }
 
   /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
@@ -1077,7 +1092,7 @@ public class QueryParser implements QueryParserConstants {
       System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
       System.exit(0);
     }
-    QueryParser qp = new QueryParser("field",
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
                            new org.apache.lucene.analysis.SimpleAnalyzer());
     Query q = qp.parse(args[0]);
     System.out.println(q.toString("field"));
@@ -1513,12 +1528,6 @@ public class QueryParser implements QueryParserConstants {
     finally { jj_save(0, xla); }
   }
 
-  private boolean jj_3R_3() {
-    if (jj_scan_token(STAR)) return true;
-    if (jj_scan_token(COLON)) return true;
-    return false;
-  }
-
   private boolean jj_3R_2() {
     if (jj_scan_token(TERM)) return true;
     if (jj_scan_token(COLON)) return true;
@@ -1535,6 +1544,12 @@ public class QueryParser implements QueryParserConstants {
     return false;
   }
 
+  private boolean jj_3R_3() {
+    if (jj_scan_token(STAR)) return true;
+    if (jj_scan_token(COLON)) return true;
+    return false;
+  }
+
   /** Generated Token Manager. */
   public QueryParserTokenManager token_source;
   /** Current token. */
@@ -1563,7 +1578,7 @@ public class QueryParser implements QueryParserConstants {
   private int jj_gc = 0;
 
   /** Constructor with user supplied CharStream. */
-  public QueryParser(CharStream stream) {
+  protected QueryParser(CharStream stream) {
     token_source = new QueryParserTokenManager(stream);
     token = new Token();
     jj_ntk = -1;
@@ -1583,7 +1598,7 @@ public class QueryParser implements QueryParserConstants {
   }
 
   /** Constructor with generated Token Manager. */
-  public QueryParser(QueryParserTokenManager tm) {
+  protected QueryParser(QueryParserTokenManager tm) {
     token_source = tm;
     token = new Token();
     jj_ntk = -1;
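[editor's note] A minimal sketch of the new core QueryParser signature and the version-keyed default it selects; field name and query text are invented:

    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "contents",
        new StandardAnalyzer(Version.LUCENE_CURRENT));
    boolean incr = qp.getEnablePositionIncrements();  // true for 2.9+, false for older matchVersion
    Query q = qp.parse("\"the quick fox\"");  // phrase queries honor stopword holes when incr is true

Passing Version.LUCENE_24 instead restores the pre-2.9 behavior without calling setEnablePositionIncrements explicitly.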
@@ -57,6 +57,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.util.Version;
 
 /**
  * This class is generated by JavaCC.  The most important method is
@@ -123,6 +124,14 @@ import org.apache.lucene.search.WildcardQuery;
  * <p><b>NOTE</b>: there is a new QueryParser in contrib, which matches
  * the same syntax as this class, but is more modular,
  * enabling substantial customization to how a query is created.
+ *
+ * <a name="version"/>
+ * <p><b>NOTE</b>: You must specify the required {@link Version}
+ * compatibility when creating QueryParser:
+ * <ul>
+ *   <li> As of 2.9, {@link #setEnablePositionIncrements} is true by
+ *        default.
+ * </ul>
  */
 public class QueryParser {
 
@@ -147,7 +156,7 @@ public class QueryParser {
   boolean lowercaseExpandedTerms = true;
   MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT;
   boolean allowLeadingWildcard = false;
-  boolean enablePositionIncrements = false;
+  boolean enablePositionIncrements = true;
 
   Analyzer analyzer;
   String field;
@@ -171,13 +180,19 @@ public class QueryParser {
   static public enum Operator { OR, AND }
 
   /** Constructs a query parser.
+   *  @param matchVersion  Lucene version to match. See {@link <a href="#version">above</a>)
    *  @param f  the default field for query terms.
    *  @param a   used to find terms in the query text.
    */
-  public QueryParser(String f, Analyzer a) {
+  public QueryParser(Version matchVersion, String f, Analyzer a) {
     this(new FastCharStream(new StringReader("")));
     analyzer = a;
     field = f;
+    if (matchVersion.onOrAfter(Version.LUCENE_29)) {
+      enablePositionIncrements = true;
+    } else {
+      enablePositionIncrements = false;
+    }
   }
 
   /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
@@ -1101,7 +1116,7 @@ public class QueryParser {
       System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
       System.exit(0);
     }
-    QueryParser qp = new QueryParser("field",
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
                            new org.apache.lucene.analysis.SimpleAnalyzer());
     Query q = qp.parse(args[0]);
     System.out.println(q.toString("field"));
@@ -32,6 +32,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.WildcardQuery;
+import org.apache.lucene.util.Version;
 
 /** Token Manager. */
 public class QueryParserTokenManager implements QueryParserConstants
@@ -63,7 +63,7 @@ public class TestDemo extends LuceneTestCase {
     // Now search the index:
     IndexSearcher isearcher = new IndexSearcher(directory, true); // read-only=true
     // Parse a simple query that searches for "text":
-    QueryParser parser = new QueryParser("fieldname", analyzer);
+    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "fieldname", analyzer);
     Query query = parser.parse("text");
     ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
     assertEquals(1, hits.length);
@@ -22,6 +22,7 @@ import java.io.PrintWriter;
 import java.io.StringWriter;
 
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
 import junit.framework.TestSuite;
 import junit.textui.TestRunner;
 
@@ -107,7 +108,7 @@ public class TestSearch extends LuceneTestCase {
       };
       ScoreDoc[] hits = null;
 
-      QueryParser parser = new QueryParser("contents", analyzer);
+      QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "contents", analyzer);
       parser.setPhraseSlop(4);
       for (int j = 0; j < queries.length; j++) {
         Query query = parser.parse(queries[j]);
@@ -27,8 +27,10 @@ import org.apache.lucene.analysis.*;
 import org.apache.lucene.index.*;
 import org.apache.lucene.search.*;
 import org.apache.lucene.queryParser.*;
+import org.apache.lucene.util.Version;
 
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;
 import junit.framework.TestSuite;
 import junit.textui.TestRunner;
 
@@ -97,7 +99,7 @@ public class TestSearchForDuplicates extends LuceneTestCase {
       // try a search without OR
       Searcher searcher = new IndexSearcher(directory, true);
 
-      QueryParser parser = new QueryParser(PRIORITY_FIELD, analyzer);
+      QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, PRIORITY_FIELD, analyzer);
 
       Query query = parser.parse(HIGH_PRIORITY);
       out.println("Query: " + query.toString(PRIORITY_FIELD));
@@ -112,7 +114,7 @@ public class TestSearchForDuplicates extends LuceneTestCase {
       searcher = new IndexSearcher(directory, true);
       hits = null;
 
-      parser = new QueryParser(Version.LUCENE_CURRENT, PRIORITY_FIELD, analyzer);
+      parser = new QueryParser(Version.LUCENE_CURRENT, PRIORITY_FIELD, analyzer);
 
       query = parser.parse(HIGH_PRIORITY + " OR " + MED_PRIORITY);
       out.println("Query: " + query.toString(PRIORITY_FIELD));
@@ -26,6 +26,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.index.Payload;
+import org.apache.lucene.util.Version;
 
 public class TestAnalyzers extends BaseTokenStreamTestCase {
 
@@ -74,7 +75,7 @@ public class TestAnalyzers extends BaseTokenStreamTestCase {
   }
 
   public void testStop() throws Exception {
-    Analyzer a = new StopAnalyzer(true);
+    Analyzer a = new StopAnalyzer(Version.LUCENE_CURRENT);
     assertAnalyzesTo(a, "foo bar FOO BAR",
                      new String[] { "foo", "bar", "foo", "bar" });
     assertAnalyzesTo(a, "foo a bar such FOO THESE BAR",
@@ -31,6 +31,7 @@ import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.Version;
 
 public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
 
@@ -58,7 +59,7 @@ public class TestKeywordAnalyzer extends BaseTokenStreamTestCase {
     PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new SimpleAnalyzer());
     analyzer.addAnalyzer("partnum", new KeywordAnalyzer());
 
-    QueryParser queryParser = new QueryParser("description", analyzer);
+    QueryParser queryParser = new QueryParser(Version.LUCENE_CURRENT, "description", analyzer);
     Query query = queryParser.parse("partnum:Q36 AND SPACE");
 
     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
@@ -5,6 +5,7 @@ import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.Version;
 
 import java.io.StringReader;
 
@@ -108,15 +109,22 @@ public class TestStandardAnalyzer extends BaseTokenStreamTestCase {
   }
 
   public void testDomainNames() throws Exception {
-    // Don't reuse a because we alter its state (setReplaceInvalidAcronym)
+    // Current lucene should not show the bug
     StandardAnalyzer a2 = new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT);
 
     // domain names
     assertAnalyzesTo(a2, "www.nutch.org", new String[]{"www.nutch.org"});
     //Notice the trailing .  See https://issues.apache.org/jira/browse/LUCENE-1068.
     // the following should be recognized as HOST:
     assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
+
+    // 2.3 should show the bug
+    a2 = new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_23);
+    assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "wwwnutchorg" }, new String[] { "<ACRONYM>" });
+
+    // 2.4 should not show the bug
+    a2 = new StandardAnalyzer(Version.LUCENE_24);
+    assertAnalyzesTo(a2, "www.nutch.org.", new String[]{ "www.nutch.org" }, new String[] { "<HOST>" });
   }
 
   public void testEMailAddresses() throws Exception {
@@ -19,6 +19,7 @@ package org.apache.lucene.analysis;
 
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.util.Version;
 
 import java.io.StringReader;
 import java.io.IOException;
@@ -28,7 +29,7 @@ import java.util.HashSet;
 
 public class TestStopAnalyzer extends BaseTokenStreamTestCase {
 
-  private StopAnalyzer stop = new StopAnalyzer(false);
+  private StopAnalyzer stop = new StopAnalyzer(Version.LUCENE_CURRENT);
   private Set inValidTokens = new HashSet();
 
   public TestStopAnalyzer(String s) {
@@ -61,7 +62,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
     stopWordsSet.add("good");
     stopWordsSet.add("test");
     stopWordsSet.add("analyzer");
-    StopAnalyzer newStop = new StopAnalyzer(stopWordsSet, false);
+    StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_24, stopWordsSet);
     StringReader reader = new StringReader("This is a good test of the english stop analyzer");
     TokenStream stream = newStop.tokenStream("test", reader);
     assertNotNull(stream);
@@ -71,7 +72,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
     while (stream.incrementToken()) {
       String text = termAtt.term();
      assertFalse(stopWordsSet.contains(text));
-      assertEquals(1,posIncrAtt.getPositionIncrement()); // by default stop tokenizer does not apply increments.
+      assertEquals(1,posIncrAtt.getPositionIncrement()); // in 2.4 stop tokenizer does not apply increments.
     }
   }
 
@@ -80,7 +81,7 @@ public class TestStopAnalyzer extends BaseTokenStreamTestCase {
     stopWordsSet.add("good");
     stopWordsSet.add("test");
     stopWordsSet.add("analyzer");
-    StopAnalyzer newStop = new StopAnalyzer(stopWordsSet, true);
+    StopAnalyzer newStop = new StopAnalyzer(Version.LUCENE_CURRENT, stopWordsSet);
     StringReader reader = new StringReader("This is a good test of the english stop analyzer with positions");
     int expectedIncr[] = { 1, 1, 1, 3, 1, 1, 1, 2, 1};
     TokenStream stream = newStop.tokenStream("test", reader);
@@ -22,6 +22,7 @@ import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.English;
+import org.apache.lucene.util.Version;
 
 import java.io.IOException;
 import java.io.StringReader;
@@ -167,10 +168,10 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
       buffer.append(English.intToEnglish(i).toUpperCase()).append(' ');
     }
     //make sure we produce the same tokens
-    TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))));
+    TeeSinkTokenFilter teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(buffer.toString()))));
     TokenStream sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(100));
     teeStream.consumeAllTokens();
-    TokenStream stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))), 100);
+    TokenStream stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(buffer.toString()))), 100);
     TermAttribute tfTok = stream.addAttribute(TermAttribute.class);
     TermAttribute sinkTok = sink.addAttribute(TermAttribute.class);
     for (int i=0; stream.incrementToken(); i++) {
@@ -183,12 +184,12 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
       int tfPos = 0;
       long start = System.currentTimeMillis();
       for (int i = 0; i < 20; i++) {
-        stream = new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString())));
+        stream = new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(buffer.toString())));
         PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
         while (stream.incrementToken()) {
          tfPos += posIncrAtt.getPositionIncrement();
        }
-        stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))), modCounts[j]);
+        stream = new ModuloTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(buffer.toString()))), modCounts[j]);
         posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
         while (stream.incrementToken()) {
           tfPos += posIncrAtt.getPositionIncrement();
@@ -200,7 +201,7 @@ public class TestTeeSinkTokenFilter extends BaseTokenStreamTestCase {
       //simulate one field with one sink
       start = System.currentTimeMillis();
       for (int i = 0; i < 20; i++) {
-        teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(new StringReader(buffer.toString()))));
+        teeStream = new TeeSinkTokenFilter(new StandardFilter(new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader(buffer.toString()))));
         sink = teeStream.newSinkTokenStream(new ModuloSinkFilter(modCounts[j]));
         PositionIncrementAttribute posIncrAtt = teeStream.getAttribute(PositionIncrementAttribute.class);
         while (teeStream.incrementToken()) {
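[editor's note] A sketch of the tee/sink pattern with the version-aware tokenizer, assuming the no-argument newSinkTokenStream() overload (the test above uses the SinkFilter variant); the input text is invented:

    TeeSinkTokenFilter tee = new TeeSinkTokenFilter(new StandardFilter(
        new StandardTokenizer(Version.LUCENE_CURRENT, new StringReader("one two three"))));
    TokenStream sink = tee.newSinkTokenStream();  // receives a copy of every token
    tee.consumeAllTokens();  // drains the tee; the sink can then be iterated independently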
@@ -66,6 +66,7 @@ import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.store.SingleInstanceLockFactory;
 import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util._TestUtil;
+import org.apache.lucene.util.Version;
 
 public class TestIndexWriter extends BaseTokenStreamTestCase {
     public TestIndexWriter(String name) {
@@ -1701,7 +1702,7 @@ public class TestIndexWriter extends BaseTokenStreamTestCase {
     IndexWriter writer = new IndexWriter(dir, new Analyzer() {
 
       public TokenStream tokenStream(String fieldName, Reader reader) {
-        return new TokenFilter(new StandardTokenizer(reader)) {
+        return new TokenFilter(new StandardTokenizer(Version.LUCENE_CURRENT, reader)) {
          private int count = 0;
 
           public boolean incrementToken() throws IOException {
@@ -4167,7 +4168,7 @@ public class TestIndexWriter extends BaseTokenStreamTestCase {
   // LUCENE-1448
   public void testEndOffsetPositionStopFilter() throws Exception {
     MockRAMDirectory dir = new MockRAMDirectory();
-    IndexWriter w = new IndexWriter(dir, new StopAnalyzer(true), IndexWriter.MaxFieldLength.LIMITED);
+    IndexWriter w = new IndexWriter(dir, new StopAnalyzer(Version.LUCENE_CURRENT), IndexWriter.MaxFieldLength.LIMITED);
     Document doc = new Document();
     Field f = new Field("field", "abcd the", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
     doc.add(f);
@@ -31,6 +31,7 @@ import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;

 /**
  * Test QueryParser's ability to deal with Analyzers that return more

@@ -44,7 +45,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {

   public void testMultiAnalyzer() throws ParseException {

-    QueryParser qp = new QueryParser("", new MultiAnalyzer());
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "", new MultiAnalyzer());

     // trivial, no multiple tokens:
     assertEquals("foo", qp.parse("foo").toString());

@@ -117,7 +118,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
   }

   public void testPosIncrementAnalyzer() throws ParseException {
-    QueryParser qp = new QueryParser("", new PosIncrementAnalyzer());
+    QueryParser qp = new QueryParser(Version.LUCENE_24, "", new PosIncrementAnalyzer());
     assertEquals("quick brown", qp.parse("the quick brown").toString());
     assertEquals("\"quick brown\"", qp.parse("\"the quick brown\"").toString());
     assertEquals("quick brown fox", qp.parse("the quick brown fox").toString());

@@ -134,7 +135,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
   }

   public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result = new StandardTokenizer(reader);
+    TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
     result = new TestFilter(result);
     result = new LowerCaseFilter(result);
     return result;

@@ -200,7 +201,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
   }

   public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result = new StandardTokenizer(reader);
+    TokenStream result = new StandardTokenizer(Version.LUCENE_CURRENT, reader);
     result = new TestPosIncrementFilter(result);
     result = new LowerCaseFilter(result);
     return result;

@@ -238,7 +239,7 @@ public class TestMultiAnalyzer extends BaseTokenStreamTestCase {
   private final static class DumbQueryParser extends QueryParser {

     public DumbQueryParser(String f, Analyzer a) {
-      super(f, a);
+      super(Version.LUCENE_CURRENT, f, a);
     }

     /** expose super's version */
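DumbQueryParser above shows the pattern every QueryParser subclass must follow after this change: thread a Version through to super(). A hedged sketch of that subclassing idiom follows; the class name VersionedParserSketch, the field name, and the printed output are illustrative assumptions, not part of the patch.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;

public class VersionedParserSketch extends QueryParser {
  public VersionedParserSketch(String field, Analyzer analyzer) {
    // matchVersion is the new required first argument.
    super(Version.LUCENE_CURRENT, field, analyzer);
  }

  public static void main(String[] args) throws ParseException {
    Query q = new VersionedParserSketch("body", new WhitespaceAnalyzer()).parse("hello world");
    System.out.println(q); // body:hello body:world
  }
}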
@@ -36,6 +36,7 @@ import org.apache.lucene.search.BooleanClause.Occur;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.util.Version;

 /**
  * Tests QueryParser.

@@ -59,18 +60,18 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {
     String[] fields = {"b", "t"};
     Occur occur[] = {Occur.SHOULD, Occur.SHOULD};
     TestQueryParser.QPTestAnalyzer a = new TestQueryParser.QPTestAnalyzer();
-    MultiFieldQueryParser mfqp = new MultiFieldQueryParser(fields, a);
+    MultiFieldQueryParser mfqp = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, a);

     Query q = mfqp.parse(qtxt);
     assertEquals(expectedRes, q.toString());

-    q = MultiFieldQueryParser.parse(qtxt, fields, occur, a);
+    q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, qtxt, fields, occur, a);
     assertEquals(expectedRes, q.toString());
   }

   public void testSimple() throws Exception {
     String[] fields = {"b", "t"};
-    MultiFieldQueryParser mfqp = new MultiFieldQueryParser(fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+    MultiFieldQueryParser mfqp = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));

     Query q = mfqp.parse("one");
     assertEquals("b:one t:one", q.toString());

@@ -133,7 +134,7 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {
     boosts.put("b", Float.valueOf(5));
     boosts.put("t", Float.valueOf(10));
     String[] fields = {"b", "t"};
-    MultiFieldQueryParser mfqp = new MultiFieldQueryParser(fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), boosts);
+    MultiFieldQueryParser mfqp = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT), boosts);


     //Check for simple

@@ -159,24 +160,24 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {
   public void testStaticMethod1() throws ParseException {
     String[] fields = {"b", "t"};
     String[] queries = {"one", "two"};
-    Query q = MultiFieldQueryParser.parse(queries, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+    Query q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
     assertEquals("b:one t:two", q.toString());

     String[] queries2 = {"+one", "+two"};
-    q = MultiFieldQueryParser.parse(queries2, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+    q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries2, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
     assertEquals("(+b:one) (+t:two)", q.toString());

     String[] queries3 = {"one", "+two"};
-    q = MultiFieldQueryParser.parse(queries3, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+    q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries3, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
     assertEquals("b:one (+t:two)", q.toString());

     String[] queries4 = {"one +more", "+two"};
-    q = MultiFieldQueryParser.parse(queries4, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+    q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries4, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
     assertEquals("(b:one +b:more) (+t:two)", q.toString());

     String[] queries5 = {"blah"};
     try {
-      q = MultiFieldQueryParser.parse(queries5, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+      q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries5, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
       fail();
     } catch(IllegalArgumentException e) {
       // expected exception, array length differs

@@ -186,11 +187,11 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {
     TestQueryParser.QPTestAnalyzer stopA = new TestQueryParser.QPTestAnalyzer();

     String[] queries6 = {"((+stop))", "+((stop))"};
-    q = MultiFieldQueryParser.parse(queries6, fields, stopA);
+    q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries6, fields, stopA);
     assertEquals("", q.toString());

     String[] queries7 = {"one ((+stop)) +more", "+((stop)) +two"};
-    q = MultiFieldQueryParser.parse(queries7, fields, stopA);
+    q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries7, fields, stopA);
     assertEquals("(b:one +b:more) (+t:two)", q.toString());

   }

@@ -198,15 +199,15 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {
   public void testStaticMethod2() throws ParseException {
     String[] fields = {"b", "t"};
     BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT};
-    Query q = MultiFieldQueryParser.parse("one", fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+    Query q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, "one", fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
     assertEquals("+b:one -t:one", q.toString());

-    q = MultiFieldQueryParser.parse("one two", fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+    q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, "one two", fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
     assertEquals("+(b:one b:two) -(t:one t:two)", q.toString());

     try {
       BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST};
-      q = MultiFieldQueryParser.parse("blah", fields, flags2, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+      q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, "blah", fields, flags2, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
       fail();
     } catch(IllegalArgumentException e) {
       // expected exception, array length differs

@@ -217,17 +218,17 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {
     String[] fields = {"b", "t"};
     //int[] flags = {MultiFieldQueryParser.REQUIRED_FIELD, MultiFieldQueryParser.PROHIBITED_FIELD};
     BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT};
-    MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+    MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));

-    Query q = MultiFieldQueryParser.parse("one", fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));//, fields, flags, new StandardAnalyzer());
+    Query q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, "one", fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));//, fields, flags, new StandardAnalyzer());
     assertEquals("+b:one -t:one", q.toString());

-    q = MultiFieldQueryParser.parse("one two", fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+    q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, "one two", fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
     assertEquals("+(b:one b:two) -(t:one t:two)", q.toString());

     try {
       BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST};
-      q = MultiFieldQueryParser.parse("blah", fields, flags2, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+      q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, "blah", fields, flags2, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
       fail();
     } catch(IllegalArgumentException e) {
       // expected exception, array length differs

@@ -239,12 +240,12 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {
     String[] fields = {"f1", "f2", "f3"};
     BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST,
         BooleanClause.Occur.MUST_NOT, BooleanClause.Occur.SHOULD};
-    Query q = MultiFieldQueryParser.parse(queries, fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+    Query q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries, fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
     assertEquals("+f1:one -f2:two f3:three", q.toString());

     try {
       BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST};
-      q = MultiFieldQueryParser.parse(queries, fields, flags2, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+      q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries, fields, flags2, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
       fail();
     } catch(IllegalArgumentException e) {
       // expected exception, array length differs

@@ -255,12 +256,12 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {
     String[] queries = {"one", "two"};
     String[] fields = {"b", "t"};
     BooleanClause.Occur[] flags = {BooleanClause.Occur.MUST, BooleanClause.Occur.MUST_NOT};
-    Query q = MultiFieldQueryParser.parse(queries, fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+    Query q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries, fields, flags, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
     assertEquals("+b:one -t:two", q.toString());

     try {
       BooleanClause.Occur[] flags2 = {BooleanClause.Occur.MUST};
-      q = MultiFieldQueryParser.parse(queries, fields, flags2, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+      q = MultiFieldQueryParser.parse(Version.LUCENE_CURRENT, queries, fields, flags2, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
       fail();
     } catch(IllegalArgumentException e) {
       // expected exception, array length differs

@@ -269,7 +270,7 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {

   public void testAnalyzerReturningNull() throws ParseException {
     String[] fields = new String[] { "f1", "f2", "f3" };
-    MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new AnalyzerReturningNull());
+    MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, new AnalyzerReturningNull());
     Query q = parser.parse("bla AND blo");
     assertEquals("+(f2:bla f3:bla) +(f2:blo f3:blo)", q.toString());
     // the following queries are not affected as their terms are not analyzed anyway:

@@ -291,7 +292,7 @@ public class TestMultiFieldQueryParser extends BaseTokenStreamTestCase {
     iw.close();

     MultiFieldQueryParser mfqp =
-      new MultiFieldQueryParser(new String[] {"body"}, analyzer);
+      new MultiFieldQueryParser(Version.LUCENE_CURRENT, new String[] {"body"}, analyzer);
     mfqp.setDefaultOperator(QueryParser.Operator.AND);
     Query q = mfqp.parse("the footest");
     IndexSearcher is = new IndexSearcher(ramDir, true);
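All of the MultiFieldQueryParser call sites change the same way: the instance constructors and every static parse() overload gain a leading matchVersion parameter, while the fields/queries/flags arrays are untouched. A small illustrative sketch of the updated API follows; the field names, boosts, and the class name MultiFieldSketch are assumptions made for the example.

import java.util.HashMap;
import java.util.Map;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;

public class MultiFieldSketch {
  public static void main(String[] args) throws ParseException {
    String[] fields = {"title", "body"};
    Map boosts = new HashMap();
    boosts.put("title", Float.valueOf(2f)); // weight title matches higher
    MultiFieldQueryParser mfqp = new MultiFieldQueryParser(
        Version.LUCENE_CURRENT, fields, new StandardAnalyzer(Version.LUCENE_CURRENT), boosts);
    Query q = mfqp.parse("lucene");
    System.out.println(q); // e.g. title:lucene^2.0 body:lucene
  }
}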
@@ -47,6 +47,7 @@ import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
+import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.MultiTermQuery;
 import org.apache.lucene.search.FuzzyQuery;

@@ -60,7 +61,10 @@ import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.WildcardQuery;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.MockRAMDirectory;
 import org.apache.lucene.util.LocalizedTestCase;
+import org.apache.lucene.util.Version;

 /**
  * Tests QueryParser.

@@ -127,7 +131,7 @@ public class TestQueryParser extends LocalizedTestCase {

   public static class QPTestParser extends QueryParser {
     public QPTestParser(String f, Analyzer a) {
-      super(f, a);
+      super(Version.LUCENE_CURRENT, f, a);
     }

     protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException {

@@ -149,7 +153,7 @@ public class TestQueryParser extends LocalizedTestCase {
   public QueryParser getParser(Analyzer a) throws Exception {
     if (a == null)
       a = new SimpleAnalyzer();
-    QueryParser qp = new QueryParser("field", a);
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", a);
     qp.setDefaultOperator(QueryParser.OR_OPERATOR);
     return qp;
   }

@@ -219,7 +223,7 @@ public class TestQueryParser extends LocalizedTestCase {
       throws Exception {
     if (a == null)
       a = new SimpleAnalyzer();
-    QueryParser qp = new QueryParser("field", a);
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", a);
     qp.setDefaultOperator(QueryParser.AND_OPERATOR);
     return qp.parse(query);
   }

@@ -291,7 +295,7 @@ public class TestQueryParser extends LocalizedTestCase {
     assertQueryEquals("+title:(dog OR cat) -author:\"bob dole\"", null,
                       "+(title:dog title:cat) -author:\"bob dole\"");

-    QueryParser qp = new QueryParser("field", new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
     // make sure OR is the default:
     assertEquals(QueryParser.OR_OPERATOR, qp.getDefaultOperator());
     qp.setDefaultOperator(QueryParser.AND_OPERATOR);

@@ -446,7 +450,7 @@ public class TestQueryParser extends LocalizedTestCase {
     assertQueryEquals("[ a TO z]", null, "[a TO z]");
     assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((TermRangeQuery)getQuery("[ a TO z]", null)).getRewriteMethod());

-    QueryParser qp = new QueryParser("field", new SimpleAnalyzer());
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", new SimpleAnalyzer());
     qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
     assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,((TermRangeQuery)qp.parse("[ a TO z]")).getRewriteMethod());

@@ -472,7 +476,7 @@ public class TestQueryParser extends LocalizedTestCase {
     iw.close();
     IndexSearcher is = new IndexSearcher(ramDir, true);

-    QueryParser qp = new QueryParser("content", new WhitespaceAnalyzer());
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "content", new WhitespaceAnalyzer());

     // Neither Java 1.4.2 nor 1.5.0 has Farsi Locale collation available in
     // RuleBasedCollator. However, the Arabic Locale seems to order the Farsi

@@ -569,7 +573,7 @@ public class TestQueryParser extends LocalizedTestCase {
     final String defaultField = "default";
     final String monthField = "month";
     final String hourField = "hour";
-    QueryParser qp = new QueryParser("field", new SimpleAnalyzer());
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", new SimpleAnalyzer());

     // Don't set any date resolution and verify if DateField is used
     assertDateRangeQueryEquals(qp, defaultField, startDate, endDate,

@@ -792,7 +796,7 @@ public class TestQueryParser extends LocalizedTestCase {
     Set stopWords = new HashSet(1);
     stopWords.add("on");
     StandardAnalyzer oneStopAnalyzer = new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT, stopWords);
-    QueryParser qp = new QueryParser("field", oneStopAnalyzer);
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", oneStopAnalyzer);
     Query q = qp.parse("on^1.0");
     assertNotNull(q);
     q = qp.parse("\"hello\"^2.0");

@@ -804,7 +808,7 @@ public class TestQueryParser extends LocalizedTestCase {
     q = qp.parse("\"on\"^1.0");
     assertNotNull(q);

-    QueryParser qp2 = new QueryParser("field", new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+    QueryParser qp2 = new QueryParser(Version.LUCENE_CURRENT, "field", new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
     q = qp2.parse("the^3");
     // "the" is a stop word so the result is an empty query:
     assertNotNull(q);

@@ -852,7 +856,7 @@ public class TestQueryParser extends LocalizedTestCase {
   public void testBooleanQuery() throws Exception {
     BooleanQuery.setMaxClauseCount(2);
     try {
-      QueryParser qp = new QueryParser("field", new WhitespaceAnalyzer());
+      QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", new WhitespaceAnalyzer());
       qp.parse("one two three");
       fail("ParseException expected due to too many boolean clauses");
     } catch (ParseException expected) {

@@ -864,7 +868,7 @@ public class TestQueryParser extends LocalizedTestCase {
   * This test differs from TestPrecedenceQueryParser
   */
   public void testPrecedence() throws Exception {
-    QueryParser qp = new QueryParser("field", new WhitespaceAnalyzer());
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", new WhitespaceAnalyzer());
     Query query1 = qp.parse("A AND B OR C AND D");
     Query query2 = qp.parse("+A +B +C +D");
     assertEquals(query1, query2);

@@ -888,7 +892,7 @@ public class TestQueryParser extends LocalizedTestCase {

   public void testStarParsing() throws Exception {
     final int[] type = new int[1];
-    QueryParser qp = new QueryParser("field", new WhitespaceAnalyzer()) {
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", new WhitespaceAnalyzer()) {
       protected Query getWildcardQuery(String field, String termStr) throws ParseException {
         // override error checking of superclass
         type[0]=1;

@@ -944,7 +948,7 @@ public class TestQueryParser extends LocalizedTestCase {
   }

   public void testStopwords() throws Exception {
-    QueryParser qp = new QueryParser("a", new StopAnalyzer(StopFilter.makeStopSet("the", "foo"), true));
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "foo")));
     Query result = qp.parse("a:the OR a:foo");
     assertNotNull("result is null and it shouldn't be", result);
     assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);

@@ -960,7 +964,7 @@ public class TestQueryParser extends LocalizedTestCase {
   }

   public void testPositionIncrement() throws Exception {
-    QueryParser qp = new QueryParser("a", new StopAnalyzer(StopFilter.makeStopSet("the", "in", "are", "this"), true));
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "a", new StopAnalyzer(Version.LUCENE_CURRENT, StopFilter.makeStopSet("the", "in", "are", "this")));
     qp.setEnablePositionIncrements(true);
     String qtxt = "\"the words in poisitions pos02578 are stopped in this phrasequery\"";
     //               0         2                      5           7  8

@@ -977,7 +981,7 @@ public class TestQueryParser extends LocalizedTestCase {
   }

   public void testMatchAllDocs() throws Exception {
-    QueryParser qp = new QueryParser("field", new WhitespaceAnalyzer());
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", new WhitespaceAnalyzer());
     assertEquals(new MatchAllDocsQuery(), qp.parse("*:*"));
     assertEquals(new MatchAllDocsQuery(), qp.parse("(*:*)"));
     BooleanQuery bq = (BooleanQuery)qp.parse("+*:* -*:*");

@@ -986,7 +990,7 @@ public class TestQueryParser extends LocalizedTestCase {
   }

   private void assertHits(int expected, String query, IndexSearcher is) throws ParseException, IOException {
-    QueryParser qp = new QueryParser("date", new WhitespaceAnalyzer());
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "date", new WhitespaceAnalyzer());
     qp.setLocale(Locale.ENGLISH);
     Query q = qp.parse(query);
     ScoreDoc[] hits = is.search(q, null, 1000).scoreDocs;

@@ -1008,4 +1012,49 @@ public class TestQueryParser extends LocalizedTestCase {
     BooleanQuery.setMaxClauseCount(originalMaxClauses);
   }

+  // LUCENE-2002: make sure defaults for StandardAnalyzer's
+  // enableStopPositionIncr & QueryParser's enablePosIncr
+  // "match"
+  public void testPositionIncrements() throws Exception {
+    Directory dir = new MockRAMDirectory();
+    Analyzer a = new StandardAnalyzer(Version.LUCENE_CURRENT);
+    IndexWriter w = new IndexWriter(dir, a, IndexWriter.MaxFieldLength.UNLIMITED);
+    Document doc = new Document();
+    doc.add(new Field("f", "the wizard of ozzy", Field.Store.NO, Field.Index.ANALYZED));
+    w.addDocument(doc);
+    IndexReader r = w.getReader();
+    w.close();
+    IndexSearcher s = new IndexSearcher(r);
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "f", a);
+    Query q = qp.parse("\"wizard of ozzy\"");
+    assertEquals(1, s.search(q, 1).totalHits);
+    r.close();
+    dir.close();
+  }
+
+  // LUCENE-2002: when we run javacc to regen QueryParser,
+  // we also run a replaceregexp step to fix 2 of the public
+  // ctors (change them to protected):
+  //
+  //   protected QueryParser(CharStream stream)
+  //
+  //   protected QueryParser(QueryParserTokenManager tm)
+  //
+  // This test is here as a safety, in case that ant step
+  // doesn't work for some reason.
+  public void testProtectedCtors() throws Exception {
+    try {
+      QueryParser.class.getConstructor(new Class[] {CharStream.class});
+      fail("please switch public QueryParser(CharStream) to be protected");
+    } catch (NoSuchMethodException nsme) {
+      // expected
+    }
+    try {
+      QueryParser.class.getConstructor(new Class[] {QueryParserTokenManager.class});
+      fail("please switch public QueryParser(QueryParserTokenManager) to be protected");
+    } catch (NoSuchMethodException nsme) {
+      // expected
+    }
+  }
+
 }
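The new testPositionIncrements test pins down the point of LUCENE-2002: the analyzer's stopword "holes" on the index side and the parser's position-aware phrase queries on the search side must use the same default. A compact sketch of the same round trip outside the test harness follows; the class name and field name are illustrative assumptions, and the API assumed is the 2.9/3.0-dev trunk.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class MatchingDefaultsSketch {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    // StandardAnalyzer (2.9+) leaves a position hole where "of" is stopped...
    Analyzer a = new StandardAnalyzer(Version.LUCENE_CURRENT);
    IndexWriter w = new IndexWriter(dir, a, IndexWriter.MaxFieldLength.UNLIMITED);
    Document doc = new Document();
    doc.add(new Field("f", "the wizard of ozzy", Field.Store.NO, Field.Index.ANALYZED));
    w.addDocument(doc);
    w.close();
    IndexSearcher s = new IndexSearcher(dir, true);
    // ...and QueryParser (2.9+) keeps the matching hole in the PhraseQuery,
    // so index-side and query-side positions line up.
    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "f", a);
    Query q = qp.parse("\"wizard of ozzy\""); // parses to f:"wizard ? ozzy"
    System.out.println(s.search(q, 1).totalHits); // 1
    s.close();
  }
}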
@@ -32,6 +32,7 @@ import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.store.MockRAMDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;

 /** Test BooleanQuery2 against BooleanQuery by overriding the standard query parser.
  * This also tests the scoring order of BooleanQuery.

@@ -104,7 +105,7 @@ public class TestBoolean2 extends LuceneTestCase {
   };

   public Query makeQuery(String queryText) throws ParseException {
-    Query q = (new QueryParser(field, new WhitespaceAnalyzer())).parse(queryText);
+    Query q = (new QueryParser(Version.LUCENE_CURRENT, field, new WhitespaceAnalyzer())).parse(queryText);
     return q;
   }

@@ -33,6 +33,7 @@ import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.Version;

 /**
  * Test date sorting, i.e. auto-sorting of fields with type "long".

@@ -74,7 +75,7 @@ public class TestDateSort extends LuceneTestCase {

     Sort sort = new Sort(new SortField(DATE_TIME_FIELD, SortField.STRING, true));

-    QueryParser queryParser = new QueryParser(TEXT_FIELD, new WhitespaceAnalyzer());
+    QueryParser queryParser = new QueryParser(Version.LUCENE_CURRENT, TEXT_FIELD, new WhitespaceAnalyzer());
     Query query = queryParser.parse("Document");

     // Execute the search and process the search results.
@@ -32,6 +32,7 @@ import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;

 /**
  * Tests primitive queries (ie: that rewrite to themselves) to

@@ -51,7 +52,7 @@ public class TestExplanations extends LuceneTestCase {
   public static final String KEY = "KEY";
   public static final String FIELD = "field";
   public static final QueryParser qp =
-    new QueryParser(FIELD, new WhitespaceAnalyzer());
+    new QueryParser(Version.LUCENE_CURRENT, FIELD, new WhitespaceAnalyzer());

   public void tearDown() throws Exception {
     super.tearDown();
@@ -31,6 +31,7 @@ import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MockRAMDirectory;
 import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.util.Version;

 /**
  * Tests {@link FuzzyQuery}.

@@ -313,7 +314,7 @@ public class TestFuzzyQuery extends LuceneTestCase {
     IndexReader r = w.getReader();
     w.close();

-    Query q = new QueryParser("field", analyzer).parse( "giga~0.9" );
+    Query q = new QueryParser(Version.LUCENE_CURRENT, "field", analyzer).parse( "giga~0.9" );

     // 3. search
     IndexSearcher searcher = new IndexSearcher(r);
@@ -100,7 +100,7 @@ public class TestMatchAllDocsQuery extends LuceneTestCase {
     assertEquals(2, hits.length);

     // test parsable toString()
-    QueryParser qp = new QueryParser("key", analyzer);
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "key", analyzer);
     hits = is.search(qp.parse(new MatchAllDocsQuery().toString()), null, 1000).scoreDocs;
     assertEquals(2, hits.length);

@@ -30,6 +30,7 @@ import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.store.MockRAMDirectory;
+import org.apache.lucene.util.Version;

 import java.io.IOException;
 import java.util.Collections;

@@ -101,7 +102,7 @@ public class TestMultiSearcher extends LuceneTestCase
     writerB.close();

     // creating the query
-    QueryParser parser = new QueryParser("fulltext", new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "fulltext", new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
     Query query = parser.parse("handle:1");

     // building the searchables
@@ -26,6 +26,7 @@ import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.Version;

 import java.io.IOException;

@@ -87,7 +88,7 @@ public class TestMultiSearcherRanking extends LuceneTestCase {
   private void checkQuery(String queryStr) throws IOException, ParseException {
     // check result hit ranking
     if(verbose) System.out.println("Query: " + queryStr);
-    QueryParser queryParser = new QueryParser(FIELD_NAME, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+    QueryParser queryParser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
     Query query = queryParser.parse(queryStr);
     ScoreDoc[] multiSearcherHits = multiSearcher.search(query, null, 1000).scoreDocs;
     ScoreDoc[] singleSearcherHits = singleSearcher.search(query, null, 1000).scoreDocs;
@@ -25,6 +25,7 @@ import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.analysis.SimpleAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
+import org.apache.lucene.util.Version;

 /** Similarity unit test.
  *

@@ -48,7 +49,7 @@ public class TestNot extends LuceneTestCase {
     writer.close();

     Searcher searcher = new IndexSearcher(store, true);
-    QueryParser parser = new QueryParser("field", new SimpleAnalyzer());
+    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "field", new SimpleAnalyzer());
     Query query = parser.parse("a NOT b");
     //System.out.println(query);
     ScoreDoc[] hits = searcher.search(query, null, 1000).scoreDocs;
@@ -25,6 +25,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.Version;

 import java.io.IOException;
 import java.io.Reader;

@@ -201,7 +202,7 @@ public class TestPhraseQuery extends LuceneTestCase {

   public void testPhraseQueryWithStopAnalyzer() throws Exception {
     RAMDirectory directory = new RAMDirectory();
-    StopAnalyzer stopAnalyzer = new StopAnalyzer(false);
+    StopAnalyzer stopAnalyzer = new StopAnalyzer(Version.LUCENE_24);
     IndexWriter writer = new IndexWriter(directory, stopAnalyzer, true,
       IndexWriter.MaxFieldLength.LIMITED);
     Document doc = new Document();

@@ -220,7 +221,7 @@ public class TestPhraseQuery extends LuceneTestCase {
     QueryUtils.check(query,searcher);


-    // currently StopAnalyzer does not leave "holes", so this matches.
+    // StopAnalyzer as of 2.4 does not leave "holes", so this matches.
     query = new PhraseQuery();
     query.add(new Term("field", "words"));
     query.add(new Term("field", "here"));

@@ -357,8 +358,8 @@ public class TestPhraseQuery extends LuceneTestCase {
   }

   public void testToString() throws Exception {
-    StopAnalyzer analyzer = new StopAnalyzer(true);
-    QueryParser qp = new QueryParser("field", analyzer);
+    StopAnalyzer analyzer = new StopAnalyzer(Version.LUCENE_CURRENT);
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field", analyzer);
     qp.setEnablePositionIncrements(true);
     PhraseQuery q = (PhraseQuery)qp.parse("\"this hi this is a test is\"");
     assertEquals("field:\"? hi ? ? ? test\"", q.toString());
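The StopAnalyzer hunks above show how the Version argument replaces the old enablePositionIncrements boolean: Version.LUCENE_24 selects the pre-2.9 "no holes" behavior, while 2.9 and later leave a position hole per removed stopword. A sketch of that difference, under the stated assumption that StopAnalyzer(Version) keys position increments off the 2.9 cutoff as this patch does (class name and token text are made up; the expected increments are noted in comments):

import java.io.IOException;
import java.io.StringReader;

import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.Version;

public class StopHolesSketch {
  static void dump(Version v) throws IOException {
    TokenStream ts = new StopAnalyzer(v).tokenStream("f", new StringReader("one the two"));
    TermAttribute term = (TermAttribute) ts.addAttribute(TermAttribute.class);
    PositionIncrementAttribute posIncr =
        (PositionIncrementAttribute) ts.addAttribute(PositionIncrementAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(v + ": " + term.term() + " +" + posIncr.getPositionIncrement());
    }
  }

  public static void main(String[] args) throws IOException {
    dump(Version.LUCENE_24);      // "one" +1, then "two" +1 (no hole for "the")
    dump(Version.LUCENE_CURRENT); // "one" +1, then "two" +2 (hole where "the" was)
  }
}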
@@ -51,6 +51,7 @@ import org.apache.lucene.search.spans.SpanNearQuery;
 import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.search.spans.Spans;
+import org.apache.lucene.util.Version;

 /**
  * Term position unit test.

@@ -188,7 +189,7 @@ public class TestPositionIncrement extends BaseTokenStreamTestCase {
     assertEquals(0, hits.length);

     // should not find "1 2" because there is a gap of 1 in the index
-    QueryParser qp = new QueryParser("field",
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "field",
                                      new StopWhitespaceAnalyzer(false));
     q = (PhraseQuery) qp.parse("\"1 2\"");
     hits = searcher.search(q, null, 1000).scoreDocs;

@@ -212,7 +213,7 @@ public class TestPositionIncrement extends BaseTokenStreamTestCase {
     assertEquals(0, hits.length);

     // when both qp and stopFilter propagate increments, we should find the doc.
-    qp = new QueryParser("field",
+    qp = new QueryParser(Version.LUCENE_CURRENT, "field",
                          new StopWhitespaceAnalyzer(true));
     qp.setEnablePositionIncrements(true);
     q = (PhraseQuery) qp.parse("\"1 stop 2\"");
@@ -28,6 +28,7 @@ import org.apache.lucene.search.spans.SpanQuery;
 import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.MockRAMDirectory;
+import org.apache.lucene.util.Version;


 /**

@@ -327,7 +328,7 @@ public class TestSimpleExplanations extends TestExplanations {
     writerB.addDocument(lDoc3);
     writerB.close();

-    QueryParser parser = new QueryParser("fulltext", new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
+    QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "fulltext", new StandardAnalyzer(org.apache.lucene.util.Version.LUCENE_CURRENT));
     Query query = parser.parse("handle:1");

     Searcher[] searchers = new Searcher[2];
@@ -31,6 +31,7 @@ import org.apache.lucene.search.TimeLimitingCollector.TimeExceededException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;

 /**
  * Tests the {@link TimeLimitingCollector}. This test checks (1) search

@@ -85,7 +86,7 @@ public class TestTimeLimitingCollector extends LuceneTestCase {
     for (int i = 0; i < docText.length; i++) {
       qtxt += ' ' + docText[i]; // large query so that search will be longer
     }
-    QueryParser queryParser = new QueryParser(FIELD_NAME, new WhitespaceAnalyzer());
+    QueryParser queryParser = new QueryParser(Version.LUCENE_CURRENT, FIELD_NAME, new WhitespaceAnalyzer());
     query = queryParser.parse(qtxt);

     // warm the searcher
@@ -28,6 +28,7 @@ import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.util.Version;

 import java.io.IOException;

@@ -238,7 +239,7 @@ public class TestWildcard
   public void testParsingAndSearching() throws Exception {
     String field = "content";
     boolean dbg = false;
-    QueryParser qp = new QueryParser(field, new WhitespaceAnalyzer());
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, field, new WhitespaceAnalyzer());
     qp.setAllowLeadingWildcard(true);
     String docs[] = {
       "\\ abcdefg1",
@@ -28,6 +28,7 @@ import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.QueryUtils;
 import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.util.Version;

 /**
  * Test CustomScoreQuery search.

@@ -139,7 +140,7 @@ public class TestCustomScoreQuery extends FunctionTestSetup {
     float boost = (float) dboost;
     IndexSearcher s = new IndexSearcher(dir, true);
     FieldScoreQuery qValSrc = new FieldScoreQuery(field, tp); // a query that would score by the field
-    QueryParser qp = new QueryParser(TEXT_FIELD, anlzr);
+    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, TEXT_FIELD, anlzr);
     String qtxt = "first aid text"; // from the doc texts in FunctionQuerySetup.

     // regular (boolean) query.
@@ -30,13 +30,14 @@ import org.apache.lucene.search.Weight;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.store.RAMDirectory;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util.Version;

 public class TestNearSpansOrdered extends LuceneTestCase {
   protected IndexSearcher searcher;

   public static final String FIELD = "field";
   public static final QueryParser qp =
-    new QueryParser(FIELD, new WhitespaceAnalyzer());
+    new QueryParser(Version.LUCENE_CURRENT, FIELD, new WhitespaceAnalyzer());

   public void tearDown() throws Exception {
     super.tearDown();