mirror of https://github.com/apache/lucene.git
LUCENE-2413: clean up and document MockAnalyzer; add a MockTokenFilter, which can simulate a stop filter, length filter, keep-word filter, etc.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@944908 13f79535-47bb-0310-9956-ffa450edef68
parent d23eb64bd7
commit e292af7b12
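The diff below renames the tokenizer DFAs (MockAnalyzer.WHITESPACE/KEYWORD/SIMPLE become MockTokenizer.*) and teaches MockAnalyzer to chain its tokenizer through the new MockTokenFilter. A minimal sketch of the new four-argument constructor in use, assuming the API exactly as added in this commit; the class name and the consuming loop are illustrative, not part of the change:

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class MockAnalyzerDemo {
  public static void main(String[] args) throws Exception {
    // whitespace tokenization + lowercasing + english stopword removal,
    // preserving position increments across the removed stopwords
    Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, true,
        MockTokenFilter.ENGLISH_STOPSET, true);
    TokenStream ts = a.tokenStream("field", new StringReader("the quick brown fox"));
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    while (ts.incrementToken()) {
      System.out.println(term.toString()); // prints: quick, brown, fox
    }
    ts.close();
  }
}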
@@ -19,6 +19,7 @@ package org.apache.lucene.queryParser.ext;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.queryParser.QueryParser;
 import org.apache.lucene.queryParser.TestQueryParser;
@@ -46,7 +47,7 @@ public class TestExtendableQueryParser extends TestQueryParser {
   public QueryParser getParser(Analyzer a, Extensions extensions)
       throws Exception {
     if (a == null)
-      a = new MockAnalyzer(MockAnalyzer.SIMPLE, true);
+      a = new MockAnalyzer(MockTokenizer.SIMPLE, true);
     QueryParser qp = extensions == null ? new ExtendableQueryParser(
         TEST_VERSION_CURRENT, "field", a) : new ExtendableQueryParser(
         TEST_VERSION_CURRENT, "field", a, extensions);
@@ -20,7 +20,6 @@ package org.apache.lucene.queryParser.precedence;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.SimpleAnalyzer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
@@ -100,7 +99,7 @@ public class TestPrecedenceQueryParser extends LocalizedTestCase {
     /** Filters MockTokenizer with StopFilter. */
     @Override
     public final TokenStream tokenStream(String fieldName, Reader reader) {
-      return new QPTestFilter(new MockTokenizer(reader, MockAnalyzer.SIMPLE, true));
+      return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
     }
   }
 
@@ -130,7 +129,7 @@ public class TestPrecedenceQueryParser extends LocalizedTestCase {
 
   public PrecedenceQueryParser getParser(Analyzer a) throws Exception {
     if (a == null)
-      a = new MockAnalyzer(MockAnalyzer.SIMPLE, true);
+      a = new MockAnalyzer(MockTokenizer.SIMPLE, true);
     PrecedenceQueryParser qp = new PrecedenceQueryParser("field", a);
     qp.setDefaultOperator(PrecedenceQueryParser.OR_OPERATOR);
     return qp;
@@ -175,7 +174,7 @@ public class TestPrecedenceQueryParser extends LocalizedTestCase {
   public Query getQueryDOA(String query, Analyzer a)
       throws Exception {
     if (a == null)
-      a = new MockAnalyzer(MockAnalyzer.SIMPLE, true);
+      a = new MockAnalyzer(MockTokenizer.SIMPLE, true);
     PrecedenceQueryParser qp = new PrecedenceQueryParser("field", a);
     qp.setDefaultOperator(PrecedenceQueryParser.AND_OPERATOR);
     return qp.parse(query);
@@ -144,7 +144,7 @@ public class TestQPHelper extends LocalizedTestCase {
     /** Filters MockTokenizer with StopFilter. */
     @Override
     public final TokenStream tokenStream(String fieldName, Reader reader) {
-      return new QPTestFilter(new MockTokenizer(reader, MockAnalyzer.SIMPLE, true));
+      return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
     }
   }
 
@@ -204,7 +204,7 @@ public class TestQPHelper extends LocalizedTestCase {
 
   public StandardQueryParser getParser(Analyzer a) throws Exception {
     if (a == null)
-      a = new MockAnalyzer(MockAnalyzer.SIMPLE, true);
+      a = new MockAnalyzer(MockTokenizer.SIMPLE, true);
     StandardQueryParser qp = new StandardQueryParser();
     qp.setAnalyzer(a);
 
@@ -294,7 +294,7 @@ public class TestQPHelper extends LocalizedTestCase {
 
   public Query getQueryDOA(String query, Analyzer a) throws Exception {
     if (a == null)
-      a = new MockAnalyzer(MockAnalyzer.SIMPLE, true);
+      a = new MockAnalyzer(MockTokenizer.SIMPLE, true);
     StandardQueryParser qp = new StandardQueryParser();
     qp.setAnalyzer(a);
     qp.setDefaultOperator(Operator.AND);
@@ -141,7 +141,7 @@ public class TestQueryParserWrapper extends LocalizedTestCase {
     /** Filters MockTokenizer with StopFilter. */
     @Override
     public final TokenStream tokenStream(String fieldName, Reader reader) {
-      return new QPTestFilter(new MockTokenizer(reader, MockAnalyzer.SIMPLE, true));
+      return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
     }
   }
 
@@ -219,7 +219,7 @@ public class TestQueryParserWrapper extends LocalizedTestCase {
 
   public QueryParserWrapper getParser(Analyzer a) throws Exception {
     if (a == null)
-      a = new MockAnalyzer(MockAnalyzer.SIMPLE, true);
+      a = new MockAnalyzer(MockTokenizer.SIMPLE, true);
     QueryParserWrapper qp = new QueryParserWrapper("field", a);
     qp.setDefaultOperator(QueryParserWrapper.OR_OPERATOR);
     return qp;
@@ -304,7 +304,7 @@ public class TestQueryParserWrapper extends LocalizedTestCase {
 
   public Query getQueryDOA(String query, Analyzer a) throws Exception {
     if (a == null)
-      a = new MockAnalyzer(MockAnalyzer.SIMPLE, true);
+      a = new MockAnalyzer(MockTokenizer.SIMPLE, true);
     QueryParserWrapper qp = new QueryParserWrapper("field", a);
     qp.setDefaultOperator(QueryParserWrapper.AND_OPERATOR);
     return qp.parse(query);
@@ -554,7 +554,7 @@ public class TestQueryParserWrapper extends LocalizedTestCase {
     assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((TermRangeQuery)getQuery("[ a TO z]", null)).getRewriteMethod());
 
     QueryParserWrapper qp = new QueryParserWrapper("field",
-        new MockAnalyzer(MockAnalyzer.SIMPLE, true));
+        new MockAnalyzer(MockTokenizer.SIMPLE, true));
 
     qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
     assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,((TermRangeQuery)qp.parse("[ a TO z]")).getRewriteMethod());
@@ -685,7 +685,7 @@ public class TestQueryParserWrapper extends LocalizedTestCase {
     final String monthField = "month";
     final String hourField = "hour";
     QueryParserWrapper qp = new QueryParserWrapper("field",
-        new MockAnalyzer(MockAnalyzer.SIMPLE, true));
+        new MockAnalyzer(MockTokenizer.SIMPLE, true));
 
     // Don't set any date resolution and verify if DateField is used
     assertDateRangeQueryEquals(qp, defaultField, startDate, endDate,
@@ -21,52 +21,72 @@ import java.io.IOException;
 import java.io.Reader;
 
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
-import org.apache.lucene.util.automaton.RegExp;
 
 /**
  * Analyzer for testing
  */
-public final class MockAnalyzer extends Analyzer {
-  /** Acts Similar to WhitespaceAnalyzer */
-  public static final CharacterRunAutomaton WHITESPACE =
-    new CharacterRunAutomaton(new RegExp("[^ \t\r\n]+").toAutomaton());
-  /** Acts Similar to KeywordAnalyzer.
-   * TODO: Keyword returns an "empty" token for an empty reader...
-   */
-  public static final CharacterRunAutomaton KEYWORD =
-    new CharacterRunAutomaton(new RegExp(".*").toAutomaton());
-  /** Acts like SimpleAnalyzer/LetterTokenizer. */
-  // the ugly regex below is Unicode 5.2 [:Letter:]
-  public static final CharacterRunAutomaton SIMPLE =
-    new CharacterRunAutomaton(new RegExp("[A-Za-zªµºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬˮͰ-ʹͶͷͺ-ͽΆΈ-ΊΌΎ-ΡΣ-ϵϷ-ҁҊ-ԥԱ-Ֆՙա-ևא-תװ-ײء-يٮٯٱ-ۓەۥۦۮۯۺ-ۼۿܐܒ-ܯݍ-ޥޱߊ-ߪߴߵߺࠀ-ࠕࠚࠤࠨऄ-हऽॐक़-ॡॱॲॹ-ॿঅ-ঌএঐও-নপ-রলশ-হঽৎড়ঢ়য়-ৡৰৱਅ-ਊਏਐਓ-ਨਪ-ਰਲਲ਼ਵਸ਼ਸਹਖ਼-ੜਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલળવ-હઽૐૠૡଅ-ଌଏଐଓ-ନପ-ରଲଳଵ-ହଽଡ଼ଢ଼ୟ-ୡୱஃஅ-ஊஎ-ஐஒ-கஙசஜஞடணதந-பம-ஹௐఅ-ఌఎ-ఐఒ-నప-ళవ-హఽౘౙౠౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽೞೠೡഅ-ഌഎ-ഐഒ-നപ-ഹഽൠൡൺ-ൿඅ-ඖක-නඳ-රලව-ෆก-ะาำเ-ๆກຂຄງຈຊຍດ-ທນ-ຟມ-ຣລວສຫອ-ະາຳຽເ-ໄໆໜໝༀཀ-ཇཉ-ཬྈ-ྋက-ဪဿၐ-ၕၚ-ၝၡၥၦၮ-ၰၵ-ႁႎႠ-Ⴥა-ჺჼᄀ-ቈቊ-ቍቐ-ቖቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏼᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗៜᠠ-ᡷᢀ-ᢨᢪᢰ-ᣵᤀ-ᤜᥐ-ᥭᥰ-ᥴᦀ-ᦫᧁ-ᧇᨀ-ᨖᨠ-ᩔᪧᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮᮯᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱⁿₐ-ₔℂℇℊ-ℓℕℙ-ℝℤΩℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎↃↄⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⴀ-ⴥⴰ-ⵥⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ々〆〱-〵〻〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎㆠ-ㆷㇰ-ㇿ㐀-䶵一-鿋ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪꘫꙀ-ꙟꙢ-ꙮꙿ-ꚗꚠ-ꛥꜗ-ꜟꜢ-ꞈꞋꞌꟻ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏꨀ-ꨨꩀ-ꩂꩄ-ꩋꩠ-ꩶꩺꪀ-ꪯꪱꪵꪶꪹ-ꪽꫀꫂꫛ-ꫝꯀ-ꯢ가-힣ힰ-ퟆퟋ-ퟻ豈-鶴侮-舘並-龎ff-stﬓ-ﬗיִײַ-ﬨשׁ-זּטּ-לּמּנּסּףּפּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐊀-𐊜𐊠-𐋐𐌀-𐌞𐌰-𐍀𐍂-𐍉𐎀-𐎝𐎠-𐏃𐏈-𐏏𐐀-𐒝𐠀-𐠅𐠈𐠊-𐠵𐠷𐠸𐠼𐠿-𐡕𐤀-𐤕𐤠-𐤹𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩠-𐩼𐬀-𐬵𐭀-𐭕𐭠-𐭲𐰀-𐱈𑂃-𑂯𒀀-𒍮𓀀-𓐮𝐀-𝑔𝑖-𝒜𝒞𝒟𝒢𝒥𝒦𝒩-𝒬𝒮-𝒹𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𠀀-𪛖𪜀-𫜴丽-𪘀]+").toAutomaton());
-
+public final class MockAnalyzer extends Analyzer {
   private final CharacterRunAutomaton runAutomaton;
   private final boolean lowerCase;
-
-  public MockAnalyzer(CharacterRunAutomaton runAutomaton, boolean lowerCase) {
+  private final CharacterRunAutomaton filter;
+  private final boolean enablePositionIncrements;
+
+  /**
+   * Creates a new MockAnalyzer.
+   *
+   * @param runAutomaton DFA describing how tokenization should happen (e.g. [a-zA-Z]+)
+   * @param lowerCase true if the tokenizer should lowercase terms
+   * @param filter DFA describing how terms should be filtered (set of stopwords, etc)
+   * @param enablePositionIncrements true if position increments should reflect filtered terms.
+   */
+  public MockAnalyzer(CharacterRunAutomaton runAutomaton, boolean lowerCase, CharacterRunAutomaton filter, boolean enablePositionIncrements) {
     this.runAutomaton = runAutomaton;
     this.lowerCase = lowerCase;
+    this.filter = filter;
+    this.enablePositionIncrements = enablePositionIncrements;
   }
+
+  /**
+   * Creates a new MockAnalyzer, with no filtering.
+   *
+   * @param runAutomaton DFA describing how tokenization should happen (e.g. [a-zA-Z]+)
+   * @param lowerCase true if the tokenizer should lowercase terms
+   */
+  public MockAnalyzer(CharacterRunAutomaton runAutomaton, boolean lowerCase) {
+    this(runAutomaton, lowerCase, MockTokenFilter.EMPTY_STOPSET, false);
+  }
 
   /**
    * Create a Whitespace-lowercasing analyzer with no stopwords removal
   */
   public MockAnalyzer() {
-    this(WHITESPACE, true);
+    this(MockTokenizer.WHITESPACE, true);
   }
 
   @Override
   public TokenStream tokenStream(String fieldName, Reader reader) {
-    return new MockTokenizer(reader, runAutomaton, lowerCase);
+    MockTokenizer tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
+    return new MockTokenFilter(tokenizer, filter, enablePositionIncrements);
   }
+
+  private class SavedStreams {
+    MockTokenizer tokenizer;
+    MockTokenFilter filter;
+  }
 
   @Override
   public TokenStream reusableTokenStream(String fieldName, Reader reader)
       throws IOException {
-    MockTokenizer t = (MockTokenizer) getPreviousTokenStream();
-    if (t == null) {
-      t = new MockTokenizer(reader, runAutomaton, lowerCase);
-      setPreviousTokenStream(t);
+    SavedStreams saved = (SavedStreams) getPreviousTokenStream();
+    if (saved == null) {
+      saved = new SavedStreams();
+      saved.tokenizer = new MockTokenizer(reader, runAutomaton, lowerCase);
+      saved.filter = new MockTokenFilter(saved.tokenizer, filter, enablePositionIncrements);
+      setPreviousTokenStream(saved);
+      return saved.filter;
     } else {
-      t.reset(reader);
+      saved.tokenizer.reset(reader);
+      return saved.filter;
     }
-    return t;
   }
 }
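The reusableTokenStream rewrite above follows Lucene's usual SavedStreams pattern: cache both ends of the chain per thread, reset the cached tokenizer against the new reader, and hand back the cached filter. A hedged sketch of a caller exercising that path; the class name, field name, and inputs are illustrative:

import java.io.StringReader;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.TokenStream;

public class ReuseDemo {
  public static void main(String[] args) throws Exception {
    Analyzer a = new MockAnalyzer();
    // first call builds tokenizer + filter and caches them via setPreviousTokenStream
    TokenStream first = a.reusableTokenStream("field", new StringReader("one two"));
    while (first.incrementToken()) { /* consume */ }
    // second call hits the else-branch: the cached tokenizer is reset to the new reader
    TokenStream second = a.reusableTokenStream("field", new StringReader("three four"));
    while (second.incrementToken()) { /* consume */ }
  }
}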
@@ -0,0 +1,101 @@
+package org.apache.lucene.analysis;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import static org.apache.lucene.util.automaton.BasicAutomata.makeEmpty;
+import static org.apache.lucene.util.automaton.BasicAutomata.makeString;
+
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.util.automaton.BasicOperations;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+
+/**
+ * A tokenfilter for testing that removes terms accepted by a DFA.
+ * <ul>
+ *  <li>Union a list of singletons to act like a stopfilter.
+ *  <li>Use the complement to act like a keepwordfilter
+ *  <li>Use a regex like <code>.{12,}</code> to act like a lengthfilter
+ * </ul>
+ */
+public final class MockTokenFilter extends TokenFilter {
+  /** Empty set of stopwords */
+  public static final CharacterRunAutomaton EMPTY_STOPSET =
+    new CharacterRunAutomaton(makeEmpty());
+
+  /** Set of common english stopwords */
+  public static final CharacterRunAutomaton ENGLISH_STOPSET =
+    new CharacterRunAutomaton(BasicOperations.union(Arrays.asList(
+      makeString("a"), makeString("an"), makeString("and"), makeString("are"),
+      makeString("as"), makeString("at"), makeString("be"), makeString("but"),
+      makeString("by"), makeString("for"), makeString("if"), makeString("in"),
+      makeString("into"), makeString("is"), makeString("it"), makeString("no"),
+      makeString("not"), makeString("of"), makeString("on"), makeString("or"),
+      makeString("such"), makeString("that"), makeString("the"), makeString("their"),
+      makeString("then"), makeString("there"), makeString("these"), makeString("they"),
+      makeString("this"), makeString("to"), makeString("was"), makeString("will"),
+      makeString("with"))));
+
+  private final CharacterRunAutomaton filter;
+  private boolean enablePositionIncrements = false;
+
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+
+  public MockTokenFilter(TokenStream input, CharacterRunAutomaton filter, boolean enablePositionIncrements) {
+    super(input);
+    this.filter = filter;
+    this.enablePositionIncrements = enablePositionIncrements;
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+    // return the first non-stop word found
+    int skippedPositions = 0;
+    while (input.incrementToken()) {
+      if (!filter.run(termAtt.buffer(), 0, termAtt.length())) {
+        if (enablePositionIncrements) {
+          posIncrAtt.setPositionIncrement(posIncrAtt.getPositionIncrement() + skippedPositions);
+        }
+        return true;
+      }
+      skippedPositions += posIncrAtt.getPositionIncrement();
+    }
+    // reached EOS -- return false
+    return false;
+  }
+
+  /**
+   * @see #setEnablePositionIncrements(boolean)
+   */
+  public boolean getEnablePositionIncrements() {
+    return enablePositionIncrements;
+  }
+
+  /**
+   * If <code>true</code>, this Filter will preserve
+   * positions of the incoming tokens (ie, accumulate and
+   * set position increments of the removed stop tokens).
+   */
+  public void setEnablePositionIncrements(boolean enable) {
+    this.enablePositionIncrements = enable;
+  }
+}
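The class javadoc above lists three ways to choose the filter DFA. A sketch of each, built with the same automaton utilities this file already uses; the constant names and the concrete words/lengths are illustrative:

import java.util.Arrays;

import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.BasicOperations;
import org.apache.lucene.util.automaton.CharacterRunAutomaton;
import org.apache.lucene.util.automaton.RegExp;

public class FilterAutomata {
  // acts like a StopFilter: drop exactly "foo" and "bar"
  static final CharacterRunAutomaton STOP = new CharacterRunAutomaton(
      BasicOperations.union(Arrays.asList(
          BasicAutomata.makeString("foo"), BasicAutomata.makeString("bar"))));

  // acts like a KeepWordFilter: drop everything except "foo" and "bar"
  static final CharacterRunAutomaton KEEP = new CharacterRunAutomaton(
      BasicOperations.complement(
          BasicOperations.union(Arrays.asList(
              BasicAutomata.makeString("foo"), BasicAutomata.makeString("bar")))));

  // acts like a LengthFilter: drop terms of 12 or more characters
  static final CharacterRunAutomaton LENGTH = new CharacterRunAutomaton(
      new RegExp(".{12,}").toAutomaton());
}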
@@ -22,11 +22,25 @@ import java.io.Reader;
 
 import org.apache.lucene.util.Version;
 import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+import org.apache.lucene.util.automaton.RegExp;
 
 /**
  * Automaton-based tokenizer for testing. Optionally lowercases.
 */
 public class MockTokenizer extends CharTokenizer {
+  /** Acts Similar to WhitespaceTokenizer */
+  public static final CharacterRunAutomaton WHITESPACE =
+    new CharacterRunAutomaton(new RegExp("[^ \t\r\n]+").toAutomaton());
+  /** Acts Similar to KeywordTokenizer.
+   * TODO: Keyword returns an "empty" token for an empty reader...
+   */
+  public static final CharacterRunAutomaton KEYWORD =
+    new CharacterRunAutomaton(new RegExp(".*").toAutomaton());
+  /** Acts like LetterTokenizer. */
+  // the ugly regex below is Unicode 5.2 [:Letter:]
+  public static final CharacterRunAutomaton SIMPLE =
+    new CharacterRunAutomaton(new RegExp("[A-Za-zªµºÀ-ÖØ-öø-ˁˆ-ˑˠ-ˤˬˮͰ-ʹͶͷͺ-ͽΆΈ-ΊΌΎ-ΡΣ-ϵϷ-ҁҊ-ԥԱ-Ֆՙա-ևא-תװ-ײء-يٮٯٱ-ۓەۥۦۮۯۺ-ۼۿܐܒ-ܯݍ-ޥޱߊ-ߪߴߵߺࠀ-ࠕࠚࠤࠨऄ-हऽॐक़-ॡॱॲॹ-ॿঅ-ঌএঐও-নপ-রলশ-হঽৎড়ঢ়য়-ৡৰৱਅ-ਊਏਐਓ-ਨਪ-ਰਲਲ਼ਵਸ਼ਸਹਖ਼-ੜਫ਼ੲ-ੴઅ-ઍએ-ઑઓ-નપ-રલળવ-હઽૐૠૡଅ-ଌଏଐଓ-ନପ-ରଲଳଵ-ହଽଡ଼ଢ଼ୟ-ୡୱஃஅ-ஊஎ-ஐஒ-கஙசஜஞடணதந-பம-ஹௐఅ-ఌఎ-ఐఒ-నప-ళవ-హఽౘౙౠౡಅ-ಌಎ-ಐಒ-ನಪ-ಳವ-ಹಽೞೠೡഅ-ഌഎ-ഐഒ-നപ-ഹഽൠൡൺ-ൿඅ-ඖක-නඳ-රලව-ෆก-ะาำเ-ๆກຂຄງຈຊຍດ-ທນ-ຟມ-ຣລວສຫອ-ະາຳຽເ-ໄໆໜໝༀཀ-ཇཉ-ཬྈ-ྋက-ဪဿၐ-ၕၚ-ၝၡၥၦၮ-ၰၵ-ႁႎႠ-Ⴥა-ჺჼᄀ-ቈቊ-ቍቐ-ቖቘቚ-ቝበ-ኈኊ-ኍነ-ኰኲ-ኵኸ-ኾዀዂ-ዅወ-ዖዘ-ጐጒ-ጕጘ-ፚᎀ-ᎏᎠ-Ᏼᐁ-ᙬᙯ-ᙿᚁ-ᚚᚠ-ᛪᜀ-ᜌᜎ-ᜑᜠ-ᜱᝀ-ᝑᝠ-ᝬᝮ-ᝰក-ឳៗៜᠠ-ᡷᢀ-ᢨᢪᢰ-ᣵᤀ-ᤜᥐ-ᥭᥰ-ᥴᦀ-ᦫᧁ-ᧇᨀ-ᨖᨠ-ᩔᪧᬅ-ᬳᭅ-ᭋᮃ-ᮠᮮᮯᰀ-ᰣᱍ-ᱏᱚ-ᱽᳩ-ᳬᳮ-ᳱᴀ-ᶿḀ-ἕἘ-Ἕἠ-ὅὈ-Ὅὐ-ὗὙὛὝὟ-ώᾀ-ᾴᾶ-ᾼιῂ-ῄῆ-ῌῐ-ΐῖ-Ίῠ-Ῥῲ-ῴῶ-ῼⁱⁿₐ-ₔℂℇℊ-ℓℕℙ-ℝℤΩℨK-ℭℯ-ℹℼ-ℿⅅ-ⅉⅎↃↄⰀ-Ⱞⰰ-ⱞⱠ-ⳤⳫ-ⳮⴀ-ⴥⴰ-ⵥⵯⶀ-ⶖⶠ-ⶦⶨ-ⶮⶰ-ⶶⶸ-ⶾⷀ-ⷆⷈ-ⷎⷐ-ⷖⷘ-ⷞⸯ々〆〱-〵〻〼ぁ-ゖゝ-ゟァ-ヺー-ヿㄅ-ㄭㄱ-ㆎㆠ-ㆷㇰ-ㇿ㐀-䶵一-鿋ꀀ-ꒌꓐ-ꓽꔀ-ꘌꘐ-ꘟꘪꘫꙀ-ꙟꙢ-ꙮꙿ-ꚗꚠ-ꛥꜗ-ꜟꜢ-ꞈꞋꞌꟻ-ꠁꠃ-ꠅꠇ-ꠊꠌ-ꠢꡀ-ꡳꢂ-ꢳꣲ-ꣷꣻꤊ-ꤥꤰ-ꥆꥠ-ꥼꦄ-ꦲꧏꨀ-ꨨꩀ-ꩂꩄ-ꩋꩠ-ꩶꩺꪀ-ꪯꪱꪵꪶꪹ-ꪽꫀꫂꫛ-ꫝꯀ-ꯢ가-힣ힰ-ퟆퟋ-ퟻ豈-鶴侮-舘並-龎ff-stﬓ-ﬗיִײַ-ﬨשׁ-זּטּ-לּמּנּסּףּפּצּ-ﮱﯓ-ﴽﵐ-ﶏﶒ-ﷇﷰ-ﷻﹰ-ﹴﹶ-ﻼA-Za-zヲ-하-ᅦᅧ-ᅬᅭ-ᅲᅳ-ᅵ𐀀-𐀋𐀍-𐀦𐀨-𐀺𐀼𐀽𐀿-𐁍𐁐-𐁝𐂀-𐃺𐊀-𐊜𐊠-𐋐𐌀-𐌞𐌰-𐍀𐍂-𐍉𐎀-𐎝𐎠-𐏃𐏈-𐏏𐐀-𐒝𐠀-𐠅𐠈𐠊-𐠵𐠷𐠸𐠼𐠿-𐡕𐤀-𐤕𐤠-𐤹𐨀𐨐-𐨓𐨕-𐨗𐨙-𐨳𐩠-𐩼𐬀-𐬵𐭀-𐭕𐭠-𐭲𐰀-𐱈𑂃-𑂯𒀀-𒍮𓀀-𓐮𝐀-𝑔𝑖-𝒜𝒞𝒟𝒢𝒥𝒦𝒩-𝒬𝒮-𝒹𝒻𝒽-𝓃𝓅-𝔅𝔇-𝔊𝔍-𝔔𝔖-𝔜𝔞-𝔹𝔻-𝔾𝕀-𝕄𝕆𝕊-𝕐𝕒-𝚥𝚨-𝛀𝛂-𝛚𝛜-𝛺𝛼-𝜔𝜖-𝜴𝜶-𝝎𝝐-𝝮𝝰-𝞈𝞊-𝞨𝞪-𝟂𝟄-𝟋𠀀-𪛖𪜀-𫜴丽-𪘀]+").toAutomaton());
   private final CharacterRunAutomaton runAutomaton;
   private final boolean lowerCase;
   private int state;
@@ -1,5 +1,13 @@
 package org.apache.lucene.analysis;
 
+import java.util.Arrays;
+
+import org.apache.lucene.util.automaton.Automaton;
+import org.apache.lucene.util.automaton.BasicAutomata;
+import org.apache.lucene.util.automaton.BasicOperations;
+import org.apache.lucene.util.automaton.CharacterRunAutomaton;
+import org.apache.lucene.util.automaton.RegExp;
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -19,6 +27,7 @@ package org.apache.lucene.analysis;
 
 public class TestMockAnalyzer extends BaseTokenStreamTestCase {
 
+  /** Test a configuration that behaves a lot like WhitespaceAnalyzer */
   public void testWhitespace() throws Exception {
     Analyzer a = new MockAnalyzer();
     assertAnalyzesTo(a, "A bc defg hiJklmn opqrstuv wxy z ",
@@ -29,8 +38,9 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
         new String[] { "break", "on", "whitespace" });
   }
 
+  /** Test a configuration that behaves a lot like SimpleAnalyzer */
   public void testSimple() throws Exception {
-    Analyzer a = new MockAnalyzer(MockAnalyzer.SIMPLE, true);
+    Analyzer a = new MockAnalyzer(MockTokenizer.SIMPLE, true);
     assertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ",
         new String[] { "a", "bc", "defg", "hijklmn", "opqrstuv", "wxy", "z" });
     assertAnalyzesToReuse(a, "aba4cadaba-Shazam",
@@ -39,8 +49,9 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
         new String[] { "break", "on", "letters" });
   }
 
+  /** Test a configuration that behaves a lot like KeywordAnalyzer */
   public void testKeyword() throws Exception {
-    Analyzer a = new MockAnalyzer(MockAnalyzer.KEYWORD, false);
+    Analyzer a = new MockAnalyzer(MockTokenizer.KEYWORD, false);
     assertAnalyzesTo(a, "a-bc123 defg+hijklmn567opqrstuv78wxy_z ",
         new String[] { "a-bc123 defg+hijklmn567opqrstuv78wxy_z " });
     assertAnalyzesToReuse(a, "aba4cadaba-Shazam",
@@ -48,4 +59,40 @@ public class TestMockAnalyzer extends BaseTokenStreamTestCase {
     assertAnalyzesToReuse(a, "break+on/Nothing",
         new String[] { "break+on/Nothing" });
   }
+
+  /** Test a configuration that behaves a lot like StopAnalyzer */
+  public void testStop() throws Exception {
+    Analyzer a = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, true);
+    assertAnalyzesTo(a, "the quick brown a fox",
+        new String[] { "quick", "brown", "fox" },
+        new int[] { 2, 1, 2 });
+
+    // disable positions
+    a = new MockAnalyzer(MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET, false);
+    assertAnalyzesTo(a, "the quick brown a fox",
+        new String[] { "quick", "brown", "fox" },
+        new int[] { 1, 1, 1 });
+  }
+
+  /** Test a configuration that behaves a lot like KeepWordFilter */
+  public void testKeep() throws Exception {
+    CharacterRunAutomaton keepWords =
+      new CharacterRunAutomaton(
+          BasicOperations.complement(
+              Automaton.union(
+                  Arrays.asList(BasicAutomata.makeString("foo"), BasicAutomata.makeString("bar")))));
+    Analyzer a = new MockAnalyzer(MockTokenizer.SIMPLE, true, keepWords, true);
+    assertAnalyzesTo(a, "quick foo brown bar bar fox foo",
+        new String[] { "foo", "bar", "bar", "foo" },
+        new int[] { 2, 2, 1, 2 });
+  }
+
+  /** Test a configuration that behaves a lot like LengthFilter */
+  public void testLength() throws Exception {
+    CharacterRunAutomaton length5 = new CharacterRunAutomaton(new RegExp(".{5,}").toAutomaton());
+    Analyzer a = new MockAnalyzer(MockTokenizer.WHITESPACE, true, length5, true);
+    assertAnalyzesTo(a, "ok toolong fine notfine",
+        new String[] { "ok", "fine" },
+        new int[] { 1, 2 });
+  }
 }
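Why testStop expects increments of { 2, 1, 2 }: with enablePositionIncrements on, each surviving token absorbs the positions of the stopwords dropped just before it ("the" before "quick", "a" before "fox"). A sketch of the same behavior with MockTokenFilter wrapped around a tokenizer directly, rather than through MockAnalyzer; the class name and reader are illustrative:

import java.io.StringReader;

import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

public class StopsetDemo {
  public static void main(String[] args) throws Exception {
    TokenStream ts = new MockTokenFilter(
        new MockTokenizer(new StringReader("the quick brown a fox"),
            MockTokenizer.SIMPLE, true),
        MockTokenFilter.ENGLISH_STOPSET, true);
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncr = ts.addAttribute(PositionIncrementAttribute.class);
    while (ts.incrementToken()) {
      // prints: quick +2, brown +1, fox +2
      System.out.println(term.toString() + " +" + posIncr.getPositionIncrement());
    }
    ts.close();
  }
}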
@@ -101,7 +101,7 @@ public class TestMultiLevelSkipList extends LuceneTestCase {
   private static class PayloadAnalyzer extends Analyzer {
     @Override
     public TokenStream tokenStream(String fieldName, Reader reader) {
-      return new PayloadFilter(new MockTokenizer(reader, MockAnalyzer.WHITESPACE, true));
+      return new PayloadFilter(new MockTokenizer(reader, MockTokenizer.WHITESPACE, true));
     }
 
   }
@@ -19,6 +19,7 @@ package org.apache.lucene.index;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.store.MockRAMDirectory;
@@ -35,7 +36,7 @@ import java.io.File;
 
 public class TestThreadedOptimize extends LuceneTestCase {
 
-  private static final Analyzer ANALYZER = new MockAnalyzer(MockAnalyzer.SIMPLE, true);
+  private static final Analyzer ANALYZER = new MockAnalyzer(MockTokenizer.SIMPLE, true);
 
   private final static int NUM_THREADS = 3;
   //private final static int NUM_THREADS = 5;
@@ -128,7 +128,7 @@ public class TestQueryParser extends LocalizedTestCase {
     /** Filters MockTokenizer with StopFilter. */
     @Override
     public final TokenStream tokenStream(String fieldName, Reader reader) {
-      return new QPTestFilter(new MockTokenizer(reader, MockAnalyzer.SIMPLE, true));
+      return new QPTestFilter(new MockTokenizer(reader, MockTokenizer.SIMPLE, true));
     }
   }
 
@@ -158,7 +158,7 @@ public class TestQueryParser extends LocalizedTestCase {
 
   public QueryParser getParser(Analyzer a) throws Exception {
     if (a == null)
-      a = new MockAnalyzer(MockAnalyzer.SIMPLE, true);
+      a = new MockAnalyzer(MockTokenizer.SIMPLE, true);
     QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", a);
     qp.setDefaultOperator(QueryParser.OR_OPERATOR);
     return qp;
@@ -228,7 +228,7 @@ public class TestQueryParser extends LocalizedTestCase {
   public Query getQueryDOA(String query, Analyzer a)
       throws Exception {
     if (a == null)
-      a = new MockAnalyzer(MockAnalyzer.SIMPLE, true);
+      a = new MockAnalyzer(MockTokenizer.SIMPLE, true);
     QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", a);
     qp.setDefaultOperator(QueryParser.AND_OPERATOR);
     return qp.parse(query);
@@ -456,7 +456,7 @@ public class TestQueryParser extends LocalizedTestCase {
     assertQueryEquals("[ a TO z]", null, "[a TO z]");
     assertEquals(MultiTermQuery.CONSTANT_SCORE_AUTO_REWRITE_DEFAULT, ((TermRangeQuery)getQuery("[ a TO z]", null)).getRewriteMethod());
 
-    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockAnalyzer.SIMPLE, true));
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.SIMPLE, true));
     qp.setMultiTermRewriteMethod(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE);
     assertEquals(MultiTermQuery.SCORING_BOOLEAN_QUERY_REWRITE,((TermRangeQuery)qp.parse("[ a TO z]")).getRewriteMethod());
 
@@ -579,7 +579,7 @@ public class TestQueryParser extends LocalizedTestCase {
     final String defaultField = "default";
     final String monthField = "month";
     final String hourField = "hour";
-    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockAnalyzer.SIMPLE, true));
+    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "field", new MockAnalyzer(MockTokenizer.SIMPLE, true));
 
     // Don't set any date resolution and verify if DateField is used
     assertDateRangeQueryEquals(qp, defaultField, startDate, endDate,
@@ -22,6 +22,7 @@ import java.io.InputStream;
 import java.io.InputStreamReader;
 
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexWriter;
@@ -77,7 +78,7 @@ public class TestFuzzyQuery2 extends LuceneTestCase {
     int terms = (int) Math.pow(2, bits);
 
     RAMDirectory dir = new RAMDirectory();
-    IndexWriter writer = new IndexWriter(dir, new MockAnalyzer(MockAnalyzer.KEYWORD, false),
+    IndexWriter writer = new IndexWriter(dir, new MockAnalyzer(MockTokenizer.KEYWORD, false),
         IndexWriter.MaxFieldLength.UNLIMITED);
 
     Document doc = new Document();
@@ -18,6 +18,7 @@ package org.apache.lucene.search;
  */
 
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.SimpleAnalyzer;
 import org.apache.lucene.analysis.WhitespaceAnalyzer;
 import org.apache.lucene.document.Document;
@@ -617,7 +618,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
     /* build an index */
     RAMDirectory farsiIndex = new RAMDirectory();
     IndexWriter writer = new IndexWriter(farsiIndex, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer(MockAnalyzer.SIMPLE, true)));
+        TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true)));
     Document doc = new Document();
     doc.add(new Field("content", "\u0633\u0627\u0628", Field.Store.YES,
         Field.Index.NOT_ANALYZED));
@@ -657,7 +658,7 @@ public class TestMultiTermConstantScore extends BaseTestRangeFilter {
     /* build an index */
     RAMDirectory danishIndex = new RAMDirectory();
     IndexWriter writer = new IndexWriter(danishIndex, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer(MockAnalyzer.SIMPLE, true)));
+        TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true)));
 
     // Danish collation orders the words below in the given order
     // (example taken from TestSort.testInternationalSort() ).
@@ -334,7 +334,7 @@ final class TestPayloadAnalyzer extends Analyzer {
 
   @Override
   public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result = new MockTokenizer(reader, MockAnalyzer.WHITESPACE, true);
+    TokenStream result = new MockTokenizer(reader, MockTokenizer.WHITESPACE, true);
     return new PayloadFilter(result, fieldName);
   }
 }
@@ -19,6 +19,7 @@ package org.apache.lucene.search;
 
 import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.SimpleAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -44,7 +45,7 @@ public class TestTermVectors extends LuceneTestCase {
   protected void setUp() throws Exception {
     super.setUp();
     IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer(MockAnalyzer.SIMPLE, true)));
+        TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true)));
     //writer.setUseCompoundFile(true);
     //writer.infoStream = System.out;
     for (int i = 0; i < 1000; i++) {
@@ -96,7 +97,7 @@ public class TestTermVectors extends LuceneTestCase {
   public void testTermVectorsFieldOrder() throws IOException {
     Directory dir = new MockRAMDirectory();
     IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer(MockAnalyzer.SIMPLE, true)));
+        TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true)));
     Document doc = new Document();
     doc.add(new Field("c", "some content here", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
     doc.add(new Field("a", "some content here", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
@@ -236,7 +237,7 @@ public class TestTermVectors extends LuceneTestCase {
     try {
       IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
           TEST_VERSION_CURRENT,
-          new MockAnalyzer(MockAnalyzer.SIMPLE, true))
+          new MockAnalyzer(MockTokenizer.SIMPLE, true))
           .setOpenMode(OpenMode.CREATE));
       writer.addDocument(testDoc1);
       writer.addDocument(testDoc2);
@@ -352,7 +353,7 @@ public class TestTermVectors extends LuceneTestCase {
   // Test only a few docs having vectors
   public void testRareVectors() throws IOException {
     IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer(MockAnalyzer.SIMPLE, true))
+        TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true))
         .setOpenMode(OpenMode.CREATE));
     for (int i = 0; i < 100; i++) {
       Document doc = new Document();
@@ -386,7 +387,7 @@ public class TestTermVectors extends LuceneTestCase {
   public void testMixedVectrosVectors() throws IOException {
     IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(
         TEST_VERSION_CURRENT,
-        new MockAnalyzer(MockAnalyzer.SIMPLE, true)).setOpenMode(OpenMode.CREATE));
+        new MockAnalyzer(MockTokenizer.SIMPLE, true)).setOpenMode(OpenMode.CREATE));
     Document doc = new Document();
     doc.add(new Field("field", "one",
         Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
@@ -52,7 +52,7 @@ public class PayloadHelper {
 
     @Override
     public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new MockTokenizer(reader, MockAnalyzer.SIMPLE, true);
+      TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
      result = new PayloadFilter(result, fieldName);
      return result;
    }
@@ -20,7 +20,6 @@ import java.io.Reader;
 import java.util.Collection;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
@@ -59,7 +58,7 @@ public class TestPayloadNearQuery extends LuceneTestCase {
   private class PayloadAnalyzer extends Analyzer {
     @Override
     public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new MockTokenizer(reader, MockAnalyzer.SIMPLE, true);
+      TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
       result = new PayloadFilter(result, fieldName);
       return result;
     }
@@ -30,10 +30,8 @@ import org.apache.lucene.search.spans.SpanTermQuery;
 import org.apache.lucene.search.spans.Spans;
 import org.apache.lucene.search.spans.TermSpans;
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.LowerCaseTokenizer;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -69,7 +67,7 @@ public class TestPayloadTermQuery extends LuceneTestCase {
 
   @Override
   public TokenStream tokenStream(String fieldName, Reader reader) {
-    TokenStream result = new MockTokenizer(reader, MockAnalyzer.SIMPLE, true);
+    TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
     result = new PayloadFilter(result, fieldName);
     return result;
   }
@@ -20,7 +20,7 @@ package org.apache.lucene.search.spans;
 import java.io.IOException;
 
 import org.apache.lucene.analysis.MockAnalyzer;
-import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.analysis.MockTokenizer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexWriter;
@@ -58,7 +58,7 @@ public class TestBasics extends LuceneTestCase {
     super.setUp();
     RAMDirectory directory = new RAMDirectory();
     IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(
-        TEST_VERSION_CURRENT, new MockAnalyzer(MockAnalyzer.SIMPLE, true)));
+        TEST_VERSION_CURRENT, new MockAnalyzer(MockTokenizer.SIMPLE, true)));
     //writer.infoStream = System.out;
     for (int i = 0; i < 1000; i++) {
       Document doc = new Document();
@@ -467,7 +467,7 @@ public class TestPayloadSpans extends LuceneTestCase {
 
     @Override
     public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new MockTokenizer(reader, MockAnalyzer.SIMPLE, true);
+      TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
       result = new PayloadFilter(result, fieldName);
       return result;
     }
@@ -519,7 +519,7 @@ public class TestPayloadSpans extends LuceneTestCase {
 
     @Override
     public TokenStream tokenStream(String fieldName, Reader reader) {
-      TokenStream result = new MockTokenizer(reader, MockAnalyzer.SIMPLE, true);
+      TokenStream result = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
       result = new PayloadFilter(result, fieldName);
       return result;
     }