mirror of https://github.com/apache/lucene.git

LUCENE-2605: Add classic QueryParser option setSplitOnWhitespace() to control whether to split on whitespace prior to text analysis. Default behavior remains unchanged: split-on-whitespace=true.

parent 54b3945572
commit 17d113dac1
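A minimal usage sketch of the new option (not part of this commit; the analyzer and field name are illustrative):

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.Query;

public class SplitOnWhitespaceDemo {
  public static void main(String[] args) throws Exception {
    QueryParser parser = new QueryParser("field", new WhitespaceAnalyzer());
    System.out.println(parser.getSplitOnWhitespace()); // true: the historical default
    // With split-on-whitespace disabled, whole runs of unquoted terms are sent
    // to the analyzer together, so multi-word constructs survive to analysis.
    parser.setSplitOnWhitespace(false);
    Query q = parser.parse("guinea pig");
    System.out.println(q);
  }
}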
@@ -74,6 +74,10 @@ Improvements

  ScandinavianNormalizationFilterFactory now implement MultiTermAwareComponent.
  (Adrien Grand)

* LUCENE-2605: Add classic QueryParser option setSplitOnWhitespace() to
  control whether to split on whitespace prior to text analysis. Default
  behavior remains unchanged: split-on-whitespace=true. (Steve Rowe)

Optimizations

* LUCENE-7330, LUCENE-7339: Speed up conjunction queries. (Adrien Grand)
@@ -36,6 +36,7 @@ import org.apache.lucene.analysis.MockFixedLengthPayloadFilter;
import org.apache.lucene.analysis.MockGraphTokenFilter;
import org.apache.lucene.analysis.MockHoleInjectingTokenFilter;
import org.apache.lucene.analysis.MockRandomLookaheadTokenFilter;
import org.apache.lucene.analysis.MockSynonymFilter;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.MockVariableLengthPayloadFilter;

@@ -75,6 +76,7 @@ public class TestAllAnalyzersHaveFactories extends LuceneTestCase {
MockGraphTokenFilter.class,
MockHoleInjectingTokenFilter.class,
MockRandomLookaheadTokenFilter.class,
MockSynonymFilter.class,
MockTokenFilter.class,
MockVariableLengthPayloadFilter.class,
ValidatingTokenFilter.class,
@@ -22,7 +22,6 @@ import java.util.ArrayList;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

@@ -137,40 +136,4 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
System.out.println(s);
}
}

// stupid filter that inserts synonym of 'hte' for 'the'
private class MockSynonymFilter extends TokenFilter {
State bufferedState;
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);

MockSynonymFilter(TokenStream input) {
super(input);
}

@Override
public boolean incrementToken() throws IOException {
if (bufferedState != null) {
restoreState(bufferedState);
posIncAtt.setPositionIncrement(0);
termAtt.setEmpty().append("hte");
bufferedState = null;
return true;
} else if (input.incrementToken()) {
if (termAtt.toString().equals("the")) {
bufferedState = captureState();
}
return true;
} else {
return false;
}
}

@Override
public void reset() throws IOException {
super.reset();
bufferedState = null;
}
}

}
@@ -21,6 +21,7 @@ import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockSynonymFilter;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

@@ -121,7 +122,7 @@ public class TestQueryBuilder extends LuceneTestCase {
assertNull(builder.createBooleanQuery("field", ""));
}

/** adds synonym of "dog" for "dogs". */
/** adds synonym of "dog" for "dogs", and synonym of "cavy" for "guinea pig". */
static class MockSynonymAnalyzer extends Analyzer {
@Override
protected TokenStreamComponents createComponents(String fieldName) {

@@ -130,37 +131,6 @@ public class TestQueryBuilder extends LuceneTestCase {
}
}

/**
* adds synonym of "dog" for "dogs".
*/
protected static class MockSynonymFilter extends TokenFilter {
CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
boolean addSynonym = false;

public MockSynonymFilter(TokenStream input) {
super(input);
}

@Override
public final boolean incrementToken() throws IOException {
if (addSynonym) { // inject our synonym
clearAttributes();
termAtt.setEmpty().append("dog");
posIncAtt.setPositionIncrement(0);
addSynonym = false;
return true;
}

if (input.incrementToken()) {
addSynonym = termAtt.toString().equals("dogs");
return true;
} else {
return false;
}
}
}

/** simple synonyms test */
public void testSynonyms() throws Exception {
SynonymQuery expected = new SynonymQuery(new Term("field", "dogs"), new Term("field", "dog"));

@@ -180,6 +150,15 @@ public class TestQueryBuilder extends LuceneTestCase {
assertEquals(expectedBuilder.build(), builder.createPhraseQuery("field", "old dogs"));
}

/** forms multiphrase query */
public void testMultiWordSynonymsPhrase() throws Exception {
MultiPhraseQuery.Builder expectedBuilder = new MultiPhraseQuery.Builder();
expectedBuilder.add(new Term[] { new Term("field", "guinea"), new Term("field", "cavy") });
expectedBuilder.add(new Term("field", "pig"));
QueryBuilder queryBuilder = new QueryBuilder(new MockSynonymAnalyzer());
assertEquals(expectedBuilder.build(), queryBuilder.createPhraseQuery("field", "guinea pig"));
}

protected static class SimpleCJKTokenizer extends Tokenizer {
private CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
@@ -112,4 +112,4 @@ interface CharStream {
void Done();

}
/* JavaCC - OriginalChecksum=c847dd1920bf7901125a7244125682ad (do not edit this line) */
/* JavaCC - OriginalChecksum=30b94cad7b10d0d81e3a59a1083939d0 (do not edit this line) */
@@ -27,6 +27,7 @@ import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

/**
* A QueryParser which constructs queries to search multiple fields.

@@ -148,18 +149,54 @@ public class MultiFieldQueryParser extends QueryParser
protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
if (field == null) {
List<Query> clauses = new ArrayList<>();
Query[] fieldQueries = new Query[fields.length];
int maxTerms = 0;
for (int i = 0; i < fields.length; i++) {
Query q = super.getFieldQuery(fields[i], queryText, quoted);
if (q != null) {
//If the user passes a map of boosts
if (boosts != null) {
//Get the boost from the map and apply them
Float boost = boosts.get(fields[i]);
if (boost != null) {
q = new BoostQuery(q, boost.floatValue());
if (q instanceof TermQuery) {
maxTerms = Math.max(1, maxTerms);
} else if (q instanceof BooleanQuery) {
maxTerms = Math.max(maxTerms, ((BooleanQuery)q).clauses().size());
}
fieldQueries[i] = q;
}
}
for (int termNum = 0; termNum < maxTerms; termNum++) {
List<Query> termClauses = new ArrayList<>();
for (int i = 0; i < fields.length; i++) {
if (fieldQueries[i] != null) {
Query q = null;
if (fieldQueries[i] instanceof BooleanQuery) {
List<BooleanClause> nestedClauses = ((BooleanQuery)fieldQueries[i]).clauses();
if (termNum < nestedClauses.size()) {
q = nestedClauses.get(termNum).getQuery();
}
} else if (termNum == 0) { // e.g. TermQuery-s
q = fieldQueries[i];
}
if (q != null) {
if (boosts != null) {
//Get the boost from the map and apply them
Float boost = boosts.get(fields[i]);
if (boost != null) {
q = new BoostQuery(q, boost);
}
}
termClauses.add(q);
}
}
clauses.add(q);
}
if (maxTerms > 1) {
if (termClauses.size() > 0) {
BooleanQuery.Builder builder = newBooleanQuery();
for (Query termClause : termClauses) {
builder.add(termClause, BooleanClause.Occur.SHOULD);
}
clauses.add(builder.build());
}
} else {
clauses.addAll(termClauses);
}
}
if (clauses.size() == 0) // happens for stopwords
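The rework above aligns clause N of each per-field query, so that when a multi-term run is analyzed as a whole the result groups per term across fields rather than per field. A hedged sketch of the effect (field names and analyzer are illustrative, not from the patch):

import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.search.Query;

public class MultiFieldSplitDemo {
  public static void main(String[] args) throws Exception {
    String[] fields = {"title", "body"};
    MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, new WhitespaceAnalyzer());
    parser.setSplitOnWhitespace(false);
    // Two analyzed terms across two fields: with the per-term regrouping above,
    // the expected shape is one SHOULD group per term, each spanning both
    // fields, roughly (title:hello body:hello) (title:world body:world).
    Query q = parser.parse("hello world");
    System.out.println(q);
  }
}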
@@ -184,4 +184,4 @@ public class ParseException extends Exception {
}

}
/* JavaCC - OriginalChecksum=61602edcb3a15810cbc58f5593eba40d (do not edit this line) */
/* JavaCC - OriginalChecksum=b187d97d5bb75c3fc63d642c1c26ac6e (do not edit this line) */
@@ -3,8 +3,11 @@ package org.apache.lucene.queryparser.classic;

import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;

@@ -81,6 +84,9 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
*/
static public enum Operator { OR, AND }

/** default split on whitespace behavior */
public static final boolean DEFAULT_SPLIT_ON_WHITESPACE = true;

/** Create a query parser.
* @param f the default field for query terms.
* @param a used to find terms in the query text.

@@ -90,6 +96,28 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
init(f, a);
}

/**
* @see #setSplitOnWhitespace(boolean)
*/
public boolean getSplitOnWhitespace() {
return splitOnWhitespace;
}

/**
* Whether query text should be split on whitespace prior to analysis.
* Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
*/
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
this.splitOnWhitespace = splitOnWhitespace;
}

private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE;
private static Set<Integer> disallowedPostMultiTerm
= new HashSet<Integer>(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR));
private static boolean allowedPostMultiTerm(int tokenKind) {
return disallowedPostMultiTerm.contains(tokenKind) == false;
}

// * Query ::= ( Clause )*
// * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
final public int Conjunction() throws ParseException {
@@ -129,15 +157,15 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case PLUS:
jj_consume_token(PLUS);
ret = MOD_REQ;
break;
case MINUS:
jj_consume_token(MINUS);
ret = MOD_NOT;
break;
case NOT:
jj_consume_token(NOT);
ret = MOD_NOT;
break;
default:
jj_la1[2] = jj_gen;
@@ -166,11 +194,37 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
List<BooleanClause> clauses = new ArrayList<BooleanClause>();
Query q, firstQuery=null;
int conj, mods;
mods = Modifiers();
q = Clause(field);
addClause(clauses, CONJ_NONE, mods, q);
if (mods == MOD_NONE)
firstQuery=q;
if (jj_2_1(2)) {
firstQuery = MultiTerm(field, clauses);
} else {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case NOT:
case PLUS:
case MINUS:
case BAREOPER:
case LPAREN:
case STAR:
case QUOTED:
case TERM:
case PREFIXTERM:
case WILDTERM:
case REGEXPTERM:
case RANGEIN_START:
case RANGEEX_START:
case NUMBER:
mods = Modifiers();
q = Clause(field);
addClause(clauses, CONJ_NONE, mods, q);
if (mods == MOD_NONE) {
firstQuery = q;
}
break;
default:
jj_la1[4] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
}
label_1:
while (true) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
@@ -193,39 +247,66 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
;
break;
default:
jj_la1[4] = jj_gen;
jj_la1[5] = jj_gen;
break label_1;
}
conj = Conjunction();
mods = Modifiers();
q = Clause(field);
addClause(clauses, conj, mods, q);
}
if (clauses.size() == 1 && firstQuery != null)
{if (true) return firstQuery;}
else {
{if (true) return getBooleanQuery(clauses);}
if (jj_2_2(2)) {
MultiTerm(field, clauses);
} else {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case AND:
case OR:
case NOT:
case PLUS:
case MINUS:
case BAREOPER:
case LPAREN:
case STAR:
case QUOTED:
case TERM:
case PREFIXTERM:
case WILDTERM:
case REGEXPTERM:
case RANGEIN_START:
case RANGEEX_START:
case NUMBER:
conj = Conjunction();
mods = Modifiers();
q = Clause(field);
addClause(clauses, conj, mods, q);
break;
default:
jj_la1[6] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
}
}
if (clauses.size() == 1 && firstQuery != null) {
{if (true) return firstQuery;}
} else {
{if (true) return getBooleanQuery(clauses);}
}
throw new Error("Missing return statement in function");
}

final public Query Clause(String field) throws ParseException {
Query q;
Token fieldToken=null, boost=null;
if (jj_2_1(2)) {
if (jj_2_3(2)) {
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case TERM:
fieldToken = jj_consume_token(TERM);
jj_consume_token(COLON);
field=discardEscapeChar(fieldToken.image);
break;
case STAR:
jj_consume_token(STAR);
jj_consume_token(COLON);
field="*";
break;
default:
jj_la1[5] = jj_gen;
jj_la1[7] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -255,16 +336,16 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
boost = jj_consume_token(NUMBER);
break;
default:
jj_la1[6] = jj_gen;
jj_la1[8] = jj_gen;
;
}
break;
default:
jj_la1[7] = jj_gen;
jj_la1[9] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
{if (true) return handleBoost(q, boost);}
throw new Error("Missing return statement in function");
}

@@ -291,73 +372,86 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
break;
case STAR:
term = jj_consume_token(STAR);
wildcard=true;
break;
case PREFIXTERM:
term = jj_consume_token(PREFIXTERM);
prefix=true;
break;
case WILDTERM:
term = jj_consume_token(WILDTERM);
wildcard=true;
break;
case REGEXPTERM:
term = jj_consume_token(REGEXPTERM);
regexp=true;
break;
case NUMBER:
term = jj_consume_token(NUMBER);
break;
case BAREOPER:
term = jj_consume_token(BAREOPER);
term.image = term.image.substring(0,1);
break;
default:
jj_la1[8] = jj_gen;
jj_la1[10] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP);
fuzzy=true;
break;
default:
jj_la1[9] = jj_gen;
;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT:
jj_consume_token(CARAT);
boost = jj_consume_token(NUMBER);
case FUZZY_SLOP:
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT:
jj_consume_token(CARAT);
boost = jj_consume_token(NUMBER);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP);
fuzzy=true;
break;
default:
jj_la1[11] = jj_gen;
;
}
break;
case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP);
fuzzy=true;
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT:
jj_consume_token(CARAT);
boost = jj_consume_token(NUMBER);
break;
default:
jj_la1[12] = jj_gen;
;
}
break;
default:
jj_la1[10] = jj_gen;
;
jj_la1[13] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
break;
default:
jj_la1[11] = jj_gen;
jj_la1[14] = jj_gen;
;
}
q = handleBareTokenQuery(field, term, fuzzySlop, prefix, wildcard, fuzzy, regexp);
break;
case RANGEIN_START:
case RANGEEX_START:
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case RANGEIN_START:
jj_consume_token(RANGEIN_START);
startInc = true;
break;
case RANGEEX_START:
jj_consume_token(RANGEEX_START);
break;
default:
jj_la1[12] = jj_gen;
jj_la1[15] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -369,7 +463,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
goop1 = jj_consume_token(RANGE_QUOTED);
break;
default:
jj_la1[13] = jj_gen;
jj_la1[16] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}

@@ -378,7 +472,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
jj_consume_token(RANGE_TO);
break;
default:
jj_la1[14] = jj_gen;
jj_la1[17] = jj_gen;
;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
@@ -389,20 +483,20 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
goop2 = jj_consume_token(RANGE_QUOTED);
break;
default:
jj_la1[15] = jj_gen;
jj_la1[18] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case RANGEIN_END:
jj_consume_token(RANGEIN_END);
endInc = true;
break;
case RANGEEX_END:
jj_consume_token(RANGEEX_END);
break;
default:
jj_la1[16] = jj_gen;
jj_la1[19] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -412,46 +506,69 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
boost = jj_consume_token(NUMBER);
break;
default:
jj_la1[17] = jj_gen;
jj_la1[20] = jj_gen;
;
}
boolean startOpen=false;
boolean endOpen=false;
if (goop1.kind == RANGE_QUOTED) {
goop1.image = goop1.image.substring(1, goop1.image.length()-1);
} else if ("*".equals(goop1.image)) {
startOpen=true;
}
if (goop2.kind == RANGE_QUOTED) {
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
} else if ("*".equals(goop2.image)) {
endOpen=true;
}
q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
break;
case QUOTED:
term = jj_consume_token(QUOTED);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP);
break;
default:
jj_la1[18] = jj_gen;
;
}
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT:
jj_consume_token(CARAT);
boost = jj_consume_token(NUMBER);
case FUZZY_SLOP:
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT:
jj_consume_token(CARAT);
boost = jj_consume_token(NUMBER);
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP);
fuzzy=true;
break;
default:
jj_la1[21] = jj_gen;
;
}
break;
case FUZZY_SLOP:
fuzzySlop = jj_consume_token(FUZZY_SLOP);
fuzzy=true;
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
case CARAT:
jj_consume_token(CARAT);
boost = jj_consume_token(NUMBER);
break;
default:
jj_la1[22] = jj_gen;
;
}
break;
default:
jj_la1[23] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
break;
default:
jj_la1[19] = jj_gen;
jj_la1[24] = jj_gen;
;
}
q = handleQuotedTerm(field, term, fuzzySlop);
break;
default:
jj_la1[20] = jj_gen;
jj_la1[25] = jj_gen;
jj_consume_token(-1);
throw new ParseException();
}
@@ -459,6 +576,44 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
throw new Error("Missing return statement in function");
}

/** Returns the first query if splitOnWhitespace=true, otherwise the entire produced query */
final public Query MultiTerm(String field, List<BooleanClause> clauses) throws ParseException {
Token text, whitespace, followingText;
Query firstQuery = null;
text = jj_consume_token(TERM);
if (splitOnWhitespace) {
firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false);
addClause(clauses, CONJ_NONE, MOD_NONE, firstQuery);
}
if (getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind)) {

} else {
jj_consume_token(-1);
throw new ParseException();
}
label_2:
while (true) {
followingText = jj_consume_token(TERM);
if (splitOnWhitespace) {
Query q = getFieldQuery(field, discardEscapeChar(followingText.image), false);
addClause(clauses, CONJ_NONE, MOD_NONE, q);
} else { // build up the text to send to analysis
text.image += " " + followingText.image;
}
if (getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind)) {
;
} else {
break label_2;
}
}
if (splitOnWhitespace == false) {
firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false);
addMultiTermClauses(clauses, firstQuery);
}
{if (true) return firstQuery;}
throw new Error("Missing return statement in function");
}

private boolean jj_2_1(int xla) {
jj_la = xla; jj_lastpos = jj_scanpos = token;
try { return !jj_3_1(); }
@@ -466,23 +621,71 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
finally { jj_save(0, xla); }
}

private boolean jj_3R_2() {
private boolean jj_2_2(int xla) {
jj_la = xla; jj_lastpos = jj_scanpos = token;
try { return !jj_3_2(); }
catch(LookaheadSuccess ls) { return true; }
finally { jj_save(1, xla); }
}

private boolean jj_2_3(int xla) {
jj_la = xla; jj_lastpos = jj_scanpos = token;
try { return !jj_3_3(); }
catch(LookaheadSuccess ls) { return true; }
finally { jj_save(2, xla); }
}

private boolean jj_3R_4() {
if (jj_scan_token(TERM)) return true;
if (jj_scan_token(COLON)) return true;
return false;
}

private boolean jj_3_2() {
if (jj_3R_3()) return true;
return false;
}

private boolean jj_3_1() {
if (jj_3R_3()) return true;
return false;
}

private boolean jj_3R_7() {
if (jj_scan_token(TERM)) return true;
return false;
}

private boolean jj_3_3() {
Token xsp;
xsp = jj_scanpos;
if (jj_3R_2()) {
if (jj_3R_4()) {
jj_scanpos = xsp;
if (jj_3R_3()) return true;
if (jj_3R_5()) return true;
}
return false;
}

private boolean jj_3R_3() {
if (jj_scan_token(TERM)) return true;
jj_lookingAhead = true;
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
jj_lookingAhead = false;
if (!jj_semLA || jj_3R_6()) return true;
Token xsp;
if (jj_3R_7()) return true;
while (true) {
xsp = jj_scanpos;
if (jj_3R_7()) { jj_scanpos = xsp; break; }
}
return false;
}

private boolean jj_3R_6() {
return false;
}

private boolean jj_3R_5() {
if (jj_scan_token(STAR)) return true;
if (jj_scan_token(COLON)) return true;
return false;
@@ -497,8 +700,11 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
private int jj_ntk;
private Token jj_scanpos, jj_lastpos;
private int jj_la;
/** Whether we are looking ahead. */
private boolean jj_lookingAhead = false;
private boolean jj_semLA;
private int jj_gen;
final private int[] jj_la1 = new int[21];
final private int[] jj_la1 = new int[26];
static private int[] jj_la1_0;
static private int[] jj_la1_1;
static {
@@ -506,12 +712,12 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
jj_la1_init_1();
}
private static void jj_la1_init_0() {
jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0xfda7f00,0x120000,0x40000,0xfda6000,0x9d22000,0x200000,0x200000,0x40000,0x6000000,0x80000000,0x10000000,0x80000000,0x60000000,0x40000,0x200000,0x40000,0xfda2000,};
jj_la1_0 = new int[] {0x300,0x300,0x1c00,0x1c00,0xfda7c00,0xfda7f00,0xfda7f00,0x120000,0x40000,0xfda6000,0x9d22000,0x200000,0x40000,0x240000,0x240000,0x6000000,0x80000000,0x10000000,0x80000000,0x60000000,0x40000,0x200000,0x40000,0x240000,0x240000,0xfda2000,};
}
private static void jj_la1_init_1() {
jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x1,0x0,0x0,0x0,0x0,0x0,};
jj_la1_1 = new int[] {0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x0,0x1,0x0,0x1,0x0,0x0,0x0,0x0,0x0,0x0,0x0,};
}
final private JJCalls[] jj_2_rtns = new JJCalls[1];
final private JJCalls[] jj_2_rtns = new JJCalls[3];
private boolean jj_rescan = false;
private int jj_gc = 0;

@@ -521,7 +727,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 21; i++) jj_la1[i] = -1;
for (int i = 0; i < 26; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}

@@ -530,8 +736,9 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
token_source.ReInit(stream);
token = new Token();
jj_ntk = -1;
jj_lookingAhead = false;
jj_gen = 0;
for (int i = 0; i < 21; i++) jj_la1[i] = -1;
for (int i = 0; i < 26; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}

@@ -541,7 +748,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 21; i++) jj_la1[i] = -1;
for (int i = 0; i < 26; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}

@@ -551,7 +758,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
token = new Token();
jj_ntk = -1;
jj_gen = 0;
for (int i = 0; i < 21; i++) jj_la1[i] = -1;
for (int i = 0; i < 26; i++) jj_la1[i] = -1;
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
}
@@ -614,7 +821,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants

/** Get the specific Token. */
final public Token getToken(int index) {
Token t = token;
Token t = jj_lookingAhead ? jj_scanpos : token;
for (int i = 0; i < index; i++) {
if (t.next != null) t = t.next;
else t = t.next = token_source.getNextToken();

@@ -668,7 +875,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
la1tokens[jj_kind] = true;
jj_kind = -1;
}
for (int i = 0; i < 21; i++) {
for (int i = 0; i < 26; i++) {
if (jj_la1[i] == jj_gen) {
for (int j = 0; j < 32; j++) {
if ((jj_la1_0[i] & (1<<j)) != 0) {
@@ -707,7 +914,7 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants

private void jj_rescan_token() {
jj_rescan = true;
for (int i = 0; i < 1; i++) {
for (int i = 0; i < 3; i++) {
try {
JJCalls p = jj_2_rtns[i];
do {

@@ -715,6 +922,8 @@ public class QueryParser extends QueryParserBase implements QueryParserConstants
jj_la = p.arg; jj_lastpos = jj_scanpos = p.first;
switch (i) {
case 0: jj_3_1(); break;
case 1: jj_3_2(); break;
case 2: jj_3_3(); break;
}
}
p = p.next;
@@ -27,15 +27,17 @@ package org.apache.lucene.queryparser.classic;

import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TermQuery;

/**
* This class is generated by JavaCC. The most important method is

@@ -106,6 +108,9 @@ public class QueryParser extends QueryParserBase {
*/
static public enum Operator { OR, AND }

/** default split on whitespace behavior */
public static final boolean DEFAULT_SPLIT_ON_WHITESPACE = true;

/** Create a query parser.
* @param f the default field for query terms.
* @param a used to find terms in the query text.

@@ -114,6 +119,28 @@ public class QueryParser extends QueryParserBase {
this(new FastCharStream(new StringReader("")));
init(f, a);
}

/**
* @see #setSplitOnWhitespace(boolean)
*/
public boolean getSplitOnWhitespace() {
return splitOnWhitespace;
}

/**
* Whether query text should be split on whitespace prior to analysis.
* Default is <code>{@value #DEFAULT_SPLIT_ON_WHITESPACE}</code>.
*/
public void setSplitOnWhitespace(boolean splitOnWhitespace) {
this.splitOnWhitespace = splitOnWhitespace;
}

private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE;
private static Set<Integer> disallowedPostMultiTerm
= new HashSet<Integer>(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR));
private static boolean allowedPostMultiTerm(int tokenKind) {
return disallowedPostMultiTerm.contains(tokenKind) == false;
}
}

PARSER_END(QueryParser)
@@ -123,15 +150,14 @@ PARSER_END(QueryParser)
/* ***************** */

<*> TOKEN : {
<#_NUM_CHAR: ["0"-"9"] >
| <#_ESCAPED_CHAR: "\\" ~[] > // every character that follows a backslash is considered as an escaped character
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^",
"[", "]", "\"", "{", "}", "~", "*", "?", "\\", "/" ]
| <_ESCAPED_CHAR> ) >
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | "-" | "+" ) >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") >
| <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) >
}

<DEFAULT, Range> SKIP : {
@@ -139,37 +165,37 @@ PARSER_END(QueryParser)
}

<DEFAULT> TOKEN : {
<AND: ("AND" | "&&") >
| <OR: ("OR" | "||") >
| <NOT: ("NOT" | "!") >
| <PLUS: "+" >
| <MINUS: "-" >
| <BAREOPER: ("+"|"-"|"!") <_WHITESPACE> >
| <LPAREN: "(" >
| <RPAREN: ")" >
| <COLON: ":" >
| <STAR: "*" >
| <CARAT: "^" > : Boost
| <QUOTED: "\"" (<_QUOTED_CHAR>)* "\"">
| <TERM: <_TERM_START_CHAR> (<_TERM_CHAR>)* >
| <FUZZY_SLOP: "~" ((<_NUM_CHAR>)+ (( "." (<_NUM_CHAR>)+ )? (<_TERM_CHAR>)*) | (<_TERM_CHAR>)*) >
| <PREFIXTERM: ("*") | ( <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" ) >
| <WILDTERM: (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <REGEXPTERM: "/" (~[ "/" ] | "\\/" )* "/" >
| <RANGEIN_START: "[" > : Range
| <RANGEEX_START: "{" > : Range
}

<Boost> TOKEN : {
<NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}

<Range> TOKEN : {
<RANGE_TO: "TO">
| <RANGEIN_END: "]"> : DEFAULT
| <RANGEEX_END: "}"> : DEFAULT
| <RANGE_QUOTED: "\"" (~["\""] | "\\\"")+ "\"">
| <RANGE_GOOP: (~[ " ", "]", "}" ])+ >
}

// * Query ::= ( Clause )*
@@ -191,23 +217,20 @@ int Modifiers() : {
}
{
[
<PLUS> { ret = MOD_REQ; }
| <MINUS> { ret = MOD_NOT; }
| <NOT> { ret = MOD_NOT; }
]
{ return ret; }
}

// This makes sure that there is no garbage after the query string
Query TopLevelQuery(String field) : {
Query q;
}
{
q=Query(field) <EOF>
{ return q; }
}

Query Query(String field) :
@@ -217,23 +240,30 @@ Query Query(String field) :
int conj, mods;
}
{
mods=Modifiers() q=Clause(field)
{
addClause(clauses, CONJ_NONE, mods, q);
if (mods == MOD_NONE)
firstQuery=q;
}
(
conj=Conjunction() mods=Modifiers() q=Clause(field)
{ addClause(clauses, conj, mods, q); }
)*
{
if (clauses.size() == 1 && firstQuery != null)
return firstQuery;
else {
return getBooleanQuery(clauses);
LOOKAHEAD(2)
firstQuery=MultiTerm(field, clauses)
| mods=Modifiers() q=Clause(field)
{
addClause(clauses, CONJ_NONE, mods, q);
if (mods == MOD_NONE) {
firstQuery = q;
}
}
)
(
LOOKAHEAD(2)
MultiTerm(field, clauses)
| conj=Conjunction() mods=Modifiers() q=Clause(field)
{ addClause(clauses, conj, mods, q); }
)*
{
if (clauses.size() == 1 && firstQuery != null) {
return firstQuery;
} else {
return getBooleanQuery(clauses);
}
}
}

Query Clause(String field) : {
@@ -244,20 +274,17 @@ Query Clause(String field) : {
[
LOOKAHEAD(2)
(
fieldToken=<TERM> <COLON> {field=discardEscapeChar(fieldToken.image);}
| <STAR> <COLON> {field="*";}
)
]

(
q=Term(field)
| <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?
| <LPAREN> q=Query(field) <RPAREN> [ <CARAT> boost=<NUMBER> ]
)
{ return handleBoost(q, boost); }
}

Query Term(String field) : {
Token term, boost=null, fuzzySlop=null, goop1, goop2;
boolean prefix = false;
@@ -270,45 +297,85 @@ Query Term(String field) : {
}
{
(
(
term=<TERM>
| term=<STAR> { wildcard=true; }
| term=<PREFIXTERM> { prefix=true; }
| term=<WILDTERM> { wildcard=true; }
| term=<REGEXPTERM> { regexp=true; }
| term=<NUMBER>
| term=<BAREOPER> { term.image = term.image.substring(0,1); }
)
[ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
[ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
{
q = handleBareTokenQuery(field, term, fuzzySlop, prefix, wildcard, fuzzy, regexp);
}
| ( ( <RANGEIN_START> {startInc=true;} | <RANGEEX_START> )
( goop1=<RANGE_GOOP>|goop1=<RANGE_QUOTED> )
[ <RANGE_TO> ]
( goop2=<RANGE_GOOP>|goop2=<RANGE_QUOTED> )
( <RANGEIN_END> {endInc=true;} | <RANGEEX_END>))
[ <CARAT> boost=<NUMBER> ]
{
boolean startOpen=false;
boolean endOpen=false;
if (goop1.kind == RANGE_QUOTED) {
goop1.image = goop1.image.substring(1, goop1.image.length()-1);
} else if ("*".equals(goop1.image)) {
startOpen=true;
}
if (goop2.kind == RANGE_QUOTED) {
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
} else if ("*".equals(goop2.image)) {
endOpen=true;
}
q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
}
| term=<QUOTED>
[ fuzzySlop=<FUZZY_SLOP> ]
[ <CARAT> boost=<NUMBER> ]
{ q = handleQuotedTerm(field, term, fuzzySlop); }
(
term=<TERM>
| term=<STAR> { wildcard=true; }
| term=<PREFIXTERM> { prefix=true; }
| term=<WILDTERM> { wildcard=true; }
| term=<REGEXPTERM> { regexp=true; }
| term=<NUMBER>
| term=<BAREOPER> { term.image = term.image.substring(0,1); }
)
[
<CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
| fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } [ <CARAT> boost=<NUMBER> ]
]
{ q = handleBareTokenQuery(field, term, fuzzySlop, prefix, wildcard, fuzzy, regexp); }

| ( <RANGEIN_START> { startInc = true; } | <RANGEEX_START> )
( goop1=<RANGE_GOOP> | goop1=<RANGE_QUOTED> )
[ <RANGE_TO> ]
( goop2=<RANGE_GOOP> | goop2=<RANGE_QUOTED> )
( <RANGEIN_END> { endInc = true; } | <RANGEEX_END> )
[ <CARAT> boost=<NUMBER> ]
{
boolean startOpen=false;
boolean endOpen=false;
if (goop1.kind == RANGE_QUOTED) {
goop1.image = goop1.image.substring(1, goop1.image.length()-1);
} else if ("*".equals(goop1.image)) {
startOpen=true;
}
if (goop2.kind == RANGE_QUOTED) {
goop2.image = goop2.image.substring(1, goop2.image.length()-1);
} else if ("*".equals(goop2.image)) {
endOpen=true;
}
q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc);
}

| term=<QUOTED>
[
<CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
| fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } [ <CARAT> boost=<NUMBER> ]
]
{ q = handleQuotedTerm(field, term, fuzzySlop); }
)
{ return handleBoost(q, boost); }
}

/** Returns the first query if splitOnWhitespace=true, otherwise the entire produced query */
Query MultiTerm(String field, List<BooleanClause> clauses) : {
Token text, whitespace, followingText;
Query firstQuery = null;
}
{
text=<TERM>
{
if (splitOnWhitespace) {
firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false);
addClause(clauses, CONJ_NONE, MOD_NONE, firstQuery);
}
}
// Both lookaheads are required; the first lookahead vets the first following term and the second lookahead vets the rest
LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) })
(
LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) })
followingText=<TERM>
{
if (splitOnWhitespace) {
Query q = getFieldQuery(field, discardEscapeChar(followingText.image), false);
addClause(clauses, CONJ_NONE, MOD_NONE, q);
} else { // build up the text to send to analysis
text.image += " " + followingText.image;
}
}
)+
{
if (splitOnWhitespace == false) {
firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false);
addMultiTermClauses(clauses, firstQuery);
}
return firstQuery;
}
}
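The two branches in MultiTerm above are the point of the option: with splitOnWhitespace=true each TERM is analyzed on its own, while with false the run is rejoined and analyzed once. A hedged sketch of the difference, borrowing MockSynonymAnalyzer from this commit's test code (where "guinea pig" has the one-word synonym "cavy"); the printed shapes are approximate:

import org.apache.lucene.analysis.MockSynonymAnalyzer;
import org.apache.lucene.queryparser.classic.QueryParser;

public class MultiTermPathsDemo {
  public static void main(String[] args) throws Exception {
    QueryParser qp = new QueryParser("field", new MockSynonymAnalyzer());
    // Default path: each term is analyzed alone, so the analyzer never sees
    // "guinea pig" as a unit and cannot inject the "cavy" synonym.
    System.out.println(qp.parse("guinea pig"));  // roughly field:guinea field:pig
    // Whole-run path: the terms are rejoined before analysis, so the synonym
    // filter can stack "cavy" on the first position.
    qp.setSplitOnWhitespace(false);
    System.out.println(qp.parse("guinea pig"));  // roughly Synonym(field:cavy field:guinea) field:pig
  }
}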
@@ -464,6 +464,45 @@ public abstract class QueryParserBase extends QueryBuilder implements CommonQuer
throw new RuntimeException("Clause cannot be both required and prohibited");
}

/**
* Adds clauses generated from analysis over text containing whitespace.
* There are no operators, so the query's clauses can either be MUST (if the
* default operator is AND) or SHOULD (default OR).
*
* If all of the clauses in the given Query are TermQuery-s, this method flattens the result
* by adding the TermQuery-s individually to the output clause list; otherwise, the given Query
* is added as a single clause including its nested clauses.
*/
protected void addMultiTermClauses(List<BooleanClause> clauses, Query q) {
// We might have been passed a null query; the term might have been
// filtered away by the analyzer.
if (q == null) {
return;
}
boolean allNestedTermQueries = false;
if (q instanceof BooleanQuery) {
allNestedTermQueries = true;
for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
if ( ! (clause.getQuery() instanceof TermQuery)) {
allNestedTermQueries = false;
break;
}
}
}
if (allNestedTermQueries) {
clauses.addAll(((BooleanQuery)q).clauses());
} else {
BooleanClause.Occur occur = operator == OR_OPERATOR ? BooleanClause.Occur.SHOULD : BooleanClause.Occur.MUST;
if (q instanceof BooleanQuery) {
for (BooleanClause clause : ((BooleanQuery)q).clauses()) {
clauses.add(newBooleanClause(clause.getQuery(), occur));
}
} else {
clauses.add(newBooleanClause(q, occur));
}
}
}

/**
* @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow
*/
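A worked illustration of addMultiTermClauses (the concrete clause values are illustrative; the actual objects come from the analyzer):

// Suppose analysis of a whitespace run produced:
//   q = BooleanQuery[ SHOULD(field:guinea), SHOULD(field:pig) ]
// Every nested query is a TermQuery, so the result is flattened and the two
// clauses are appended to the output list unchanged.
//
// With a multi-word synonym analyzer, the first position becomes a SynonymQuery:
//   q = BooleanQuery[ SHOULD(Synonym(field:cavy field:guinea)), SHOULD(field:pig) ]
// Not all nested queries are TermQuerys, so each nested query is re-wrapped
// with the default operator's occur value instead:
//   default OR  -> SHOULD(Synonym(...)), SHOULD(field:pig)
//   default AND -> MUST(Synonym(...)),   MUST(field:pig)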
@@ -285,7 +285,7 @@ private int jjMoveNfa_2(int startState, int curPos)
jjCheckNAddTwoStates(33, 34);
}
else if (curChar == 92)
jjCheckNAddTwoStates(35, 35);
jjCheckNAdd(35);
break;
case 0:
if ((0x97ffffff87ffffffL & l) != 0L)

@@ -384,7 +384,7 @@ private int jjMoveNfa_2(int startState, int curPos)
break;
case 26:
if (curChar == 92)
jjAddStates(27, 28);
jjstateSet[jjnewStateCnt++] = 27;
break;
case 27:
if (kind > 21)

@@ -400,7 +400,7 @@ private int jjMoveNfa_2(int startState, int curPos)
break;
case 29:
if (curChar == 92)
jjAddStates(29, 30);
jjstateSet[jjnewStateCnt++] = 30;
break;
case 30:
if (kind > 21)

@@ -423,7 +423,7 @@ private int jjMoveNfa_2(int startState, int curPos)
break;
case 34:
if (curChar == 92)
jjCheckNAddTwoStates(35, 35);
jjCheckNAdd(35);
break;
case 35:
if (kind > 23)

@@ -453,7 +453,7 @@ private int jjMoveNfa_2(int startState, int curPos)
break;
case 43:
if (curChar == 92)
jjCheckNAddTwoStates(44, 44);
jjCheckNAdd(44);
break;
case 44:
if (kind > 20)

@@ -466,7 +466,7 @@ private int jjMoveNfa_2(int startState, int curPos)
break;
case 46:
if (curChar == 92)
jjCheckNAddTwoStates(47, 47);
jjCheckNAdd(47);
break;
case 47:
jjCheckNAddStates(18, 20);

@@ -645,7 +645,7 @@ private int jjMoveNfa_0(int startState, int curPos)
break;
if (kind > 27)
kind = 27;
jjAddStates(31, 32);
jjAddStates(27, 28);
break;
case 1:
if (curChar == 46)

@@ -799,11 +799,11 @@ private int jjMoveNfa_1(int startState, int curPos)
break;
case 2:
if ((0xfffffffbffffffffL & l) != 0L)
jjCheckNAddStates(33, 35);
jjCheckNAddStates(29, 31);
break;
case 3:
if (curChar == 34)
jjCheckNAddStates(33, 35);
jjCheckNAddStates(29, 31);
break;
case 5:
if (curChar == 34 && kind > 31)

@@ -836,7 +836,7 @@ private int jjMoveNfa_1(int startState, int curPos)
jjCheckNAdd(6);
break;
case 2:
jjAddStates(33, 35);
jjAddStates(29, 31);
break;
case 4:
if (curChar == 92)

@@ -872,7 +872,7 @@ private int jjMoveNfa_1(int startState, int curPos)
break;
case 2:
if (jjCanMove_1(hiByte, i1, i2, l1, l2))
jjAddStates(33, 35);
jjAddStates(29, 31);
break;
case 6:
if (!jjCanMove_1(hiByte, i1, i2, l1, l2))

@@ -899,9 +899,8 @@ private int jjMoveNfa_1(int startState, int curPos)
}
}
static final int[] jjnextStates = {
37, 39, 40, 17, 18, 20, 42, 45, 31, 46, 43, 22, 23, 25, 26, 24,
25, 26, 45, 31, 46, 44, 47, 35, 22, 28, 29, 27, 27, 30, 30, 0,
1, 2, 4, 5,
37, 39, 40, 17, 18, 20, 42, 43, 45, 46, 31, 22, 23, 25, 26, 24,
25, 26, 45, 46, 31, 44, 47, 35, 22, 28, 29, 0, 1, 2, 4, 5,
};
private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2)
{
@@ -128,4 +128,4 @@ public class Token implements java.io.Serializable {
}

}
/* JavaCC - OriginalChecksum=c1e1418b35aa9e47ef8dc98b87423d70 (do not edit this line) */
/* JavaCC - OriginalChecksum=405bb5d2fcd84e94ac1c8f0b12c1f914 (do not edit this line) */
@@ -144,4 +144,4 @@ public class TokenMgrError extends Error
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
}
}
/* JavaCC - OriginalChecksum=0c275864a1972d9a01601ab81426872d (do not edit this line) */
/* JavaCC - OriginalChecksum=f433e1a52b8eadbf12f3fbbbf87fd140 (do not edit this line) */
@@ -18,6 +18,7 @@ package org.apache.lucene.queryparser.classic;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockSynonymAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;

@@ -33,6 +34,7 @@ import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.search.TermQuery;

@@ -44,7 +46,9 @@ import java.io.IOException;
* Tests QueryParser.
*/
public class TestQueryParser extends QueryParserTestBase {

protected boolean splitOnWhitespace = QueryParser.DEFAULT_SPLIT_ON_WHITESPACE;

public static class QPTestParser extends QueryParser {
public QPTestParser(String f, Analyzer a) {
super(f, a);

@@ -67,6 +71,7 @@ public class TestQueryParser extends QueryParserTestBase {
if (a == null) a = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true);
QueryParser qp = new QueryParser(getDefaultField(), a);
qp.setDefaultOperator(QueryParserBase.OR_OPERATOR);
qp.setSplitOnWhitespace(splitOnWhitespace);
return qp;
}
@@ -310,18 +315,7 @@ public class TestQueryParser extends QueryParserTestBase {
Query unexpanded = new TermQuery(new Term("field", "dogs"));
assertEquals(unexpanded, smart.parse("\"dogs\""));
}

// TODO: fold these into QueryParserTestBase

/** adds synonym of "dog" for "dogs". */
static class MockSynonymAnalyzer extends Analyzer {
@Override
protected TokenStreamComponents createComponents(String fieldName) {
MockTokenizer tokenizer = new MockTokenizer();
return new TokenStreamComponents(tokenizer, new MockSynonymFilter(tokenizer));
}
}

/** simple synonyms test */
public void testSynonyms() throws Exception {
Query expected = new SynonymQuery(new Term("field", "dogs"), new Term("field", "dog"));
@@ -483,4 +477,229 @@ public class TestQueryParser extends QueryParserTestBase {
qp.parse("a*aaaaaaa");
});
}
}

// TODO: Remove this specialization once the flexible standard parser gets multi-word synonym support
@Override
public void testQPA() throws Exception {
boolean oldSplitOnWhitespace = splitOnWhitespace;
splitOnWhitespace = false;

assertQueryEquals("term phrase term", qpAnalyzer, "term phrase1 phrase2 term");

CommonQueryParserConfiguration cqpc = getParserConfig(qpAnalyzer);
setDefaultOperatorAND(cqpc);
assertQueryEquals(cqpc, "field", "term phrase term", "+term +phrase1 +phrase2 +term");

splitOnWhitespace = oldSplitOnWhitespace;
}

// TODO: Move to QueryParserTestBase once standard flexible parser gets this capability
public void testMultiWordSynonyms() throws Exception {
QueryParser dumb = new QueryParser("field", new Analyzer1());
dumb.setSplitOnWhitespace(false);

// A multi-word synonym source will form a synonym query for the same-starting-position tokens
BooleanQuery.Builder multiWordExpandedBqBuilder = new BooleanQuery.Builder();
Query multiWordSynonymQuery = new SynonymQuery(new Term("field", "guinea"), new Term("field", "cavy"));
multiWordExpandedBqBuilder.add(multiWordSynonymQuery, BooleanClause.Occur.SHOULD);
multiWordExpandedBqBuilder.add(new TermQuery(new Term("field", "pig")), BooleanClause.Occur.SHOULD);
Query multiWordExpandedBq = multiWordExpandedBqBuilder.build();
assertEquals(multiWordExpandedBq, dumb.parse("guinea pig"));

// With the phrase operator, a multi-word synonym source will form a multiphrase query.
// When the number of expanded term(s) is different from that of the original term(s), this is not good.
MultiPhraseQuery.Builder multiWordExpandedMpqBuilder = new MultiPhraseQuery.Builder();
multiWordExpandedMpqBuilder.add(new Term[]{new Term("field", "guinea"), new Term("field", "cavy")});
multiWordExpandedMpqBuilder.add(new Term("field", "pig"));
Query multiWordExpandedMPQ = multiWordExpandedMpqBuilder.build();
assertEquals(multiWordExpandedMPQ, dumb.parse("\"guinea pig\""));

// custom behavior, the synonyms are expanded, unless you use quote operator
QueryParser smart = new SmartQueryParser();
smart.setSplitOnWhitespace(false);
assertEquals(multiWordExpandedBq, smart.parse("guinea pig"));

PhraseQuery.Builder multiWordUnexpandedPqBuilder = new PhraseQuery.Builder();
multiWordUnexpandedPqBuilder.add(new Term("field", "guinea"));
multiWordUnexpandedPqBuilder.add(new Term("field", "pig"));
Query multiWordUnexpandedPq = multiWordUnexpandedPqBuilder.build();
assertEquals(multiWordUnexpandedPq, smart.parse("\"guinea pig\""));
}

// TODO: Move to QueryParserTestBase once standard flexible parser gets this capability
public void testOperatorsAndMultiWordSynonyms() throws Exception {
Analyzer a = new MockSynonymAnalyzer();

boolean oldSplitOnWhitespace = splitOnWhitespace;
splitOnWhitespace = false;

// Operators should interrupt multiword analysis of adjacent words if they associate
assertQueryEquals("+guinea pig", a, "+guinea pig");
assertQueryEquals("-guinea pig", a, "-guinea pig");
assertQueryEquals("!guinea pig", a, "-guinea pig");
assertQueryEquals("guinea* pig", a, "guinea* pig");
assertQueryEquals("guinea? pig", a, "guinea? pig");
assertQueryEquals("guinea~2 pig", a, "guinea~2 pig");
assertQueryEquals("guinea^2 pig", a, "(guinea)^2.0 pig");

assertQueryEquals("guinea +pig", a, "guinea +pig");
assertQueryEquals("guinea -pig", a, "guinea -pig");
assertQueryEquals("guinea !pig", a, "guinea -pig");
assertQueryEquals("guinea pig*", a, "guinea pig*");
assertQueryEquals("guinea pig?", a, "guinea pig?");
assertQueryEquals("guinea pig~2", a, "guinea pig~2");
assertQueryEquals("guinea pig^2", a, "guinea (pig)^2.0");

assertQueryEquals("field:guinea pig", a, "guinea pig");
assertQueryEquals("guinea field:pig", a, "guinea pig");

assertQueryEquals("NOT guinea pig", a, "-guinea pig");
assertQueryEquals("guinea NOT pig", a, "guinea -pig");

assertQueryEquals("guinea pig AND dogs", a, "guinea +pig +Synonym(dog dogs)");
assertQueryEquals("dogs AND guinea pig", a, "+Synonym(dog dogs) +guinea pig");
assertQueryEquals("guinea pig && dogs", a, "guinea +pig +Synonym(dog dogs)");
assertQueryEquals("dogs && guinea pig", a, "+Synonym(dog dogs) +guinea pig");

assertQueryEquals("guinea pig OR dogs", a, "guinea pig Synonym(dog dogs)");
assertQueryEquals("dogs OR guinea pig", a, "Synonym(dog dogs) guinea pig");
assertQueryEquals("guinea pig || dogs", a, "guinea pig Synonym(dog dogs)");
assertQueryEquals("dogs || guinea pig", a, "Synonym(dog dogs) guinea pig");

assertQueryEquals("\"guinea\" pig", a, "guinea pig");
assertQueryEquals("guinea \"pig\"", a, "guinea pig");

assertQueryEquals("(guinea) pig", a, "guinea pig");
assertQueryEquals("guinea (pig)", a, "guinea pig");

assertQueryEquals("/guinea/ pig", a, "/guinea/ pig");
assertQueryEquals("guinea /pig/", a, "guinea /pig/");

// Operators should not interrupt multiword analysis if they don't associate
assertQueryEquals("(guinea pig)", a, "Synonym(cavy guinea) pig");
assertQueryEquals("+(guinea pig)", a, "+(Synonym(cavy guinea) pig)");
assertQueryEquals("-(guinea pig)", a, "-(Synonym(cavy guinea) pig)");
assertQueryEquals("!(guinea pig)", a, "-(Synonym(cavy guinea) pig)");
assertQueryEquals("NOT (guinea pig)", a, "-(Synonym(cavy guinea) pig)");
assertQueryEquals("(guinea pig)^2", a, "(Synonym(cavy guinea) pig)^2.0");

assertQueryEquals("field:(guinea pig)", a, "Synonym(cavy guinea) pig");

assertQueryEquals("+small guinea pig", a, "+small Synonym(cavy guinea) pig");
assertQueryEquals("-small guinea pig", a, "-small Synonym(cavy guinea) pig");
assertQueryEquals("!small guinea pig", a, "-small Synonym(cavy guinea) pig");
assertQueryEquals("NOT small guinea pig", a, "-small Synonym(cavy guinea) pig");
assertQueryEquals("small* guinea pig", a, "small* Synonym(cavy guinea) pig");
assertQueryEquals("small? guinea pig", a, "small? Synonym(cavy guinea) pig");
assertQueryEquals("\"small\" guinea pig", a, "small Synonym(cavy guinea) pig");

assertQueryEquals("guinea pig +running", a, "Synonym(cavy guinea) pig +running");
assertQueryEquals("guinea pig -running", a, "Synonym(cavy guinea) pig -running");
assertQueryEquals("guinea pig !running", a, "Synonym(cavy guinea) pig -running");
assertQueryEquals("guinea pig NOT running", a, "Synonym(cavy guinea) pig -running");
|
||||
assertQueryEquals("guinea pig running*", a, "Synonym(cavy guinea) pig running*");
|
||||
assertQueryEquals("guinea pig running?", a, "Synonym(cavy guinea) pig running?");
|
||||
assertQueryEquals("guinea pig \"running\"", a, "Synonym(cavy guinea) pig running");
|
||||
|
||||
assertQueryEquals("\"guinea pig\"~2", a, "\"(guinea cavy) pig\"~2");
|
||||
|
||||
assertQueryEquals("field:\"guinea pig\"", a, "\"(guinea cavy) pig\"");
|
||||
|
||||
splitOnWhitespace = oldSplitOnWhitespace;
|
||||
}
|
||||
|
||||
public void testOperatorsAndMultiWordSynonymsSplitOnWhitespace() throws Exception {
|
||||
Analyzer a = new MockSynonymAnalyzer();
|
||||
|
||||
boolean oldSplitOnWhitespace = splitOnWhitespace;
|
||||
splitOnWhitespace = true;
|
||||
|
||||
assertQueryEquals("+guinea pig", a, "+guinea pig");
|
||||
assertQueryEquals("-guinea pig", a, "-guinea pig");
|
||||
assertQueryEquals("!guinea pig", a, "-guinea pig");
|
||||
assertQueryEquals("guinea* pig", a, "guinea* pig");
|
||||
assertQueryEquals("guinea? pig", a, "guinea? pig");
|
||||
assertQueryEquals("guinea~2 pig", a, "guinea~2 pig");
|
||||
assertQueryEquals("guinea^2 pig", a, "(guinea)^2.0 pig");
|
||||
|
||||
assertQueryEquals("guinea +pig", a, "guinea +pig");
|
||||
assertQueryEquals("guinea -pig", a, "guinea -pig");
|
||||
assertQueryEquals("guinea !pig", a, "guinea -pig");
|
||||
assertQueryEquals("guinea pig*", a, "guinea pig*");
|
||||
assertQueryEquals("guinea pig?", a, "guinea pig?");
|
||||
assertQueryEquals("guinea pig~2", a, "guinea pig~2");
|
||||
assertQueryEquals("guinea pig^2", a, "guinea (pig)^2.0");
|
||||
|
||||
assertQueryEquals("field:guinea pig", a, "guinea pig");
|
||||
assertQueryEquals("guinea field:pig", a, "guinea pig");
|
||||
|
||||
assertQueryEquals("NOT guinea pig", a, "-guinea pig");
|
||||
assertQueryEquals("guinea NOT pig", a, "guinea -pig");
|
||||
|
||||
assertQueryEquals("guinea pig AND dogs", a, "guinea +pig +Synonym(dog dogs)");
|
||||
assertQueryEquals("dogs AND guinea pig", a, "+Synonym(dog dogs) +guinea pig");
|
||||
assertQueryEquals("guinea pig && dogs", a, "guinea +pig +Synonym(dog dogs)");
|
||||
assertQueryEquals("dogs && guinea pig", a, "+Synonym(dog dogs) +guinea pig");
|
||||
|
||||
assertQueryEquals("guinea pig OR dogs", a, "guinea pig Synonym(dog dogs)");
|
||||
assertQueryEquals("dogs OR guinea pig", a, "Synonym(dog dogs) guinea pig");
|
||||
assertQueryEquals("guinea pig || dogs", a, "guinea pig Synonym(dog dogs)");
|
||||
assertQueryEquals("dogs || guinea pig", a, "Synonym(dog dogs) guinea pig");
|
||||
|
||||
assertQueryEquals("\"guinea\" pig", a, "guinea pig");
|
||||
assertQueryEquals("guinea \"pig\"", a, "guinea pig");
|
||||
|
||||
assertQueryEquals("(guinea) pig", a, "guinea pig");
|
||||
assertQueryEquals("guinea (pig)", a, "guinea pig");
|
||||
|
||||
assertQueryEquals("/guinea/ pig", a, "/guinea/ pig");
|
||||
assertQueryEquals("guinea /pig/", a, "guinea /pig/");
|
||||
|
||||
assertQueryEquals("(guinea pig)", a, "guinea pig");
|
||||
assertQueryEquals("+(guinea pig)", a, "+(guinea pig)");
|
||||
assertQueryEquals("-(guinea pig)", a, "-(guinea pig)");
|
||||
assertQueryEquals("!(guinea pig)", a, "-(guinea pig)");
|
||||
assertQueryEquals("NOT (guinea pig)", a, "-(guinea pig)");
|
||||
assertQueryEquals("(guinea pig)^2", a, "(guinea pig)^2.0");
|
||||
|
||||
assertQueryEquals("field:(guinea pig)", a, "guinea pig");
|
||||
|
||||
assertQueryEquals("+small guinea pig", a, "+small guinea pig");
|
||||
assertQueryEquals("-small guinea pig", a, "-small guinea pig");
|
||||
assertQueryEquals("!small guinea pig", a, "-small guinea pig");
|
||||
assertQueryEquals("NOT small guinea pig", a, "-small guinea pig");
|
||||
assertQueryEquals("small* guinea pig", a, "small* guinea pig");
|
||||
assertQueryEquals("small? guinea pig", a, "small? guinea pig");
|
||||
assertQueryEquals("\"small\" guinea pig", a, "small guinea pig");
|
||||
|
||||
assertQueryEquals("guinea pig +running", a, "guinea pig +running");
|
||||
assertQueryEquals("guinea pig -running", a, "guinea pig -running");
|
||||
assertQueryEquals("guinea pig !running", a, "guinea pig -running");
|
||||
assertQueryEquals("guinea pig NOT running", a, "guinea pig -running");
|
||||
assertQueryEquals("guinea pig running*", a, "guinea pig running*");
|
||||
assertQueryEquals("guinea pig running?", a, "guinea pig running?");
|
||||
assertQueryEquals("guinea pig \"running\"", a, "guinea pig running");
|
||||
|
||||
assertQueryEquals("\"guinea pig\"~2", a, "\"(guinea cavy) pig\"~2");
|
||||
|
||||
assertQueryEquals("field:\"guinea pig\"", a, "\"(guinea cavy) pig\"");
|
||||
|
||||
splitOnWhitespace = oldSplitOnWhitespace;
|
||||
}
|
||||
|
||||
public void testDefaultSplitOnWhitespace() throws Exception {
|
||||
QueryParser parser = new QueryParser("field", new Analyzer1());
|
||||
|
||||
assertTrue(parser.getSplitOnWhitespace()); // default is true
|
||||
|
||||
BooleanQuery.Builder bqBuilder = new BooleanQuery.Builder();
|
||||
bqBuilder.add(new TermQuery(new Term("field", "guinea")), BooleanClause.Occur.SHOULD);
|
||||
bqBuilder.add(new TermQuery(new Term("field", "pig")), BooleanClause.Occur.SHOULD);
|
||||
assertEquals(bqBuilder.build(), parser.parse("guinea pig"));
|
||||
|
||||
boolean oldSplitOnWhitespace = splitOnWhitespace;
|
||||
splitOnWhitespace = QueryParser.DEFAULT_SPLIT_ON_WHITESPACE;
|
||||
assertQueryEquals("guinea pig", new MockSynonymAnalyzer(), "guinea pig");
|
||||
splitOnWhitespace = oldSplitOnWhitespace;
|
||||
}
|
||||
}
|
|
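For context on what the tests above exercise, here is a minimal sketch of the new toggle (the demo class name is hypothetical; MockSynonymAnalyzer is the test analyzer added later in this commit, and the query toStrings in the comments are approximate):

    import org.apache.lucene.analysis.MockSynonymAnalyzer;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.Query;

    public class SplitOnWhitespaceDemo {
      public static void main(String[] args) throws Exception {
        QueryParser qp = new QueryParser("field", new MockSynonymAnalyzer());

        // Default (split-on-whitespace=true): each whitespace-separated chunk
        // is analyzed on its own, so the filter never sees "guinea" and "pig"
        // together and the multi-word synonym "cavy" cannot be injected.
        Query split = qp.parse("guinea pig");    // roughly: field:guinea field:pig

        // With splitting disabled, the whole string reaches the analyzer as
        // one chunk and the multi-word synonym fires.
        qp.setSplitOnWhitespace(false);
        Query unsplit = qp.parse("guinea pig");  // roughly: Synonym(field:cavy field:guinea) field:pig

        System.out.println(split + "\n" + unsplit);
      }
    }
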
@@ -50,6 +50,7 @@ public class TestExtendableQueryParser extends TestQueryParser {
        getDefaultField(), a) : new ExtendableQueryParser(
        getDefaultField(), a, extensions);
    qp.setDefaultOperator(QueryParserBase.OR_OPERATOR);
    qp.setSplitOnWhitespace(splitOnWhitespace);
    return qp;
  }
@@ -203,4 +203,15 @@ public class TestStandardQP extends QueryParserTestBase {
    //TODO test something like "SmartQueryParser()"
  }

  // TODO: Remove this specialization once the flexible standard parser gets multi-word synonym support
  @Override
  public void testQPA() throws Exception {
    super.testQPA();

    assertQueryEquals("term phrase term", qpAnalyzer, "term (phrase1 phrase2) term");

    CommonQueryParserConfiguration cqpc = getParserConfig(qpAnalyzer);
    setDefaultOperatorAND(cqpc);
    assertQueryEquals(cqpc, "field", "term phrase term", "+term +(+phrase1 +phrase2) +term");
  }
}
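The TODOs above and below refer to the flexible standard parser, which has no equivalent of setSplitOnWhitespace() yet. A hedged illustration of the gap (the demo class name is hypothetical; the output comment is approximate):

    import org.apache.lucene.analysis.MockSynonymAnalyzer;
    import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
    import org.apache.lucene.search.Query;

    public class FlexibleParserGap {
      public static void main(String[] args) throws Exception {
        // The flexible parser splits the query syntax on whitespace before
        // analysis, so the analyzer never sees "guinea pig" as one chunk and
        // the multi-word synonym "cavy" can never be injected.
        StandardQueryParser flexible = new StandardQueryParser(new MockSynonymAnalyzer());
        Query q = flexible.parse("guinea pig", "field");
        System.out.println(q);  // roughly: field:guinea field:pig
      }
    }
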
@@ -27,7 +27,6 @@ import java.util.TimeZone;
import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
@@ -535,8 +534,10 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
    assertQueryEquals("term -(stop) term", qpAnalyzer, "term term");

    assertQueryEquals("drop AND stop AND roll", qpAnalyzer, "+drop +roll");
    assertQueryEquals("term phrase term", qpAnalyzer,
        "term (phrase1 phrase2) term");

    // TODO: Re-enable once flexible standard parser gets multi-word synonym support
    // assertQueryEquals("term phrase term", qpAnalyzer,
    //     "term phrase1 phrase2 term");
    assertQueryEquals("term AND NOT phrase term", qpAnalyzer,
        "+term -(phrase1 phrase2) term");
    assertQueryEquals("stop^3", qpAnalyzer, "");
@@ -552,8 +553,9 @@ public abstract class QueryParserTestBase extends LuceneTestCase {

    CommonQueryParserConfiguration cqpc = getParserConfig(qpAnalyzer);
    setDefaultOperatorAND(cqpc);
    assertQueryEquals(cqpc, "field", "term phrase term",
        "+term +(+phrase1 +phrase2) +term");
    // TODO: Re-enable once flexible standard parser gets multi-word synonym support
    // assertQueryEquals(cqpc, "field", "term phrase term",
    //     "+term +phrase1 +phrase2 +term");
    assertQueryEquals(cqpc, "field", "phrase",
        "+phrase1 +phrase2");
  }
@@ -1101,37 +1103,6 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
    dir.close();
  }

  /**
   * adds synonym of "dog" for "dogs".
   */
  protected static class MockSynonymFilter extends TokenFilter {
    CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
    boolean addSynonym = false;

    public MockSynonymFilter(TokenStream input) {
      super(input);
    }

    @Override
    public final boolean incrementToken() throws IOException {
      if (addSynonym) { // inject our synonym
        clearAttributes();
        termAtt.setEmpty().append("dog");
        posIncAtt.setPositionIncrement(0);
        addSynonym = false;
        return true;
      }

      if (input.incrementToken()) {
        addSynonym = termAtt.toString().equals("dogs");
        return true;
      } else {
        return false;
      }
    }
  }

  /** whitespace+lowercase analyzer with synonyms */
  protected class Analyzer1 extends Analyzer {
    public Analyzer1(){
@@ -1251,10 +1222,8 @@ public abstract class QueryParserTestBase extends LuceneTestCase {
    CharacterRunAutomaton stopStopList =
        new CharacterRunAutomaton(new RegExp("[sS][tT][oO][pP]").toAutomaton());

    CommonQueryParserConfiguration qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList));

    qp = getParserConfig(
        new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList));
    CommonQueryParserConfiguration qp
        = getParserConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList));
    qp.setEnablePositionIncrements(true);

    PhraseQuery.Builder phraseQuery = new PhraseQuery.Builder();
@@ -0,0 +1,28 @@
package org.apache.lucene.analysis;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/** adds synonym of "dog" for "dogs", and synonym of "cavy" for "guinea pig". */
public class MockSynonymAnalyzer extends Analyzer {
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    MockTokenizer tokenizer = new MockTokenizer();
    return new TokenStreamComponents(tokenizer, new MockSynonymFilter(tokenizer));
  }
}
@@ -0,0 +1,97 @@
package org.apache.lucene.analysis;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.util.AttributeSource;

/** adds synonym of "dog" for "dogs", and synonym of "cavy" for "guinea pig". */
public class MockSynonymFilter extends TokenFilter {
  CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
  OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
  PositionLengthAttribute posLenAtt = addAttribute(PositionLengthAttribute.class);
  List<AttributeSource> tokenQueue = new ArrayList<>();
  boolean endOfInput = false;

  public MockSynonymFilter(TokenStream input) {
    super(input);
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    tokenQueue.clear();
    endOfInput = false;
  }

  @Override
  public final boolean incrementToken() throws IOException {
    if (tokenQueue.size() > 0) {
      tokenQueue.remove(0).copyTo(this);
      return true;
    }
    if (endOfInput == false && input.incrementToken()) {
      if (termAtt.toString().equals("dogs")) {
        addSynonymAndRestoreOrigToken("dog", 1, offsetAtt.endOffset());
      } else if (termAtt.toString().equals("guinea")) {
        AttributeSource firstSavedToken = cloneAttributes();
        if (input.incrementToken()) {
          if (termAtt.toString().equals("pig")) {
            AttributeSource secondSavedToken = cloneAttributes();
            int secondEndOffset = offsetAtt.endOffset();
            firstSavedToken.copyTo(this);
            addSynonym("cavy", 2, secondEndOffset);
            tokenQueue.add(secondSavedToken);
          } else if (termAtt.toString().equals("dogs")) {
            tokenQueue.add(cloneAttributes());
            addSynonym("dog", 1, offsetAtt.endOffset());
          }
        } else {
          endOfInput = true;
        }
        firstSavedToken.copyTo(this);
      }
      return true;
    } else {
      endOfInput = true;
      return false;
    }
  }

  private void addSynonym(String synonymText, int posLen, int endOffset) {
    termAtt.setEmpty().append(synonymText);
    posIncAtt.setPositionIncrement(0);
    posLenAtt.setPositionLength(posLen);
    offsetAtt.setOffset(offsetAtt.startOffset(), endOffset);
    tokenQueue.add(cloneAttributes());
  }

  private void addSynonymAndRestoreOrigToken(String synonymText, int posLen, int endOffset) {
    AttributeSource origToken = cloneAttributes();
    addSynonym(synonymText, posLen, endOffset);
    origToken.copyTo(this);
  }
}
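A minimal sketch of consuming the token graph this filter produces (the demo class name is hypothetical; MockTokenizer defaults to whitespace tokenization, and the expected-output comment mirrors the assertions in the test below):

    import java.io.StringReader;

    import org.apache.lucene.analysis.MockSynonymFilter;
    import org.apache.lucene.analysis.MockTokenizer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
    import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
    import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;

    public class PrintSynonymGraph {
      public static void main(String[] args) throws Exception {
        MockTokenizer tokenizer = new MockTokenizer();
        tokenizer.setReader(new StringReader("guinea pig"));
        TokenStream stream = new MockSynonymFilter(tokenizer);
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posInc = stream.addAttribute(PositionIncrementAttribute.class);
        PositionLengthAttribute posLen = stream.addAttribute(PositionLengthAttribute.class);

        stream.reset();
        while (stream.incrementToken()) {
          // Expected: guinea (posInc=1, posLen=1), cavy (posInc=0, posLen=2),
          // pig (posInc=1, posLen=1) -- "cavy" spans both original positions.
          System.out.println(term + " posInc=" + posInc.getPositionIncrement()
              + " posLen=" + posLen.getPositionLength());
        }
        stream.end();
        stream.close();
      }
    }
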
@@ -0,0 +1,151 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.analysis;

import java.io.IOException;

/** test the mock synonym filter */
public class TestMockSynonymFilter extends BaseTokenStreamTestCase {

  /** test the mock synonym filter */
  public void test() throws IOException {
    Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        MockTokenizer tokenizer = new MockTokenizer();
        return new TokenStreamComponents(tokenizer, new MockSynonymFilter(tokenizer));
      }
    };

    assertAnalyzesTo(analyzer, "dogs",
        new String[]{"dogs", "dog"},
        new int[]{0, 0}, // start offset
        new int[]{4, 4}, // end offset
        null,
        new int[]{1, 0}, // position increment
        new int[]{1, 1}, // position length
        true); // check that offsets are correct

    assertAnalyzesTo(analyzer, "small dogs",
        new String[]{"small", "dogs", "dog"},
        new int[]{0, 6, 6}, // start offset
        new int[]{5, 10, 10}, // end offset
        null,
        new int[]{1, 1, 0}, // position increment
        new int[]{1, 1, 1}, // position length
        true); // check that offsets are correct

    assertAnalyzesTo(analyzer, "dogs running",
        new String[]{"dogs", "dog", "running"},
        new int[]{0, 0, 5}, // start offset
        new int[]{4, 4, 12}, // end offset
        null,
        new int[]{1, 0, 1}, // position increment
        new int[]{1, 1, 1}, // position length
        true); // check that offsets are correct

    assertAnalyzesTo(analyzer, "small dogs running",
        new String[]{"small", "dogs", "dog", "running"},
        new int[]{0, 6, 6, 11}, // start offset
        new int[]{5, 10, 10, 18}, // end offset
        null,
        new int[]{1, 1, 0, 1}, // position increment
        new int[]{1, 1, 1, 1}, // position length
        true); // check that offsets are correct

    assertAnalyzesTo(analyzer, "guinea",
        new String[]{"guinea"},
        new int[]{0}, // start offset
        new int[]{6}, // end offset
        null,
        new int[]{1}, // position increment
        new int[]{1}, // position length
        true); // check that offsets are correct

    assertAnalyzesTo(analyzer, "pig",
        new String[]{"pig"},
        new int[]{0}, // start offset
        new int[]{3}, // end offset
        null,
        new int[]{1}, // position increment
        new int[]{1}, // position length
        true); // check that offsets are correct

    assertAnalyzesTo(analyzer, "guinea pig",
        new String[]{"guinea", "cavy", "pig"},
        new int[]{0, 0, 7}, // start offset
        new int[]{6, 10, 10}, // end offset
        null,
        new int[]{1, 0, 1}, // position increment
        new int[]{1, 2, 1}, // position length
        true); // check that offsets are correct

    assertAnalyzesTo(analyzer, "guinea dogs",
        new String[]{"guinea", "dogs", "dog"},
        new int[]{0, 7, 7}, // start offset
        new int[]{6, 11, 11}, // end offset
        null,
        new int[]{1, 1, 0}, // position increment
        new int[]{1, 1, 1}, // position length
        true); // check that offsets are correct

    assertAnalyzesTo(analyzer, "dogs guinea",
        new String[]{"dogs", "dog", "guinea"},
        new int[]{0, 0, 5}, // start offset
        new int[]{4, 4, 11}, // end offset
        null,
        new int[]{1, 0, 1}, // position increment
        new int[]{1, 1, 1}, // position length
        true); // check that offsets are correct

    assertAnalyzesTo(analyzer, "dogs guinea pig",
        new String[]{"dogs", "dog", "guinea", "cavy", "pig"},
        new int[]{0, 0, 5, 5, 12}, // start offset
        new int[]{4, 4, 11, 15, 15}, // end offset
        null,
        new int[]{1, 0, 1, 0, 1}, // position increment
        new int[]{1, 1, 1, 2, 1}, // position length
        true); // check that offsets are correct

    assertAnalyzesTo(analyzer, "guinea pig dogs",
        new String[]{"guinea", "cavy", "pig", "dogs", "dog"},
        new int[]{0, 0, 7, 11, 11}, // start offset
        new int[]{6, 10, 10, 15, 15}, // end offset
        null,
        new int[]{1, 0, 1, 1, 0}, // position increment
        new int[]{1, 2, 1, 1, 1}, // position length
        true); // check that offsets are correct

    assertAnalyzesTo(analyzer, "small dogs and guinea pig running",
        new String[]{"small", "dogs", "dog", "and", "guinea", "cavy", "pig", "running"},
        new int[]{0, 6, 6, 11, 15, 15, 22, 26}, // start offset
        new int[]{5, 10, 10, 14, 21, 25, 25, 33}, // end offset
        null,
        new int[]{1, 1, 0, 1, 1, 0, 1, 1}, // position increment
        new int[]{1, 1, 1, 1, 1, 2, 1, 1}, // position length
        true); // check that offsets are correct

    assertAnalyzesTo(analyzer, "small guinea pig and dogs running",
        new String[]{"small", "guinea", "cavy", "pig", "and", "dogs", "dog", "running"},
        new int[]{0, 6, 6, 13, 17, 21, 21, 26}, // start offset
        new int[]{5, 12, 16, 16, 20, 25, 25, 33}, // end offset
        null,
        new int[]{1, 1, 0, 1, 1, 1, 0, 1}, // position increment
        new int[]{1, 1, 2, 1, 1, 1, 1, 1}, // position length
        true); // check that offsets are correct
  }
}