Query: Provide an option to analyze wildcard/prefix in query_string / field queries, closes #787.

This commit is contained in:
kimchy 2011-03-17 22:01:22 +02:00
parent 96d06d6dc8
commit 25124b084b
7 changed files with 205 additions and 18 deletions

View File

@ -19,11 +19,14 @@
package org.apache.lucene.queryParser;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.collect.ImmutableMap;
import org.elasticsearch.common.io.FastStringReader;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.index.mapper.AllFieldMapper;
@ -32,6 +35,9 @@ import org.elasticsearch.index.mapper.FieldMappers;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.query.xcontent.QueryParseContext;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;
import static org.elasticsearch.common.lucene.search.Queries.*;
@ -61,6 +67,8 @@ public class MapperQueryParser extends QueryParser {
private FieldMapper currentMapper;
private boolean analyzeWildcard;
public MapperQueryParser(QueryParseContext parseContext) {
super(Lucene.QUERYPARSER_VERSION, null, null);
this.parseContext = parseContext;
@ -83,6 +91,7 @@ public class MapperQueryParser extends QueryParser {
setDefaultOperator(settings.defaultOperator());
setFuzzyMinSim(settings.fuzzyMinSim());
setFuzzyPrefixLength(settings.fuzzyPrefixLength());
this.analyzeWildcard = settings.analyzeWildcard();
}
@Override protected Query newTermQuery(Term term) {
@ -145,22 +154,6 @@ public class MapperQueryParser extends QueryParser {
return newRangeQuery(field, part1, part2, inclusive);
}
/**
 * Builds a prefix query for {@code field}, translating the field name through the
 * mapper service when one is available.
 *
 * <p>When the smart-name lookup resolves the field, the mapper's index name (if a
 * mapper exists) is used for the query and the result is passed through
 * {@code wrapSmartNameQuery}. Otherwise the field name is used as given.
 *
 * @param field   the field name as written in the query
 * @param termStr the prefix text
 * @return the prefix query, possibly wrapped by {@code wrapSmartNameQuery}
 * @throws ParseException if the superclass fails to build the query
 */
@Override protected Query getPrefixQuery(String field, String termStr) throws ParseException {
    // Always reset: a mapper left over from a previous term must not leak into this one.
    currentMapper = null;
    if (parseContext.mapperService() == null) {
        return super.getPrefixQuery(field, termStr);
    }
    MapperService.SmartNameFieldMappers smartMappers = parseContext.mapperService().smartName(field);
    if (smartMappers == null) {
        return super.getPrefixQuery(field, termStr);
    }
    currentMapper = smartMappers.fieldMappers().mapper();
    String targetField = (currentMapper == null) ? field : currentMapper.names().indexName();
    return wrapSmartNameQuery(super.getPrefixQuery(targetField, termStr), smartMappers, parseContext);
}
@Override protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException {
String indexedNameField = field;
currentMapper = null;
@ -177,6 +170,65 @@ public class MapperQueryParser extends QueryParser {
return super.getFuzzyQuery(indexedNameField, termStr, minSimilarity);
}
/**
 * Builds a prefix query for {@code field}, translating the field name through the
 * mapper service when one is available and delegating the actual query construction
 * to {@code getPossiblyAnalyzedPrefixQuery}.
 *
 * <p>When the smart-name lookup resolves the field, the mapper's index name (if a
 * mapper exists) is used for the query and the result is passed through
 * {@code wrapSmartNameQuery}. Otherwise the field name is used as given.
 *
 * @param field   the field name as written in the query
 * @param termStr the prefix text
 * @return the prefix query, possibly wrapped by {@code wrapSmartNameQuery}
 * @throws ParseException if the underlying query cannot be built
 */
@Override protected Query getPrefixQuery(String field, String termStr) throws ParseException {
    // Always reset: a mapper left over from a previous term must not leak into this one.
    currentMapper = null;
    if (parseContext.mapperService() == null) {
        return getPossiblyAnalyzedPrefixQuery(field, termStr);
    }
    MapperService.SmartNameFieldMappers smartMappers = parseContext.mapperService().smartName(field);
    if (smartMappers == null) {
        return getPossiblyAnalyzedPrefixQuery(field, termStr);
    }
    currentMapper = smartMappers.fieldMappers().mapper();
    String targetField = (currentMapper == null) ? field : currentMapper.names().indexName();
    return wrapSmartNameQuery(getPossiblyAnalyzedPrefixQuery(targetField, termStr), smartMappers, parseContext);
}
/**
 * Builds a prefix query, running the prefix text through the analyzer first when
 * {@code analyzeWildcard} is enabled.
 *
 * <p>The analyzed form is used only when analysis yields exactly one token. If the
 * analyzer added or consumed tokens (common for a stemmer), or the token stream cannot
 * be obtained, the raw {@code termStr} is used instead of failing the parse.
 *
 * @param field   the (already resolved) field name to query
 * @param termStr the prefix text as written in the query
 * @return the prefix query built by the superclass
 * @throws ParseException if the superclass fails to build the query
 */
private Query getPossiblyAnalyzedPrefixQuery(String field, String termStr) throws ParseException {
    if (!analyzeWildcard) {
        return super.getPrefixQuery(field, termStr);
    }

    // LUCENE MONITOR: TermAttribute deprecated in 3.1
    // Tokenize the term with the analyzer provided by the superclass.
    final TokenStream stream;
    try {
        stream = getAnalyzer().reusableTokenStream(field, new StringReader(termStr));
    } catch (IOException e) {
        return super.getPrefixQuery(field, termStr);
    }

    final List<String> tokens = new ArrayList<String>();
    final TermAttribute attribute = stream.addAttribute(TermAttribute.class);
    boolean hasMore = true;
    while (hasMore) {
        try {
            hasMore = stream.incrementToken();
        } catch (IOException e) {
            // Stop consuming; whatever was collected so far is what gets evaluated below.
            hasMore = false;
        }
        if (hasMore) {
            tokens.add(attribute.term());
        }
    }

    try {
        stream.close();
    } catch (IOException e) {
        // ignore
    }

    // Anything other than exactly one token means the analyzer either added or consumed
    // tokens, so no analyzed prefix can be built; fall back to the raw term rather than
    // throwing a ParseException.
    final String prefix = (tokens.size() == 1) ? tokens.get(0) : termStr;
    return super.getPrefixQuery(field, prefix);
}
@Override protected Query getWildcardQuery(String field, String termStr) throws ParseException {
if (AllFieldMapper.NAME.equals(field) && termStr.equals("*")) {
return newMatchAllDocsQuery();
@ -190,10 +242,74 @@ public class MapperQueryParser extends QueryParser {
if (currentMapper != null) {
indexedNameField = currentMapper.names().indexName();
}
return wrapSmartNameQuery(super.getWildcardQuery(indexedNameField, termStr), fieldMappers, parseContext);
return wrapSmartNameQuery(getPossiblyAnalyzedWildcardQuery(indexedNameField, termStr), fieldMappers, parseContext);
}
}
return super.getWildcardQuery(indexedNameField, termStr);
return getPossiblyAnalyzedWildcardQuery(indexedNameField, termStr);
}
/**
 * Builds a wildcard query, running the literal parts of the pattern through the
 * analyzer first when {@code analyzeWildcard} is enabled.
 *
 * <p>The pattern is split at every {@code ?} and {@code *}. Each run of characters
 * between wildcards is analyzed on its own and replaced by the first token the analyzer
 * emits; the wildcard characters themselves are copied through unchanged.
 *
 * @param field   the (already resolved) field name to query
 * @param termStr the wildcard pattern as written in the query
 * @return the wildcard query built by the superclass
 * @throws ParseException if the superclass fails to build the query
 */
private Query getPossiblyAnalyzedWildcardQuery(String field, String termStr) throws ParseException {
    if (!analyzeWildcard) {
        return super.getWildcardQuery(field, termStr);
    }
    StringBuilder analyzedPattern = new StringBuilder();
    StringBuilder fragment = new StringBuilder();
    for (int i = 0; i < termStr.length(); i++) {
        char c = termStr.charAt(i);
        if (c == '?' || c == '*') {
            // A wildcard ends the current literal fragment (if any): analyze and flush it.
            if (fragment.length() > 0) {
                analyzedPattern.append(analyzeWildcardFragment(field, fragment.toString()));
                fragment.setLength(0);
            }
            analyzedPattern.append(c);
        } else {
            fragment.append(c);
        }
    }
    // Flush a trailing literal fragment that was not followed by a wildcard.
    if (fragment.length() > 0) {
        analyzedPattern.append(analyzeWildcardFragment(field, fragment.toString()));
    }
    return super.getWildcardQuery(field, analyzedPattern.toString());
}

/**
 * Analyzes a single literal fragment of a wildcard pattern.
 *
 * @param field the field whose analysis chain should be applied
 * @param text  the literal text between two wildcard characters
 * @return the first non-empty token produced by the analyzer, or {@code text} itself
 *         when the analyzer yields no usable token or analysis fails with an
 *         {@link IOException}
 */
private String analyzeWildcardFragment(String field, String text) {
    TokenStream source = null;
    try {
        source = getAnalyzer().reusableTokenStream(field, new FastStringReader(text));
        TermAttribute termAtt = source.addAttribute(TermAttribute.class);
        if (source.incrementToken()) {
            String term = termAtt.term();
            if (term.length() > 0) {
                return term;
            }
        }
        // no tokens, just use what we have now
        return text;
    } catch (IOException e) {
        return text;
    } finally {
        // Close on every path, and keep a close failure away from the result: a failed
        // close must neither leak the stream nor cause the fragment to be emitted twice.
        if (source != null) {
            try {
                source.close();
            } catch (IOException e) {
                // ignore
            }
        }
    }
}
@Override protected Query getBooleanQuery(List<BooleanClause> clauses, boolean disableCoord) throws ParseException {

View File

@ -37,6 +37,7 @@ public class QueryParserSettings {
private int phraseSlop = 0;
private float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
private int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
private boolean analyzeWildcard = false;
private boolean escape = false;
private Analyzer analyzer = null;
@ -136,6 +137,14 @@ public class QueryParserSettings {
this.analyzer = analyzer;
}
/**
 * Returns whether wildcard and prefix terms should be analyzed.
 *
 * @return the current value of the {@code analyzeWildcard} setting
 */
public boolean analyzeWildcard() {
    return analyzeWildcard;
}
/**
 * Sets whether wildcard and prefix terms should be analyzed.
 *
 * @param analyzeWildcard the new value of the setting
 */
public void analyzeWildcard(boolean analyzeWildcard) {
this.analyzeWildcard = analyzeWildcard;
}
@Override public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
@ -146,6 +155,7 @@ public class QueryParserSettings {
if (Float.compare(that.boost, boost) != 0) return false;
if (enablePositionIncrements != that.enablePositionIncrements) return false;
if (escape != that.escape) return false;
if (analyzeWildcard != that.analyzeWildcard) return false;
if (Float.compare(that.fuzzyMinSim, fuzzyMinSim) != 0) return false;
if (fuzzyPrefixLength != that.fuzzyPrefixLength) return false;
if (lowercaseExpandedTerms != that.lowercaseExpandedTerms) return false;
@ -171,6 +181,7 @@ public class QueryParserSettings {
result = 31 * result + fuzzyPrefixLength;
result = 31 * result + (escape ? 1 : 0);
result = 31 * result + (analyzer != null ? analyzer.hashCode() : 0);
result = 31 * result + (analyzeWildcard ? 1 : 0);
return result;
}
}

View File

@ -51,6 +51,8 @@ public class FieldQueryBuilder extends BaseQueryBuilder {
private Boolean enablePositionIncrements;
private Boolean analyzeWildcard;
private float fuzzyMinSim = -1;
private float boost = -1;
@ -242,6 +244,15 @@ public class FieldQueryBuilder extends BaseQueryBuilder {
return this;
}
/**
 * Set to <tt>true</tt> to enable analysis on wildcard and prefix queries.
 * Calling this also marks the builder as carrying extra settings.
 *
 * @return this builder, for chaining
 */
public FieldQueryBuilder analyzeWildcard(boolean analyzeWildcard) {
    extraSet = true;
    this.analyzeWildcard = Boolean.valueOf(analyzeWildcard);
    return this;
}
@Override public void doXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject(FieldQueryParser.NAME);
if (!extraSet) {
@ -276,6 +287,9 @@ public class FieldQueryBuilder extends BaseQueryBuilder {
if (phraseSlop != -1) {
builder.field("phrase_slop", phraseSlop);
}
if (analyzeWildcard != null) {
builder.field("analyze_wildcard", analyzeWildcard);
}
builder.endObject();
}
builder.endObject();

View File

@ -104,6 +104,8 @@ public class FieldQueryParser extends AbstractIndexComponent implements XContent
qpSettings.fuzzyPrefixLength(parser.intValue());
} else if ("escape".equals(currentFieldName)) {
qpSettings.escape(parser.booleanValue());
} else if ("analyze_wildcard".equals(currentFieldName) || "analyzeWildcard".equals(currentFieldName)) {
qpSettings.analyzeWildcard(parser.booleanValue());
}
}
}

View File

@ -58,6 +58,8 @@ public class QueryStringQueryBuilder extends BaseQueryBuilder {
private Boolean enablePositionIncrements;
private Boolean analyzeWildcard;
private float fuzzyMinSim = -1;
private float boost = -1;
@ -209,6 +211,14 @@ public class QueryStringQueryBuilder extends BaseQueryBuilder {
return this;
}
/**
 * Set to <tt>true</tt> to enable analysis on wildcard and prefix queries.
 *
 * @return this builder, for chaining
 */
public QueryStringQueryBuilder analyzeWildcard(boolean analyzeWildcard) {
    this.analyzeWildcard = Boolean.valueOf(analyzeWildcard);
    return this;
}
/**
* Sets the boost for this query. Documents matching this query will (in addition to the normal
* weightings) have their score multiplied by the boost provided.
@ -271,6 +281,9 @@ public class QueryStringQueryBuilder extends BaseQueryBuilder {
if (phraseSlop != -1) {
builder.field("phrase_slop", phraseSlop);
}
if (analyzeWildcard != null) {
builder.field("analyze_wildcard", analyzeWildcard);
}
builder.endObject();
}
}

View File

@ -155,6 +155,8 @@ public class QueryStringQueryParser extends AbstractIndexComponent implements XC
qpSettings.boost(parser.floatValue());
} else if ("tie_breaker".equals(currentFieldName) || "tieBreaker".equals(currentFieldName)) {
qpSettings.tieBreaker(parser.floatValue());
} else if ("analyze_wildcard".equals(currentFieldName) || "analyzeWildcard".equals(currentFieldName)) {
qpSettings.analyzeWildcard(parser.booleanValue());
}
}
}

View File

@ -68,6 +68,35 @@ public class SimpleQueryTests extends AbstractNodesTests {
assertThat(searchResponse.hits().totalHits(), equalTo(1l));
}
/**
 * Indexes a single document and checks that query_string wildcard patterns with
 * {@code analyzeWildcard(true)} each match exactly that one document.
 */
@Test public void queryStringAnalyzedWildcard() throws Exception {
    try {
        client.admin().indices().prepareDelete("test").execute().actionGet();
    } catch (Exception e) {
        // ignore
    }

    client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("number_of_shards", 1)).execute().actionGet();

    client.prepareIndex("test", "type1", "1").setSource("field1", "value_1", "field2", "value_2").execute().actionGet();

    client.admin().indices().prepareRefresh().execute().actionGet();

    // Trailing, leading, embedded and single-character wildcards, in that order.
    String[] patterns = {"value*", "*ue*", "*ue_1", "val*e_1", "v?l*e?1"};
    for (String pattern : patterns) {
        SearchResponse searchResponse = client.prepareSearch().setQuery(queryString(pattern).analyzeWildcard(true)).execute().actionGet();
        assertThat(searchResponse.hits().totalHits(), equalTo(1l));
    }
}
@Test public void filterExistsMissingTests() throws Exception {
try {
client.admin().indices().prepareDelete("test").execute().actionGet();