Query DSL: Fuzzy Like This, closes #103.

This commit is contained in:
kimchy 2010-04-01 22:19:17 +03:00
parent c2e8804b4d
commit 118aa89614
9 changed files with 581 additions and 0 deletions

View File

@ -0,0 +1,109 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query.json;
import org.elasticsearch.index.query.QueryBuilderException;
import org.elasticsearch.util.json.JsonBuilder;
import java.io.IOException;
/**
 * Builds the JSON form of a "fuzzy like this" query that targets a single field.
 *
 * <p>Only {@link #likeText(String)} is mandatory; every other option is written to the
 * generated JSON only when it has been explicitly set on this builder.
 *
 * @author kimchy (shay.banon)
 */
public class FuzzyLikeThisFieldJsonQueryBuilder extends BaseJsonQueryBuilder {

    private final String name;

    private Float boost;

    private String likeText = null;

    private Float minSimilarity;

    private Integer prefixLength;

    private Integer maxNumTerms;

    private Boolean ignoreTF;

    /**
     * A fuzzy more like this query on the provided field.
     *
     * @param name the name of the field
     */
    public FuzzyLikeThisFieldJsonQueryBuilder(String name) {
        this.name = name;
    }

    /**
     * The text to use in order to find documents that are "like" this.
     */
    public FuzzyLikeThisFieldJsonQueryBuilder likeText(String likeText) {
        this.likeText = likeText;
        return this;
    }

    /**
     * Sets the value emitted as the <tt>minSimilarity</tt> element.
     */
    public FuzzyLikeThisFieldJsonQueryBuilder minSimilarity(float minSimilarity) {
        this.minSimilarity = minSimilarity;
        return this;
    }

    /**
     * Sets the value emitted as the <tt>prefixLength</tt> element.
     */
    public FuzzyLikeThisFieldJsonQueryBuilder prefixLength(int prefixLength) {
        this.prefixLength = prefixLength;
        return this;
    }

    /**
     * Sets the value emitted as the <tt>maxNumTerms</tt> element.
     */
    public FuzzyLikeThisFieldJsonQueryBuilder maxNumTerms(int maxNumTerms) {
        this.maxNumTerms = maxNumTerms;
        return this;
    }

    /**
     * Sets the value emitted as the <tt>ignoreTF</tt> element.
     */
    public FuzzyLikeThisFieldJsonQueryBuilder ignoreTF(boolean ignoreTF) {
        this.ignoreTF = ignoreTF;
        return this;
    }

    /**
     * Sets the value emitted as the <tt>boost</tt> element.
     */
    public FuzzyLikeThisFieldJsonQueryBuilder boost(float boost) {
        this.boost = boost;
        return this;
    }

    /**
     * Writes the query as an object named {@link FuzzyLikeThisFieldJsonQueryParser#NAME} that
     * wraps a single object named after the field, which in turn holds the query options.
     *
     * @throws QueryBuilderException if no <tt>likeText</tt> was provided
     */
    @Override protected void doJson(JsonBuilder builder, Params params) throws IOException {
        // Validate before touching the builder, so a failure does not leave
        // half-opened objects behind in the generated output.
        if (likeText == null) {
            throw new QueryBuilderException("fuzzyLikeThisField requires 'likeText' to be provided");
        }
        builder.startObject(FuzzyLikeThisFieldJsonQueryParser.NAME);
        builder.startObject(name);
        builder.field("likeText", likeText);
        // optional settings are only emitted when explicitly set
        if (maxNumTerms != null) {
            builder.field("maxNumTerms", maxNumTerms);
        }
        if (minSimilarity != null) {
            builder.field("minSimilarity", minSimilarity);
        }
        if (prefixLength != null) {
            builder.field("prefixLength", prefixLength);
        }
        if (ignoreTF != null) {
            builder.field("ignoreTF", ignoreTF);
        }
        if (boost != null) {
            builder.field("boost", boost);
        }
        builder.endObject();
        builder.endObject();
    }
}

View File

@ -0,0 +1,141 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query.json;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.FuzzyLikeThisQuery;
import org.apache.lucene.search.Query;
import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.JsonToken;
import org.elasticsearch.index.AbstractIndexComponent;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.query.QueryParsingException;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.Booleans;
import org.elasticsearch.util.settings.Settings;
import java.io.IOException;
import static org.elasticsearch.index.query.support.QueryParsers.*;
/**
 * Parses the <tt>fuzzyLikeThisField</tt> JSON element into a Lucene {@link FuzzyLikeThisQuery}
 * that runs against a single field.
 *
 * <pre>
 * {
 *     fuzzyLikeThisField : {
 *         field1 : {
 *             maxNumTerms : 12,
 *             boost : 1.1,
 *             likeText : "..."
 *         }
 *     }
 * }
 * </pre>
 *
 * @author kimchy (shay.banon)
 */
public class FuzzyLikeThisFieldJsonQueryParser extends AbstractIndexComponent implements JsonQueryParser {

    public static final String NAME = "fuzzyLikeThisField";

    public FuzzyLikeThisFieldJsonQueryParser(Index index, @IndexSettings Settings indexSettings) {
        super(index, indexSettings);
    }

    @Override public String name() {
        return NAME;
    }

    /**
     * Reads the field name and its options from the parser and builds the query.
     *
     * <p>Recognized options are <tt>likeText</tt> (required), <tt>maxNumTerms</tt>,
     * <tt>minSimilarity</tt>, <tt>prefixLength</tt>, <tt>ignoreTF</tt> and <tt>boost</tt>.
     * Unknown options are ignored.
     *
     * @throws QueryParsingException if <tt>likeText</tt> is not specified
     */
    @Override public Query parse(JsonQueryParseContext parseContext) throws IOException, QueryParsingException {
        JsonParser jp = parseContext.jp();

        // defaults used when the corresponding option is absent
        int maxNumTerms = 100;
        float boost = 1.0f;
        String likeText = null;
        float minSimilarity = 0.5f;
        int prefixLength = 0;
        boolean ignoreTF = false;

        JsonToken token = jp.nextToken();
        assert token == JsonToken.FIELD_NAME;
        String fieldName = jp.getCurrentName();

        // now, we move after the field name, which starts the object
        token = jp.nextToken();
        assert token == JsonToken.START_OBJECT;

        String currentFieldName = null;
        while ((token = jp.nextToken()) != JsonToken.END_OBJECT) {
            if (token == JsonToken.FIELD_NAME) {
                currentFieldName = jp.getCurrentName();
            } else if (token == JsonToken.VALUE_STRING) {
                if ("likeText".equals(currentFieldName)) {
                    likeText = jp.getText();
                } else if ("maxNumTerms".equals(currentFieldName)) {
                    maxNumTerms = Integer.parseInt(jp.getText());
                } else if ("boost".equals(currentFieldName)) {
                    boost = Float.parseFloat(jp.getText());
                } else if ("ignoreTF".equals(currentFieldName)) {
                    ignoreTF = Booleans.parseBoolean(jp.getText(), false);
                } else if ("minSimilarity".equals(currentFieldName)) {
                    minSimilarity = Float.parseFloat(jp.getText());
                } else if ("prefixLength".equals(currentFieldName)) {
                    prefixLength = Integer.parseInt(jp.getText());
                }
            } else if (token == JsonToken.VALUE_NUMBER_INT) {
                if ("maxNumTerms".equals(currentFieldName)) {
                    maxNumTerms = jp.getIntValue();
                } else if ("boost".equals(currentFieldName)) {
                    boost = jp.getIntValue();
                } else if ("ignoreTF".equals(currentFieldName)) {
                    ignoreTF = jp.getIntValue() != 0;
                } else if ("minSimilarity".equals(currentFieldName)) {
                    minSimilarity = jp.getIntValue();
                } else if ("prefixLength".equals(currentFieldName)) {
                    prefixLength = jp.getIntValue();
                }
            } else if (token == JsonToken.VALUE_TRUE) {
                if ("ignoreTF".equals(currentFieldName)) {
                    ignoreTF = true;
                }
            } else if (token == JsonToken.VALUE_FALSE) {
                if ("ignoreTF".equals(currentFieldName)) {
                    ignoreTF = false;
                }
            } else if (token == JsonToken.VALUE_NUMBER_FLOAT) {
                if ("boost".equals(currentFieldName)) {
                    boost = jp.getFloatValue();
                } else if ("minSimilarity".equals(currentFieldName)) {
                    minSimilarity = jp.getFloatValue();
                }
            }
        }

        if (likeText == null) {
            throw new QueryParsingException(index, "fuzzyLikeThisField requires 'likeText' to be specified");
        }

        // Start with the general search analyzer; when the field resolves to a mapper,
        // switch to the mapper's index name and its own search analyzer.
        Analyzer analyzer = parseContext.mapperService().searchAnalyzer();
        MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
        if (smartNameFieldMappers != null) {
            if (smartNameFieldMappers.hasMapper()) {
                fieldName = smartNameFieldMappers.mapper().names().indexName();
                analyzer = smartNameFieldMappers.mapper().searchAnalyzer();
            }
        }

        FuzzyLikeThisQuery query = new FuzzyLikeThisQuery(maxNumTerms, analyzer);
        query.addTerms(likeText, fieldName, minSimilarity, prefixLength);
        query.setBoost(boost);
        query.setIgnoreTF(ignoreTF);

        // move to the next end object, to close the field name
        token = jp.nextToken();
        assert token == JsonToken.END_OBJECT;

        return wrapSmartNameQuery(query, smartNameFieldMappers, parseContext.indexCache());
    }
}

View File

@ -0,0 +1,121 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query.json;
import org.elasticsearch.index.query.QueryBuilderException;
import org.elasticsearch.util.json.JsonBuilder;
import java.io.IOException;
/**
 * Builds the JSON form of a "fuzzy like this" query that targets either an explicit
 * list of fields or, when none is given, no <tt>fields</tt> element at all.
 *
 * <p>Only {@link #likeText(String)} is mandatory; every other option is written to the
 * generated JSON only when it has been explicitly set on this builder.
 *
 * @author kimchy (shay.banon)
 */
public class FuzzyLikeThisJsonQueryBuilder extends BaseJsonQueryBuilder {

    private final String[] fields;

    private Float boost;

    private String likeText = null;

    private Float minSimilarity;

    private Integer prefixLength;

    private Integer maxNumTerms;

    private Boolean ignoreTF;

    /**
     * Constructs a new fuzzy like this query which uses the "_all" field.
     */
    public FuzzyLikeThisJsonQueryBuilder() {
        this.fields = null;
    }

    /**
     * Sets the field names that will be used when generating the 'Fuzzy Like This' query.
     *
     * @param fields the field names that will be used when generating the 'Fuzzy Like This' query.
     */
    public FuzzyLikeThisJsonQueryBuilder(String... fields) {
        this.fields = fields;
    }

    /**
     * The text to use in order to find documents that are "like" this.
     */
    public FuzzyLikeThisJsonQueryBuilder likeText(String likeText) {
        this.likeText = likeText;
        return this;
    }

    /**
     * Sets the value emitted as the <tt>minSimilarity</tt> element.
     */
    public FuzzyLikeThisJsonQueryBuilder minSimilarity(float minSimilarity) {
        this.minSimilarity = minSimilarity;
        return this;
    }

    /**
     * Sets the value emitted as the <tt>prefixLength</tt> element.
     */
    public FuzzyLikeThisJsonQueryBuilder prefixLength(int prefixLength) {
        this.prefixLength = prefixLength;
        return this;
    }

    /**
     * Sets the value emitted as the <tt>maxNumTerms</tt> element.
     */
    public FuzzyLikeThisJsonQueryBuilder maxNumTerms(int maxNumTerms) {
        this.maxNumTerms = maxNumTerms;
        return this;
    }

    /**
     * Sets the value emitted as the <tt>ignoreTF</tt> element.
     */
    public FuzzyLikeThisJsonQueryBuilder ignoreTF(boolean ignoreTF) {
        this.ignoreTF = ignoreTF;
        return this;
    }

    /**
     * Sets the value emitted as the <tt>boost</tt> element.
     */
    public FuzzyLikeThisJsonQueryBuilder boost(float boost) {
        this.boost = boost;
        return this;
    }

    /**
     * Writes the query as an object named {@link FuzzyLikeThisJsonQueryParser#NAME} holding the
     * optional <tt>fields</tt> array followed by the query options.
     *
     * @throws QueryBuilderException if no <tt>likeText</tt> was provided
     */
    @Override protected void doJson(JsonBuilder builder, Params params) throws IOException {
        // Validate before touching the builder, so a failure does not leave
        // a half-opened object behind in the generated output.
        if (likeText == null) {
            throw new QueryBuilderException("fuzzyLikeThis requires 'likeText' to be provided");
        }
        builder.startObject(FuzzyLikeThisJsonQueryParser.NAME);
        // the fields array is omitted entirely when the no-arg constructor was used
        if (fields != null) {
            builder.startArray("fields");
            for (String field : fields) {
                builder.value(field);
            }
            builder.endArray();
        }
        builder.field("likeText", likeText);
        // optional settings are only emitted when explicitly set
        if (maxNumTerms != null) {
            builder.field("maxNumTerms", maxNumTerms);
        }
        if (minSimilarity != null) {
            builder.field("minSimilarity", minSimilarity);
        }
        if (prefixLength != null) {
            builder.field("prefixLength", prefixLength);
        }
        if (ignoreTF != null) {
            builder.field("ignoreTF", ignoreTF);
        }
        if (boost != null) {
            builder.field("boost", boost);
        }
        builder.endObject();
    }
}

View File

@ -0,0 +1,138 @@
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query.json;
import com.google.common.collect.Lists;
import org.apache.lucene.search.FuzzyLikeThisQuery;
import org.apache.lucene.search.Query;
import org.codehaus.jackson.JsonParser;
import org.codehaus.jackson.JsonToken;
import org.elasticsearch.index.AbstractIndexComponent;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.mapper.AllFieldMapper;
import org.elasticsearch.index.query.QueryParsingException;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.util.Booleans;
import org.elasticsearch.util.settings.Settings;
import java.io.IOException;
import java.util.List;
/**
 * Parses the <tt>fuzzyLikeThis</tt> JSON element into a Lucene {@link FuzzyLikeThisQuery}
 * that runs against a list of fields, or against the "_all" field when no fields are given.
 *
 * <pre>
 * {
 *     fuzzyLikeThis : {
 *         maxNumTerms : 12,
 *         boost : 1.1,
 *         fields : ["field1", "field2"],
 *         likeText : "..."
 *     }
 * }
 * </pre>
 *
 * @author kimchy (shay.banon)
 */
public class FuzzyLikeThisJsonQueryParser extends AbstractIndexComponent implements JsonQueryParser {

    public static final String NAME = "fuzzyLikeThis";

    public FuzzyLikeThisJsonQueryParser(Index index, @IndexSettings Settings indexSettings) {
        super(index, indexSettings);
    }

    @Override public String name() {
        return NAME;
    }

    /**
     * Reads the query options from the parser and builds the query.
     *
     * <p>Recognized options are <tt>likeText</tt> (required), <tt>fields</tt>,
     * <tt>maxNumTerms</tt>, <tt>minSimilarity</tt>, <tt>prefixLength</tt>, <tt>ignoreTF</tt>
     * and <tt>boost</tt>. Unknown options are ignored.
     *
     * @throws QueryParsingException if <tt>likeText</tt> is not specified
     */
    @Override public Query parse(JsonQueryParseContext parseContext) throws IOException, QueryParsingException {
        JsonParser jp = parseContext.jp();

        // defaults used when the corresponding option is absent
        int maxNumTerms = 100;
        float boost = 1.0f;
        List<String> fields = null;
        String likeText = null;
        float minSimilarity = 0.5f;
        int prefixLength = 0;
        boolean ignoreTF = false;

        JsonToken token;
        String currentFieldName = null;
        while ((token = jp.nextToken()) != JsonToken.END_OBJECT) {
            if (token == JsonToken.FIELD_NAME) {
                currentFieldName = jp.getCurrentName();
            } else if (token == JsonToken.VALUE_STRING) {
                if ("likeText".equals(currentFieldName)) {
                    likeText = jp.getText();
                } else if ("maxNumTerms".equals(currentFieldName)) {
                    maxNumTerms = Integer.parseInt(jp.getText());
                } else if ("boost".equals(currentFieldName)) {
                    boost = Float.parseFloat(jp.getText());
                } else if ("ignoreTF".equals(currentFieldName)) {
                    ignoreTF = Booleans.parseBoolean(jp.getText(), false);
                } else if ("minSimilarity".equals(currentFieldName)) {
                    minSimilarity = Float.parseFloat(jp.getText());
                } else if ("prefixLength".equals(currentFieldName)) {
                    prefixLength = Integer.parseInt(jp.getText());
                }
            } else if (token == JsonToken.VALUE_NUMBER_INT) {
                if ("maxNumTerms".equals(currentFieldName)) {
                    maxNumTerms = jp.getIntValue();
                } else if ("boost".equals(currentFieldName)) {
                    boost = jp.getIntValue();
                } else if ("ignoreTF".equals(currentFieldName)) {
                    ignoreTF = jp.getIntValue() != 0;
                } else if ("minSimilarity".equals(currentFieldName)) {
                    minSimilarity = jp.getIntValue();
                } else if ("prefixLength".equals(currentFieldName)) {
                    prefixLength = jp.getIntValue();
                }
            } else if (token == JsonToken.VALUE_TRUE) {
                if ("ignoreTF".equals(currentFieldName)) {
                    ignoreTF = true;
                }
            } else if (token == JsonToken.VALUE_FALSE) {
                if ("ignoreTF".equals(currentFieldName)) {
                    ignoreTF = false;
                }
            } else if (token == JsonToken.VALUE_NUMBER_FLOAT) {
                if ("boost".equals(currentFieldName)) {
                    boost = jp.getFloatValue();
                } else if ("minSimilarity".equals(currentFieldName)) {
                    minSimilarity = jp.getFloatValue();
                }
            } else if (token == JsonToken.START_ARRAY) {
                if ("fields".equals(currentFieldName)) {
                    fields = Lists.newArrayList();
                    // each entry is translated through the parse context before being stored
                    while ((token = jp.nextToken()) != JsonToken.END_ARRAY) {
                        fields.add(parseContext.indexName(jp.getText()));
                    }
                }
            }
        }

        if (likeText == null) {
            throw new QueryParsingException(index, "fuzzyLikeThis requires 'likeText' to be specified");
        }

        FuzzyLikeThisQuery query = new FuzzyLikeThisQuery(maxNumTerms, parseContext.mapperService().searchAnalyzer());
        if (fields == null) {
            // add the default _all field
            query.addTerms(likeText, AllFieldMapper.NAME, minSimilarity, prefixLength);
        } else {
            for (String field : fields) {
                query.addTerms(likeText, field, minSimilarity, prefixLength);
            }
        }
        query.setBoost(boost);
        query.setIgnoreTF(ignoreTF);

        // NOTE(review): the loop above already stops on the END_OBJECT that closes the query
        // body; this advances one token further and (with assertions enabled) expects another
        // END_OBJECT. Kept as-is -- confirm against how the caller positions the parser.
        token = jp.nextToken();
        assert token == JsonToken.END_OBJECT;

        return query;
    }
}

View File

@ -279,6 +279,31 @@ public abstract class JsonQueryBuilders {
return new MoreLikeThisJsonQueryBuilder(); return new MoreLikeThisJsonQueryBuilder();
} }
/**
 * Creates a fuzzy like this query builder that matches documents "like" the text supplied via
 * {@link FuzzyLikeThisJsonQueryBuilder#likeText(String)}, restricted to the given fields.
 *
 * @param fields The fields to run the query against
 */
public static FuzzyLikeThisJsonQueryBuilder fuzzyLikeThisQuery(String... fields) {
    FuzzyLikeThisJsonQueryBuilder queryBuilder = new FuzzyLikeThisJsonQueryBuilder(fields);
    return queryBuilder;
}
/**
 * Creates a fuzzy like this query builder that matches documents "like" the text supplied via
 * {@link FuzzyLikeThisJsonQueryBuilder#likeText(String)}, with no explicit fields so that the
 * "_all" field is used.
 */
public static FuzzyLikeThisJsonQueryBuilder fuzzyLikeThisQuery() {
    FuzzyLikeThisJsonQueryBuilder queryBuilder = new FuzzyLikeThisJsonQueryBuilder();
    return queryBuilder;
}
/**
 * Creates a fuzzy like this query builder for a single field, matching documents "like" the
 * text supplied via {@link FuzzyLikeThisFieldJsonQueryBuilder#likeText(String)}.
 *
 * @param name The name of the field to run the query against
 */
public static FuzzyLikeThisFieldJsonQueryBuilder fuzzyLikeThisFieldQuery(String name) {
    FuzzyLikeThisFieldJsonQueryBuilder queryBuilder = new FuzzyLikeThisFieldJsonQueryBuilder(name);
    return queryBuilder;
}
/** /**
* A more like this query that runs against a specific field. * A more like this query that runs against a specific field.
* *

View File

@ -65,6 +65,8 @@ public class JsonQueryParserRegistry {
add(queryParsersMap, new SpanOrJsonQueryParser(index, indexSettings)); add(queryParsersMap, new SpanOrJsonQueryParser(index, indexSettings));
add(queryParsersMap, new MoreLikeThisJsonQueryParser(index, indexSettings)); add(queryParsersMap, new MoreLikeThisJsonQueryParser(index, indexSettings));
add(queryParsersMap, new MoreLikeThisFieldJsonQueryParser(index, indexSettings)); add(queryParsersMap, new MoreLikeThisFieldJsonQueryParser(index, indexSettings));
add(queryParsersMap, new FuzzyLikeThisJsonQueryParser(index, indexSettings));
add(queryParsersMap, new FuzzyLikeThisFieldJsonQueryParser(index, indexSettings));
// now, copy over the ones provided // now, copy over the ones provided
if (queryParsers != null) { if (queryParsers != null) {

View File

@ -732,6 +732,36 @@ public class SimpleJsonIndexQueryParserTests {
assertThat(mltQuery.getMaxQueryTerms(), equalTo(12)); assertThat(mltQuery.getMaxQueryTerms(), equalTo(12));
} }
@Test public void testFuzzyLikeThisBuilder() throws Exception {
    // Build the query with the Java builder API against two fields and parse it back.
    Query parsedQuery = newQueryParser().parse(
            fuzzyLikeThisQuery("name.first", "name.last").likeText("something").maxNumTerms(12));
    // Only the type of the resulting Lucene query is verified.
    assertThat(parsedQuery, instanceOf(FuzzyLikeThisQuery.class));
}
@Test public void testFuzzyLikeThis() throws Exception {
    // Parse the JSON fixture from the classpath.
    Query parsedQuery = newQueryParser().parse(
            copyToStringFromClasspath("/org/elasticsearch/index/query/json/fuzzyLikeThis.json"));
    // Only the type of the resulting Lucene query is verified.
    assertThat(parsedQuery, instanceOf(FuzzyLikeThisQuery.class));
}
@Test public void testFuzzyLikeFieldThisBuilder() throws Exception {
    // Build the single-field query with the Java builder API and parse it back.
    Query parsedQuery = newQueryParser().parse(
            fuzzyLikeThisFieldQuery("name.first").likeText("something").maxNumTerms(12));
    // Only the type of the resulting Lucene query is verified.
    assertThat(parsedQuery, instanceOf(FuzzyLikeThisQuery.class));
}
@Test public void testFuzzyLikeThisField() throws Exception {
    // Parse the single-field JSON fixture from the classpath.
    Query parsedQuery = newQueryParser().parse(
            copyToStringFromClasspath("/org/elasticsearch/index/query/json/fuzzyLikeThisField.json"));
    // Only the type of the resulting Lucene query is verified.
    assertThat(parsedQuery, instanceOf(FuzzyLikeThisQuery.class));
}
@Test public void testMoreLikeThisFieldBuilder() throws Exception { @Test public void testMoreLikeThisFieldBuilder() throws Exception {
IndexQueryParser queryParser = newQueryParser(); IndexQueryParser queryParser = newQueryParser();
Query parsedQuery = queryParser.parse(moreLikeThisFieldQuery("name.first").likeText("something").minTermFrequency(1).maxQueryTerms(12)); Query parsedQuery = queryParser.parse(moreLikeThisFieldQuery("name.first").likeText("something").minTermFrequency(1).maxQueryTerms(12));

View File

@ -0,0 +1,7 @@
{
fuzzyLikeThis : {
fields : ["name.first", "name.last"],
likeText : "something",
maxNumTerms : 12
}
}

View File

@ -0,0 +1,8 @@
{
fuzzyLikeThisField : {
"name.first" : {
likeText : "something",
maxNumTerms : 12
}
}
}