diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/JsonQueryBuilders.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/JsonQueryBuilders.java index 488e5a4a735..720dc3e61e3 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/JsonQueryBuilders.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/JsonQueryBuilders.java @@ -116,6 +116,14 @@ public abstract class JsonQueryBuilders { return new ConstantScoreQueryJsonQueryBuilder(filterBuilder); } + public static MoreLikeThisJsonQueryBuilder moreLikeThis(String... fields) { + return new MoreLikeThisJsonQueryBuilder(fields); + } + + public static MoreLikeThisFieldJsonQueryBuilder moreLikeThisField(String name) { + return new MoreLikeThisFieldJsonQueryBuilder(name); + } + private JsonQueryBuilders() { } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/JsonQueryParserRegistry.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/JsonQueryParserRegistry.java index 7f30c3a3de9..706c7a4dc26 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/JsonQueryParserRegistry.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/JsonQueryParserRegistry.java @@ -62,6 +62,8 @@ public class JsonQueryParserRegistry { add(queryParsersMap, new SpanFirstJsonQueryParser(index, indexSettings)); add(queryParsersMap, new SpanNearJsonQueryParser(index, indexSettings)); add(queryParsersMap, new SpanOrJsonQueryParser(index, indexSettings)); + add(queryParsersMap, new MoreLikeThisJsonQueryParser(index, indexSettings)); + add(queryParsersMap, new MoreLikeThisFieldJsonQueryParser(index, indexSettings)); // now, copy over the ones provided if (queryParsers != null) { diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/MoreLikeThisFieldJsonQueryBuilder.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/MoreLikeThisFieldJsonQueryBuilder.java new file mode 100644 index 00000000000..520e91d4800 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/MoreLikeThisFieldJsonQueryBuilder.java @@ -0,0 +1,149 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.query.json; + +import org.elasticsearch.index.query.QueryBuilderException; +import org.elasticsearch.util.json.JsonBuilder; + +import java.io.IOException; + +/** + * @author kimchy (Shay Banon) + */ +public class MoreLikeThisFieldJsonQueryBuilder extends BaseJsonQueryBuilder { + + private final String name; + + private String likeText; + private float percentTermsToMatch = -1; + private int minTermFrequency = -1; + private int maxQueryTerms = -1; + private String[] stopWords = null; + private int minDocFreq = -1; + private int maxDocFreq = -1; + private int minWordLen = -1; + private int maxWordLen = -1; + private Boolean boostTerms = null; + private float boostTermsFactor = -1; + + public MoreLikeThisFieldJsonQueryBuilder(String name) { + this.name = name; + } + + public MoreLikeThisFieldJsonQueryBuilder likeText(String likeText) { + this.likeText = likeText; + return this; + } + + public MoreLikeThisFieldJsonQueryBuilder percentTermsToMatch(float percentTermsToMatch) { + this.percentTermsToMatch = percentTermsToMatch; + return this; + } + + public MoreLikeThisFieldJsonQueryBuilder minTermFrequency(int minTermFrequency) { + this.minTermFrequency = minTermFrequency; + return this; + } + + public MoreLikeThisFieldJsonQueryBuilder maxQueryTerms(int maxQueryTerms) { + this.maxQueryTerms = maxQueryTerms; + return this; + } + + public MoreLikeThisFieldJsonQueryBuilder stopWords(String... stopWords) { + this.stopWords = stopWords; + return this; + } + + public MoreLikeThisFieldJsonQueryBuilder minDocFreq(int minDocFreq) { + this.minDocFreq = minDocFreq; + return this; + } + + public MoreLikeThisFieldJsonQueryBuilder maxDocFreq(int maxDocFreq) { + this.maxDocFreq = maxDocFreq; + return this; + } + + public MoreLikeThisFieldJsonQueryBuilder minWordLen(int minWordLen) { + this.minWordLen = minWordLen; + return this; + } + + public MoreLikeThisFieldJsonQueryBuilder maxWordLen(int maxWordLen) { + this.maxWordLen = maxWordLen; + return this; + } + + public MoreLikeThisFieldJsonQueryBuilder boostTerms(boolean boostTerms) { + this.boostTerms = boostTerms; + return this; + } + + public MoreLikeThisFieldJsonQueryBuilder boostTermsFactor(float boostTermsFactor) { + this.boostTermsFactor = boostTermsFactor; + return this; + } + + @Override protected void doJson(JsonBuilder builder, Params params) throws IOException { + builder.startObject(MoreLikeThisFieldJsonQueryParser.NAME); + builder.startObject(name); + if (likeText == null) { + throw new QueryBuilderException("moreLikeThisField requires 'likeText' to be provided"); + } + builder.field("likeText", likeText); + if (percentTermsToMatch != -1) { + builder.field("percentTermsToMatch", percentTermsToMatch); + } + if (minTermFrequency != -1) { + builder.field("minTermFrequency", minTermFrequency); + } + if (maxQueryTerms != -1) { + builder.field("maxQueryTerms", maxQueryTerms); + } + if (stopWords != null && stopWords.length > 0) { + builder.startArray("stopWords"); + for (String stopWord : stopWords) { + builder.string(stopWord); + } + builder.endArray(); + } + if (minDocFreq != -1) { + builder.field("minDocFreq", minDocFreq); + } + if (maxDocFreq != -1) { + builder.field("maxDocFreq", maxDocFreq); + } + if (minWordLen != -1) { + builder.field("minWordLen", minWordLen); + } + if (maxWordLen != -1) { + builder.field("maxWordLen", maxWordLen); + } + if (boostTerms != null) { + builder.field("boostTerms", boostTerms); + } + if (boostTermsFactor != -1) { + builder.field("boostTermsFactor", boostTermsFactor); + } + builder.endObject(); + builder.endObject(); + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/MoreLikeThisFieldJsonQueryParser.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/MoreLikeThisFieldJsonQueryParser.java new file mode 100644 index 00000000000..5fdf8b395dd --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/MoreLikeThisFieldJsonQueryParser.java @@ -0,0 +1,128 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.query.json; + +import com.google.common.collect.Sets; +import org.apache.lucene.search.Query; +import org.codehaus.jackson.JsonParser; +import org.codehaus.jackson.JsonToken; +import org.elasticsearch.index.AbstractIndexComponent; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.query.QueryParsingException; +import org.elasticsearch.index.settings.IndexSettings; +import org.elasticsearch.util.lucene.search.MoreLikeThisQuery; +import org.elasticsearch.util.settings.Settings; + +import java.io.IOException; +import java.util.Set; + +import static org.elasticsearch.index.query.support.QueryParsers.*; + +/** + * @author kimchy (shay.banon) + */ +public class MoreLikeThisFieldJsonQueryParser extends AbstractIndexComponent implements JsonQueryParser { + + public static final String NAME = "moreLikeThisField"; + + public MoreLikeThisFieldJsonQueryParser(Index index, @IndexSettings Settings indexSettings) { + super(index, indexSettings); + } + + @Override public String name() { + return NAME; + } + + @Override public Query parse(JsonQueryParseContext parseContext) throws IOException, QueryParsingException { + JsonParser jp = parseContext.jp(); + + JsonToken token = jp.nextToken(); + assert token == JsonToken.FIELD_NAME; + String fieldName = jp.getCurrentName(); + + // now, we move after the field name, which starts the object + token = jp.nextToken(); + assert token == JsonToken.START_OBJECT; + + + MoreLikeThisQuery mltQuery = new MoreLikeThisQuery(); + + String currentFieldName = null; + while ((token = jp.nextToken()) != JsonToken.END_OBJECT) { + if (token == JsonToken.FIELD_NAME) { + currentFieldName = jp.getCurrentName(); + } else if (token == JsonToken.VALUE_STRING) { + if ("likeText".equals(currentFieldName)) { + mltQuery.setLikeText(jp.getText()); + } + } else if (token == JsonToken.VALUE_NUMBER_INT) { + if ("minTermFrequency".equals(currentFieldName)) { + mltQuery.setMinTermFrequency(jp.getIntValue()); + } else if ("maxQueryTerms".equals(currentFieldName)) { + mltQuery.setMaxQueryTerms(jp.getIntValue()); + } else if ("minDocFreq".equals(currentFieldName)) { + mltQuery.setMinDocFreq(jp.getIntValue()); + } else if ("maxDocFreq".equals(currentFieldName)) { + mltQuery.setMaxDocFreq(jp.getIntValue()); + } else if ("minWordLen".equals(currentFieldName)) { + mltQuery.setMinWordLen(jp.getIntValue()); + } else if ("maxWordLen".equals(currentFieldName)) { + mltQuery.setMaxWordLen(jp.getIntValue()); + } else if ("boostTerms".equals(currentFieldName)) { + mltQuery.setBoostTerms(jp.getIntValue() != 0); + } else if ("boostTermsFactor".equals(currentFieldName)) { + mltQuery.setBoostTermsFactor(jp.getIntValue()); + } + } else if (token == JsonToken.VALUE_NUMBER_FLOAT) { + if ("boostTermsFactor".equals(currentFieldName)) { + mltQuery.setBoostTermsFactor(jp.getFloatValue()); + } + } else if (token == JsonToken.START_ARRAY) { + if ("stopWords".equals(currentFieldName)) { + Set stopWords = Sets.newHashSet(); + while ((token = jp.nextToken()) != JsonToken.END_ARRAY) { + stopWords.add(jp.getText()); + } + mltQuery.setStopWords(stopWords); + } + } + } + + if (mltQuery.getLikeText() == null) { + throw new QueryParsingException(index, "moreLikeThisField requires 'likeText' to be specified"); + } + + // move to the next end object, to close the field name + token = jp.nextToken(); + assert token == JsonToken.END_OBJECT; + + mltQuery.setAnalyzer(parseContext.mapperService().searchAnalyzer()); + MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName); + if (smartNameFieldMappers != null) { + if (smartNameFieldMappers.hasMapper()) { + fieldName = smartNameFieldMappers.mapper().names().indexName(); + mltQuery.setAnalyzer(smartNameFieldMappers.mapper().searchAnalyzer()); + } + } + mltQuery.setMoreLikeFields(new String[]{fieldName}); + return wrapSmartNameQuery(mltQuery, smartNameFieldMappers, parseContext.filterCache()); + } +} diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/MoreLikeThisJsonQueryBuilder.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/MoreLikeThisJsonQueryBuilder.java new file mode 100644 index 00000000000..1a9f2b980ae --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/MoreLikeThisJsonQueryBuilder.java @@ -0,0 +1,155 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.query.json; + +import org.elasticsearch.index.query.QueryBuilderException; +import org.elasticsearch.util.json.JsonBuilder; + +import java.io.IOException; + +/** + * @author kimchy (Shay Banon) + */ +public class MoreLikeThisJsonQueryBuilder extends BaseJsonQueryBuilder { + + private final String[] fields; + + private String likeText; + private float percentTermsToMatch = -1; + private int minTermFrequency = -1; + private int maxQueryTerms = -1; + private String[] stopWords = null; + private int minDocFreq = -1; + private int maxDocFreq = -1; + private int minWordLen = -1; + private int maxWordLen = -1; + private Boolean boostTerms = null; + private float boostTermsFactor = -1; + + public MoreLikeThisJsonQueryBuilder(String... fields) { + this.fields = fields; + } + + public MoreLikeThisJsonQueryBuilder likeText(String likeText) { + this.likeText = likeText; + return this; + } + + public MoreLikeThisJsonQueryBuilder percentTermsToMatch(float percentTermsToMatch) { + this.percentTermsToMatch = percentTermsToMatch; + return this; + } + + public MoreLikeThisJsonQueryBuilder minTermFrequency(int minTermFrequency) { + this.minTermFrequency = minTermFrequency; + return this; + } + + public MoreLikeThisJsonQueryBuilder maxQueryTerms(int maxQueryTerms) { + this.maxQueryTerms = maxQueryTerms; + return this; + } + + public MoreLikeThisJsonQueryBuilder stopWords(String... stopWords) { + this.stopWords = stopWords; + return this; + } + + public MoreLikeThisJsonQueryBuilder minDocFreq(int minDocFreq) { + this.minDocFreq = minDocFreq; + return this; + } + + public MoreLikeThisJsonQueryBuilder maxDocFreq(int maxDocFreq) { + this.maxDocFreq = maxDocFreq; + return this; + } + + public MoreLikeThisJsonQueryBuilder minWordLen(int minWordLen) { + this.minWordLen = minWordLen; + return this; + } + + public MoreLikeThisJsonQueryBuilder maxWordLen(int maxWordLen) { + this.maxWordLen = maxWordLen; + return this; + } + + public MoreLikeThisJsonQueryBuilder boostTerms(boolean boostTerms) { + this.boostTerms = boostTerms; + return this; + } + + public MoreLikeThisJsonQueryBuilder boostTermsFactor(float boostTermsFactor) { + this.boostTermsFactor = boostTermsFactor; + return this; + } + + @Override protected void doJson(JsonBuilder builder, Params params) throws IOException { + builder.startObject(MoreLikeThisJsonQueryParser.NAME); + if (fields == null || fields.length == 0) { + throw new QueryBuilderException("moreLikeThis requires 'fields' to be provided"); + } + builder.startArray("fields"); + for (String field : fields) { + builder.string(field); + } + builder.endArray(); + if (likeText == null) { + throw new QueryBuilderException("moreLikeThis requires 'likeText' to be provided"); + } + builder.field("likeText", likeText); + if (percentTermsToMatch != -1) { + builder.field("percentTermsToMatch", percentTermsToMatch); + } + if (minTermFrequency != -1) { + builder.field("minTermFrequency", minTermFrequency); + } + if (maxQueryTerms != -1) { + builder.field("maxQueryTerms", maxQueryTerms); + } + if (stopWords != null && stopWords.length > 0) { + builder.startArray("stopWords"); + for (String stopWord : stopWords) { + builder.string(stopWord); + } + builder.endArray(); + } + if (minDocFreq != -1) { + builder.field("minDocFreq", minDocFreq); + } + if (maxDocFreq != -1) { + builder.field("maxDocFreq", maxDocFreq); + } + if (minWordLen != -1) { + builder.field("minWordLen", minWordLen); + } + if (maxWordLen != -1) { + builder.field("maxWordLen", maxWordLen); + } + if (boostTerms != null) { + builder.field("boostTerms", boostTerms); + } + if (boostTermsFactor != -1) { + builder.field("boostTermsFactor", boostTermsFactor); + } + builder.endObject(); + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/MoreLikeThisJsonQueryParser.java b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/MoreLikeThisJsonQueryParser.java new file mode 100644 index 00000000000..94575b7f210 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/index/query/json/MoreLikeThisJsonQueryParser.java @@ -0,0 +1,120 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.index.query.json; + +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import org.apache.lucene.search.Query; +import org.codehaus.jackson.JsonParser; +import org.codehaus.jackson.JsonToken; +import org.elasticsearch.index.AbstractIndexComponent; +import org.elasticsearch.index.Index; +import org.elasticsearch.index.query.QueryParsingException; +import org.elasticsearch.index.settings.IndexSettings; +import org.elasticsearch.util.lucene.search.MoreLikeThisQuery; +import org.elasticsearch.util.settings.Settings; + +import java.io.IOException; +import java.util.List; +import java.util.Set; + +/** + * @author kimchy (shay.banon) + */ +public class MoreLikeThisJsonQueryParser extends AbstractIndexComponent implements JsonQueryParser { + + public static final String NAME = "moreLikeThis"; + + public MoreLikeThisJsonQueryParser(Index index, @IndexSettings Settings indexSettings) { + super(index, indexSettings); + } + + @Override public String name() { + return NAME; + } + + @Override public Query parse(JsonQueryParseContext parseContext) throws IOException, QueryParsingException { + JsonParser jp = parseContext.jp(); + + MoreLikeThisQuery mltQuery = new MoreLikeThisQuery(); + + JsonToken token; + String currentFieldName = null; + while ((token = jp.nextToken()) != JsonToken.END_OBJECT) { + if (token == JsonToken.FIELD_NAME) { + currentFieldName = jp.getCurrentName(); + } else if (token == JsonToken.VALUE_STRING) { + if ("likeText".equals(currentFieldName)) { + mltQuery.setLikeText(jp.getText()); + } + } else if (token == JsonToken.VALUE_NUMBER_INT) { + if ("minTermFrequency".equals(currentFieldName)) { + mltQuery.setMinTermFrequency(jp.getIntValue()); + } else if ("maxQueryTerms".equals(currentFieldName)) { + mltQuery.setMaxQueryTerms(jp.getIntValue()); + } else if ("minDocFreq".equals(currentFieldName)) { + mltQuery.setMinDocFreq(jp.getIntValue()); + } else if ("maxDocFreq".equals(currentFieldName)) { + mltQuery.setMaxDocFreq(jp.getIntValue()); + } else if ("minWordLen".equals(currentFieldName)) { + mltQuery.setMinWordLen(jp.getIntValue()); + } else if ("maxWordLen".equals(currentFieldName)) { + mltQuery.setMaxWordLen(jp.getIntValue()); + } else if ("boostTerms".equals(currentFieldName)) { + mltQuery.setBoostTerms(jp.getIntValue() != 0); + } else if ("boostTermsFactor".equals(currentFieldName)) { + mltQuery.setBoostTermsFactor(jp.getIntValue()); + } + } else if (token == JsonToken.VALUE_NUMBER_FLOAT) { + if ("boostTermsFactor".equals(currentFieldName)) { + mltQuery.setBoostTermsFactor(jp.getFloatValue()); + } + } else if (token == JsonToken.START_ARRAY) { + if ("stopWords".equals(currentFieldName)) { + Set stopWords = Sets.newHashSet(); + while ((token = jp.nextToken()) != JsonToken.END_ARRAY) { + stopWords.add(jp.getText()); + } + mltQuery.setStopWords(stopWords); + } else if ("fields".equals(currentFieldName)) { + List fields = Lists.newArrayList(); + while ((token = jp.nextToken()) != JsonToken.END_ARRAY) { + fields.add(jp.getText()); + } + mltQuery.setMoreLikeFields(fields.toArray(new String[fields.size()])); + } + } + } + + if (mltQuery.getLikeText() == null) { + throw new QueryParsingException(index, "moreLikeThis requires 'likeText' to be specified"); + } + if (mltQuery.getMoreLikeFields() == null || mltQuery.getMoreLikeFields().length == 0) { + throw new QueryParsingException(index, "moreLikeThis requires 'fields' to be specified"); + } + + // move to the next end object, to close the field name + token = jp.nextToken(); + assert token == JsonToken.END_OBJECT; + + mltQuery.setAnalyzer(parseContext.mapperService().searchAnalyzer()); + return mltQuery; + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/util/lucene/search/MoreLikeThisQuery.java b/modules/elasticsearch/src/main/java/org/elasticsearch/util/lucene/search/MoreLikeThisQuery.java new file mode 100644 index 00000000000..ef30e023a7b --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/util/lucene/search/MoreLikeThisQuery.java @@ -0,0 +1,194 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.util.lucene.search; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.similar.MoreLikeThis; +import org.elasticsearch.util.io.FastStringReader; + +import java.io.IOException; +import java.util.Set; + +/** + * @author kimchy (shay.banon) + */ +public class MoreLikeThisQuery extends Query { + + public static final float DEFAULT_PERCENT_TERMS_TO_MATCH = 0.3f; + + private String likeText; + private String[] moreLikeFields; + private Analyzer analyzer; + private float percentTermsToMatch = DEFAULT_PERCENT_TERMS_TO_MATCH; + private int minTermFrequency = MoreLikeThis.DEFAULT_MIN_TERM_FREQ; + private int maxQueryTerms = MoreLikeThis.DEFAULT_MAX_QUERY_TERMS; + private Set stopWords = MoreLikeThis.DEFAULT_STOP_WORDS; + private int minDocFreq = MoreLikeThis.DEFAULT_MIN_DOC_FREQ; + private int maxDocFreq = MoreLikeThis.DEFAULT_MAX_DOC_FREQ; + private int minWordLen = MoreLikeThis.DEFAULT_MIN_WORD_LENGTH; + private int maxWordLen = MoreLikeThis.DEFAULT_MAX_WORD_LENGTH; + private boolean boostTerms = MoreLikeThis.DEFAULT_BOOST; + private float boostTermsFactor = 1; + + + public MoreLikeThisQuery() { + + } + + public MoreLikeThisQuery(String likeText, String[] moreLikeFields, Analyzer analyzer) { + this.likeText = likeText; + this.moreLikeFields = moreLikeFields; + this.analyzer = analyzer; + } + + @Override public Query rewrite(IndexReader reader) throws IOException { + MoreLikeThis mlt = new MoreLikeThis(reader); + + mlt.setFieldNames(moreLikeFields); + mlt.setAnalyzer(analyzer); + mlt.setMinTermFreq(minTermFrequency); + mlt.setMinDocFreq(minDocFreq); + mlt.setMaxDocFreq(maxDocFreq); + mlt.setMaxQueryTerms(maxQueryTerms); + mlt.setMinWordLen(minWordLen); + mlt.setMaxWordLen(maxWordLen); + mlt.setStopWords(stopWords); + mlt.setBoost(boostTerms); + mlt.setBoostFactor(boostTermsFactor); + BooleanQuery bq = (BooleanQuery) mlt.like(new FastStringReader(likeText)); + BooleanClause[] clauses = bq.getClauses(); + + bq.setMinimumNumberShouldMatch((int) (clauses.length * percentTermsToMatch)); + return bq; + } + + @Override public String toString(String field) { + return "like:" + likeText; + } + + public String getLikeText() { + return likeText; + } + + public void setLikeText(String likeText) { + this.likeText = likeText; + } + + public String[] getMoreLikeFields() { + return moreLikeFields; + } + + public void setMoreLikeFields(String[] moreLikeFields) { + this.moreLikeFields = moreLikeFields; + } + + public Analyzer getAnalyzer() { + return analyzer; + } + + public void setAnalyzer(Analyzer analyzer) { + this.analyzer = analyzer; + } + + public float getPercentTermsToMatch() { + return percentTermsToMatch; + } + + public void setPercentTermsToMatch(float percentTermsToMatch) { + this.percentTermsToMatch = percentTermsToMatch; + } + + public int getMinTermFrequency() { + return minTermFrequency; + } + + public void setMinTermFrequency(int minTermFrequency) { + this.minTermFrequency = minTermFrequency; + } + + public int getMaxQueryTerms() { + return maxQueryTerms; + } + + public void setMaxQueryTerms(int maxQueryTerms) { + this.maxQueryTerms = maxQueryTerms; + } + + public Set getStopWords() { + return stopWords; + } + + public void setStopWords(Set stopWords) { + this.stopWords = stopWords; + } + + public int getMinDocFreq() { + return minDocFreq; + } + + public void setMinDocFreq(int minDocFreq) { + this.minDocFreq = minDocFreq; + } + + public int getMaxDocFreq() { + return maxDocFreq; + } + + public void setMaxDocFreq(int maxDocFreq) { + this.maxDocFreq = maxDocFreq; + } + + public int getMinWordLen() { + return minWordLen; + } + + public void setMinWordLen(int minWordLen) { + this.minWordLen = minWordLen; + } + + public int getMaxWordLen() { + return maxWordLen; + } + + public void setMaxWordLen(int maxWordLen) { + this.maxWordLen = maxWordLen; + } + + public boolean isBoostTerms() { + return boostTerms; + } + + public void setBoostTerms(boolean boostTerms) { + this.boostTerms = boostTerms; + } + + public float getBoostTermsFactor() { + return boostTermsFactor; + } + + public void setBoostTermsFactor(float boostTermsFactor) { + this.boostTermsFactor = boostTermsFactor; + } +} + diff --git a/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/json/SimpleJsonIndexQueryParserTests.java b/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/json/SimpleJsonIndexQueryParserTests.java index e9ee23a5424..584ea2099ae 100644 --- a/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/json/SimpleJsonIndexQueryParserTests.java +++ b/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/json/SimpleJsonIndexQueryParserTests.java @@ -29,6 +29,7 @@ import org.elasticsearch.index.analysis.AnalysisService; import org.elasticsearch.index.cache.filter.none.NoneFilterCache; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.query.IndexQueryParser; +import org.elasticsearch.util.lucene.search.MoreLikeThisQuery; import org.elasticsearch.util.lucene.search.TermFilter; import org.testng.annotations.Test; @@ -600,6 +601,53 @@ public class SimpleJsonIndexQueryParserTests { assertThat(((TermQuery) wrappedQuery).getTerm(), equalTo(new Term("name.last", "banon"))); } + @Test public void testMoreLikeThisBuilder() throws Exception { + IndexQueryParser queryParser = newQueryParser(); + Query parsedQuery = queryParser.parse(moreLikeThis("name.first", "name.last").likeText("something").minTermFrequency(1).maxQueryTerms(12)); + assertThat(parsedQuery, instanceOf(MoreLikeThisQuery.class)); + MoreLikeThisQuery mltQuery = (MoreLikeThisQuery) parsedQuery; + assertThat(mltQuery.getMoreLikeFields()[0], equalTo("name.first")); + assertThat(mltQuery.getLikeText(), equalTo("something")); + assertThat(mltQuery.getMinTermFrequency(), equalTo(1)); + assertThat(mltQuery.getMaxQueryTerms(), equalTo(12)); + } + + @Test public void testMoreLikeThis() throws Exception { + IndexQueryParser queryParser = newQueryParser(); + String query = copyToStringFromClasspath("/org/elasticsearch/index/query/json/mlt.json"); + Query parsedQuery = queryParser.parse(query); + assertThat(parsedQuery, instanceOf(MoreLikeThisQuery.class)); + MoreLikeThisQuery mltQuery = (MoreLikeThisQuery) parsedQuery; + assertThat(mltQuery.getMoreLikeFields()[0], equalTo("name.first")); + assertThat(mltQuery.getMoreLikeFields()[1], equalTo("name.last")); + assertThat(mltQuery.getLikeText(), equalTo("something")); + assertThat(mltQuery.getMinTermFrequency(), equalTo(1)); + assertThat(mltQuery.getMaxQueryTerms(), equalTo(12)); + } + + @Test public void testMoreLikeThisFieldBuilder() throws Exception { + IndexQueryParser queryParser = newQueryParser(); + Query parsedQuery = queryParser.parse(moreLikeThisField("name.first").likeText("something").minTermFrequency(1).maxQueryTerms(12)); + assertThat(parsedQuery, instanceOf(MoreLikeThisQuery.class)); + MoreLikeThisQuery mltQuery = (MoreLikeThisQuery) parsedQuery; + assertThat(mltQuery.getMoreLikeFields()[0], equalTo("name.first")); + assertThat(mltQuery.getLikeText(), equalTo("something")); + assertThat(mltQuery.getMinTermFrequency(), equalTo(1)); + assertThat(mltQuery.getMaxQueryTerms(), equalTo(12)); + } + + @Test public void testMoreLikeThisField() throws Exception { + IndexQueryParser queryParser = newQueryParser(); + String query = copyToStringFromClasspath("/org/elasticsearch/index/query/json/mltField.json"); + Query parsedQuery = queryParser.parse(query); + assertThat(parsedQuery, instanceOf(MoreLikeThisQuery.class)); + MoreLikeThisQuery mltQuery = (MoreLikeThisQuery) parsedQuery; + assertThat(mltQuery.getMoreLikeFields()[0], equalTo("name.first")); + assertThat(mltQuery.getLikeText(), equalTo("something")); + assertThat(mltQuery.getMinTermFrequency(), equalTo(1)); + assertThat(mltQuery.getMaxQueryTerms(), equalTo(12)); + } + private JsonIndexQueryParser newQueryParser() throws IOException { return new JsonIndexQueryParser(new Index("test"), EMPTY_SETTINGS, newMapperService(), new NoneFilterCache(index, EMPTY_SETTINGS), new AnalysisService(index), null, null, "test", null); diff --git a/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/json/mlt.json b/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/json/mlt.json new file mode 100644 index 00000000000..a6df52903df --- /dev/null +++ b/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/json/mlt.json @@ -0,0 +1,8 @@ +{ + moreLikeThis : { + fields : ["name.first", "name.last"], + likeText : "something", + minTermFrequency : 1, + maxQueryTerms : 12 + } +} \ No newline at end of file diff --git a/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/json/mltField.json b/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/json/mltField.json new file mode 100644 index 00000000000..39f2b1e05a1 --- /dev/null +++ b/modules/elasticsearch/src/test/java/org/elasticsearch/index/query/json/mltField.json @@ -0,0 +1,9 @@ +{ + moreLikeThisField : { + "name.first" : { + likeText : "something", + minTermFrequency : 1, + maxQueryTerms : 12 + } + } +} \ No newline at end of file