From 2fdc49c113dc4bd2d438ae96d1e20e32fda23187 Mon Sep 17 00:00:00 2001 From: kimchy Date: Mon, 19 Apr 2010 19:15:24 +0300 Subject: [PATCH] Search API: Indices Boost to apply a boost factor to each index, closes #143. --- .../search/query/QueryPhase.java | 5 +- .../lucene/search/CustomBoostFactorQuery.java | 209 ++++++++++++++++++ .../SimpleIndicesBoostSearchTests.java | 123 +++++++++++ .../SimpleIndicesBoostSearchTests.yml | 6 + 4 files changed, 342 insertions(+), 1 deletion(-) create mode 100644 modules/elasticsearch/src/main/java/org/elasticsearch/util/lucene/search/CustomBoostFactorQuery.java create mode 100644 modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/indicesboost/SimpleIndicesBoostSearchTests.java create mode 100644 modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/indicesboost/SimpleIndicesBoostSearchTests.yml diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/query/QueryPhase.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/query/QueryPhase.java index cdc182c7d85..91e9ea917c1 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/query/QueryPhase.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/query/QueryPhase.java @@ -28,6 +28,7 @@ import org.elasticsearch.search.SearchParseException; import org.elasticsearch.search.SearchPhase; import org.elasticsearch.search.facets.FacetsPhase; import org.elasticsearch.search.internal.SearchContext; +import org.elasticsearch.util.lucene.search.CustomBoostFactorQuery; import org.elasticsearch.util.lucene.search.TermFilter; import java.util.Map; @@ -60,7 +61,9 @@ public class QueryPhase implements SearchPhase { if (context.query() == null) { throw new SearchParseException(context, "No query specified in search request"); } - context.query().setBoost(context.query().getBoost() * context.queryBoost()); + if (context.queryBoost() != 1.0f) { + context.query(new 
CustomBoostFactorQuery(context.query(), context.queryBoost())); + } facetsPhase.preProcess(context); } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/util/lucene/search/CustomBoostFactorQuery.java b/modules/elasticsearch/src/main/java/org/elasticsearch/util/lucene/search/CustomBoostFactorQuery.java new file mode 100644 index 00000000000..2a0ce53b785 --- /dev/null +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/util/lucene/search/CustomBoostFactorQuery.java @@ -0,0 +1,209 @@ +/* + * Licensed to Elastic Search and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. Elastic Search licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.util.lucene.search; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.*; +import org.apache.lucene.util.ToStringUtils; + +import java.io.IOException; +import java.util.Set; + +/** + * A query that wraps another query and applies the provided boost values to it. Simply + * applied the boost factor to the score of the wrapped query. 
+ *
+ * <p>Every matching document's score is the wrapped query's score multiplied by
+ * {@link #getBoostFactor()} and by this query's own {@link #getBoost() boost}; matching and
+ * iteration are delegated unchanged to the wrapped query's scorer.
+ *
+ * @author kimchy (shay.banon)
+ */
+public class CustomBoostFactorQuery extends Query {
+
+    /** The wrapped query. Not final: {@link #rewrite(IndexReader)} swaps it on a clone. */
+    private Query q;
+
+    /** Constant factor applied to every score produced by the wrapped query. */
+    private final float boostFactor;
+
+    /**
+     * @param subQuery    the query to wrap; its matches and base scores are used as-is
+     * @param boostFactor the factor each score of <tt>subQuery</tt> is multiplied by
+     */
+    public CustomBoostFactorQuery(Query subQuery, float boostFactor) {
+        this.q = subQuery;
+        this.boostFactor = boostFactor;
+    }
+
+    /** Returns the wrapped query. */
+    public Query getQuery() {
+        return q;
+    }
+
+    /** Returns the factor applied to the wrapped query's scores. */
+    public float getBoostFactor() {
+        return boostFactor;
+    }
+
+    /**
+     * Rewrites the wrapped query. Returns <tt>this</tt> when the wrapped query rewrites to itself,
+     * otherwise a clone of this query wrapping the rewritten query.
+     */
+    @Override
+    public Query rewrite(IndexReader reader) throws IOException {
+        Query newQ = q.rewrite(reader);
+        if (newQ == q) {
+            return this;
+        }
+        CustomBoostFactorQuery bq = (CustomBoostFactorQuery) this.clone();
+        bq.q = newQ;
+        return bq;
+    }
+
+    /** Delegates term extraction to the wrapped query. */
+    @Override
+    public void extractTerms(Set terms) {
+        q.extractTerms(terms);
+    }
+
+    @Override
+    public Weight createWeight(Searcher searcher) throws IOException {
+        return new CustomBoostFactorQuery.BoostedWeight(searcher);
+    }
+
+    /**
+     * Builds the explanation of a boosted score from the wrapped query's explanation. Shared by
+     * the weight and the scorer so both describe the score the same way.
+     *
+     * @param subQueryExpl the wrapped query's explanation for the document
+     * @return <tt>subQueryExpl</tt> itself when it is not a match, otherwise a "product of"
+     *         explanation whose value is computed exactly like {@link CustomScorer#score()}
+     */
+    private Explanation explainBoosted(Explanation subQueryExpl) {
+        if (!subQueryExpl.isMatch()) {
+            return subQueryExpl;
+        }
+        // score() multiplies by this query's own boost as well, so the explanation has to include
+        // it or the explained value differs from the actual score whenever the boost is not 1.
+        float queryBoost = getBoost();
+        float sc = queryBoost * subQueryExpl.getValue() * boostFactor;
+        Explanation res = new ComplexExplanation(true, sc, toString() + ", product of:");
+        res.addDetail(subQueryExpl);
+        res.addDetail(new Explanation(boostFactor, "boostFactor"));
+        if (queryBoost != 1.0f) {
+            // Only listed when it actually contributes, keeping the default output unchanged.
+            res.addDetail(new Explanation(queryBoost, "queryBoost"));
+        }
+        return res;
+    }
+
+    /** Weight that wraps the weight of the wrapped query. */
+    private class BoostedWeight extends Weight {
+        private final Searcher searcher;
+        private final Weight qWeight;
+
+        public BoostedWeight(Searcher searcher) throws IOException {
+            this.searcher = searcher;
+            this.qWeight = q.weight(searcher);
+        }
+
+        @Override
+        public Query getQuery() {
+            return CustomBoostFactorQuery.this;
+        }
+
+        /** The value of this weight is the enclosing query's own boost. */
+        @Override
+        public float getValue() {
+            return getBoost();
+        }
+
+        @Override
+        public float sumOfSquaredWeights() throws IOException {
+            float sum = qWeight.sumOfSquaredWeights();
+            sum *= getBoost() * getBoost();
+            return sum;
+        }
+
+        /** Folds the enclosing query's boost into <tt>norm</tt> before normalizing the wrapped weight. */
+        @Override
+        public void normalize(float norm) {
+            norm *= getBoost();
+            qWeight.normalize(norm);
+        }
+
+        /**
+         * Returns a scorer over the wrapped query's matches, or <tt>null</tt> when the wrapped
+         * weight has no scorer for <tt>reader</tt>. The wrapped scorer is always requested as an
+         * in-order, non-top scorer; <tt>scoreDocsInOrder</tt> and <tt>topScorer</tt> are not
+         * forwarded.
+         */
+        @Override
+        public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException {
+            Scorer subQueryScorer = qWeight.scorer(reader, true, false);
+            if (subQueryScorer == null) {
+                return null;
+            }
+            return new CustomBoostFactorQuery.CustomScorer(getSimilarity(searcher), reader, this, subQueryScorer);
+        }
+
+        @Override
+        public Explanation explain(IndexReader reader, int doc) throws IOException {
+            return explainBoosted(qWeight.explain(reader, doc));
+        }
+    }
+
+
+    /** Scorer that iterates exactly like the wrapped scorer and only rescales its scores. */
+    private class CustomScorer extends Scorer {
+        private final CustomBoostFactorQuery.BoostedWeight weight;
+        private final float qWeight;
+        private final Scorer scorer;
+        private final IndexReader reader;
+
+        private CustomScorer(Similarity similarity, IndexReader reader, CustomBoostFactorQuery.BoostedWeight w,
+                             Scorer scorer) throws IOException {
+            super(similarity);
+            this.weight = w;
+            this.qWeight = w.getValue();
+            this.scorer = scorer;
+            this.reader = reader;
+        }
+
+        @Override
+        public int docID() {
+            return scorer.docID();
+        }
+
+        @Override
+        public int advance(int target) throws IOException {
+            return scorer.advance(target);
+        }
+
+        @Override
+        public int nextDoc() throws IOException {
+            return scorer.nextDoc();
+        }
+
+        @Override
+        public float score() throws IOException {
+            float score = qWeight * scorer.score() * boostFactor;
+
+            // Current Lucene priority queues can't handle NaN and -Infinity, so
+            // map to -Float.MAX_VALUE. This conditional handles both -infinity
+            // and NaN since comparisons with NaN are always false.
+            return score > Float.NEGATIVE_INFINITY ? score : -Float.MAX_VALUE;
+        }
+
+        /** Explains the score of <tt>doc</tt> using the wrapped weight's explanation. */
+        public Explanation explain(int doc) throws IOException {
+            return explainBoosted(weight.qWeight.explain(reader, doc));
+        }
+    }
+
+
+    @Override
+    public String toString(String field) {
+        StringBuilder sb = new StringBuilder();
+        sb.append("CustomBoostFactor(").append(q.toString(field)).append(',').append(boostFactor).append(')');
+        sb.append(ToStringUtils.boost(getBoost()));
+        return sb.toString();
+    }
+
+    /**
+     * Two instances are equal when they are of the same class and have equal wrapped queries,
+     * boost factors and boosts. Returns <tt>false</tt> for <tt>null</tt>. Floats are compared by
+     * their bit patterns, the same representation {@link #hashCode()} hashes, so equal objects
+     * always produce equal hash codes.
+     */
+    @Override
+    public boolean equals(Object o) {
+        if (this == o) {
+            return true;
+        }
+        if (o == null || getClass() != o.getClass()) {
+            return false;
+        }
+        CustomBoostFactorQuery other = (CustomBoostFactorQuery) o;
+        return Float.floatToIntBits(getBoost()) == Float.floatToIntBits(other.getBoost())
+                && Float.floatToIntBits(boostFactor) == Float.floatToIntBits(other.boostFactor)
+                && q.equals(other.q);
+    }
+
+    @Override
+    public int hashCode() {
+        int h = q.hashCode();
+        h ^= (h << 17) | (h >>> 16);
+        h += Float.floatToIntBits(boostFactor);
+        h ^= (h << 8) | (h >>> 25);
+        h += Float.floatToIntBits(getBoost());
+        return h;
+    }
+
+}
+
diff --git a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/indicesboost/SimpleIndicesBoostSearchTests.java b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/indicesboost/SimpleIndicesBoostSearchTests.java
new file mode 100644
index 00000000000..62829579bd2
--- /dev/null
+++ b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/indicesboost/SimpleIndicesBoostSearchTests.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to Elastic Search and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. 
Elastic Search licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.test.integration.search.indicesboost;
+
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.action.search.SearchType;
+import org.elasticsearch.client.Client;
+import org.elasticsearch.test.integration.AbstractNodesTests;
+import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+import static org.elasticsearch.client.Requests.*;
+import static org.elasticsearch.index.query.json.JsonQueryBuilders.*;
+import static org.elasticsearch.search.builder.SearchSourceBuilder.*;
+import static org.elasticsearch.util.json.JsonBuilder.*;
+import static org.hamcrest.MatcherAssert.*;
+import static org.hamcrest.Matchers.*;
+
+/**
+ * Checks that boosting one index in a search request ranks that index's hit first, for both the
+ * QUERY_THEN_FETCH and the DFS_QUERY_THEN_FETCH search types.
+ *
+ * @author kimchy (shay.banon)
+ */
+@Test
+public class SimpleIndicesBoostSearchTests extends AbstractNodesTests {
+
+    private Client client;
+
+    @BeforeMethod public void createNodes() throws Exception {
+        startNode("server1");
+        client = getClient();
+    }
+
+    @AfterMethod public void closeNodes() {
+        try {
+            client.close();
+        } finally {
+            // Always shut the nodes down, even if closing the client fails.
+            closeAllNodes();
+        }
+    }
+
+    protected Client getClient() {
+        return client("server1");
+    }
+
+    /**
+     * Indexes one document into each of two indices, both matching the same term query, then
+     * verifies for each search type that whichever index is boosted comes back as the first hit.
+     */
+    @Test
+    public void testIndicesBoost() throws Exception {
+        client.admin().indices().create(createIndexRequest("test1")).actionGet();
+        client.admin().indices().create(createIndexRequest("test2")).actionGet();
+        client.index(indexRequest("test1").type("type1").id("1")
+                .source(jsonBuilder().startObject().field("test", "value check").endObject())).actionGet();
+        client.index(indexRequest("test2").type("type1").id("1")
+                .source(jsonBuilder().startObject().field("test", "value beck").endObject())).actionGet();
+        client.admin().indices().refresh(refreshRequest()).actionGet();
+
+        float indexBoost = 1.1f;
+
+        logger.info("--- QUERY_THEN_FETCH");
+        assertBoostedIndexRanksFirst(SearchType.QUERY_THEN_FETCH, "test1", "test2", indexBoost);
+        assertBoostedIndexRanksFirst(SearchType.QUERY_THEN_FETCH, "test2", "test1", indexBoost);
+
+        logger.info("--- DFS_QUERY_THEN_FETCH");
+        assertBoostedIndexRanksFirst(SearchType.DFS_QUERY_THEN_FETCH, "test1", "test2", indexBoost);
+        assertBoostedIndexRanksFirst(SearchType.DFS_QUERY_THEN_FETCH, "test2", "test1", indexBoost);
+    }
+
+    /**
+     * Runs the term query across all indices with <tt>boostedIndex</tt> boosted and asserts that
+     * exactly two hits come back, the first from <tt>boostedIndex</tt> and the second from
+     * <tt>otherIndex</tt>. Both hits are logged with their explanations.
+     *
+     * @param searchType   the search type to execute the request with
+     * @param boostedIndex the index the boost is applied to; expected as the first hit
+     * @param otherIndex   the index left unboosted; expected as the second hit
+     * @param indexBoost   the boost applied to <tt>boostedIndex</tt>
+     */
+    private void assertBoostedIndexRanksFirst(SearchType searchType, String boostedIndex, String otherIndex,
+                                              float indexBoost) {
+        logger.info("Query with " + boostedIndex + " boosted");
+        SearchResponse response = client.search(searchRequest()
+                .searchType(searchType)
+                .source(searchSource().explain(true).indexBoost(boostedIndex, indexBoost).query(termQuery("test", "value")))
+        ).actionGet();
+
+        assertThat(response.hits().totalHits(), equalTo(2L));
+        logger.info("Hit[0] {} Explanation {}", response.hits().getAt(0).index(), response.hits().getAt(0).explanation());
+        logger.info("Hit[1] {} Explanation {}", response.hits().getAt(1).index(), response.hits().getAt(1).explanation());
+        assertThat(response.hits().getAt(0).index(), equalTo(boostedIndex));
+        assertThat(response.hits().getAt(1).index(), equalTo(otherIndex));
+    }
+}
diff --git a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/indicesboost/SimpleIndicesBoostSearchTests.yml b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/indicesboost/SimpleIndicesBoostSearchTests.yml
new file mode 100644
index 00000000000..e700f1f35fa
--- /dev/null
+++ b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/indicesboost/SimpleIndicesBoostSearchTests.yml
@@ -0,0 +1,6 @@
+cluster:
+    routing:
+        schedule: 100ms
+index:
+    number_of_shards: 1
+    number_of_replicas: 0