From f7100c0698c6808d22736160f4229ea4a780f446 Mon Sep 17 00:00:00 2001 From: kimchy Date: Wed, 23 Feb 2011 22:57:44 +0200 Subject: [PATCH] Improve from hits pagination (duplicates), closes #717. --- .../search/ShardFieldDocSortedHitQueue.java | 120 +++++++++++++++++- .../search/SearchShardTarget.java | 10 +- .../search/controller/ScoreDocQueue.java | 16 ++- .../basic/TransportTwoNodesSearchTests.java | 29 +++++ 4 files changed, 163 insertions(+), 12 deletions(-) diff --git a/modules/elasticsearch/src/main/java/org/apache/lucene/search/ShardFieldDocSortedHitQueue.java b/modules/elasticsearch/src/main/java/org/apache/lucene/search/ShardFieldDocSortedHitQueue.java index 185c0762f90..4828d2af32f 100644 --- a/modules/elasticsearch/src/main/java/org/apache/lucene/search/ShardFieldDocSortedHitQueue.java +++ b/modules/elasticsearch/src/main/java/org/apache/lucene/search/ShardFieldDocSortedHitQueue.java @@ -19,18 +19,128 @@ package org.apache.lucene.search; +import org.apache.lucene.util.PriorityQueue; +import org.elasticsearch.search.controller.ShardFieldDoc; + +import java.text.Collator; +import java.util.Locale; + /** * @author kimchy (Shay Banon) */ -// LUCENE TRACK -public class ShardFieldDocSortedHitQueue extends FieldDocSortedHitQueue { +// LUCENE TRACK, Had to copy over in order to improve same order tie break to take shards into account +public class ShardFieldDocSortedHitQueue extends PriorityQueue { + volatile SortField[] fields = null; + + // used in the case where the fields are sorted by locale + // based strings + volatile Collator[] collators = null; + + + /** + * Creates a hit queue sorted by the given list of fields. + * + * @param fields Fieldable names, in priority order (highest priority first). + * @param size The number of hits to retain. Must be greater than zero. 
+ */ public ShardFieldDocSortedHitQueue(SortField[] fields, int size) { - super(size); + initialize(size); setFields(fields); } - @Override public void setFields(SortField[] fields) { - super.setFields(fields); + + /** + * Allows redefinition of sort fields if they are null. + * This is to handle the case using ParallelMultiSearcher where the + * original list contains AUTO and we don't know the actual sort + * type until the values come back. The fields can only be set once. + * This method should be synchronized externally like all other PQ methods. + * + * @param fields + */ + public void setFields(SortField[] fields) { + this.fields = fields; + this.collators = hasCollators(fields); + } + + + /** + * Returns the fields being used to sort. + */ + SortField[] getFields() { + return fields; + } + + + /** + * Returns an array of collators, possibly null. The collators + * correspond to any SortFields which were given a specific locale. + * + * @param fields Array of sort fields. + * @return Array, possibly null. + */ + private Collator[] hasCollators(final SortField[] fields) { + if (fields == null) return null; + Collator[] ret = new Collator[fields.length]; + for (int i = 0; i < fields.length; ++i) { + Locale locale = fields[i].getLocale(); + if (locale != null) + ret[i] = Collator.getInstance(locale); + } + return ret; + } + + + /** + * Returns whether a is less relevant than b. + * + * @param a ScoreDoc + * @param b ScoreDoc + * @return true if document a should be sorted after document b. 
+ */ + @SuppressWarnings("unchecked") @Override + protected final boolean lessThan(final ShardFieldDoc docA, final ShardFieldDoc docB) { + final int n = fields.length; + int c = 0; + for (int i = 0; i < n && c == 0; ++i) { + final int type = fields[i].getType(); + if (type == SortField.STRING) { + final String s1 = (String) docA.fields[i]; + final String s2 = (String) docB.fields[i]; + // null values need to be sorted first, because of how FieldCache.getStringIndex() + // works - in that routine, any documents without a value in the given field are + // put first. If both are null, the next SortField is used + if (s1 == null) { + c = (s2 == null) ? 0 : -1; + } else if (s2 == null) { + c = 1; + } else if (fields[i].getLocale() == null) { + c = s1.compareTo(s2); + } else { + c = collators[i].compare(s1, s2); + } + } else { + c = docA.fields[i].compareTo(docB.fields[i]); + if (type == SortField.SCORE) { + c = -c; + } + } + // reverse sort + if (fields[i].getReverse()) { + c = -c; + } + } + + // avoid random sort order that could lead to duplicates (bug #31241): + if (c == 0) { + // CHANGE: Add shard base tie breaking + c = docA.shardTarget().compareTo(docB.shardTarget()); + if (c == 0) { + return docA.doc > docB.doc; + } + } + + return c > 0; } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/SearchShardTarget.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/SearchShardTarget.java index 73fe67ab38b..c21da6fdc77 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/SearchShardTarget.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/SearchShardTarget.java @@ -32,7 +32,7 @@ import java.io.Serializable; * * @author kimchy (shay.banon) */ -public class SearchShardTarget implements Streamable, Serializable { +public class SearchShardTarget implements Streamable, Serializable, Comparable { private String nodeId; @@ -80,6 +80,14 @@ public class SearchShardTarget implements Streamable, 
Serializable { return result; } + @Override public int compareTo(SearchShardTarget o) { + int i = index.compareTo(o.index()); + if (i == 0) { + i = shardId - o.shardId; + } + return i; + } + @Override public void readFrom(StreamInput in) throws IOException { if (in.readBoolean()) { nodeId = in.readUTF(); diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/controller/ScoreDocQueue.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/controller/ScoreDocQueue.java index d822ee7384d..39dca4cc560 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/controller/ScoreDocQueue.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/controller/ScoreDocQueue.java @@ -19,7 +19,6 @@ package org.elasticsearch.search.controller; -import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.util.PriorityQueue; /** @@ -27,16 +26,21 @@ import org.apache.lucene.util.PriorityQueue; * * @author kimchy (Shay Banon) */ -public class ScoreDocQueue extends PriorityQueue { +public class ScoreDocQueue extends PriorityQueue { public ScoreDocQueue(int size) { initialize(size); } - protected final boolean lessThan(ScoreDoc hitA, ScoreDoc hitB) { - if (hitA.score == hitB.score) - return hitA.doc > hitB.doc; - else + protected final boolean lessThan(ShardScoreDoc hitA, ShardScoreDoc hitB) { + if (hitA.score == hitB.score) { + int c = hitA.shardTarget().compareTo(hitB.shardTarget()); + if (c == 0) { + return hitA.doc > hitB.doc; + } + return c > 0; + } else { return hitA.score < hitB.score; + } } } diff --git a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/basic/TransportTwoNodesSearchTests.java b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/basic/TransportTwoNodesSearchTests.java index 2f42bd46e19..30b1c4a2c65 100644 --- a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/basic/TransportTwoNodesSearchTests.java +++ 
b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/basic/TransportTwoNodesSearchTests.java @@ -59,6 +59,8 @@ public class TransportTwoNodesSearchTests extends AbstractNodesTests { private Client client; + private Set fullExpectedIds = Sets.newHashSet(); + @BeforeClass public void createNodes() throws Exception { startNode("server1"); startNode("server2"); @@ -72,6 +74,7 @@ public class TransportTwoNodesSearchTests extends AbstractNodesTests { for (int i = 0; i < 100; i++) { index(client("server1"), Integer.toString(i), "test", i); + fullExpectedIds.add(Integer.toString(i)); } client.admin().indices().refresh(refreshRequest("test")).actionGet(); } @@ -166,6 +169,32 @@ public class TransportTwoNodesSearchTests extends AbstractNodesTests { } } + @Test public void testQueryThenFetchWithFrom() throws Exception { + SearchSourceBuilder source = searchSource() + .query(matchAllQuery()) + .explain(true); + + Set collectedIds = Sets.newHashSet(); + + SearchResponse searchResponse = client.search(searchRequest("test").source(source.from(0).size(60)).searchType(QUERY_THEN_FETCH)).actionGet(); + assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0)); + assertThat(searchResponse.hits().totalHits(), equalTo(100l)); + assertThat(searchResponse.hits().hits().length, equalTo(60)); + for (int i = 0; i < 60; i++) { + SearchHit hit = searchResponse.hits().hits()[i]; + collectedIds.add(hit.id()); + } + searchResponse = client.search(searchRequest("test").source(source.from(60).size(60)).searchType(QUERY_THEN_FETCH)).actionGet(); + assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0)); + assertThat(searchResponse.hits().totalHits(), equalTo(100l)); + assertThat(searchResponse.hits().hits().length, equalTo(40)); + for (int i = 0; i < 40; i++) { + SearchHit hit = searchResponse.hits().hits()[i]; + 
collectedIds.add(hit.id()); + } + assertThat(collectedIds, equalTo(fullExpectedIds)); + } + @Test public void testQueryThenFetchWithSort() throws Exception { SearchSourceBuilder source = searchSource() .query(termQuery("multi", "test"))