Improve from hits pagination (duplicates), closes #717.

kimchy 2011-02-23 22:57:44 +02:00
parent 6b7192e744
commit f7100c0698
4 changed files with 163 additions and 12 deletions
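
The underlying problem: when hits from different shards tie on their score (or sort values), the merge previously fell back on the shard-local doc id alone, which is not unique across shards, so two requests with different from/size offsets could interleave the tied hits differently and return duplicate or missing ids. The fix adds the shard as an extra tie-break key, making the merged ordering total and stable. A minimal, self-contained sketch of the ordering idea (the Hit record and its fields are illustrative stand-ins, not the Elasticsearch classes changed below):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

public class StableHitOrdering {
    // Illustrative stand-in for a per-shard hit; not an Elasticsearch class.
    record Hit(float score, String index, int shardId, int doc) {}

    // Higher score first; ties broken by index name, then shard id, then local doc id.
    // Without the shard component, equal-scored hits from different shards have no
    // deterministic global order, so page one and page two of the same query can overlap.
    static final Comparator<Hit> STABLE_ORDER = Comparator
            .comparingDouble((Hit h) -> -h.score())
            .thenComparing(Hit::index)
            .thenComparingInt(Hit::shardId)
            .thenComparingInt(Hit::doc);

    public static void main(String[] args) {
        List<Hit> hits = new ArrayList<>(List.of(
                new Hit(1.0f, "test", 1, 3),
                new Hit(1.0f, "test", 0, 3),   // same score and doc id, different shard
                new Hit(2.0f, "test", 0, 7)));
        hits.sort(STABLE_ORDER);
        // The order is the same on every request, so slicing with from/size covers each hit exactly once.
        hits.forEach(System.out::println);
    }
}
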

View File

@@ -19,18 +19,128 @@
package org.apache.lucene.search;
import org.apache.lucene.util.PriorityQueue;
import org.elasticsearch.search.controller.ShardFieldDoc;
import java.text.Collator;
import java.util.Locale;
/**
* @author kimchy (Shay Banon)
*/
// LUCENE TRACK, had to copy over in order to improve the same-order tie break to take shards into account
public class ShardFieldDocSortedHitQueue extends PriorityQueue<ShardFieldDoc> {
volatile SortField[] fields = null;
// used in the case where the fields are sorted by locale
// based strings
volatile Collator[] collators = null;
/**
* Creates a hit queue sorted by the given list of fields.
*
* @param fields Fieldable names, in priority order (highest priority first).
* @param size The number of hits to retain. Must be greater than zero.
*/
public ShardFieldDocSortedHitQueue(SortField[] fields, int size) {
initialize(size);
setFields(fields);
}
/**
* Allows redefinition of sort fields if they are <code>null</code>.
* This is to handle the case using ParallelMultiSearcher where the
* original list contains AUTO and we don't know the actual sort
* type until the values come back. The fields can only be set once.
* This method should be synchronized externally, like all other PQ methods.
*
* @param fields
*/
public void setFields(SortField[] fields) {
this.fields = fields;
this.collators = hasCollators(fields);
}
/**
* Returns the fields being used to sort.
*/
SortField[] getFields() {
return fields;
}
/**
* Returns an array of collators, possibly <code>null</code>. The collators
* correspond to any SortFields which were given a specific locale.
*
* @param fields Array of sort fields.
* @return Array, possibly <code>null</code>.
*/
private Collator[] hasCollators(final SortField[] fields) {
if (fields == null) return null;
Collator[] ret = new Collator[fields.length];
for (int i = 0; i < fields.length; ++i) {
Locale locale = fields[i].getLocale();
if (locale != null)
ret[i] = Collator.getInstance(locale);
}
return ret;
}
/**
* Returns whether <code>docA</code> is less relevant than <code>docB</code>.
*
* @param docA ShardFieldDoc
* @param docB ShardFieldDoc
* @return <code>true</code> if document <code>docA</code> should be sorted after document <code>docB</code>.
*/
@SuppressWarnings("unchecked") @Override
protected final boolean lessThan(final ShardFieldDoc docA, final ShardFieldDoc docB) {
final int n = fields.length;
int c = 0;
for (int i = 0; i < n && c == 0; ++i) {
final int type = fields[i].getType();
if (type == SortField.STRING) {
final String s1 = (String) docA.fields[i];
final String s2 = (String) docB.fields[i];
// null values need to be sorted first, because of how FieldCache.getStringIndex()
// works - in that routine, any documents without a value in the given field are
// put first. If both are null, the next SortField is used
if (s1 == null) {
c = (s2 == null) ? 0 : -1;
} else if (s2 == null) {
c = 1;
} else if (fields[i].getLocale() == null) {
c = s1.compareTo(s2);
} else {
c = collators[i].compare(s1, s2);
}
} else {
c = docA.fields[i].compareTo(docB.fields[i]);
if (type == SortField.SCORE) {
c = -c;
}
}
// reverse sort
if (fields[i].getReverse()) {
c = -c;
}
}
// avoid random sort order that could lead to duplicates (bug #31241):
if (c == 0) {
// CHANGE: add shard-based tie breaking
c = docA.shardTarget().compareTo(docB.shardTarget());
if (c == 0) {
return docA.doc > docB.doc;
}
}
return c > 0;
}
}
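
Both queue classes in this commit follow the contract of Lucene's PriorityQueue: lessThan(a, b) returning true means a ranks below b, so top() holds the worst retained hit and is the element replaced when something better arrives; popping everything therefore yields hits worst-first. Presumably the merge phase sizes such a queue around from + size entries and fills the result array from the back. A toy illustration of that keep-the-best-N pattern, using java.util's PriorityQueue rather than the Lucene class:

import java.util.Comparator;
import java.util.PriorityQueue;

public class TopNSketch {
    // Keep only the best n values with a min-heap: the worst retained value sits at the
    // head and is evicted when a better one arrives, mirroring the lessThan() convention above.
    static int[] topN(int[] values, int n) {
        PriorityQueue<Integer> heap = new PriorityQueue<>(Comparator.naturalOrder());
        for (int v : values) {
            if (heap.size() < n) {
                heap.offer(v);
            } else if (n > 0 && heap.peek() < v) {
                heap.poll();
                heap.offer(v);
            }
        }
        int[] out = new int[heap.size()];
        for (int i = out.length - 1; i >= 0; i--) {
            out[i] = heap.poll(); // popped worst-first, so fill the result from the back (best first)
        }
        return out;
    }

    public static void main(String[] args) {
        for (int v : topN(new int[]{5, 1, 9, 3, 7}, 3)) {
            System.out.println(v); // prints 9, 7, 5
        }
    }
}
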

View File

@@ -32,7 +32,7 @@ import java.io.Serializable;
*
* @author kimchy (shay.banon)
*/
public class SearchShardTarget implements Streamable, Serializable, Comparable<SearchShardTarget> {
private String nodeId;
@@ -80,6 +80,14 @@ public class SearchShardTarget implements Streamable, Serializable {
return result;
}
@Override public int compareTo(SearchShardTarget o) {
int i = index.compareTo(o.index());
if (i == 0) {
i = shardId - o.shardId;
}
return i;
}
@Override public void readFrom(StreamInput in) throws IOException {
if (in.readBoolean()) {
nodeId = in.readUTF();

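The new compareTo keys only on the index name and the shard id, not the node id, so a given shard compares the same no matter which copy (primary or replica, on whichever node) answered; that is exactly the stable key the tie break above needs. A quick sketch of the resulting ordering, assuming SearchShardTarget's usual org.elasticsearch.search package and its (nodeId, index, shardId) constructor (the values here are hypothetical):

import org.elasticsearch.search.SearchShardTarget;

public class ShardTargetOrderingSketch {
    public static void main(String[] args) {
        SearchShardTarget a = new SearchShardTarget("node1", "test", 3);
        SearchShardTarget b = new SearchShardTarget("node2", "test", 3); // same shard served by another node
        System.out.println(a.compareTo(b) == 0);                                        // true: node id is ignored
        System.out.println(a.compareTo(new SearchShardTarget("node1", "test", 4)) < 0); // true: lower shard id first
        System.out.println(a.compareTo(new SearchShardTarget("node1", "aaa", 3)) > 0);  // true: index name compared first
    }
}
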
View File

@@ -19,7 +19,6 @@
package org.elasticsearch.search.controller;
import org.apache.lucene.util.PriorityQueue;
/**
@@ -27,16 +26,21 @@ import org.apache.lucene.util.PriorityQueue;
*
* @author kimchy (Shay Banon)
*/
public class ScoreDocQueue extends PriorityQueue<ShardScoreDoc> {
public ScoreDocQueue(int size) {
initialize(size);
}
protected final boolean lessThan(ShardScoreDoc hitA, ShardScoreDoc hitB) {
if (hitA.score == hitB.score) {
int c = hitA.shardTarget().compareTo(hitB.shardTarget());
if (c == 0) {
return hitA.doc > hitB.doc;
}
return c > 0;
} else {
return hitA.score < hitB.score;
}
}
}

View File

@@ -59,6 +59,8 @@ public class TransportTwoNodesSearchTests extends AbstractNodesTests {
private Client client;
private Set<String> fullExpectedIds = Sets.newHashSet();
@BeforeClass public void createNodes() throws Exception {
startNode("server1");
startNode("server2");
@@ -72,6 +74,7 @@ public class TransportTwoNodesSearchTests extends AbstractNodesTests {
for (int i = 0; i < 100; i++) {
index(client("server1"), Integer.toString(i), "test", i);
fullExpectedIds.add(Integer.toString(i));
}
client.admin().indices().refresh(refreshRequest("test")).actionGet();
}
@@ -166,6 +169,32 @@ public class TransportTwoNodesSearchTests extends AbstractNodesTests {
}
}
@Test public void testQueryThenFetchWithFrom() throws Exception {
SearchSourceBuilder source = searchSource()
.query(matchAllQuery())
.explain(true);
Set<String> collectedIds = Sets.newHashSet();
SearchResponse searchResponse = client.search(searchRequest("test").source(source.from(0).size(60)).searchType(QUERY_THEN_FETCH)).actionGet();
assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
assertThat(searchResponse.hits().totalHits(), equalTo(100l));
assertThat(searchResponse.hits().hits().length, equalTo(60));
for (int i = 0; i < 60; i++) {
SearchHit hit = searchResponse.hits().hits()[i];
collectedIds.add(hit.id());
}
searchResponse = client.search(searchRequest("test").source(source.from(60).size(60)).searchType(QUERY_THEN_FETCH)).actionGet();
assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
assertThat(searchResponse.hits().totalHits(), equalTo(100l));
assertThat(searchResponse.hits().hits().length, equalTo(40));
for (int i = 0; i < 40; i++) {
SearchHit hit = searchResponse.hits().hits()[i];
collectedIds.add(hit.id());
}
assertThat(collectedIds, equalTo(fullExpectedIds));
}
@Test public void testQueryThenFetchWithSort() throws Exception {
SearchSourceBuilder source = searchSource()
.query(termQuery("multi", "test"))