Improve "from" hits pagination (duplicate hits across pages), closes #717.
This commit is contained in:
parent 6b7192e744
commit f7100c0698
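Background for the change (an illustrative sketch, not code from this commit): when the coordinating node merges sorted hits coming back from several shards, two hits that compare equal on every sort criterion used to be ordered arbitrarily, so a follow-up request with a higher "from" could return documents that already appeared on the previous page while others were skipped. Breaking ties by shard target and then by doc id makes the merged order total and repeatable. The Hit record and comparator below are hypothetical and only illustrate that idea:

    import java.util.Comparator;
    import java.util.List;

    public class StableMergeOrderSketch {

        // Hypothetical hit: a score plus the identifiers needed for a total order.
        record Hit(float score, String index, int shardId, int doc) {}

        // Equal scores alone leave the relative order ambiguous; adding the shard
        // (index name, shard id) and the doc id as tie breakers makes it deterministic.
        static final Comparator<Hit> STABLE = Comparator
                .comparingDouble((Hit h) -> h.score()).reversed() // best score first
                .thenComparing(Hit::index)                        // then shard target: index name...
                .thenComparingInt(Hit::shardId)                   // ...and shard id
                .thenComparingInt(Hit::doc);                      // finally the Lucene doc id

        public static void main(String[] args) {
            List<Hit> hits = List.of(
                    new Hit(1.0f, "test", 1, 7),
                    new Hit(1.0f, "test", 0, 7),
                    new Hit(1.0f, "test", 0, 3));
            // Every merge of the same inputs now yields the same order, so from/size
            // windows partition the result set without duplicates or gaps.
            System.out.println(hits.stream().sorted(STABLE).toList());
        }
    }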
@@ -19,18 +19,128 @@
 
 package org.apache.lucene.search;
 
+import org.apache.lucene.util.PriorityQueue;
 import org.elasticsearch.search.controller.ShardFieldDoc;
 
+import java.text.Collator;
+import java.util.Locale;
+
 /**
  * @author kimchy (Shay Banon)
  */
-// LUCENE TRACK
-public class ShardFieldDocSortedHitQueue extends FieldDocSortedHitQueue {
+// LUCENE TRACK, Had to copy over in order to improve same order tie break to take shards into account
+public class ShardFieldDocSortedHitQueue extends PriorityQueue<ShardFieldDoc> {
+
+    volatile SortField[] fields = null;
+
+    // used in the case where the fields are sorted by locale
+    // based strings
+    volatile Collator[] collators = null;
+
 
+    /**
+     * Creates a hit queue sorted by the given list of fields.
+     *
+     * @param fields Fieldable names, in priority order (highest priority first).
+     * @param size   The number of hits to retain. Must be greater than zero.
+     */
     public ShardFieldDocSortedHitQueue(SortField[] fields, int size) {
-        super(size);
+        initialize(size);
+        setFields(fields);
     }
 
-    @Override public void setFields(SortField[] fields) {
-        super.setFields(fields);
-    }
+
+    /**
+     * Allows redefinition of sort fields if they are <code>null</code>.
+     * This is to handle the case using ParallelMultiSearcher where the
+     * original list contains AUTO and we don't know the actual sort
+     * type until the values come back. The fields can only be set once.
+     * This method should be synchronized external like all other PQ methods.
+     *
+     * @param fields
+     */
+    public void setFields(SortField[] fields) {
+        this.fields = fields;
+        this.collators = hasCollators(fields);
+    }
+
+
+    /**
+     * Returns the fields being used to sort.
+     */
+    SortField[] getFields() {
+        return fields;
+    }
+
+
+    /**
+     * Returns an array of collators, possibly <code>null</code>. The collators
+     * correspond to any SortFields which were given a specific locale.
+     *
+     * @param fields Array of sort fields.
+     * @return Array, possibly <code>null</code>.
+     */
+    private Collator[] hasCollators(final SortField[] fields) {
+        if (fields == null) return null;
+        Collator[] ret = new Collator[fields.length];
+        for (int i = 0; i < fields.length; ++i) {
+            Locale locale = fields[i].getLocale();
+            if (locale != null)
+                ret[i] = Collator.getInstance(locale);
+        }
+        return ret;
+    }
+
+
+    /**
+     * Returns whether <code>a</code> is less relevant than <code>b</code>.
+     *
+     * @param a ScoreDoc
+     * @param b ScoreDoc
+     * @return <code>true</code> if document <code>a</code> should be sorted after document <code>b</code>.
+     */
+    @SuppressWarnings("unchecked") @Override
+    protected final boolean lessThan(final ShardFieldDoc docA, final ShardFieldDoc docB) {
+        final int n = fields.length;
+        int c = 0;
+        for (int i = 0; i < n && c == 0; ++i) {
+            final int type = fields[i].getType();
+            if (type == SortField.STRING) {
+                final String s1 = (String) docA.fields[i];
+                final String s2 = (String) docB.fields[i];
+                // null values need to be sorted first, because of how FieldCache.getStringIndex()
+                // works - in that routine, any documents without a value in the given field are
+                // put first. If both are null, the next SortField is used
+                if (s1 == null) {
+                    c = (s2 == null) ? 0 : -1;
+                } else if (s2 == null) {
+                    c = 1;
+                } else if (fields[i].getLocale() == null) {
+                    c = s1.compareTo(s2);
+                } else {
+                    c = collators[i].compare(s1, s2);
+                }
+            } else {
+                c = docA.fields[i].compareTo(docB.fields[i]);
+                if (type == SortField.SCORE) {
+                    c = -c;
+                }
+            }
+            // reverse sort
+            if (fields[i].getReverse()) {
+                c = -c;
+            }
+        }
+
+        // avoid random sort order that could lead to duplicates (bug #31241):
+        if (c == 0) {
+            // CHANGE: Add shard base tie breaking
+            c = docA.shardTarget().compareTo(docB.shardTarget());
+            if (c == 0) {
+                return docA.doc > docB.doc;
+            }
+        }
+
+        return c > 0;
+    }
 }
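How a queue like this feeds pagination (a sketch under assumptions, not the actual merge code in the search controller): the coordinating node gathers each shard's top "from + size" candidates, orders them with one deterministic comparator, and only then applies the from/size window. The helper below uses plain java.util collections and a hypothetical ShardHit record purely to show that step:

    import java.util.ArrayList;
    import java.util.Comparator;
    import java.util.List;

    public class MergePageSketch {

        // Hypothetical per-shard hit; the real code works on (Shard)FieldDoc/ScoreDoc instances.
        record ShardHit(String index, int shardId, int doc, float score) {}

        // Merge every shard's candidates, sort them with a total order, then cut the page.
        // If the order were not total, the cut points could land on different documents
        // from one request to the next, producing duplicates or gaps across pages.
        static List<ShardHit> page(List<List<ShardHit>> perShardTopDocs,
                                   Comparator<ShardHit> totalOrder, int from, int size) {
            List<ShardHit> merged = new ArrayList<>();
            perShardTopDocs.forEach(merged::addAll);
            merged.sort(totalOrder);
            if (from >= merged.size()) {
                return List.of();
            }
            return List.copyOf(merged.subList(from, Math.min(merged.size(), from + size)));
        }
    }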
@@ -32,7 +32,7 @@ import java.io.Serializable;
  *
  * @author kimchy (shay.banon)
  */
-public class SearchShardTarget implements Streamable, Serializable {
+public class SearchShardTarget implements Streamable, Serializable, Comparable<SearchShardTarget> {
 
     private String nodeId;
 
@@ -80,6 +80,14 @@ public class SearchShardTarget implements Streamable, Serializable {
         return result;
     }
 
+    @Override public int compareTo(SearchShardTarget o) {
+        int i = index.compareTo(o.index());
+        if (i == 0) {
+            i = shardId - o.shardId;
+        }
+        return i;
+    }
+
     @Override public void readFrom(StreamInput in) throws IOException {
         if (in.readBoolean()) {
             nodeId = in.readUTF();
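A small illustration of the ordering this compareTo establishes, using a hypothetical stand-in record rather than SearchShardTarget itself: targets sort by index name first and by shard id within the same index, which is exactly the order the hit queues in this commit rely on for their shard tie break.

    import java.util.List;

    public class ShardTargetOrderSketch {

        // Stand-in with the same comparison rule: index name first, then shard id.
        record Target(String index, int shardId) implements Comparable<Target> {
            public int compareTo(Target o) {
                int i = index.compareTo(o.index());
                return i != 0 ? i : shardId - o.shardId();
            }
        }

        public static void main(String[] args) {
            // Prints the targets as a/0, a/2, b/1: index name first, then shard id.
            List.of(new Target("b", 1), new Target("a", 2), new Target("a", 0))
                    .stream().sorted()
                    .forEach(t -> System.out.println(t.index() + "/" + t.shardId()));
        }
    }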
@@ -19,7 +19,6 @@
 
 package org.elasticsearch.search.controller;
 
-import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.util.PriorityQueue;
 
 /**
@@ -27,16 +26,21 @@ import org.apache.lucene.util.PriorityQueue;
  *
  * @author kimchy (Shay Banon)
  */
-public class ScoreDocQueue extends PriorityQueue<ScoreDoc> {
+public class ScoreDocQueue extends PriorityQueue<ShardScoreDoc> {
 
     public ScoreDocQueue(int size) {
         initialize(size);
     }
 
-    protected final boolean lessThan(ScoreDoc hitA, ScoreDoc hitB) {
-        if (hitA.score == hitB.score)
-            return hitA.doc > hitB.doc;
-        else
+    protected final boolean lessThan(ShardScoreDoc hitA, ShardScoreDoc hitB) {
+        if (hitA.score == hitB.score) {
+            int c = hitA.shardTarget().compareTo(hitB.shardTarget());
+            if (c == 0) {
+                return hitA.doc > hitB.doc;
+            }
+            return c > 0;
+        } else {
             return hitA.score < hitB.score;
+        }
     }
 }
@@ -59,6 +59,8 @@ public class TransportTwoNodesSearchTests extends AbstractNodesTests {
 
     private Client client;
 
+    private Set<String> fullExpectedIds = Sets.newHashSet();
+
     @BeforeClass public void createNodes() throws Exception {
         startNode("server1");
         startNode("server2");
@@ -72,6 +74,7 @@ public class TransportTwoNodesSearchTests extends AbstractNodesTests {
 
         for (int i = 0; i < 100; i++) {
             index(client("server1"), Integer.toString(i), "test", i);
+            fullExpectedIds.add(Integer.toString(i));
         }
         client.admin().indices().refresh(refreshRequest("test")).actionGet();
     }
@@ -166,6 +169,32 @@ public class TransportTwoNodesSearchTests extends AbstractNodesTests {
         }
     }
 
+    @Test public void testQueryThenFetchWithFrom() throws Exception {
+        SearchSourceBuilder source = searchSource()
+                .query(matchAllQuery())
+                .explain(true);
+
+        Set<String> collectedIds = Sets.newHashSet();
+
+        SearchResponse searchResponse = client.search(searchRequest("test").source(source.from(0).size(60)).searchType(QUERY_THEN_FETCH)).actionGet();
+        assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
+        assertThat(searchResponse.hits().totalHits(), equalTo(100l));
+        assertThat(searchResponse.hits().hits().length, equalTo(60));
+        for (int i = 0; i < 60; i++) {
+            SearchHit hit = searchResponse.hits().hits()[i];
+            collectedIds.add(hit.id());
+        }
+        searchResponse = client.search(searchRequest("test").source(source.from(60).size(60)).searchType(QUERY_THEN_FETCH)).actionGet();
+        assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
+        assertThat(searchResponse.hits().totalHits(), equalTo(100l));
+        assertThat(searchResponse.hits().hits().length, equalTo(40));
+        for (int i = 0; i < 40; i++) {
+            SearchHit hit = searchResponse.hits().hits()[i];
+            collectedIds.add(hit.id());
+        }
+        assertThat(collectedIds, equalTo(fullExpectedIds));
+    }
+
     @Test public void testQueryThenFetchWithSort() throws Exception {
         SearchSourceBuilder source = searchSource()
                 .query(termQuery("multi", "test"))
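The added testQueryThenFetchWithFrom test exercises exactly the failure mode this commit fixes: it pages through the 100 indexed documents in two query_then_fetch requests (from 0 / size 60, then from 60 / size 60), collects the returned ids, and asserts that their union equals the full expected id set. Any document repeated or dropped across the two pages would shrink that union and fail the assertion.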