Improve from hits pagination (duplicates), closes #717.

parent 6b7192e744
commit f7100c0698
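The duplicates came from how per-shard results are merged: when two hits from different shards carry equal sort values (or equal scores), their relative order in the merged list was effectively arbitrary, so a hit sitting at a page boundary could appear on both page N and page N+1, or be skipped. The change below makes the merge comparison a total order by breaking ties on the shard first and on the doc id second. A minimal sketch of that ordering, independent of the actual queue classes in the diff (Hit and compareHits are illustrative names, not part of the patch):

// Illustrative sketch only: Hit and compareHits are hypothetical, not classes from this commit.
class Hit {
    float score;     // relevance score, standing in for any sort value
    int doc;         // shard-local doc id
    String shardKey; // stands in for SearchShardTarget (index name + shard id)

    Hit(float score, int doc, String shardKey) {
        this.score = score;
        this.doc = doc;
        this.shardKey = shardKey;
    }

    // Total order: higher score first, ties broken by shard, then by doc id.
    // No two hits from the same shard share a doc id, so the order is fully
    // deterministic and paging over it cannot return the same hit twice.
    static int compareHits(Hit a, Hit b) {
        int c = Float.compare(b.score, a.score);
        if (c == 0) {
            c = a.shardKey.compareTo(b.shardKey);
        }
        if (c == 0) {
            c = Integer.compare(a.doc, b.doc);
        }
        return c;
    }
}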
@@ -19,18 +19,128 @@
 
 package org.apache.lucene.search;
 
+import org.apache.lucene.util.PriorityQueue;
+
+import org.elasticsearch.search.controller.ShardFieldDoc;
+
+import java.text.Collator;
+import java.util.Locale;
+
 /**
  * @author kimchy (Shay Banon)
  */
-// LUCENE TRACK
-public class ShardFieldDocSortedHitQueue extends FieldDocSortedHitQueue {
+// LUCENE TRACK, Had to copy over in order ot improve same order tie break to take shards into account
+public class ShardFieldDocSortedHitQueue extends PriorityQueue<ShardFieldDoc> {
 
+    volatile SortField[] fields = null;
+
+    // used in the case where the fields are sorted by locale
+    // based strings
+    volatile Collator[] collators = null;
+
+    /**
+     * Creates a hit queue sorted by the given list of fields.
+     *
+     * @param fields Fieldable names, in priority order (highest priority first).
+     * @param size   The number of hits to retain. Must be greater than zero.
+     */
     public ShardFieldDocSortedHitQueue(SortField[] fields, int size) {
-        super(size);
+        initialize(size);
         setFields(fields);
     }
 
-    @Override public void setFields(SortField[] fields) {
-        super.setFields(fields);
+    /**
+     * Allows redefinition of sort fields if they are <code>null</code>.
+     * This is to handle the case using ParallelMultiSearcher where the
+     * original list contains AUTO and we don't know the actual sort
+     * type until the values come back. The fields can only be set once.
+     * This method should be synchronized external like all other PQ methods.
+     *
+     * @param fields
+     */
+    public void setFields(SortField[] fields) {
+        this.fields = fields;
+        this.collators = hasCollators(fields);
+    }
+
+    /**
+     * Returns the fields being used to sort.
+     */
+    SortField[] getFields() {
+        return fields;
+    }
+
+    /**
+     * Returns an array of collators, possibly <code>null</code>. The collators
+     * correspond to any SortFields which were given a specific locale.
+     *
+     * @param fields Array of sort fields.
+     * @return Array, possibly <code>null</code>.
+     */
+    private Collator[] hasCollators(final SortField[] fields) {
+        if (fields == null) return null;
+        Collator[] ret = new Collator[fields.length];
+        for (int i = 0; i < fields.length; ++i) {
+            Locale locale = fields[i].getLocale();
+            if (locale != null)
+                ret[i] = Collator.getInstance(locale);
+        }
+        return ret;
+    }
+
+    /**
+     * Returns whether <code>a</code> is less relevant than <code>b</code>.
+     *
+     * @param a ScoreDoc
+     * @param b ScoreDoc
+     * @return <code>true</code> if document <code>a</code> should be sorted after document <code>b</code>.
+     */
+    @SuppressWarnings("unchecked") @Override
+    protected final boolean lessThan(final ShardFieldDoc docA, final ShardFieldDoc docB) {
+        final int n = fields.length;
+        int c = 0;
+        for (int i = 0; i < n && c == 0; ++i) {
+            final int type = fields[i].getType();
+            if (type == SortField.STRING) {
+                final String s1 = (String) docA.fields[i];
+                final String s2 = (String) docB.fields[i];
+                // null values need to be sorted first, because of how FieldCache.getStringIndex()
+                // works - in that routine, any documents without a value in the given field are
+                // put first. If both are null, the next SortField is used
+                if (s1 == null) {
+                    c = (s2 == null) ? 0 : -1;
+                } else if (s2 == null) {
+                    c = 1;
+                } else if (fields[i].getLocale() == null) {
+                    c = s1.compareTo(s2);
+                } else {
+                    c = collators[i].compare(s1, s2);
+                }
+            } else {
+                c = docA.fields[i].compareTo(docB.fields[i]);
+                if (type == SortField.SCORE) {
+                    c = -c;
+                }
+            }
+            // reverse sort
+            if (fields[i].getReverse()) {
+                c = -c;
+            }
+        }
+
+        // avoid random sort order that could lead to duplicates (bug #31241):
+        if (c == 0) {
+            // CHANGE: Add shard base tie breaking
+            c = docA.shardTarget().compareTo(docB.shardTarget());
+            if (c == 0) {
+                return docA.doc > docB.doc;
+            }
+        }
+
+        return c > 0;
     }
 }
@@ -32,7 +32,7 @@ import java.io.Serializable;
  *
  * @author kimchy (shay.banon)
  */
-public class SearchShardTarget implements Streamable, Serializable {
+public class SearchShardTarget implements Streamable, Serializable, Comparable<SearchShardTarget> {
 
     private String nodeId;
 
@@ -80,6 +80,14 @@ public class SearchShardTarget implements Streamable, Serializable {
         return result;
     }
 
+    @Override public int compareTo(SearchShardTarget o) {
+        int i = index.compareTo(o.index());
+        if (i == 0) {
+            i = shardId - o.shardId;
+        }
+        return i;
+    }
+
     @Override public void readFrom(StreamInput in) throws IOException {
         if (in.readBoolean()) {
             nodeId = in.readUTF();
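The Comparable implementation added above supplies the tie-break key used by both queues in this commit: index name first, then shard id. A small self-contained sketch of the same two-key comparison (Target is an illustrative stand-in, not the real SearchShardTarget); it uses Integer.compare instead of the subtraction in the patch, which orders identically for realistic shard counts but cannot overflow:

// Illustrative stand-in for the compareTo() added to SearchShardTarget.
class Target implements Comparable<Target> {
    final String index;
    final int shardId;

    Target(String index, int shardId) {
        this.index = index;
        this.shardId = shardId;
    }

    @Override public int compareTo(Target o) {
        int c = index.compareTo(o.index);            // primary key: index name
        if (c == 0) {
            c = Integer.compare(shardId, o.shardId); // secondary key: shard id
        }
        return c;
    }
}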
@@ -19,7 +19,6 @@
 
 package org.elasticsearch.search.controller;
 
-import org.apache.lucene.search.ScoreDoc;
 import org.apache.lucene.util.PriorityQueue;
 
 /**
@@ -27,16 +26,21 @@ import org.apache.lucene.util.PriorityQueue;
  *
  * @author kimchy (Shay Banon)
  */
-public class ScoreDocQueue extends PriorityQueue<ScoreDoc> {
+public class ScoreDocQueue extends PriorityQueue<ShardScoreDoc> {
 
     public ScoreDocQueue(int size) {
         initialize(size);
     }
 
-    protected final boolean lessThan(ScoreDoc hitA, ScoreDoc hitB) {
-        if (hitA.score == hitB.score)
-            return hitA.doc > hitB.doc;
-        else
+    protected final boolean lessThan(ShardScoreDoc hitA, ShardScoreDoc hitB) {
+        if (hitA.score == hitB.score) {
+            int c = hitA.shardTarget().compareTo(hitB.shardTarget());
+            if (c == 0) {
+                return hitA.doc > hitB.doc;
+            }
+            return c > 0;
+        } else {
             return hitA.score < hitB.score;
+        }
     }
 }
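In Lucene's PriorityQueue, lessThan(a, b) defines the heap order: the element that is "less than" every other sits at the top and is the first one dropped when the queue overflows. With the shard and doc-id tie-breaks above, that order is total, so cutting a from/size window out of the merged hits is deterministic; that is what the new testQueryThenFetchWithFrom further down verifies by fetching hits 0-59 and 60-99 and expecting the union to be all 100 ids. A rough, self-contained sketch of such a window cut, assuming a plain pop-based merge (PageCutter and page() are hypothetical helpers, not the actual merge code in Elasticsearch):

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.PriorityQueue;

// Rough sketch only: given any total order (for example the score/shard/doc
// order sketched near the top), consecutive windows cut this way can neither
// overlap nor skip hits.
class PageCutter {
    static <T> List<T> page(List<T> allShardHits, Comparator<T> totalOrder, int from, int size) {
        PriorityQueue<T> queue = new PriorityQueue<T>(Math.max(1, allShardHits.size()), totalOrder);
        queue.addAll(allShardHits);
        List<T> result = new ArrayList<T>();
        for (int i = 0; i < from + size && !queue.isEmpty(); i++) {
            T hit = queue.poll();
            if (i >= from) {
                result.add(hit); // keep only hits inside the [from, from + size) window
            }
        }
        return result;
    }
}

For example, page(hits, order, 0, 60) and page(hits, order, 60, 60) over the same merged hits always partition them, which is exactly the property the pagination bug violated before this change.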
@@ -59,6 +59,8 @@ public class TransportTwoNodesSearchTests extends AbstractNodesTests {
 
     private Client client;
 
+    private Set<String> fullExpectedIds = Sets.newHashSet();
+
     @BeforeClass public void createNodes() throws Exception {
         startNode("server1");
         startNode("server2");
@@ -72,6 +74,7 @@ public class TransportTwoNodesSearchTests extends AbstractNodesTests {
 
         for (int i = 0; i < 100; i++) {
             index(client("server1"), Integer.toString(i), "test", i);
+            fullExpectedIds.add(Integer.toString(i));
         }
         client.admin().indices().refresh(refreshRequest("test")).actionGet();
     }
@@ -166,6 +169,32 @@ public class TransportTwoNodesSearchTests extends AbstractNodesTests {
         }
     }
 
+    @Test public void testQueryThenFetchWithFrom() throws Exception {
+        SearchSourceBuilder source = searchSource()
+                .query(matchAllQuery())
+                .explain(true);
+
+        Set<String> collectedIds = Sets.newHashSet();
+
+        SearchResponse searchResponse = client.search(searchRequest("test").source(source.from(0).size(60)).searchType(QUERY_THEN_FETCH)).actionGet();
+        assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
+        assertThat(searchResponse.hits().totalHits(), equalTo(100l));
+        assertThat(searchResponse.hits().hits().length, equalTo(60));
+        for (int i = 0; i < 60; i++) {
+            SearchHit hit = searchResponse.hits().hits()[i];
+            collectedIds.add(hit.id());
+        }
+        searchResponse = client.search(searchRequest("test").source(source.from(60).size(60)).searchType(QUERY_THEN_FETCH)).actionGet();
+        assertThat("Failures " + Arrays.toString(searchResponse.shardFailures()), searchResponse.shardFailures().length, equalTo(0));
+        assertThat(searchResponse.hits().totalHits(), equalTo(100l));
+        assertThat(searchResponse.hits().hits().length, equalTo(40));
+        for (int i = 0; i < 40; i++) {
+            SearchHit hit = searchResponse.hits().hits()[i];
+            collectedIds.add(hit.id());
+        }
+        assertThat(collectedIds, equalTo(fullExpectedIds));
+    }
+
     @Test public void testQueryThenFetchWithSort() throws Exception {
         SearchSourceBuilder source = searchSource()
                 .query(termQuery("multi", "test"))