Merge pull request #12127 from jpountz/fix/sort_merge
Clean up handling of missing values when merging shard results on the coordinating node.
This commit is contained in:
commit
f82b5ce201
|
@ -413,7 +413,13 @@ public class Lucene {
|
|||
if (in.readBoolean()) {
|
||||
field = in.readString();
|
||||
}
|
||||
fields[i] = new SortField(field, readSortType(in), in.readBoolean());
|
||||
SortField.Type sortType = readSortType(in);
|
||||
Object missingValue = readMissingValue(in);
|
||||
boolean reverse = in.readBoolean();
|
||||
fields[i] = new SortField(field, sortType, reverse);
|
||||
if (missingValue != null) {
|
||||
fields[i].setMissingValue(missingValue);
|
||||
}
|
||||
}
|
||||
|
||||
FieldDoc[] fieldDocs = new FieldDoc[in.readVInt()];
|
||||
|
@ -485,9 +491,12 @@ public class Lucene {
|
|||
out.writeString(sortField.getField());
|
||||
}
|
||||
if (sortField.getComparatorSource() != null) {
|
||||
writeSortType(out, ((IndexFieldData.XFieldComparatorSource) sortField.getComparatorSource()).reducedType());
|
||||
IndexFieldData.XFieldComparatorSource comparatorSource = (IndexFieldData.XFieldComparatorSource) sortField.getComparatorSource();
|
||||
writeSortType(out, comparatorSource.reducedType());
|
||||
writeMissingValue(out, comparatorSource.missingValue(sortField.getReverse()));
|
||||
} else {
|
||||
writeSortType(out, sortField.getType());
|
||||
writeMissingValue(out, sortField.missingValue);
|
||||
}
|
||||
out.writeBoolean(sortField.getReverse());
|
||||
}
|
||||
|
@ -508,6 +517,31 @@ public class Lucene {
|
|||
}
|
||||
}
|
||||
|
||||
private static void writeMissingValue(StreamOutput out, Object missingValue) throws IOException {
|
||||
if (missingValue == SortField.STRING_FIRST) {
|
||||
out.writeByte((byte) 1);
|
||||
} else if (missingValue == SortField.STRING_LAST) {
|
||||
out.writeByte((byte) 2);
|
||||
} else {
|
||||
out.writeByte((byte) 0);
|
||||
out.writeGenericValue(missingValue);
|
||||
}
|
||||
}
|
||||
|
||||
private static Object readMissingValue(StreamInput in) throws IOException {
|
||||
final byte id = in.readByte();
|
||||
switch (id) {
|
||||
case 0:
|
||||
return in.readGenericValue();
|
||||
case 1:
|
||||
return SortField.STRING_FIRST;
|
||||
case 2:
|
||||
return SortField.STRING_LAST;
|
||||
default:
|
||||
throw new IOException("Unknown missing value id: " + id);
|
||||
}
|
||||
}
|
||||
|
||||
public static void writeFieldDoc(StreamOutput out, FieldDoc fieldDoc) throws IOException {
|
||||
out.writeVInt(fieldDoc.fields.length);
|
||||
for (Object field : fieldDoc.fields) {
|
||||
|
|
|
@ -25,13 +25,11 @@ import org.apache.lucene.search.*;
|
|||
import org.apache.lucene.search.join.BitDocIdSetFilter;
|
||||
import org.apache.lucene.util.BitDocIdSet;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefBuilder;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.index.Index;
|
||||
import org.elasticsearch.index.IndexComponent;
|
||||
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
|
||||
import org.elasticsearch.index.mapper.FieldMapper;
|
||||
import org.elasticsearch.index.mapper.MappedFieldType;
|
||||
import org.elasticsearch.index.mapper.MapperService;
|
||||
import org.elasticsearch.index.settings.IndexSettings;
|
||||
|
@ -112,16 +110,6 @@ public interface IndexFieldData<FD extends AtomicFieldData> extends IndexCompone
|
|||
// on another node (we don't have the custom source them...)
|
||||
public abstract class XFieldComparatorSource extends FieldComparatorSource {
|
||||
|
||||
/** UTF-8 term containing a single code point: {@link Character#MAX_CODE_POINT} which will compare greater than all other index terms
|
||||
* since {@link Character#MAX_CODE_POINT} is a noncharacter and thus shouldn't appear in an index term. */
|
||||
public static final BytesRef MAX_TERM;
|
||||
static {
|
||||
BytesRefBuilder builder = new BytesRefBuilder();
|
||||
final char[] chars = Character.toChars(Character.MAX_CODE_POINT);
|
||||
builder.copyChars(chars, 0, chars.length);
|
||||
MAX_TERM = builder.toBytesRef();
|
||||
}
|
||||
|
||||
/**
|
||||
* Simple wrapper class around a filter that matches parent documents
|
||||
* and a filter that matches child documents. For every root document R,
|
||||
|
@ -179,7 +167,7 @@ public interface IndexFieldData<FD extends AtomicFieldData> extends IndexCompone
|
|||
return min ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
|
||||
case STRING:
|
||||
case STRING_VAL:
|
||||
return min ? null : MAX_TERM;
|
||||
return null;
|
||||
default:
|
||||
throw new UnsupportedOperationException("Unsupported reduced type: " + reducedType());
|
||||
}
|
||||
|
@ -225,6 +213,18 @@ public interface IndexFieldData<FD extends AtomicFieldData> extends IndexCompone
|
|||
}
|
||||
|
||||
public abstract SortField.Type reducedType();
|
||||
|
||||
/**
|
||||
* Return a missing value that is understandable by {@link SortField#setMissingValue(Object)}.
|
||||
* Most implementations return null because they already replace the value at the fielddata level.
|
||||
* However this can't work in case of strings since there is no such thing as a string which
|
||||
* compares greater than any other string, so in that case we need to return
|
||||
* {@link SortField#STRING_FIRST} or {@link SortField#STRING_LAST} so that the coordinating node
|
||||
* knows how to deal with null values.
|
||||
*/
|
||||
public Object missingValue(boolean reversed) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
interface Builder {
|
||||
|
|
|
@ -59,6 +59,19 @@ public class BytesRefFieldComparatorSource extends IndexFieldData.XFieldComparat
|
|||
return SortField.Type.STRING;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object missingValue(boolean reversed) {
|
||||
if (sortMissingFirst(missingValue) || sortMissingLast(missingValue)) {
|
||||
if (sortMissingLast(missingValue) ^ reversed) {
|
||||
return SortField.STRING_LAST;
|
||||
} else {
|
||||
return SortField.STRING_FIRST;
|
||||
}
|
||||
}
|
||||
// otherwise we fill missing values ourselves
|
||||
return null;
|
||||
}
|
||||
|
||||
protected SortedBinaryDocValues getValues(LeafReaderContext context) throws IOException {
|
||||
return indexFieldData.load(context).getBytesValues();
|
||||
}
|
||||
|
@ -97,29 +110,6 @@ public class BytesRefFieldComparatorSource extends IndexFieldData.XFieldComparat
|
|||
BytesRefFieldComparatorSource.this.setScorer(scorer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef value(int slot) {
|
||||
// TODO: When serializing the response to the coordinating node, we lose the information about
|
||||
// whether the comparator sorts missing docs first or last. We should fix it and let
|
||||
// TopDocs.merge deal with it (it knows how to)
|
||||
BytesRef value = super.value(slot);
|
||||
if (value == null) {
|
||||
assert sortMissingFirst(missingValue) || sortMissingLast(missingValue);
|
||||
value = missingBytes;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setTopValue(BytesRef topValue) {
|
||||
// symetric of value(int): if we need to feed the comparator with <tt>null</tt>
|
||||
// if we overrode the value with MAX_TERM in value(int)
|
||||
if (topValue == missingBytes && (sortMissingFirst(missingValue) || sortMissingLast(missingValue))) {
|
||||
topValue = null;
|
||||
}
|
||||
super.setTopValue(topValue);
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
|
@ -156,15 +146,6 @@ public class BytesRefFieldComparatorSource extends IndexFieldData.XFieldComparat
|
|||
BytesRefFieldComparatorSource.this.setScorer(scorer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef value(int slot) {
|
||||
BytesRef value = super.value(slot);
|
||||
if (value == null) {
|
||||
value = missingBytes;
|
||||
}
|
||||
return value;
|
||||
}
|
||||
|
||||
};
|
||||
}
|
||||
|
||||
|
|
|
@ -64,7 +64,6 @@ import static org.elasticsearch.search.internal.InternalSearchHitField.readSearc
|
|||
public class InternalSearchHit implements SearchHit {
|
||||
|
||||
private static final Object[] EMPTY_SORT_VALUES = new Object[0];
|
||||
private static final Text MAX_TERM_AS_TEXT = new StringAndBytesText(BytesRefFieldComparatorSource.MAX_TERM.utf8ToString());
|
||||
|
||||
private transient int docId;
|
||||
|
||||
|
@ -506,13 +505,7 @@ public class InternalSearchHit implements SearchHit {
|
|||
if (sortValues != null && sortValues.length > 0) {
|
||||
builder.startArray(Fields.SORT);
|
||||
for (Object sortValue : sortValues) {
|
||||
if (sortValue != null && sortValue.equals(MAX_TERM_AS_TEXT)) {
|
||||
// We don't display MAX_TERM in JSON responses in case some clients have UTF-8 parsers that wouldn't accept a
|
||||
// non-character in the response, even though this is valid UTF-8
|
||||
builder.nullValue();
|
||||
} else {
|
||||
builder.value(sortValue);
|
||||
}
|
||||
builder.value(sortValue);
|
||||
}
|
||||
builder.endArray();
|
||||
}
|
||||
|
|
|
@ -247,9 +247,9 @@ public abstract class AbstractFieldDataImplTests extends AbstractFieldDataTests
|
|||
assertThat(topDocs.scoreDocs[5].doc, equalTo(6));
|
||||
assertThat(((BytesRef) ((FieldDoc) topDocs.scoreDocs[5]).fields[0]).utf8ToString(), equalTo("08"));
|
||||
assertThat(topDocs.scoreDocs[6].doc, equalTo(1));
|
||||
assertThat((BytesRef) ((FieldDoc) topDocs.scoreDocs[6]).fields[0], equalTo(XFieldComparatorSource.MAX_TERM));
|
||||
assertThat((BytesRef) ((FieldDoc) topDocs.scoreDocs[6]).fields[0], equalTo(null));
|
||||
assertThat(topDocs.scoreDocs[7].doc, equalTo(5));
|
||||
assertThat((BytesRef) ((FieldDoc) topDocs.scoreDocs[7]).fields[0], equalTo(XFieldComparatorSource.MAX_TERM));
|
||||
assertThat((BytesRef) ((FieldDoc) topDocs.scoreDocs[7]).fields[0], equalTo(null));
|
||||
|
||||
topDocs = searcher.search(new MatchAllDocsQuery(), 10,
|
||||
new Sort(new SortField("value", indexFieldData.comparatorSource(null, MultiValueMode.MAX, null), true)));
|
||||
|
|
|
@ -443,13 +443,11 @@ public abstract class AbstractStringFieldDataTests extends AbstractFieldDataImpl
|
|||
if (cmpValue == null) {
|
||||
if ("_first".equals(missingValue)) {
|
||||
cmpValue = new BytesRef();
|
||||
} else if ("_last".equals(missingValue)) {
|
||||
cmpValue = XFieldComparatorSource.MAX_TERM;
|
||||
} else {
|
||||
} else if ("_last".equals(missingValue) == false) {
|
||||
cmpValue = (BytesRef) missingValue;
|
||||
}
|
||||
}
|
||||
if (previous != null) {
|
||||
if (previous != null && cmpValue != null) {
|
||||
assertTrue(previous.utf8ToString() + " / " + cmpValue.utf8ToString(), previous.compareTo(cmpValue) <= 0);
|
||||
}
|
||||
previous = cmpValue;
|
||||
|
|
|
@ -185,7 +185,7 @@ public class ParentChildFieldDataTests extends AbstractFieldDataTests {
|
|||
assertThat(topDocs.scoreDocs[6].doc, equalTo(6));
|
||||
assertThat(((BytesRef) ((FieldDoc) topDocs.scoreDocs[6]).fields[0]).utf8ToString(), equalTo("2"));
|
||||
assertThat(topDocs.scoreDocs[7].doc, equalTo(7));
|
||||
assertThat(((BytesRef) ((FieldDoc) topDocs.scoreDocs[7]).fields[0]), equalTo(XFieldComparatorSource.MAX_TERM));
|
||||
assertThat(((BytesRef) ((FieldDoc) topDocs.scoreDocs[7]).fields[0]), equalTo(null));
|
||||
|
||||
topDocs = searcher.search(new MatchAllDocsQuery(), 10, new Sort(new SortField(ParentFieldMapper.NAME, comparator, true)));
|
||||
assertThat(topDocs.totalHits, equalTo(8));
|
||||
|
|
|
@ -37,7 +37,6 @@ import org.elasticsearch.common.text.Text;
|
|||
import org.elasticsearch.common.unit.DistanceUnit;
|
||||
import org.elasticsearch.common.xcontent.XContentBuilder;
|
||||
import org.elasticsearch.common.xcontent.XContentFactory;
|
||||
import org.elasticsearch.index.fielddata.IndexFieldData;
|
||||
import org.elasticsearch.index.mapper.Uid;
|
||||
import org.elasticsearch.index.query.QueryBuilders;
|
||||
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
|
||||
|
@ -2017,10 +2016,9 @@ public class SimpleSortTests extends ElasticsearchIntegrationTest {
|
|||
.addSort(fieldSort("str_field").order(SortOrder.ASC).unmappedType("string"))
|
||||
.addSort(fieldSort("str_field2").order(SortOrder.DESC).unmappedType("string")).get();
|
||||
|
||||
final StringAndBytesText maxTerm = new StringAndBytesText(IndexFieldData.XFieldComparatorSource.MAX_TERM.utf8ToString());
|
||||
assertSortValues(resp,
|
||||
new Object[] {new StringAndBytesText("bcd"), null},
|
||||
new Object[] {maxTerm, null});
|
||||
new Object[] {null, null});
|
||||
|
||||
resp = client().prepareSearch("test1", "test2")
|
||||
.addSort(fieldSort("long_field").order(SortOrder.ASC).unmappedType("long"))
|
||||
|
|
Loading…
Reference in New Issue