Merge pull request #12127 from jpountz/fix/sort_merge

Clean up handling of missing values when merging shard results on the coordinating node.
This commit is contained in:
Adrien Grand 2015-07-09 08:55:52 +02:00
commit f82b5ce201
8 changed files with 69 additions and 65 deletions

View File

@ -413,7 +413,13 @@ public class Lucene {
if (in.readBoolean()) {
field = in.readString();
}
fields[i] = new SortField(field, readSortType(in), in.readBoolean());
SortField.Type sortType = readSortType(in);
Object missingValue = readMissingValue(in);
boolean reverse = in.readBoolean();
fields[i] = new SortField(field, sortType, reverse);
if (missingValue != null) {
fields[i].setMissingValue(missingValue);
}
}
FieldDoc[] fieldDocs = new FieldDoc[in.readVInt()];
@ -485,9 +491,12 @@ public class Lucene {
out.writeString(sortField.getField());
}
if (sortField.getComparatorSource() != null) {
writeSortType(out, ((IndexFieldData.XFieldComparatorSource) sortField.getComparatorSource()).reducedType());
IndexFieldData.XFieldComparatorSource comparatorSource = (IndexFieldData.XFieldComparatorSource) sortField.getComparatorSource();
writeSortType(out, comparatorSource.reducedType());
writeMissingValue(out, comparatorSource.missingValue(sortField.getReverse()));
} else {
writeSortType(out, sortField.getType());
writeMissingValue(out, sortField.missingValue);
}
out.writeBoolean(sortField.getReverse());
}
@ -508,6 +517,31 @@ public class Lucene {
}
}
private static void writeMissingValue(StreamOutput out, Object missingValue) throws IOException {
if (missingValue == SortField.STRING_FIRST) {
out.writeByte((byte) 1);
} else if (missingValue == SortField.STRING_LAST) {
out.writeByte((byte) 2);
} else {
out.writeByte((byte) 0);
out.writeGenericValue(missingValue);
}
}
private static Object readMissingValue(StreamInput in) throws IOException {
final byte id = in.readByte();
switch (id) {
case 0:
return in.readGenericValue();
case 1:
return SortField.STRING_FIRST;
case 2:
return SortField.STRING_LAST;
default:
throw new IOException("Unknown missing value id: " + id);
}
}
public static void writeFieldDoc(StreamOutput out, FieldDoc fieldDoc) throws IOException {
out.writeVInt(fieldDoc.fields.length);
for (Object field : fieldDoc.fields) {

View File

@ -25,13 +25,11 @@ import org.apache.lucene.search.*;
import org.apache.lucene.search.join.BitDocIdSetFilter;
import org.apache.lucene.util.BitDocIdSet;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexComponent;
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.settings.IndexSettings;
@ -112,16 +110,6 @@ public interface IndexFieldData<FD extends AtomicFieldData> extends IndexCompone
// on another node (we don't have the custom source them...)
public abstract class XFieldComparatorSource extends FieldComparatorSource {
/** UTF-8 term containing a single code point: {@link Character#MAX_CODE_POINT} which will compare greater than all other index terms
* since {@link Character#MAX_CODE_POINT} is a noncharacter and thus shouldn't appear in an index term. */
public static final BytesRef MAX_TERM;
static {
BytesRefBuilder builder = new BytesRefBuilder();
final char[] chars = Character.toChars(Character.MAX_CODE_POINT);
builder.copyChars(chars, 0, chars.length);
MAX_TERM = builder.toBytesRef();
}
/**
* Simple wrapper class around a filter that matches parent documents
* and a filter that matches child documents. For every root document R,
@ -179,7 +167,7 @@ public interface IndexFieldData<FD extends AtomicFieldData> extends IndexCompone
return min ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
case STRING:
case STRING_VAL:
return min ? null : MAX_TERM;
return null;
default:
throw new UnsupportedOperationException("Unsupported reduced type: " + reducedType());
}
@ -225,6 +213,18 @@ public interface IndexFieldData<FD extends AtomicFieldData> extends IndexCompone
}
public abstract SortField.Type reducedType();
/**
* Return a missing value that is understandable by {@link SortField#setMissingValue(Object)}.
* Most implementations return null because they already replace the value at the fielddata level.
* However this can't work in case of strings since there is no such thing as a string which
* compares greater than any other string, so in that case we need to return
* {@link SortField#STRING_FIRST} or {@link SortField#STRING_LAST} so that the coordinating node
* knows how to deal with null values.
*/
public Object missingValue(boolean reversed) {
return null;
}
}
interface Builder {

View File

@ -59,6 +59,19 @@ public class BytesRefFieldComparatorSource extends IndexFieldData.XFieldComparat
return SortField.Type.STRING;
}
@Override
public Object missingValue(boolean reversed) {
if (sortMissingFirst(missingValue) || sortMissingLast(missingValue)) {
if (sortMissingLast(missingValue) ^ reversed) {
return SortField.STRING_LAST;
} else {
return SortField.STRING_FIRST;
}
}
// otherwise we fill missing values ourselves
return null;
}
protected SortedBinaryDocValues getValues(LeafReaderContext context) throws IOException {
return indexFieldData.load(context).getBytesValues();
}
@ -97,29 +110,6 @@ public class BytesRefFieldComparatorSource extends IndexFieldData.XFieldComparat
BytesRefFieldComparatorSource.this.setScorer(scorer);
}
@Override
public BytesRef value(int slot) {
// TODO: When serializing the response to the coordinating node, we lose the information about
// whether the comparator sorts missing docs first or last. We should fix it and let
// TopDocs.merge deal with it (it knows how to)
BytesRef value = super.value(slot);
if (value == null) {
assert sortMissingFirst(missingValue) || sortMissingLast(missingValue);
value = missingBytes;
}
return value;
}
@Override
public void setTopValue(BytesRef topValue) {
// symetric of value(int): if we need to feed the comparator with <tt>null</tt>
// if we overrode the value with MAX_TERM in value(int)
if (topValue == missingBytes && (sortMissingFirst(missingValue) || sortMissingLast(missingValue))) {
topValue = null;
}
super.setTopValue(topValue);
}
};
}
@ -156,15 +146,6 @@ public class BytesRefFieldComparatorSource extends IndexFieldData.XFieldComparat
BytesRefFieldComparatorSource.this.setScorer(scorer);
}
@Override
public BytesRef value(int slot) {
BytesRef value = super.value(slot);
if (value == null) {
value = missingBytes;
}
return value;
}
};
}

View File

@ -64,7 +64,6 @@ import static org.elasticsearch.search.internal.InternalSearchHitField.readSearc
public class InternalSearchHit implements SearchHit {
private static final Object[] EMPTY_SORT_VALUES = new Object[0];
private static final Text MAX_TERM_AS_TEXT = new StringAndBytesText(BytesRefFieldComparatorSource.MAX_TERM.utf8ToString());
private transient int docId;
@ -506,13 +505,7 @@ public class InternalSearchHit implements SearchHit {
if (sortValues != null && sortValues.length > 0) {
builder.startArray(Fields.SORT);
for (Object sortValue : sortValues) {
if (sortValue != null && sortValue.equals(MAX_TERM_AS_TEXT)) {
// We don't display MAX_TERM in JSON responses in case some clients have UTF-8 parsers that wouldn't accept a
// non-character in the response, even though this is valid UTF-8
builder.nullValue();
} else {
builder.value(sortValue);
}
builder.value(sortValue);
}
builder.endArray();
}

View File

@ -247,9 +247,9 @@ public abstract class AbstractFieldDataImplTests extends AbstractFieldDataTests
assertThat(topDocs.scoreDocs[5].doc, equalTo(6));
assertThat(((BytesRef) ((FieldDoc) topDocs.scoreDocs[5]).fields[0]).utf8ToString(), equalTo("08"));
assertThat(topDocs.scoreDocs[6].doc, equalTo(1));
assertThat((BytesRef) ((FieldDoc) topDocs.scoreDocs[6]).fields[0], equalTo(XFieldComparatorSource.MAX_TERM));
assertThat((BytesRef) ((FieldDoc) topDocs.scoreDocs[6]).fields[0], equalTo(null));
assertThat(topDocs.scoreDocs[7].doc, equalTo(5));
assertThat((BytesRef) ((FieldDoc) topDocs.scoreDocs[7]).fields[0], equalTo(XFieldComparatorSource.MAX_TERM));
assertThat((BytesRef) ((FieldDoc) topDocs.scoreDocs[7]).fields[0], equalTo(null));
topDocs = searcher.search(new MatchAllDocsQuery(), 10,
new Sort(new SortField("value", indexFieldData.comparatorSource(null, MultiValueMode.MAX, null), true)));

View File

@ -443,13 +443,11 @@ public abstract class AbstractStringFieldDataTests extends AbstractFieldDataImpl
if (cmpValue == null) {
if ("_first".equals(missingValue)) {
cmpValue = new BytesRef();
} else if ("_last".equals(missingValue)) {
cmpValue = XFieldComparatorSource.MAX_TERM;
} else {
} else if ("_last".equals(missingValue) == false) {
cmpValue = (BytesRef) missingValue;
}
}
if (previous != null) {
if (previous != null && cmpValue != null) {
assertTrue(previous.utf8ToString() + " / " + cmpValue.utf8ToString(), previous.compareTo(cmpValue) <= 0);
}
previous = cmpValue;

View File

@ -185,7 +185,7 @@ public class ParentChildFieldDataTests extends AbstractFieldDataTests {
assertThat(topDocs.scoreDocs[6].doc, equalTo(6));
assertThat(((BytesRef) ((FieldDoc) topDocs.scoreDocs[6]).fields[0]).utf8ToString(), equalTo("2"));
assertThat(topDocs.scoreDocs[7].doc, equalTo(7));
assertThat(((BytesRef) ((FieldDoc) topDocs.scoreDocs[7]).fields[0]), equalTo(XFieldComparatorSource.MAX_TERM));
assertThat(((BytesRef) ((FieldDoc) topDocs.scoreDocs[7]).fields[0]), equalTo(null));
topDocs = searcher.search(new MatchAllDocsQuery(), 10, new Sort(new SortField(ParentFieldMapper.NAME, comparator, true)));
assertThat(topDocs.totalHits, equalTo(8));

View File

@ -37,7 +37,6 @@ import org.elasticsearch.common.text.Text;
import org.elasticsearch.common.unit.DistanceUnit;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
@ -2017,10 +2016,9 @@ public class SimpleSortTests extends ElasticsearchIntegrationTest {
.addSort(fieldSort("str_field").order(SortOrder.ASC).unmappedType("string"))
.addSort(fieldSort("str_field2").order(SortOrder.DESC).unmappedType("string")).get();
final StringAndBytesText maxTerm = new StringAndBytesText(IndexFieldData.XFieldComparatorSource.MAX_TERM.utf8ToString());
assertSortValues(resp,
new Object[] {new StringAndBytesText("bcd"), null},
new Object[] {maxTerm, null});
new Object[] {null, null});
resp = client().prepareSearch("test1", "test2")
.addSort(fieldSort("long_field").order(SortOrder.ASC).unmappedType("long"))