diff --git a/core/src/main/java/org/elasticsearch/common/lucene/Lucene.java b/core/src/main/java/org/elasticsearch/common/lucene/Lucene.java index 031a1498898..4c1e7c49394 100644 --- a/core/src/main/java/org/elasticsearch/common/lucene/Lucene.java +++ b/core/src/main/java/org/elasticsearch/common/lucene/Lucene.java @@ -413,7 +413,13 @@ public class Lucene { if (in.readBoolean()) { field = in.readString(); } - fields[i] = new SortField(field, readSortType(in), in.readBoolean()); + SortField.Type sortType = readSortType(in); + Object missingValue = readMissingValue(in); + boolean reverse = in.readBoolean(); + fields[i] = new SortField(field, sortType, reverse); + if (missingValue != null) { + fields[i].setMissingValue(missingValue); + } } FieldDoc[] fieldDocs = new FieldDoc[in.readVInt()]; @@ -485,9 +491,12 @@ public class Lucene { out.writeString(sortField.getField()); } if (sortField.getComparatorSource() != null) { - writeSortType(out, ((IndexFieldData.XFieldComparatorSource) sortField.getComparatorSource()).reducedType()); + IndexFieldData.XFieldComparatorSource comparatorSource = (IndexFieldData.XFieldComparatorSource) sortField.getComparatorSource(); + writeSortType(out, comparatorSource.reducedType()); + writeMissingValue(out, comparatorSource.missingValue(sortField.getReverse())); } else { writeSortType(out, sortField.getType()); + writeMissingValue(out, sortField.missingValue); } out.writeBoolean(sortField.getReverse()); } @@ -508,6 +517,31 @@ public class Lucene { } } + private static void writeMissingValue(StreamOutput out, Object missingValue) throws IOException { + if (missingValue == SortField.STRING_FIRST) { + out.writeByte((byte) 1); + } else if (missingValue == SortField.STRING_LAST) { + out.writeByte((byte) 2); + } else { + out.writeByte((byte) 0); + out.writeGenericValue(missingValue); + } + } + + private static Object readMissingValue(StreamInput in) throws IOException { + final byte id = in.readByte(); + switch (id) { + case 0: + return in.readGenericValue(); + case 1: + return SortField.STRING_FIRST; + case 2: + return SortField.STRING_LAST; + default: + throw new IOException("Unknown missing value id: " + id); + } + } + public static void writeFieldDoc(StreamOutput out, FieldDoc fieldDoc) throws IOException { out.writeVInt(fieldDoc.fields.length); for (Object field : fieldDoc.fields) { diff --git a/core/src/main/java/org/elasticsearch/index/fielddata/IndexFieldData.java b/core/src/main/java/org/elasticsearch/index/fielddata/IndexFieldData.java index d53651ee762..f2d77b5af44 100644 --- a/core/src/main/java/org/elasticsearch/index/fielddata/IndexFieldData.java +++ b/core/src/main/java/org/elasticsearch/index/fielddata/IndexFieldData.java @@ -25,13 +25,11 @@ import org.apache.lucene.search.*; import org.apache.lucene.search.join.BitDocIdSetFilter; import org.apache.lucene.util.BitDocIdSet; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.BytesRefBuilder; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.index.IndexComponent; import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested; -import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.MappedFieldType; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.settings.IndexSettings; @@ -112,16 +110,6 @@ public interface IndexFieldData extends IndexCompone // on another node (we don't have the custom source them...) public abstract class XFieldComparatorSource extends FieldComparatorSource { - /** UTF-8 term containing a single code point: {@link Character#MAX_CODE_POINT} which will compare greater than all other index terms - * since {@link Character#MAX_CODE_POINT} is a noncharacter and thus shouldn't appear in an index term. */ - public static final BytesRef MAX_TERM; - static { - BytesRefBuilder builder = new BytesRefBuilder(); - final char[] chars = Character.toChars(Character.MAX_CODE_POINT); - builder.copyChars(chars, 0, chars.length); - MAX_TERM = builder.toBytesRef(); - } - /** * Simple wrapper class around a filter that matches parent documents * and a filter that matches child documents. For every root document R, @@ -179,7 +167,7 @@ public interface IndexFieldData extends IndexCompone return min ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY; case STRING: case STRING_VAL: - return min ? null : MAX_TERM; + return null; default: throw new UnsupportedOperationException("Unsupported reduced type: " + reducedType()); } @@ -225,6 +213,18 @@ public interface IndexFieldData extends IndexCompone } public abstract SortField.Type reducedType(); + + /** + * Return a missing value that is understandable by {@link SortField#setMissingValue(Object)}. + * Most implementations return null because they already replace the value at the fielddata level. + * However this can't work in case of strings since there is no such thing as a string which + * compares greater than any other string, so in that case we need to return + * {@link SortField#STRING_FIRST} or {@link SortField#STRING_LAST} so that the coordinating node + * knows how to deal with null values. + */ + public Object missingValue(boolean reversed) { + return null; + } } interface Builder { diff --git a/core/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefFieldComparatorSource.java b/core/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefFieldComparatorSource.java index 71ea9a90edb..8b15049e188 100644 --- a/core/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefFieldComparatorSource.java +++ b/core/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefFieldComparatorSource.java @@ -59,6 +59,19 @@ public class BytesRefFieldComparatorSource extends IndexFieldData.XFieldComparat return SortField.Type.STRING; } + @Override + public Object missingValue(boolean reversed) { + if (sortMissingFirst(missingValue) || sortMissingLast(missingValue)) { + if (sortMissingLast(missingValue) ^ reversed) { + return SortField.STRING_LAST; + } else { + return SortField.STRING_FIRST; + } + } + // otherwise we fill missing values ourselves + return null; + } + protected SortedBinaryDocValues getValues(LeafReaderContext context) throws IOException { return indexFieldData.load(context).getBytesValues(); } @@ -97,29 +110,6 @@ public class BytesRefFieldComparatorSource extends IndexFieldData.XFieldComparat BytesRefFieldComparatorSource.this.setScorer(scorer); } - @Override - public BytesRef value(int slot) { - // TODO: When serializing the response to the coordinating node, we lose the information about - // whether the comparator sorts missing docs first or last. We should fix it and let - // TopDocs.merge deal with it (it knows how to) - BytesRef value = super.value(slot); - if (value == null) { - assert sortMissingFirst(missingValue) || sortMissingLast(missingValue); - value = missingBytes; - } - return value; - } - - @Override - public void setTopValue(BytesRef topValue) { - // symetric of value(int): if we need to feed the comparator with null - // if we overrode the value with MAX_TERM in value(int) - if (topValue == missingBytes && (sortMissingFirst(missingValue) || sortMissingLast(missingValue))) { - topValue = null; - } - super.setTopValue(topValue); - } - }; } @@ -156,15 +146,6 @@ public class BytesRefFieldComparatorSource extends IndexFieldData.XFieldComparat BytesRefFieldComparatorSource.this.setScorer(scorer); } - @Override - public BytesRef value(int slot) { - BytesRef value = super.value(slot); - if (value == null) { - value = missingBytes; - } - return value; - } - }; } diff --git a/core/src/main/java/org/elasticsearch/search/internal/InternalSearchHit.java b/core/src/main/java/org/elasticsearch/search/internal/InternalSearchHit.java index 3b475c5bd51..649bdc22370 100644 --- a/core/src/main/java/org/elasticsearch/search/internal/InternalSearchHit.java +++ b/core/src/main/java/org/elasticsearch/search/internal/InternalSearchHit.java @@ -64,7 +64,6 @@ import static org.elasticsearch.search.internal.InternalSearchHitField.readSearc public class InternalSearchHit implements SearchHit { private static final Object[] EMPTY_SORT_VALUES = new Object[0]; - private static final Text MAX_TERM_AS_TEXT = new StringAndBytesText(BytesRefFieldComparatorSource.MAX_TERM.utf8ToString()); private transient int docId; @@ -506,13 +505,7 @@ public class InternalSearchHit implements SearchHit { if (sortValues != null && sortValues.length > 0) { builder.startArray(Fields.SORT); for (Object sortValue : sortValues) { - if (sortValue != null && sortValue.equals(MAX_TERM_AS_TEXT)) { - // We don't display MAX_TERM in JSON responses in case some clients have UTF-8 parsers that wouldn't accept a - // non-character in the response, even though this is valid UTF-8 - builder.nullValue(); - } else { - builder.value(sortValue); - } + builder.value(sortValue); } builder.endArray(); } diff --git a/core/src/test/java/org/elasticsearch/index/fielddata/AbstractFieldDataImplTests.java b/core/src/test/java/org/elasticsearch/index/fielddata/AbstractFieldDataImplTests.java index fe5bf83a29b..03328f24c3b 100644 --- a/core/src/test/java/org/elasticsearch/index/fielddata/AbstractFieldDataImplTests.java +++ b/core/src/test/java/org/elasticsearch/index/fielddata/AbstractFieldDataImplTests.java @@ -247,9 +247,9 @@ public abstract class AbstractFieldDataImplTests extends AbstractFieldDataTests assertThat(topDocs.scoreDocs[5].doc, equalTo(6)); assertThat(((BytesRef) ((FieldDoc) topDocs.scoreDocs[5]).fields[0]).utf8ToString(), equalTo("08")); assertThat(topDocs.scoreDocs[6].doc, equalTo(1)); - assertThat((BytesRef) ((FieldDoc) topDocs.scoreDocs[6]).fields[0], equalTo(XFieldComparatorSource.MAX_TERM)); + assertThat((BytesRef) ((FieldDoc) topDocs.scoreDocs[6]).fields[0], equalTo(null)); assertThat(topDocs.scoreDocs[7].doc, equalTo(5)); - assertThat((BytesRef) ((FieldDoc) topDocs.scoreDocs[7]).fields[0], equalTo(XFieldComparatorSource.MAX_TERM)); + assertThat((BytesRef) ((FieldDoc) topDocs.scoreDocs[7]).fields[0], equalTo(null)); topDocs = searcher.search(new MatchAllDocsQuery(), 10, new Sort(new SortField("value", indexFieldData.comparatorSource(null, MultiValueMode.MAX, null), true))); diff --git a/core/src/test/java/org/elasticsearch/index/fielddata/AbstractStringFieldDataTests.java b/core/src/test/java/org/elasticsearch/index/fielddata/AbstractStringFieldDataTests.java index e9e5839bc0e..4d19f92097d 100644 --- a/core/src/test/java/org/elasticsearch/index/fielddata/AbstractStringFieldDataTests.java +++ b/core/src/test/java/org/elasticsearch/index/fielddata/AbstractStringFieldDataTests.java @@ -443,13 +443,11 @@ public abstract class AbstractStringFieldDataTests extends AbstractFieldDataImpl if (cmpValue == null) { if ("_first".equals(missingValue)) { cmpValue = new BytesRef(); - } else if ("_last".equals(missingValue)) { - cmpValue = XFieldComparatorSource.MAX_TERM; - } else { + } else if ("_last".equals(missingValue) == false) { cmpValue = (BytesRef) missingValue; } } - if (previous != null) { + if (previous != null && cmpValue != null) { assertTrue(previous.utf8ToString() + " / " + cmpValue.utf8ToString(), previous.compareTo(cmpValue) <= 0); } previous = cmpValue; diff --git a/core/src/test/java/org/elasticsearch/index/fielddata/ParentChildFieldDataTests.java b/core/src/test/java/org/elasticsearch/index/fielddata/ParentChildFieldDataTests.java index ad1639a8d28..90934bc177b 100644 --- a/core/src/test/java/org/elasticsearch/index/fielddata/ParentChildFieldDataTests.java +++ b/core/src/test/java/org/elasticsearch/index/fielddata/ParentChildFieldDataTests.java @@ -185,7 +185,7 @@ public class ParentChildFieldDataTests extends AbstractFieldDataTests { assertThat(topDocs.scoreDocs[6].doc, equalTo(6)); assertThat(((BytesRef) ((FieldDoc) topDocs.scoreDocs[6]).fields[0]).utf8ToString(), equalTo("2")); assertThat(topDocs.scoreDocs[7].doc, equalTo(7)); - assertThat(((BytesRef) ((FieldDoc) topDocs.scoreDocs[7]).fields[0]), equalTo(XFieldComparatorSource.MAX_TERM)); + assertThat(((BytesRef) ((FieldDoc) topDocs.scoreDocs[7]).fields[0]), equalTo(null)); topDocs = searcher.search(new MatchAllDocsQuery(), 10, new Sort(new SortField(ParentFieldMapper.NAME, comparator, true))); assertThat(topDocs.totalHits, equalTo(8)); diff --git a/core/src/test/java/org/elasticsearch/search/sort/SimpleSortTests.java b/core/src/test/java/org/elasticsearch/search/sort/SimpleSortTests.java index 1cf81f87e99..d6a9cc8dd5d 100644 --- a/core/src/test/java/org/elasticsearch/search/sort/SimpleSortTests.java +++ b/core/src/test/java/org/elasticsearch/search/sort/SimpleSortTests.java @@ -37,7 +37,6 @@ import org.elasticsearch.common.text.Text; import org.elasticsearch.common.unit.DistanceUnit; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; -import org.elasticsearch.index.fielddata.IndexFieldData; import org.elasticsearch.index.mapper.Uid; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders; @@ -2017,10 +2016,9 @@ public class SimpleSortTests extends ElasticsearchIntegrationTest { .addSort(fieldSort("str_field").order(SortOrder.ASC).unmappedType("string")) .addSort(fieldSort("str_field2").order(SortOrder.DESC).unmappedType("string")).get(); - final StringAndBytesText maxTerm = new StringAndBytesText(IndexFieldData.XFieldComparatorSource.MAX_TERM.utf8ToString()); assertSortValues(resp, new Object[] {new StringAndBytesText("bcd"), null}, - new Object[] {maxTerm, null}); + new Object[] {null, null}); resp = client().prepareSearch("test1", "test2") .addSort(fieldSort("long_field").order(SortOrder.ASC).unmappedType("long"))