Merge pull request #12127 from jpountz/fix/sort_merge

Clean up handling of missing values when merging shard results on the coordinating node.
2015-07-09 08:55:52 +02:00 · 2015-07-09 08:55:52 +02:00 · f82b5ce201
parent 5a84e5264d fbab48e451
commit f82b5ce201
8 changed files with 69 additions and 65 deletions
--- a/core/src/main/java/org/elasticsearch/common/lucene/Lucene.java
+++ b/core/src/main/java/org/elasticsearch/common/lucene/Lucene.java
@ -413,7 +413,13 @@ public class Lucene {
                if (in.readBoolean()) {
                    field = in.readString();
                }
-                fields[i] = new SortField(field, readSortType(in), in.readBoolean());
+                SortField.Type sortType = readSortType(in);
+                Object missingValue = readMissingValue(in);
+                boolean reverse = in.readBoolean();
+                fields[i] = new SortField(field, sortType, reverse);
+                if (missingValue != null) {
+                    fields[i].setMissingValue(missingValue);
+                }
            }

            FieldDoc[] fieldDocs = new FieldDoc[in.readVInt()];
@ -485,9 +491,12 @@ public class Lucene {
                    out.writeString(sortField.getField());
                }
                if (sortField.getComparatorSource() != null) {
-                    writeSortType(out, ((IndexFieldData.XFieldComparatorSource) sortField.getComparatorSource()).reducedType());
+                    IndexFieldData.XFieldComparatorSource comparatorSource = (IndexFieldData.XFieldComparatorSource) sortField.getComparatorSource();
+                    writeSortType(out, comparatorSource.reducedType());
+                    writeMissingValue(out, comparatorSource.missingValue(sortField.getReverse()));
                } else {
                    writeSortType(out, sortField.getType());
+                    writeMissingValue(out, sortField.missingValue);
                }
                out.writeBoolean(sortField.getReverse());
            }
@ -508,6 +517,31 @@ public class Lucene {
        }
    }

+    private static void writeMissingValue(StreamOutput out, Object missingValue) throws IOException {
+        if (missingValue == SortField.STRING_FIRST) {
+            out.writeByte((byte) 1);
+        } else if (missingValue == SortField.STRING_LAST) {
+            out.writeByte((byte) 2);
+        } else {
+            out.writeByte((byte) 0);
+            out.writeGenericValue(missingValue);
+        }
+    }
+
+    private static Object readMissingValue(StreamInput in) throws IOException {
+        final byte id = in.readByte();
+        switch (id) {
+        case 0:
+            return in.readGenericValue();
+        case 1:
+            return SortField.STRING_FIRST;
+        case 2:
+            return SortField.STRING_LAST;
+        default:
+            throw new IOException("Unknown missing value id: " + id);
+        }
+    }
+
    public static void writeFieldDoc(StreamOutput out, FieldDoc fieldDoc) throws IOException {
        out.writeVInt(fieldDoc.fields.length);
        for (Object field : fieldDoc.fields) {
--- a/core/src/main/java/org/elasticsearch/index/fielddata/IndexFieldData.java
+++ b/core/src/main/java/org/elasticsearch/index/fielddata/IndexFieldData.java
@ -25,13 +25,11 @@ import org.apache.lucene.search.*;
 import org.apache.lucene.search.join.BitDocIdSetFilter;
 import org.apache.lucene.util.BitDocIdSet;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefBuilder;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.IndexComponent;
 import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
-import org.elasticsearch.index.mapper.FieldMapper;
 import org.elasticsearch.index.mapper.MappedFieldType;
 import org.elasticsearch.index.mapper.MapperService;
 import org.elasticsearch.index.settings.IndexSettings;
@ -112,16 +110,6 @@ public interface IndexFieldData<FD extends AtomicFieldData> extends IndexCompone
    // on another node (we don't have the custom source them...)
    public abstract class XFieldComparatorSource extends FieldComparatorSource {

-        /** UTF-8 term containing a single code point: {@link Character#MAX_CODE_POINT} which will compare greater than all other index terms
-         *  since {@link Character#MAX_CODE_POINT} is a noncharacter and thus shouldn't appear in an index term. */
-        public static final BytesRef MAX_TERM;
-        static {
-            BytesRefBuilder builder = new BytesRefBuilder();
-            final char[] chars = Character.toChars(Character.MAX_CODE_POINT);
-            builder.copyChars(chars, 0, chars.length);
-            MAX_TERM = builder.toBytesRef();
-        }
-
        /**
         * Simple wrapper class around a filter that matches parent documents
         * and a filter that matches child documents. For every root document R,
@ -179,7 +167,7 @@ public interface IndexFieldData<FD extends AtomicFieldData> extends IndexCompone
                    return min ? Double.NEGATIVE_INFINITY : Double.POSITIVE_INFINITY;
                case STRING:
                case STRING_VAL:
-                    return min ? null : MAX_TERM;
+                    return null;
                default:
                    throw new UnsupportedOperationException("Unsupported reduced type: " + reducedType());
                }
@ -225,6 +213,18 @@ public interface IndexFieldData<FD extends AtomicFieldData> extends IndexCompone
        }

        public abstract SortField.Type reducedType();
+
+        /**
+         * Return a missing value that is understandable by {@link SortField#setMissingValue(Object)}.
+         * Most implementations return null because they already replace the value at the fielddata level.
+         * However this can't work in case of strings since there is no such thing as a string which
+         * compares greater than any other string, so in that case we need to return
+         * {@link SortField#STRING_FIRST} or {@link SortField#STRING_LAST} so that the coordinating node
+         * knows how to deal with null values.
+         */
+        public Object missingValue(boolean reversed) {
+            return null;
+        }
    }

    interface Builder {
--- a/core/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefFieldComparatorSource.java
+++ b/core/src/main/java/org/elasticsearch/index/fielddata/fieldcomparator/BytesRefFieldComparatorSource.java
@ -59,6 +59,19 @@ public class BytesRefFieldComparatorSource extends IndexFieldData.XFieldComparat
        return SortField.Type.STRING;
    }

+    @Override
+    public Object missingValue(boolean reversed) {
+        if (sortMissingFirst(missingValue) || sortMissingLast(missingValue)) {
+            if (sortMissingLast(missingValue) ^ reversed) {
+                return SortField.STRING_LAST;
+            } else {
+                return SortField.STRING_FIRST;
+            }
+        }
+        // otherwise we fill missing values ourselves
+        return null;
+    }
+
    protected SortedBinaryDocValues getValues(LeafReaderContext context) throws IOException {
        return indexFieldData.load(context).getBytesValues();
    }
@ -97,29 +110,6 @@ public class BytesRefFieldComparatorSource extends IndexFieldData.XFieldComparat
                    BytesRefFieldComparatorSource.this.setScorer(scorer);
                }

-                @Override
-                public BytesRef value(int slot) {
-                    // TODO: When serializing the response to the coordinating node, we lose the information about
-                    // whether the comparator sorts missing docs first or last. We should fix it and let
-                    // TopDocs.merge deal with it (it knows how to)
-                    BytesRef value = super.value(slot);
-                    if (value == null) {
-                        assert sortMissingFirst(missingValue) || sortMissingLast(missingValue);
-                        value = missingBytes;
-                    }
-                    return value;
-                }
-                
-                @Override
-                public void setTopValue(BytesRef topValue) {
-                    // symetric of value(int): if we need to feed the comparator with <tt>null</tt>
-                    // if we overrode the value with MAX_TERM in value(int)
-                    if (topValue == missingBytes && (sortMissingFirst(missingValue) || sortMissingLast(missingValue))) {
-                        topValue = null;
-                    }
-                    super.setTopValue(topValue);
-                }
-
            };
        }

@ -156,15 +146,6 @@ public class BytesRefFieldComparatorSource extends IndexFieldData.XFieldComparat
                BytesRefFieldComparatorSource.this.setScorer(scorer);
            }

-            @Override
-            public BytesRef value(int slot) {
-                BytesRef value = super.value(slot);
-                if (value == null) {
-                    value = missingBytes;
-                }
-                return value;
-            }
-
        };
    }
    
--- a/core/src/main/java/org/elasticsearch/search/internal/InternalSearchHit.java
+++ b/core/src/main/java/org/elasticsearch/search/internal/InternalSearchHit.java
@ -64,7 +64,6 @@ import static org.elasticsearch.search.internal.InternalSearchHitField.readSearc
 public class InternalSearchHit implements SearchHit {

    private static final Object[] EMPTY_SORT_VALUES = new Object[0];
-    private static final Text MAX_TERM_AS_TEXT = new StringAndBytesText(BytesRefFieldComparatorSource.MAX_TERM.utf8ToString());

    private transient int docId;

@ -506,13 +505,7 @@ public class InternalSearchHit implements SearchHit {
        if (sortValues != null && sortValues.length > 0) {
            builder.startArray(Fields.SORT);
            for (Object sortValue : sortValues) {
-                if (sortValue != null && sortValue.equals(MAX_TERM_AS_TEXT)) {
-                    // We don't display MAX_TERM in JSON responses in case some clients have UTF-8 parsers that wouldn't accept a
-                    // non-character in the response, even though this is valid UTF-8
-                    builder.nullValue();
-                } else {
-                    builder.value(sortValue);
-                }
+                builder.value(sortValue);
            }
            builder.endArray();
        }
--- a/core/src/test/java/org/elasticsearch/index/fielddata/AbstractFieldDataImplTests.java
+++ b/core/src/test/java/org/elasticsearch/index/fielddata/AbstractFieldDataImplTests.java
@ -247,9 +247,9 @@ public abstract class AbstractFieldDataImplTests extends AbstractFieldDataTests
        assertThat(topDocs.scoreDocs[5].doc, equalTo(6));
        assertThat(((BytesRef) ((FieldDoc) topDocs.scoreDocs[5]).fields[0]).utf8ToString(), equalTo("08"));
        assertThat(topDocs.scoreDocs[6].doc, equalTo(1));
-        assertThat((BytesRef) ((FieldDoc) topDocs.scoreDocs[6]).fields[0], equalTo(XFieldComparatorSource.MAX_TERM));
+        assertThat((BytesRef) ((FieldDoc) topDocs.scoreDocs[6]).fields[0], equalTo(null));
        assertThat(topDocs.scoreDocs[7].doc, equalTo(5));
-        assertThat((BytesRef) ((FieldDoc) topDocs.scoreDocs[7]).fields[0], equalTo(XFieldComparatorSource.MAX_TERM));
+        assertThat((BytesRef) ((FieldDoc) topDocs.scoreDocs[7]).fields[0], equalTo(null));

        topDocs = searcher.search(new MatchAllDocsQuery(), 10,
                new Sort(new SortField("value", indexFieldData.comparatorSource(null, MultiValueMode.MAX, null), true)));
--- a/core/src/test/java/org/elasticsearch/index/fielddata/AbstractStringFieldDataTests.java
+++ b/core/src/test/java/org/elasticsearch/index/fielddata/AbstractStringFieldDataTests.java
@ -443,13 +443,11 @@ public abstract class AbstractStringFieldDataTests extends AbstractFieldDataImpl
            if (cmpValue == null) {
                if ("_first".equals(missingValue)) {
                    cmpValue = new BytesRef();
-                } else if ("_last".equals(missingValue)) {
-                    cmpValue = XFieldComparatorSource.MAX_TERM;
-                } else {
+                } else if ("_last".equals(missingValue) == false) {
                    cmpValue = (BytesRef) missingValue;
                }
            }
-            if (previous != null) {
+            if (previous != null && cmpValue != null) {
                assertTrue(previous.utf8ToString() + "   /   " + cmpValue.utf8ToString(), previous.compareTo(cmpValue) <= 0);
            }
            previous = cmpValue;
--- a/core/src/test/java/org/elasticsearch/index/fielddata/ParentChildFieldDataTests.java
+++ b/core/src/test/java/org/elasticsearch/index/fielddata/ParentChildFieldDataTests.java
@ -185,7 +185,7 @@ public class ParentChildFieldDataTests extends AbstractFieldDataTests {
        assertThat(topDocs.scoreDocs[6].doc, equalTo(6));
        assertThat(((BytesRef) ((FieldDoc) topDocs.scoreDocs[6]).fields[0]).utf8ToString(), equalTo("2"));
        assertThat(topDocs.scoreDocs[7].doc, equalTo(7));
-        assertThat(((BytesRef) ((FieldDoc) topDocs.scoreDocs[7]).fields[0]), equalTo(XFieldComparatorSource.MAX_TERM));
+        assertThat(((BytesRef) ((FieldDoc) topDocs.scoreDocs[7]).fields[0]), equalTo(null));

        topDocs = searcher.search(new MatchAllDocsQuery(), 10, new Sort(new SortField(ParentFieldMapper.NAME, comparator, true)));
        assertThat(topDocs.totalHits, equalTo(8));
--- a/core/src/test/java/org/elasticsearch/search/sort/SimpleSortTests.java
+++ b/core/src/test/java/org/elasticsearch/search/sort/SimpleSortTests.java
@ -37,7 +37,6 @@ import org.elasticsearch.common.text.Text;
 import org.elasticsearch.common.unit.DistanceUnit;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentFactory;
-import org.elasticsearch.index.fielddata.IndexFieldData;
 import org.elasticsearch.index.mapper.Uid;
 import org.elasticsearch.index.query.QueryBuilders;
 import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilders;
@ -2017,10 +2016,9 @@ public class SimpleSortTests extends ElasticsearchIntegrationTest {
                .addSort(fieldSort("str_field").order(SortOrder.ASC).unmappedType("string"))
                .addSort(fieldSort("str_field2").order(SortOrder.DESC).unmappedType("string")).get();

-        final StringAndBytesText maxTerm = new StringAndBytesText(IndexFieldData.XFieldComparatorSource.MAX_TERM.utf8ToString());
        assertSortValues(resp,
                new Object[] {new StringAndBytesText("bcd"), null},
-                new Object[] {maxTerm, null});
+                new Object[] {null, null});

        resp = client().prepareSearch("test1", "test2")
                .addSort(fieldSort("long_field").order(SortOrder.ASC).unmappedType("long"))