Fix InternalSearchHits serialization to be deterministic
The assertion on binary equality for streamable serialization sometimes fails because the InternalSearchHits serialization iterates identity hash maps. This only happens when the result set is composed of a very high number of shards. This commit makes the serialization deterministic and removes the need to serialize the ordinal, since the shard targets are now written in order.
This commit is contained in:
parent 924f91588b
commit 8db7b2b20b
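For context, a minimal sketch (not part of the commit) of why iterating an identity hash map breaks binary equality: IdentityHashMap orders entries by identity hash code, which typically changes from one JVM run to the next, so writing the shard-target lookup table in map-iteration order can produce different bytes for the same logical result. The ShardKey class below is a hypothetical stand-in for SearchShardTarget.

import java.util.IdentityHashMap;
import java.util.Map;

public class IdentityOrderSketch {

    // Hypothetical stand-in for SearchShardTarget; only object identity matters here.
    static final class ShardKey {
        final String id;
        ShardKey(String id) { this.id = id; }
    }

    public static void main(String[] args) {
        Map<ShardKey, Integer> shardHandleLookup = new IdentityHashMap<>();
        for (int i = 0; i < 50; i++) {
            // handle assignment mirrors the "start from 1, 0 is for null" convention
            shardHandleLookup.put(new ShardKey("node_" + i + "[0]"), i + 1);
        }
        // Iteration order follows identity hash codes, not insertion order, and it
        // typically differs between JVM runs; with many shards, two serializations
        // written in this order will not be byte-for-byte identical.
        for (Map.Entry<ShardKey, Integer> entry : shardHandleLookup.entrySet()) {
            System.out.println(entry.getValue() + " -> " + entry.getKey().id);
        }
    }
}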
InternalSearchHits.java
@@ -21,6 +21,7 @@ package org.elasticsearch.search.internal;
 
+import com.carrotsearch.hppc.IntObjectOpenHashMap;
 import com.google.common.collect.Iterators;
 import org.elasticsearch.Version;
 import org.elasticsearch.common.io.stream.StreamInput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.xcontent.XContentBuilder;
@@ -30,9 +31,7 @@ import org.elasticsearch.search.SearchHits;
 import org.elasticsearch.search.SearchShardTarget;
 
 import java.io.IOException;
-import java.util.IdentityHashMap;
-import java.util.Iterator;
-import java.util.Map;
+import java.util.*;
 
 import static org.elasticsearch.search.SearchShardTarget.readSearchShardTarget;
 import static org.elasticsearch.search.internal.InternalSearchHit.readSearchHit;
@@ -212,9 +211,16 @@ public class InternalSearchHits implements SearchHits {
         } else {
             if (context.streamShardTarget() == StreamContext.ShardTargetType.LOOKUP) {
                 // read the lookup table first
-                int lookupSize = in.readVInt();
-                for (int i = 0; i < lookupSize; i++) {
-                    context.handleShardLookup().put(in.readVInt(), readSearchShardTarget(in));
+                final IntObjectOpenHashMap<SearchShardTarget> handleShardLookup = context.handleShardLookup();
+                final int lookupSize = in.readVInt();
+                if (in.getVersion().onOrAfter(Version.V_1_4_0)) {
+                    for (int i = 0; i < lookupSize; i++) {
+                        handleShardLookup.put(i + 1, readSearchShardTarget(in));
+                    }
+                } else {
+                    for (int i = 0; i < lookupSize; i++) {
+                        handleShardLookup.put(in.readVInt(), readSearchShardTarget(in));
+                    }
                 }
             }
@@ -238,19 +244,26 @@ public class InternalSearchHits implements SearchHits {
         if (context.streamShardTarget() == StreamContext.ShardTargetType.LOOKUP) {
             // start from 1, 0 is for null!
             int counter = 1;
+            List<SearchShardTarget> targets = new ArrayList<>();
             for (InternalSearchHit hit : hits) {
                 if (hit.shard() != null) {
                     Integer handle = context.shardHandleLookup().get(hit.shard());
                     if (handle == null) {
                         context.shardHandleLookup().put(hit.shard(), counter++);
+                        targets.add(hit.shard());
                     }
                 }
             }
-            out.writeVInt(context.shardHandleLookup().size());
-            if (!context.shardHandleLookup().isEmpty()) {
-                for (Map.Entry<SearchShardTarget, Integer> entry : context.shardHandleLookup().entrySet()) {
-                    out.writeVInt(entry.getValue());
-                    entry.getKey().writeTo(out);
+            out.writeVInt(targets.size());
+            if (out.getVersion().onOrAfter(Version.V_1_4_0)) {
+                for (int i = 0; i < targets.size(); i++) {
+                    // the ordinal is implicit here since we write it in-order
+                    targets.get(i).writeTo(out);
+                }
+            } else {
+                for (int i = 0; i < targets.size(); i++) {
+                    out.writeVInt(i+1);
+                    targets.get(i).writeTo(out);
                 }
             }
         }
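The write path above notes that the ordinal is implicit because the targets are written in order. A self-contained sketch of that pattern, using plain java.io streams and String in place of Elasticsearch's StreamOutput/StreamInput and SearchShardTarget (the class and method names here are hypothetical): the writer emits the lookup table in handle order, and the reader reconstructs handle i + 1 from the position alone, keeping 0 reserved for "no shard".

import java.io.*;
import java.util.*;

public class InOrderLookupSketch {

    static byte[] write(List<String> targetsInHandleOrder) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bytes);
        out.writeInt(targetsInHandleOrder.size());
        for (String target : targetsInHandleOrder) {
            out.writeUTF(target);               // no explicit ordinal on the wire
        }
        return bytes.toByteArray();
    }

    static Map<Integer, String> read(byte[] data) throws IOException {
        DataInputStream in = new DataInputStream(new ByteArrayInputStream(data));
        Map<Integer, String> lookup = new LinkedHashMap<>();
        int size = in.readInt();
        for (int i = 0; i < size; i++) {
            lookup.put(i + 1, in.readUTF());    // handle is implied by the position
        }
        return lookup;
    }

    public static void main(String[] args) throws IOException {
        byte[] serialized = write(Arrays.asList("node_1[0]", "node_2[3]"));
        System.out.println(read(serialized));   // {1=node_1[0], 2=node_2[3]}
    }
}

Because the bytes depend only on the handle order, not on any hash-based iteration order, serializing the same hits twice yields identical output.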
SimpleSortTests.java
@@ -20,9 +20,7 @@
 package org.elasticsearch.search.sort;
 
-import com.carrotsearch.randomizedtesting.annotations.Repeat;
 import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.LuceneTestCase;
 import org.apache.lucene.util.TestUtil;
 import org.apache.lucene.util.UnicodeUtil;
 import org.elasticsearch.ElasticsearchException;
@@ -63,24 +61,24 @@ import static org.hamcrest.Matchers.*;
 public class SimpleSortTests extends ElasticsearchIntegrationTest {
 
-    @LuceneTestCase.AwaitsFix(bugUrl = "simon is working on this")
     @Test
     public void testIssue6614() throws ExecutionException, InterruptedException {
         List<IndexRequestBuilder> builders = new ArrayList<>();
         boolean strictTimeBasedIndices = randomBoolean();
-        final int numIndices = randomIntBetween(2, 25); // at most 25 days in the month
+        final int numIndices = scaledRandomIntBetween(2, 25); // at most 25 days in the month
         for (int i = 0; i < numIndices; i++) {
             final String indexId = strictTimeBasedIndices ? "idx_" + i : "idx";
             if (strictTimeBasedIndices || i == 0) {
                 createIndex(indexId);
             }
-            final int numDocs = randomIntBetween(1, 23); // hour of the day
+            final int numDocs = scaledRandomIntBetween(1, 23); // hour of the day
             for (int j = 0; j < numDocs; j++) {
                 builders.add(client().prepareIndex(indexId, "type").setSource("foo", "bar", "timeUpdated", "2014/07/" + String.format(Locale.ROOT, "%02d", i+1)+" " + String.format(Locale.ROOT, "%02d", j+1) + ":00:00"));
             }
         }
         int docs = builders.size();
         indexRandom(true, builders);
-        ensureYellow();
+        ensureGreen();
         SearchResponse allDocsResponse = client().prepareSearch().setQuery(QueryBuilders.filteredQuery(matchAllQuery(),
                 FilterBuilders.boolFilter().must(FilterBuilders.termFilter("foo", "bar"),
                         FilterBuilders.rangeFilter("timeUpdated").gte("2014/0" + randomIntBetween(1, 7) + "/01").cache(randomBoolean()))))
ElasticsearchAssertions.java
@@ -494,8 +494,8 @@ public class ElasticsearchAssertions {
             StreamInput input = new BytesStreamInput(orig);
             input.setVersion(version);
             newInstance.readFrom(input);
-            assertThat("Stream should be fully read with version [" + version + "] for streamable [" + streamable + "]", input.available(), equalTo(0));
-            assertThat("Serialization failed with version [" + version + "] bytes should be equal for streamable [" + streamable + "]", serialize(version, streamable), equalTo(orig));
+            assertThat("Stream should be fully read with version [" + version + "] for streamable [" + newInstance + "]", input.available(), equalTo(0));
+            assertThat("Serialization failed with version [" + version + "] bytes should be equal for streamable [" + streamable + "]", serialize(version, newInstance), equalTo(orig));
         } catch (Throwable ex) {
             throw new RuntimeException("failed to check serialization - version [" + version + "] for streamable [" + streamable + "]", ex);
         }
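The assertion change above makes the helper re-serialize the freshly deserialized copy (newInstance) and compare it against the original bytes, which is exactly the round trip that only holds when serialization is deterministic. A minimal sketch of that check, with a hypothetical BytesSerializable interface standing in for Elasticsearch's Streamable and serialize(version, ...) helper:

import java.util.Arrays;

public class RoundTripSketch {

    // Hypothetical stand-in for a streamable object.
    interface BytesSerializable {
        byte[] writeToBytes();
        void readFromBytes(byte[] data);
    }

    static void assertRoundTrip(BytesSerializable original, BytesSerializable newInstance) {
        byte[] orig = original.writeToBytes();       // like serialize(version, streamable)
        newInstance.readFromBytes(orig);             // like newInstance.readFrom(input)
        byte[] again = newInstance.writeToBytes();   // like serialize(version, newInstance)
        if (!Arrays.equals(orig, again)) {
            throw new AssertionError("re-serializing the deserialized copy changed the bytes; serialization is not deterministic");
        }
    }
}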