From f0cf552bc594a6d56faf9354b7039cde33a5da60 Mon Sep 17 00:00:00 2001 From: kimchy Date: Tue, 13 Jul 2010 09:38:55 +0300 Subject: [PATCH] sorting breaks when sorting on a field that has no value in some documents --- .../search/ShardFieldDocSortedHitQueue.java | 4 ++ .../elasticsearch/common/lucene/Lucene.java | 54 ++++++++------- .../controller/SearchPhaseController.java | 30 +++++++-- .../search/sort/SimpleSortTests.java | 67 +++++++++++++++++++ .../testng/src/main/java/log4j.properties | 1 + 5 files changed, 126 insertions(+), 30 deletions(-) diff --git a/modules/elasticsearch/src/main/java/org/apache/lucene/search/ShardFieldDocSortedHitQueue.java b/modules/elasticsearch/src/main/java/org/apache/lucene/search/ShardFieldDocSortedHitQueue.java index 7d3324ef884..185c0762f90 100644 --- a/modules/elasticsearch/src/main/java/org/apache/lucene/search/ShardFieldDocSortedHitQueue.java +++ b/modules/elasticsearch/src/main/java/org/apache/lucene/search/ShardFieldDocSortedHitQueue.java @@ -29,4 +29,8 @@ public class ShardFieldDocSortedHitQueue extends FieldDocSortedHitQueue { super(size); setFields(fields); } + + @Override public void setFields(SortField[] fields) { + super.setFields(fields); + } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/common/lucene/Lucene.java b/modules/elasticsearch/src/main/java/org/elasticsearch/common/lucene/Lucene.java index 25a3381f010..1fc41b9eda4 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/common/lucene/Lucene.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/common/lucene/Lucene.java @@ -134,16 +134,18 @@ public class Lucene { for (int j = 0; j < cFields.length; j++) { byte type = in.readByte(); if (type == 0) { - cFields[j] = in.readUTF(); + cFields[j] = null; } else if (type == 1) { - cFields[j] = in.readInt(); + cFields[j] = in.readUTF(); } else if (type == 2) { - cFields[j] = in.readLong(); + cFields[j] = in.readInt(); } else if (type == 3) { - cFields[j] = in.readFloat(); + cFields[j] = in.readLong(); } else if (type == 4) { - cFields[j] = in.readDouble(); + cFields[j] = in.readFloat(); } else if (type == 5) { + cFields[j] = in.readDouble(); + } else if (type == 6) { cFields[j] = in.readByte(); } else { throw new IOException("Can't match type [" + type + "]"); @@ -198,27 +200,31 @@ public class Lucene { FieldDoc fieldDoc = (FieldDoc) doc; out.writeVInt(fieldDoc.fields.length); for (Comparable field : fieldDoc.fields) { - Class type = field.getClass(); - if (type == String.class) { + if (field == null) { out.writeByte((byte) 0); - out.writeUTF((String) field); - } else if (type == Integer.class) { - out.writeByte((byte) 1); - out.writeInt((Integer) field); - } else if (type == Long.class) { - out.writeByte((byte) 2); - out.writeLong((Long) field); - } else if (type == Float.class) { - out.writeByte((byte) 3); - out.writeFloat((Float) field); - } else if (type == Double.class) { - out.writeByte((byte) 4); - out.writeDouble((Double) field); - } else if (type == Byte.class) { - out.writeByte((byte) 5); - out.writeByte((Byte) field); } else { - throw new IOException("Can't handle sort field value of type [" + type + "]"); + Class type = field.getClass(); + if (type == String.class) { + out.writeByte((byte) 1); + out.writeUTF((String) field); + } else if (type == Integer.class) { + out.writeByte((byte) 2); + out.writeInt((Integer) field); + } else if (type == Long.class) { + out.writeByte((byte) 3); + out.writeLong((Long) field); + } else if (type == Float.class) { + out.writeByte((byte) 4); + out.writeFloat((Float) field); + } else if (type == Double.class) { + out.writeByte((byte) 5); + out.writeDouble((Double) field); + } else if (type == Byte.class) { + out.writeByte((byte) 6); + out.writeByte((Byte) field); + } else { + throw new IOException("Can't handle sort field value of type [" + type + "]"); + } } } diff --git a/modules/elasticsearch/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java b/modules/elasticsearch/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java index 67ff7f7dab4..4bd09789e6f 100644 --- a/modules/elasticsearch/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java +++ b/modules/elasticsearch/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java @@ -20,10 +20,7 @@ package org.elasticsearch.search.controller; import org.apache.lucene.index.Term; -import org.apache.lucene.search.FieldDoc; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.ShardFieldDocSortedHitQueue; -import org.apache.lucene.search.TopFieldDocs; +import org.apache.lucene.search.*; import org.apache.lucene.util.PriorityQueue; import org.elasticsearch.common.collect.Iterables; import org.elasticsearch.common.collect.Lists; @@ -83,8 +80,29 @@ public class SearchPhaseController { } PriorityQueue queue; if (queryResultProvider.queryResult().topDocs() instanceof TopFieldDocs) { - // sorting ... - queue = new ShardFieldDocSortedHitQueue(((TopFieldDocs) queryResultProvider.queryResult().topDocs()).fields, queueSize); // we need to accumulate for all and then filter the from + // sorting, first if the type is a String, chance CUSTOM to STRING so we handle nulls properly + TopFieldDocs fieldDocs = (TopFieldDocs) queryResultProvider.queryResult().topDocs(); + for (int i = 0; i < fieldDocs.fields.length; i++) { + boolean resolvedField = false; + for (QuerySearchResultProvider resultProvider : results) { + for (ScoreDoc doc : resultProvider.queryResult().topDocs().scoreDocs) { + FieldDoc fDoc = (FieldDoc) doc; + if (fDoc.fields[i] != null) { + if (fDoc.fields[i] instanceof String) { + fieldDocs.fields[i] = new SortField(fieldDocs.fields[i].getField(), SortField.STRING, fieldDocs.fields[i].getReverse()); + } + resolvedField = true; + break; + } + } + if (resolvedField) { + break; + } + } + } + queue = new ShardFieldDocSortedHitQueue(fieldDocs.fields, queueSize); + + // we need to accumulate for all and then filter the from for (QuerySearchResultProvider resultProvider : results) { QuerySearchResult result = resultProvider.queryResult(); ScoreDoc[] scoreDocs = result.topDocs().scoreDocs; diff --git a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/sort/SimpleSortTests.java b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/sort/SimpleSortTests.java index 3556ede60f1..5cfa81f55e6 100644 --- a/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/sort/SimpleSortTests.java +++ b/modules/test/integration/src/test/java/org/elasticsearch/test/integration/search/sort/SimpleSortTests.java @@ -20,6 +20,7 @@ package org.elasticsearch.test.integration.search.sort; import org.elasticsearch.action.search.SearchResponse; +import org.elasticsearch.action.search.ShardSearchFailure; import org.elasticsearch.client.Client; import org.elasticsearch.search.builder.SearchSourceBuilder; import org.elasticsearch.test.integration.AbstractNodesTests; @@ -140,4 +141,70 @@ public class SimpleSortTests extends AbstractNodesTests { assertThat((String) searchResponse.hits().getAt(0).field("id").value(), equalTo("2")); assertThat((String) searchResponse.hits().getAt(1).field("id").value(), equalTo("1")); } + + @Test public void testDocumentsWithNullValue() throws Exception { + try { + client.admin().indices().prepareDelete("test").execute().actionGet(); + } catch (Exception e) { + // ignore + } + client.admin().indices().prepareCreate("test").execute().actionGet(); + client.admin().cluster().prepareHealth().setWaitForGreenStatus().execute().actionGet(); + + client.prepareIndex("test", "type1").setSource(jsonBuilder().startObject() + .field("id", "1") + .field("svalue", "aaa") + .endObject()).execute().actionGet(); + + client.prepareIndex("test", "type1").setSource(jsonBuilder().startObject() + .field("id", "2") + .nullField("svalue") + .endObject()).execute().actionGet(); + + client.prepareIndex("test", "type1").setSource(jsonBuilder().startObject() + .field("id", "3") + .field("svalue", "bbb") + .endObject()).execute().actionGet(); + + + client.admin().indices().prepareFlush().setRefresh(true).execute().actionGet(); + + SearchResponse searchResponse = client.prepareSearch() + .setQuery(matchAllQuery()) + .addScriptField("id", "doc['id'].value") + .addSort("svalue", SearchSourceBuilder.Order.ASC) + .execute().actionGet(); + + if (searchResponse.failedShards() > 0) { + logger.warn("Failed shards:"); + for (ShardSearchFailure shardSearchFailure : searchResponse.shardFailures()) { + logger.warn("-> {}", shardSearchFailure); + } + } + assertThat(searchResponse.failedShards(), equalTo(0)); + + assertThat(searchResponse.hits().getTotalHits(), equalTo(3l)); + assertThat((String) searchResponse.hits().getAt(0).field("id").value(), equalTo("2")); + assertThat((String) searchResponse.hits().getAt(1).field("id").value(), equalTo("1")); + assertThat((String) searchResponse.hits().getAt(2).field("id").value(), equalTo("3")); + + searchResponse = client.prepareSearch() + .setQuery(matchAllQuery()) + .addScriptField("id", "doc['id'].value") + .addSort("svalue", SearchSourceBuilder.Order.DESC) + .execute().actionGet(); + + if (searchResponse.failedShards() > 0) { + logger.warn("Failed shards:"); + for (ShardSearchFailure shardSearchFailure : searchResponse.shardFailures()) { + logger.warn("-> {}", shardSearchFailure); + } + } + assertThat(searchResponse.failedShards(), equalTo(0)); + + assertThat(searchResponse.hits().getTotalHits(), equalTo(3l)); + assertThat((String) searchResponse.hits().getAt(0).field("id").value(), equalTo("3")); + assertThat((String) searchResponse.hits().getAt(1).field("id").value(), equalTo("1")); + assertThat((String) searchResponse.hits().getAt(2).field("id").value(), equalTo("2")); + } } diff --git a/modules/test/testng/src/main/java/log4j.properties b/modules/test/testng/src/main/java/log4j.properties index 2af669a4ce8..6c4d3ab8849 100644 --- a/modules/test/testng/src/main/java/log4j.properties +++ b/modules/test/testng/src/main/java/log4j.properties @@ -1,6 +1,7 @@ log4j.rootLogger=INFO, out log4j.logger.jgroups=WARN +#log4j.logger.action=DEBUG #log4j.logger.transport=TRACE #log4j.logger.discovery=TRACE #log4j.logger.cluster.service=TRACE