From 6b4e90617ddb5a9897070bc60e2c6e78d8488f12 Mon Sep 17 00:00:00 2001 From: Erick Erickson Date: Fri, 22 Feb 2019 11:04:31 -0800 Subject: [PATCH] SOLR-13261: Make SortableTextField work with export/streaming --- solr/CHANGES.txt | 2 + .../solr/handler/export/ExportWriter.java | 9 ++-- .../conf/schema-sortingresponse.xml | 25 ++++++++++- .../cloud-managed/conf/managed-schema | 12 +++++ .../solr/cloud/DocValuesNotIndexedTest.java | 44 +++++++++++-------- .../solr/handler/export/TestExportWriter.java | 32 ++++++++++++-- 6 files changed, 97 insertions(+), 27 deletions(-) diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index bc5fdb2887f..2a763ff3401 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -58,6 +58,8 @@ New Features * SOLR-13171 : A true streaming parser for javabin payload/stream without creating any objects (noble) +* SOLR-13261: Make SortableTextField work with export/streaming + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java index 2c1ab96598a..c80cae36787 100644 --- a/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/export/ExportWriter.java @@ -58,6 +58,7 @@ import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.IntValueFieldType; import org.apache.solr.schema.LongValueFieldType; import org.apache.solr.schema.SchemaField; +import org.apache.solr.schema.SortableTextField; import org.apache.solr.schema.StrField; import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SortSpec; @@ -358,7 +359,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable { } else { writers[i] = new DoubleFieldWriter(field); } - } else if (fieldType instanceof StrField) { + } else if (fieldType instanceof StrField || fieldType instanceof SortableTextField) { if (multiValued) { writers[i] = new MultiFieldWriter(field, fieldType, schemaField, false); } else { @@ -377,7 +378,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable { writers[i] = new BoolFieldWriter(field, fieldType); } } else { - throw new IOException("Export fields must either be one of the following types: int,float,long,double,string,date,boolean"); + throw new IOException("Export fields must be one of the following types: int,float,long,double,string,date,boolean,SortableText"); } } return writers; @@ -421,7 +422,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable { } else { sortValues[i] = new LongValue(field, new LongAsc()); } - } else if (ft instanceof StrField) { + } else if (ft instanceof StrField || ft instanceof SortableTextField) { LeafReader reader = searcher.getSlowAtomicReader(); SortedDocValues vals = reader.getSortedDocValues(field); if (reverse) { @@ -447,7 +448,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable { sortValues[i] = new StringValue(vals, field, new IntAsc()); } } else { - throw new IOException("Sort fields must be one of the following types: int,float,long,double,string,date,boolean"); + throw new IOException("Sort fields must be one of the following types: int,float,long,double,string,date,boolean,SortableText"); } } //SingleValueSortDoc etc are specialized classes which don't have array lookups. On benchmarking large datasets diff --git a/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml b/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml index 57e684db9d4..281e698eb4a 100644 --- a/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml +++ b/solr/core/src/test-files/solr/collection1/conf/schema-sortingresponse.xml @@ -26,7 +26,7 @@ seconds part (.999) is optional. --> - + @@ -40,6 +40,27 @@ + + + + + + + + + + + + + + @@ -47,6 +68,7 @@ + @@ -55,6 +77,7 @@ + diff --git a/solr/core/src/test-files/solr/configsets/cloud-managed/conf/managed-schema b/solr/core/src/test-files/solr/configsets/cloud-managed/conf/managed-schema index 7ce25e97b91..455cb55a429 100644 --- a/solr/core/src/test-files/solr/configsets/cloud-managed/conf/managed-schema +++ b/solr/core/src/test-files/solr/configsets/cloud-managed/conf/managed-schema @@ -23,5 +23,17 @@ + + + + + + + + + + + + id diff --git a/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java b/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java index f396a5d1b4d..562083158f3 100644 --- a/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/DocValuesNotIndexedTest.java @@ -98,7 +98,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { new FieldProps("floatField", "float", 1), new FieldProps("dateField", "date", 1), new FieldProps("stringField", "string", 1), - new FieldProps("boolField", "boolean", 1) + new FieldProps("boolField", "boolean", 1), + new FieldProps("sortableText", "sortabletext", 1) )); fieldsToTestMulti = @@ -109,10 +110,11 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { new FieldProps("floatFieldMulti", "float", 5), new FieldProps("dateFieldMulti", "date", 5), new FieldProps("stringFieldMulti", "string", 5), - new FieldProps("boolFieldMulti", "boolean", 2) + new FieldProps("boolFieldMulti", "boolean", 2), + new FieldProps("sortableFieldMulti", "sortabletext", 5) )); - // Fields to test for grouping and sorting with sortMinssingFirst/Last. + // Fields to test for grouping and sorting with sortMissingFirst/Last. fieldsToTestGroupSortFirst = Collections.unmodifiableList(Arrays.asList( new FieldProps("intGSF", "int"), @@ -121,7 +123,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { new FieldProps("floatGSF", "float"), new FieldProps("dateGSF", "date"), new FieldProps("stringGSF", "string"), - new FieldProps("boolGSF", "boolean") + new FieldProps("boolGSF", "boolean"), + new FieldProps("sortableGSF", "sortabletext") )); fieldsToTestGroupSortLast = @@ -132,7 +135,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { new FieldProps("floatGSL", "float"), new FieldProps("dateGSL", "date"), new FieldProps("stringGSL", "string"), - new FieldProps("boolGSL", "boolean") + new FieldProps("boolGSL", "boolean"), + new FieldProps("sortableGSL", "sortabletext") )); List updateList = new ArrayList<>(fieldsToTestSingle.size() + @@ -210,18 +214,18 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { final QueryResponse rsp = client.query(COLLECTION, solrQuery); for (FieldProps props : fieldsToTestSingle) { - testFacet(props, rsp); + doTestFacet(props, rsp); } for (FieldProps props : fieldsToTestMulti) { - testFacet(props, rsp); + doTestFacet(props, rsp); } } // We should be able to sort thing with missing first/last and that are _NOT_ present at all on one server. @Test - @AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12028") + //@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12028") public void testGroupingSorting() throws IOException, SolrServerException { CloudSolrClient client = cluster.getSolrClient(); @@ -314,12 +318,17 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { // 12-Jun-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 04-May-2018 // commented 15-Sep-2018 @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018 public void testGroupingDVOnly() throws IOException, SolrServerException { + doGroupingDvOnly(fieldsToTestGroupSortFirst, "boolGSF"); + doGroupingDvOnly(fieldsToTestGroupSortLast, "boolGSL"); + } + private void doGroupingDvOnly(List fieldProps, String boolName) throws IOException, SolrServerException { + List docs = new ArrayList<>(50); for (int idx = 0; idx < 49; ++idx) { SolrInputDocument doc = new SolrInputDocument(); doc.addField("id", idx); boolean doInc = ((idx % 7) == 0); - for (FieldProps prop : fieldsToTestGroupSortFirst) { + for (FieldProps prop : fieldProps) { doc.addField(prop.getName(), prop.getValue(doInc)); } docs.add(doc); @@ -337,13 +346,10 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { .commit(client, COLLECTION); // OK, we should have one group with 10 entries for null, a group with 1 entry and 7 groups with 7 - for (FieldProps prop : fieldsToTestGroupSortFirst) { - // Special handling until SOLR-9802 is fixed + for (FieldProps prop : fieldProps) { + + // Solr 9802 if (prop.getName().startsWith("date")) continue; - // SOLR-9802 to here - - // TODO: gsf fails this - if (prop.getName().endsWith("GSF") ) continue; final SolrQuery solrQuery = new SolrQuery( "q", "*:*", @@ -376,7 +382,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { case 25: case 24: ++boolCount; - assertEquals("We should have more counts for boolean fields!", "boolGSF", prop.getName()); + assertEquals("We should have more counts for boolean fields!", boolName, prop.getName()); break; default: @@ -442,7 +448,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase { } - private void testFacet(FieldProps props, QueryResponse rsp) { + private void doTestFacet(FieldProps props, QueryResponse rsp) { String name = props.getName(); final List counts = rsp.getFacetField(name).getValues(); long expectedCount = props.getExpectedCount(); @@ -483,7 +489,7 @@ class FieldProps { base = Math.abs(random().nextLong()); } else if (name.startsWith("bool")) { base = true; // Must start with a known value since bools only have a two values.... - } else if (name.startsWith("string")) { + } else if (name.startsWith("string") || name.startsWith("sortable")) { base = "base_string_" + random().nextInt(1_000_000) + "_"; } else { throw new RuntimeException("Should have found a prefix for the field before now!"); @@ -531,7 +537,7 @@ class FieldProps { base = !((boolean) base); return ret; } - if (name.startsWith("string")) { + if (name.startsWith("string") || name.startsWith("sortable")) { return String.format(Locale.ROOT, "%s_%08d", (String) base, counter); } throw new RuntimeException("Should have found a prefix for the field before now!"); diff --git a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java index f28f7dc9805..4cebb12c8bc 100644 --- a/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java +++ b/solr/core/src/test/org/apache/solr/handler/export/TestExportWriter.java @@ -129,7 +129,10 @@ public class TestExportWriter extends SolrTestCaseJ4 { "datedv_m", "2017-06-16T01:00:00Z", "datedv_m", "2017-06-16T02:00:00Z", "datedv_m", "2017-06-16T03:00:00Z", - "datedv_m", "2017-06-16T04:00:00Z")); + "datedv_m", "2017-06-16T04:00:00Z", + "sortabledv_m", "this is some text one_1", + "sortabledv_m", "this is some text two_1", + "sortabledv_m", "this is some text three_1")); assertU(adoc("id","7", "floatdv","2.1", @@ -166,7 +169,8 @@ public class TestExportWriter extends SolrTestCaseJ4 { "int_is_t", "1", "int_is_t", "1", "int_is_t", "1", - "int_is_t", "1")); + "int_is_t", "1", + "sortabledv", "this is some text_1")); assertU(commit()); assertU(adoc("id","8", "floatdv","2.1", @@ -191,7 +195,11 @@ public class TestExportWriter extends SolrTestCaseJ4 { "int_is_p", "1", "int_is_p", "1", "int_is_p", "1", - "int_is_p", "1")); + "int_is_p", "1", + "sortabledv", "this is some text_2", + "sortabledv_m", "this is some text one_2", + "sortabledv_m", "this is some text two_2", + "sortabledv_m", "this is some text three_2")); assertU(commit()); @@ -491,6 +499,24 @@ public class TestExportWriter extends SolrTestCaseJ4 { s = h.query(req("q", "id:8", "qt", "/export", "fl", "stringdv", "sort", "intdv asc")); assertJsonEquals(s, "{\"responseHeader\": {\"status\": 0}, \"response\":{\"numFound\":1, \"docs\":[{\"stringdv\":\"chello \\\"world\\\"\"}]}}"); + + // Test sortable text fields: + s = h.query(req("q", "id:(1 OR 3 OR 8)", "qt", "/export", "fl", "sortabledv_m,sortabledv", "sort", "sortabledv asc")); + assertJsonEquals(s, "{\n" + + " \"responseHeader\":{\"status\":0},\n" + + " \"response\":{\n" + + " \"numFound\":3,\n" + + " \"docs\":[{\n" + + " \"sortabledv_m\":[\"this is some text one_1\"\n" + + " ,\"this is some text three_1\"\n" + + " ,\"this is some text two_1\"]}\n" + + " ,{\n" + + " \"sortabledv\":\"this is some text_1\"}\n" + + " ,{\n" + + " \"sortabledv_m\":[\"this is some text one_2\"\n" + + " ,\"this is some text three_2\"\n" + + " ,\"this is some text two_2\"],\n" + + " \"sortabledv\":\"this is some text_2\"}]}}"); } private void assertJsonEquals(String actual, String expected) {