SOLR-13261: Make SortableTextField work with export/streaming

This commit is contained in:
Erick Erickson 2019-02-22 11:04:31 -08:00
parent 9b8a4a9e6e
commit 6b4e90617d
6 changed files with 97 additions and 27 deletions

View File

@ -58,6 +58,8 @@ New Features
* SOLR-13171 : A true streaming parser for javabin payload/stream without creating any objects (noble) * SOLR-13171 : A true streaming parser for javabin payload/stream without creating any objects (noble)
* SOLR-13261: Make SortableTextField work with export/streaming
Bug Fixes Bug Fixes
---------------------- ----------------------

View File

@ -58,6 +58,7 @@ import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.IntValueFieldType; import org.apache.solr.schema.IntValueFieldType;
import org.apache.solr.schema.LongValueFieldType; import org.apache.solr.schema.LongValueFieldType;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.SortableTextField;
import org.apache.solr.schema.StrField; import org.apache.solr.schema.StrField;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortSpec; import org.apache.solr.search.SortSpec;
@ -358,7 +359,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
} else { } else {
writers[i] = new DoubleFieldWriter(field); writers[i] = new DoubleFieldWriter(field);
} }
} else if (fieldType instanceof StrField) { } else if (fieldType instanceof StrField || fieldType instanceof SortableTextField) {
if (multiValued) { if (multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, schemaField, false); writers[i] = new MultiFieldWriter(field, fieldType, schemaField, false);
} else { } else {
@ -377,7 +378,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
writers[i] = new BoolFieldWriter(field, fieldType); writers[i] = new BoolFieldWriter(field, fieldType);
} }
} else { } else {
throw new IOException("Export fields must either be one of the following types: int,float,long,double,string,date,boolean"); throw new IOException("Export fields must be one of the following types: int,float,long,double,string,date,boolean,SortableText");
} }
} }
return writers; return writers;
@ -421,7 +422,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
} else { } else {
sortValues[i] = new LongValue(field, new LongAsc()); sortValues[i] = new LongValue(field, new LongAsc());
} }
} else if (ft instanceof StrField) { } else if (ft instanceof StrField || ft instanceof SortableTextField) {
LeafReader reader = searcher.getSlowAtomicReader(); LeafReader reader = searcher.getSlowAtomicReader();
SortedDocValues vals = reader.getSortedDocValues(field); SortedDocValues vals = reader.getSortedDocValues(field);
if (reverse) { if (reverse) {
@ -447,7 +448,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
sortValues[i] = new StringValue(vals, field, new IntAsc()); sortValues[i] = new StringValue(vals, field, new IntAsc());
} }
} else { } else {
throw new IOException("Sort fields must be one of the following types: int,float,long,double,string,date,boolean"); throw new IOException("Sort fields must be one of the following types: int,float,long,double,string,date,boolean,SortableText");
} }
} }
//SingleValueSortDoc etc are specialized classes which don't have array lookups. On benchmarking large datasets //SingleValueSortDoc etc are specialized classes which don't have array lookups. On benchmarking large datasets

View File

@ -26,7 +26,7 @@
seconds part (.999) is optional. seconds part (.999) is optional.
--> -->
<fieldType name="date" class="${solr.tests.DateFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/> <fieldType name="date" class="${solr.tests.DateFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
<!-- Point Fields --> <!-- Point Fields -->
<fieldType name="pint" class="solr.IntPointField" docValues="true"/> <fieldType name="pint" class="solr.IntPointField" docValues="true"/>
<fieldType name="plong" class="solr.LongPointField" docValues="true"/> <fieldType name="plong" class="solr.LongPointField" docValues="true"/>
@ -40,6 +40,27 @@
<fieldType name="uuid" class="solr.UUIDField"/> <fieldType name="uuid" class="solr.UUIDField"/>
<!-- SortableTextField generally functions exactly like TextField,
except that it supports, and by default uses, docValues for sorting (or faceting)
on the first 1024 characters of the original field values (which is configurable).
This makes it a bit more useful than TextField in many situations, but the trade-off
is that it takes up more space on disk; which is why it's not used in place of TextField
for every fieldType in this _default schema.
-->
<fieldType name="text_gen_sort" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<field name="id" type="string" required="true" indexed="true" docValues="true"/> <field name="id" type="string" required="true" indexed="true" docValues="true"/>
<field name="floatdv_m" type="float" indexed="false" stored="false" docValues="true" multiValued="true"/> <field name="floatdv_m" type="float" indexed="false" stored="false" docValues="true" multiValued="true"/>
<field name="intdv_m" type="int" indexed="false" stored="false" docValues="true" multiValued="true"/> <field name="intdv_m" type="int" indexed="false" stored="false" docValues="true" multiValued="true"/>
@ -47,6 +68,7 @@
<field name="longdv_m" type="long" indexed="false" stored="false" docValues="true" multiValued="true"/> <field name="longdv_m" type="long" indexed="false" stored="false" docValues="true" multiValued="true"/>
<field name="datedv_m" type="date" indexed="false" stored="false" docValues="true" multiValued="true"/> <field name="datedv_m" type="date" indexed="false" stored="false" docValues="true" multiValued="true"/>
<field name="stringdv_m" type="string" indexed="false" stored="false" docValues="true" multiValued="true"/> <field name="stringdv_m" type="string" indexed="false" stored="false" docValues="true" multiValued="true"/>
<field name="sortabledv_m" type="text_gen_sort" indexed="true" stored="true" multiValued="true" />
<field name="floatdv" type="float" indexed="false" stored="false" docValues="true"/> <field name="floatdv" type="float" indexed="false" stored="false" docValues="true"/>
<field name="intdv" type="int" indexed="false" stored="false" docValues="true"/> <field name="intdv" type="int" indexed="false" stored="false" docValues="true"/>
@ -55,6 +77,7 @@
<field name="datedv" type="date" indexed="false" stored="false" docValues="true"/> <field name="datedv" type="date" indexed="false" stored="false" docValues="true"/>
<field name="stringdv" type="string" indexed="false" stored="false" docValues="true"/> <field name="stringdv" type="string" indexed="false" stored="false" docValues="true"/>
<field name="booleandv" type="boolean" indexed="false" stored="false" docValues="true" /> <field name="booleandv" type="boolean" indexed="false" stored="false" docValues="true" />
<field name="sortabledv" type="text_gen_sort" indexed="true" stored="true" multiValued="false" />
<dynamicField name="*_s_dv" type="string" indexed="true" stored="true" docValues="true" multiValued="false"/> <dynamicField name="*_s_dv" type="string" indexed="true" stored="true" docValues="true" multiValued="false"/>

View File

@ -23,5 +23,17 @@
<field name="_version_" type="long" indexed="true" stored="true"/> <field name="_version_" type="long" indexed="true" stored="true"/>
<field name="_root_" type="string" indexed="true" stored="true" multiValued="false" required="false"/> <field name="_root_" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="id" type="string" indexed="true" stored="true"/> <field name="id" type="string" indexed="true" stored="true"/>
<fieldType name="sortabletext" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<uniqueKey>id</uniqueKey> <uniqueKey>id</uniqueKey>
</schema> </schema>

View File

@ -98,7 +98,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
new FieldProps("floatField", "float", 1), new FieldProps("floatField", "float", 1),
new FieldProps("dateField", "date", 1), new FieldProps("dateField", "date", 1),
new FieldProps("stringField", "string", 1), new FieldProps("stringField", "string", 1),
new FieldProps("boolField", "boolean", 1) new FieldProps("boolField", "boolean", 1),
new FieldProps("sortableText", "sortabletext", 1)
)); ));
fieldsToTestMulti = fieldsToTestMulti =
@ -109,10 +110,11 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
new FieldProps("floatFieldMulti", "float", 5), new FieldProps("floatFieldMulti", "float", 5),
new FieldProps("dateFieldMulti", "date", 5), new FieldProps("dateFieldMulti", "date", 5),
new FieldProps("stringFieldMulti", "string", 5), new FieldProps("stringFieldMulti", "string", 5),
new FieldProps("boolFieldMulti", "boolean", 2) new FieldProps("boolFieldMulti", "boolean", 2),
new FieldProps("sortableFieldMulti", "sortabletext", 5)
)); ));
// Fields to test for grouping and sorting with sortMinssingFirst/Last. // Fields to test for grouping and sorting with sortMissingFirst/Last.
fieldsToTestGroupSortFirst = fieldsToTestGroupSortFirst =
Collections.unmodifiableList(Arrays.asList( Collections.unmodifiableList(Arrays.asList(
new FieldProps("intGSF", "int"), new FieldProps("intGSF", "int"),
@ -121,7 +123,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
new FieldProps("floatGSF", "float"), new FieldProps("floatGSF", "float"),
new FieldProps("dateGSF", "date"), new FieldProps("dateGSF", "date"),
new FieldProps("stringGSF", "string"), new FieldProps("stringGSF", "string"),
new FieldProps("boolGSF", "boolean") new FieldProps("boolGSF", "boolean"),
new FieldProps("sortableGSF", "sortabletext")
)); ));
fieldsToTestGroupSortLast = fieldsToTestGroupSortLast =
@ -132,7 +135,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
new FieldProps("floatGSL", "float"), new FieldProps("floatGSL", "float"),
new FieldProps("dateGSL", "date"), new FieldProps("dateGSL", "date"),
new FieldProps("stringGSL", "string"), new FieldProps("stringGSL", "string"),
new FieldProps("boolGSL", "boolean") new FieldProps("boolGSL", "boolean"),
new FieldProps("sortableGSL", "sortabletext")
)); ));
List<Update> updateList = new ArrayList<>(fieldsToTestSingle.size() + List<Update> updateList = new ArrayList<>(fieldsToTestSingle.size() +
@ -210,18 +214,18 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
final QueryResponse rsp = client.query(COLLECTION, solrQuery); final QueryResponse rsp = client.query(COLLECTION, solrQuery);
for (FieldProps props : fieldsToTestSingle) { for (FieldProps props : fieldsToTestSingle) {
testFacet(props, rsp); doTestFacet(props, rsp);
} }
for (FieldProps props : fieldsToTestMulti) { for (FieldProps props : fieldsToTestMulti) {
testFacet(props, rsp); doTestFacet(props, rsp);
} }
} }
// We should be able to sort things with missing first/last and that are _NOT_ present at all on one server. // We should be able to sort things with missing first/last and that are _NOT_ present at all on one server.
@Test @Test
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12028") //@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12028")
public void testGroupingSorting() throws IOException, SolrServerException { public void testGroupingSorting() throws IOException, SolrServerException {
CloudSolrClient client = cluster.getSolrClient(); CloudSolrClient client = cluster.getSolrClient();
@ -314,12 +318,17 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
// 12-Jun-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 04-May-2018 // 12-Jun-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 04-May-2018
// commented 15-Sep-2018 @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018 // commented 15-Sep-2018 @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018
public void testGroupingDVOnly() throws IOException, SolrServerException { public void testGroupingDVOnly() throws IOException, SolrServerException {
doGroupingDvOnly(fieldsToTestGroupSortFirst, "boolGSF");
doGroupingDvOnly(fieldsToTestGroupSortLast, "boolGSL");
}
private void doGroupingDvOnly(List<FieldProps> fieldProps, String boolName) throws IOException, SolrServerException {
List<SolrInputDocument> docs = new ArrayList<>(50); List<SolrInputDocument> docs = new ArrayList<>(50);
for (int idx = 0; idx < 49; ++idx) { for (int idx = 0; idx < 49; ++idx) {
SolrInputDocument doc = new SolrInputDocument(); SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", idx); doc.addField("id", idx);
boolean doInc = ((idx % 7) == 0); boolean doInc = ((idx % 7) == 0);
for (FieldProps prop : fieldsToTestGroupSortFirst) { for (FieldProps prop : fieldProps) {
doc.addField(prop.getName(), prop.getValue(doInc)); doc.addField(prop.getName(), prop.getValue(doInc));
} }
docs.add(doc); docs.add(doc);
@ -337,13 +346,10 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
.commit(client, COLLECTION); .commit(client, COLLECTION);
// OK, we should have one group with 10 entries for null, a group with 1 entry and 7 groups with 7 // OK, we should have one group with 10 entries for null, a group with 1 entry and 7 groups with 7
for (FieldProps prop : fieldsToTestGroupSortFirst) { for (FieldProps prop : fieldProps) {
// Special handling until SOLR-9802 is fixed
// Solr 9802
if (prop.getName().startsWith("date")) continue; if (prop.getName().startsWith("date")) continue;
// SOLR-9802 to here
// TODO: gsf fails this
if (prop.getName().endsWith("GSF") ) continue;
final SolrQuery solrQuery = new SolrQuery( final SolrQuery solrQuery = new SolrQuery(
"q", "*:*", "q", "*:*",
@ -376,7 +382,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
case 25: case 25:
case 24: case 24:
++boolCount; ++boolCount;
assertEquals("We should have more counts for boolean fields!", "boolGSF", prop.getName()); assertEquals("We should have more counts for boolean fields!", boolName, prop.getName());
break; break;
default: default:
@ -442,7 +448,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
} }
private void testFacet(FieldProps props, QueryResponse rsp) { private void doTestFacet(FieldProps props, QueryResponse rsp) {
String name = props.getName(); String name = props.getName();
final List<FacetField.Count> counts = rsp.getFacetField(name).getValues(); final List<FacetField.Count> counts = rsp.getFacetField(name).getValues();
long expectedCount = props.getExpectedCount(); long expectedCount = props.getExpectedCount();
@ -483,7 +489,7 @@ class FieldProps {
base = Math.abs(random().nextLong()); base = Math.abs(random().nextLong());
} else if (name.startsWith("bool")) { } else if (name.startsWith("bool")) {
base = true; // Must start with a known value since bools only have a two values.... base = true; // Must start with a known value since bools only have a two values....
} else if (name.startsWith("string")) { } else if (name.startsWith("string") || name.startsWith("sortable")) {
base = "base_string_" + random().nextInt(1_000_000) + "_"; base = "base_string_" + random().nextInt(1_000_000) + "_";
} else { } else {
throw new RuntimeException("Should have found a prefix for the field before now!"); throw new RuntimeException("Should have found a prefix for the field before now!");
@ -531,7 +537,7 @@ class FieldProps {
base = !((boolean) base); base = !((boolean) base);
return ret; return ret;
} }
if (name.startsWith("string")) { if (name.startsWith("string") || name.startsWith("sortable")) {
return String.format(Locale.ROOT, "%s_%08d", (String) base, counter); return String.format(Locale.ROOT, "%s_%08d", (String) base, counter);
} }
throw new RuntimeException("Should have found a prefix for the field before now!"); throw new RuntimeException("Should have found a prefix for the field before now!");

View File

@ -129,7 +129,10 @@ public class TestExportWriter extends SolrTestCaseJ4 {
"datedv_m", "2017-06-16T01:00:00Z", "datedv_m", "2017-06-16T01:00:00Z",
"datedv_m", "2017-06-16T02:00:00Z", "datedv_m", "2017-06-16T02:00:00Z",
"datedv_m", "2017-06-16T03:00:00Z", "datedv_m", "2017-06-16T03:00:00Z",
"datedv_m", "2017-06-16T04:00:00Z")); "datedv_m", "2017-06-16T04:00:00Z",
"sortabledv_m", "this is some text one_1",
"sortabledv_m", "this is some text two_1",
"sortabledv_m", "this is some text three_1"));
assertU(adoc("id","7", assertU(adoc("id","7",
"floatdv","2.1", "floatdv","2.1",
@ -166,7 +169,8 @@ public class TestExportWriter extends SolrTestCaseJ4 {
"int_is_t", "1", "int_is_t", "1",
"int_is_t", "1", "int_is_t", "1",
"int_is_t", "1", "int_is_t", "1",
"int_is_t", "1")); "int_is_t", "1",
"sortabledv", "this is some text_1"));
assertU(commit()); assertU(commit());
assertU(adoc("id","8", assertU(adoc("id","8",
"floatdv","2.1", "floatdv","2.1",
@ -191,7 +195,11 @@ public class TestExportWriter extends SolrTestCaseJ4 {
"int_is_p", "1", "int_is_p", "1",
"int_is_p", "1", "int_is_p", "1",
"int_is_p", "1", "int_is_p", "1",
"int_is_p", "1")); "int_is_p", "1",
"sortabledv", "this is some text_2",
"sortabledv_m", "this is some text one_2",
"sortabledv_m", "this is some text two_2",
"sortabledv_m", "this is some text three_2"));
assertU(commit()); assertU(commit());
@ -491,6 +499,24 @@ public class TestExportWriter extends SolrTestCaseJ4 {
s = h.query(req("q", "id:8", "qt", "/export", "fl", "stringdv", "sort", "intdv asc")); s = h.query(req("q", "id:8", "qt", "/export", "fl", "stringdv", "sort", "intdv asc"));
assertJsonEquals(s, "{\"responseHeader\": {\"status\": 0}, \"response\":{\"numFound\":1, \"docs\":[{\"stringdv\":\"chello \\\"world\\\"\"}]}}"); assertJsonEquals(s, "{\"responseHeader\": {\"status\": 0}, \"response\":{\"numFound\":1, \"docs\":[{\"stringdv\":\"chello \\\"world\\\"\"}]}}");
// Test sortable text fields:
s = h.query(req("q", "id:(1 OR 3 OR 8)", "qt", "/export", "fl", "sortabledv_m,sortabledv", "sort", "sortabledv asc"));
assertJsonEquals(s, "{\n" +
" \"responseHeader\":{\"status\":0},\n" +
" \"response\":{\n" +
" \"numFound\":3,\n" +
" \"docs\":[{\n" +
" \"sortabledv_m\":[\"this is some text one_1\"\n" +
" ,\"this is some text three_1\"\n" +
" ,\"this is some text two_1\"]}\n" +
" ,{\n" +
" \"sortabledv\":\"this is some text_1\"}\n" +
" ,{\n" +
" \"sortabledv_m\":[\"this is some text one_2\"\n" +
" ,\"this is some text three_2\"\n" +
" ,\"this is some text two_2\"],\n" +
" \"sortabledv\":\"this is some text_2\"}]}}");
} }
private void assertJsonEquals(String actual, String expected) { private void assertJsonEquals(String actual, String expected) {