SOLR-13261: Make SortableTextField work with export/streaming

This commit is contained in:
Erick Erickson 2019-02-22 11:04:31 -08:00
parent 9b8a4a9e6e
commit 6b4e90617d
6 changed files with 97 additions and 27 deletions

View File

@ -58,6 +58,8 @@ New Features
* SOLR-13171 : A true streaming parser for javabin payload/stream without creating any objects (noble)
* SOLR-13261: Make SortableTextField work with export/streaming
Bug Fixes
----------------------

View File

@ -58,6 +58,7 @@ import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.IntValueFieldType;
import org.apache.solr.schema.LongValueFieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.SortableTextField;
import org.apache.solr.schema.StrField;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.SortSpec;
@ -358,7 +359,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
} else {
writers[i] = new DoubleFieldWriter(field);
}
} else if (fieldType instanceof StrField) {
} else if (fieldType instanceof StrField || fieldType instanceof SortableTextField) {
if (multiValued) {
writers[i] = new MultiFieldWriter(field, fieldType, schemaField, false);
} else {
@ -377,7 +378,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
writers[i] = new BoolFieldWriter(field, fieldType);
}
} else {
throw new IOException("Export fields must either be one of the following types: int,float,long,double,string,date,boolean");
throw new IOException("Export fields must be one of the following types: int,float,long,double,string,date,boolean,SortableText");
}
}
return writers;
@ -421,7 +422,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
} else {
sortValues[i] = new LongValue(field, new LongAsc());
}
} else if (ft instanceof StrField) {
} else if (ft instanceof StrField || ft instanceof SortableTextField) {
LeafReader reader = searcher.getSlowAtomicReader();
SortedDocValues vals = reader.getSortedDocValues(field);
if (reverse) {
@ -447,7 +448,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
sortValues[i] = new StringValue(vals, field, new IntAsc());
}
} else {
throw new IOException("Sort fields must be one of the following types: int,float,long,double,string,date,boolean");
throw new IOException("Sort fields must be one of the following types: int,float,long,double,string,date,boolean,SortableText");
}
}
//SingleValueSortDoc etc are specialized classes which don't have array lookups. On benchmarking large datasets

View File

@ -40,6 +40,27 @@
<fieldType name="uuid" class="solr.UUIDField"/>
<!-- SortableTextField generally functions exactly like TextField,
except that it supports, and by default uses, docValues for sorting (or faceting)
on the first 1024 characters of the original field values (which is configurable).
This makes it a bit more useful than TextField in many situations, but the trade-off
is that it takes up more space on disk, which is why it's not used in place of TextField
for every fieldType in this _default schema.
-->
<fieldType name="text_gen_sort" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<field name="id" type="string" required="true" indexed="true" docValues="true"/>
<field name="floatdv_m" type="float" indexed="false" stored="false" docValues="true" multiValued="true"/>
<field name="intdv_m" type="int" indexed="false" stored="false" docValues="true" multiValued="true"/>
@ -47,6 +68,7 @@
<field name="longdv_m" type="long" indexed="false" stored="false" docValues="true" multiValued="true"/>
<field name="datedv_m" type="date" indexed="false" stored="false" docValues="true" multiValued="true"/>
<field name="stringdv_m" type="string" indexed="false" stored="false" docValues="true" multiValued="true"/>
<field name="sortabledv_m" type="text_gen_sort" indexed="true" stored="true" multiValued="true" />
<field name="floatdv" type="float" indexed="false" stored="false" docValues="true"/>
<field name="intdv" type="int" indexed="false" stored="false" docValues="true"/>
@ -55,6 +77,7 @@
<field name="datedv" type="date" indexed="false" stored="false" docValues="true"/>
<field name="stringdv" type="string" indexed="false" stored="false" docValues="true"/>
<field name="booleandv" type="boolean" indexed="false" stored="false" docValues="true" />
<field name="sortabledv" type="text_gen_sort" indexed="true" stored="true" multiValued="false" />
<dynamicField name="*_s_dv" type="string" indexed="true" stored="true" docValues="true" multiValued="false"/>

View File

@ -23,5 +23,17 @@
<field name="_version_" type="long" indexed="true" stored="true"/>
<field name="_root_" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
<field name="id" type="string" indexed="true" stored="true"/>
<fieldType name="sortabletext" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<uniqueKey>id</uniqueKey>
</schema>

View File

@ -98,7 +98,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
new FieldProps("floatField", "float", 1),
new FieldProps("dateField", "date", 1),
new FieldProps("stringField", "string", 1),
new FieldProps("boolField", "boolean", 1)
new FieldProps("boolField", "boolean", 1),
new FieldProps("sortableText", "sortabletext", 1)
));
fieldsToTestMulti =
@ -109,10 +110,11 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
new FieldProps("floatFieldMulti", "float", 5),
new FieldProps("dateFieldMulti", "date", 5),
new FieldProps("stringFieldMulti", "string", 5),
new FieldProps("boolFieldMulti", "boolean", 2)
new FieldProps("boolFieldMulti", "boolean", 2),
new FieldProps("sortableFieldMulti", "sortabletext", 5)
));
// Fields to test for grouping and sorting with sortMinssingFirst/Last.
// Fields to test for grouping and sorting with sortMissingFirst/Last.
fieldsToTestGroupSortFirst =
Collections.unmodifiableList(Arrays.asList(
new FieldProps("intGSF", "int"),
@ -121,7 +123,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
new FieldProps("floatGSF", "float"),
new FieldProps("dateGSF", "date"),
new FieldProps("stringGSF", "string"),
new FieldProps("boolGSF", "boolean")
new FieldProps("boolGSF", "boolean"),
new FieldProps("sortableGSF", "sortabletext")
));
fieldsToTestGroupSortLast =
@ -132,7 +135,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
new FieldProps("floatGSL", "float"),
new FieldProps("dateGSL", "date"),
new FieldProps("stringGSL", "string"),
new FieldProps("boolGSL", "boolean")
new FieldProps("boolGSL", "boolean"),
new FieldProps("sortableGSL", "sortabletext")
));
List<Update> updateList = new ArrayList<>(fieldsToTestSingle.size() +
@ -210,18 +214,18 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
final QueryResponse rsp = client.query(COLLECTION, solrQuery);
for (FieldProps props : fieldsToTestSingle) {
testFacet(props, rsp);
doTestFacet(props, rsp);
}
for (FieldProps props : fieldsToTestMulti) {
testFacet(props, rsp);
doTestFacet(props, rsp);
}
}
// We should be able to sort things with missing first/last, even when they are _NOT_ present at all on one server.
@Test
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12028")
//@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12028")
public void testGroupingSorting() throws IOException, SolrServerException {
CloudSolrClient client = cluster.getSolrClient();
@ -314,12 +318,17 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
// 12-Jun-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 04-May-2018
// commented 15-Sep-2018 @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018
public void testGroupingDVOnly() throws IOException, SolrServerException {
doGroupingDvOnly(fieldsToTestGroupSortFirst, "boolGSF");
doGroupingDvOnly(fieldsToTestGroupSortLast, "boolGSL");
}
private void doGroupingDvOnly(List<FieldProps> fieldProps, String boolName) throws IOException, SolrServerException {
List<SolrInputDocument> docs = new ArrayList<>(50);
for (int idx = 0; idx < 49; ++idx) {
SolrInputDocument doc = new SolrInputDocument();
doc.addField("id", idx);
boolean doInc = ((idx % 7) == 0);
for (FieldProps prop : fieldsToTestGroupSortFirst) {
for (FieldProps prop : fieldProps) {
doc.addField(prop.getName(), prop.getValue(doInc));
}
docs.add(doc);
@ -337,13 +346,10 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
.commit(client, COLLECTION);
// OK, we should have one group with 10 entries for null, a group with 1 entry and 7 groups with 7
for (FieldProps prop : fieldsToTestGroupSortFirst) {
// Special handling until SOLR-9802 is fixed
if (prop.getName().startsWith("date")) continue;
// SOLR-9802 to here
for (FieldProps prop : fieldProps) {
// TODO: gsf fails this
if (prop.getName().endsWith("GSF") ) continue;
// Solr 9802
if (prop.getName().startsWith("date")) continue;
final SolrQuery solrQuery = new SolrQuery(
"q", "*:*",
@ -376,7 +382,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
case 25:
case 24:
++boolCount;
assertEquals("We should have more counts for boolean fields!", "boolGSF", prop.getName());
assertEquals("We should have more counts for boolean fields!", boolName, prop.getName());
break;
default:
@ -442,7 +448,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
}
private void testFacet(FieldProps props, QueryResponse rsp) {
private void doTestFacet(FieldProps props, QueryResponse rsp) {
String name = props.getName();
final List<FacetField.Count> counts = rsp.getFacetField(name).getValues();
long expectedCount = props.getExpectedCount();
@ -483,7 +489,7 @@ class FieldProps {
base = Math.abs(random().nextLong());
} else if (name.startsWith("bool")) {
base = true; // Must start with a known value since bools only have a two values....
} else if (name.startsWith("string")) {
} else if (name.startsWith("string") || name.startsWith("sortable")) {
base = "base_string_" + random().nextInt(1_000_000) + "_";
} else {
throw new RuntimeException("Should have found a prefix for the field before now!");
@ -531,7 +537,7 @@ class FieldProps {
base = !((boolean) base);
return ret;
}
if (name.startsWith("string")) {
if (name.startsWith("string") || name.startsWith("sortable")) {
return String.format(Locale.ROOT, "%s_%08d", (String) base, counter);
}
throw new RuntimeException("Should have found a prefix for the field before now!");

View File

@ -129,7 +129,10 @@ public class TestExportWriter extends SolrTestCaseJ4 {
"datedv_m", "2017-06-16T01:00:00Z",
"datedv_m", "2017-06-16T02:00:00Z",
"datedv_m", "2017-06-16T03:00:00Z",
"datedv_m", "2017-06-16T04:00:00Z"));
"datedv_m", "2017-06-16T04:00:00Z",
"sortabledv_m", "this is some text one_1",
"sortabledv_m", "this is some text two_1",
"sortabledv_m", "this is some text three_1"));
assertU(adoc("id","7",
"floatdv","2.1",
@ -166,7 +169,8 @@ public class TestExportWriter extends SolrTestCaseJ4 {
"int_is_t", "1",
"int_is_t", "1",
"int_is_t", "1",
"int_is_t", "1"));
"int_is_t", "1",
"sortabledv", "this is some text_1"));
assertU(commit());
assertU(adoc("id","8",
"floatdv","2.1",
@ -191,7 +195,11 @@ public class TestExportWriter extends SolrTestCaseJ4 {
"int_is_p", "1",
"int_is_p", "1",
"int_is_p", "1",
"int_is_p", "1"));
"int_is_p", "1",
"sortabledv", "this is some text_2",
"sortabledv_m", "this is some text one_2",
"sortabledv_m", "this is some text two_2",
"sortabledv_m", "this is some text three_2"));
assertU(commit());
@ -491,6 +499,24 @@ public class TestExportWriter extends SolrTestCaseJ4 {
s = h.query(req("q", "id:8", "qt", "/export", "fl", "stringdv", "sort", "intdv asc"));
assertJsonEquals(s, "{\"responseHeader\": {\"status\": 0}, \"response\":{\"numFound\":1, \"docs\":[{\"stringdv\":\"chello \\\"world\\\"\"}]}}");
// Test sortable text fields:
s = h.query(req("q", "id:(1 OR 3 OR 8)", "qt", "/export", "fl", "sortabledv_m,sortabledv", "sort", "sortabledv asc"));
assertJsonEquals(s, "{\n" +
" \"responseHeader\":{\"status\":0},\n" +
" \"response\":{\n" +
" \"numFound\":3,\n" +
" \"docs\":[{\n" +
" \"sortabledv_m\":[\"this is some text one_1\"\n" +
" ,\"this is some text three_1\"\n" +
" ,\"this is some text two_1\"]}\n" +
" ,{\n" +
" \"sortabledv\":\"this is some text_1\"}\n" +
" ,{\n" +
" \"sortabledv_m\":[\"this is some text one_2\"\n" +
" ,\"this is some text three_2\"\n" +
" ,\"this is some text two_2\"],\n" +
" \"sortabledv\":\"this is some text_2\"}]}}");
}
private void assertJsonEquals(String actual, String expected) {