mirror of https://github.com/apache/lucene.git
SOLR-13261: Make SortableTextField work with export/streaming
This commit is contained in:
parent
9b8a4a9e6e
commit
6b4e90617d
|
@ -58,6 +58,8 @@ New Features
|
|||
|
||||
* SOLR-13171 : A true streaming parser for javabin payload/stream without creating any objects (noble)
|
||||
|
||||
* SOLR-13261: Make SortableTextField work with export/streaming
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -58,6 +58,7 @@ import org.apache.solr.schema.IndexSchema;
|
|||
import org.apache.solr.schema.IntValueFieldType;
|
||||
import org.apache.solr.schema.LongValueFieldType;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.schema.SortableTextField;
|
||||
import org.apache.solr.schema.StrField;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.search.SortSpec;
|
||||
|
@ -358,7 +359,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
|
|||
} else {
|
||||
writers[i] = new DoubleFieldWriter(field);
|
||||
}
|
||||
} else if (fieldType instanceof StrField) {
|
||||
} else if (fieldType instanceof StrField || fieldType instanceof SortableTextField) {
|
||||
if (multiValued) {
|
||||
writers[i] = new MultiFieldWriter(field, fieldType, schemaField, false);
|
||||
} else {
|
||||
|
@ -377,7 +378,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
|
|||
writers[i] = new BoolFieldWriter(field, fieldType);
|
||||
}
|
||||
} else {
|
||||
throw new IOException("Export fields must either be one of the following types: int,float,long,double,string,date,boolean");
|
||||
throw new IOException("Export fields must be one of the following types: int,float,long,double,string,date,boolean,SortableText");
|
||||
}
|
||||
}
|
||||
return writers;
|
||||
|
@ -421,7 +422,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
|
|||
} else {
|
||||
sortValues[i] = new LongValue(field, new LongAsc());
|
||||
}
|
||||
} else if (ft instanceof StrField) {
|
||||
} else if (ft instanceof StrField || ft instanceof SortableTextField) {
|
||||
LeafReader reader = searcher.getSlowAtomicReader();
|
||||
SortedDocValues vals = reader.getSortedDocValues(field);
|
||||
if (reverse) {
|
||||
|
@ -447,7 +448,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
|
|||
sortValues[i] = new StringValue(vals, field, new IntAsc());
|
||||
}
|
||||
} else {
|
||||
throw new IOException("Sort fields must be one of the following types: int,float,long,double,string,date,boolean");
|
||||
throw new IOException("Sort fields must be one of the following types: int,float,long,double,string,date,boolean,SortableText");
|
||||
}
|
||||
}
|
||||
//SingleValueSortDoc etc are specialized classes which don't have array lookups. On benchmarking large datasets
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
seconds part (.999) is optional.
|
||||
-->
|
||||
<fieldType name="date" class="${solr.tests.DateFieldType}" docValues="${solr.tests.numeric.dv}" precisionStep="0" omitNorms="true" positionIncrementGap="0"/>
|
||||
|
||||
|
||||
<!-- Point Fields -->
|
||||
<fieldType name="pint" class="solr.IntPointField" docValues="true"/>
|
||||
<fieldType name="plong" class="solr.LongPointField" docValues="true"/>
|
||||
|
@ -40,6 +40,27 @@
|
|||
|
||||
<fieldType name="uuid" class="solr.UUIDField"/>
|
||||
|
||||
|
||||
<!-- SortableTextField generally functions exactly like TextField,
|
||||
except that it supports, and by default uses, docValues for sorting (or faceting)
|
||||
on the first 1024 characters of the original field values (which is configurable).
|
||||
|
||||
This makes it a bit more useful than TextField in many situations, but the trade-off
|
||||
is that it takes up more space on disk; which is why it's not used in place of TextField
|
||||
for every fieldType in this _default schema.
|
||||
-->
|
||||
<fieldType name="text_gen_sort" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
|
||||
<field name="id" type="string" required="true" indexed="true" docValues="true"/>
|
||||
<field name="floatdv_m" type="float" indexed="false" stored="false" docValues="true" multiValued="true"/>
|
||||
<field name="intdv_m" type="int" indexed="false" stored="false" docValues="true" multiValued="true"/>
|
||||
|
@ -47,6 +68,7 @@
|
|||
<field name="longdv_m" type="long" indexed="false" stored="false" docValues="true" multiValued="true"/>
|
||||
<field name="datedv_m" type="date" indexed="false" stored="false" docValues="true" multiValued="true"/>
|
||||
<field name="stringdv_m" type="string" indexed="false" stored="false" docValues="true" multiValued="true"/>
|
||||
<field name="sortabledv_m" type="text_gen_sort" indexed="true" stored="true" multiValued="true" />
|
||||
|
||||
<field name="floatdv" type="float" indexed="false" stored="false" docValues="true"/>
|
||||
<field name="intdv" type="int" indexed="false" stored="false" docValues="true"/>
|
||||
|
@ -55,6 +77,7 @@
|
|||
<field name="datedv" type="date" indexed="false" stored="false" docValues="true"/>
|
||||
<field name="stringdv" type="string" indexed="false" stored="false" docValues="true"/>
|
||||
<field name="booleandv" type="boolean" indexed="false" stored="false" docValues="true" />
|
||||
<field name="sortabledv" type="text_gen_sort" indexed="true" stored="true" multiValued="false" />
|
||||
|
||||
<dynamicField name="*_s_dv" type="string" indexed="true" stored="true" docValues="true" multiValued="false"/>
|
||||
|
||||
|
|
|
@ -23,5 +23,17 @@
|
|||
<field name="_version_" type="long" indexed="true" stored="true"/>
|
||||
<field name="_root_" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
|
||||
<field name="id" type="string" indexed="true" stored="true"/>
|
||||
|
||||
<fieldType name="sortabletext" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true">
|
||||
<analyzer type="index">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
<analyzer type="query">
|
||||
<tokenizer class="solr.StandardTokenizerFactory"/>
|
||||
<filter class="solr.LowerCaseFilterFactory"/>
|
||||
</analyzer>
|
||||
</fieldType>
|
||||
|
||||
<uniqueKey>id</uniqueKey>
|
||||
</schema>
|
||||
|
|
|
@ -98,7 +98,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
|
|||
new FieldProps("floatField", "float", 1),
|
||||
new FieldProps("dateField", "date", 1),
|
||||
new FieldProps("stringField", "string", 1),
|
||||
new FieldProps("boolField", "boolean", 1)
|
||||
new FieldProps("boolField", "boolean", 1),
|
||||
new FieldProps("sortableText", "sortabletext", 1)
|
||||
));
|
||||
|
||||
fieldsToTestMulti =
|
||||
|
@ -109,10 +110,11 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
|
|||
new FieldProps("floatFieldMulti", "float", 5),
|
||||
new FieldProps("dateFieldMulti", "date", 5),
|
||||
new FieldProps("stringFieldMulti", "string", 5),
|
||||
new FieldProps("boolFieldMulti", "boolean", 2)
|
||||
new FieldProps("boolFieldMulti", "boolean", 2),
|
||||
new FieldProps("sortableFieldMulti", "sortabletext", 5)
|
||||
));
|
||||
|
||||
// Fields to test for grouping and sorting with sortMinssingFirst/Last.
|
||||
// Fields to test for grouping and sorting with sortMissingFirst/Last.
|
||||
fieldsToTestGroupSortFirst =
|
||||
Collections.unmodifiableList(Arrays.asList(
|
||||
new FieldProps("intGSF", "int"),
|
||||
|
@ -121,7 +123,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
|
|||
new FieldProps("floatGSF", "float"),
|
||||
new FieldProps("dateGSF", "date"),
|
||||
new FieldProps("stringGSF", "string"),
|
||||
new FieldProps("boolGSF", "boolean")
|
||||
new FieldProps("boolGSF", "boolean"),
|
||||
new FieldProps("sortableGSF", "sortabletext")
|
||||
));
|
||||
|
||||
fieldsToTestGroupSortLast =
|
||||
|
@ -132,7 +135,8 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
|
|||
new FieldProps("floatGSL", "float"),
|
||||
new FieldProps("dateGSL", "date"),
|
||||
new FieldProps("stringGSL", "string"),
|
||||
new FieldProps("boolGSL", "boolean")
|
||||
new FieldProps("boolGSL", "boolean"),
|
||||
new FieldProps("sortableGSL", "sortabletext")
|
||||
));
|
||||
|
||||
List<Update> updateList = new ArrayList<>(fieldsToTestSingle.size() +
|
||||
|
@ -210,18 +214,18 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
|
|||
final QueryResponse rsp = client.query(COLLECTION, solrQuery);
|
||||
|
||||
for (FieldProps props : fieldsToTestSingle) {
|
||||
testFacet(props, rsp);
|
||||
doTestFacet(props, rsp);
|
||||
}
|
||||
|
||||
for (FieldProps props : fieldsToTestMulti) {
|
||||
testFacet(props, rsp);
|
||||
doTestFacet(props, rsp);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// We should be able to sort things with missing first/last and that are _NOT_ present at all on one server.
|
||||
@Test
|
||||
@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12028")
|
||||
//@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-12028")
|
||||
public void testGroupingSorting() throws IOException, SolrServerException {
|
||||
CloudSolrClient client = cluster.getSolrClient();
|
||||
|
||||
|
@ -314,12 +318,17 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
|
|||
// 12-Jun-2018 @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 04-May-2018
|
||||
// commented 15-Sep-2018 @LuceneTestCase.BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") // 2-Aug-2018
|
||||
public void testGroupingDVOnly() throws IOException, SolrServerException {
|
||||
doGroupingDvOnly(fieldsToTestGroupSortFirst, "boolGSF");
|
||||
doGroupingDvOnly(fieldsToTestGroupSortLast, "boolGSL");
|
||||
}
|
||||
private void doGroupingDvOnly(List<FieldProps> fieldProps, String boolName) throws IOException, SolrServerException {
|
||||
|
||||
List<SolrInputDocument> docs = new ArrayList<>(50);
|
||||
for (int idx = 0; idx < 49; ++idx) {
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
doc.addField("id", idx);
|
||||
boolean doInc = ((idx % 7) == 0);
|
||||
for (FieldProps prop : fieldsToTestGroupSortFirst) {
|
||||
for (FieldProps prop : fieldProps) {
|
||||
doc.addField(prop.getName(), prop.getValue(doInc));
|
||||
}
|
||||
docs.add(doc);
|
||||
|
@ -337,13 +346,10 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
|
|||
.commit(client, COLLECTION);
|
||||
|
||||
// OK, we should have one group with 10 entries for null, a group with 1 entry and 7 groups with 7
|
||||
for (FieldProps prop : fieldsToTestGroupSortFirst) {
|
||||
// Special handling until SOLR-9802 is fixed
|
||||
for (FieldProps prop : fieldProps) {
|
||||
|
||||
// Solr 9802
|
||||
if (prop.getName().startsWith("date")) continue;
|
||||
// SOLR-9802 to here
|
||||
|
||||
// TODO: gsf fails this
|
||||
if (prop.getName().endsWith("GSF") ) continue;
|
||||
|
||||
final SolrQuery solrQuery = new SolrQuery(
|
||||
"q", "*:*",
|
||||
|
@ -376,7 +382,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
|
|||
case 25:
|
||||
case 24:
|
||||
++boolCount;
|
||||
assertEquals("We should have more counts for boolean fields!", "boolGSF", prop.getName());
|
||||
assertEquals("We should have more counts for boolean fields!", boolName, prop.getName());
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -442,7 +448,7 @@ public class DocValuesNotIndexedTest extends SolrCloudTestCase {
|
|||
}
|
||||
|
||||
|
||||
private void testFacet(FieldProps props, QueryResponse rsp) {
|
||||
private void doTestFacet(FieldProps props, QueryResponse rsp) {
|
||||
String name = props.getName();
|
||||
final List<FacetField.Count> counts = rsp.getFacetField(name).getValues();
|
||||
long expectedCount = props.getExpectedCount();
|
||||
|
@ -483,7 +489,7 @@ class FieldProps {
|
|||
base = Math.abs(random().nextLong());
|
||||
} else if (name.startsWith("bool")) {
|
||||
base = true; // Must start with a known value since bools only have a two values....
|
||||
} else if (name.startsWith("string")) {
|
||||
} else if (name.startsWith("string") || name.startsWith("sortable")) {
|
||||
base = "base_string_" + random().nextInt(1_000_000) + "_";
|
||||
} else {
|
||||
throw new RuntimeException("Should have found a prefix for the field before now!");
|
||||
|
@ -531,7 +537,7 @@ class FieldProps {
|
|||
base = !((boolean) base);
|
||||
return ret;
|
||||
}
|
||||
if (name.startsWith("string")) {
|
||||
if (name.startsWith("string") || name.startsWith("sortable")) {
|
||||
return String.format(Locale.ROOT, "%s_%08d", (String) base, counter);
|
||||
}
|
||||
throw new RuntimeException("Should have found a prefix for the field before now!");
|
||||
|
|
|
@ -129,7 +129,10 @@ public class TestExportWriter extends SolrTestCaseJ4 {
|
|||
"datedv_m", "2017-06-16T01:00:00Z",
|
||||
"datedv_m", "2017-06-16T02:00:00Z",
|
||||
"datedv_m", "2017-06-16T03:00:00Z",
|
||||
"datedv_m", "2017-06-16T04:00:00Z"));
|
||||
"datedv_m", "2017-06-16T04:00:00Z",
|
||||
"sortabledv_m", "this is some text one_1",
|
||||
"sortabledv_m", "this is some text two_1",
|
||||
"sortabledv_m", "this is some text three_1"));
|
||||
|
||||
assertU(adoc("id","7",
|
||||
"floatdv","2.1",
|
||||
|
@ -166,7 +169,8 @@ public class TestExportWriter extends SolrTestCaseJ4 {
|
|||
"int_is_t", "1",
|
||||
"int_is_t", "1",
|
||||
"int_is_t", "1",
|
||||
"int_is_t", "1"));
|
||||
"int_is_t", "1",
|
||||
"sortabledv", "this is some text_1"));
|
||||
assertU(commit());
|
||||
assertU(adoc("id","8",
|
||||
"floatdv","2.1",
|
||||
|
@ -191,7 +195,11 @@ public class TestExportWriter extends SolrTestCaseJ4 {
|
|||
"int_is_p", "1",
|
||||
"int_is_p", "1",
|
||||
"int_is_p", "1",
|
||||
"int_is_p", "1"));
|
||||
"int_is_p", "1",
|
||||
"sortabledv", "this is some text_2",
|
||||
"sortabledv_m", "this is some text one_2",
|
||||
"sortabledv_m", "this is some text two_2",
|
||||
"sortabledv_m", "this is some text three_2"));
|
||||
assertU(commit());
|
||||
|
||||
|
||||
|
@ -491,6 +499,24 @@ public class TestExportWriter extends SolrTestCaseJ4 {
|
|||
|
||||
s = h.query(req("q", "id:8", "qt", "/export", "fl", "stringdv", "sort", "intdv asc"));
|
||||
assertJsonEquals(s, "{\"responseHeader\": {\"status\": 0}, \"response\":{\"numFound\":1, \"docs\":[{\"stringdv\":\"chello \\\"world\\\"\"}]}}");
|
||||
|
||||
// Test sortable text fields:
|
||||
s = h.query(req("q", "id:(1 OR 3 OR 8)", "qt", "/export", "fl", "sortabledv_m,sortabledv", "sort", "sortabledv asc"));
|
||||
assertJsonEquals(s, "{\n" +
|
||||
" \"responseHeader\":{\"status\":0},\n" +
|
||||
" \"response\":{\n" +
|
||||
" \"numFound\":3,\n" +
|
||||
" \"docs\":[{\n" +
|
||||
" \"sortabledv_m\":[\"this is some text one_1\"\n" +
|
||||
" ,\"this is some text three_1\"\n" +
|
||||
" ,\"this is some text two_1\"]}\n" +
|
||||
" ,{\n" +
|
||||
" \"sortabledv\":\"this is some text_1\"}\n" +
|
||||
" ,{\n" +
|
||||
" \"sortabledv_m\":[\"this is some text one_2\"\n" +
|
||||
" ,\"this is some text three_2\"\n" +
|
||||
" ,\"this is some text two_2\"],\n" +
|
||||
" \"sortabledv\":\"this is some text_2\"}]}}");
|
||||
}
|
||||
|
||||
private void assertJsonEquals(String actual, String expected) {
|
||||
|
|
Loading…
Reference in New Issue