SOLR-5652: test fixes: add 'plain' docvalue field variants to schema, and check codecs for support of missing docValues before trying to sort on them

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1562155 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Chris M. Hostetter 2014-01-28 19:03:35 +00:00
parent fd35f2609a
commit d593f195ba
4 changed files with 217 additions and 30 deletions

View File

@ -1365,6 +1365,18 @@ public abstract class LuceneTestCase extends Assert {
}
}
/** Returns true if the default codec supports single valued docvalues with missing values */
public static boolean defaultCodecSupportsMissingDocValues() {
String name = Codec.getDefault().getName();
if (name.equals("Lucene3x") ||
name.equals("Lucene40") || name.equals("Appending") ||
name.equals("Lucene41") ||
name.equals("Lucene42")) {
return false;
}
return true;
}
/** Returns true if the default codec supports SORTED_SET docvalues */
public static boolean defaultCodecSupportsSortedSet() {
String name = Codec.getDefault().getName();

View File

@ -32,36 +32,42 @@ NOTE: Tests expect every field in this schema to be sortable.
<field name="str" type="str" />
<field name="str_last" type="str_last" />
<field name="str_first" type="str_first" />
<field name="str_dv" type="str_dv" />
<field name="str_dv_last" type="str_dv_last" />
<field name="str_dv_first" type="str_dv_first" />
<field name="bin" type="bin" />
<field name="bin_last" type="bin_last" />
<field name="bin_first" type="bin_first" />
<field name="bin_dv" type="bin_dv" />
<field name="bin_dv_last" type="bin_dv_last" />
<field name="bin_dv_first" type="bin_dv_first" />
<field name="int" type="int" />
<field name="int_last" type="int_last" />
<field name="int_first" type="int_first" />
<field name="int_dv" type="int_dv" />
<field name="int_dv_last" type="int_dv_last" />
<field name="int_dv_first" type="int_dv_first" />
<field name="long" type="long" />
<field name="long_last" type="long_last" />
<field name="long_first" type="long_first" />
<field name="long_dv" type="long_dv" />
<field name="long_dv_last" type="long_dv_last" />
<field name="long_dv_first" type="long_dv_first" />
<field name="float" type="float" />
<field name="float_last" type="float_last" />
<field name="float_first" type="float_first" />
<field name="float_dv" type="float_dv" />
<field name="float_dv_last" type="float_dv_last" />
<field name="float_dv_first" type="float_dv_first" />
<field name="double" type="double" />
<field name="double_last" type="double_last" />
<field name="double_first" type="double_first" />
<field name="double_dv" type="double_dv" />
<field name="double_dv_last" type="double_dv_last" />
<field name="double_dv_first" type="double_dv_first" />
@ -73,26 +79,37 @@ NOTE: Tests expect every field in this schema to be sortable.
<copyField source="str" dest="str_last" />
<copyField source="str" dest="str_first" />
<copyField source="str" dest="str_dv" />
<copyField source="str" dest="str_dv_last" />
<copyField source="str" dest="str_dv_first" />
<copyField source="bin" dest="bin_last" />
<copyField source="bin" dest="bin_first" />
<copyField source="bin" dest="bin_dv" />
<copyField source="bin" dest="bin_dv_last" />
<copyField source="bin" dest="bin_dv_first" />
<copyField source="int" dest="int_last" />
<copyField source="int" dest="int_first" />
<copyField source="int" dest="int_dv" />
<copyField source="int" dest="int_dv_last" />
<copyField source="int" dest="int_dv_first" />
<copyField source="long" dest="long_last" />
<copyField source="long" dest="long_first" />
<copyField source="long" dest="long_dv" />
<copyField source="long" dest="long_dv_last" />
<copyField source="long" dest="long_dv_first" />
<copyField source="float" dest="float_last" />
<copyField source="float" dest="float_first" />
<copyField source="float" dest="float_dv" />
<copyField source="float" dest="float_dv_last" />
<copyField source="float" dest="float_dv_first" />
<copyField source="double" dest="double_last" />
<copyField source="double" dest="double_first" />
<copyField source="double" dest="double_dv" />
<copyField source="double" dest="double_dv_last" />
<copyField source="double" dest="double_dv_first" />
@ -101,36 +118,42 @@ NOTE: Tests expect every field in this schema to be sortable.
<fieldtype name="str" class="solr.StrField" stored="true" indexed="true" />
<fieldtype name="str_last" class="solr.StrField" stored="true" indexed="true" sortMissingLast="true"/>
<fieldtype name="str_first" class="solr.StrField" stored="true" indexed="true" sortMissingFirst="true"/>
<fieldtype name="str_dv" class="solr.StrField" stored="true" indexed="false" docValues="true"/>
<fieldtype name="str_dv_last" class="solr.StrField" stored="true" indexed="false" docValues="true" sortMissingLast="true"/>
<fieldtype name="str_dv_first" class="solr.StrField" stored="true" indexed="false" docValues="true" sortMissingFirst="true"/>
<fieldtype name="bin" class="solr.SortableBinaryField" stored="true" indexed="true" />
<fieldtype name="bin_last" class="solr.SortableBinaryField" stored="true" indexed="true" sortMissingLast="true"/>
<fieldtype name="bin_first" class="solr.SortableBinaryField" stored="true" indexed="true" sortMissingFirst="true"/>
<fieldtype name="bin_dv" class="solr.SortableBinaryField" stored="true" indexed="false" docValues="true"/>
<fieldtype name="bin_dv_last" class="solr.SortableBinaryField" stored="true" indexed="false" docValues="true" sortMissingLast="true"/>
<fieldtype name="bin_dv_first" class="solr.SortableBinaryField" stored="true" indexed="false" docValues="true" sortMissingFirst="true"/>
<fieldtype name="int" class="solr.TrieIntField" stored="true" indexed="true" />
<fieldtype name="int_last" class="solr.TrieIntField" stored="true" indexed="true" sortMissingLast="true"/>
<fieldtype name="int_first" class="solr.TrieIntField" stored="true" indexed="true" sortMissingFirst="true"/>
<fieldtype name="int_dv" class="solr.TrieIntField" stored="true" indexed="false" docValues="true"/>
<fieldtype name="int_dv_last" class="solr.TrieIntField" stored="true" indexed="false" docValues="true" sortMissingLast="true"/>
<fieldtype name="int_dv_first" class="solr.TrieIntField" stored="true" indexed="false" docValues="true" sortMissingFirst="true"/>
<fieldtype name="long" class="solr.TrieLongField" stored="true" indexed="true" />
<fieldtype name="long_last" class="solr.TrieLongField" stored="true" indexed="true" sortMissingLast="true"/>
<fieldtype name="long_first" class="solr.TrieLongField" stored="true" indexed="true" sortMissingFirst="true"/>
<fieldtype name="long_dv" class="solr.TrieLongField" stored="true" indexed="false" docValues="true"/>
<fieldtype name="long_dv_last" class="solr.TrieLongField" stored="true" indexed="false" docValues="true" sortMissingLast="true"/>
<fieldtype name="long_dv_first" class="solr.TrieLongField" stored="true" indexed="false" docValues="true" sortMissingFirst="true"/>
<fieldtype name="float" class="solr.TrieFloatField" stored="true" indexed="true" />
<fieldtype name="float_last" class="solr.TrieFloatField" stored="true" indexed="true" sortMissingLast="true"/>
<fieldtype name="float_first" class="solr.TrieFloatField" stored="true" indexed="true" sortMissingFirst="true"/>
<fieldtype name="float_dv" class="solr.TrieFloatField" stored="true" indexed="false" docValues="true"/>
<fieldtype name="float_dv_last" class="solr.TrieFloatField" stored="true" indexed="false" docValues="true" sortMissingLast="true"/>
<fieldtype name="float_dv_first" class="solr.TrieFloatField" stored="true" indexed="false" docValues="true" sortMissingFirst="true"/>
<fieldtype name="double" class="solr.TrieDoubleField" stored="true" indexed="true" />
<fieldtype name="double_last" class="solr.TrieDoubleField" stored="true" indexed="true" sortMissingLast="true"/>
<fieldtype name="double_first" class="solr.TrieDoubleField" stored="true" indexed="true" sortMissingFirst="true"/>
<fieldtype name="double_dv" class="solr.TrieDoubleField" stored="true" indexed="false" docValues="true"/>
<fieldtype name="double_dv_last" class="solr.TrieDoubleField" stored="true" indexed="false" docValues="true" sortMissingLast="true"/>
<fieldtype name="double_dv_first" class="solr.TrieDoubleField" stored="true" indexed="false" docValues="true" sortMissingFirst="true"/>

View File

@ -120,6 +120,9 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
String cursorMark;
SolrParams params = null;
final String intsort = "int" + (random().nextBoolean() ? "" : "_dv");
final String intmissingsort = defaultCodecSupportsMissingDocValues() ? intsort : "int";
// trivial base case: ensure cursorMark against an empty index doesn't blow up
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*",
@ -145,7 +148,7 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
assertU(adoc("id", "6", "str", "a", "float", "64.5", "int", "7"));
assertU(adoc("id", "1", "str", "a", "float", "64.5", "int", "7"));
assertU(adoc("id", "4", "str", "a", "float", "11.1", "int", "6"));
assertU(adoc("id", "3", "str", "a", "float", "11.1", "int", "3"));
assertU(adoc("id", "3", "str", "a", "float", "11.1")); // int is missing
assertU(commit());
// base case: ensure cursorMark that matches no docs doesn't blow up
@ -241,7 +244,7 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
"facet", "true",
"facet.field", "str",
"json.nl", "map",
"sort", "int asc, id asc");
"sort", intsort + " asc, id asc");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
,"/response/numFound==8"
,"/response/start==0"
@ -269,6 +272,66 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
,"/facet_counts/facet_fields/str=={'a':4,'b':1,'c':3}"
));
// int missing first sort with dups, id tie breaker
cursorMark = CURSOR_MARK_START;
params = params("q", "-int:2001 -int:4055",
"rows","3",
"fl", "id",
"json.nl", "map",
"sort", intmissingsort + "_first asc, id asc");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
,"/response/numFound==8"
,"/response/start==0"
,"/response/docs==[{'id':3},{'id':7},{'id':0}]"
);
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
,"/response/numFound==8"
,"/response/start==0"
,"/response/docs==[{'id':4},{'id':1},{'id':6}]"
);
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
,"/response/numFound==8"
,"/response/start==0"
,"/response/docs==[{'id':9},{'id':2}]"
);
// no more, so no change to cursorMark, and no new docs
assertEquals(cursorMark,
assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
,"/response/numFound==8"
,"/response/start==0"
,"/response/docs==[]"
));
// int missing last sort with dups, id tie breaker
cursorMark = CURSOR_MARK_START;
params = params("q", "-int:2001 -int:4055",
"rows","3",
"fl", "id",
"json.nl", "map",
"sort", intmissingsort + "_last asc, id asc");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
,"/response/numFound==8"
,"/response/start==0"
,"/response/docs==[{'id':7},{'id':0},{'id':4}]"
);
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
,"/response/numFound==8"
,"/response/start==0"
,"/response/docs==[{'id':1},{'id':6},{'id':9}]"
);
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
,"/response/numFound==8"
,"/response/start==0"
,"/response/docs==[{'id':2},{'id':3}]"
);
// no more, so no change to cursorMark, and no new docs
assertEquals(cursorMark,
assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
,"/response/numFound==8"
,"/response/start==0"
,"/response/docs==[]"
));
// string sort with dups, id tie breaker
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*",
@ -298,7 +361,7 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
params = params("q", "*:*",
"rows","2",
"fl", "id",
"sort", "float asc, int desc, id desc");
"sort", "float asc, "+intsort+" desc, id desc");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
,"/response/numFound==10"
,"/response/start==0"
@ -338,7 +401,7 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
params = params("q", "id:3 id:7",
"rows","111",
"fl", "id",
"sort", "int asc, id asc");
"sort", intsort + " asc, id asc");
cursorMark = assertCursor(req(params, CURSOR_MARK_PARAM, cursorMark)
,"/response/numFound==2"
,"/response/start==0"
@ -367,7 +430,7 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
ids = assertFullWalkNoDups(9, params("q", "*:*",
"rows", "3",
"fq", "-id:6",
"sort", "float desc, id asc, int asc"));
"sort", "float desc, id asc, "+intsort+" asc"));
assertEquals(9, ids.size());
assertFalse("matched on id:6 unexpectedly", ids.exists(6));
ids = assertFullWalkNoDups(9, params("q", "float:[0 TO *] int:7 id:6",
@ -451,7 +514,7 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
assertU(adoc("id", "3", "str", "a", "float", "11.1", "int", "3"));
assertU(commit());
final Collection<String> allFieldNames = getAllFieldNames();
final Collection<String> allFieldNames = getAllSortFieldNames();
final SolrInfoMBean filterCacheStats
= h.getCore().getInfoRegistry().get("filterCache");
@ -488,7 +551,7 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
/** randomized testing of a non-trivial number of docs using assertFullWalkNoDups
*/
public void testRandomSortsOnLargeIndex() throws Exception {
final Collection<String> allFieldNames = getAllFieldNames();
final Collection<String> allFieldNames = getAllSortFieldNames();
final int initialDocs = _TestUtil.nextInt(random(),100,200);
final int totalDocs = atLeast(5000);
@ -555,16 +618,42 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
}
/**
* An immutable list of the fields in the schema (excluding _version_) in a
* An immutable list of the fields in the schema that can be used for sorting,
* deterministically random order.
*/
private List<String> getAllFieldNames() {
private List<String> getAllSortFieldNames() {
return pruneAndDeterministicallySort
(h.getCore().getLatestSchema().getFields().keySet());
}
/**
* <p>
* Given a list of field names in the schema, returns an immutable list in
* deterministically random order with the following things removed:
* </p>
* <ul>
* <li><code>_version_</code> is removed</li>
* <li><code>dv_last</code> and <code>dv_first</code> fields are removed
* if the codec doesn't support them</li>
* </ul>
* @see #defaultCodecSupportsMissingDocValues
*/
public static List<String> pruneAndDeterministicallySort(Collection<String> raw) {
final boolean prune_dv_missing = ! defaultCodecSupportsMissingDocValues();
ArrayList<String> names = new ArrayList<String>(37);
for (String f : h.getCore().getLatestSchema().getFields().keySet()) {
if (! f.equals("_version_")) {
names.add(f);
for (String f : raw) {
if (f.equals("_version_")) {
continue;
}
if (prune_dv_missing && (f.endsWith("_dv_last") || f.endsWith("_dv_first")) ) {
continue;
}
names.add(f);
}
Collections.sort(names);
Collections.shuffle(names,random());
return Collections.<String>unmodifiableList(names);
@ -628,9 +717,9 @@ public class CursorPagingTest extends SolrTestCaseJ4 {
}
assertU(commit());
Collection<String> allFieldNames = getAllFieldNames();
Collection<String> allFieldNames = getAllSortFieldNames();
String[] fieldNames = new String[allFieldNames.size()];
getAllFieldNames().toArray(fieldNames);
allFieldNames.toArray(fieldNames);
String f = fieldNames[_TestUtil.nextInt(random(), 0, fieldNames.length - 1)];
String order = 0 == _TestUtil.nextInt(random(), 0, 1) ? " asc" : " desc";
String sort = f + order + (f.equals("id") ? "" : ", id" + order);

View File

@ -149,7 +149,10 @@ public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase {
String cursorMark = CURSOR_MARK_START;
SolrParams params = null;
QueryResponse rsp = null;
final String intsort = "int" + (random().nextBoolean() ? "" : "_dv");
final String intmissingsort = defaultCodecSupportsMissingDocValues() ? intsort : "int";
// trivial base case: ensure cursorMark against an empty index doesn't blow up
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*",
@ -173,7 +176,7 @@ public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase {
indexDoc(sdoc("id", "6", "str", "a", "float", "64.5", "int", "7"));
indexDoc(sdoc("id", "1", "str", "a", "float", "64.5", "int", "7"));
indexDoc(sdoc("id", "4", "str", "a", "float", "11.1", "int", "6"));
indexDoc(sdoc("id", "3", "str", "a", "float", "11.1", "int", "3"));
indexDoc(sdoc("id", "3", "str", "a", "float", "11.1")); // int is missing
commit();
// base case: ensure cursorMark that matches no docs doesn't blow up
@ -248,7 +251,7 @@ public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase {
"facet", "true",
"facet.field", "str",
"json.nl", "map",
"sort", "int asc, id asc");
"sort", intsort + " asc, id asc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
@ -282,6 +285,70 @@ public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase {
assertEquals("no more docs, but cursorMark has changed",
cursorMark, assertHashNextCursorMark(rsp));
// int missing first sort with dups, id tie breaker
cursorMark = CURSOR_MARK_START;
params = params("q", "-int:2001 -int:4055",
"rows","3",
"fl", "id",
"json.nl", "map",
"sort", intmissingsort + "_first asc, id asc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 3, 7, 0);
cursorMark = assertHashNextCursorMark(rsp);
//
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 4, 1, 6);
cursorMark = assertHashNextCursorMark(rsp);
//
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 9, 2);
cursorMark = assertHashNextCursorMark(rsp);
//
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals("no more docs, but cursorMark has changed",
cursorMark, assertHashNextCursorMark(rsp));
// int missing last sort with dups, id tie breaker
cursorMark = CURSOR_MARK_START;
params = params("q", "-int:2001 -int:4055",
"rows","3",
"fl", "id",
"json.nl", "map",
"sort", intmissingsort + "_last asc, id asc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 7, 0, 4);
cursorMark = assertHashNextCursorMark(rsp);
//
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 1, 6, 9);
cursorMark = assertHashNextCursorMark(rsp);
//
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp, 2, 3);
cursorMark = assertHashNextCursorMark(rsp);
//
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(8, rsp);
assertStartsAt(0, rsp);
assertDocList(rsp);
assertEquals("no more docs, but cursorMark has changed",
cursorMark, assertHashNextCursorMark(rsp));
// string sort with dups, id tie breaker
cursorMark = CURSOR_MARK_START;
params = params("q", "*:*",
@ -312,7 +379,7 @@ public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase {
params = params("q", "*:*",
"rows","2",
"fl", "id",
"sort", "float asc, int desc, id desc");
"sort", "float asc, "+intsort+" desc, id desc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(10, rsp);
assertStartsAt(0, rsp);
@ -356,7 +423,7 @@ public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase {
params = params("q", "id:3 id:7",
"rows","111",
"fl", "id",
"sort", "int asc, id asc");
"sort", intsort + " asc, id asc");
rsp = query(p(params, CURSOR_MARK_PARAM, cursorMark));
assertNumFound(2, rsp);
assertStartsAt(0, rsp);
@ -449,7 +516,7 @@ public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase {
/** randomized testing of a non-trivial number of docs using assertFullWalkNoDups
*/
public void doRandomSortsOnLargeIndex() throws Exception {
final Collection<String> allFieldNames = getAllFieldNames();
final Collection<String> allFieldNames = getAllSortFieldNames();
final int numInitialDocs = _TestUtil.nextInt(random(),100,200);
final int totalDocs = atLeast(5000);
@ -538,24 +605,20 @@ public class DistribCursorPagingTest extends AbstractFullDistribZkTestBase {
/**
* Asks the LukeRequestHandler on the control client for a list of the fields in the
* schema (excluding _version_) and then returns the field names in a deterministically
* random order.
* schema and then prunes that list down to just the fields that can be used for sorting,
* and returns them as an immutable list in a deterministically random order.
*/
private List<String> getAllFieldNames() throws SolrServerException, IOException {
private List<String> getAllSortFieldNames() throws SolrServerException, IOException {
LukeRequest req = new LukeRequest("/admin/luke");
req.setShowSchema(true);
NamedList<Object> rsp = controlClient.request(req);
NamedList<Object> fields = (NamedList) ((NamedList)rsp.get("schema")).get("fields");
ArrayList<String> names = new ArrayList<String>(fields.size());
for (Map.Entry<String,Object> item : fields) {
String f = item.getKey();
if (! f.equals("_version_")) {
names.add(item.getKey());
}
names.add(item.getKey());
}
Collections.sort(names);
Collections.shuffle(names,random());
return Collections.<String>unmodifiableList(names);
return CursorPagingTest.pruneAndDeterministicallySort(names);
}
/**