mirror of https://github.com/apache/lucene.git
Tighten up the test variability to reduce the risk of some random collection/query combinations that can't be verified
This commit is contained in:
parent
8997d41357
commit
0545901c24
|
@ -61,19 +61,18 @@ import org.slf4j.LoggerFactory;
|
||||||
* Note that unlike normal facet "count" verification, using a high limit + overrequest isn't a substitute
|
* Note that unlike normal facet "count" verification, using a high limit + overrequest isn't a substitute
|
||||||
* for refinement in order to ensure accurate "skg" computation across shards. For that reason, this
|
* for refinement in order to ensure accurate "skg" computation across shards. For that reason, this
|
||||||
* tests forces <code>refine: true</code> (unlike {@link TestCloudJSONFacetJoinDomain}) and specifices a
|
* tests forces <code>refine: true</code> (unlike {@link TestCloudJSONFacetJoinDomain}) and specifices a
|
||||||
* <code>domain: { 'query':'{!v=$back' }</code> for every facet, in order to garuntee that all popularity
|
* <code>domain: { 'query':'*:*' }</code> for every facet, in order to garuntee that all shards
|
||||||
* & relatedness values returned can be proven with validation requests.
|
* participate in all facets, so that the popularity & relatedness values returned can be proven
|
||||||
|
* with validation requests.
|
||||||
* </p>
|
* </p>
|
||||||
* <p>
|
* <p>
|
||||||
* (Refinement alone is not enough. Using the background query as the facet domain is neccessary to
|
* (Refinement alone is not enough. Using the '*:*' query as the facet domain is neccessary to
|
||||||
* prevent situations where a single shardX may return candidate bucket with no child-buckets due to
|
* prevent situations where a single shardX may return candidate bucket with no child-buckets due to
|
||||||
* the normal facet intersections, but when refined on other shardY(s), can produce "high scoring"
|
* the normal facet intersections, but when refined on other shardY(s), can produce "high scoring"
|
||||||
* SKG child-buckets, which would then be missing the foreground/background "size" contributions from
|
* SKG child-buckets, which would then be missing the foreground/background "size" contributions from
|
||||||
* shardX.
|
* shardX.
|
||||||
* </p>
|
* </p>
|
||||||
*
|
*
|
||||||
*
|
|
||||||
*
|
|
||||||
* @see TestCloudJSONFacetJoinDomain
|
* @see TestCloudJSONFacetJoinDomain
|
||||||
*/
|
*/
|
||||||
@Slow
|
@Slow
|
||||||
|
@ -138,8 +137,19 @@ public class TestCloudJSONFacetSKG extends SolrCloudTestCase {
|
||||||
for (int id = 0; id < numDocs; id++) {
|
for (int id = 0; id < numDocs; id++) {
|
||||||
SolrInputDocument doc = sdoc("id", ""+id);
|
SolrInputDocument doc = sdoc("id", ""+id);
|
||||||
for (int fieldNum = 0; fieldNum < MAX_FIELD_NUM; fieldNum++) {
|
for (int fieldNum = 0; fieldNum < MAX_FIELD_NUM; fieldNum++) {
|
||||||
// NOTE: some docs may have no value in a field
|
// NOTE: we ensure every doc has at least one value in each field
|
||||||
final int numValsThisDoc = TestUtil.nextInt(random(), 0, (usually() ? 5 : 10));
|
// that way, if a term is returned for a parent there there is garunteed to be at least one
|
||||||
|
// one term in the child facet as well.
|
||||||
|
//
|
||||||
|
// otherwise, we'd face the risk of a single shardX returning parentTermX as a top term for
|
||||||
|
// the parent facet, but having no child terms -- meanwhile on refinement another shardY that
|
||||||
|
// did *not* returned parentTermX in phase#1, could return some *new* child terms under
|
||||||
|
// parentTermX, but their stats would not include the bgCount from shardX.
|
||||||
|
//
|
||||||
|
// in normal operation, this is an edge case that isn't a big deal because the ratios &
|
||||||
|
// relatedness scores are statistically approximate, but for the purpose of this test where
|
||||||
|
// we verify correctness via exactness we need all shards to contribute to the SKG statistics
|
||||||
|
final int numValsThisDoc = TestUtil.nextInt(random(), 1, (usually() ? 5 : 10));
|
||||||
for (int v = 0; v < numValsThisDoc; v++) {
|
for (int v = 0; v < numValsThisDoc; v++) {
|
||||||
final String fieldValue = randFieldValue(fieldNum);
|
final String fieldValue = randFieldValue(fieldNum);
|
||||||
|
|
||||||
|
@ -259,7 +269,6 @@ public class TestCloudJSONFacetSKG extends SolrCloudTestCase {
|
||||||
|
|
||||||
final int numIters = atLeast(10);
|
final int numIters = atLeast(10);
|
||||||
for (int iter = 0; iter < numIters && 0 < maxBucketsToCheck.get(); iter++) {
|
for (int iter = 0; iter < numIters && 0 < maxBucketsToCheck.get(); iter++) {
|
||||||
|
|
||||||
assertFacetSKGsAreCorrect(maxBucketsToCheck, TermFacet.buildRandomFacets(),
|
assertFacetSKGsAreCorrect(maxBucketsToCheck, TermFacet.buildRandomFacets(),
|
||||||
buildRandomQuery(), buildRandomQuery(), buildRandomQuery());
|
buildRandomQuery(), buildRandomQuery(), buildRandomQuery());
|
||||||
}
|
}
|
||||||
|
@ -489,8 +498,8 @@ public class TestCloudJSONFacetSKG extends SolrCloudTestCase {
|
||||||
final StringBuilder sb
|
final StringBuilder sb
|
||||||
= new StringBuilder("{ type:terms, field:" + field + limitStr + overrequestStr + sortStr);
|
= new StringBuilder("{ type:terms, field:" + field + limitStr + overrequestStr + sortStr);
|
||||||
|
|
||||||
// see class javadocs for why we always use refine:true & the query:$back domain for this test.
|
// see class javadocs for why we always use refine:true & the query:'*:*' domain for this test.
|
||||||
sb.append(", refine: true, domain: { query: '{!v=$back}' }, facet:");
|
sb.append(", refine: true, domain: { query: '*:*' }, facet:");
|
||||||
sb.append(toJSONFacetParamValue(subFacets, "skg : 'relatedness($fore,$back)'"));
|
sb.append(toJSONFacetParamValue(subFacets, "skg : 'relatedness($fore,$back)'"));
|
||||||
sb.append("}");
|
sb.append("}");
|
||||||
return sb;
|
return sb;
|
||||||
|
|
Loading…
Reference in New Issue