mirror of https://github.com/apache/lucene.git
SOLR-11363: handle repeated values in points docvalues fields
This commit is contained in:
parent
8e12f20113
commit
7cc9ee6563
|
@ -133,6 +133,10 @@ Bug Fixes
|
|||
|
||||
* SOLR-11348: Fix the DIH database example (James Dyer)
|
||||
|
||||
* SOLR-11363: JSON Facet API: repeated values in a numeric points field with docValues enabled
|
||||
were double counted. (Hossman, yonik)
|
||||
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -390,9 +390,16 @@ class FacetFieldProcessorByHashDV extends FacetFieldProcessor {
|
|||
values.advance(segDoc);
|
||||
}
|
||||
if (segDoc == values.docID()) {
|
||||
for (int i = 0; i < values.docValueCount(); i++) {
|
||||
collectValFirstPhase(segDoc, values.nextValue());
|
||||
long l = values.nextValue(); // This document must have at least one value
|
||||
collectValFirstPhase(segDoc, l);
|
||||
for (int i = 1; i < values.docValueCount(); i++) {
|
||||
long lnew = values.nextValue();
|
||||
if (lnew != l) { // Skip the value if it's equal to the last one, we don't want to double-count it
|
||||
collectValFirstPhase(segDoc, lnew);
|
||||
}
|
||||
l = lnew;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
});
|
||||
|
|
|
@ -235,6 +235,7 @@ public class HLLAgg extends StrAggValueSource {
|
|||
@Override
|
||||
protected void collectValues(int doc, HLL hll) throws IOException {
|
||||
for (int i = 0; i < values.docValueCount(); i++) {
|
||||
// duplicates may be produced for a single doc, but won't matter here.
|
||||
long val = values.nextValue();
|
||||
long hash = Hash.fmix64(val);
|
||||
hll.addRaw(hash);
|
||||
|
|
|
@ -254,6 +254,7 @@ public class UniqueAgg extends StrAggValueSource {
|
|||
/**
 * Adds every value that {@code values} reports for its current document to {@code set}.
 *
 * NOTE(review): {@code doc} is not read in this body; presumably the caller has already
 * positioned {@code values} on that document -- confirm against the enclosing class.
 *
 * @param doc the document being collected (unused in this body)
 * @param set accumulator that receives each value
 * @throws IOException as declared; presumably raised by the underlying value reads
 */
@Override
|
||||
protected void collectValues(int doc, LongSet set) throws IOException {
|
||||
// One nextValue() call per value reported by docValueCount().
for (int i = 0; i < values.docValueCount(); i++) {
|
||||
// The same value may be produced more than once for a single doc; that is harmless here
// because the values only end up in the set (assumes LongSet ignores repeats -- confirm).
|
||||
set.add(values.nextValue());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -60,6 +60,7 @@ public class GraphPointsCollector extends GraphEdgeCollector {
|
|||
if (valuesDoc == doc) {
|
||||
int count = values.docValueCount();
|
||||
for (int i = 0; i < count; i++) {
|
||||
// duplicates may be produced for a single doc, but won't matter here.
|
||||
long v = values.nextValue();
|
||||
set.add(v);
|
||||
}
|
||||
|
|
|
@ -17,7 +17,6 @@
|
|||
package org.apache.solr.cloud;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import java.lang.invoke.MethodHandles;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
|
@ -29,10 +28,9 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
|
||||
import org.apache.commons.lang.StringUtils;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.apache.solr.SolrTestCaseJ4.SuppressPointFields;
|
||||
import org.apache.solr.client.solrj.SolrClient;
|
||||
import org.apache.solr.client.solrj.SolrServerException;
|
||||
import org.apache.solr.client.solrj.embedded.JettySolrRunner;
|
||||
|
@ -46,7 +44,6 @@ import org.apache.solr.common.SolrInputDocument;
|
|||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
import org.slf4j.Logger;
|
||||
|
@ -62,7 +59,6 @@ import org.slf4j.LoggerFactory;
|
|||
*
|
||||
* @see TestCloudPivotFacet
|
||||
*/
|
||||
@SuppressPointFields(bugUrl="https://issues.apache.org/jira/browse/SOLR-10939")
|
||||
public class TestCloudJSONFacetJoinDomain extends SolrCloudTestCase {
|
||||
|
||||
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
|
||||
|
@ -600,9 +596,21 @@ public class TestCloudJSONFacetJoinDomain extends SolrCloudTestCase {
|
|||
final String[] suffixes = random().nextBoolean() ? STR_FIELD_SUFFIXES : INT_FIELD_SUFFIXES;
|
||||
|
||||
final boolean noJoin = random().nextBoolean();
|
||||
final String from = noJoin ? null : field(suffixes, random().nextInt(MAX_FIELD_NUM));
|
||||
final String to = noJoin ? null : field(suffixes, random().nextInt(MAX_FIELD_NUM));
|
||||
|
||||
|
||||
String from = null;
|
||||
String to = null;
|
||||
for (;;) {
|
||||
if (noJoin) break;
|
||||
from = field(suffixes, random().nextInt(MAX_FIELD_NUM));
|
||||
to = field(suffixes, random().nextInt(MAX_FIELD_NUM));
|
||||
// HACK: joined numeric point fields need docValues.. for now just skip _is fields if we are dealing with points.
|
||||
if (Boolean.getBoolean(NUMERIC_POINTS_SYSPROP) && (from.endsWith("_is") || to.endsWith("_is")))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
// keep it simple, only filter on string fields - not point of test
|
||||
final String filterField = strfield(random().nextInt(MAX_FIELD_NUM));
|
||||
|
||||
|
|
|
@ -42,6 +42,10 @@ import org.junit.AfterClass;
|
|||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
||||
// Related tests:
|
||||
// TestCloudJSONFacetJoinDomain for random field faceting tests with domain modifications
|
||||
// TestJsonFacetRefinement for refinement tests
|
||||
|
||||
@LuceneTestCase.SuppressCodecs({"Lucene3x","Lucene40","Lucene41","Lucene42","Lucene45","Appending"})
|
||||
public class TestJsonFacets extends SolrTestCaseHS {
|
||||
|
||||
|
@ -208,6 +212,28 @@ public class TestJsonFacets extends SolrTestCaseHS {
|
|||
client.commit();
|
||||
}
|
||||
|
||||
/**
 * Regression test for repeated numeric values within a single document (SOLR-11363).
 *
 * Indexes one document whose {@code num_is} field is given the value 0 twice, then runs a
 * JSON facet request containing a terms facet, an hll() aggregation and a range facet over
 * that field. The expected response counts the document/value exactly once in each of them.
 */
@Test
|
||||
public void testRepeatedNumerics() throws Exception {
|
||||
Client client = Client.localClient();
|
||||
String field = "num_is"; // docValues of a multi-valued points field can contain duplicate values; make sure they don't inflate our counts.
|
||||
// The field under test is deliberately supplied twice with the same value ("0").
client.add(sdoc("id", "1", "cat_s", "A", "where_s", "NY", "num_d", "4", "num_i", "2", "val_b", "true", "sparse_s", "one", field,"0", field,"0"), null);
|
||||
client.commit();
|
||||

||||
// ${field} in the facet JSON refers to the "field" request parameter passed alongside "q".
client.testJQ(params("q", "id:1", "field", field
|
||||
, "json.facet", "{" +
|
||||
"f1:{terms:${field}}" +
|
||||
",f2:'hll(${field})'" +
|
||||
",f3:{type:range, field:${field}, start:0, end:1, gap:1}" +
|
||||
"}"
|
||||
)
|
||||
// Expected: a single matching doc, and each facet reports the repeated value only once.
, "facets=={count:1, " +
|
||||
"f1:{buckets:[{val:0, count:1}]}" +
|
||||
",f2:1" +
|
||||
",f3:{buckets:[{val:0, count:1}]}" +
|
||||
"}"
|
||||
);
|
||||
}
|
||||
|
||||
public void testDomainJoinSelf() throws Exception {
|
||||
Client client = Client.localClient();
|
||||
indexSimple(client);
|
||||
|
|
Loading…
Reference in New Issue