diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 3b220d2e1fa..0d507e345ad 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -77,6 +77,8 @@ prefix, then you will now get an error as these options are incompatible with nu
New Features
----------------------
+* SOLR-5725: facet.method=enum can bypass the exact count calculation with facet.exists=true; it just returns 1 for
+ terms that exist in the result docset. (Alexey Kozhemiakin, Sebastian Koziel, Radoslaw Zielinski via Mikhail Khludnev)
Bug Fixes
----------------------
diff --git a/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java b/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
index 26b2e597bab..90608c0089b 100644
--- a/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
+++ b/solr/core/src/java/org/apache/solr/handler/component/FacetComponent.java
@@ -1265,7 +1265,14 @@ public class FacetComponent extends SearchComponent {
if (facetFs != null) {
for (String field : facetFs) {
- DistribFieldFacet ff = new DistribFieldFacet(rb, field);
+ final DistribFieldFacet ff;
+
+ if (params.getFieldBool(field, FacetParams.FACET_EXISTS, false)) {
+ // cap facet count by 1 with this method
+ ff = new DistribFacetExistsField(rb, field);
+ } else {
+ ff = new DistribFieldFacet(rb, field);
+ }
facets.put(ff.getKey(), ff);
}
}
@@ -1469,7 +1476,7 @@ public class FacetComponent extends SearchComponent {
sfc.termNum = termNum++;
counts.put(name, sfc);
}
- sfc.count += count;
+ incCount(sfc, count);
terms.set(sfc.termNum);
last = count;
}
@@ -1485,6 +1492,10 @@ public class FacetComponent extends SearchComponent {
missingMax[shardNum] = last;
counted[shardNum] = terms;
}
+
+ /**
+ * Folds a count reported for a term into that term's accumulated {@link ShardFacetCount}.
+ * This default implementation sums the counts; it is protected so that subclasses can
+ * override how counts are merged.
+ *
+ * @param sfc the accumulator for the term
+ * @param count the count to merge in
+ */
+ protected void incCount(ShardFacetCount sfc, long count) {
+ sfc.count += count;
+ }
public ShardFacetCount[] getLexSorted() {
ShardFacetCount[] arr
@@ -1530,7 +1541,7 @@ public class FacetComponent extends SearchComponent {
}
}
}
-
+
/**
* This API is experimental and subject to change
*/
@@ -1547,4 +1558,18 @@ public class FacetComponent extends SearchComponent {
}
}
+
+ /**
+ * {@link DistribFieldFacet} variant that is created when facet.exists=true is set for the field:
+ * instead of summing the reported counts, a term's merged count is set to 1 as soon as any
+ * positive count is reported for it.
+ */
+ private static final class DistribFacetExistsField extends DistribFieldFacet {
+ /**
+ * Delegates to the parent constructor and then validates the mincount through
+ * {@link SimpleFacets#checkMincountOnExists(String, int)}, which rejects values above 1.
+ */
+ private DistribFacetExistsField(ResponseBuilder rb, String facetStr) {
+ super(rb, facetStr);
+ SimpleFacets.checkMincountOnExists(field, minCount);
+ }
+
+ /** Sets the term's count to 1 for any positive {@code count}; non-positive counts leave it untouched. */
+ @Override
+ protected void incCount(ShardFacetCount sfc, long count) {
+ if (count>0) {
+ sfc.count = 1;
+ }
+ }
+ }
}
diff --git a/solr/core/src/java/org/apache/solr/request/SimpleFacets.java b/solr/core/src/java/org/apache/solr/request/SimpleFacets.java
index c2f68f93bf8..52c2129a99e 100644
--- a/solr/core/src/java/org/apache/solr/request/SimpleFacets.java
+++ b/solr/core/src/java/org/apache/solr/request/SimpleFacets.java
@@ -406,7 +406,8 @@ public class SimpleFacets {
String prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX);
String contains = params.getFieldParam(field, FacetParams.FACET_CONTAINS);
boolean ignoreCase = params.getFieldBool(field, FacetParams.FACET_CONTAINS_IGNORE_CASE, false);
-
+ boolean exists = params.getFieldBool(field, FacetParams.FACET_EXISTS, false);
+
NamedList counts;
SchemaField sf = searcher.getSchema().getField(field);
FieldType ft = sf.getType();
@@ -422,13 +423,15 @@ public class SimpleFacets {
requestedMethod = FacetMethod.FC;
} else if(FacetParams.FACET_METHOD_uif.equals(methodStr)) {
requestedMethod = FacetMethod.UIF;
- }else{
+ } else {
requestedMethod=null;
}
final boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();
- FacetMethod appliedFacetMethod = selectFacetMethod(sf, requestedMethod, mincount);
+ FacetMethod appliedFacetMethod = selectFacetMethod(field,
+ sf, requestedMethod, mincount,
+ exists);
RTimer timer = null;
if (fdebug != null) {
@@ -446,7 +449,8 @@ public class SimpleFacets {
switch (appliedFacetMethod) {
case ENUM:
assert TrieField.getMainValuePrefix(ft) == null;
- counts = getFacetTermEnumCounts(searcher, docs, field, offset, limit, mincount,missing,sort,prefix, contains, ignoreCase, params);
+ counts = getFacetTermEnumCounts(searcher, docs, field, offset, limit, mincount,missing,sort,prefix, contains, ignoreCase,
+ exists);
break;
case FCS:
assert !multiToken;
@@ -538,6 +542,29 @@ public class SimpleFacets {
return counts;
}
+ /**
+ * Selects the facet method to apply to a field, additionally enforcing the constraints of
+ * facet.exists=true: mincount may not exceed 1, enum becomes the requested method when none
+ * was given, and the method finally selected must be enum.
+ *
+ * @param fieldName name of the field, used in error messages
+ * @param field schema field, passed on to the three-argument overload
+ * @param method explicitly requested facet method, or null if none was requested
+ * @param mincount requested facet.mincount; may be null, in which case the mincount check is skipped
+ * @param existsRequested facet.exists=true is passed for the given field
+ * @return the facet method chosen by the three-argument overload
+ * @throws SolrException with BAD_REQUEST if existsRequested and either mincount exceeds 1 or
+ * a method other than enum ends up being selected
+ */
+ static FacetMethod selectFacetMethod(String fieldName,
+ SchemaField field, FacetMethod method, Integer mincount,
+ boolean existsRequested) {
+ if (existsRequested) {
+ // mincount is a boxed Integer: guard the call so a null value cannot NPE while unboxing to int
+ if (mincount != null) {
+ checkMincountOnExists(fieldName, mincount);
+ }
+ if (method == null) {
+ method = FacetMethod.ENUM;
+ }
+ }
+ final FacetMethod facetMethod = selectFacetMethod(field, method, mincount);
+
+ // the three-argument overload may pick a different method than requested; exists only works with enum
+ if (existsRequested && facetMethod!=FacetMethod.ENUM) {
+ throw new SolrException (ErrorCode.BAD_REQUEST,
+ FacetParams.FACET_EXISTS + "=true is requested, but "+
+ FacetParams.FACET_METHOD+"="+FacetParams.FACET_METHOD_enum+ " can't be used with "+fieldName
+ );
+ }
+ return facetMethod;
+ }
+
/**
* This method will force the appropriate facet method even if the user provided a different one as a request parameter
*
@@ -811,7 +838,8 @@ public class SimpleFacets {
* @see FacetParams#FACET_ZEROS
* @see FacetParams#FACET_MISSING
*/
- public NamedList getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field, int offset, int limit, int mincount, boolean missing, String sort, String prefix, String contains, boolean ignoreCase, SolrParams params)
+ public NamedList getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field, int offset, int limit, int mincount, boolean missing,
+ String sort, String prefix, String contains, boolean ignoreCase, boolean intersectsCheck)
throws IOException {
/* :TODO: potential optimization...
@@ -901,7 +929,11 @@ public class SimpleFacets {
deState.postingsEnum = postingsEnum;
}
- c = searcher.numDocs(docs, deState);
+ if (intersectsCheck) {
+ c = searcher.intersects(docs, deState) ? 1 : 0;
+ } else {
+ c = searcher.numDocs(docs, deState);
+ }
postingsEnum = deState.postingsEnum;
} else {
@@ -916,19 +948,33 @@ public class SimpleFacets {
if (postingsEnum instanceof MultiPostingsEnum) {
MultiPostingsEnum.EnumWithSlice[] subs = ((MultiPostingsEnum) postingsEnum).getSubs();
int numSubs = ((MultiPostingsEnum) postingsEnum).getNumSubs();
+
+ SEGMENTS_LOOP:
for (int subindex = 0; subindex < numSubs; subindex++) {
MultiPostingsEnum.EnumWithSlice sub = subs[subindex];
if (sub.postingsEnum == null) continue;
int base = sub.slice.start;
int docid;
while ((docid = sub.postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- if (fastForRandomSet.exists(docid + base)) c++;
+ if (fastForRandomSet.exists(docid + base)) {
+ c++;
+ if (intersectsCheck) {
+ assert c==1;
+ break SEGMENTS_LOOP;
+ }
+ }
}
}
} else {
int docid;
while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
- if (fastForRandomSet.exists(docid)) c++;
+ if (fastForRandomSet.exists(docid)) {
+ c++;
+ if (intersectsCheck) {
+ assert c==1;
+ break;
+ }
+ }
}
}
@@ -969,6 +1015,15 @@ public class SimpleFacets {
return res;
}
+ /**
+ * Validates the mincount requested together with facet.exists=true.
+ *
+ * @param fieldName name of the faceted field, reported in the error message
+ * @param mincount the requested facet.mincount
+ * @throws SolrException with BAD_REQUEST if {@code mincount} is greater than 1
+ */
+ public static void checkMincountOnExists(String fieldName, int mincount) {
+ if (mincount <= 1) {
+ return;
+ }
+ final String message = FacetParams.FACET_MINCOUNT + "="+mincount+" exceed 1 that's not supported with " +
+ FacetParams.FACET_EXISTS + "=true for " + fieldName;
+ throw new SolrException (ErrorCode.BAD_REQUEST, message);
+ }
+
/**
* A simple key=>val pair whose natural order is such that
* higher vals come before lower vals.
diff --git a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
index 7f155742ba5..4c188096aaf 100644
--- a/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
+++ b/solr/core/src/java/org/apache/solr/search/SolrIndexSearcher.java
@@ -2285,6 +2285,11 @@ public class SolrIndexSearcher extends IndexSearcher implements Closeable, SolrI
return all.andNotSize(positiveA.union(positiveB));
}
+ /**
+ * Checks whether {@code a} intersects the doc set obtained from {@code getDocSet(deState)}.
+ *
+ * @param a the doc set to test
+ * @param deState state handed to {@code getDocSet} to obtain the other doc set
+ * @return the result of {@code a.intersects(...)} on that doc set
+ * @lucene.internal
+ */
+ public boolean intersects(DocSet a, DocsEnumState deState) throws IOException {
+ return a.intersects(getDocSet(deState));
+ }
+
/**
* Takes a list of document IDs, and returns an array of Documents containing all of the stored fields.
*/
diff --git a/solr/core/src/test/org/apache/solr/TestRandomFaceting.java b/solr/core/src/test/org/apache/solr/TestRandomFaceting.java
index daafca1e6b2..2ffefdc8588 100644
--- a/solr/core/src/test/org/apache/solr/TestRandomFaceting.java
+++ b/solr/core/src/test/org/apache/solr/TestRandomFaceting.java
@@ -16,22 +16,39 @@
*/
package org.apache.solr;
-import org.apache.lucene.util.TestUtil;
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.List;
+import java.util.ListIterator;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.Random;
+import java.util.Set;
+import java.util.function.Consumer;
+import java.util.regex.Pattern;
+
import org.apache.lucene.util.LuceneTestCase.Slow;
+import org.apache.lucene.util.TestUtil;
+import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.SchemaField;
import org.junit.BeforeClass;
import org.junit.Test;
+import org.noggit.JSONUtil;
+import org.noggit.ObjectBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import java.lang.invoke.MethodHandles;
-import java.util.*;
-
@Slow
public class TestRandomFaceting extends SolrTestCaseJ4 {
+ private static final Pattern trieFields = Pattern.compile(".*_t.");
+
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
public static final String FOO_STRING_FIELD = "foo_s1";
@@ -80,6 +97,21 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
types.add(new FldType("missing_ss",new IRange(0,0), new SVal('a','b',1,1)));
// TODO: doubles, multi-floats, ints with precisionStep>0, booleans
+ types.add(new FldType("small_tf",ZERO_ONE, new FVal(-4,5)));
+ assert trieFields.matcher("small_tf").matches();
+ assert !trieFields.matcher("small_f").matches();
+
+ types.add(new FldType("foo_ti",ZERO_ONE, new IRange(-2,indexSize)));
+ assert trieFields.matcher("foo_ti").matches();
+ assert !trieFields.matcher("foo_i").matches();
+
+ types.add(new FldType("bool_b",ZERO_ONE, new Vals(){
+ @Override
+ public Comparable get() {
+ return random().nextBoolean();
+ }
+
+ }));
}
void addMoreDocs(int ndocs) throws Exception {
@@ -144,8 +176,8 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
}
- List multiValuedMethods = Arrays.asList(new String[]{"enum","fc"});
- List singleValuedMethods = Arrays.asList(new String[]{"enum","fc","fcs"});
+ // facet.method values to exercise; a null entry means the facet.method parameter is omitted from the request
+ List<String> multiValuedMethods = Arrays.asList("enum", "fc", null);
+ List<String> singleValuedMethods = Arrays.asList("enum", "fc", "fcs", null);
void doFacetTests(FldType ftype) throws Exception {
@@ -154,10 +186,9 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
Random rand = random();
boolean validate = validateResponses;
ModifiableSolrParams params = params("facet","true", "wt","json", "indent","true", "omitHeader","true");
- params.add("q","*:*", "rows","0"); // TODO: select subsets
+ params.add("q","*:*"); // TODO: select subsets
params.add("rows","0");
-
SchemaField sf = req.getSchema().getField(ftype.fname);
boolean multiValued = sf.getType().multiValuedFieldCache();
@@ -198,6 +229,10 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
params.add("facet.missing", "true");
}
+ if (rand.nextBoolean()) {
+ params.add("facet.enum.cache.minDf",""+ rand.nextInt(indexSize));
+ }
+
// TODO: randomly add other facet params
String key = ftype.fname;
String facet_field = ftype.fname;
@@ -210,45 +245,207 @@ public class TestRandomFaceting extends SolrTestCaseJ4 {
List methods = multiValued ? multiValuedMethods : singleValuedMethods;
List responses = new ArrayList<>(methods.size());
for (String method : methods) {
- // params.add("facet.field", "{!key="+method+"}" + ftype.fname);
- // TODO: allow method to be passed on local params?
-
- params.set("facet.method", method);
-
- // if (random().nextBoolean()) params.set("facet.mincount", "1"); // uncomment to test that validation fails
-
- String strResponse = h.query(req(params));
- // Object realResponse = ObjectBuilder.fromJSON(strResponse);
- // System.out.println(strResponse);
-
- responses.add(strResponse);
+ for (boolean exists : new boolean [] {false, true}) {
+ // params.add("facet.field", "{!key="+method+"}" + ftype.fname);
+ // TODO: allow method to be passed on local params?
+ if (method!=null) {
+ params.set("facet.method", method);
+ } else {
+ params.remove("facet.method");
+ }
+
+ params.set("facet.exists", ""+exists);
+ if (!exists && rand.nextBoolean()) {
+ params.remove("facet.exists");
+ }
+
+ // if (random().nextBoolean()) params.set("facet.mincount", "1"); // uncomment to test that validation fails
+ if (params.getInt("facet.limit", 100)!=0) { // it bypasses all processing, and we can go to empty validation
+ if (exists && params.getInt("facet.mincount", 0)>1) {
+ assertQEx("no mincount on facet.exists",
+ rand.nextBoolean() ? "facet.exists":"facet.mincount",
+ req(params), ErrorCode.BAD_REQUEST);
+ continue;
+ }
+ // facet.exists can't be combined with non-enum nor with enum requested for tries, because it will be flipped to FC/FCS
+ final boolean notEnum = method != null && !method.equals("enum");
+ final boolean trieField = trieFields.matcher(ftype.fname).matches();
+ if ((notEnum || trieField) && exists) {
+ assertQEx("facet.exists only when enum or ommitted",
+ "facet.exists", req(params), ErrorCode.BAD_REQUEST);
+ continue;
+ }
+ }
+ String strResponse = h.query(req(params));
+ responses.add(strResponse);
+
+ if (responses.size()>1) {
+ validateResponse(responses.get(0), strResponse, params, method, methods);
+ }
+ }
+
}
-
+
/**
String strResponse = h.query(req(params));
Object realResponse = ObjectBuilder.fromJSON(strResponse);
**/
-
- if (validate) {
- for (int i=1; i0) {
+ count = 1L; // capping here
+ strata = 1; // non-zero count become zero
+ } else {
+ strata = 0; // zero-count
+ }
+ }
+ final List