mirror of https://github.com/apache/lucene.git
SOLR-3855: Fix faceting on numeric fields with precisionStep < Integer.MAX_VALUE, facet.mincount=0 and facet.method=fcs.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1449360 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
69c559c00b
commit
e6231ad585
|
@ -35,7 +35,6 @@ import org.apache.solr.request.SolrQueryRequest;
|
|||
import org.apache.solr.request.UnInvertedField;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.schema.TrieField;
|
||||
import org.apache.solr.search.DocIterator;
|
||||
import org.apache.solr.search.DocSet;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
@ -214,10 +213,7 @@ class SimpleStats {
|
|||
FieldType ft = sf.getType();
|
||||
NamedList<?> stv;
|
||||
|
||||
// Currently, only UnInvertedField can deal with multi-part trie fields
|
||||
String prefix = TrieField.getMainValuePrefix(ft);
|
||||
|
||||
if (sf.multiValued() || ft.multiValuedFieldCache() || prefix!=null) {
|
||||
if (sf.multiValued() || ft.multiValuedFieldCache()) {
|
||||
//use UnInvertedField for multivalued fields
|
||||
UnInvertedField uif = UnInvertedField.getUnInvertedField(f, searcher);
|
||||
stv = uif.getStats(searcher, docs, facets).getStatsValues();
|
||||
|
|
|
@ -40,10 +40,12 @@ import org.apache.lucene.util.Bits;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRef;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
import org.apache.lucene.util.StringHelper;
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
import org.apache.solr.schema.TrieField;
|
||||
import org.apache.solr.search.DocIterator;
|
||||
import org.apache.solr.search.DocSet;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
|
@ -266,10 +268,28 @@ final class NumericFacets {
|
|||
}
|
||||
final Terms terms = searcher.getAtomicReader().terms(fieldName);
|
||||
if (terms != null) {
|
||||
final String prefixStr = TrieField.getMainValuePrefix(ft);
|
||||
final BytesRef prefix;
|
||||
if (prefixStr != null) {
|
||||
prefix = new BytesRef(prefixStr);
|
||||
} else {
|
||||
prefix = new BytesRef();
|
||||
}
|
||||
final TermsEnum termsEnum = terms.iterator(null);
|
||||
BytesRef term = termsEnum.next();
|
||||
BytesRef term;
|
||||
switch (termsEnum.seekCeil(prefix)) {
|
||||
case FOUND:
|
||||
case NOT_FOUND:
|
||||
term = termsEnum.term();
|
||||
break;
|
||||
case END:
|
||||
term = null;
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError();
|
||||
}
|
||||
final CharsRef spare = new CharsRef();
|
||||
for (int skipped = hashTable.size; skipped < offset && term != null; ) {
|
||||
for (int skipped = hashTable.size; skipped < offset && term != null && StringHelper.startsWith(term, prefix); ) {
|
||||
ft.indexedToReadable(term, spare);
|
||||
final String termStr = spare.toString();
|
||||
if (!alreadySeen.contains(termStr)) {
|
||||
|
@ -277,7 +297,7 @@ final class NumericFacets {
|
|||
}
|
||||
term = termsEnum.next();
|
||||
}
|
||||
for ( ; term != null && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
|
||||
for ( ; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
|
||||
ft.indexedToReadable(term, spare);
|
||||
final String termStr = spare.toString();
|
||||
if (!alreadySeen.contains(termStr)) {
|
||||
|
@ -301,13 +321,31 @@ final class NumericFacets {
|
|||
}
|
||||
final Terms terms = searcher.getAtomicReader().terms(fieldName);
|
||||
if (terms != null) {
|
||||
final String prefixStr = TrieField.getMainValuePrefix(ft);
|
||||
final BytesRef prefix;
|
||||
if (prefixStr != null) {
|
||||
prefix = new BytesRef(prefixStr);
|
||||
} else {
|
||||
prefix = new BytesRef();
|
||||
}
|
||||
final TermsEnum termsEnum = terms.iterator(null);
|
||||
BytesRef term;
|
||||
switch (termsEnum.seekCeil(prefix)) {
|
||||
case FOUND:
|
||||
case NOT_FOUND:
|
||||
term = termsEnum.term();
|
||||
break;
|
||||
case END:
|
||||
term = null;
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError();
|
||||
}
|
||||
final CharsRef spare = new CharsRef();
|
||||
BytesRef term = termsEnum.next();
|
||||
for (int i = 0; i < offset && term != null; ++i) {
|
||||
for (int i = 0; i < offset && term != null && StringHelper.startsWith(term, prefix); ++i) {
|
||||
term = termsEnum.next();
|
||||
}
|
||||
for ( ; term != null && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
|
||||
for ( ; term != null && StringHelper.startsWith(term, prefix) && (limit < 0 || result.size() < limit); term = termsEnum.next()) {
|
||||
ft.indexedToReadable(term, spare);
|
||||
final String termStr = spare.toString();
|
||||
Integer count = counts.get(termStr);
|
||||
|
|
|
@ -320,6 +320,10 @@ public class SimpleFacets {
|
|||
return collector.getGroupCount();
|
||||
}
|
||||
|
||||
enum FacetMethod {
|
||||
ENUM, FC, FCS;
|
||||
}
|
||||
|
||||
public NamedList<Integer> getTermCounts(String field) throws IOException {
|
||||
int offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
|
||||
int limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);
|
||||
|
@ -342,57 +346,84 @@ public class SimpleFacets {
|
|||
FieldType ft = sf.getType();
|
||||
|
||||
// determine what type of faceting method to use
|
||||
String method = params.getFieldParam(field, FacetParams.FACET_METHOD);
|
||||
boolean enumMethod = FacetParams.FACET_METHOD_enum.equals(method);
|
||||
final String methodStr = params.getFieldParam(field, FacetParams.FACET_METHOD);
|
||||
FacetMethod method = null;
|
||||
if (FacetParams.FACET_METHOD_enum.equals(methodStr)) {
|
||||
method = FacetMethod.ENUM;
|
||||
} else if (FacetParams.FACET_METHOD_fcs.equals(methodStr)) {
|
||||
method = FacetMethod.FCS;
|
||||
} else if (FacetParams.FACET_METHOD_fc.equals(methodStr)) {
|
||||
method = FacetMethod.FC;
|
||||
}
|
||||
|
||||
// TODO: default to per-segment or not?
|
||||
boolean per_segment = FacetParams.FACET_METHOD_fcs.equals(method) // explicit
|
||||
|| (ft.getNumericType() != null && sf.hasDocValues()); // numeric doc values are per-segment by default
|
||||
if (method == FacetMethod.ENUM && TrieField.getMainValuePrefix(ft) != null) {
|
||||
// enum can't deal with trie fields that index several terms per value
|
||||
method = sf.multiValued() ? FacetMethod.FC : FacetMethod.FCS;
|
||||
}
|
||||
|
||||
if (method == null && ft instanceof BoolField) {
|
||||
// Always use filters for booleans... we know the number of values is very small.
|
||||
enumMethod = true;
|
||||
method = FacetMethod.ENUM;
|
||||
}
|
||||
boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();
|
||||
|
||||
if (TrieField.getMainValuePrefix(ft) != null) {
|
||||
// A TrieField with multiple parts indexed per value... currently only
|
||||
// UnInvertedField can handle this case, so force it's use.
|
||||
enumMethod = false;
|
||||
multiToken = true;
|
||||
final boolean multiToken = sf.multiValued() || ft.multiValuedFieldCache();
|
||||
|
||||
if (method == null && ft.getNumericType() != null && !sf.multiValued()) {
|
||||
// the per-segment approach is optimal for numeric field types since there
|
||||
// are no global ords to merge and no need to create an expensive
|
||||
// top-level reader
|
||||
method = FacetMethod.FCS;
|
||||
}
|
||||
|
||||
if (ft.getNumericType() != null && sf.hasDocValues()) {
|
||||
// only fcs is able to leverage the numeric field caches
|
||||
method = FacetMethod.FCS;
|
||||
}
|
||||
|
||||
if (method == null) {
|
||||
// TODO: default to per-segment or not?
|
||||
method = FacetMethod.FC;
|
||||
}
|
||||
|
||||
if (method == FacetMethod.FCS && multiToken) {
|
||||
// only fc knows how to deal with multi-token fields
|
||||
method = FacetMethod.FC;
|
||||
}
|
||||
|
||||
if (params.getFieldBool(field, GroupParams.GROUP_FACET, false)) {
|
||||
counts = getGroupedCounts(searcher, docs, field, multiToken, offset,limit, mincount, missing, sort, prefix);
|
||||
} else {
|
||||
// unless the enum method is explicitly specified, use a counting method.
|
||||
if (enumMethod) {
|
||||
counts = getFacetTermEnumCounts(searcher, docs, field, offset, limit, mincount,missing,sort,prefix);
|
||||
} else {
|
||||
if (multiToken) {
|
||||
UnInvertedField uif = UnInvertedField.getUnInvertedField(field, searcher);
|
||||
counts = uif.getCounts(searcher, docs, offset, limit, mincount,missing,sort,prefix);
|
||||
} else {
|
||||
// TODO: future logic could use filters instead of the fieldcache if
|
||||
// the number of terms in the field is small enough.
|
||||
if (per_segment) {
|
||||
if (ft.getNumericType() != null && !sf.multiValued()) {
|
||||
// force numeric faceting
|
||||
if (prefix != null && !prefix.isEmpty()) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST, FacetParams.FACET_PREFIX + " is not supported on numeric types");
|
||||
}
|
||||
counts = NumericFacets.getCounts(searcher, docs, field, offset, limit, mincount, missing, sort);
|
||||
} else {
|
||||
PerSegmentSingleValuedFaceting ps = new PerSegmentSingleValuedFaceting(searcher, docs, field, offset,limit, mincount, missing, sort, prefix);
|
||||
Executor executor = threads == 0 ? directExecutor : facetExecutor;
|
||||
ps.setNumThreads(threads);
|
||||
counts = ps.getFacetCounts(executor);
|
||||
assert method != null;
|
||||
switch (method) {
|
||||
case ENUM:
|
||||
assert TrieField.getMainValuePrefix(ft) == null;
|
||||
counts = getFacetTermEnumCounts(searcher, docs, field, offset, limit, mincount,missing,sort,prefix);
|
||||
break;
|
||||
case FCS:
|
||||
assert !multiToken;
|
||||
if (ft.getNumericType() != null && !sf.multiValued()) {
|
||||
// force numeric faceting
|
||||
if (prefix != null && !prefix.isEmpty()) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST, FacetParams.FACET_PREFIX + " is not supported on numeric types");
|
||||
}
|
||||
counts = NumericFacets.getCounts(searcher, docs, field, offset, limit, mincount, missing, sort);
|
||||
} else {
|
||||
PerSegmentSingleValuedFaceting ps = new PerSegmentSingleValuedFaceting(searcher, docs, field, offset,limit, mincount, missing, sort, prefix);
|
||||
Executor executor = threads == 0 ? directExecutor : facetExecutor;
|
||||
ps.setNumThreads(threads);
|
||||
counts = ps.getFacetCounts(executor);
|
||||
}
|
||||
break;
|
||||
case FC:
|
||||
if (multiToken || TrieField.getMainValuePrefix(ft) != null) {
|
||||
UnInvertedField uif = UnInvertedField.getUnInvertedField(field, searcher);
|
||||
counts = uif.getCounts(searcher, docs, offset, limit, mincount,missing,sort,prefix);
|
||||
} else {
|
||||
counts = getFieldCacheCounts(searcher, docs, field, offset,limit, mincount, missing, sort, prefix);
|
||||
}
|
||||
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new AssertionError();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -17,6 +17,8 @@
|
|||
|
||||
package org.apache.solr.request;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Random;
|
||||
|
||||
|
@ -25,6 +27,7 @@ import org.apache.lucene.index.Term;
|
|||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
import org.apache.solr.common.params.FacetParams;
|
||||
import org.junit.After;
|
||||
import org.junit.BeforeClass;
|
||||
import org.junit.Test;
|
||||
|
@ -263,5 +266,32 @@ public class TestFaceting extends SolrTestCaseJ4 {
|
|||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTrieFields() {
|
||||
// make sure that terms are correctly filtered even for trie fields that index several
|
||||
// terms for a single value
|
||||
List<String> fields = new ArrayList<String>();
|
||||
fields.add("id");
|
||||
fields.add("7");
|
||||
final String[] suffixes = new String[] {"ti", "tis", "tf", "tfs", "tl", "tls", "td", "tds"};
|
||||
for (String suffix : suffixes) {
|
||||
fields.add("f_" + suffix);
|
||||
fields.add("42");
|
||||
}
|
||||
assertU(adoc(fields.toArray(new String[0])));
|
||||
assertU(commit());
|
||||
for (String suffix : suffixes) {
|
||||
for (String facetMethod : new String[] {FacetParams.FACET_METHOD_enum, FacetParams.FACET_METHOD_fc, FacetParams.FACET_METHOD_fcs}) {
|
||||
for (String facetSort : new String[] {FacetParams.FACET_SORT_COUNT, FacetParams.FACET_SORT_INDEX}) {
|
||||
for (String value : new String[] {"42", "43"}) { // match or not
|
||||
final String field = "f_" + suffix;
|
||||
assertQ("field=" + field + ",method=" + facetMethod + ",sort=" + facetSort,
|
||||
req("q", field + ":" + value, FacetParams.FACET, "true", FacetParams.FACET_FIELD, field, FacetParams.FACET_MINCOUNT, "0", FacetParams.FACET_SORT, facetSort, FacetParams.FACET_METHOD, facetMethod),
|
||||
"*[count(//lst[@name='" + field + "']/int)=1]"); // exactly 1 facet count
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue