diff --git a/lucene/core/src/java/org/apache/lucene/util/StringHelper.java b/lucene/core/src/java/org/apache/lucene/util/StringHelper.java
index e3138a33fe1..eabd8d75671 100644
--- a/lucene/core/src/java/org/apache/lucene/util/StringHelper.java
+++ b/lucene/core/src/java/org/apache/lucene/util/StringHelper.java
@@ -22,6 +22,7 @@ import java.math.BigInteger;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
+import java.util.Locale;
import java.util.Properties;
/**
@@ -33,11 +34,11 @@ public abstract class StringHelper {
/**
* Compares two {@link BytesRef}, element by element, and returns the
- * number of elements common to both arrays.
+ * number of elements common to both arrays (from the start of each).
*
* @param left The first {@link BytesRef} to compare
* @param right The second {@link BytesRef} to compare
- * @return The number of common elements.
+ * @return The number of common elements (from the start of each).
*/
public static int bytesDifference(BytesRef left, BytesRef right) {
int len = left.length < right.length ? left.length : right.length;
@@ -134,6 +135,34 @@ public abstract class StringHelper {
return sliceEquals(ref, suffix, ref.length - suffix.length);
}
+ /**
+ * Returns true
iff the ref contains the given slice. Otherwise
+ * false
.
+ *
+ * @param ref
+ * the {@link BytesRef} to test
+ * @param slice
+ * the slice to look for
+ * @param ignoreCase
+ * whether the comparison should be case-insensitive
+ * @return Returns true
iff the ref contains the given slice.
+ * Otherwise false
.
+ */
+ public static boolean contains(BytesRef ref, BytesRef slice, boolean ignoreCase) {
+ if (ignoreCase) {
+ String s1 = ref.utf8ToString();
+ String s2 = slice.utf8ToString();
+ return s1.toLowerCase(Locale.ENGLISH).contains(s2.toLowerCase(Locale.ENGLISH));
+ } else {
+ for (int pos = 0; pos <= ref.length - slice.length; ++pos) {
+ if (sliceEquals(ref, slice, pos)) {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
private static boolean sliceEquals(BytesRef sliceToTest, BytesRef other, int pos) {
if (pos < 0 || sliceToTest.length - pos < other.length) {
return false;
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestStringHelper.java b/lucene/core/src/test/org/apache/lucene/util/TestStringHelper.java
index 4256a7d588f..033c3250d58 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestStringHelper.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestStringHelper.java
@@ -18,6 +18,73 @@ package org.apache.lucene.util;
*/
public class TestStringHelper extends LuceneTestCase {
+
+ public void testBytesDifference() {
+ BytesRef left = new BytesRef("foobar");
+ BytesRef right = new BytesRef("foozo");
+ assertEquals(3, StringHelper.bytesDifference(left, right));
+ }
+
+ public void testEquals() {
+ assertTrue(StringHelper.equals("foo", "foo"));
+ assertFalse(StringHelper.equals("foo", null));
+ assertFalse(StringHelper.equals(null, "foo"));
+ assertTrue(StringHelper.equals(null, null));
+ }
+
+ public void testStartsWith() {
+ BytesRef ref = new BytesRef("foobar");
+ BytesRef slice = new BytesRef("foo");
+ assertTrue(StringHelper.startsWith(ref, slice));
+ }
+
+ public void testEndsWith() {
+ BytesRef ref = new BytesRef("foobar");
+ BytesRef slice = new BytesRef("bar");
+ assertTrue(StringHelper.endsWith(ref, slice));
+ }
+
+ public void testStartsWithWhole() {
+ BytesRef ref = new BytesRef("foobar");
+ BytesRef slice = new BytesRef("foobar");
+ assertTrue(StringHelper.startsWith(ref, slice));
+ }
+
+ public void testEndsWithWhole() {
+ BytesRef ref = new BytesRef("foobar");
+ BytesRef slice = new BytesRef("foobar");
+ assertTrue(StringHelper.endsWith(ref, slice));
+ }
+
+ public void testContainsAtStart() {
+ BytesRef ref = new BytesRef("foobar");
+ BytesRef slice = new BytesRef("foo");
+ assertTrue(StringHelper.contains(ref, slice, false));
+ }
+
+ public void testContains() {
+ BytesRef ref = new BytesRef("foobar");
+ BytesRef slice = new BytesRef("ooba");
+ assertTrue(StringHelper.contains(ref, slice, false));
+ }
+
+ public void testContainsAtEnd() {
+ BytesRef ref = new BytesRef("foobar");
+ BytesRef slice = new BytesRef("bar");
+ assertTrue(StringHelper.contains(ref, slice, false));
+ }
+
+ public void testContainsWhole() {
+ BytesRef ref = new BytesRef("foobar");
+ BytesRef slice = new BytesRef("foobar");
+ assertTrue(StringHelper.contains(ref, slice, false));
+ }
+
+ public void testContainsIgnoreCase() {
+ BytesRef ref = new BytesRef("FooBar");
+ BytesRef slice = new BytesRef("bar");
+ assertTrue(StringHelper.contains(ref, slice, true));
+ }
public void testMurmurHash3() throws Exception {
// Hashes computed using murmur3_32 from https://code.google.com/p/pyfasthash
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 7cda0871e83..068331a8719 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -211,6 +211,9 @@ New Features
* SOLR-7307: EmbeddedSolrServer can now be started up by passing a path to a
solr home directory, or a NodeConfig object (Alan Woodward, Mike Drob)
+* SOLR-1387: Add facet.contains and facet.contains.ignoreCase options (Tom Winch
+ via Alan Woodward)
+
Bug Fixes
----------------------
diff --git a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java
index 9985671cd4b..cfd3e05636e 100644
--- a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java
+++ b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java
@@ -34,6 +34,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.LongValues;
+import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.util.NamedList;
@@ -58,7 +59,7 @@ import org.apache.solr.util.LongPriorityQueue;
public class DocValuesFacets {
private DocValuesFacets() {}
- public static NamedList getCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException {
+ public static NamedList getCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix, String contains, boolean ignoreCase) throws IOException {
SchemaField schemaField = searcher.getSchema().getField(fieldName);
FieldType ft = schemaField.getType();
NamedList res = new NamedList<>();
@@ -97,6 +98,8 @@ public class DocValuesFacets {
prefixRef = new BytesRefBuilder();
prefixRef.copyChars(prefix);
}
+
+ final BytesRef containsBR = contains != null ? new BytesRef(contains) : null;
int startTermIndex, endTermIndex;
if (prefix!=null) {
@@ -170,6 +173,12 @@ public class DocValuesFacets {
int min=mincount-1; // the smallest value in the top 'N' values
for (int i=(startTermIndex==-1)?1:0; imin) {
// NOTE: we use c>min rather than c>=min as an optimization because we are going in
// index order, so we already know that the keys are ordered. This can be very
@@ -203,18 +212,28 @@ public class DocValuesFacets {
} else {
// add results in index order
int i=(startTermIndex==-1)?1:0;
- if (mincount<=0) {
- // if mincount<=0, then we won't discard any terms and we know exactly
- // where to start.
+ if (mincount<=0 && containsBR == null) {
+ // if mincount<=0 and we're not examining the values for contains, then
+ // we won't discard any terms and we know exactly where to start.
i+=off;
off=0;
}
for (; i=0) continue;
+ if (c=0) continue;
if (--lim<0) break;
- final BytesRef term = si.lookupOrd(startTermIndex+i);
+ if (term == null) {
+ term = si.lookupOrd(startTermIndex+i);
+ }
ft.indexedToReadable(term, charsRef);
res.add(charsRef.toString(), c);
}
diff --git a/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java b/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java
index beccaf857cd..6292d285ba0 100644
--- a/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java
+++ b/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java
@@ -32,6 +32,7 @@ import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.FacetParams;
@@ -54,12 +55,14 @@ class PerSegmentSingleValuedFaceting {
boolean missing;
String sort;
String prefix;
+ BytesRef containsBR;
+ boolean ignoreCase;
Filter baseSet;
int nThreads;
- public PerSegmentSingleValuedFaceting(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix) {
+ public PerSegmentSingleValuedFaceting(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix, String contains, boolean ignoreCase) {
this.searcher = searcher;
this.docs = docs;
this.fieldName = fieldName;
@@ -69,6 +72,8 @@ class PerSegmentSingleValuedFaceting {
this.missing = missing;
this.sort = sort;
this.prefix = prefix;
+ this.containsBR = contains != null ? new BytesRef(contains) : null;
+ this.ignoreCase = ignoreCase;
}
public void setNumThreads(int threads) {
@@ -173,16 +178,23 @@ class PerSegmentSingleValuedFaceting {
while (queue.size() > 0) {
SegFacet seg = queue.top();
-
+
+ // if facet.contains specified, only actually collect the count if substring contained
+ boolean collect = containsBR == null || StringHelper.contains(seg.tempBR, containsBR, ignoreCase);
+
// we will normally end up advancing the term enum for this segment
// while still using "val", so we need to make a copy since the BytesRef
// may be shared across calls.
- val.copyBytes(seg.tempBR);
-
+ if (collect) {
+ val.copyBytes(seg.tempBR);
+ }
+
int count = 0;
do {
- count += seg.counts[seg.pos - seg.startTermIndex];
+ if (collect) {
+ count += seg.counts[seg.pos - seg.startTermIndex];
+ }
// TODO: OPTIMIZATION...
// if mincount>0 then seg.pos++ can skip ahead to the next non-zero entry.
@@ -190,14 +202,16 @@ class PerSegmentSingleValuedFaceting {
if (seg.pos >= seg.endTermIndex) {
queue.pop();
seg = queue.top();
- } else {
+ } else {
seg.tempBR = seg.tenum.next();
seg = queue.updateTop();
}
} while (seg != null && val.get().compareTo(seg.tempBR) == 0);
- boolean stop = collector.collect(val.get(), count);
- if (stop) break;
+ if (collect) {
+ boolean stop = collector.collect(val.get(), count);
+ if (stop) break;
+ }
}
NamedList res = collector.getFacetCounts();
@@ -253,31 +267,28 @@ class PerSegmentSingleValuedFaceting {
startTermIndex=-1;
endTermIndex=si.getValueCount();
}
-
final int nTerms=endTermIndex-startTermIndex;
- if (nTerms>0) {
- // count collection array only needs to be as big as the number of terms we are
- // going to collect counts for.
- final int[] counts = this.counts = new int[nTerms];
- DocIdSet idSet = baseSet.getDocIdSet(context, null); // this set only includes live docs
- DocIdSetIterator iter = idSet.iterator();
+ if (nTerms == 0) return;
+ // count collection array only needs to be as big as the number of terms we are
+ // going to collect counts for.
+ final int[] counts = this.counts = new int[nTerms];
+ DocIdSet idSet = baseSet.getDocIdSet(context, null); // this set only includes live docs
+ DocIdSetIterator iter = idSet.iterator();
- ////
+ if (prefix==null) {
+ // specialized version when collecting counts for all terms
int doc;
-
- if (prefix==null) {
- // specialized version when collecting counts for all terms
- while ((doc = iter.nextDoc()) < DocIdSetIterator.NO_MORE_DOCS) {
- counts[1+si.getOrd(doc)]++;
- }
- } else {
- // version that adjusts term numbers because we aren't collecting the full range
- while ((doc = iter.nextDoc()) < DocIdSetIterator.NO_MORE_DOCS) {
- int term = si.getOrd(doc);
- int arrIdx = term-startTermIndex;
- if (arrIdx>=0 && arrIdx=0 && arrIdx0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX);
- String prefix = params.getFieldParam(field,FacetParams.FACET_PREFIX);
-
+ String prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX);
+ String contains = params.getFieldParam(field, FacetParams.FACET_CONTAINS);
+ boolean ignoreCase = params.getFieldBool(field, FacetParams.FACET_CONTAINS_IGNORE_CASE, false);
NamedList counts;
SchemaField sf = searcher.getSchema().getField(field);
@@ -435,13 +435,13 @@ public class SimpleFacets {
}
if (params.getFieldBool(field, GroupParams.GROUP_FACET, false)) {
- counts = getGroupedCounts(searcher, base, field, multiToken, offset,limit, mincount, missing, sort, prefix);
+ counts = getGroupedCounts(searcher, base, field, multiToken, offset,limit, mincount, missing, sort, prefix, contains, ignoreCase);
} else {
assert method != null;
switch (method) {
case ENUM:
assert TrieField.getMainValuePrefix(ft) == null;
- counts = getFacetTermEnumCounts(searcher, base, field, offset, limit, mincount,missing,sort,prefix);
+ counts = getFacetTermEnumCounts(searcher, base, field, offset, limit, mincount, missing, sort, prefix, contains, ignoreCase);
break;
case FCS:
assert !multiToken;
@@ -450,16 +450,19 @@ public class SimpleFacets {
if (prefix != null && !prefix.isEmpty()) {
throw new SolrException(ErrorCode.BAD_REQUEST, FacetParams.FACET_PREFIX + " is not supported on numeric types");
}
+ if (contains != null && !contains.isEmpty()) {
+ throw new SolrException(ErrorCode.BAD_REQUEST, FacetParams.FACET_CONTAINS + " is not supported on numeric types");
+ }
counts = NumericFacets.getCounts(searcher, base, field, offset, limit, mincount, missing, sort);
} else {
- PerSegmentSingleValuedFaceting ps = new PerSegmentSingleValuedFaceting(searcher, base, field, offset,limit, mincount, missing, sort, prefix);
+ PerSegmentSingleValuedFaceting ps = new PerSegmentSingleValuedFaceting(searcher, base, field, offset,limit, mincount, missing, sort, prefix, contains, ignoreCase);
Executor executor = threads == 0 ? directExecutor : facetExecutor;
ps.setNumThreads(threads);
counts = ps.getFacetCounts(executor);
}
break;
case FC:
- counts = DocValuesFacets.getCounts(searcher, base, field, offset,limit, mincount, missing, sort, prefix);
+ counts = DocValuesFacets.getCounts(searcher, base, field, offset,limit, mincount, missing, sort, prefix, contains, ignoreCase);
break;
default:
throw new AssertionError();
@@ -478,7 +481,9 @@ public class SimpleFacets {
int mincount,
boolean missing,
String sort,
- String prefix) throws IOException {
+ String prefix,
+ String contains,
+ boolean ignoreCase) throws IOException {
GroupingSpecification groupingSpecification = rb.getGroupingSpec();
final String groupField = groupingSpecification != null ? groupingSpecification.getFields()[0] : null;
if (groupField == null) {
@@ -488,8 +493,9 @@ public class SimpleFacets {
);
}
- BytesRef prefixBR = prefix != null ? new BytesRef(prefix) : null;
- final TermGroupFacetCollector collector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, field, multiToken, prefixBR, 128);
+ BytesRef prefixBytesRef = prefix != null ? new BytesRef(prefix) : null;
+ BytesRef containsRef = contains != null ? new BytesRef(contains) : null;
+ final TermGroupFacetCollector collector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, field, multiToken, prefixBytesRef, 128);
SchemaField sf = searcher.getSchema().getFieldOrNull(groupField);
@@ -519,6 +525,10 @@ public class SimpleFacets {
List scopedEntries
= result.getFacetEntries(offset, limit < 0 ? Integer.MAX_VALUE : limit);
for (TermGroupFacetCollector.FacetEntry facetEntry : scopedEntries) {
+ //:TODO:can we do contains earlier than this to make it more efficient?
+ if (containsRef != null && !StringHelper.contains(facetEntry.getValue(), containsRef, ignoreCase)) {
+ continue;
+ }
facetFieldType.indexedToReadable(facetEntry.getValue(), charsRef);
facetCounts.add(charsRef.toString(), facetEntry.getCount());
}
@@ -683,7 +693,7 @@ public class SimpleFacets {
* @see FacetParams#FACET_ZEROS
* @see FacetParams#FACET_MISSING
*/
- public NamedList getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field, int offset, int limit, int mincount, boolean missing, String sort, String prefix)
+ public NamedList getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field, int offset, int limit, int mincount, boolean missing, String sort, String prefix, String contains, boolean ignoreCase)
throws IOException {
/* :TODO: potential optimization...
@@ -715,10 +725,16 @@ public class SimpleFacets {
int off=offset;
int lim=limit>=0 ? limit : Integer.MAX_VALUE;
- BytesRef startTermBytes = null;
+ BytesRef prefixTermBytes = null;
if (prefix != null) {
String indexedPrefix = ft.toInternal(prefix);
- startTermBytes = new BytesRef(indexedPrefix);
+ prefixTermBytes = new BytesRef(indexedPrefix);
+ }
+
+ BytesRef containsTermBytes = null;
+ if (contains != null) {
+ String indexedContains = ft.toInternal(contains);
+ containsTermBytes = new BytesRef(indexedContains);
}
Fields fields = r.fields();
@@ -732,8 +748,8 @@ public class SimpleFacets {
// TODO: OPT: if seek(ord) is supported for this termsEnum, then we could use it for
// facet.offset when sorting by index order.
- if (startTermBytes != null) {
- if (termsEnum.seekCeil(startTermBytes) == TermsEnum.SeekStatus.END) {
+ if (prefixTermBytes != null) {
+ if (termsEnum.seekCeil(prefixTermBytes) == TermsEnum.SeekStatus.END) {
termsEnum = null;
} else {
term = termsEnum.term();
@@ -750,77 +766,77 @@ public class SimpleFacets {
if (docs.size() >= mincount) {
while (term != null) {
- if (startTermBytes != null && !StringHelper.startsWith(term, startTermBytes))
+ if (prefixTermBytes != null && !StringHelper.startsWith(term, prefixTermBytes))
break;
- int df = termsEnum.docFreq();
+ if (containsTermBytes == null || StringHelper.contains(term, containsTermBytes, ignoreCase)) {
+ int df = termsEnum.docFreq();
- // If we are sorting, we can use df>min (rather than >=) since we
- // are going in index order. For certain term distributions this can
- // make a large difference (for example, many terms with df=1).
- if (df>0 && df>min) {
- int c;
+ // If we are sorting, we can use df>min (rather than >=) since we
+ // are going in index order. For certain term distributions this can
+ // make a large difference (for example, many terms with df=1).
+ if (df > 0 && df > min) {
+ int c;
- if (df >= minDfFilterCache) {
- // use the filter cache
+ if (df >= minDfFilterCache) {
+ // use the filter cache
- if (deState==null) {
- deState = new SolrIndexSearcher.DocsEnumState();
- deState.fieldName = field;
- deState.liveDocs = r.getLiveDocs();
- deState.termsEnum = termsEnum;
- deState.postingsEnum = postingsEnum;
- }
+ if (deState == null) {
+ deState = new SolrIndexSearcher.DocsEnumState();
+ deState.fieldName = field;
+ deState.liveDocs = r.getLiveDocs();
+ deState.termsEnum = termsEnum;
+ deState.postingsEnum = postingsEnum;
+ }
- c = searcher.numDocs(docs, deState);
+ c = searcher.numDocs(docs, deState);
- postingsEnum = deState.postingsEnum;
- } else {
- // iterate over TermDocs to calculate the intersection
+ postingsEnum = deState.postingsEnum;
+ } else {
+ // iterate over TermDocs to calculate the intersection
- // TODO: specialize when base docset is a bitset or hash set (skipDocs)? or does it matter for this?
- // TODO: do this per-segment for better efficiency (MultiDocsEnum just uses base class impl)
- // TODO: would passing deleted docs lead to better efficiency over checking the fastForRandomSet?
- postingsEnum = termsEnum.postings(null, postingsEnum, PostingsEnum.NONE);
- c=0;
+ // TODO: specialize when base docset is a bitset or hash set (skipDocs)? or does it matter for this?
+ // TODO: do this per-segment for better efficiency (MultiDocsEnum just uses base class impl)
+ // TODO: would passing deleted docs lead to better efficiency over checking the fastForRandomSet?
+ postingsEnum = termsEnum.postings(null, postingsEnum, PostingsEnum.NONE);
+ c = 0;
- if (postingsEnum instanceof MultiPostingsEnum) {
- MultiPostingsEnum.EnumWithSlice[] subs = ((MultiPostingsEnum) postingsEnum).getSubs();
- int numSubs = ((MultiPostingsEnum) postingsEnum).getNumSubs();
- for (int subindex = 0; subindex min) {
+ BytesRef termCopy = BytesRef.deepCopyOf(term);
+ queue.add(new CountPair<>(termCopy, c));
+ if (queue.size() >= maxsize) min = queue.last().val;
+ }
+ } else {
+ if (c >= mincount && --off < 0) {
+ if (--lim < 0) break;
+ ft.indexedToReadable(term, charsRef);
+ res.add(charsRef.toString(), c);
}
- }
-
-
- }
-
- if (sortByCount) {
- if (c>min) {
- BytesRef termCopy = BytesRef.deepCopyOf(term);
- queue.add(new CountPair<>(termCopy, c));
- if (queue.size()>=maxsize) min=queue.last().val;
- }
- } else {
- if (c >= mincount && --off<0) {
- if (--lim<0) break;
- ft.indexedToReadable(term, charsRef);
- res.add(charsRef.toString(), c);
}
}
}
-
term = termsEnum.next();
}
}
diff --git a/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java b/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java
index dc775370ad5..5b60a26eb7c 100644
--- a/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java
+++ b/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java
@@ -17,12 +17,6 @@
package org.apache.solr.request;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.List;
-import java.util.Map;
-
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.ModifiableSolrParams;
@@ -32,6 +26,12 @@ import org.junit.BeforeClass;
import org.junit.Test;
import org.noggit.ObjectBuilder;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+import java.util.Map;
+
public class SimpleFacetsTest extends SolrTestCaseJ4 {
@@ -85,6 +85,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
indexFacetSingleValued();
indexFacetPrefixMultiValued();
indexFacetPrefixSingleValued();
+ indexFacetContains();
indexSimpleGroupedFacetCounts();
Collections.shuffle(pendingDocs, random());
@@ -1866,54 +1867,74 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
static void indexFacetPrefixMultiValued() {
- indexFacetPrefix("50","t_s");
+ indexFacetPrefix("50","t_s","","ignore_s");
}
@Test
public void testFacetPrefixMultiValued() {
- doFacetPrefix("t_s", null, "facet.method","enum");
- doFacetPrefix("t_s", null, "facet.method", "enum", "facet.enum.cache.minDf", "3");
- doFacetPrefix("t_s", null, "facet.method", "enum", "facet.enum.cache.minDf", "100");
- doFacetPrefix("t_s", null, "facet.method", "fc");
+ doFacetPrefix("t_s", null, "", "facet.method","enum");
+ doFacetPrefix("t_s", null, "", "facet.method", "enum", "facet.enum.cache.minDf", "3");
+ doFacetPrefix("t_s", null, "", "facet.method", "enum", "facet.enum.cache.minDf", "100");
+ doFacetPrefix("t_s", null, "", "facet.method", "fc");
}
static void indexFacetPrefixSingleValued() {
- indexFacetPrefix("60","tt_s1");
+ indexFacetPrefix("60","tt_s1","","ignore_s");
}
@Test
public void testFacetPrefixSingleValued() {
- doFacetPrefix("tt_s1", null);
+ doFacetPrefix("tt_s1", null, "");
}
+
@Test
public void testFacetPrefixSingleValuedFcs() {
- doFacetPrefix("tt_s1", null, "facet.method","fcs");
- doFacetPrefix("tt_s1", "{!threads=0}", "facet.method","fcs"); // direct execution
- doFacetPrefix("tt_s1", "{!threads=-1}", "facet.method","fcs"); // default / unlimited threads
- doFacetPrefix("tt_s1", "{!threads=2}", "facet.method","fcs"); // specific number of threads
+ doFacetPrefix("tt_s1", null, "", "facet.method","fcs");
+ doFacetPrefix("tt_s1", "{!threads=0}", "", "facet.method","fcs"); // direct execution
+ doFacetPrefix("tt_s1", "{!threads=-1}", "", "facet.method","fcs"); // default / unlimited threads
+ doFacetPrefix("tt_s1", "{!threads=2}", "", "facet.method","fcs"); // specific number of threads
}
+ static void indexFacetContains() {
+ indexFacetPrefix("70","contains_s1","","contains_group_s1");
+ indexFacetPrefix("80","contains_s1","Astra","contains_group_s1");
+ }
+
+ @Test
+ public void testFacetContains() {
+ doFacetContains("contains_s1", "contains_group_s1", "Astra", "BAst", "Ast", "facet.method", "enum");
+ doFacetContains("contains_s1", "contains_group_s1", "Astra", "BAst", "Ast", "facet.method", "fcs");
+ doFacetContains("contains_s1", "contains_group_s1", "Astra", "BAst", "Ast", "facet.method", "fc");
+ doFacetContains("contains_s1", "contains_group_s1", "Astra", "bAst", "ast", "facet.method", "enum", "facet.contains.ignoreCase", "true");
+ doFacetContains("contains_s1", "contains_group_s1", "Astra", "baSt", "ast", "facet.method", "fcs", "facet.contains.ignoreCase", "true");
+ doFacetContains("contains_s1", "contains_group_s1", "Astra", "basT", "ast", "facet.method", "fc", "facet.contains.ignoreCase", "true");
+ doFacetPrefix("contains_s1", null, "Astra", "facet.method", "enum", "facet.contains", "Ast");
+ doFacetPrefix("contains_s1", null, "Astra", "facet.method", "fcs", "facet.contains", "Ast");
+ doFacetPrefix("contains_s1", null, "Astra", "facet.method", "fc", "facet.contains", "Ast");
+ doFacetPrefix("contains_s1", null, "Astra", "facet.method", "enum", "facet.contains", "aSt", "facet.contains.ignoreCase", "true");
+ doFacetPrefix("contains_s1", null, "Astra", "facet.method", "fcs", "facet.contains", "asT", "facet.contains.ignoreCase", "true");
+ doFacetPrefix("contains_s1", null, "Astra", "facet.method", "fc", "facet.contains", "aST", "facet.contains.ignoreCase", "true");
+ }
- static void indexFacetPrefix(String idPrefix, String f) {
- add_doc("id", idPrefix+"1", f, "AAA");
- add_doc("id", idPrefix+"2", f, "B");
- add_doc("id", idPrefix+"3", f, "BB");
- add_doc("id", idPrefix+"4", f, "BB");
- add_doc("id", idPrefix+"5", f, "BBB");
- add_doc("id", idPrefix+"6", f, "BBB");
- add_doc("id", idPrefix+"7", f, "BBB");
- add_doc("id", idPrefix+"8", f, "CC");
- add_doc("id", idPrefix+"9", f, "CC");
- add_doc("id", idPrefix+"10", f, "CCC");
- add_doc("id", idPrefix+"11", f, "CCC");
- add_doc("id", idPrefix+"12", f, "CCC");
+ static void indexFacetPrefix(String idPrefix, String f, String termSuffix, String g) {
+ add_doc("id", idPrefix+"1", f, "AAA"+termSuffix, g, "A");
+ add_doc("id", idPrefix+"2", f, "B"+termSuffix, g, "A");
+ add_doc("id", idPrefix+"3", f, "BB"+termSuffix, g, "B");
+ add_doc("id", idPrefix+"4", f, "BB"+termSuffix, g, "B");
+ add_doc("id", idPrefix+"5", f, "BBB"+termSuffix, g, "B");
+ add_doc("id", idPrefix+"6", f, "BBB"+termSuffix, g, "B");
+ add_doc("id", idPrefix+"7", f, "BBB"+termSuffix, g, "C");
+ add_doc("id", idPrefix+"8", f, "CC"+termSuffix, g, "C");
+ add_doc("id", idPrefix+"9", f, "CC"+termSuffix, g, "C");
+ add_doc("id", idPrefix+"10", f, "CCC"+termSuffix, g, "C");
+ add_doc("id", idPrefix+"11", f, "CCC"+termSuffix, g, "D");
+ add_doc("id", idPrefix+"12", f, "CCC"+termSuffix, g, "E");
assertU(commit());
}
- public void doFacetPrefix(String f, String local, String... params) {
+ public void doFacetPrefix(String f, String local, String termSuffix, String... params) {
String indent="on";
String pre = "//lst[@name='"+f+"']";
- String notc = "id:[* TO *] -"+f+":C";
String lf = local==null ? f : local+f;
@@ -1929,9 +1950,9 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
,"facet.prefix","B"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=3]"
- ,pre+"/int[1][@name='BBB'][.='3']"
- ,pre+"/int[2][@name='BB'][.='2']"
- ,pre+"/int[3][@name='B'][.='1']"
+ ,pre+"/int[1][@name='BBB"+termSuffix+"'][.='3']"
+ ,pre+"/int[2][@name='BB"+termSuffix+"'][.='2']"
+ ,pre+"/int[3][@name='B"+termSuffix+"'][.='1']"
);
assertQ("test facet.prefix middle, exact match first term, unsorted",
@@ -1946,30 +1967,11 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
,"facet.prefix","B"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=3]"
- ,pre+"/int[1][@name='B'][.='1']"
- ,pre+"/int[2][@name='BB'][.='2']"
- ,pre+"/int[3][@name='BBB'][.='3']"
+ ,pre+"/int[1][@name='B"+termSuffix+"'][.='1']"
+ ,pre+"/int[2][@name='BB"+termSuffix+"'][.='2']"
+ ,pre+"/int[3][@name='BBB"+termSuffix+"'][.='3']"
);
-
- assertQ("test facet.prefix middle, exact match first term, unsorted",
- req(params, "q", "id:[* TO *]"
- ,"indent",indent
- ,"facet","true"
- ,"facet.field", lf
- ,"facet.mincount","0"
- ,"facet.offset","0"
- ,"facet.limit","100"
- ,"facet.sort","index"
- ,"facet.prefix","B"
- )
- ,"*[count(//lst[@name='facet_fields']/lst/int)=3]"
- ,pre+"/int[1][@name='B'][.='1']"
- ,pre+"/int[2][@name='BB'][.='2']"
- ,pre+"/int[3][@name='BBB'][.='3']"
- );
-
-
assertQ("test facet.prefix middle, paging",
req(params, "q", "id:[* TO *]"
,"indent",indent
@@ -1982,8 +1984,8 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
,"facet.prefix","B"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=2]"
- ,pre+"/int[1][@name='BB'][.='2']"
- ,pre+"/int[2][@name='B'][.='1']"
+ ,pre+"/int[1][@name='BB"+termSuffix+"'][.='2']"
+ ,pre+"/int[2][@name='B"+termSuffix+"'][.='1']"
);
assertQ("test facet.prefix middle, paging",
@@ -1998,7 +2000,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
,"facet.prefix","B"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=1]"
- ,pre+"/int[1][@name='BB'][.='2']"
+ ,pre+"/int[1][@name='BB"+termSuffix+"'][.='2']"
);
assertQ("test facet.prefix middle, paging",
@@ -2013,7 +2015,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
,"facet.prefix","B"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=1]"
- ,pre+"/int[1][@name='BB'][.='2']"
+ ,pre+"/int[1][@name='BB"+termSuffix+"'][.='2']"
);
assertQ("test facet.prefix end, not exact match",
@@ -2028,8 +2030,8 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
,"facet.prefix","C"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=2]"
- ,pre+"/int[1][@name='CCC'][.='3']"
- ,pre+"/int[2][@name='CC'][.='2']"
+ ,pre+"/int[1][@name='CCC"+termSuffix+"'][.='3']"
+ ,pre+"/int[2][@name='CC"+termSuffix+"'][.='2']"
);
assertQ("test facet.prefix end, exact match",
@@ -2044,8 +2046,8 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
,"facet.prefix","CC"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=2]"
- ,pre+"/int[1][@name='CCC'][.='3']"
- ,pre+"/int[2][@name='CC'][.='2']"
+ ,pre+"/int[1][@name='CCC"+termSuffix+"'][.='3']"
+ ,pre+"/int[2][@name='CC"+termSuffix+"'][.='2']"
);
assertQ("test facet.prefix past end",
@@ -2088,7 +2090,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
,"facet.prefix","AAA"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=1]"
- ,pre+"/int[1][@name='AAA'][.='1']"
+ ,pre+"/int[1][@name='AAA"+termSuffix+"'][.='1']"
);
assertQ("test facet.prefix at Start, not exact match",
req(params, "q", "id:[* TO *]"
@@ -2102,7 +2104,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
,"facet.prefix","AA"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=1]"
- ,pre+"/int[1][@name='AAA'][.='1']"
+ ,pre+"/int[1][@name='AAA"+termSuffix+"'][.='1']"
);
assertQ("test facet.prefix at Start, not exact match",
req(params, "q", "id:[* TO *]"
@@ -2116,7 +2118,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
,"facet.prefix","AA"
)
,"*[count(//lst[@name='facet_fields']/lst/int)=1]"
- ,pre+"/int[1][@name='AAA'][.='1']"
+ ,pre+"/int[1][@name='AAA"+termSuffix+"'][.='1']"
);
assertQ("test facet.prefix before start",
req(params, "q", "id:[* TO *]"
@@ -2162,9 +2164,54 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
);
}
+ public void doFacetContains(String f, String g, String termSuffix, String contains, String groupContains, String... params) {
+ String indent="on";
+ String pre = "//lst[@name='"+f+"']";
+
+ assertQ("test facet.contains",
+ req(params, "q", "id:[* TO *]"
+ ,"indent",indent
+ ,"facet","true"
+ ,"facet.field", f
+ ,"facet.mincount","0"
+ ,"facet.offset","0"
+ ,"facet.limit","100"
+ ,"facet.sort","count"
+ ,"facet.contains",contains
+ )
+ ,"*[count(//lst[@name='facet_fields']/lst/int)=3]"
+ ,pre+"/int[1][@name='BBB"+termSuffix+"'][.='3']"
+ ,pre+"/int[2][@name='BB"+termSuffix+"'][.='2']"
+ ,pre+"/int[3][@name='B"+termSuffix+"'][.='1']"
+ );
+
+ assertQ("test facet.contains for grouped facets",
+ req(params, "q", "id:[* TO *]"
+ ,"indent",indent
+ ,"facet","true"
+ ,"facet.field", f
+ ,"facet.mincount","0"
+ ,"facet.offset","0"
+ ,"facet.limit","100"
+ ,"facet.sort","count"
+ ,"facet.contains",groupContains
+ ,"group","true"
+ ,"group.field",g
+ ,"group.facet","true"
+ )
+ ,"*[count(//lst[@name='facet_fields']/lst/int)=6]"
+ ,pre+"/int[1][@name='CCC"+termSuffix+"'][.='3']"
+ ,pre+"/int[2][@name='BBB"+termSuffix+"'][.='2']"
+ ,pre+"/int[3][@name='AAA"+termSuffix+"'][.='1']"
+ ,pre+"/int[4][@name='B"+termSuffix+"'][.='1']"
+ ,pre+"/int[5][@name='BB"+termSuffix+"'][.='1']"
+ ,pre+"/int[6][@name='CC"+termSuffix+"'][.='1']"
+ );
+ }
+
/**
- * kind of an absurd tests because if there is an inifnite loop, it
- * would ver finish -- but at least it ensures that if one of
+ * kind of an absurd test because if there is an infinite loop, it
+ * would never finish -- but at least it ensures that if one of
* these requests return, they return an error
*/
public void testRangeFacetInfiniteLoopDetection() {
diff --git a/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java b/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java
index 2dae65f6c69..3955bf61614 100644
--- a/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java
+++ b/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java
@@ -149,6 +149,16 @@ public interface FacetParams {
*/
public static final String FACET_PREFIX = FACET + ".prefix";
+ /**
+ * Only return constraints of a facet field containing the given string.
+ */
+ public static final String FACET_CONTAINS = FACET + ".contains";
+
+ /**
+ * If using facet contains, ignore case when comparing values.
+ */
+ public static final String FACET_CONTAINS_IGNORE_CASE = FACET_CONTAINS + ".ignoreCase";
+
/**
* When faceting by enumerating the terms in a field,
* only use the filterCache for terms with a df >= to this parameter.
@@ -165,7 +175,7 @@ public interface FacetParams {
*/
public static final String FACET_DATE_START = FACET_DATE + ".start";
/**
- * Date string indicating the endinging point for a date facet range.
+ * Date string indicating the ending point for a date facet range.
* Can be overriden on a per field basis.
*/
public static final String FACET_DATE_END = FACET_DATE + ".end";