diff --git a/lucene/core/src/java/org/apache/lucene/util/StringHelper.java b/lucene/core/src/java/org/apache/lucene/util/StringHelper.java index e3138a33fe1..eabd8d75671 100644 --- a/lucene/core/src/java/org/apache/lucene/util/StringHelper.java +++ b/lucene/core/src/java/org/apache/lucene/util/StringHelper.java @@ -22,6 +22,7 @@ import java.math.BigInteger; import java.nio.file.Files; import java.nio.file.Paths; import java.util.Arrays; +import java.util.Locale; import java.util.Properties; /** @@ -33,11 +34,11 @@ public abstract class StringHelper { /** * Compares two {@link BytesRef}, element by element, and returns the - * number of elements common to both arrays. + * number of elements common to both arrays (from the start of each). * * @param left The first {@link BytesRef} to compare * @param right The second {@link BytesRef} to compare - * @return The number of common elements. + * @return The number of common elements (from the start of each). */ public static int bytesDifference(BytesRef left, BytesRef right) { int len = left.length < right.length ? left.length : right.length; @@ -134,6 +135,36 @@ public abstract class StringHelper { return sliceEquals(ref, suffix, ref.length - suffix.length); } + /** + * Returns {@code true} iff the ref contains the given slice at any + * position. + * + * @param ref + * the {@link BytesRef} to search in + * @param slice + * the byte sequence to look for + * @param ignoreCase + * if true, both arguments are decoded with utf8ToString() and compared lower-cased instead of byte-wise + * @return {@code true} iff the ref contains the given slice, + * {@code false} otherwise. 
+ */ + public static boolean contains(BytesRef ref, BytesRef slice, boolean ignoreCase) { + if (ignoreCase) { + // Case folding works on characters, so decode both sides; Locale.ROOT keeps it independent of the default locale. + String s1 = ref.utf8ToString(); + String s2 = slice.utf8ToString(); + return s1.toLowerCase(Locale.ROOT).contains(s2.toLowerCase(Locale.ROOT)); + } else { + // Byte-wise scan over every start offset at which slice still fits inside ref. + for (int pos = 0; pos <= ref.length - slice.length; ++pos) { + if (sliceEquals(ref, slice, pos)) { + return true; + } + } + } + return false; + } + private static boolean sliceEquals(BytesRef sliceToTest, BytesRef other, int pos) { if (pos < 0 || sliceToTest.length - pos < other.length) { return false; diff --git a/lucene/core/src/test/org/apache/lucene/util/TestStringHelper.java b/lucene/core/src/test/org/apache/lucene/util/TestStringHelper.java index 4256a7d588f..033c3250d58 100644 --- a/lucene/core/src/test/org/apache/lucene/util/TestStringHelper.java +++ b/lucene/core/src/test/org/apache/lucene/util/TestStringHelper.java @@ -18,6 +18,73 @@ package org.apache.lucene.util; */ public class TestStringHelper extends LuceneTestCase { + + public void testBytesDifference() { + BytesRef left = new BytesRef("foobar"); + BytesRef right = new BytesRef("foozo"); + assertEquals(3, StringHelper.bytesDifference(left, right)); + } + + public void testEquals() { + assertTrue(StringHelper.equals("foo", "foo")); + assertFalse(StringHelper.equals("foo", null)); + assertFalse(StringHelper.equals(null, "foo")); + assertTrue(StringHelper.equals(null, null)); + } + + public void testStartsWith() { + BytesRef ref = new BytesRef("foobar"); + BytesRef slice = new BytesRef("foo"); + assertTrue(StringHelper.startsWith(ref, slice)); + } + + public void testEndsWith() { + BytesRef ref = new BytesRef("foobar"); + BytesRef slice = new BytesRef("bar"); + assertTrue(StringHelper.endsWith(ref, slice)); + } + + public void testStartsWithWhole() { + BytesRef ref = new BytesRef("foobar"); + BytesRef slice = new BytesRef("foobar"); + assertTrue(StringHelper.startsWith(ref, slice)); + } + + public void testEndsWithWhole() { + 
BytesRef ref = new BytesRef("foobar"); + BytesRef slice = new BytesRef("foobar"); + assertTrue(StringHelper.endsWith(ref, slice)); + } + + public void testContainsAtStart() { + BytesRef ref = new BytesRef("foobar"); + BytesRef slice = new BytesRef("foo"); + assertTrue(StringHelper.contains(ref, slice, false)); + } + + public void testContains() { + BytesRef ref = new BytesRef("foobar"); + BytesRef slice = new BytesRef("ooba"); + assertTrue(StringHelper.contains(ref, slice, false)); + } + + public void testContainsAtEnd() { + BytesRef ref = new BytesRef("foobar"); + BytesRef slice = new BytesRef("bar"); + assertTrue(StringHelper.contains(ref, slice, false)); + } + + public void testContainsWhole() { + BytesRef ref = new BytesRef("foobar"); + BytesRef slice = new BytesRef("foobar"); + assertTrue(StringHelper.contains(ref, slice, false)); + } + + public void testContainsIgnoreCase() { + BytesRef ref = new BytesRef("FooBar"); + BytesRef slice = new BytesRef("bar"); + assertTrue(StringHelper.contains(ref, slice, true)); + } public void testMurmurHash3() throws Exception { // Hashes computed using murmur3_32 from https://code.google.com/p/pyfasthash diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index 7cda0871e83..068331a8719 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -211,6 +211,9 @@ New Features * SOLR-7307: EmbeddedSolrServer can now be started up by passing a path to a solr home directory, or a NodeConfig object (Alan Woodward, Mike Drob) +* SOLR-1387: Add facet.contains and facet.contains.ignoreCase options (Tom Winch + via Alan Woodward) + Bug Fixes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java index 9985671cd4b..cfd3e05636e 100644 --- a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java +++ b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java @@ -34,6 +34,7 @@ import org.apache.lucene.util.BytesRef; import 
org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.LongValues; +import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.UnicodeUtil; import org.apache.solr.common.params.FacetParams; import org.apache.solr.common.util.NamedList; @@ -58,7 +59,7 @@ import org.apache.solr.util.LongPriorityQueue; public class DocValuesFacets { private DocValuesFacets() {} - public static NamedList getCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix) throws IOException { + public static NamedList getCounts(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix, String contains, boolean ignoreCase) throws IOException { SchemaField schemaField = searcher.getSchema().getField(fieldName); FieldType ft = schemaField.getType(); NamedList res = new NamedList<>(); @@ -97,6 +98,8 @@ public class DocValuesFacets { prefixRef = new BytesRefBuilder(); prefixRef.copyChars(prefix); } + + final BytesRef containsBR = contains != null ? new BytesRef(contains) : null; int startTermIndex, endTermIndex; if (prefix!=null) { @@ -170,6 +173,12 @@ public class DocValuesFacets { int min=mincount-1; // the smallest value in the top 'N' values for (int i=(startTermIndex==-1)?1:0; imin) { // NOTE: we use c>min rather than c>=min as an optimization because we are going in // index order, so we already know that the keys are ordered. This can be very @@ -203,18 +212,28 @@ public class DocValuesFacets { } else { // add results in index order int i=(startTermIndex==-1)?1:0; - if (mincount<=0) { - // if mincount<=0, then we won't discard any terms and we know exactly - // where to start. 
+ if (mincount<=0 && containsBR == null) { + // if mincount<=0 and we're not examining the values for contains, then + // we won't discard any terms and we know exactly where to start. i+=off; off=0; } for (; i=0) continue; + if (c=0) continue; if (--lim<0) break; - final BytesRef term = si.lookupOrd(startTermIndex+i); + if (term == null) { + term = si.lookupOrd(startTermIndex+i); + } ft.indexedToReadable(term, charsRef); res.add(charsRef.toString(), c); } diff --git a/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java b/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java index beccaf857cd..6292d285ba0 100644 --- a/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java +++ b/solr/core/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java @@ -32,6 +32,7 @@ import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRefBuilder; import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.PriorityQueue; +import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.UnicodeUtil; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.FacetParams; @@ -54,12 +55,14 @@ class PerSegmentSingleValuedFaceting { boolean missing; String sort; String prefix; + BytesRef containsBR; + boolean ignoreCase; Filter baseSet; int nThreads; - public PerSegmentSingleValuedFaceting(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix) { + public PerSegmentSingleValuedFaceting(SolrIndexSearcher searcher, DocSet docs, String fieldName, int offset, int limit, int mincount, boolean missing, String sort, String prefix, String contains, boolean ignoreCase) { this.searcher = searcher; this.docs = docs; this.fieldName = fieldName; @@ -69,6 +72,8 @@ class PerSegmentSingleValuedFaceting { this.missing = missing; this.sort = sort; this.prefix = prefix; + 
this.containsBR = contains != null ? new BytesRef(contains) : null; + this.ignoreCase = ignoreCase; } public void setNumThreads(int threads) { @@ -173,16 +178,23 @@ class PerSegmentSingleValuedFaceting { while (queue.size() > 0) { SegFacet seg = queue.top(); - + + // if facet.contains specified, only actually collect the count if substring contained + boolean collect = containsBR == null || StringHelper.contains(seg.tempBR, containsBR, ignoreCase); + // we will normally end up advancing the term enum for this segment // while still using "val", so we need to make a copy since the BytesRef // may be shared across calls. - val.copyBytes(seg.tempBR); - + if (collect) { + val.copyBytes(seg.tempBR); + } + int count = 0; do { - count += seg.counts[seg.pos - seg.startTermIndex]; + if (collect) { + count += seg.counts[seg.pos - seg.startTermIndex]; + } // TODO: OPTIMIZATION... // if mincount>0 then seg.pos++ can skip ahead to the next non-zero entry. @@ -190,14 +202,16 @@ class PerSegmentSingleValuedFaceting { if (seg.pos >= seg.endTermIndex) { queue.pop(); seg = queue.top(); - } else { + } else { seg.tempBR = seg.tenum.next(); seg = queue.updateTop(); } } while (seg != null && val.get().compareTo(seg.tempBR) == 0); - boolean stop = collector.collect(val.get(), count); - if (stop) break; + if (collect) { + boolean stop = collector.collect(val.get(), count); + if (stop) break; + } } NamedList res = collector.getFacetCounts(); @@ -253,31 +267,28 @@ class PerSegmentSingleValuedFaceting { startTermIndex=-1; endTermIndex=si.getValueCount(); } - final int nTerms=endTermIndex-startTermIndex; - if (nTerms>0) { - // count collection array only needs to be as big as the number of terms we are - // going to collect counts for. 
- final int[] counts = this.counts = new int[nTerms]; - DocIdSet idSet = baseSet.getDocIdSet(context, null); // this set only includes live docs - DocIdSetIterator iter = idSet.iterator(); + if (nTerms == 0) return; + // count collection array only needs to be as big as the number of terms we are + // going to collect counts for. + final int[] counts = this.counts = new int[nTerms]; + DocIdSet idSet = baseSet.getDocIdSet(context, null); // this set only includes live docs + DocIdSetIterator iter = idSet.iterator(); - //// + if (prefix==null) { + // specialized version when collecting counts for all terms int doc; - - if (prefix==null) { - // specialized version when collecting counts for all terms - while ((doc = iter.nextDoc()) < DocIdSetIterator.NO_MORE_DOCS) { - counts[1+si.getOrd(doc)]++; - } - } else { - // version that adjusts term numbers because we aren't collecting the full range - while ((doc = iter.nextDoc()) < DocIdSetIterator.NO_MORE_DOCS) { - int term = si.getOrd(doc); - int arrIdx = term-startTermIndex; - if (arrIdx>=0 && arrIdx=0 && arrIdx0 ? 
FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX); - String prefix = params.getFieldParam(field,FacetParams.FACET_PREFIX); - + String prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX); + String contains = params.getFieldParam(field, FacetParams.FACET_CONTAINS); + boolean ignoreCase = params.getFieldBool(field, FacetParams.FACET_CONTAINS_IGNORE_CASE, false); NamedList counts; SchemaField sf = searcher.getSchema().getField(field); @@ -435,13 +435,13 @@ public class SimpleFacets { } if (params.getFieldBool(field, GroupParams.GROUP_FACET, false)) { - counts = getGroupedCounts(searcher, base, field, multiToken, offset,limit, mincount, missing, sort, prefix); + counts = getGroupedCounts(searcher, base, field, multiToken, offset,limit, mincount, missing, sort, prefix, contains, ignoreCase); } else { assert method != null; switch (method) { case ENUM: assert TrieField.getMainValuePrefix(ft) == null; - counts = getFacetTermEnumCounts(searcher, base, field, offset, limit, mincount,missing,sort,prefix); + counts = getFacetTermEnumCounts(searcher, base, field, offset, limit, mincount, missing, sort, prefix, contains, ignoreCase); break; case FCS: assert !multiToken; @@ -450,16 +450,19 @@ public class SimpleFacets { if (prefix != null && !prefix.isEmpty()) { throw new SolrException(ErrorCode.BAD_REQUEST, FacetParams.FACET_PREFIX + " is not supported on numeric types"); } + if (contains != null && !contains.isEmpty()) { + throw new SolrException(ErrorCode.BAD_REQUEST, FacetParams.FACET_CONTAINS + " is not supported on numeric types"); + } counts = NumericFacets.getCounts(searcher, base, field, offset, limit, mincount, missing, sort); } else { - PerSegmentSingleValuedFaceting ps = new PerSegmentSingleValuedFaceting(searcher, base, field, offset,limit, mincount, missing, sort, prefix); + PerSegmentSingleValuedFaceting ps = new PerSegmentSingleValuedFaceting(searcher, base, field, offset,limit, mincount, missing, sort, prefix, contains, ignoreCase); Executor 
executor = threads == 0 ? directExecutor : facetExecutor; ps.setNumThreads(threads); counts = ps.getFacetCounts(executor); } break; case FC: - counts = DocValuesFacets.getCounts(searcher, base, field, offset,limit, mincount, missing, sort, prefix); + counts = DocValuesFacets.getCounts(searcher, base, field, offset,limit, mincount, missing, sort, prefix, contains, ignoreCase); break; default: throw new AssertionError(); @@ -478,7 +481,9 @@ public class SimpleFacets { int mincount, boolean missing, String sort, - String prefix) throws IOException { + String prefix, + String contains, + boolean ignoreCase) throws IOException { GroupingSpecification groupingSpecification = rb.getGroupingSpec(); final String groupField = groupingSpecification != null ? groupingSpecification.getFields()[0] : null; if (groupField == null) { @@ -488,8 +493,9 @@ public class SimpleFacets { ); } - BytesRef prefixBR = prefix != null ? new BytesRef(prefix) : null; - final TermGroupFacetCollector collector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, field, multiToken, prefixBR, 128); + BytesRef prefixBytesRef = prefix != null ? new BytesRef(prefix) : null; + BytesRef containsRef = contains != null ? new BytesRef(contains) : null; + final TermGroupFacetCollector collector = TermGroupFacetCollector.createTermGroupFacetCollector(groupField, field, multiToken, prefixBytesRef, 128); SchemaField sf = searcher.getSchema().getFieldOrNull(groupField); @@ -519,6 +525,10 @@ public class SimpleFacets { List scopedEntries = result.getFacetEntries(offset, limit < 0 ? Integer.MAX_VALUE : limit); for (TermGroupFacetCollector.FacetEntry facetEntry : scopedEntries) { + //:TODO:can we do contains earlier than this to make it more efficient? NOTE(review): scopedEntries was already cut down by getFacetEntries(offset, limit) above, so filtering here looks like it can return fewer than limit matches per page - confirm and move the filter ahead of paging. 
+ if (containsRef != null && !StringHelper.contains(facetEntry.getValue(), containsRef, ignoreCase)) { + continue; + } facetFieldType.indexedToReadable(facetEntry.getValue(), charsRef); facetCounts.add(charsRef.toString(), facetEntry.getCount()); } @@ -683,7 +693,7 @@ public class SimpleFacets { * @see FacetParams#FACET_ZEROS * @see FacetParams#FACET_MISSING */ - public NamedList getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field, int offset, int limit, int mincount, boolean missing, String sort, String prefix) + public NamedList getFacetTermEnumCounts(SolrIndexSearcher searcher, DocSet docs, String field, int offset, int limit, int mincount, boolean missing, String sort, String prefix, String contains, boolean ignoreCase) throws IOException { /* :TODO: potential optimization... @@ -715,10 +725,16 @@ public class SimpleFacets { int off=offset; int lim=limit>=0 ? limit : Integer.MAX_VALUE; - BytesRef startTermBytes = null; + BytesRef prefixTermBytes = null; if (prefix != null) { String indexedPrefix = ft.toInternal(prefix); - startTermBytes = new BytesRef(indexedPrefix); + prefixTermBytes = new BytesRef(indexedPrefix); + } + + BytesRef containsTermBytes = null; + if (contains != null) { + String indexedContains = ft.toInternal(contains); + containsTermBytes = new BytesRef(indexedContains); } Fields fields = r.fields(); @@ -732,8 +748,8 @@ public class SimpleFacets { // TODO: OPT: if seek(ord) is supported for this termsEnum, then we could use it for // facet.offset when sorting by index order. 
- if (startTermBytes != null) { - if (termsEnum.seekCeil(startTermBytes) == TermsEnum.SeekStatus.END) { + if (prefixTermBytes != null) { + if (termsEnum.seekCeil(prefixTermBytes) == TermsEnum.SeekStatus.END) { termsEnum = null; } else { term = termsEnum.term(); @@ -750,77 +766,77 @@ public class SimpleFacets { if (docs.size() >= mincount) { while (term != null) { - if (startTermBytes != null && !StringHelper.startsWith(term, startTermBytes)) + if (prefixTermBytes != null && !StringHelper.startsWith(term, prefixTermBytes)) break; - int df = termsEnum.docFreq(); + if (containsTermBytes == null || StringHelper.contains(term, containsTermBytes, ignoreCase)) { + int df = termsEnum.docFreq(); - // If we are sorting, we can use df>min (rather than >=) since we - // are going in index order. For certain term distributions this can - // make a large difference (for example, many terms with df=1). - if (df>0 && df>min) { - int c; + // If we are sorting, we can use df>min (rather than >=) since we + // are going in index order. For certain term distributions this can + // make a large difference (for example, many terms with df=1). 
+ if (df > 0 && df > min) { + int c; - if (df >= minDfFilterCache) { - // use the filter cache + if (df >= minDfFilterCache) { + // use the filter cache - if (deState==null) { - deState = new SolrIndexSearcher.DocsEnumState(); - deState.fieldName = field; - deState.liveDocs = r.getLiveDocs(); - deState.termsEnum = termsEnum; - deState.postingsEnum = postingsEnum; - } + if (deState == null) { + deState = new SolrIndexSearcher.DocsEnumState(); + deState.fieldName = field; + deState.liveDocs = r.getLiveDocs(); + deState.termsEnum = termsEnum; + deState.postingsEnum = postingsEnum; + } - c = searcher.numDocs(docs, deState); + c = searcher.numDocs(docs, deState); - postingsEnum = deState.postingsEnum; - } else { - // iterate over TermDocs to calculate the intersection + postingsEnum = deState.postingsEnum; + } else { + // iterate over TermDocs to calculate the intersection - // TODO: specialize when base docset is a bitset or hash set (skipDocs)? or does it matter for this? - // TODO: do this per-segment for better efficiency (MultiDocsEnum just uses base class impl) - // TODO: would passing deleted docs lead to better efficiency over checking the fastForRandomSet? - postingsEnum = termsEnum.postings(null, postingsEnum, PostingsEnum.NONE); - c=0; + // TODO: specialize when base docset is a bitset or hash set (skipDocs)? or does it matter for this? + // TODO: do this per-segment for better efficiency (MultiDocsEnum just uses base class impl) + // TODO: would passing deleted docs lead to better efficiency over checking the fastForRandomSet? 
+ postingsEnum = termsEnum.postings(null, postingsEnum, PostingsEnum.NONE); + c = 0; - if (postingsEnum instanceof MultiPostingsEnum) { - MultiPostingsEnum.EnumWithSlice[] subs = ((MultiPostingsEnum) postingsEnum).getSubs(); - int numSubs = ((MultiPostingsEnum) postingsEnum).getNumSubs(); - for (int subindex = 0; subindex min) { + BytesRef termCopy = BytesRef.deepCopyOf(term); + queue.add(new CountPair<>(termCopy, c)); + if (queue.size() >= maxsize) min = queue.last().val; + } + } else { + if (c >= mincount && --off < 0) { + if (--lim < 0) break; + ft.indexedToReadable(term, charsRef); + res.add(charsRef.toString(), c); } - } - - - } - - if (sortByCount) { - if (c>min) { - BytesRef termCopy = BytesRef.deepCopyOf(term); - queue.add(new CountPair<>(termCopy, c)); - if (queue.size()>=maxsize) min=queue.last().val; - } - } else { - if (c >= mincount && --off<0) { - if (--lim<0) break; - ft.indexedToReadable(term, charsRef); - res.add(charsRef.toString(), c); } } } - term = termsEnum.next(); } } diff --git a/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java b/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java index dc775370ad5..5b60a26eb7c 100644 --- a/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java +++ b/solr/core/src/test/org/apache/solr/request/SimpleFacetsTest.java @@ -17,12 +17,6 @@ package org.apache.solr.request; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.List; -import java.util.Map; - import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.ModifiableSolrParams; @@ -32,6 +26,12 @@ import org.junit.BeforeClass; import org.junit.Test; import org.noggit.ObjectBuilder; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; + public class SimpleFacetsTest extends SolrTestCaseJ4 { @@ -85,6 +85,7 @@ public class 
SimpleFacetsTest extends SolrTestCaseJ4 { indexFacetSingleValued(); indexFacetPrefixMultiValued(); indexFacetPrefixSingleValued(); + indexFacetContains(); indexSimpleGroupedFacetCounts(); Collections.shuffle(pendingDocs, random()); @@ -1866,54 +1867,74 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { static void indexFacetPrefixMultiValued() { - indexFacetPrefix("50","t_s"); + indexFacetPrefix("50","t_s","","ignore_s"); } @Test public void testFacetPrefixMultiValued() { - doFacetPrefix("t_s", null, "facet.method","enum"); - doFacetPrefix("t_s", null, "facet.method", "enum", "facet.enum.cache.minDf", "3"); - doFacetPrefix("t_s", null, "facet.method", "enum", "facet.enum.cache.minDf", "100"); - doFacetPrefix("t_s", null, "facet.method", "fc"); + doFacetPrefix("t_s", null, "", "facet.method","enum"); + doFacetPrefix("t_s", null, "", "facet.method", "enum", "facet.enum.cache.minDf", "3"); + doFacetPrefix("t_s", null, "", "facet.method", "enum", "facet.enum.cache.minDf", "100"); + doFacetPrefix("t_s", null, "", "facet.method", "fc"); } static void indexFacetPrefixSingleValued() { - indexFacetPrefix("60","tt_s1"); + indexFacetPrefix("60","tt_s1","","ignore_s"); } @Test public void testFacetPrefixSingleValued() { - doFacetPrefix("tt_s1", null); + doFacetPrefix("tt_s1", null, ""); } + @Test public void testFacetPrefixSingleValuedFcs() { - doFacetPrefix("tt_s1", null, "facet.method","fcs"); - doFacetPrefix("tt_s1", "{!threads=0}", "facet.method","fcs"); // direct execution - doFacetPrefix("tt_s1", "{!threads=-1}", "facet.method","fcs"); // default / unlimited threads - doFacetPrefix("tt_s1", "{!threads=2}", "facet.method","fcs"); // specific number of threads + doFacetPrefix("tt_s1", null, "", "facet.method","fcs"); + doFacetPrefix("tt_s1", "{!threads=0}", "", "facet.method","fcs"); // direct execution + doFacetPrefix("tt_s1", "{!threads=-1}", "", "facet.method","fcs"); // default / unlimited threads + doFacetPrefix("tt_s1", "{!threads=2}", "", 
"facet.method","fcs"); // specific number of threads } + static void indexFacetContains() { + indexFacetPrefix("70","contains_s1","","contains_group_s1"); + indexFacetPrefix("80","contains_s1","Astra","contains_group_s1"); + } + + @Test + public void testFacetContains() { + doFacetContains("contains_s1", "contains_group_s1", "Astra", "BAst", "Ast", "facet.method", "enum"); + doFacetContains("contains_s1", "contains_group_s1", "Astra", "BAst", "Ast", "facet.method", "fcs"); + doFacetContains("contains_s1", "contains_group_s1", "Astra", "BAst", "Ast", "facet.method", "fc"); + doFacetContains("contains_s1", "contains_group_s1", "Astra", "bAst", "ast", "facet.method", "enum", "facet.contains.ignoreCase", "true"); + doFacetContains("contains_s1", "contains_group_s1", "Astra", "baSt", "ast", "facet.method", "fcs", "facet.contains.ignoreCase", "true"); + doFacetContains("contains_s1", "contains_group_s1", "Astra", "basT", "ast", "facet.method", "fc", "facet.contains.ignoreCase", "true"); + doFacetPrefix("contains_s1", null, "Astra", "facet.method", "enum", "facet.contains", "Ast"); + doFacetPrefix("contains_s1", null, "Astra", "facet.method", "fcs", "facet.contains", "Ast"); + doFacetPrefix("contains_s1", null, "Astra", "facet.method", "fc", "facet.contains", "Ast"); + doFacetPrefix("contains_s1", null, "Astra", "facet.method", "enum", "facet.contains", "aSt", "facet.contains.ignoreCase", "true"); + doFacetPrefix("contains_s1", null, "Astra", "facet.method", "fcs", "facet.contains", "asT", "facet.contains.ignoreCase", "true"); + doFacetPrefix("contains_s1", null, "Astra", "facet.method", "fc", "facet.contains", "aST", "facet.contains.ignoreCase", "true"); + } - static void indexFacetPrefix(String idPrefix, String f) { - add_doc("id", idPrefix+"1", f, "AAA"); - add_doc("id", idPrefix+"2", f, "B"); - add_doc("id", idPrefix+"3", f, "BB"); - add_doc("id", idPrefix+"4", f, "BB"); - add_doc("id", idPrefix+"5", f, "BBB"); - add_doc("id", idPrefix+"6", f, "BBB"); - add_doc("id", 
idPrefix+"7", f, "BBB"); - add_doc("id", idPrefix+"8", f, "CC"); - add_doc("id", idPrefix+"9", f, "CC"); - add_doc("id", idPrefix+"10", f, "CCC"); - add_doc("id", idPrefix+"11", f, "CCC"); - add_doc("id", idPrefix+"12", f, "CCC"); + static void indexFacetPrefix(String idPrefix, String f, String termSuffix, String g) { + add_doc("id", idPrefix+"1", f, "AAA"+termSuffix, g, "A"); + add_doc("id", idPrefix+"2", f, "B"+termSuffix, g, "A"); + add_doc("id", idPrefix+"3", f, "BB"+termSuffix, g, "B"); + add_doc("id", idPrefix+"4", f, "BB"+termSuffix, g, "B"); + add_doc("id", idPrefix+"5", f, "BBB"+termSuffix, g, "B"); + add_doc("id", idPrefix+"6", f, "BBB"+termSuffix, g, "B"); + add_doc("id", idPrefix+"7", f, "BBB"+termSuffix, g, "C"); + add_doc("id", idPrefix+"8", f, "CC"+termSuffix, g, "C"); + add_doc("id", idPrefix+"9", f, "CC"+termSuffix, g, "C"); + add_doc("id", idPrefix+"10", f, "CCC"+termSuffix, g, "C"); + add_doc("id", idPrefix+"11", f, "CCC"+termSuffix, g, "D"); + add_doc("id", idPrefix+"12", f, "CCC"+termSuffix, g, "E"); assertU(commit()); } - public void doFacetPrefix(String f, String local, String... params) { + public void doFacetPrefix(String f, String local, String termSuffix, String... params) { String indent="on"; String pre = "//lst[@name='"+f+"']"; - String notc = "id:[* TO *] -"+f+":C"; String lf = local==null ? 
f : local+f; @@ -1929,9 +1950,9 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { ,"facet.prefix","B" ) ,"*[count(//lst[@name='facet_fields']/lst/int)=3]" - ,pre+"/int[1][@name='BBB'][.='3']" - ,pre+"/int[2][@name='BB'][.='2']" - ,pre+"/int[3][@name='B'][.='1']" + ,pre+"/int[1][@name='BBB"+termSuffix+"'][.='3']" + ,pre+"/int[2][@name='BB"+termSuffix+"'][.='2']" + ,pre+"/int[3][@name='B"+termSuffix+"'][.='1']" ); assertQ("test facet.prefix middle, exact match first term, unsorted", @@ -1946,30 +1967,11 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { ,"facet.prefix","B" ) ,"*[count(//lst[@name='facet_fields']/lst/int)=3]" - ,pre+"/int[1][@name='B'][.='1']" - ,pre+"/int[2][@name='BB'][.='2']" - ,pre+"/int[3][@name='BBB'][.='3']" + ,pre+"/int[1][@name='B"+termSuffix+"'][.='1']" + ,pre+"/int[2][@name='BB"+termSuffix+"'][.='2']" + ,pre+"/int[3][@name='BBB"+termSuffix+"'][.='3']" ); - - assertQ("test facet.prefix middle, exact match first term, unsorted", - req(params, "q", "id:[* TO *]" - ,"indent",indent - ,"facet","true" - ,"facet.field", lf - ,"facet.mincount","0" - ,"facet.offset","0" - ,"facet.limit","100" - ,"facet.sort","index" - ,"facet.prefix","B" - ) - ,"*[count(//lst[@name='facet_fields']/lst/int)=3]" - ,pre+"/int[1][@name='B'][.='1']" - ,pre+"/int[2][@name='BB'][.='2']" - ,pre+"/int[3][@name='BBB'][.='3']" - ); - - assertQ("test facet.prefix middle, paging", req(params, "q", "id:[* TO *]" ,"indent",indent @@ -1982,8 +1984,8 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { ,"facet.prefix","B" ) ,"*[count(//lst[@name='facet_fields']/lst/int)=2]" - ,pre+"/int[1][@name='BB'][.='2']" - ,pre+"/int[2][@name='B'][.='1']" + ,pre+"/int[1][@name='BB"+termSuffix+"'][.='2']" + ,pre+"/int[2][@name='B"+termSuffix+"'][.='1']" ); assertQ("test facet.prefix middle, paging", @@ -1998,7 +2000,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { ,"facet.prefix","B" ) ,"*[count(//lst[@name='facet_fields']/lst/int)=1]" - 
,pre+"/int[1][@name='BB'][.='2']" + ,pre+"/int[1][@name='BB"+termSuffix+"'][.='2']" ); assertQ("test facet.prefix middle, paging", @@ -2013,7 +2015,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { ,"facet.prefix","B" ) ,"*[count(//lst[@name='facet_fields']/lst/int)=1]" - ,pre+"/int[1][@name='BB'][.='2']" + ,pre+"/int[1][@name='BB"+termSuffix+"'][.='2']" ); assertQ("test facet.prefix end, not exact match", @@ -2028,8 +2030,8 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { ,"facet.prefix","C" ) ,"*[count(//lst[@name='facet_fields']/lst/int)=2]" - ,pre+"/int[1][@name='CCC'][.='3']" - ,pre+"/int[2][@name='CC'][.='2']" + ,pre+"/int[1][@name='CCC"+termSuffix+"'][.='3']" + ,pre+"/int[2][@name='CC"+termSuffix+"'][.='2']" ); assertQ("test facet.prefix end, exact match", @@ -2044,8 +2046,8 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { ,"facet.prefix","CC" ) ,"*[count(//lst[@name='facet_fields']/lst/int)=2]" - ,pre+"/int[1][@name='CCC'][.='3']" - ,pre+"/int[2][@name='CC'][.='2']" + ,pre+"/int[1][@name='CCC"+termSuffix+"'][.='3']" + ,pre+"/int[2][@name='CC"+termSuffix+"'][.='2']" ); assertQ("test facet.prefix past end", @@ -2088,7 +2090,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { ,"facet.prefix","AAA" ) ,"*[count(//lst[@name='facet_fields']/lst/int)=1]" - ,pre+"/int[1][@name='AAA'][.='1']" + ,pre+"/int[1][@name='AAA"+termSuffix+"'][.='1']" ); assertQ("test facet.prefix at Start, not exact match", req(params, "q", "id:[* TO *]" @@ -2102,7 +2104,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { ,"facet.prefix","AA" ) ,"*[count(//lst[@name='facet_fields']/lst/int)=1]" - ,pre+"/int[1][@name='AAA'][.='1']" + ,pre+"/int[1][@name='AAA"+termSuffix+"'][.='1']" ); assertQ("test facet.prefix at Start, not exact match", req(params, "q", "id:[* TO *]" @@ -2116,7 +2118,7 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { ,"facet.prefix","AA" ) ,"*[count(//lst[@name='facet_fields']/lst/int)=1]" - 
,pre+"/int[1][@name='AAA'][.='1']" + ,pre+"/int[1][@name='AAA"+termSuffix+"'][.='1']" ); assertQ("test facet.prefix before start", req(params, "q", "id:[* TO *]" @@ -2162,9 +2164,54 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 { ); } + public void doFacetContains(String f, String g, String termSuffix, String contains, String groupContains, String... params) { + String indent="on"; + String pre = "//lst[@name='"+f+"']"; + + assertQ("test facet.contains", + req(params, "q", "id:[* TO *]" + ,"indent",indent + ,"facet","true" + ,"facet.field", f + ,"facet.mincount","0" + ,"facet.offset","0" + ,"facet.limit","100" + ,"facet.sort","count" + ,"facet.contains",contains + ) + ,"*[count(//lst[@name='facet_fields']/lst/int)=3]" + ,pre+"/int[1][@name='BBB"+termSuffix+"'][.='3']" + ,pre+"/int[2][@name='BB"+termSuffix+"'][.='2']" + ,pre+"/int[3][@name='B"+termSuffix+"'][.='1']" + ); + + assertQ("test facet.contains for grouped facets", + req(params, "q", "id:[* TO *]" + ,"indent",indent + ,"facet","true" + ,"facet.field", f + ,"facet.mincount","0" + ,"facet.offset","0" + ,"facet.limit","100" + ,"facet.sort","count" + ,"facet.contains",groupContains + ,"group","true" + ,"group.field",g + ,"group.facet","true" + ) + ,"*[count(//lst[@name='facet_fields']/lst/int)=6]" + ,pre+"/int[1][@name='CCC"+termSuffix+"'][.='3']" + ,pre+"/int[2][@name='BBB"+termSuffix+"'][.='2']" + ,pre+"/int[3][@name='AAA"+termSuffix+"'][.='1']" + ,pre+"/int[4][@name='B"+termSuffix+"'][.='1']" + ,pre+"/int[5][@name='BB"+termSuffix+"'][.='1']" + ,pre+"/int[6][@name='CC"+termSuffix+"'][.='1']" + ); + } + /** - * kind of an absurd tests because if there is an inifnite loop, it - * would ver finish -- but at least it ensures that if one of + * kind of an absurd test because if there is an infinite loop, it + * would never finish -- but at least it ensures that if one of * these requests return, they return an error */ public void testRangeFacetInfiniteLoopDetection() { diff --git 
a/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java b/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java index 2dae65f6c69..3955bf61614 100644 --- a/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java +++ b/solr/solrj/src/java/org/apache/solr/common/params/FacetParams.java @@ -149,6 +149,16 @@ public interface FacetParams { */ public static final String FACET_PREFIX = FACET + ".prefix"; + /** + * Only return constraints of a facet field containing the given string. + */ + public static final String FACET_CONTAINS = FACET + ".contains"; + + /** + * If using facet contains, ignore case when comparing values. + */ + public static final String FACET_CONTAINS_IGNORE_CASE = FACET_CONTAINS + ".ignoreCase"; + /** * When faceting by enumerating the terms in a field, * only use the filterCache for terms with a df >= to this parameter. @@ -165,7 +175,7 @@ public interface FacetParams { */ public static final String FACET_DATE_START = FACET_DATE + ".start"; /** - * Date string indicating the endinging point for a date facet range. + * Date string indicating the ending point for a date facet range. * Can be overriden on a per field basis. */ public static final String FACET_DATE_END = FACET_DATE + ".end";