SOLR-10132: A new optional facet.matches parameter to return facet buckets only for terms that match a regular expression. (Gus Heck, Christine Poerschke)

This commit is contained in:
Christine Poerschke 2017-10-24 21:54:01 +01:00
parent b7332f65b7
commit b8bcaf9246
7 changed files with 154 additions and 14 deletions

View File

@ -57,6 +57,9 @@ New Features
* SOLR-11518: Implement Suggestions for freedisk violations (noble)
* SOLR-10132: A new optional facet.matches parameter to return facet buckets only
for terms that match a regular expression. (Gus Heck, Christine Poerschke)
Bug Fixes
----------------------

View File

@ -0,0 +1,46 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.request;
import java.util.function.Predicate;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.util.BytesRef;
/**
 * A {@link Predicate} over {@link BytesRef} terms that accepts a term when its
 * UTF-8 decoded string form is matched in full by the regular expression given
 * at construction time.
 */
public class RegexBytesRefFilter implements Predicate<BytesRef> {

  /** The regular expression, compiled once in the constructor. */
  private final Pattern pattern;

  /**
   * Creates a filter for the given regular expression.
   *
   * @param regex the regular expression that terms are tested against
   */
  public RegexBytesRefFilter(String regex) {
    pattern = Pattern.compile(regex);
  }

  /**
   * Tests a term that has already been converted to a string.
   *
   * @param term the term text to check
   * @return true if the whole of {@code term} matches the regular expression
   */
  protected boolean includeString(String term) {
    final Matcher termMatcher = pattern.matcher(term);
    return termMatcher.matches();
  }

  /**
   * Decodes the term as UTF-8 and delegates to {@link #includeString(String)}.
   *
   * @param term the term to check
   * @return true if the decoded term matches the regular expression
   */
  @Override
  public boolean test(BytesRef term) {
    final String decoded = term.utf8ToString();
    return includeString(decoded);
  }
}

View File

@ -348,6 +348,16 @@ public class SimpleFacets {
ENUM, FC, FCS, UIF;
}
/**
* Create a new bytes ref filter for excluding facet terms.
*
* This method by default uses the {@link FacetParams#FACET_EXCLUDETERMS} parameter
* but custom SimpleFacets classes could use a different implementation.
*
* @param field the field to check for facet term filters
* @param params the request parameter object
* @return A predicate for filtering terms or null if no filters are applicable.
*/
protected Predicate<BytesRef> newExcludeBytesRefFilter(String field, SolrParams params) {
final String exclude = params.getFieldParam(field, FacetParams.FACET_EXCLUDETERMS);
if (exclude == null) {
@ -364,30 +374,37 @@ public class SimpleFacets {
};
}
/**
* Create a new bytes ref filter for filtering facet terms. If more than one filter is
* applicable, the filters are combined with {@link Predicate#and(Predicate)} so that a
* term is accepted only if it passes all of them.
*
* @param field the field to check for facet term filters
* @param params the request parameter object
* @return A predicate for filtering terms or null if no filters are applicable.
*/
protected Predicate<BytesRef> newBytesRefFilter(String field, SolrParams params) {
final String contains = params.getFieldParam(field, FacetParams.FACET_CONTAINS);
final Predicate<BytesRef> containsFilter;
Predicate<BytesRef> finalFilter = null;
if (contains != null) {
final boolean containsIgnoreCase = params.getFieldBool(field, FacetParams.FACET_CONTAINS_IGNORE_CASE, false);
containsFilter = new SubstringBytesRefFilter(contains, containsIgnoreCase);
} else {
containsFilter = null;
finalFilter = new SubstringBytesRefFilter(contains, containsIgnoreCase);
}
final String regex = params.getFieldParam(field, FacetParams.FACET_MATCHES);
if (regex != null) {
final RegexBytesRefFilter regexBytesRefFilter = new RegexBytesRefFilter(regex);
finalFilter = (finalFilter == null) ? regexBytesRefFilter : finalFilter.and(regexBytesRefFilter);
}
final Predicate<BytesRef> excludeFilter = newExcludeBytesRefFilter(field, params);
if (containsFilter == null && excludeFilter == null) {
return null;
if (excludeFilter != null) {
finalFilter = (finalFilter == null) ? excludeFilter : finalFilter.and(excludeFilter);
}
if (containsFilter != null && excludeFilter == null) {
return containsFilter;
} else if (containsFilter == null && excludeFilter != null) {
return excludeFilter;
}
return containsFilter.and(excludeFilter);
return finalFilter;
}
/**
@ -493,6 +510,7 @@ public class SimpleFacets {
}
if (termFilter != null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "BytesRef term filters ("
+ FacetParams.FACET_MATCHES + ", "
+ FacetParams.FACET_CONTAINS + ", "
+ FacetParams.FACET_EXCLUDETERMS + ") are not supported on numeric types");
}

View File

@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.request;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
/**
 * Unit tests for {@link RegexBytesRefFilter}.
 */
public class RegexBytesRefFilterTest extends LuceneTestCase {

  /** Terms are accepted only when the regular expression matches them. */
  @Test
  public void testRegexBytesRefFilter() {
    final RegexBytesRefFilter filter = new RegexBytesRefFilter("^f[o]{2}.*");
    assertTrue(filter.test(new BytesRef("foobar")));
    assertFalse(filter.test(new BytesRef("qux")));
    assertFalse(filter.test(new BytesRef("quxfoo")));
  }

  /** The expression has to cover the whole term; matching only a part of it is not enough. */
  @Test
  public void testRequiresFullMatch() {
    final RegexBytesRefFilter filter = new RegexBytesRefFilter("foo");
    assertTrue(filter.test(new BytesRef("foo")));
    assertFalse(filter.test(new BytesRef("foobar")));
    assertFalse(filter.test(new BytesRef("quxfoo")));
  }
}

View File

@ -567,6 +567,36 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
}
/**
 * Verifies that facet.matches limits the trait_s facet buckets to the terms matching
 * the regular expression. The check is run without an explicit facet.method, with the
 * uif method selected for the field only, and with the uif method selected globally.
 */
@Test
public void testFacetMatches() {
  // Each entry is one set of leading request parameters; the empty entry leaves
  // facet.method unset so it is exercised exactly once.
  final String[][] methodParams = new String[][] {
      new String[]{},
      new String[]{"f.trait_s.facet.method", "uif"},
      new String[]{"facet.method", "uif"}
  };

  for (String[] methodParam : methodParams) {
    assertQ("check facet.matches filters facets returned",
        req(methodParam
            , "q", "id:[42 TO 47]"
            , "facet", "true"
            , "facet.field", "trait_s"
            , "facet.matches", ".*o.*"
        )
        , "*[count(//doc)=6]"
        , "//lst[@name='facet_counts']/lst[@name='facet_queries']"
        , "//lst[@name='facet_counts']/lst[@name='facet_fields']"
        , "//lst[@name='facet_fields']/lst[@name='trait_s']"
        // only the two terms containing a lower-case 'o' may come back
        , "*[count(//lst[@name='trait_s']/int)=2]"
        , "//lst[@name='trait_s']/int[@name='Tool'][.='2']"
        , "//lst[@name='trait_s']/int[@name='Obnoxious'][.='2']"
    );
  }
}
@Test
public void testSimpleFacetCounts() {

View File

@ -65,6 +65,10 @@ The `facet.contains` parameter limits the terms on which to facet to those conta
If `facet.contains` is used, the `facet.contains.ignoreCase` parameter causes case to be ignored when matching the given substring against candidate facet terms.
`facet.matches`::
The `facet.matches` parameter limits the facet buckets returned to those whose terms match the given regular expression. The expression must match the entire term, not just part of it.
`facet.sort`::
This parameter determines the ordering of the facet field constraints.
+

View File

@ -175,6 +175,11 @@ public interface FacetParams {
*/
public static final String FACET_CONTAINS = FACET + ".contains";
/**
* Only return constraints of a facet field whose term matches the given regular expression.
*/
public static final String FACET_MATCHES = FACET + ".matches";
/**
* If using facet contains, ignore case when comparing values.
*/