mirror of https://github.com/apache/lucene.git
SOLR-10132: A new optional facet.matches parameter to return facet buckets only for terms that match a regular expression. (Gus Heck, Christine Poerschke)
This commit is contained in:
parent
b7332f65b7
commit
b8bcaf9246
|
@ -57,6 +57,9 @@ New Features
|
|||
|
||||
* SOLR-11518: Implement Suggestions for freedisk violations (noble)
|
||||
|
||||
* SOLR-10132: A new optional facet.matches parameter to return facet buckets only
|
||||
for terms that match a regular expression. (Gus Heck, Christine Poerschke)
|
||||
|
||||
Bug Fixes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -0,0 +1,46 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.request;
|
||||
|
||||
import java.util.function.Predicate;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
||||
/**
|
||||
* An implementation of {@link Predicate} which returns true if the BytesRef matches the supplied regular expression.
|
||||
*/
|
||||
public class RegexBytesRefFilter implements Predicate<BytesRef> {
|
||||
|
||||
final private Pattern compiled;
|
||||
|
||||
public RegexBytesRefFilter(String regex) {
|
||||
this.compiled = Pattern.compile(regex);
|
||||
}
|
||||
|
||||
protected boolean includeString(String term) {
|
||||
Matcher m = compiled.matcher(term);
|
||||
return m.matches();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean test(BytesRef term) {
|
||||
return includeString(term.utf8ToString());
|
||||
}
|
||||
|
||||
}
|
|
@ -348,6 +348,16 @@ public class SimpleFacets {
|
|||
ENUM, FC, FCS, UIF;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new bytes ref filter for excluding facet terms.
|
||||
*
|
||||
* This method by default uses the {@link FacetParams#FACET_EXCLUDETERMS} parameter
|
||||
* but custom SimpleFacets classes could use a different implementation.
|
||||
*
|
||||
* @param field the field to check for facet term filters
|
||||
* @param params the request parameter object
|
||||
* @return A predicate for filtering terms or null if no filters are applicable.
|
||||
*/
|
||||
protected Predicate<BytesRef> newExcludeBytesRefFilter(String field, SolrParams params) {
|
||||
final String exclude = params.getFieldParam(field, FacetParams.FACET_EXCLUDETERMS);
|
||||
if (exclude == null) {
|
||||
|
@ -364,30 +374,37 @@ public class SimpleFacets {
|
|||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a new bytes ref filter for filtering facet terms. If more than one filter is
|
||||
* applicable the applicable filters will be returned as an {@link Predicate#and(Predicate)}
|
||||
* of all such filters.
|
||||
*
|
||||
* @param field the field to check for facet term filters
|
||||
* @param params the request parameter object
|
||||
* @return A predicate for filtering terms or null if no filters are applicable.
|
||||
*/
|
||||
protected Predicate<BytesRef> newBytesRefFilter(String field, SolrParams params) {
|
||||
final String contains = params.getFieldParam(field, FacetParams.FACET_CONTAINS);
|
||||
|
||||
final Predicate<BytesRef> containsFilter;
|
||||
Predicate<BytesRef> finalFilter = null;
|
||||
|
||||
if (contains != null) {
|
||||
final boolean containsIgnoreCase = params.getFieldBool(field, FacetParams.FACET_CONTAINS_IGNORE_CASE, false);
|
||||
containsFilter = new SubstringBytesRefFilter(contains, containsIgnoreCase);
|
||||
} else {
|
||||
containsFilter = null;
|
||||
finalFilter = new SubstringBytesRefFilter(contains, containsIgnoreCase);
|
||||
}
|
||||
|
||||
final String regex = params.getFieldParam(field, FacetParams.FACET_MATCHES);
|
||||
if (regex != null) {
|
||||
final RegexBytesRefFilter regexBytesRefFilter = new RegexBytesRefFilter(regex);
|
||||
finalFilter = (finalFilter == null) ? regexBytesRefFilter : finalFilter.and(regexBytesRefFilter);
|
||||
}
|
||||
|
||||
final Predicate<BytesRef> excludeFilter = newExcludeBytesRefFilter(field, params);
|
||||
|
||||
if (containsFilter == null && excludeFilter == null) {
|
||||
return null;
|
||||
if (excludeFilter != null) {
|
||||
finalFilter = (finalFilter == null) ? excludeFilter : finalFilter.and(excludeFilter);
|
||||
}
|
||||
|
||||
if (containsFilter != null && excludeFilter == null) {
|
||||
return containsFilter;
|
||||
} else if (containsFilter == null && excludeFilter != null) {
|
||||
return excludeFilter;
|
||||
}
|
||||
|
||||
return containsFilter.and(excludeFilter);
|
||||
return finalFilter;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -493,6 +510,7 @@ public class SimpleFacets {
|
|||
}
|
||||
if (termFilter != null) {
|
||||
throw new SolrException(ErrorCode.BAD_REQUEST, "BytesRef term filters ("
|
||||
+ FacetParams.FACET_MATCHES + ", "
|
||||
+ FacetParams.FACET_CONTAINS + ", "
|
||||
+ FacetParams.FACET_EXCLUDETERMS + ") are not supported on numeric types");
|
||||
}
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.solr.request;
|
||||
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
public class RegexBytesRefFilterTest extends LuceneTestCase {
|
||||
|
||||
@Test
|
||||
public void testSubstringBytesRefFilter() {
|
||||
final RegexBytesRefFilter filter = new RegexBytesRefFilter("^f[o]{2}.*");
|
||||
|
||||
assertTrue(filter.test(new BytesRef("foobar")));
|
||||
assertFalse(filter.test(new BytesRef("qux")));
|
||||
assertFalse(filter.test(new BytesRef("quxfoo")));
|
||||
}
|
||||
|
||||
}
|
|
@ -567,6 +567,36 @@ public class SimpleFacetsTest extends SolrTestCaseJ4 {
|
|||
}
|
||||
|
||||
|
||||
@Test
|
||||
public void testFacetMatches() {
|
||||
final String[][] uifSwitch = new String[][] {
|
||||
new String[]{"f.trait_s.facet.method", "uif"},
|
||||
new String[]{"facet.method", "uif"}
|
||||
};
|
||||
final String[] none = new String[]{};
|
||||
for (String[] aSwitch : uifSwitch) {
|
||||
for(String[] methodParam : new String[][]{ none, aSwitch}) {
|
||||
assertQ("check facet.match filters facets returned",
|
||||
req(methodParam
|
||||
, "q", "id:[42 TO 47]"
|
||||
, "facet", "true"
|
||||
, "facet.field", "trait_s"
|
||||
, "facet.matches", ".*o.*"
|
||||
)
|
||||
, "*[count(//doc)=6]"
|
||||
|
||||
, "//lst[@name='facet_counts']/lst[@name='facet_queries']"
|
||||
|
||||
, "//lst[@name='facet_counts']/lst[@name='facet_fields']"
|
||||
, "//lst[@name='facet_fields']/lst[@name='trait_s']"
|
||||
, "*[count(//lst[@name='trait_s']/int)=2]"
|
||||
, "//lst[@name='trait_s']/int[@name='Tool'][.='2']"
|
||||
, "//lst[@name='trait_s']/int[@name='Obnoxious'][.='2']"
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSimpleFacetCounts() {
|
||||
|
||||
|
|
|
@ -65,6 +65,10 @@ The `facet.contains` parameter limits the terms on which to facet to those conta
|
|||
|
||||
If `facet.contains` is used, the `facet.contains.ignoreCase` parameter causes case to be ignored when matching the given substring against candidate facet terms.
|
||||
|
||||
`facet.matches`::
|
||||
|
||||
Limits the facet buckets returned to those whose terms match the given regular expression. The expression must match the entire term.
|
||||
|
||||
`facet.sort`::
|
||||
This parameter determines the ordering of the facet field constraints.
|
||||
+
|
||||
|
|
|
@ -175,6 +175,11 @@ public interface FacetParams {
|
|||
*/
|
||||
public static final String FACET_CONTAINS = FACET + ".contains";
|
||||
|
||||
/**
|
||||
* Only return constraints of a facet field that match the given regular expression.
|
||||
*/
|
||||
public static final String FACET_MATCHES = FACET + ".matches";
|
||||
|
||||
/**
|
||||
* If using facet contains, ignore case when comparing values.
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue