mirror of https://github.com/apache/lucene.git
SOLR-1625 Add regexp support for TermsComponent
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@889537 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c8ad3e84b3
commit
93e3d755d4
|
@ -53,8 +53,7 @@ New Features
|
||||||
* SOLR-1571: Added unicode collation support though Lucene's CollationKeyFilter
|
* SOLR-1571: Added unicode collation support though Lucene's CollationKeyFilter
|
||||||
(Robert Muir via shalin)
|
(Robert Muir via shalin)
|
||||||
|
|
||||||
* SOLR-785: Distributed Search support for SpellCheckComponent
|
* SOLR-1625: Add regexp support for TermsComponent (Uri Boness via noble)
|
||||||
(Matthew Woytowitz, shalin)
|
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
|
@ -142,8 +141,6 @@ Other Changes
|
||||||
* SOLR-1608: Extract base class from TestDistributedSearch to make
|
* SOLR-1608: Extract base class from TestDistributedSearch to make
|
||||||
it easy to write test cases for other distributed components. (shalin)
|
it easy to write test cases for other distributed components. (shalin)
|
||||||
|
|
||||||
* Upgraded to Lucene 2.9-dev r888785 (shalin)
|
|
||||||
|
|
||||||
Build
|
Build
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
|
|
||||||
package org.apache.solr.common.params;
|
package org.apache.solr.common.params;
|
||||||
|
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
@ -68,7 +69,32 @@ public interface TermsParams {
|
||||||
|
|
||||||
public static final String TERMS_PREFIX_STR = TERMS_PREFIX + "prefix";
|
public static final String TERMS_PREFIX_STR = TERMS_PREFIX + "prefix";
|
||||||
|
|
||||||
/**
|
public static final String TERMS_REGEXP_STR = TERMS_PREFIX + "regex";
|
||||||
|
|
||||||
|
public static final String TERMS_REGEXP_FLAG = TERMS_REGEXP_STR + ".flag";
|
||||||
|
|
||||||
|
public static enum TermsRegexpFlag {
|
||||||
|
UNIX_LINES(Pattern.UNIX_LINES),
|
||||||
|
CASE_INSENSITIVE(Pattern.CASE_INSENSITIVE),
|
||||||
|
COMMENTS(Pattern.COMMENTS),
|
||||||
|
MULTILINE(Pattern.MULTILINE),
|
||||||
|
LITERAL(Pattern.LITERAL),
|
||||||
|
DOTALL(Pattern.DOTALL),
|
||||||
|
UNICODE_CASE(Pattern.UNICODE_CASE),
|
||||||
|
CANON_EQ(Pattern.CANON_EQ);
|
||||||
|
|
||||||
|
int value;
|
||||||
|
|
||||||
|
TermsRegexpFlag(int value) {
|
||||||
|
this.value = value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getValue() {
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
* Optional. The minimum value of docFreq to be returned. 1 by default
|
* Optional. The minimum value of docFreq to be returned. 1 by default
|
||||||
*/
|
*/
|
||||||
public static final String TERMS_MINCOUNT = TERMS_PREFIX + "mincount";
|
public static final String TERMS_MINCOUNT = TERMS_PREFIX + "mincount";
|
||||||
|
|
|
@ -20,7 +20,6 @@ import org.apache.lucene.index.Term;
|
||||||
import org.apache.lucene.index.TermEnum;
|
import org.apache.lucene.index.TermEnum;
|
||||||
import org.apache.lucene.util.StringHelper;
|
import org.apache.lucene.util.StringHelper;
|
||||||
import org.apache.solr.common.SolrException;
|
import org.apache.solr.common.SolrException;
|
||||||
import org.apache.solr.common.params.CommonParams;
|
|
||||||
import org.apache.solr.common.params.SolrParams;
|
import org.apache.solr.common.params.SolrParams;
|
||||||
import org.apache.solr.common.params.TermsParams;
|
import org.apache.solr.common.params.TermsParams;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
|
@ -30,7 +29,7 @@ import org.apache.solr.request.SimpleFacets.CountPair;
|
||||||
import org.apache.solr.util.BoundedTreeSet;
|
import org.apache.solr.util.BoundedTreeSet;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return TermEnum information, useful for things like auto suggest.
|
* Return TermEnum information, useful for things like auto suggest.
|
||||||
|
@ -65,6 +64,9 @@ public class TermsComponent extends SearchComponent {
|
||||||
freqmax = Integer.MAX_VALUE;
|
freqmax = Integer.MAX_VALUE;
|
||||||
}
|
}
|
||||||
String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
|
String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
|
||||||
|
String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
|
||||||
|
Pattern pattern = regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;
|
||||||
|
|
||||||
boolean raw = params.getBool(TermsParams.TERMS_RAW, false);
|
boolean raw = params.getBool(TermsParams.TERMS_RAW, false);
|
||||||
for (int j = 0; j < fields.length; j++) {
|
for (int j = 0; j < fields.length; j++) {
|
||||||
String field = StringHelper.intern(fields[j]);
|
String field = StringHelper.intern(fields[j]);
|
||||||
|
@ -105,6 +107,11 @@ public class TermsComponent extends SearchComponent {
|
||||||
// stop if the prefix doesn't match
|
// stop if the prefix doesn't match
|
||||||
if (prefix != null && !indexedText.startsWith(prefix)) break;
|
if (prefix != null && !indexedText.startsWith(prefix)) break;
|
||||||
|
|
||||||
|
if (pattern != null && !pattern.matcher(indexedText).matches()) {
|
||||||
|
termEnum.next();
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
if (upperTerm != null) {
|
if (upperTerm != null) {
|
||||||
int upperCmp = theTerm.compareTo(upperTerm);
|
int upperCmp = theTerm.compareTo(upperTerm);
|
||||||
// if we are past the upper term, or equal to it (when don't include upper) then stop.
|
// if we are past the upper term, or equal to it (when don't include upper) then stop.
|
||||||
|
@ -146,6 +153,22 @@ public class TermsComponent extends SearchComponent {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int resolveRegexpFlags(SolrParams params) {
|
||||||
|
String[] flagParams = params.getParams(TermsParams.TERMS_REGEXP_FLAG);
|
||||||
|
if (flagParams == null) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
int flags = 0;
|
||||||
|
for (String flagParam : flagParams) {
|
||||||
|
try {
|
||||||
|
flags |= TermsParams.TermsRegexpFlag.valueOf(flagParam.toUpperCase()).getValue();
|
||||||
|
} catch (IllegalArgumentException iae) {
|
||||||
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Unknown terms regex flag '" + flagParam + "'");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return flags;
|
||||||
|
}
|
||||||
|
|
||||||
public void prepare(ResponseBuilder rb) throws IOException {
|
public void prepare(ResponseBuilder rb) throws IOException {
|
||||||
//nothing to do
|
//nothing to do
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.solr.request.SolrRequestHandler;
|
||||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||||
import org.apache.solr.request.SolrQueryResponse;
|
import org.apache.solr.request.SolrQueryResponse;
|
||||||
|
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
|
@ -210,6 +211,73 @@ public class TermsComponentTest extends AbstractSolrTestCase {
|
||||||
assertTrue("value is null and it shouldn't be", value != null);
|
assertTrue("value is null and it shouldn't be", value != null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testRegexp() throws Exception {
|
||||||
|
SolrCore core = h.getCore();
|
||||||
|
TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
|
||||||
|
assertTrue("tc is null and it shouldn't be", tc != null);
|
||||||
|
|
||||||
|
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||||
|
params.add(TermsParams.TERMS, "true");
|
||||||
|
params.add(TermsParams.TERMS_FIELD, "standardfilt");
|
||||||
|
params.add(TermsParams.TERMS_LOWER, "bb");
|
||||||
|
params.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false");
|
||||||
|
params.add(TermsParams.TERMS_REGEXP_STR, "b.*");
|
||||||
|
params.add(TermsParams.TERMS_UPPER, "bbbb");
|
||||||
|
params.add(TermsParams.TERMS_UPPER_INCLUSIVE, "true");
|
||||||
|
params.add(TermsParams.TERMS_LIMIT, String.valueOf(50));
|
||||||
|
SolrRequestHandler handler;
|
||||||
|
SolrQueryResponse rsp;
|
||||||
|
NamedList values;
|
||||||
|
NamedList terms;
|
||||||
|
handler = core.getRequestHandler("/terms");
|
||||||
|
assertTrue("handler is null and it shouldn't be", handler != null);
|
||||||
|
rsp = new SolrQueryResponse();
|
||||||
|
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||||
|
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||||
|
values = rsp.getValues();
|
||||||
|
terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt");
|
||||||
|
assertEquals("terms Size: " + terms.size() + " is not: 1", 1, terms.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRegexpFlagParsing() {
|
||||||
|
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||||
|
params.add(TermsParams.TERMS_REGEXP_FLAG, "case_insensitive", "literal", "comments", "multiline", "unix_lines",
|
||||||
|
"unicode_case", "dotall", "canon_eq");
|
||||||
|
int flags = new TermsComponent().resolveRegexpFlags(params);
|
||||||
|
int expected = Pattern.CASE_INSENSITIVE | Pattern.LITERAL | Pattern.COMMENTS | Pattern.MULTILINE | Pattern.UNIX_LINES
|
||||||
|
| Pattern.UNICODE_CASE | Pattern.DOTALL | Pattern.CANON_EQ;
|
||||||
|
assertEquals(expected, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testRegexpWithFlags() throws Exception {
|
||||||
|
SolrCore core = h.getCore();
|
||||||
|
TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
|
||||||
|
assertTrue("tc is null and it shouldn't be", tc != null);
|
||||||
|
|
||||||
|
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||||
|
params.add(TermsParams.TERMS, "true");
|
||||||
|
params.add(TermsParams.TERMS_FIELD, "standardfilt");
|
||||||
|
params.add(TermsParams.TERMS_LOWER, "bb");
|
||||||
|
params.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false");
|
||||||
|
params.add(TermsParams.TERMS_REGEXP_STR, "B.*");
|
||||||
|
params.add(TermsParams.TERMS_REGEXP_FLAG, "case_insensitive");
|
||||||
|
params.add(TermsParams.TERMS_UPPER, "bbbb");
|
||||||
|
params.add(TermsParams.TERMS_UPPER_INCLUSIVE, "true");
|
||||||
|
params.add(TermsParams.TERMS_LIMIT, String.valueOf(50));
|
||||||
|
SolrRequestHandler handler;
|
||||||
|
SolrQueryResponse rsp;
|
||||||
|
NamedList values;
|
||||||
|
NamedList terms;
|
||||||
|
handler = core.getRequestHandler("/terms");
|
||||||
|
assertTrue("handler is null and it shouldn't be", handler != null);
|
||||||
|
rsp = new SolrQueryResponse();
|
||||||
|
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||||
|
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||||
|
values = rsp.getValues();
|
||||||
|
terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt");
|
||||||
|
assertEquals("terms Size: " + terms.size() + " is not: 1", 1, terms.size());
|
||||||
|
}
|
||||||
|
|
||||||
public void testSortCount() throws Exception {
|
public void testSortCount() throws Exception {
|
||||||
SolrCore core = h.getCore();
|
SolrCore core = h.getCore();
|
||||||
TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
|
TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
|
||||||
|
|
Loading…
Reference in New Issue