SOLR-877: TermsComponent internal/external fixes plus rows to limit change

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@773218 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2009-05-09 13:01:29 +00:00
parent ab458b7900
commit 63c55c742e
4 changed files with 67 additions and 29 deletions

View File

@ -637,12 +637,16 @@
-->
<!-- A component to return terms and document frequency of those terms.
This component does not yet support distributed search. -->
<searchComponent name="termsComponent" class="org.apache.solr.handler.component.TermsComponent"/>
<searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/>
<requestHandler name="/autoSuggest" class="org.apache.solr.handler.component.SearchHandler">
<requestHandler name="/terms" class="org.apache.solr.handler.component.SearchHandler">
<lst name="defaults">
<bool name="terms">true</bool>
</lst>
<arr name="components">
<str>termsComp</str>
<str>termsComponent</str>
</arr>
</requestHandler>

View File

@ -64,7 +64,7 @@ public interface TermsParams {
/**
* Optional. The number of results to return. If not specified, looks for {@link org.apache.solr.common.params.CommonParams#ROWS}. If that's not specified, uses 10.
*/
public static final String TERMS_ROWS = TERMS_PREFIX + "rows";
public static final String TERMS_LIMIT = TERMS_PREFIX + "limit";
public static final String TERMS_PREFIX_STR = TERMS_PREFIX + "prefix";
@ -76,5 +76,11 @@ public interface TermsParams {
* Optional. The maximum value of docFreq to be returned. -1 by default means no boundary
*/
public static final String TERMS_MAXCOUNT = TERMS_PREFIX + "maxcount";
/**
* Optional. If true, return the raw characters of the indexed term, regardless of if it is readable.
* For instance, the index form of numeric numbers is not human readable. The default is false.
*/
public static final String TERMS_RAW = TERMS_PREFIX + "raw";
}

View File

@ -23,6 +23,8 @@ import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.TermsParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.StrField;
import java.io.IOException;
@ -40,23 +42,30 @@ public class TermsComponent extends SearchComponent {
public void process(ResponseBuilder rb) throws IOException {
SolrParams params = rb.req.getParams();
if (params.getBool(TermsParams.TERMS, false)) {
String lower = params.get(TermsParams.TERMS_LOWER, "");
String lowerStr = params.get(TermsParams.TERMS_LOWER, null);
String[] fields = params.getParams(TermsParams.TERMS_FIELD);
if (fields != null && fields.length > 0) {
NamedList terms = new NamedList();
rb.rsp.add("terms", terms);
int rows = params.getInt(TermsParams.TERMS_ROWS, params.getInt(CommonParams.ROWS, 10));
int rows = params.getInt(TermsParams.TERMS_LIMIT, 10);
if (rows < 0) {
rows = Integer.MAX_VALUE;
}
String upper = params.get(TermsParams.TERMS_UPPER);
String upperStr = params.get(TermsParams.TERMS_UPPER);
boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1); // initialize freqmin
int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT); // initialize freqmax
String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
boolean raw = params.getBool(TermsParams.TERMS_RAW, false);
for (int j = 0; j < fields.length; j++) {
String field = fields[j];
FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
if (ft==null) ft = new StrField();
String lower = lowerStr==null ? "" : ft.toInternal(lowerStr);
String upper = upperStr==null ? null : ft.toInternal(upperStr);
Term lowerTerm = new Term(field, lower);
Term upperTerm = upper != null ? new Term(field, upper) : null;
TermEnum termEnum = rb.req.getSearcher().getReader().terms(lowerTerm);//this will be positioned ready to go
@ -72,16 +81,17 @@ public class TermsComponent extends SearchComponent {
if (hasMore == true) {
do {
Term theTerm = termEnum.term();
String theText = theTerm.text();
String indexedText = theTerm.text();
String readableText = ft.indexedToReadable(indexedText);
int upperCmp = upperTerm != null ? theTerm.compareTo(upperTerm) : -1;
if (theTerm != null && theTerm.field().equals(field)
&& ((upperIncl == true && upperCmp <= 0) ||
(upperIncl == false && upperCmp < 0))
&& (prefix == null || theText.startsWith(prefix))
&& (prefix == null || readableText.startsWith(prefix))
) {
int docFreq = termEnum.docFreq();
if (docFreq >= freqmin && (freqmax == UNLIMITED_MAX_COUNT || (docFreq <= freqmax))) {
fieldTerms.add(theText, docFreq);
fieldTerms.add(readableText, docFreq);
i++;
}
} else {//we're done
@ -103,15 +113,15 @@ public class TermsComponent extends SearchComponent {
}
public String getVersion() {
return "$Revision:$";
return "$Revision$";
}
public String getSourceId() {
return "$Id:$";
return "$Id$";
}
public String getSource() {
return "$URL:$";
return "$URL$";
}
public String getDescription() {

View File

@ -43,9 +43,9 @@ public class TermsComponentTest extends AbstractSolrTestCase {
public void setUp() throws Exception {
super.setUp();
assertU(adoc("id", "0", "lowerfilt", "a", "standardfilt", "a"));
assertU(adoc("id", "1", "lowerfilt", "a", "standardfilt", "aa"));
assertU(adoc("id", "2", "lowerfilt", "aa", "standardfilt", "aaa"));
assertU(adoc("id", "0", "lowerfilt", "a", "standardfilt", "a", "foo_i","1"));
assertU(adoc("id", "1", "lowerfilt", "a", "standardfilt", "aa", "foo_i","1"));
assertU(adoc("id", "2", "lowerfilt", "aa", "standardfilt", "aaa", "foo_i","2"));
assertU(adoc("id", "3", "lowerfilt", "aaa", "standardfilt", "abbb"));
assertU(adoc("id", "4", "lowerfilt", "ab", "standardfilt", "b"));
assertU(adoc("id", "5", "lowerfilt", "abb", "standardfilt", "bb"));
@ -75,7 +75,7 @@ public class TermsComponentTest extends AbstractSolrTestCase {
params.add(TermsParams.TERMS_FIELD, "lowerfilt");
//no lower bound
params.add(TermsParams.TERMS_UPPER, "b");
params.add(TermsParams.TERMS_ROWS, String.valueOf(50));
params.add(TermsParams.TERMS_LIMIT, String.valueOf(50));
SolrRequestHandler handler;
SolrQueryResponse rsp;
NamedList values;
@ -106,7 +106,7 @@ public class TermsComponentTest extends AbstractSolrTestCase {
params.add(TermsParams.TERMS, "true");
//no lower bound
params.add(TermsParams.TERMS_LOWER, "d");
params.add(TermsParams.TERMS_ROWS, String.valueOf(50));
params.add(TermsParams.TERMS_LIMIT, String.valueOf(50));
SolrRequestHandler handler;
SolrQueryResponse rsp;
@ -130,7 +130,7 @@ public class TermsComponentTest extends AbstractSolrTestCase {
params.add(TermsParams.TERMS_FIELD, "lowerfilt", "standardfilt");
//no lower bound
params.add(TermsParams.TERMS_UPPER, "b");
params.add(TermsParams.TERMS_ROWS, String.valueOf(50));
params.add(TermsParams.TERMS_LIMIT, String.valueOf(50));
SolrRequestHandler handler;
SolrQueryResponse rsp;
NamedList values;
@ -158,7 +158,7 @@ public class TermsComponentTest extends AbstractSolrTestCase {
params.add(TermsParams.TERMS, "true");
params.add(TermsParams.TERMS_FIELD, "lowerfilt", "standardfilt");
//no lower bound, upper bound or rows
params.add(TermsParams.TERMS_ROWS, String.valueOf(-1));
params.add(TermsParams.TERMS_LIMIT, String.valueOf(-1));
SolrRequestHandler handler;
SolrQueryResponse rsp;
NamedList values;
@ -186,7 +186,7 @@ public class TermsComponentTest extends AbstractSolrTestCase {
params.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false");
params.add(TermsParams.TERMS_PREFIX_STR, "aa");
params.add(TermsParams.TERMS_UPPER, "b");
params.add(TermsParams.TERMS_ROWS, String.valueOf(50));
params.add(TermsParams.TERMS_LIMIT, String.valueOf(50));
SolrRequestHandler handler;
SolrQueryResponse rsp;
NamedList values;
@ -213,7 +213,7 @@ public class TermsComponentTest extends AbstractSolrTestCase {
params.add(TermsParams.TERMS_FIELD, "lowerfilt");
//no upper bound, lower bound doesn't exist
params.add(TermsParams.TERMS_LOWER, "d");
params.add(TermsParams.TERMS_ROWS, String.valueOf(50));
params.add(TermsParams.TERMS_LIMIT, String.valueOf(50));
SolrRequestHandler handler;
SolrQueryResponse rsp;
NamedList values;
@ -239,7 +239,7 @@ public class TermsComponentTest extends AbstractSolrTestCase {
params.add(TermsParams.TERMS_FIELD, "lowerfilt");
params.add(TermsParams.TERMS_LOWER, "a");
params.add(TermsParams.TERMS_UPPER, "b");
params.add(TermsParams.TERMS_ROWS, String.valueOf(50));
params.add(TermsParams.TERMS_LIMIT, String.valueOf(50));
SolrRequestHandler handler;
SolrQueryResponse rsp;
NamedList values;
@ -268,7 +268,7 @@ public class TermsComponentTest extends AbstractSolrTestCase {
params.add(TermsParams.TERMS_FIELD, "standardfilt");
params.add(TermsParams.TERMS_LOWER, "cc");
params.add(TermsParams.TERMS_UPPER, "d");
params.add(TermsParams.TERMS_ROWS, String.valueOf(50));
params.add(TermsParams.TERMS_LIMIT, String.valueOf(50));
rsp = new SolrQueryResponse();
rsp.add("responseHeader", new SimpleOrderedMap());
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
@ -288,7 +288,7 @@ public class TermsComponentTest extends AbstractSolrTestCase {
params.add(TermsParams.TERMS_FIELD, "lowerfilt");
params.add(TermsParams.TERMS_LOWER, "a");
params.add(TermsParams.TERMS_UPPER, "b");
params.add(TermsParams.TERMS_ROWS, String.valueOf(50));
params.add(TermsParams.TERMS_LIMIT, String.valueOf(50));
SolrRequestHandler handler;
SolrQueryResponse rsp;
NamedList values;
@ -330,7 +330,7 @@ public class TermsComponentTest extends AbstractSolrTestCase {
params.add(TermsParams.TERMS_FIELD, "lowerfilt");
params.add(TermsParams.TERMS_LOWER, "a");
params.add(TermsParams.TERMS_UPPER, "b");
params.add(TermsParams.TERMS_ROWS, String.valueOf(2));
params.add(TermsParams.TERMS_LIMIT, String.valueOf(2));
rsp = new SolrQueryResponse();
rsp.add("responseHeader", new SimpleOrderedMap());
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
@ -343,6 +343,24 @@ public class TermsComponentTest extends AbstractSolrTestCase {
assertTrue("abc is not null", terms.get("abc") == null);
assertTrue("b is null and it shouldn't be", terms.get("b") == null);
assertTrue("baa is not null", terms.get("baa") == null);
params = new ModifiableSolrParams();
params.add(TermsParams.TERMS, "true");
params.add(TermsParams.TERMS_FIELD, "foo_i");
rsp = new SolrQueryResponse();
rsp.add("responseHeader", new SimpleOrderedMap());
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
values = rsp.getValues();
terms = (NamedList) ((NamedList) values.get("terms")).get("foo_i");
assertEquals(2,terms.get("1"));
params.add("terms.raw","true");
rsp = new SolrQueryResponse();
rsp.add("responseHeader", new SimpleOrderedMap());
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
values = rsp.getValues();
terms = (NamedList) ((NamedList) values.get("terms")).get("foo_i");
assertTrue(terms.get("1") == null);
}
@ -358,7 +376,7 @@ public class TermsComponentTest extends AbstractSolrTestCase {
ModifiableSolrParams params = new ModifiableSolrParams();
params.add(TermsParams.TERMS, "true");
params.add(TermsParams.TERMS_FIELD, "lowerfilt");
params.add(TermsParams.TERMS_ROWS, String.valueOf(50));
params.add(TermsParams.TERMS_LIMIT, String.valueOf(50));
// Tests TERMS_LOWER = "a" with freqmin = 2, freqmax = -1, terms.size() = 1
params.add(TermsParams.TERMS_LOWER, "a");
params.add(TermsParams.TERMS_MINCOUNT,String.valueOf(2));
@ -373,7 +391,7 @@ public class TermsComponentTest extends AbstractSolrTestCase {
params = new ModifiableSolrParams();
params.add(TermsParams.TERMS, "true");
params.add(TermsParams.TERMS_FIELD, "standardfilt");
params.add(TermsParams.TERMS_ROWS, String.valueOf(50));
params.add(TermsParams.TERMS_LIMIT, String.valueOf(50));
// Tests TERMS_LOWER = "a" with freqmin = 2, freqmax = -1, terms.size() = 1
params.add(TermsParams.TERMS_LOWER, "d");
params.add(TermsParams.TERMS_MINCOUNT,String.valueOf(2));