SOLR-1156: Sort TermsComponent results by frequency

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@807289 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2009-08-24 15:56:32 +00:00
parent e7f856a4d8
commit 5d1bb05f32
4 changed files with 107 additions and 4 deletions

View File

@ -280,6 +280,8 @@ New Features
high precision date subtraction, add sub() for subtracting other arguments. high precision date subtraction, add sub() for subtracting other arguments.
(yonik) (yonik)
73. SOLR-1156: Sort TermsComponent results by frequency (Matt Weber via yonik)
Optimizations Optimizations
---------------------- ----------------------
1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the 1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the

View File

@ -82,5 +82,13 @@ public interface TermsParams {
* For instance, the index form of numeric numbers is not human readable. The default is false. * For instance, the index form of numeric numbers is not human readable. The default is false.
*/ */
public static final String TERMS_RAW = TERMS_PREFIX + "raw"; public static final String TERMS_RAW = TERMS_PREFIX + "raw";
/**
* Optional. Selects how the returned terms are ordered: "count" sorts by term frequency (highest first), "index" returns them in index order. Defaults to "count".
*/
public static final String TERMS_SORT = TERMS_PREFIX + "sort";
public static final String TERMS_SORT_COUNT = "count";
public static final String TERMS_SORT_INDEX = "index";
} }

View File

@ -26,6 +26,8 @@ import org.apache.solr.common.params.TermsParams;
import org.apache.solr.common.util.NamedList; import org.apache.solr.common.util.NamedList;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.StrField; import org.apache.solr.schema.StrField;
import org.apache.solr.request.SimpleFacets.CountPair;
import org.apache.solr.util.BoundedTreeSet;
import java.io.IOException; import java.io.IOException;
@ -55,6 +57,8 @@ public class TermsComponent extends SearchComponent {
String upperStr = params.get(TermsParams.TERMS_UPPER); String upperStr = params.get(TermsParams.TERMS_UPPER);
boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false); boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true); boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
boolean sort = !TermsParams.TERMS_SORT_INDEX.equals(
params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1); // initialize freqmin int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1); // initialize freqmin
int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT); // initialize freqmax int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT); // initialize freqmax
if (freqmax<0) { if (freqmax<0) {
@ -77,6 +81,7 @@ public class TermsComponent extends SearchComponent {
TermEnum termEnum = rb.req.getSearcher().getReader().terms(lowerTerm); //this will be positioned ready to go TermEnum termEnum = rb.req.getSearcher().getReader().terms(lowerTerm); //this will be positioned ready to go
int i = 0; int i = 0;
BoundedTreeSet<CountPair<String, Integer>> queue = (sort ? new BoundedTreeSet<CountPair<String, Integer>>(limit) : null);
NamedList fieldTerms = new NamedList(); NamedList fieldTerms = new NamedList();
terms.add(field, fieldTerms); terms.add(field, fieldTerms);
Term lowerTestTerm = termEnum.term(); Term lowerTestTerm = termEnum.term();
@ -87,7 +92,7 @@ public class TermsComponent extends SearchComponent {
termEnum.next(); termEnum.next();
} }
while (i<limit) { while (i<limit || sort) {
Term theTerm = termEnum.term(); Term theTerm = termEnum.term();
@ -111,14 +116,29 @@ public class TermsComponent extends SearchComponent {
if (docFreq >= freqmin && docFreq <= freqmax) { if (docFreq >= freqmin && docFreq <= freqmax) {
// add the term to the list // add the term to the list
String label = raw ? indexedText : ft.indexedToReadable(indexedText); String label = raw ? indexedText : ft.indexedToReadable(indexedText);
if (sort) {
queue.add(new CountPair<String, Integer>(label, docFreq));
} else {
fieldTerms.add(label, docFreq); fieldTerms.add(label, docFreq);
i++; i++;
} }
}
termEnum.next(); termEnum.next();
} }
termEnum.close(); termEnum.close();
if (sort) {
for (CountPair<String, Integer> item : queue) {
if (i < limit) {
fieldTerms.add(item.key, item.val);
i++;
} else {
break;
}
}
}
} }
} else { } else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified"); throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified");

View File

@ -62,6 +62,13 @@ public class TermsComponentTest extends AbstractSolrTestCase {
assertU(adoc("id", "15", "standardfilt", "d")); assertU(adoc("id", "15", "standardfilt", "d"));
assertU(adoc("id", "16", "standardfilt", "d")); assertU(adoc("id", "16", "standardfilt", "d"));
assertU(adoc("id", "17", "standardfilt", "snake"));
assertU(adoc("id", "18", "standardfilt", "spider"));
assertU(adoc("id", "19", "standardfilt", "shark"));
assertU(adoc("id", "20", "standardfilt", "snake"));
assertU(adoc("id", "21", "standardfilt", "snake"));
assertU(adoc("id", "22", "standardfilt", "shark"));
assertU("commit", commit()); assertU("commit", commit());
} }
@ -203,6 +210,72 @@ public class TermsComponentTest extends AbstractSolrTestCase {
assertTrue("value is null and it shouldn't be", value != null); assertTrue("value is null and it shouldn't be", value != null);
} }
/**
 * Sends a request to the "/terms" handler for the "standardfilt" field, restricted to the
 * prefix "s" (lower bound "s", exclusive), with terms.sort set to "count".
 * Expects exactly three terms, ordered by descending frequency:
 * snake (3), shark (2), spider (1).
 */
public void testSortCount() throws Exception {
  SolrCore solrCore = h.getCore();
  TermsComponent component = (TermsComponent) solrCore.getSearchComponent("termsComp");
  assertTrue("tc is null and it shouldn't be", component != null);

  // Build the request: terms on "standardfilt", prefix "s", sorted by count.
  ModifiableSolrParams request = new ModifiableSolrParams();
  request.add(TermsParams.TERMS, "true");
  request.add(TermsParams.TERMS_FIELD, "standardfilt");
  request.add(TermsParams.TERMS_LOWER, "s");
  request.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false");
  request.add(TermsParams.TERMS_PREFIX_STR, "s");
  request.add(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT);

  SolrRequestHandler termsHandler = solrCore.getRequestHandler("/terms");
  assertTrue("handler is null and it shouldn't be", termsHandler != null);

  SolrQueryResponse response = new SolrQueryResponse();
  response.add("responseHeader", new SimpleOrderedMap());
  termsHandler.handleRequest(new LocalSolrQueryRequest(solrCore, request), response);

  // Drill down to the per-field term list in the response.
  NamedList allValues = response.getValues();
  NamedList termsSection = (NamedList) allValues.get("terms");
  NamedList fieldTerms = (NamedList) termsSection.get("standardfilt");

  assertTrue("terms Size: " + fieldTerms.size() + " is not: " + 3, fieldTerms.size() == 3);
  // Highest frequency first.
  assertTrue("Item 0 name is not 'snake'", fieldTerms.getName(0).equals("snake"));
  assertTrue("Item 0 frequency is not '3'", (Integer) fieldTerms.getVal(0) == 3);
  assertTrue("Item 1 name is not 'shark'", fieldTerms.getName(1).equals("shark"));
  assertTrue("Item 1 frequency is not '2'", (Integer) fieldTerms.getVal(1) == 2);
  assertTrue("Item 2 name is not 'spider'", fieldTerms.getName(2).equals("spider"));
  assertTrue("Item 2 frequency is not '1'", (Integer) fieldTerms.getVal(2) == 1);
}
/**
 * Sends a request to the "/terms" handler for the "standardfilt" field, restricted to the
 * prefix "s" (lower bound "s", exclusive), with terms.sort set to "index".
 * Expects exactly three terms in the order shark (2), snake (3), spider (1),
 * i.e. not ordered by frequency.
 */
public void testSortIndex() throws Exception {
  SolrCore solrCore = h.getCore();
  TermsComponent component = (TermsComponent) solrCore.getSearchComponent("termsComp");
  assertTrue("tc is null and it shouldn't be", component != null);

  // Build the request: terms on "standardfilt", prefix "s", sorted by index order.
  ModifiableSolrParams request = new ModifiableSolrParams();
  request.add(TermsParams.TERMS, "true");
  request.add(TermsParams.TERMS_FIELD, "standardfilt");
  request.add(TermsParams.TERMS_LOWER, "s");
  request.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false");
  request.add(TermsParams.TERMS_PREFIX_STR, "s");
  request.add(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_INDEX);

  SolrRequestHandler termsHandler = solrCore.getRequestHandler("/terms");
  assertTrue("handler is null and it shouldn't be", termsHandler != null);

  SolrQueryResponse response = new SolrQueryResponse();
  response.add("responseHeader", new SimpleOrderedMap());
  termsHandler.handleRequest(new LocalSolrQueryRequest(solrCore, request), response);

  // Drill down to the per-field term list in the response.
  NamedList allValues = response.getValues();
  NamedList termsSection = (NamedList) allValues.get("terms");
  NamedList fieldTerms = (NamedList) termsSection.get("standardfilt");

  assertTrue("terms Size: " + fieldTerms.size() + " is not: " + 3, fieldTerms.size() == 3);
  assertTrue("Item 0 name is not 'shark' it is " + fieldTerms.getName(0), fieldTerms.getName(0).equals("shark"));
  assertTrue("Item 0 frequency is not '2'", (Integer) fieldTerms.getVal(0) == 2);
  assertTrue("Item 1 name is not 'snake', it is " + fieldTerms.getName(1), fieldTerms.getName(1).equals("snake"));
  assertTrue("Item 1 frequency is not '3'", (Integer) fieldTerms.getVal(1) == 3);
  assertTrue("Item 2 name is not 'spider', it is " + fieldTerms.getName(2), fieldTerms.getName(2).equals("spider"));
  assertTrue("Item 2 frequency is not '1'", (Integer) fieldTerms.getVal(2) == 1);
}
public void testPastUpper() throws Exception { public void testPastUpper() throws Exception {
SolrCore core = h.getCore(); SolrCore core = h.getCore();
TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp"); TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
@ -412,7 +485,7 @@ public class TermsComponentTest extends AbstractSolrTestCase {
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp); handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
values = rsp.getValues(); values = rsp.getValues();
terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt"); terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt");
assertTrue("terms Size: " + terms.size() + " is not: " + 1, terms.size() == 1); assertTrue("terms Size: " + terms.size() + " is not: " + 3, terms.size() == 3);
Integer d = (Integer) terms.get("d"); Integer d = (Integer) terms.get("d");
assertTrue(d + " does not equal: " + 3, d == 3); assertTrue(d + " does not equal: " + 3, d == 3);