mirror of https://github.com/apache/lucene.git
SOLR-1156: Sort TermsComponent results by frequency
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@807289 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e7f856a4d8
commit
5d1bb05f32
|
@ -280,6 +280,8 @@ New Features
|
|||
high precision date subtraction, add sub() for subtracting other arguments.
|
||||
(yonik)
|
||||
|
||||
73. SOLR-1156: Sort TermsComponent results by frequency (Matt Weber via yonik)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the
|
||||
|
|
|
@ -82,5 +82,13 @@ public interface TermsParams {
|
|||
* For instance, the index form of numeric numbers is not human readable. The default is false.
|
||||
*/
|
||||
public static final String TERMS_RAW = TERMS_PREFIX + "raw";
|
||||
|
||||
/**
|
||||
* Optional. Sort order of the returned terms: "count" sorts by frequency (highest first), "index" returns terms in index order. Defaults to "count".
|
||||
*/
|
||||
public static final String TERMS_SORT = TERMS_PREFIX + "sort";
|
||||
|
||||
public static final String TERMS_SORT_COUNT = "count";
|
||||
public static final String TERMS_SORT_INDEX = "index";
|
||||
}
|
||||
|
||||
|
|
|
@ -26,6 +26,8 @@ import org.apache.solr.common.params.TermsParams;
|
|||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.StrField;
|
||||
import org.apache.solr.request.SimpleFacets.CountPair;
|
||||
import org.apache.solr.util.BoundedTreeSet;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
|
@ -55,6 +57,8 @@ public class TermsComponent extends SearchComponent {
|
|||
String upperStr = params.get(TermsParams.TERMS_UPPER);
|
||||
boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
|
||||
boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
|
||||
boolean sort = !TermsParams.TERMS_SORT_INDEX.equals(
|
||||
params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
|
||||
int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1); // initialize freqmin
|
||||
int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT); // initialize freqmax
|
||||
if (freqmax<0) {
|
||||
|
@ -77,6 +81,7 @@ public class TermsComponent extends SearchComponent {
|
|||
|
||||
TermEnum termEnum = rb.req.getSearcher().getReader().terms(lowerTerm); //this will be positioned ready to go
|
||||
int i = 0;
|
||||
BoundedTreeSet<CountPair<String, Integer>> queue = (sort ? new BoundedTreeSet<CountPair<String, Integer>>(limit) : null);
|
||||
NamedList fieldTerms = new NamedList();
|
||||
terms.add(field, fieldTerms);
|
||||
Term lowerTestTerm = termEnum.term();
|
||||
|
@ -87,7 +92,7 @@ public class TermsComponent extends SearchComponent {
|
|||
termEnum.next();
|
||||
}
|
||||
|
||||
while (i<limit) {
|
||||
while (i<limit || sort) {
|
||||
|
||||
Term theTerm = termEnum.term();
|
||||
|
||||
|
@ -111,14 +116,29 @@ public class TermsComponent extends SearchComponent {
|
|||
if (docFreq >= freqmin && docFreq <= freqmax) {
|
||||
// add the term to the list
|
||||
String label = raw ? indexedText : ft.indexedToReadable(indexedText);
|
||||
fieldTerms.add(label, docFreq);
|
||||
i++;
|
||||
if (sort) {
|
||||
queue.add(new CountPair<String, Integer>(label, docFreq));
|
||||
} else {
|
||||
fieldTerms.add(label, docFreq);
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
termEnum.next();
|
||||
}
|
||||
|
||||
termEnum.close();
|
||||
|
||||
if (sort) {
|
||||
for (CountPair<String, Integer> item : queue) {
|
||||
if (i < limit) {
|
||||
fieldTerms.add(item.key, item.val);
|
||||
i++;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified");
|
||||
|
|
|
@ -62,6 +62,13 @@ public class TermsComponentTest extends AbstractSolrTestCase {
|
|||
assertU(adoc("id", "15", "standardfilt", "d"));
|
||||
assertU(adoc("id", "16", "standardfilt", "d"));
|
||||
|
||||
assertU(adoc("id", "17", "standardfilt", "snake"));
|
||||
assertU(adoc("id", "18", "standardfilt", "spider"));
|
||||
assertU(adoc("id", "19", "standardfilt", "shark"));
|
||||
assertU(adoc("id", "20", "standardfilt", "snake"));
|
||||
assertU(adoc("id", "21", "standardfilt", "snake"));
|
||||
assertU(adoc("id", "22", "standardfilt", "shark"));
|
||||
|
||||
assertU("commit", commit());
|
||||
}
|
||||
|
||||
|
@ -203,6 +210,72 @@ public class TermsComponentTest extends AbstractSolrTestCase {
|
|||
assertTrue("value is null and it shouldn't be", value != null);
|
||||
}
|
||||
|
||||
public void testSortCount() throws Exception {
|
||||
SolrCore core = h.getCore();
|
||||
TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
|
||||
assertTrue("tc is null and it shouldn't be", tc != null);
|
||||
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(TermsParams.TERMS, "true");
|
||||
params.add(TermsParams.TERMS_FIELD, "standardfilt");
|
||||
params.add(TermsParams.TERMS_LOWER, "s");
|
||||
params.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false");
|
||||
params.add(TermsParams.TERMS_PREFIX_STR, "s");
|
||||
params.add(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT);
|
||||
|
||||
SolrRequestHandler handler;
|
||||
SolrQueryResponse rsp;
|
||||
NamedList values;
|
||||
NamedList terms;
|
||||
handler = core.getRequestHandler("/terms");
|
||||
assertTrue("handler is null and it shouldn't be", handler != null);
|
||||
rsp = new SolrQueryResponse();
|
||||
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||
values = rsp.getValues();
|
||||
terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt");
|
||||
assertTrue("terms Size: " + terms.size() + " is not: " + 3, terms.size() == 3);
|
||||
assertTrue("Item 0 name is not 'snake'", terms.getName(0).equals("snake"));
|
||||
assertTrue("Item 0 frequency is not '3'", (Integer) terms.getVal(0) == 3);
|
||||
assertTrue("Item 1 name is not 'shark'", terms.getName(1).equals("shark"));
|
||||
assertTrue("Item 1 frequency is not '2'", (Integer) terms.getVal(1) == 2);
|
||||
assertTrue("Item 2 name is not 'spider'", terms.getName(2).equals("spider"));
|
||||
assertTrue("Item 2 frequency is not '1'", (Integer) terms.getVal(2) == 1);
|
||||
}
|
||||
|
||||
public void testSortIndex() throws Exception {
|
||||
SolrCore core = h.getCore();
|
||||
TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
|
||||
assertTrue("tc is null and it shouldn't be", tc != null);
|
||||
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(TermsParams.TERMS, "true");
|
||||
params.add(TermsParams.TERMS_FIELD, "standardfilt");
|
||||
params.add(TermsParams.TERMS_LOWER, "s");
|
||||
params.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false");
|
||||
params.add(TermsParams.TERMS_PREFIX_STR, "s");
|
||||
params.add(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_INDEX);
|
||||
|
||||
SolrRequestHandler handler;
|
||||
SolrQueryResponse rsp;
|
||||
NamedList values;
|
||||
NamedList terms;
|
||||
handler = core.getRequestHandler("/terms");
|
||||
assertTrue("handler is null and it shouldn't be", handler != null);
|
||||
rsp = new SolrQueryResponse();
|
||||
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||
values = rsp.getValues();
|
||||
terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt");
|
||||
assertTrue("terms Size: " + terms.size() + " is not: " + 3, terms.size() == 3);
|
||||
assertTrue("Item 0 name is not 'shark' it is " + terms.getName(0), terms.getName(0).equals("shark"));
|
||||
assertTrue("Item 0 frequency is not '2'", (Integer) terms.getVal(0) == 2);
|
||||
assertTrue("Item 1 name is not 'snake', it is " + terms.getName(1), terms.getName(1).equals("snake"));
|
||||
assertTrue("Item 1 frequency is not '3'", (Integer) terms.getVal(1) == 3);
|
||||
assertTrue("Item 2 name is not 'spider', it is " + terms.getName(2), terms.getName(2).equals("spider"));
|
||||
assertTrue("Item 2 frequency is not '1'", (Integer) terms.getVal(2) == 1);
|
||||
}
|
||||
|
||||
public void testPastUpper() throws Exception {
|
||||
SolrCore core = h.getCore();
|
||||
TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
|
||||
|
@ -412,7 +485,7 @@ public class TermsComponentTest extends AbstractSolrTestCase {
|
|||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||
values = rsp.getValues();
|
||||
terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt");
|
||||
assertTrue("terms Size: " + terms.size() + " is not: " + 1, terms.size() == 1);
|
||||
assertTrue("terms Size: " + terms.size() + " is not: " + 3, terms.size() == 3);
|
||||
Integer d = (Integer) terms.get("d");
|
||||
assertTrue(d + " does not equal: " + 3, d == 3);
|
||||
|
||||
|
|
Loading…
Reference in New Issue