mirror of https://github.com/apache/lucene.git
SOLR-1156: Sort TermsComponent results by frequency
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@807289 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e7f856a4d8
commit
5d1bb05f32
|
@ -280,6 +280,8 @@ New Features
|
||||||
high precision date subtraction, add sub() for subtracting other arguments.
|
high precision date subtraction, add sub() for subtracting other arguments.
|
||||||
(yonik)
|
(yonik)
|
||||||
|
|
||||||
|
73. SOLR-1156: Sort TermsComponent results by frequency (Matt Weber via yonik)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
----------------------
|
----------------------
|
||||||
1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the
|
1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the
|
||||||
|
|
|
@ -82,5 +82,13 @@ public interface TermsParams {
|
||||||
* For instance, the index form of numeric numbers is not human readable. The default is false.
|
* For instance, the index form of numeric numbers is not human readable. The default is false.
|
||||||
*/
|
*/
|
||||||
public static final String TERMS_RAW = TERMS_PREFIX + "raw";
|
public static final String TERMS_RAW = TERMS_PREFIX + "raw";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Optional. If sorting by frequency is enabled. Defaults to sorting by count.
|
||||||
|
*/
|
||||||
|
public static final String TERMS_SORT = TERMS_PREFIX + "sort";
|
||||||
|
|
||||||
|
public static final String TERMS_SORT_COUNT = "count";
|
||||||
|
public static final String TERMS_SORT_INDEX = "index";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -26,6 +26,8 @@ import org.apache.solr.common.params.TermsParams;
|
||||||
import org.apache.solr.common.util.NamedList;
|
import org.apache.solr.common.util.NamedList;
|
||||||
import org.apache.solr.schema.FieldType;
|
import org.apache.solr.schema.FieldType;
|
||||||
import org.apache.solr.schema.StrField;
|
import org.apache.solr.schema.StrField;
|
||||||
|
import org.apache.solr.request.SimpleFacets.CountPair;
|
||||||
|
import org.apache.solr.util.BoundedTreeSet;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
|
@ -55,6 +57,8 @@ public class TermsComponent extends SearchComponent {
|
||||||
String upperStr = params.get(TermsParams.TERMS_UPPER);
|
String upperStr = params.get(TermsParams.TERMS_UPPER);
|
||||||
boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
|
boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
|
||||||
boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
|
boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
|
||||||
|
boolean sort = !TermsParams.TERMS_SORT_INDEX.equals(
|
||||||
|
params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
|
||||||
int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1); // initialize freqmin
|
int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1); // initialize freqmin
|
||||||
int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT); // initialize freqmax
|
int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT); // initialize freqmax
|
||||||
if (freqmax<0) {
|
if (freqmax<0) {
|
||||||
|
@ -77,6 +81,7 @@ public class TermsComponent extends SearchComponent {
|
||||||
|
|
||||||
TermEnum termEnum = rb.req.getSearcher().getReader().terms(lowerTerm); //this will be positioned ready to go
|
TermEnum termEnum = rb.req.getSearcher().getReader().terms(lowerTerm); //this will be positioned ready to go
|
||||||
int i = 0;
|
int i = 0;
|
||||||
|
BoundedTreeSet<CountPair<String, Integer>> queue = (sort ? new BoundedTreeSet<CountPair<String, Integer>>(limit) : null);
|
||||||
NamedList fieldTerms = new NamedList();
|
NamedList fieldTerms = new NamedList();
|
||||||
terms.add(field, fieldTerms);
|
terms.add(field, fieldTerms);
|
||||||
Term lowerTestTerm = termEnum.term();
|
Term lowerTestTerm = termEnum.term();
|
||||||
|
@ -87,7 +92,7 @@ public class TermsComponent extends SearchComponent {
|
||||||
termEnum.next();
|
termEnum.next();
|
||||||
}
|
}
|
||||||
|
|
||||||
while (i<limit) {
|
while (i<limit || sort) {
|
||||||
|
|
||||||
Term theTerm = termEnum.term();
|
Term theTerm = termEnum.term();
|
||||||
|
|
||||||
|
@ -111,14 +116,29 @@ public class TermsComponent extends SearchComponent {
|
||||||
if (docFreq >= freqmin && docFreq <= freqmax) {
|
if (docFreq >= freqmin && docFreq <= freqmax) {
|
||||||
// add the term to the list
|
// add the term to the list
|
||||||
String label = raw ? indexedText : ft.indexedToReadable(indexedText);
|
String label = raw ? indexedText : ft.indexedToReadable(indexedText);
|
||||||
|
if (sort) {
|
||||||
|
queue.add(new CountPair<String, Integer>(label, docFreq));
|
||||||
|
} else {
|
||||||
fieldTerms.add(label, docFreq);
|
fieldTerms.add(label, docFreq);
|
||||||
i++;
|
i++;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
termEnum.next();
|
termEnum.next();
|
||||||
}
|
}
|
||||||
|
|
||||||
termEnum.close();
|
termEnum.close();
|
||||||
|
|
||||||
|
if (sort) {
|
||||||
|
for (CountPair<String, Integer> item : queue) {
|
||||||
|
if (i < limit) {
|
||||||
|
fieldTerms.add(item.key, item.val);
|
||||||
|
i++;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified");
|
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified");
|
||||||
|
|
|
@ -62,6 +62,13 @@ public class TermsComponentTest extends AbstractSolrTestCase {
|
||||||
assertU(adoc("id", "15", "standardfilt", "d"));
|
assertU(adoc("id", "15", "standardfilt", "d"));
|
||||||
assertU(adoc("id", "16", "standardfilt", "d"));
|
assertU(adoc("id", "16", "standardfilt", "d"));
|
||||||
|
|
||||||
|
assertU(adoc("id", "17", "standardfilt", "snake"));
|
||||||
|
assertU(adoc("id", "18", "standardfilt", "spider"));
|
||||||
|
assertU(adoc("id", "19", "standardfilt", "shark"));
|
||||||
|
assertU(adoc("id", "20", "standardfilt", "snake"));
|
||||||
|
assertU(adoc("id", "21", "standardfilt", "snake"));
|
||||||
|
assertU(adoc("id", "22", "standardfilt", "shark"));
|
||||||
|
|
||||||
assertU("commit", commit());
|
assertU("commit", commit());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -203,6 +210,72 @@ public class TermsComponentTest extends AbstractSolrTestCase {
|
||||||
assertTrue("value is null and it shouldn't be", value != null);
|
assertTrue("value is null and it shouldn't be", value != null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testSortCount() throws Exception {
|
||||||
|
SolrCore core = h.getCore();
|
||||||
|
TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
|
||||||
|
assertTrue("tc is null and it shouldn't be", tc != null);
|
||||||
|
|
||||||
|
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||||
|
params.add(TermsParams.TERMS, "true");
|
||||||
|
params.add(TermsParams.TERMS_FIELD, "standardfilt");
|
||||||
|
params.add(TermsParams.TERMS_LOWER, "s");
|
||||||
|
params.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false");
|
||||||
|
params.add(TermsParams.TERMS_PREFIX_STR, "s");
|
||||||
|
params.add(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT);
|
||||||
|
|
||||||
|
SolrRequestHandler handler;
|
||||||
|
SolrQueryResponse rsp;
|
||||||
|
NamedList values;
|
||||||
|
NamedList terms;
|
||||||
|
handler = core.getRequestHandler("/terms");
|
||||||
|
assertTrue("handler is null and it shouldn't be", handler != null);
|
||||||
|
rsp = new SolrQueryResponse();
|
||||||
|
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||||
|
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||||
|
values = rsp.getValues();
|
||||||
|
terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt");
|
||||||
|
assertTrue("terms Size: " + terms.size() + " is not: " + 3, terms.size() == 3);
|
||||||
|
assertTrue("Item 0 name is not 'snake'", terms.getName(0).equals("snake"));
|
||||||
|
assertTrue("Item 0 frequency is not '3'", (Integer) terms.getVal(0) == 3);
|
||||||
|
assertTrue("Item 1 name is not 'shark'", terms.getName(1).equals("shark"));
|
||||||
|
assertTrue("Item 1 frequency is not '2'", (Integer) terms.getVal(1) == 2);
|
||||||
|
assertTrue("Item 2 name is not 'spider'", terms.getName(2).equals("spider"));
|
||||||
|
assertTrue("Item 2 frequency is not '1'", (Integer) terms.getVal(2) == 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testSortIndex() throws Exception {
|
||||||
|
SolrCore core = h.getCore();
|
||||||
|
TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
|
||||||
|
assertTrue("tc is null and it shouldn't be", tc != null);
|
||||||
|
|
||||||
|
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||||
|
params.add(TermsParams.TERMS, "true");
|
||||||
|
params.add(TermsParams.TERMS_FIELD, "standardfilt");
|
||||||
|
params.add(TermsParams.TERMS_LOWER, "s");
|
||||||
|
params.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false");
|
||||||
|
params.add(TermsParams.TERMS_PREFIX_STR, "s");
|
||||||
|
params.add(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_INDEX);
|
||||||
|
|
||||||
|
SolrRequestHandler handler;
|
||||||
|
SolrQueryResponse rsp;
|
||||||
|
NamedList values;
|
||||||
|
NamedList terms;
|
||||||
|
handler = core.getRequestHandler("/terms");
|
||||||
|
assertTrue("handler is null and it shouldn't be", handler != null);
|
||||||
|
rsp = new SolrQueryResponse();
|
||||||
|
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||||
|
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||||
|
values = rsp.getValues();
|
||||||
|
terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt");
|
||||||
|
assertTrue("terms Size: " + terms.size() + " is not: " + 3, terms.size() == 3);
|
||||||
|
assertTrue("Item 0 name is not 'shark' it is " + terms.getName(0), terms.getName(0).equals("shark"));
|
||||||
|
assertTrue("Item 0 frequency is not '2'", (Integer) terms.getVal(0) == 2);
|
||||||
|
assertTrue("Item 1 name is not 'snake', it is " + terms.getName(1), terms.getName(1).equals("snake"));
|
||||||
|
assertTrue("Item 1 frequency is not '3'", (Integer) terms.getVal(1) == 3);
|
||||||
|
assertTrue("Item 2 name is not 'spider', it is " + terms.getName(2), terms.getName(2).equals("spider"));
|
||||||
|
assertTrue("Item 2 frequency is not '1'", (Integer) terms.getVal(2) == 1);
|
||||||
|
}
|
||||||
|
|
||||||
public void testPastUpper() throws Exception {
|
public void testPastUpper() throws Exception {
|
||||||
SolrCore core = h.getCore();
|
SolrCore core = h.getCore();
|
||||||
TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
|
TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
|
||||||
|
@ -412,7 +485,7 @@ public class TermsComponentTest extends AbstractSolrTestCase {
|
||||||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||||
values = rsp.getValues();
|
values = rsp.getValues();
|
||||||
terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt");
|
terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt");
|
||||||
assertTrue("terms Size: " + terms.size() + " is not: " + 1, terms.size() == 1);
|
assertTrue("terms Size: " + terms.size() + " is not: " + 3, terms.size() == 3);
|
||||||
Integer d = (Integer) terms.get("d");
|
Integer d = (Integer) terms.get("d");
|
||||||
assertTrue(d + " does not equal: " + 3, d == 3);
|
assertTrue(d + " does not equal: " + 3, d == 3);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue