From 5d1bb05f321681ccae23bbd2542c6ba7eeb45bb7 Mon Sep 17 00:00:00 2001 From: Yonik Seeley Date: Mon, 24 Aug 2009 15:56:32 +0000 Subject: [PATCH] SOLR-1156: Sort TermsComponent results by frequency git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@807289 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 2 + .../solr/common/params/TermsParams.java | 8 ++ .../handler/component/TermsComponent.java | 26 ++++++- .../handler/component/TermsComponentTest.java | 75 ++++++++++++++++++- 4 files changed, 107 insertions(+), 4 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index df97af440fb..3c138abb1f4 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -280,6 +280,8 @@ New Features high precision date subtraction, add sub() for subtracting other arguments. (yonik) +73. SOLR-1156: Sort TermsComponent results by frequency (Matt Weber via yonik) + Optimizations ---------------------- 1. SOLR-374: Use IndexReader.reopen to save resources by re-using parts of the diff --git a/src/common/org/apache/solr/common/params/TermsParams.java b/src/common/org/apache/solr/common/params/TermsParams.java index dd869e57705..d08dd183a28 100644 --- a/src/common/org/apache/solr/common/params/TermsParams.java +++ b/src/common/org/apache/solr/common/params/TermsParams.java @@ -82,5 +82,13 @@ public interface TermsParams { * For instance, the index form of numeric numbers is not human readable. The default is false. */ public static final String TERMS_RAW = TERMS_PREFIX + "raw"; + + /** + * Optional. If sorting by frequency is enabled. Defaults to sorting by count. + */ + public static final String TERMS_SORT = TERMS_PREFIX + "sort"; + + public static final String TERMS_SORT_COUNT = "count"; + public static final String TERMS_SORT_INDEX = "index"; } diff --git a/src/java/org/apache/solr/handler/component/TermsComponent.java b/src/java/org/apache/solr/handler/component/TermsComponent.java index 3260b7307e7..93c82cf3c49 100644 --- a/src/java/org/apache/solr/handler/component/TermsComponent.java +++ b/src/java/org/apache/solr/handler/component/TermsComponent.java @@ -26,6 +26,8 @@ import org.apache.solr.common.params.TermsParams; import org.apache.solr.common.util.NamedList; import org.apache.solr.schema.FieldType; import org.apache.solr.schema.StrField; +import org.apache.solr.request.SimpleFacets.CountPair; +import org.apache.solr.util.BoundedTreeSet; import java.io.IOException; @@ -55,6 +57,8 @@ public class TermsComponent extends SearchComponent { String upperStr = params.get(TermsParams.TERMS_UPPER); boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false); boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true); + boolean sort = !TermsParams.TERMS_SORT_INDEX.equals( + params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT)); int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1); // initialize freqmin int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT); // initialize freqmax if (freqmax<0) { @@ -77,6 +81,7 @@ public class TermsComponent extends SearchComponent { TermEnum termEnum = rb.req.getSearcher().getReader().terms(lowerTerm); //this will be positioned ready to go int i = 0; + BoundedTreeSet> queue = (sort ? new BoundedTreeSet>(limit) : null); NamedList fieldTerms = new NamedList(); terms.add(field, fieldTerms); Term lowerTestTerm = termEnum.term(); @@ -87,7 +92,7 @@ public class TermsComponent extends SearchComponent { termEnum.next(); } - while (i= freqmin && docFreq <= freqmax) { // add the term to the list String label = raw ? indexedText : ft.indexedToReadable(indexedText); - fieldTerms.add(label, docFreq); - i++; + if (sort) { + queue.add(new CountPair(label, docFreq)); + } else { + fieldTerms.add(label, docFreq); + i++; + } } termEnum.next(); } termEnum.close(); + + if (sort) { + for (CountPair item : queue) { + if (i < limit) { + fieldTerms.add(item.key, item.val); + i++; + } else { + break; + } + } + } } } else { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified"); diff --git a/src/test/org/apache/solr/handler/component/TermsComponentTest.java b/src/test/org/apache/solr/handler/component/TermsComponentTest.java index a87086701eb..00475f752f7 100644 --- a/src/test/org/apache/solr/handler/component/TermsComponentTest.java +++ b/src/test/org/apache/solr/handler/component/TermsComponentTest.java @@ -62,6 +62,13 @@ public class TermsComponentTest extends AbstractSolrTestCase { assertU(adoc("id", "15", "standardfilt", "d")); assertU(adoc("id", "16", "standardfilt", "d")); + assertU(adoc("id", "17", "standardfilt", "snake")); + assertU(adoc("id", "18", "standardfilt", "spider")); + assertU(adoc("id", "19", "standardfilt", "shark")); + assertU(adoc("id", "20", "standardfilt", "snake")); + assertU(adoc("id", "21", "standardfilt", "snake")); + assertU(adoc("id", "22", "standardfilt", "shark")); + assertU("commit", commit()); } @@ -203,6 +210,72 @@ public class TermsComponentTest extends AbstractSolrTestCase { assertTrue("value is null and it shouldn't be", value != null); } + public void testSortCount() throws Exception { + SolrCore core = h.getCore(); + TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp"); + assertTrue("tc is null and it shouldn't be", tc != null); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.add(TermsParams.TERMS, "true"); + params.add(TermsParams.TERMS_FIELD, "standardfilt"); + params.add(TermsParams.TERMS_LOWER, "s"); + params.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false"); + params.add(TermsParams.TERMS_PREFIX_STR, "s"); + params.add(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT); + + SolrRequestHandler handler; + SolrQueryResponse rsp; + NamedList values; + NamedList terms; + handler = core.getRequestHandler("/terms"); + assertTrue("handler is null and it shouldn't be", handler != null); + rsp = new SolrQueryResponse(); + rsp.add("responseHeader", new SimpleOrderedMap()); + handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp); + values = rsp.getValues(); + terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt"); + assertTrue("terms Size: " + terms.size() + " is not: " + 3, terms.size() == 3); + assertTrue("Item 0 name is not 'snake'", terms.getName(0).equals("snake")); + assertTrue("Item 0 frequency is not '3'", (Integer) terms.getVal(0) == 3); + assertTrue("Item 1 name is not 'shark'", terms.getName(1).equals("shark")); + assertTrue("Item 1 frequency is not '2'", (Integer) terms.getVal(1) == 2); + assertTrue("Item 2 name is not 'spider'", terms.getName(2).equals("spider")); + assertTrue("Item 2 frequency is not '1'", (Integer) terms.getVal(2) == 1); + } + + public void testSortIndex() throws Exception { + SolrCore core = h.getCore(); + TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp"); + assertTrue("tc is null and it shouldn't be", tc != null); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.add(TermsParams.TERMS, "true"); + params.add(TermsParams.TERMS_FIELD, "standardfilt"); + params.add(TermsParams.TERMS_LOWER, "s"); + params.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false"); + params.add(TermsParams.TERMS_PREFIX_STR, "s"); + params.add(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_INDEX); + + SolrRequestHandler handler; + SolrQueryResponse rsp; + NamedList values; + NamedList terms; + handler = core.getRequestHandler("/terms"); + assertTrue("handler is null and it shouldn't be", handler != null); + rsp = new SolrQueryResponse(); + rsp.add("responseHeader", new SimpleOrderedMap()); + handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp); + values = rsp.getValues(); + terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt"); + assertTrue("terms Size: " + terms.size() + " is not: " + 3, terms.size() == 3); + assertTrue("Item 0 name is not 'shark' it is " + terms.getName(0), terms.getName(0).equals("shark")); + assertTrue("Item 0 frequency is not '2'", (Integer) terms.getVal(0) == 2); + assertTrue("Item 1 name is not 'snake', it is " + terms.getName(1), terms.getName(1).equals("snake")); + assertTrue("Item 1 frequency is not '3'", (Integer) terms.getVal(1) == 3); + assertTrue("Item 2 name is not 'spider', it is " + terms.getName(2), terms.getName(2).equals("spider")); + assertTrue("Item 2 frequency is not '1'", (Integer) terms.getVal(2) == 1); + } + public void testPastUpper() throws Exception { SolrCore core = h.getCore(); TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp"); @@ -412,7 +485,7 @@ public class TermsComponentTest extends AbstractSolrTestCase { handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp); values = rsp.getValues(); terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt"); - assertTrue("terms Size: " + terms.size() + " is not: " + 1, terms.size() == 1); + assertTrue("terms Size: " + terms.size() + " is not: " + 3, terms.size() == 3); Integer d = (Integer) terms.get("d"); assertTrue(d + " does not equal: " + 3, d == 3);