From 527bce2658a75ccf65964cc266ad46085387fdc2 Mon Sep 17 00:00:00 2001 From: Grant Ingersoll Date: Sat, 6 Dec 2008 14:09:42 +0000 Subject: [PATCH] SOLR-877: added mincount and maxcount options git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@723985 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 1 + .../solr/common/params/TermsParams.java | 10 ++++ .../handler/component/TermsComponent.java | 18 +++--- .../handler/component/TermsComponentTest.java | 55 ++++++++++++++++++- 4 files changed, 74 insertions(+), 10 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 5f91103a38c..8d93e4c1ac7 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -93,6 +93,7 @@ New Features 18. SOLR-877: Added TermsComponent for accessing Lucene's TermEnum capabilities. Useful for auto suggest and possibly distributed search. Not distributed search compliant. (gsingers) + - Added mincount and maxcount options (Khee Chin via gsingers) 19. SOLR-538: Add maxChars attribute for copyField function so that the length limit for destination can be specified. diff --git a/src/java/org/apache/solr/common/params/TermsParams.java b/src/java/org/apache/solr/common/params/TermsParams.java index b2ee500935a..1ade243f874 100644 --- a/src/java/org/apache/solr/common/params/TermsParams.java +++ b/src/java/org/apache/solr/common/params/TermsParams.java @@ -67,4 +67,14 @@ public interface TermsParams { public static final String TERMS_ROWS = TERMS_PREFIX + "rows"; public static final String TERMS_PREFIX_STR = TERMS_PREFIX + "prefix"; + + /** + * Optional. The minimum value of docFreq to be returned. 1 by default + */ + public static final String TERMS_MINCOUNT = TERMS_PREFIX + "mincount"; + /** + * Optional. The maximum value of docFreq to be returned. -1 by default means no boundary + */ + public static final String TERMS_MAXCOUNT = TERMS_PREFIX + "maxcount"; } + diff --git a/src/java/org/apache/solr/handler/component/TermsComponent.java b/src/java/org/apache/solr/handler/component/TermsComponent.java index be89eabeb63..e441c4c9ea1 100644 --- a/src/java/org/apache/solr/handler/component/TermsComponent.java +++ b/src/java/org/apache/solr/handler/component/TermsComponent.java @@ -34,6 +34,7 @@ import java.io.IOException; * See Lucene's TermEnum class */ public class TermsComponent extends SearchComponent { + public static final int UNLIMITED_MAX_COUNT = -1; public void process(ResponseBuilder rb) throws IOException { @@ -45,13 +46,14 @@ public class TermsComponent extends SearchComponent { NamedList terms = new NamedList(); rb.rsp.add("terms", terms); int rows = params.getInt(TermsParams.TERMS_ROWS, params.getInt(CommonParams.ROWS, 10)); - if (rows < 0){ + if (rows < 0) { rows = Integer.MAX_VALUE; } - String upper = params.get(TermsParams.TERMS_UPPER); boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false); boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true); + int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1); // initialize freqmin + int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT); // initialize freqmax String prefix = params.get(TermsParams.TERMS_PREFIX_STR); for (int j = 0; j < fields.length; j++) { String field = fields[j]; @@ -77,17 +79,19 @@ public class TermsComponent extends SearchComponent { (upperIncl == false && upperCmp < 0)) && (prefix == null || theText.startsWith(prefix)) ) { - fieldTerms.add(theText, termEnum.docFreq()); + int docFreq = termEnum.docFreq(); + if (docFreq >= freqmin && (freqmax == UNLIMITED_MAX_COUNT || (docFreq <= freqmax))) { + fieldTerms.add(theText, docFreq); + i++; + } } else {//we're done break; } - i++; } while (i < rows && termEnum.next()); } termEnum.close(); } - } else { throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified"); } @@ -99,7 +103,7 @@ public class TermsComponent extends SearchComponent { } public String getVersion() { - return "$Revision$"; + return "$Revision:$"; } public String getSourceId() { @@ -107,7 +111,7 @@ public class TermsComponent extends SearchComponent { } public String getSource() { - return "$Revision:$"; + return "$URL:$"; } public String getDescription() { diff --git a/src/test/org/apache/solr/handler/component/TermsComponentTest.java b/src/test/org/apache/solr/handler/component/TermsComponentTest.java index 413174690cd..bc5d1be8d2d 100644 --- a/src/test/org/apache/solr/handler/component/TermsComponentTest.java +++ b/src/test/org/apache/solr/handler/component/TermsComponentTest.java @@ -16,7 +16,6 @@ package org.apache.solr.handler.component; * limitations under the License. */ -import org.mortbay.log.Log; import org.apache.solr.util.AbstractSolrTestCase; import org.apache.solr.core.SolrCore; import org.apache.solr.common.params.ModifiableSolrParams; @@ -27,8 +26,6 @@ import org.apache.solr.request.SolrRequestHandler; import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.SolrQueryResponse; -import java.util.Iterator; - /** * @@ -57,6 +54,14 @@ public class TermsComponentTest extends AbstractSolrTestCase { assertU(adoc("id", "8", "lowerfilt", "baa", "standardfilt", "cccc")); assertU(adoc("id", "9", "lowerfilt", "bbb", "standardfilt", "ccccc")); + assertU(adoc("id", "10", "standardfilt", "ddddd")); + assertU(adoc("id", "11", "standardfilt", "ddddd")); + assertU(adoc("id", "12", "standardfilt", "ddddd")); + assertU(adoc("id", "13", "standardfilt", "ddddd")); + assertU(adoc("id", "14", "standardfilt", "d")); + assertU(adoc("id", "15", "standardfilt", "d")); + assertU(adoc("id", "16", "standardfilt", "d")); + assertU("commit", commit()); } @@ -339,4 +344,48 @@ public class TermsComponentTest extends AbstractSolrTestCase { assertTrue("b is null and it shouldn't be", terms.get("b") == null); assertTrue("baa is not null", terms.get("baa") == null); } + + + public void testMinMaxFreq() throws Exception { + SolrCore core = h.getCore(); + TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp"); + assertTrue("tc is null and it shouldn't be", tc != null); + SolrRequestHandler handler; + SolrQueryResponse rsp; + NamedList values; + NamedList terms; + handler = core.getRequestHandler("/terms"); + ModifiableSolrParams params = new ModifiableSolrParams(); + params.add(TermsParams.TERMS, "true"); + params.add(TermsParams.TERMS_FIELD, "lowerfilt"); + params.add(TermsParams.TERMS_ROWS, String.valueOf(50)); + // Tests TERMS_LOWER = "a" with freqmin = 2, freqmax = -1, terms.size() = 1 + params.add(TermsParams.TERMS_LOWER, "a"); + params.add(TermsParams.TERMS_MINCOUNT,String.valueOf(2)); + params.add(TermsParams.TERMS_MAXCOUNT,String.valueOf(TermsComponent.UNLIMITED_MAX_COUNT)); + rsp = new SolrQueryResponse(); + rsp.add("responseHeader", new SimpleOrderedMap()); + handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp); + values = rsp.getValues(); + terms = (NamedList) ((NamedList) values.get("terms")).get("lowerfilt"); + assertTrue("terms Size: " + terms.size() + " is not: " + 1, terms.size() == 1); + + params = new ModifiableSolrParams(); + params.add(TermsParams.TERMS, "true"); + params.add(TermsParams.TERMS_FIELD, "standardfilt"); + params.add(TermsParams.TERMS_ROWS, String.valueOf(50)); + // Tests TERMS_LOWER = "a" with freqmin = 2, freqmax = -1, terms.size() = 1 + params.add(TermsParams.TERMS_LOWER, "d"); + params.add(TermsParams.TERMS_MINCOUNT,String.valueOf(2)); + params.add(TermsParams.TERMS_MAXCOUNT,String.valueOf(3)); + rsp = new SolrQueryResponse(); + rsp.add("responseHeader", new SimpleOrderedMap()); + handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp); + values = rsp.getValues(); + terms = (NamedList) ((NamedList) values.get("terms")).get("standardfilt"); + assertTrue("terms Size: " + terms.size() + " is not: " + 1, terms.size() == 1); + Integer d = (Integer) terms.get("d"); + assertTrue(d + " does not equal: " + 3, d == 3); + + } }