SOLR-877: Add access to TermEnum capabilities via TermsComponent

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@721491 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2008-11-28 13:30:02 +00:00
parent 5e6e0e3258
commit b55c2b6592
6 changed files with 386 additions and 1 deletions

View File

@ -91,6 +91,8 @@ New Features
17. SOLR-829: Allow slaves to request compressed files from master during replication 17. SOLR-829: Allow slaves to request compressed files from master during replication
(Simon Collins, Noble Paul, Akshay Ukey via shalin) (Simon Collins, Noble Paul, Akshay Ukey via shalin)
18. SOLR-877: Added TermsComponent for accessing Lucene's TermEnum capabilities.
Useful for auto suggest and possibly distributed search. Not distributed search compliant. (gsingers)
Optimizations Optimizations
---------------------- ----------------------

View File

@ -610,6 +610,15 @@
</requestHandler> </requestHandler>
<!-- Registers TermsComponent under the name "termsComp" and exposes it through
     a SearchHandler at /autoSuggest whose component list names only termsComp. -->
<searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/>
<requestHandler name="/autoSuggest" class="org.apache.solr.handler.component.SearchHandler">
<arr name="components">
<str>termsComp</str>
</arr>
</requestHandler>
<!-- a search component that enables you to configure the top results for <!-- a search component that enables you to configure the top results for
a given query regardless of the normal lucene scoring.--> a given query regardless of the normal lucene scoring.-->
<searchComponent name="elevator" class="solr.QueryElevationComponent" > <searchComponent name="elevator" class="solr.QueryElevationComponent" >

View File

@ -0,0 +1,62 @@
package org.apache.solr.common.params;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * Request parameter names understood by the TermsComponent.
 * Every name other than {@link #TERMS} is built from the {@link #TERMS_PREFIX} namespace.
 **/
public class TermsParams {

  /**
   * Switch that turns term enumeration on for a request. Treated as false when absent.
   */
  public static final String TERMS = "terms";

  /**
   * Common prefix shared by all of the more specific terms parameters.
   */
  public static final String TERMS_PREFIX = TERMS + ".";

  /**
   * Required. Name of the field whose terms are enumerated.
   */
  public static final String TERMS_FIELD = TERMS_PREFIX + "fl";

  /**
   * Optional. Maximum number of terms to return. When absent,
   * {@link org.apache.solr.common.params.CommonParams#ROWS} is consulted, and failing that 10 is used.
   */
  public static final String TERMS_ROWS = TERMS_PREFIX + "rows";

  /**
   * Optional. The lower bound term at which enumeration is positioned.
   * The empty string is used when this is not specified.
   *
   * @see #TERMS_LOWER_INCLUSIVE
   */
  public static final String TERMS_LOWER = TERMS_PREFIX + "lower";

  /**
   * Optional. Whether the lower bound term itself is part of the results; when false,
   * enumeration moves on to the following term. True by default.
   */
  public static final String TERMS_LOWER_INCLUSIVE = TERMS_PREFIX + "lwr.incl";

  /**
   * Optional. The term at which enumeration stops.
   *
   * @see #TERMS_UPPER_INCLUSIVE
   */
  public static final String TERMS_UPPER = TERMS_PREFIX + "upper";

  /**
   * Optional. Whether the upper bound term itself is part of the results. False by default.
   */
  public static final String TERMS_UPPER_INCLUSIVE = TERMS_PREFIX + "upr.incl";
}

View File

@ -0,0 +1,101 @@
package org.apache.solr.handler.component;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.TermsParams;
import org.apache.solr.common.util.NamedList;
import java.io.IOException;
/**
 * Return TermEnum information, useful for things like auto suggest.
 * <p>
 * When the {@link TermsParams#TERMS} flag is true, the terms of the field named by
 * {@link TermsParams#TERMS_FIELD} are enumerated between the optional lower and upper
 * bounds and added to the response under the key "terms", each term mapped to its
 * document frequency rendered as a String.
 *
 * @see org.apache.solr.common.params.TermsParams
 * See Lucene's TermEnum class
 */
public class TermsComponent extends SearchComponent {

  /**
   * Enumerates the requested terms and adds them to the response.
   * Does nothing unless the {@link TermsParams#TERMS} parameter is true.
   *
   * @param rb the response builder carrying the request and the response
   * @throws IOException if reading the term dictionary fails
   * @throws SolrException with BAD_REQUEST when no {@link TermsParams#TERMS_FIELD} is given
   */
  public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.getBool(TermsParams.TERMS, false)) {
      return;
    }
    String field = params.get(TermsParams.TERMS_FIELD);
    if (field == null) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified");
    }

    // Parse every parameter before opening the enumerator so that a malformed
    // value cannot leave an open TermEnum behind.
    String lower = params.get(TermsParams.TERMS_LOWER, "");
    String upper = params.get(TermsParams.TERMS_UPPER);
    Term upperTerm = upper != null ? new Term(field, upper) : null;
    boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
    boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
    int rows = params.getInt(TermsParams.TERMS_ROWS, params.getInt(CommonParams.ROWS, 10));

    NamedList<String> terms = new NamedList<String>();
    rb.rsp.add("terms", terms);

    //this will be positioned ready to go
    TermEnum termEnum = rb.req.getSearcher().getReader().terms(new Term(field, lower));
    try {
      boolean hasMore = true;
      Term first = termEnum.term();
      // Only step past the first term when the lower bound is excluded AND the
      // enumerator is actually sitting on the lower bound term; otherwise the
      // first term is already strictly greater than the bound and must be kept.
      if (!lowerIncl && first != null && first.field().equals(field) && first.text().equals(lower)) {
        hasMore = termEnum.next();
      }

      // A non-positive row count yields an empty list.
      int count = 0;
      while (hasMore && count < rows) {
        Term theTerm = termEnum.term();
        // term() is null once the enumeration is exhausted; a different field
        // means we have walked past the last term of the requested field.
        if (theTerm == null || !theTerm.field().equals(field)
                || !withinUpperBound(theTerm, upperTerm, upperIncl)) {
          break;//we're done
        }
        terms.add(theTerm.text(), String.valueOf(termEnum.docFreq()));
        count++;
        hasMore = count < rows && termEnum.next();
      }
    } finally {
      termEnum.close();
    }
  }

  /**
   * Tells whether a term is still inside the requested upper bound.
   *
   * @param term      the term currently under the enumerator, never null
   * @param upperTerm the upper bound, or null when the range is open-ended
   * @param inclusive whether a term equal to the upper bound is accepted
   * @return true when the term should be part of the results
   */
  private static boolean withinUpperBound(Term term, Term upperTerm, boolean inclusive) {
    if (upperTerm == null) {
      return true;
    }
    int cmp = term.compareTo(upperTerm);
    return inclusive ? cmp <= 0 : cmp < 0;
  }

  /**
   * No preparation is needed for this component.
   */
  public void prepare(ResponseBuilder rb) throws IOException {
    //nothing to do
  }

  public String getVersion() {
    return "$Revision$";
  }

  public String getSourceId() {
    return "$Id:$";
  }

  // NOTE(review): this returns a Revision keyword rather than a URL keyword, which
  // looks like a copy/paste slip; left untouched because it is a runtime string.
  public String getSource() {
    return "$Revision:$";
  }

  public String getDescription() {
    return "A Component for working with Term Enumerators";
  }
}

View File

@ -0,0 +1,203 @@
package org.apache.solr.handler.component;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.mortbay.log.Log;
import org.apache.solr.util.AbstractSolrTestCase;
import org.apache.solr.core.SolrCore;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.TermsParams;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryResponse;
import java.util.Iterator;
/**
 * Tests for {@link TermsComponent}, exercised through the "/terms" request handler
 * against a small index of "lowerfilt" values.
 **/
public class TermsComponentTest extends AbstractSolrTestCase {

  public String getSchemaFile() {
    return "schema.xml";
  }

  public String getSolrConfigFile() {
    return "solrconfig.xml";
  }

  /**
   * Indexes ten documents (ids 0 through 9) and commits. The value "a" occurs twice.
   */
  public void setUp() throws Exception {
    super.setUp();
    String[] values = {"a", "a", "aa", "aaa", "ab", "abb", "abc", "b", "baa", "bbb"};
    for (int id = 0; id < values.length; id++) {
      assertU(adoc("id", String.valueOf(id), "lowerfilt", values[id]));
    }
    assertU("commit", commit());
  }

  /**
   * With no lower bound and an exclusive upper bound of "b", every term below "b" is returned.
   */
  public void testEmptyLower() throws Exception {
    assertComponentRegistered();
    ModifiableSolrParams params = baseParams();
    //no lower bound
    params.add(TermsParams.TERMS_UPPER, "b");
    params.add(TermsParams.TERMS_ROWS, String.valueOf(50));

    NamedList<?> terms = fetchTerms(params);
    assertEquals("unexpected number of terms", 6, terms.size());
    assertPresent(terms, "a", "aa", "aaa", "ab", "abb", "abc");
  }

  /**
   * A lower bound beyond the last indexed value yields no terms at all.
   */
  public void testPastUpper() throws Exception {
    assertComponentRegistered();
    ModifiableSolrParams params = baseParams();
    //no upper bound
    params.add(TermsParams.TERMS_LOWER, "d");
    params.add(TermsParams.TERMS_ROWS, String.valueOf(50));

    NamedList<?> terms = fetchTerms(params);
    assertEquals("unexpected number of terms", 0, terms.size());
  }

  /**
   * Walks through the bound-inclusion flags and the row limit on the range "a" to "b".
   */
  public void test() throws Exception {
    assertComponentRegistered();
    ModifiableSolrParams params = baseParams();
    params.add(TermsParams.TERMS_LOWER, "a");
    params.add(TermsParams.TERMS_UPPER, "b");
    params.add(TermsParams.TERMS_ROWS, String.valueOf(50));

    // defaults: lower bound included, upper bound excluded
    NamedList<?> terms = fetchTerms(params);
    assertEquals("unexpected number of terms", 6, terms.size());
    assertPresent(terms, "a", "aa", "aaa", "ab", "abb", "abc");
    assertAbsent(terms, "b");

    // upper bound now included
    params.add(TermsParams.TERMS_UPPER_INCLUSIVE, "true");
    terms = fetchTerms(params);
    assertEquals("unexpected number of terms", 7, terms.size());
    assertPresent(terms, "a", "aa", "aaa", "ab", "abb", "abc", "b");
    assertAbsent(terms, "baa");

    // lower bound now excluded as well
    params.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false");
    terms = fetchTerms(params);
    assertEquals("unexpected number of terms", 6, terms.size());
    assertPresent(terms, "aa", "aaa", "ab", "abb", "abc", "b");
    assertAbsent(terms, "a", "baa");

    // fresh parameters: same range, limited to two rows
    params = baseParams();
    params.add(TermsParams.TERMS_LOWER, "a");
    params.add(TermsParams.TERMS_UPPER, "b");
    params.add(TermsParams.TERMS_ROWS, String.valueOf(2));
    terms = fetchTerms(params);
    assertEquals("unexpected number of terms", 2, terms.size());
    assertPresent(terms, "a", "aa");
    assertAbsent(terms, "abb", "abc", "b", "baa");
  }

  /** Verifies that the component under test is registered as "termsComp". */
  private void assertComponentRegistered() {
    assertTrue("termsComp is not registered as a TermsComponent",
            h.getCore().getSearchComponent("termsComp") instanceof TermsComponent);
  }

  /** Builds the parameters every request needs: terms enabled on the "lowerfilt" field. */
  private ModifiableSolrParams baseParams() {
    ModifiableSolrParams params = new ModifiableSolrParams();
    params.add(TermsParams.TERMS, "true");
    params.add(TermsParams.TERMS_FIELD, "lowerfilt");
    return params;
  }

  /** Runs the parameters through the "/terms" handler and returns the "terms" section of the response. */
  private NamedList<?> fetchTerms(ModifiableSolrParams params) throws Exception {
    SolrCore core = h.getCore();
    SolrRequestHandler handler = core.getRequestHandler("/terms");
    assertNotNull("handler is null and it shouldn't be", handler);

    SolrQueryResponse rsp = new SolrQueryResponse();
    rsp.add("responseHeader", new SimpleOrderedMap<Object>());
    LocalSolrQueryRequest req = new LocalSolrQueryRequest(core, params);
    try {
      handler.handleRequest(req, rsp);
    } finally {
      // release whatever the request acquired while it was being handled
      req.close();
    }
    return (NamedList<?>) rsp.getValues().get("terms");
  }

  /** Asserts that each of the given terms has an entry in the result. */
  private static void assertPresent(NamedList<?> terms, String... expected) {
    for (String term : expected) {
      assertNotNull(term + " is null and it shouldn't be", terms.get(term));
    }
  }

  /** Asserts that none of the given terms has an entry in the result. */
  private static void assertAbsent(NamedList<?> terms, String... unexpected) {
    for (String term : unexpected) {
      assertNull(term + " is not null and it should be", terms.get(term));
    }
  }
}

View File

@ -349,6 +349,14 @@
<str name="spellcheckIndexDir">spellchecker3</str> <str name="spellcheckIndexDir">spellchecker3</str>
</lst> </lst>
</searchComponent> </searchComponent>
<!-- Registers TermsComponent under the name "termsComp" and exposes it through
     a SearchHandler at /terms; TermsComponentTest looks up both by these names. -->
<searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/>
<requestHandler name="/terms" class="org.apache.solr.handler.component.SearchHandler">
<arr name="components">
<str>termsComp</str>
</arr>
</requestHandler>
<!-- <!--
The SpellingQueryConverter to convert raw (CommonParams.Q) queries into tokens. Uses a simple regular expression The SpellingQueryConverter to convert raw (CommonParams.Q) queries into tokens. Uses a simple regular expression
to strip off field markup, boosts, ranges, etc. but it is not guaranteed to match an exact parse from the query parser. to strip off field markup, boosts, ranges, etc. but it is not guaranteed to match an exact parse from the query parser.