SOLR-877: Add access to TermEnum capabilities via TermsComponent

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@721491 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2008-11-28 13:30:02 +00:00
parent 5e6e0e3258
commit b55c2b6592
6 changed files with 386 additions and 1 deletions

View File

@ -90,7 +90,9 @@ New Features
17. SOLR-829: Allow slaves to request compressed files from master during replication
(Simon Collins, Noble Paul, Akshay Ukey via shalin)
18. SOLR-877: Added TermsComponent for accessing Lucene's TermEnum capabilities.
Useful for auto suggest and possibly distributed search. Not distributed search compliant. (gsingers)
Optimizations
----------------------

View File

@ -610,6 +610,15 @@
</requestHandler>
<searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/>
<requestHandler name="/autoSuggest" class="org.apache.solr.handler.component.SearchHandler">
<arr name="components">
<str>termsComp</str>
</arr>
</requestHandler>
<!-- a search component that enables you to configure the top results for
a given query regardless of the normal lucene scoring.-->
<searchComponent name="elevator" class="solr.QueryElevationComponent" >

View File

@ -0,0 +1,62 @@
package org.apache.solr.common.params;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
*
*
**/
public class TermsParams {
public static final String TERMS = "terms";
public static final String TERMS_PREFIX = TERMS + ".";
/**
* Required. Specify the field to look up terms in.
*/
public static final String TERMS_FIELD = TERMS_PREFIX + "fl";
/**
* Optional. The lower bound term to start at. The TermEnum will start at the next term after this term in the dictionary.
*
* If not specified, the empty string is used
*/
public static final String TERMS_LOWER = TERMS_PREFIX + "lower";
/**
* Optional. The term to stop at.
*
* @see #TERMS_UPPER_INCLUSIVE
*/
public static final String TERMS_UPPER = TERMS_PREFIX + "upper";
/**
* Optional. If true, include the upper bound term in the results. False by default.
*/
public static final String TERMS_UPPER_INCLUSIVE = TERMS_PREFIX + "upr.incl";
/**
* Optional. If true, include the lower bound term in the results, otherwise skip to the next one. True by default.
*/
public static final String TERMS_LOWER_INCLUSIVE = TERMS_PREFIX + "lwr.incl";
/**
* Optional. The number of results to return. If not specified, looks for {@link org.apache.solr.common.params.CommonParams#ROWS}. If that's not specified, uses 10.
*/
public static final String TERMS_ROWS = TERMS_PREFIX + "rows";
}

View File

@ -0,0 +1,101 @@
package org.apache.solr.handler.component;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.TermsParams;
import org.apache.solr.common.util.NamedList;
import java.io.IOException;
/**
* Return TermEnum information, useful for things like auto suggest.
*
* @see org.apache.solr.common.params.TermsParams
* See Lucene's TermEnum class
*/
public class TermsComponent extends SearchComponent {
public void process(ResponseBuilder rb) throws IOException {
SolrParams params = rb.req.getParams();
if (params.getBool(TermsParams.TERMS, false)) {
String lower = params.get(TermsParams.TERMS_LOWER, "");
String field = params.get(TermsParams.TERMS_FIELD);
if (field != null) {
Term lowerTerm = new Term(field, lower);
TermEnum termEnum = rb.req.getSearcher().getReader().terms(lowerTerm);//this will be positioned ready to go
int rows = params.getInt(TermsParams.TERMS_ROWS, params.getInt(CommonParams.ROWS, 10));
int i = 0;
NamedList terms = new NamedList();
rb.rsp.add("terms", terms);
String upper = params.get(TermsParams.TERMS_UPPER);
Term upperTerm = upper != null ? new Term(field, upper) : null;
boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
boolean hasMore = true;
if (lowerIncl == false) {
hasMore = termEnum.next();
}
if (hasMore == true) {
do {
Term theTerm = termEnum.term();
String theText = theTerm.text();
int upperCmp = upperTerm != null ? theTerm.compareTo(upperTerm) : -1;
if (theTerm != null && theTerm.field().equals(field)
&& ((upperIncl == true && upperCmp <= 0) ||
(upperIncl == false && upperCmp < 0))) {
terms.add(theText, String.valueOf(termEnum.docFreq()));
} else {//we're done
break;
}
i++;
}
while (i < rows && termEnum.next());
}
termEnum.close();
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified");
}
}
}
public void prepare(ResponseBuilder rb) throws IOException {
//nothing to do
}
public String getVersion() {
return "$Revision$";
}
public String getSourceId() {
return "$Id:$";
}
public String getSource() {
return "$Revision:$";
}
public String getDescription() {
return "A Component for working with Term Enumerators";
}
}

View File

@ -0,0 +1,203 @@
package org.apache.solr.handler.component;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.mortbay.log.Log;
import org.apache.solr.util.AbstractSolrTestCase;
import org.apache.solr.core.SolrCore;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.TermsParams;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryResponse;
import java.util.Iterator;
/**
*
*
**/
public class TermsComponentTest extends AbstractSolrTestCase {
public String getSchemaFile() {
return "schema.xml";
}
public String getSolrConfigFile() {
return "solrconfig.xml";
}
public void setUp() throws Exception {
super.setUp();
assertU(adoc("id", "0", "lowerfilt", "a"));
assertU(adoc("id", "1", "lowerfilt", "a"));
assertU(adoc("id", "2", "lowerfilt", "aa"));
assertU(adoc("id", "3", "lowerfilt", "aaa"));
assertU(adoc("id", "4", "lowerfilt", "ab"));
assertU(adoc("id", "5", "lowerfilt", "abb"));
assertU(adoc("id", "6", "lowerfilt", "abc"));
assertU(adoc("id", "7", "lowerfilt", "b"));
assertU(adoc("id", "8", "lowerfilt", "baa"));
assertU(adoc("id", "9", "lowerfilt", "bbb"));
assertU("commit", commit());
}
public void testEmptyLower() throws Exception {
SolrCore core = h.getCore();
TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
assertTrue("tc is null and it shouldn't be", tc != null);
ModifiableSolrParams params = new ModifiableSolrParams();
params.add(TermsParams.TERMS, "true");
params.add(TermsParams.TERMS_FIELD, "lowerfilt");
//no lower bound
params.add(TermsParams.TERMS_UPPER, "b");
params.add(TermsParams.TERMS_ROWS, String.valueOf(50));
SolrRequestHandler handler;
SolrQueryResponse rsp;
NamedList values;
NamedList terms;
handler = core.getRequestHandler("/terms");
assertTrue("handler is null and it shouldn't be", handler != null);
rsp = new SolrQueryResponse();
rsp.add("responseHeader", new SimpleOrderedMap());
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
values = rsp.getValues();
terms = (NamedList) values.get("terms");
assertTrue("terms Size: " + terms.size() + " is not: " + 6, terms.size() == 6);
assertTrue("a is null and it shouldn't be", terms.get("a") != null);
assertTrue("aa is null and it shouldn't be", terms.get("aa") != null);
assertTrue("aaa is null and it shouldn't be", terms.get("aaa") != null);
assertTrue("ab is null and it shouldn't be", terms.get("ab") != null);
assertTrue("abb is null and it shouldn't be", terms.get("abb") != null);
assertTrue("abc is null and it shouldn't be", terms.get("abc") != null);
}
public void testPastUpper() throws Exception {
SolrCore core = h.getCore();
TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
assertTrue("tc is null and it shouldn't be", tc != null);
ModifiableSolrParams params = new ModifiableSolrParams();
params.add(TermsParams.TERMS, "true");
params.add(TermsParams.TERMS_FIELD, "lowerfilt");
//no lower bound
params.add(TermsParams.TERMS_LOWER, "d");
params.add(TermsParams.TERMS_ROWS, String.valueOf(50));
SolrRequestHandler handler;
SolrQueryResponse rsp;
NamedList values;
NamedList terms;
handler = core.getRequestHandler("/terms");
assertTrue("handler is null and it shouldn't be", handler != null);
rsp = new SolrQueryResponse();
rsp.add("responseHeader", new SimpleOrderedMap());
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
values = rsp.getValues();
terms = (NamedList) values.get("terms");
assertTrue("terms Size: " + terms.size() + " is not: " + 0, terms.size() == 0);
}
public void test() throws Exception {
SolrCore core = h.getCore();
TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
assertTrue("tc is null and it shouldn't be", tc != null);
ModifiableSolrParams params = new ModifiableSolrParams();
params.add(TermsParams.TERMS, "true");
params.add(TermsParams.TERMS_FIELD, "lowerfilt");
params.add(TermsParams.TERMS_LOWER, "a");
params.add(TermsParams.TERMS_UPPER, "b");
params.add(TermsParams.TERMS_ROWS, String.valueOf(50));
SolrRequestHandler handler;
SolrQueryResponse rsp;
NamedList values;
NamedList terms;
handler = core.getRequestHandler("/terms");
assertTrue("handler is null and it shouldn't be", handler != null);
rsp = new SolrQueryResponse();
rsp.add("responseHeader", new SimpleOrderedMap());
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
values = rsp.getValues();
terms = (NamedList) values.get("terms");
assertTrue("terms Size: " + terms.size() + " is not: " + 6, terms.size() == 6);
assertTrue("aa is null and it shouldn't be", terms.get("aa") != null);
assertTrue("aaa is null and it shouldn't be", terms.get("aaa") != null);
assertTrue("ab is null and it shouldn't be", terms.get("ab") != null);
assertTrue("abb is null and it shouldn't be", terms.get("abb") != null);
assertTrue("abc is null and it shouldn't be", terms.get("abc") != null);
assertTrue("a is null", terms.get("a") != null);
assertTrue("b is not null and it should be", terms.get("b") == null);
params.add(TermsParams.TERMS_UPPER_INCLUSIVE, "true");
rsp = new SolrQueryResponse();
rsp.add("responseHeader", new SimpleOrderedMap());
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
values = rsp.getValues();
terms = (NamedList) values.get("terms");
assertTrue("terms Size: " + terms.size() + " is not: " + 7, terms.size() == 7);
assertTrue("aa is null and it shouldn't be", terms.get("aa") != null);
assertTrue("ab is null and it shouldn't be", terms.get("ab") != null);
assertTrue("aaa is null and it shouldn't be", terms.get("aaa") != null);
assertTrue("abb is null and it shouldn't be", terms.get("abb") != null);
assertTrue("abc is null and it shouldn't be", terms.get("abc") != null);
assertTrue("b is null and it shouldn't be", terms.get("b") != null);
assertTrue("a is null", terms.get("a") != null);
assertTrue("baa is not null", terms.get("baa") == null);
params.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false");
rsp = new SolrQueryResponse();
rsp.add("responseHeader", new SimpleOrderedMap());
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
values = rsp.getValues();
terms = (NamedList) values.get("terms");
assertTrue("terms Size: " + terms.size() + " is not: " + 6, terms.size() == 6);
assertTrue("aa is null and it shouldn't be", terms.get("aa") != null);
assertTrue("ab is null and it shouldn't be", terms.get("ab") != null);
assertTrue("aaa is null and it shouldn't be", terms.get("aaa") != null);
assertTrue("abb is null and it shouldn't be", terms.get("abb") != null);
assertTrue("abc is null and it shouldn't be", terms.get("abc") != null);
assertTrue("b is null and it shouldn't be", terms.get("b") != null);
assertTrue("a is not null", terms.get("a") == null);
assertTrue("baa is not null", terms.get("baa") == null);
params = new ModifiableSolrParams();
params.add(TermsParams.TERMS, "true");
params.add(TermsParams.TERMS_FIELD, "lowerfilt");
params.add(TermsParams.TERMS_LOWER, "a");
params.add(TermsParams.TERMS_UPPER, "b");
params.add(TermsParams.TERMS_ROWS, String.valueOf(2));
rsp = new SolrQueryResponse();
rsp.add("responseHeader", new SimpleOrderedMap());
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
values = rsp.getValues();
terms = (NamedList) values.get("terms");
assertTrue("terms Size: " + terms.size() + " is not: " + 2, terms.size() == 2);
assertTrue("aa is null and it shouldn't be", terms.get("a") != null);
assertTrue("aaa is null and it shouldn't be", terms.get("aa") != null);
assertTrue("abb is not null", terms.get("abb") == null);
assertTrue("abc is not null", terms.get("abc") == null);
assertTrue("b is null and it shouldn't be", terms.get("b") == null);
assertTrue("baa is not null", terms.get("baa") == null);
}
}

View File

@ -349,6 +349,14 @@
<str name="spellcheckIndexDir">spellchecker3</str>
</lst>
</searchComponent>
<searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/>
<requestHandler name="/terms" class="org.apache.solr.handler.component.SearchHandler">
<arr name="components">
<str>termsComp</str>
</arr>
</requestHandler>
<!--
The SpellingQueryConverter to convert raw (CommonParams.Q) queries into tokens. Uses a simple regular expression
to strip off field markup, boosts, ranges, etc. but it is not guaranteed to match an exact parse from the query parser.