mirror of https://github.com/apache/lucene.git
SOLR-877: Add access to TermEnum capabilities via TermsComponent
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@721491 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
5e6e0e3258
commit
b55c2b6592
|
@ -91,6 +91,8 @@ New Features
|
|||
17. SOLR-829: Allow slaves to request compressed files from master during replication
|
||||
(Simon Collins, Noble Paul, Akshay Ukey via shalin)
|
||||
|
||||
18. SOLR-877: Added TermsComponent for accessing Lucene's TermEnum capabilities.
|
||||
Useful for auto suggest and possibly distributed search. Not distributed search compliant. (gsingers)
|
||||
|
||||
Optimizations
|
||||
----------------------
|
||||
|
|
|
@ -610,6 +610,15 @@
|
|||
</requestHandler>
|
||||
|
||||
|
||||
<searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/>
|
||||
|
||||
<requestHandler name="/autoSuggest" class="org.apache.solr.handler.component.SearchHandler">
|
||||
<arr name="components">
|
||||
<str>termsComp</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
|
||||
|
||||
<!-- a search component that enables you to configure the top results for
|
||||
a given query regardless of the normal lucene scoring.-->
|
||||
<searchComponent name="elevator" class="solr.QueryElevationComponent" >
|
||||
|
|
|
@ -0,0 +1,62 @@
|
|||
package org.apache.solr.common.params;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
**/
|
||||
public class TermsParams {
|
||||
|
||||
public static final String TERMS = "terms";
|
||||
|
||||
public static final String TERMS_PREFIX = TERMS + ".";
|
||||
|
||||
/**
|
||||
* Required. Specify the field to look up terms in.
|
||||
*/
|
||||
public static final String TERMS_FIELD = TERMS_PREFIX + "fl";
|
||||
|
||||
/**
|
||||
* Optional. The lower bound term to start at. The TermEnum will start at the next term after this term in the dictionary.
|
||||
*
|
||||
* If not specified, the empty string is used
|
||||
*/
|
||||
public static final String TERMS_LOWER = TERMS_PREFIX + "lower";
|
||||
|
||||
/**
|
||||
* Optional. The term to stop at.
|
||||
*
|
||||
* @see #TERMS_UPPER_INCLUSIVE
|
||||
*/
|
||||
public static final String TERMS_UPPER = TERMS_PREFIX + "upper";
|
||||
/**
|
||||
* Optional. If true, include the upper bound term in the results. False by default.
|
||||
*/
|
||||
public static final String TERMS_UPPER_INCLUSIVE = TERMS_PREFIX + "upr.incl";
|
||||
|
||||
/**
|
||||
* Optional. If true, include the lower bound term in the results, otherwise skip to the next one. True by default.
|
||||
*/
|
||||
public static final String TERMS_LOWER_INCLUSIVE = TERMS_PREFIX + "lwr.incl";
|
||||
|
||||
/**
|
||||
* Optional. The number of results to return. If not specified, looks for {@link org.apache.solr.common.params.CommonParams#ROWS}. If that's not specified, uses 10.
|
||||
*/
|
||||
public static final String TERMS_ROWS = TERMS_PREFIX + "rows";
|
||||
}
|
|
@ -0,0 +1,101 @@
|
|||
package org.apache.solr.handler.component;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.index.TermEnum;
|
||||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.params.CommonParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
import org.apache.solr.common.params.TermsParams;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
|
||||
/**
|
||||
* Return TermEnum information, useful for things like auto suggest.
|
||||
*
|
||||
* @see org.apache.solr.common.params.TermsParams
|
||||
* See Lucene's TermEnum class
|
||||
*/
|
||||
public class TermsComponent extends SearchComponent {
|
||||
|
||||
|
||||
public void process(ResponseBuilder rb) throws IOException {
|
||||
SolrParams params = rb.req.getParams();
|
||||
if (params.getBool(TermsParams.TERMS, false)) {
|
||||
String lower = params.get(TermsParams.TERMS_LOWER, "");
|
||||
String field = params.get(TermsParams.TERMS_FIELD);
|
||||
if (field != null) {
|
||||
Term lowerTerm = new Term(field, lower);
|
||||
TermEnum termEnum = rb.req.getSearcher().getReader().terms(lowerTerm);//this will be positioned ready to go
|
||||
int rows = params.getInt(TermsParams.TERMS_ROWS, params.getInt(CommonParams.ROWS, 10));
|
||||
int i = 0;
|
||||
NamedList terms = new NamedList();
|
||||
rb.rsp.add("terms", terms);
|
||||
String upper = params.get(TermsParams.TERMS_UPPER);
|
||||
Term upperTerm = upper != null ? new Term(field, upper) : null;
|
||||
boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
|
||||
boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
|
||||
boolean hasMore = true;
|
||||
if (lowerIncl == false) {
|
||||
hasMore = termEnum.next();
|
||||
}
|
||||
if (hasMore == true) {
|
||||
do {
|
||||
Term theTerm = termEnum.term();
|
||||
String theText = theTerm.text();
|
||||
int upperCmp = upperTerm != null ? theTerm.compareTo(upperTerm) : -1;
|
||||
if (theTerm != null && theTerm.field().equals(field)
|
||||
&& ((upperIncl == true && upperCmp <= 0) ||
|
||||
(upperIncl == false && upperCmp < 0))) {
|
||||
terms.add(theText, String.valueOf(termEnum.docFreq()));
|
||||
} else {//we're done
|
||||
break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
while (i < rows && termEnum.next());
|
||||
}
|
||||
termEnum.close();
|
||||
} else {
|
||||
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public void prepare(ResponseBuilder rb) throws IOException {
|
||||
//nothing to do
|
||||
}
|
||||
|
||||
public String getVersion() {
|
||||
return "$Revision$";
|
||||
}
|
||||
|
||||
public String getSourceId() {
|
||||
return "$Id:$";
|
||||
}
|
||||
|
||||
public String getSource() {
|
||||
return "$Revision:$";
|
||||
}
|
||||
|
||||
public String getDescription() {
|
||||
return "A Component for working with Term Enumerators";
|
||||
}
|
||||
}
|
|
@ -0,0 +1,203 @@
|
|||
package org.apache.solr.handler.component;
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.mortbay.log.Log;
|
||||
import org.apache.solr.util.AbstractSolrTestCase;
|
||||
import org.apache.solr.core.SolrCore;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.TermsParams;
|
||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||
import org.apache.solr.common.util.NamedList;
|
||||
import org.apache.solr.request.SolrRequestHandler;
|
||||
import org.apache.solr.request.LocalSolrQueryRequest;
|
||||
import org.apache.solr.request.SolrQueryResponse;
|
||||
|
||||
import java.util.Iterator;
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*
|
||||
**/
|
||||
public class TermsComponentTest extends AbstractSolrTestCase {
|
||||
public String getSchemaFile() {
|
||||
return "schema.xml";
|
||||
}
|
||||
|
||||
public String getSolrConfigFile() {
|
||||
return "solrconfig.xml";
|
||||
}
|
||||
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
||||
assertU(adoc("id", "0", "lowerfilt", "a"));
|
||||
assertU(adoc("id", "1", "lowerfilt", "a"));
|
||||
assertU(adoc("id", "2", "lowerfilt", "aa"));
|
||||
assertU(adoc("id", "3", "lowerfilt", "aaa"));
|
||||
assertU(adoc("id", "4", "lowerfilt", "ab"));
|
||||
assertU(adoc("id", "5", "lowerfilt", "abb"));
|
||||
assertU(adoc("id", "6", "lowerfilt", "abc"));
|
||||
assertU(adoc("id", "7", "lowerfilt", "b"));
|
||||
assertU(adoc("id", "8", "lowerfilt", "baa"));
|
||||
assertU(adoc("id", "9", "lowerfilt", "bbb"));
|
||||
|
||||
assertU("commit", commit());
|
||||
}
|
||||
|
||||
public void testEmptyLower() throws Exception {
|
||||
SolrCore core = h.getCore();
|
||||
TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
|
||||
assertTrue("tc is null and it shouldn't be", tc != null);
|
||||
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(TermsParams.TERMS, "true");
|
||||
params.add(TermsParams.TERMS_FIELD, "lowerfilt");
|
||||
//no lower bound
|
||||
params.add(TermsParams.TERMS_UPPER, "b");
|
||||
params.add(TermsParams.TERMS_ROWS, String.valueOf(50));
|
||||
SolrRequestHandler handler;
|
||||
SolrQueryResponse rsp;
|
||||
NamedList values;
|
||||
NamedList terms;
|
||||
handler = core.getRequestHandler("/terms");
|
||||
assertTrue("handler is null and it shouldn't be", handler != null);
|
||||
rsp = new SolrQueryResponse();
|
||||
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||
values = rsp.getValues();
|
||||
terms = (NamedList) values.get("terms");
|
||||
assertTrue("terms Size: " + terms.size() + " is not: " + 6, terms.size() == 6);
|
||||
assertTrue("a is null and it shouldn't be", terms.get("a") != null);
|
||||
assertTrue("aa is null and it shouldn't be", terms.get("aa") != null);
|
||||
assertTrue("aaa is null and it shouldn't be", terms.get("aaa") != null);
|
||||
assertTrue("ab is null and it shouldn't be", terms.get("ab") != null);
|
||||
assertTrue("abb is null and it shouldn't be", terms.get("abb") != null);
|
||||
assertTrue("abc is null and it shouldn't be", terms.get("abc") != null);
|
||||
}
|
||||
|
||||
public void testPastUpper() throws Exception {
|
||||
SolrCore core = h.getCore();
|
||||
TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
|
||||
assertTrue("tc is null and it shouldn't be", tc != null);
|
||||
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(TermsParams.TERMS, "true");
|
||||
params.add(TermsParams.TERMS_FIELD, "lowerfilt");
|
||||
//no lower bound
|
||||
params.add(TermsParams.TERMS_LOWER, "d");
|
||||
params.add(TermsParams.TERMS_ROWS, String.valueOf(50));
|
||||
SolrRequestHandler handler;
|
||||
SolrQueryResponse rsp;
|
||||
NamedList values;
|
||||
NamedList terms;
|
||||
handler = core.getRequestHandler("/terms");
|
||||
assertTrue("handler is null and it shouldn't be", handler != null);
|
||||
rsp = new SolrQueryResponse();
|
||||
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||
values = rsp.getValues();
|
||||
terms = (NamedList) values.get("terms");
|
||||
assertTrue("terms Size: " + terms.size() + " is not: " + 0, terms.size() == 0);
|
||||
}
|
||||
|
||||
public void test() throws Exception {
|
||||
SolrCore core = h.getCore();
|
||||
TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp");
|
||||
assertTrue("tc is null and it shouldn't be", tc != null);
|
||||
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(TermsParams.TERMS, "true");
|
||||
params.add(TermsParams.TERMS_FIELD, "lowerfilt");
|
||||
params.add(TermsParams.TERMS_LOWER, "a");
|
||||
params.add(TermsParams.TERMS_UPPER, "b");
|
||||
params.add(TermsParams.TERMS_ROWS, String.valueOf(50));
|
||||
SolrRequestHandler handler;
|
||||
SolrQueryResponse rsp;
|
||||
NamedList values;
|
||||
NamedList terms;
|
||||
handler = core.getRequestHandler("/terms");
|
||||
assertTrue("handler is null and it shouldn't be", handler != null);
|
||||
rsp = new SolrQueryResponse();
|
||||
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||
values = rsp.getValues();
|
||||
terms = (NamedList) values.get("terms");
|
||||
assertTrue("terms Size: " + terms.size() + " is not: " + 6, terms.size() == 6);
|
||||
assertTrue("aa is null and it shouldn't be", terms.get("aa") != null);
|
||||
assertTrue("aaa is null and it shouldn't be", terms.get("aaa") != null);
|
||||
assertTrue("ab is null and it shouldn't be", terms.get("ab") != null);
|
||||
assertTrue("abb is null and it shouldn't be", terms.get("abb") != null);
|
||||
assertTrue("abc is null and it shouldn't be", terms.get("abc") != null);
|
||||
assertTrue("a is null", terms.get("a") != null);
|
||||
assertTrue("b is not null and it should be", terms.get("b") == null);
|
||||
|
||||
params.add(TermsParams.TERMS_UPPER_INCLUSIVE, "true");
|
||||
rsp = new SolrQueryResponse();
|
||||
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||
values = rsp.getValues();
|
||||
terms = (NamedList) values.get("terms");
|
||||
assertTrue("terms Size: " + terms.size() + " is not: " + 7, terms.size() == 7);
|
||||
assertTrue("aa is null and it shouldn't be", terms.get("aa") != null);
|
||||
assertTrue("ab is null and it shouldn't be", terms.get("ab") != null);
|
||||
assertTrue("aaa is null and it shouldn't be", terms.get("aaa") != null);
|
||||
assertTrue("abb is null and it shouldn't be", terms.get("abb") != null);
|
||||
assertTrue("abc is null and it shouldn't be", terms.get("abc") != null);
|
||||
assertTrue("b is null and it shouldn't be", terms.get("b") != null);
|
||||
assertTrue("a is null", terms.get("a") != null);
|
||||
assertTrue("baa is not null", terms.get("baa") == null);
|
||||
|
||||
params.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false");
|
||||
rsp = new SolrQueryResponse();
|
||||
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||
values = rsp.getValues();
|
||||
terms = (NamedList) values.get("terms");
|
||||
assertTrue("terms Size: " + terms.size() + " is not: " + 6, terms.size() == 6);
|
||||
assertTrue("aa is null and it shouldn't be", terms.get("aa") != null);
|
||||
assertTrue("ab is null and it shouldn't be", terms.get("ab") != null);
|
||||
assertTrue("aaa is null and it shouldn't be", terms.get("aaa") != null);
|
||||
assertTrue("abb is null and it shouldn't be", terms.get("abb") != null);
|
||||
assertTrue("abc is null and it shouldn't be", terms.get("abc") != null);
|
||||
assertTrue("b is null and it shouldn't be", terms.get("b") != null);
|
||||
assertTrue("a is not null", terms.get("a") == null);
|
||||
assertTrue("baa is not null", terms.get("baa") == null);
|
||||
|
||||
|
||||
|
||||
params = new ModifiableSolrParams();
|
||||
params.add(TermsParams.TERMS, "true");
|
||||
params.add(TermsParams.TERMS_FIELD, "lowerfilt");
|
||||
params.add(TermsParams.TERMS_LOWER, "a");
|
||||
params.add(TermsParams.TERMS_UPPER, "b");
|
||||
params.add(TermsParams.TERMS_ROWS, String.valueOf(2));
|
||||
rsp = new SolrQueryResponse();
|
||||
rsp.add("responseHeader", new SimpleOrderedMap());
|
||||
handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp);
|
||||
values = rsp.getValues();
|
||||
terms = (NamedList) values.get("terms");
|
||||
assertTrue("terms Size: " + terms.size() + " is not: " + 2, terms.size() == 2);
|
||||
assertTrue("aa is null and it shouldn't be", terms.get("a") != null);
|
||||
assertTrue("aaa is null and it shouldn't be", terms.get("aa") != null);
|
||||
assertTrue("abb is not null", terms.get("abb") == null);
|
||||
assertTrue("abc is not null", terms.get("abc") == null);
|
||||
assertTrue("b is null and it shouldn't be", terms.get("b") == null);
|
||||
assertTrue("baa is not null", terms.get("baa") == null);
|
||||
}
|
||||
}
|
|
@ -349,6 +349,14 @@
|
|||
<str name="spellcheckIndexDir">spellchecker3</str>
|
||||
</lst>
|
||||
</searchComponent>
|
||||
|
||||
<searchComponent name="termsComp" class="org.apache.solr.handler.component.TermsComponent"/>
|
||||
|
||||
<requestHandler name="/terms" class="org.apache.solr.handler.component.SearchHandler">
|
||||
<arr name="components">
|
||||
<str>termsComp</str>
|
||||
</arr>
|
||||
</requestHandler>
|
||||
<!--
|
||||
The SpellingQueryConverter to convert raw (CommonParams.Q) queries into tokens. Uses a simple regular expression
|
||||
to strip off field markup, boosts, ranges, etc. but it is not guaranteed to match an exact parse from the query parser.
|
||||
|
|
Loading…
Reference in New Issue