diff --git a/CHANGES.txt b/CHANGES.txt index 5790052369a..f0386e21b8d 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -90,7 +90,9 @@ New Features 17. SOLR-829: Allow slaves to request compressed files from master during replication (Simon Collins, Noble Paul, Akshay Ukey via shalin) - + +18. SOLR-877: Added TermsComponent for accessing Lucene's TermEnum capabilities. + Useful for auto suggest and possibly distributed search. Not distributed search compliant. (gsingers) Optimizations ---------------------- diff --git a/example/solr/conf/solrconfig.xml b/example/solr/conf/solrconfig.xml index ceb484ec720..2eebc87d802 100755 --- a/example/solr/conf/solrconfig.xml +++ b/example/solr/conf/solrconfig.xml @@ -610,6 +610,15 @@ + + + + + termsComp + + + + diff --git a/src/java/org/apache/solr/common/params/TermsParams.java b/src/java/org/apache/solr/common/params/TermsParams.java new file mode 100644 index 00000000000..bdb1017a39d --- /dev/null +++ b/src/java/org/apache/solr/common/params/TermsParams.java @@ -0,0 +1,62 @@ +package org.apache.solr.common.params; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +/** + * + * + **/ +public class TermsParams { + + public static final String TERMS = "terms"; + + public static final String TERMS_PREFIX = TERMS + "."; + + /** + * Required. Specify the field to look up terms in. + */ + public static final String TERMS_FIELD = TERMS_PREFIX + "fl"; + + /** + * Optional. The lower bound term to start at. The TermEnum will start at the next term after this term in the dictionary. + * + * If not specified, the empty string is used + */ + public static final String TERMS_LOWER = TERMS_PREFIX + "lower"; + + /** + * Optional. The term to stop at. + * + * @see #TERMS_UPPER_INCLUSIVE + */ + public static final String TERMS_UPPER = TERMS_PREFIX + "upper"; + /** + * Optional. If true, include the upper bound term in the results. False by default. + */ + public static final String TERMS_UPPER_INCLUSIVE = TERMS_PREFIX + "upr.incl"; + + /** + * Optional. If true, include the lower bound term in the results, otherwise skip to the next one. True by default. + */ + public static final String TERMS_LOWER_INCLUSIVE = TERMS_PREFIX + "lwr.incl"; + + /** + * Optional. The number of results to return. If not specified, looks for {@link org.apache.solr.common.params.CommonParams#ROWS}. If that's not specified, uses 10. + */ + public static final String TERMS_ROWS = TERMS_PREFIX + "rows"; +} diff --git a/src/java/org/apache/solr/handler/component/TermsComponent.java b/src/java/org/apache/solr/handler/component/TermsComponent.java new file mode 100644 index 00000000000..49b0cf57833 --- /dev/null +++ b/src/java/org/apache/solr/handler/component/TermsComponent.java @@ -0,0 +1,101 @@ +package org.apache.solr.handler.component; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermEnum; +import org.apache.solr.common.SolrException; +import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.params.SolrParams; +import org.apache.solr.common.params.TermsParams; +import org.apache.solr.common.util.NamedList; + +import java.io.IOException; + + +/** + * Return TermEnum information, useful for things like auto suggest. + * + * @see org.apache.solr.common.params.TermsParams + * See Lucene's TermEnum class + */ +public class TermsComponent extends SearchComponent { + + + public void process(ResponseBuilder rb) throws IOException { + SolrParams params = rb.req.getParams(); + if (params.getBool(TermsParams.TERMS, false)) { + String lower = params.get(TermsParams.TERMS_LOWER, ""); + String field = params.get(TermsParams.TERMS_FIELD); + if (field != null) { + Term lowerTerm = new Term(field, lower); + TermEnum termEnum = rb.req.getSearcher().getReader().terms(lowerTerm);//this will be positioned ready to go + int rows = params.getInt(TermsParams.TERMS_ROWS, params.getInt(CommonParams.ROWS, 10)); + int i = 0; + NamedList terms = new NamedList(); + rb.rsp.add("terms", terms); + String upper = params.get(TermsParams.TERMS_UPPER); + Term upperTerm = upper != null ? new Term(field, upper) : null; + boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false); + boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true); + boolean hasMore = true; + if (lowerIncl == false) { + hasMore = termEnum.next(); + } + if (hasMore == true) { + do { + Term theTerm = termEnum.term(); + String theText = theTerm.text(); + int upperCmp = upperTerm != null ? theTerm.compareTo(upperTerm) : -1; + if (theTerm != null && theTerm.field().equals(field) + && ((upperIncl == true && upperCmp <= 0) || + (upperIncl == false && upperCmp < 0))) { + terms.add(theText, String.valueOf(termEnum.docFreq())); + } else {//we're done + break; + } + i++; + } + while (i < rows && termEnum.next()); + } + termEnum.close(); + } else { + throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified"); + } + } + } + + public void prepare(ResponseBuilder rb) throws IOException { + //nothing to do + } + + public String getVersion() { + return "$Revision$"; + } + + public String getSourceId() { + return "$Id:$"; + } + + public String getSource() { + return "$Revision:$"; + } + + public String getDescription() { + return "A Component for working with Term Enumerators"; + } +} diff --git a/src/test/org/apache/solr/handler/component/TermsComponentTest.java b/src/test/org/apache/solr/handler/component/TermsComponentTest.java new file mode 100644 index 00000000000..cae9797c13e --- /dev/null +++ b/src/test/org/apache/solr/handler/component/TermsComponentTest.java @@ -0,0 +1,203 @@ +package org.apache.solr.handler.component; +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.mortbay.log.Log; +import org.apache.solr.util.AbstractSolrTestCase; +import org.apache.solr.core.SolrCore; +import org.apache.solr.common.params.ModifiableSolrParams; +import org.apache.solr.common.params.TermsParams; +import org.apache.solr.common.util.SimpleOrderedMap; +import org.apache.solr.common.util.NamedList; +import org.apache.solr.request.SolrRequestHandler; +import org.apache.solr.request.LocalSolrQueryRequest; +import org.apache.solr.request.SolrQueryResponse; + +import java.util.Iterator; + + +/** + * + * + **/ +public class TermsComponentTest extends AbstractSolrTestCase { + public String getSchemaFile() { + return "schema.xml"; + } + + public String getSolrConfigFile() { + return "solrconfig.xml"; + } + + public void setUp() throws Exception { + super.setUp(); + + assertU(adoc("id", "0", "lowerfilt", "a")); + assertU(adoc("id", "1", "lowerfilt", "a")); + assertU(adoc("id", "2", "lowerfilt", "aa")); + assertU(adoc("id", "3", "lowerfilt", "aaa")); + assertU(adoc("id", "4", "lowerfilt", "ab")); + assertU(adoc("id", "5", "lowerfilt", "abb")); + assertU(adoc("id", "6", "lowerfilt", "abc")); + assertU(adoc("id", "7", "lowerfilt", "b")); + assertU(adoc("id", "8", "lowerfilt", "baa")); + assertU(adoc("id", "9", "lowerfilt", "bbb")); + + assertU("commit", commit()); + } + + public void testEmptyLower() throws Exception { + SolrCore core = h.getCore(); + TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp"); + assertTrue("tc is null and it shouldn't be", tc != null); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.add(TermsParams.TERMS, "true"); + params.add(TermsParams.TERMS_FIELD, "lowerfilt"); + //no lower bound + params.add(TermsParams.TERMS_UPPER, "b"); + params.add(TermsParams.TERMS_ROWS, String.valueOf(50)); + SolrRequestHandler handler; + SolrQueryResponse rsp; + NamedList values; + NamedList terms; + handler = core.getRequestHandler("/terms"); + assertTrue("handler is null and it shouldn't be", handler != null); + rsp = new SolrQueryResponse(); + rsp.add("responseHeader", new SimpleOrderedMap()); + handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp); + values = rsp.getValues(); + terms = (NamedList) values.get("terms"); + assertTrue("terms Size: " + terms.size() + " is not: " + 6, terms.size() == 6); + assertTrue("a is null and it shouldn't be", terms.get("a") != null); + assertTrue("aa is null and it shouldn't be", terms.get("aa") != null); + assertTrue("aaa is null and it shouldn't be", terms.get("aaa") != null); + assertTrue("ab is null and it shouldn't be", terms.get("ab") != null); + assertTrue("abb is null and it shouldn't be", terms.get("abb") != null); + assertTrue("abc is null and it shouldn't be", terms.get("abc") != null); + } + + public void testPastUpper() throws Exception { + SolrCore core = h.getCore(); + TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp"); + assertTrue("tc is null and it shouldn't be", tc != null); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.add(TermsParams.TERMS, "true"); + params.add(TermsParams.TERMS_FIELD, "lowerfilt"); + //no lower bound + params.add(TermsParams.TERMS_LOWER, "d"); + params.add(TermsParams.TERMS_ROWS, String.valueOf(50)); + SolrRequestHandler handler; + SolrQueryResponse rsp; + NamedList values; + NamedList terms; + handler = core.getRequestHandler("/terms"); + assertTrue("handler is null and it shouldn't be", handler != null); + rsp = new SolrQueryResponse(); + rsp.add("responseHeader", new SimpleOrderedMap()); + handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp); + values = rsp.getValues(); + terms = (NamedList) values.get("terms"); + assertTrue("terms Size: " + terms.size() + " is not: " + 0, terms.size() == 0); + } + + public void test() throws Exception { + SolrCore core = h.getCore(); + TermsComponent tc = (TermsComponent) core.getSearchComponent("termsComp"); + assertTrue("tc is null and it shouldn't be", tc != null); + + ModifiableSolrParams params = new ModifiableSolrParams(); + params.add(TermsParams.TERMS, "true"); + params.add(TermsParams.TERMS_FIELD, "lowerfilt"); + params.add(TermsParams.TERMS_LOWER, "a"); + params.add(TermsParams.TERMS_UPPER, "b"); + params.add(TermsParams.TERMS_ROWS, String.valueOf(50)); + SolrRequestHandler handler; + SolrQueryResponse rsp; + NamedList values; + NamedList terms; + handler = core.getRequestHandler("/terms"); + assertTrue("handler is null and it shouldn't be", handler != null); + rsp = new SolrQueryResponse(); + rsp.add("responseHeader", new SimpleOrderedMap()); + handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp); + values = rsp.getValues(); + terms = (NamedList) values.get("terms"); + assertTrue("terms Size: " + terms.size() + " is not: " + 6, terms.size() == 6); + assertTrue("aa is null and it shouldn't be", terms.get("aa") != null); + assertTrue("aaa is null and it shouldn't be", terms.get("aaa") != null); + assertTrue("ab is null and it shouldn't be", terms.get("ab") != null); + assertTrue("abb is null and it shouldn't be", terms.get("abb") != null); + assertTrue("abc is null and it shouldn't be", terms.get("abc") != null); + assertTrue("a is null", terms.get("a") != null); + assertTrue("b is not null and it should be", terms.get("b") == null); + + params.add(TermsParams.TERMS_UPPER_INCLUSIVE, "true"); + rsp = new SolrQueryResponse(); + rsp.add("responseHeader", new SimpleOrderedMap()); + handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp); + values = rsp.getValues(); + terms = (NamedList) values.get("terms"); + assertTrue("terms Size: " + terms.size() + " is not: " + 7, terms.size() == 7); + assertTrue("aa is null and it shouldn't be", terms.get("aa") != null); + assertTrue("ab is null and it shouldn't be", terms.get("ab") != null); + assertTrue("aaa is null and it shouldn't be", terms.get("aaa") != null); + assertTrue("abb is null and it shouldn't be", terms.get("abb") != null); + assertTrue("abc is null and it shouldn't be", terms.get("abc") != null); + assertTrue("b is null and it shouldn't be", terms.get("b") != null); + assertTrue("a is null", terms.get("a") != null); + assertTrue("baa is not null", terms.get("baa") == null); + + params.add(TermsParams.TERMS_LOWER_INCLUSIVE, "false"); + rsp = new SolrQueryResponse(); + rsp.add("responseHeader", new SimpleOrderedMap()); + handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp); + values = rsp.getValues(); + terms = (NamedList) values.get("terms"); + assertTrue("terms Size: " + terms.size() + " is not: " + 6, terms.size() == 6); + assertTrue("aa is null and it shouldn't be", terms.get("aa") != null); + assertTrue("ab is null and it shouldn't be", terms.get("ab") != null); + assertTrue("aaa is null and it shouldn't be", terms.get("aaa") != null); + assertTrue("abb is null and it shouldn't be", terms.get("abb") != null); + assertTrue("abc is null and it shouldn't be", terms.get("abc") != null); + assertTrue("b is null and it shouldn't be", terms.get("b") != null); + assertTrue("a is not null", terms.get("a") == null); + assertTrue("baa is not null", terms.get("baa") == null); + + + + params = new ModifiableSolrParams(); + params.add(TermsParams.TERMS, "true"); + params.add(TermsParams.TERMS_FIELD, "lowerfilt"); + params.add(TermsParams.TERMS_LOWER, "a"); + params.add(TermsParams.TERMS_UPPER, "b"); + params.add(TermsParams.TERMS_ROWS, String.valueOf(2)); + rsp = new SolrQueryResponse(); + rsp.add("responseHeader", new SimpleOrderedMap()); + handler.handleRequest(new LocalSolrQueryRequest(core, params), rsp); + values = rsp.getValues(); + terms = (NamedList) values.get("terms"); + assertTrue("terms Size: " + terms.size() + " is not: " + 2, terms.size() == 2); + assertTrue("aa is null and it shouldn't be", terms.get("a") != null); + assertTrue("aaa is null and it shouldn't be", terms.get("aa") != null); + assertTrue("abb is not null", terms.get("abb") == null); + assertTrue("abc is not null", terms.get("abc") == null); + assertTrue("b is null and it shouldn't be", terms.get("b") == null); + assertTrue("baa is not null", terms.get("baa") == null); + } +} diff --git a/src/test/test-files/solr/conf/solrconfig.xml b/src/test/test-files/solr/conf/solrconfig.xml index 6322d1f032a..ccd587796a0 100644 --- a/src/test/test-files/solr/conf/solrconfig.xml +++ b/src/test/test-files/solr/conf/solrconfig.xml @@ -349,6 +349,14 @@ spellchecker3 + + + + + + termsComp + +