mirror of https://github.com/apache/lucene.git
SOLR-9243:Add terms.list parameter to the TermsComponent to fetch the docFreq for a list of terms
This commit is contained in:
parent
3ca4fea578
commit
551bdc6f53
|
@ -28,6 +28,7 @@ import org.apache.solr.common.util.StrUtils;
|
|||
import org.apache.solr.schema.FieldType;
|
||||
import org.apache.solr.schema.StrField;
|
||||
import org.apache.solr.request.SimpleFacets.CountPair;
|
||||
import org.apache.solr.search.SolrIndexSearcher;
|
||||
import org.apache.solr.util.BoundedTreeSet;
|
||||
import org.apache.solr.client.solrj.response.TermsResponse;
|
||||
|
||||
|
@ -92,6 +93,12 @@ public class TermsComponent extends SearchComponent {
|
|||
|
||||
if (fields == null || fields.length==0) return;
|
||||
|
||||
String termList = params.get(TermsParams.TERMS_LIST);
|
||||
if(termList != null) {
|
||||
fetchTerms(rb.req.getSearcher(), fields, termList, termsResult);
|
||||
return;
|
||||
}
|
||||
|
||||
int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
|
||||
if (limit < 0) {
|
||||
limit = Integer.MAX_VALUE;
|
||||
|
@ -377,8 +384,12 @@ public class TermsComponent extends SearchComponent {
|
|||
NamedList<Object> response = new SimpleOrderedMap<>();
|
||||
|
||||
// determine if we are going index or count sort
|
||||
boolean sort = !TermsParams.TERMS_SORT_INDEX.equals(params.get(
|
||||
TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
|
||||
boolean sort = !TermsParams.TERMS_SORT_INDEX.equals(params.get(TermsParams.TERMS_SORT,
|
||||
TermsParams.TERMS_SORT_COUNT));
|
||||
if(params.get(TermsParams.TERMS_LIST) != null) {
|
||||
//Always use lexical sort when TERM_LIST is provided
|
||||
sort = false;
|
||||
}
|
||||
|
||||
// init minimum frequency
|
||||
long freqmin = 1;
|
||||
|
@ -466,6 +477,76 @@ public class TermsComponent extends SearchComponent {
|
|||
}
|
||||
}
|
||||
|
||||
private void fetchTerms(SolrIndexSearcher indexSearcher,
|
||||
String[] fields,
|
||||
String termList,
|
||||
NamedList result) throws IOException {
|
||||
|
||||
NamedList termsMap = new SimpleOrderedMap();
|
||||
List<LeafReaderContext> leaves = indexSearcher.getTopReaderContext().leaves();
|
||||
String field = fields[0];
|
||||
FieldType fieldType = indexSearcher.getSchema().getField(field).getType();
|
||||
String[] splitTerms = termList.split(",");
|
||||
|
||||
for(int i=0; i<splitTerms.length; i++) {
|
||||
splitTerms[i] = splitTerms[i].trim();
|
||||
}
|
||||
|
||||
Term[] terms = new Term[splitTerms.length];
|
||||
TermContext[] termContexts = new TermContext[terms.length];
|
||||
for(int i=0; i<splitTerms.length; i++) {
|
||||
terms[i] = new Term(field, fieldType.readableToIndexed(splitTerms[i]));
|
||||
}
|
||||
|
||||
Arrays.sort(terms);
|
||||
|
||||
collectTermContext(indexSearcher.getTopReaderContext().reader(), leaves, termContexts, terms);
|
||||
|
||||
for(int i=0; i<terms.length; i++) {
|
||||
if(termContexts[i] != null) {
|
||||
String outTerm = fieldType.indexedToReadable(terms[i].bytes().utf8ToString());
|
||||
int docFreq = termContexts[i].docFreq();
|
||||
termsMap.add(outTerm, docFreq);
|
||||
}
|
||||
}
|
||||
|
||||
result.add(field, termsMap);
|
||||
}
|
||||
|
||||
private void collectTermContext(IndexReader reader,
|
||||
List<LeafReaderContext> leaves, TermContext[] contextArray,
|
||||
Term[] queryTerms) throws IOException {
|
||||
TermsEnum termsEnum = null;
|
||||
for (LeafReaderContext context : leaves) {
|
||||
final Fields fields = context.reader().fields();
|
||||
for (int i = 0; i < queryTerms.length; i++) {
|
||||
Term term = queryTerms[i];
|
||||
TermContext termContext = contextArray[i];
|
||||
final Terms terms = fields.terms(term.field());
|
||||
if (terms == null) {
|
||||
// field does not exist
|
||||
continue;
|
||||
}
|
||||
termsEnum = terms.iterator();
|
||||
assert termsEnum != null;
|
||||
|
||||
if (termsEnum == TermsEnum.EMPTY) continue;
|
||||
if (termsEnum.seekExact(term.bytes())) {
|
||||
if (termContext == null) {
|
||||
contextArray[i] = new TermContext(reader.getContext(),
|
||||
termsEnum.termState(), context.ord, termsEnum.docFreq(),
|
||||
termsEnum.totalTermFreq());
|
||||
} else {
|
||||
termContext.register(termsEnum.termState(), context.ord,
|
||||
termsEnum.docFreq(), termsEnum.totalTermFreq());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getDescription() {
|
||||
return "A Component for working with Term Enumerators";
|
||||
|
|
|
@ -30,10 +30,10 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase
|
|||
@Test
|
||||
public void test() throws Exception {
|
||||
del("*:*");
|
||||
index(id, 18, "b_t", "snake spider shark snail slug seal");
|
||||
index(id, 19, "b_t", "snake spider shark snail slug");
|
||||
index(id, 20, "b_t", "snake spider shark snail");
|
||||
index(id, 21, "b_t", "snake spider shark");
|
||||
index(id, 18, "b_t", "snake spider shark snail slug seal", "foo_i", "1");
|
||||
index(id, 19, "b_t", "snake spider shark snail slug", "foo_i", "2");
|
||||
index(id, 20, "b_t", "snake spider shark snail", "foo_i", "3");
|
||||
index(id, 21, "b_t", "snake spider shark", "foo_i", "2");
|
||||
index(id, 22, "b_t", "snake spider");
|
||||
index(id, 23, "b_t", "snake");
|
||||
index(id, 24, "b_t", "ant zebra");
|
||||
|
@ -49,5 +49,8 @@ public class DistributedTermsComponentTest extends BaseDistributedSearchTestCase
|
|||
query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms", "true", "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.sort", "index");
|
||||
query("qt", "/terms", "shards.qt", "/terms", "terms.limit", 5, "terms", "true", "terms.fl", "b_t", "terms.prefix", "s", "terms.lower", "s", "terms.upper", "sn", "terms.sort", "index");
|
||||
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.sort", "index");
|
||||
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "b_t", "terms.list", "snake, zebra, ant, bad");
|
||||
query("qt", "/terms", "shards.qt", "/terms", "terms", "true", "terms.fl", "foo_i", "terms.list", "2, 3, 1");
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -32,9 +32,9 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
|
|||
@BeforeClass
|
||||
public static void beforeTest() throws Exception {
|
||||
System.setProperty("enable.update.log", "false"); // schema12 doesn't support _version_
|
||||
initCore("solrconfig.xml","schema12.xml");
|
||||
initCore("solrconfig.xml", "schema12.xml");
|
||||
|
||||
assertNull(h.validateUpdate(adoc("id", "0", "lowerfilt", "a", "standardfilt", "a", "foo_i","1")));
|
||||
assertNull(h.validateUpdate(adoc("id", "0", "lowerfilt", "a", "standardfilt", "a", "foo_i", "1")));
|
||||
assertNull(h.validateUpdate(adoc("id", "1", "lowerfilt", "a", "standardfilt", "aa", "foo_i","1")));
|
||||
assertNull(h.validateUpdate(adoc("id", "2", "lowerfilt", "aa", "standardfilt", "aaa", "foo_i","2")));
|
||||
assertNull(h.validateUpdate(adoc("id", "3", "lowerfilt", "aaa", "standardfilt", "abbb")));
|
||||
|
@ -45,7 +45,10 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
|
|||
assertNull(h.validateUpdate(adoc("id", "8", "lowerfilt", "baa", "standardfilt", "cccc")));
|
||||
assertNull(h.validateUpdate(adoc("id", "9", "lowerfilt", "bbb", "standardfilt", "ccccc")));
|
||||
|
||||
|
||||
assertNull(h.validateUpdate(adoc("id", "10", "standardfilt", "ddddd")));
|
||||
|
||||
assertNull(h.validateUpdate(commit()));
|
||||
assertNull(h.validateUpdate(adoc("id", "11", "standardfilt", "ddddd")));
|
||||
assertNull(h.validateUpdate(adoc("id", "12", "standardfilt", "ddddd")));
|
||||
assertNull(h.validateUpdate(adoc("id", "13", "standardfilt", "ddddd")));
|
||||
|
@ -53,6 +56,8 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
|
|||
assertNull(h.validateUpdate(adoc("id", "15", "standardfilt", "d")));
|
||||
assertNull(h.validateUpdate(adoc("id", "16", "standardfilt", "d")));
|
||||
|
||||
assertNull(h.validateUpdate(commit()));
|
||||
|
||||
assertNull(h.validateUpdate(adoc("id", "17", "standardfilt", "snake")));
|
||||
assertNull(h.validateUpdate(adoc("id", "18", "standardfilt", "spider")));
|
||||
assertNull(h.validateUpdate(adoc("id", "19", "standardfilt", "shark")));
|
||||
|
@ -137,13 +142,13 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
|
|||
@Test
|
||||
public void testRegexpWithFlags() throws Exception {
|
||||
// TODO: there are no uppercase or mixed-case terms in the index!
|
||||
assertQ(req("indent","true", "qt","/terms", "terms","true",
|
||||
"terms.fl","standardfilt",
|
||||
"terms.lower","a", "terms.lower.incl","false",
|
||||
"terms.upper","c", "terms.upper.incl","true",
|
||||
"terms.regex","B.*",
|
||||
"terms.regex.flag","case_insensitive")
|
||||
,"count(//lst[@name='standardfilt']/*)=3"
|
||||
assertQ(req("indent", "true", "qt", "/terms", "terms", "true",
|
||||
"terms.fl", "standardfilt",
|
||||
"terms.lower", "a", "terms.lower.incl", "false",
|
||||
"terms.upper", "c", "terms.upper.incl", "true",
|
||||
"terms.regex", "B.*",
|
||||
"terms.regex.flag", "case_insensitive")
|
||||
, "count(//lst[@name='standardfilt']/*)=3"
|
||||
);
|
||||
}
|
||||
|
||||
|
@ -162,6 +167,29 @@ public class TermsComponentTest extends SolrTestCaseJ4 {
|
|||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTermsList() throws Exception {
|
||||
//Terms list always returns in index order
|
||||
assertQ(req("indent","true", "qt","/terms", "terms","true",
|
||||
"terms.fl","standardfilt",
|
||||
"terms.list","spider, snake, shark, ddddd, bad")
|
||||
,"count(//lst[@name='standardfilt']/*)=4"
|
||||
,"//lst[@name='standardfilt']/int[1][@name='ddddd'][.='4']"
|
||||
,"//lst[@name='standardfilt']/int[2][@name='shark'][.='2']"
|
||||
,"//lst[@name='standardfilt']/int[3][@name='snake'][.='3']"
|
||||
,"//lst[@name='standardfilt']/int[4][@name='spider'][.='1']"
|
||||
);
|
||||
|
||||
//Test with numeric terms
|
||||
assertQ(req("indent","true", "qt","/terms", "terms","true",
|
||||
"terms.fl","foo_i",
|
||||
"terms.list","2, 1")
|
||||
,"count(//lst[@name='foo_i']/*)=2"
|
||||
,"//lst[@name='foo_i']/int[1][@name='1'][.='2']"
|
||||
,"//lst[@name='foo_i']/int[2][@name='2'][.='1']"
|
||||
);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSortIndex() throws Exception {
|
||||
assertQ(req("indent","true", "qt","/terms", "terms","true",
|
||||
|
|
|
@ -38,6 +38,12 @@ public interface TermsParams {
|
|||
*/
|
||||
public static final String TERMS_FIELD = TERMS_PREFIX + "fl";
|
||||
|
||||
/**
|
||||
* Optional. The list of terms to be retrieved.
|
||||
*
|
||||
*/
|
||||
public static final String TERMS_LIST = TERMS_PREFIX + "list";
|
||||
|
||||
/**
|
||||
* Optional. The lower bound term to start at. The TermEnum will start at the next term after this term in the dictionary.
|
||||
*
|
||||
|
|
Loading…
Reference in New Issue