SOLR-7350: fix facet module docvalues support

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1671472 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2015-04-06 03:16:29 +00:00
parent aa82d48423
commit 56b7843b56
5 changed files with 280 additions and 33 deletions

View File

@ -23,24 +23,29 @@ import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiPostingsEnum;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.request.SimpleFacets;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
@ -115,6 +120,10 @@ public class FacetField extends FacetRequest {
return new FacetFieldProcessorStream(fcontext, this, sf);
}
if (!multiToken || sf.hasDocValues()) {
return new FacetFieldProcessorDV(fcontext, this, sf);
}
if (multiToken) {
return new FacetFieldProcessorUIF(fcontext, this, sf);
} else {
@ -810,4 +819,146 @@ class FacetFieldProcessorStream extends FacetFieldProcessor implements Closeable
}
}
class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
static boolean unwrap_singleValued_multiDv = true; // only set to false for test coverage
boolean multiValuedField;
SortedSetDocValues si; // only used for term lookups (for both single and multi-valued)
MultiDocValues.OrdinalMap ordinalMap = null; // maps per-segment ords to global ords
public FacetFieldProcessorDV(FacetContext fcontext, FacetField freq, SchemaField sf) {
super(fcontext, freq, sf);
multiValuedField = sf.multiValued() || sf.getType().multiValuedFieldCache();
}
protected BytesRef lookupOrd(int ord) throws IOException {
return si.lookupOrd(ord);
}
protected void findStartAndEndOrds() throws IOException {
if (multiValuedField) {
si = FieldUtil.getSortedSetDocValues(fcontext.qcontext, sf, null);
if (si instanceof MultiDocValues.MultiSortedSetDocValues) {
ordinalMap = ((MultiDocValues.MultiSortedSetDocValues)si).mapping;
}
} else {
SortedDocValues single = FieldUtil.getSortedDocValues(fcontext.qcontext, sf, null);
si = DocValues.singleton(single); // multi-valued view
if (single instanceof MultiDocValues.MultiSortedDocValues) {
ordinalMap = ((MultiDocValues.MultiSortedDocValues)single).mapping;
}
}
if (si.getValueCount() >= Integer.MAX_VALUE) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Field has too many unique values. field=" + sf + " nterms= " + si.getValueCount());
}
if (prefixRef != null) {
startTermIndex = (int)si.lookupTerm(prefixRef.get());
if (startTermIndex < 0) startTermIndex = -startTermIndex - 1;
prefixRef.append(UnicodeUtil.BIG_TERM);
endTermIndex = (int)si.lookupTerm(prefixRef.get());
assert endTermIndex < 0;
endTermIndex = -endTermIndex - 1;
} else {
startTermIndex = 0;
endTermIndex = (int)si.getValueCount();
}
// optimize collecting the "missing" bucket when startTermindex is 0 (since the "missing" ord is -1)
startTermIndex = startTermIndex==0 && freq.missing ? -1 : startTermIndex;
nTerms = endTermIndex - startTermIndex;
}
@Override
protected void collectDocs() throws IOException {
if (nTerms <= 0 || fcontext.base.size() < effectiveMincount) { // TODO: what about allBuckets? missing bucket?
return;
}
final List<LeafReaderContext> leaves = fcontext.searcher.getIndexReader().leaves();
Filter filter = fcontext.base.getTopFilter();
for (int subIdx = 0; subIdx < leaves.size(); subIdx++) {
LeafReaderContext subCtx = leaves.get(subIdx);
setNextReader(subCtx);
DocIdSet dis = filter.getDocIdSet(subCtx, null); // solr docsets already exclude any deleted docs
DocIdSetIterator disi = dis.iterator();
SortedDocValues singleDv = null;
SortedSetDocValues multiDv = null;
if (multiValuedField) {
// TODO: get sub from multi?
multiDv = subCtx.reader().getSortedSetDocValues(sf.getName());
if (multiDv == null) {
multiDv = DocValues.emptySortedSet();
}
// some codecs may optimize SortedSet storage for single-valued fields
// this will be null if this is not a wrapped single valued docvalues.
if (unwrap_singleValued_multiDv) {
singleDv = DocValues.unwrapSingleton(multiDv);
}
} else {
singleDv = subCtx.reader().getSortedDocValues(sf.getName());
if (singleDv == null) {
singleDv = DocValues.emptySorted();
}
}
LongValues toGlobal = ordinalMap == null ? null : ordinalMap.getGlobalOrds(subIdx);
if (singleDv != null) {
collectDocs(singleDv, disi, toGlobal);
} else {
collectDocs(multiDv, disi, toGlobal);
}
}
}
protected void collectDocs(SortedDocValues singleDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
int doc;
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
int segOrd = singleDv.getOrd(doc);
collect(doc, segOrd, toGlobal);
}
}
protected void collectDocs(SortedSetDocValues multiDv, DocIdSetIterator disi, LongValues toGlobal) throws IOException {
int doc;
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
multiDv.setDocument(doc);
int segOrd = (int)multiDv.nextOrd();
collect(doc, segOrd, toGlobal); // collect anything the first time (even -1 for missing)
if (segOrd < 0) continue;
for(;;) {
segOrd = (int)multiDv.nextOrd();
if (segOrd < 0) break;
collect(doc, segOrd, toGlobal);
}
}
}
private void collect(int doc, int segOrd, LongValues toGlobal) throws IOException {
int ord = (toGlobal != null && segOrd >= 0) ? (int)toGlobal.get(segOrd) : segOrd;
int arrIdx = ord - startTermIndex;
if (arrIdx >= 0 && arrIdx < nTerms) {
countAcc.incrementCount(arrIdx, 1);
collect(doc, arrIdx); // per-seg collectors
if (allBucketsSlot >= 0 && ord >= 0) {
countAcc.incrementCount(allBucketsSlot, 1);
collect(doc, allBucketsSlot); // per-seg collectors
}
}
}
}

View File

@ -19,7 +19,10 @@ package org.apache.solr.search.facet;
import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.QParser;
@ -44,23 +47,12 @@ public class FieldUtil {
// if (!field.hasDocValues() && (field.getType() instanceof StrField || field.getType() instanceof TextField)) {
// }
return si == null ? EMPTY_SortedDocValues : si;
return si == null ? DocValues.emptySorted() : si;
}
private static SortedDocValues EMPTY_SortedDocValues = new SortedDocValues() {
@Override
public int getOrd(int docID) {
return -1;
}
public static SortedSetDocValues getSortedSetDocValues(QueryContext context, SchemaField field, QParser qparser) throws IOException {
SortedSetDocValues si = context.searcher().getLeafReader().getSortedSetDocValues(field.getName());
return si == null ? DocValues.emptySortedSet() : si;
}
@Override
public BytesRef lookupOrd(int ord) {
return null;
}
@Override
public int getValueCount() {
return 0;
}
};
}

View File

@ -18,13 +18,15 @@ package org.apache.solr.search.facet;
*/
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BitSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.LongValues;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher;
@ -427,24 +429,48 @@ abstract class UniqueSlotAcc extends SlotAcc {
class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
SortedDocValues si;
final SortedDocValues topLevel;
final SortedDocValues[] subDvs;
final MultiDocValues.OrdinalMap ordMap;
LongValues toGlobal;
SortedDocValues subDv;
public UniqueSinglevaluedSlotAcc(FacetContext fcontext, String field, int numSlots) throws IOException {
super(fcontext, field, numSlots);
SolrIndexSearcher searcher = fcontext.qcontext.searcher();
si = FieldUtil.getSortedDocValues(fcontext.qcontext, searcher.getSchema().getField(field), null);
nTerms = si.getValueCount();
topLevel = FieldUtil.getSortedDocValues(fcontext.qcontext, searcher.getSchema().getField(field), null);
nTerms = topLevel.getValueCount();
if (topLevel instanceof MultiDocValues.MultiSortedDocValues) {
ordMap = ((MultiDocValues.MultiSortedDocValues)topLevel).mapping;
subDvs = ((MultiDocValues.MultiSortedDocValues)topLevel).values;
} else {
ordMap = null;
subDvs = null;
}
}
@Override
protected BytesRef lookupOrd(int ord) {
return si.lookupOrd(ord);
return topLevel.lookupOrd(ord);
}
@Override
public void setNextReader(LeafReaderContext readerContext) throws IOException {
super.setNextReader(readerContext);
if (subDvs != null) {
subDv = subDvs[readerContext.ord];
toGlobal = ordMap.getGlobalOrds(readerContext.ord);
} else {
assert readerContext.ord==0 || topLevel.getValueCount() == 0;
subDv = topLevel;
}
}
@Override
public void collect(int doc, int slotNum) {
int ord = si.getOrd(doc + currentDocBase);
if (ord < 0) return; // -1 means missing
int segOrd = subDv.getOrd(doc);
if (segOrd < 0) return; // -1 means missing
int ord = toGlobal==null ? segOrd : (int)toGlobal.get(segOrd);
FixedBitSet bits = arr[slotNum];
if (bits == null) {
@ -456,6 +482,66 @@ class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
}
class UniqueMultiDvSlotAcc extends UniqueSlotAcc {
final SortedSetDocValues topLevel;
final SortedSetDocValues[] subDvs;
final MultiDocValues.OrdinalMap ordMap;
LongValues toGlobal;
SortedSetDocValues subDv;
public UniqueMultiDvSlotAcc(FacetContext fcontext, String field, int numSlots) throws IOException {
super(fcontext, field, numSlots);
SolrIndexSearcher searcher = fcontext.qcontext.searcher();
topLevel = FieldUtil.getSortedSetDocValues(fcontext.qcontext, searcher.getSchema().getField(field), null);
nTerms = (int) topLevel.getValueCount();
if (topLevel instanceof MultiDocValues.MultiSortedSetDocValues) {
ordMap = ((MultiDocValues.MultiSortedSetDocValues) topLevel).mapping;
subDvs = ((MultiDocValues.MultiSortedSetDocValues) topLevel).values;
} else {
ordMap = null;
subDvs = null;
}
}
@Override
protected BytesRef lookupOrd(int ord) {
return topLevel.lookupOrd(ord);
}
@Override
public void setNextReader(LeafReaderContext readerContext) throws IOException {
super.setNextReader(readerContext);
if (subDvs != null) {
subDv = subDvs[readerContext.ord];
toGlobal = ordMap.getGlobalOrds(readerContext.ord);
} else {
assert readerContext.ord==0 || topLevel.getValueCount() == 0;
subDv = topLevel;
}
}
@Override
public void collect(int doc, int slotNum) {
subDv.setDocument(doc);
int segOrd = (int) subDv.nextOrd();
if (segOrd < 0) return;
FixedBitSet bits = arr[slotNum];
if (bits == null) {
bits = new FixedBitSet(nTerms);
arr[slotNum] = bits;
}
do {
int ord = toGlobal == null ? segOrd : (int) toGlobal.get(segOrd);
bits.set(ord);
segOrd = (int) subDv.nextOrd();
} while (segOrd >= 0);
}
}
class UniqueMultivaluedSlotAcc extends UniqueSlotAcc implements UnInvertedField.Callback {
private UnInvertedField uif;
private UnInvertedField.DocToTerm docToTerm;

View File

@ -23,6 +23,7 @@ import java.util.List;
import java.util.Set;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.SchemaField;
public class UniqueAgg extends StrAggValueSource {
public static String UNIQUE = "unique";
@ -36,10 +37,16 @@ public class UniqueAgg extends StrAggValueSource {
@Override
public SlotAcc createSlotAcc(FacetContext fcontext, int numDocs, int numSlots) throws IOException {
if (fcontext.qcontext.searcher().getSchema().getField(getArg()).multiValued())
return new UniqueMultivaluedSlotAcc(fcontext, getArg(), numSlots);
else
SchemaField sf = fcontext.qcontext.searcher().getSchema().getField(getArg());
if (sf.multiValued() || sf.getType().multiValuedFieldCache()) {
if (sf.hasDocValues()) {
return new UniqueMultiDvSlotAcc(fcontext, getArg(), numSlots);
} else {
return new UniqueMultivaluedSlotAcc(fcontext, getArg(), numSlots);
}
} else {
return new UniqueSinglevaluedSlotAcc(fcontext, getArg(), numSlots);
}
}
@Override

View File

@ -262,6 +262,17 @@ public class TestJsonFacets extends SolrTestCaseHS {
// multi-valued strings
doStatsTemplated(client, params(p, "facet","true", "rows","0", "noexist","noexist_ss", "cat_s","cat_ss", "where_s","where_ss", "num_d","num_d", "num_i","num_i", "super_s","super_ss", "val_b","val_b", "sparse_s","sparse_ss", "multi_ss","multi_ss") );
// single valued docvalues for strings, and single valued numeric doc values for numeric fields
doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sd", "cat_s","cat_sd", "where_s","where_sd", "num_d","num_dd", "num_i","num_id", "super_s","super_sd", "val_b","val_b", "sparse_s","sparse_sd" ,"multi_ss","multi_sds") );
// multi-valued docvalues
FacetFieldProcessorDV.unwrap_singleValued_multiDv = false; // better multi-valued coverage
doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sds", "cat_s","cat_sds", "where_s","where_sds", "num_d","num_d", "num_i","num_i", "super_s","super_sds", "val_b","val_b", "sparse_s","sparse_sds" ,"multi_ss","multi_sds") );
// multi-valued docvalues
FacetFieldProcessorDV.unwrap_singleValued_multiDv = true;
doStatsTemplated(client, params(p, "rows","0", "noexist","noexist_sds", "cat_s","cat_sds", "where_s","where_sds", "num_d","num_d", "num_i","num_i", "super_s","super_sds", "val_b","val_b", "sparse_s","sparse_sds" ,"multi_ss","multi_sds") );
}
public static void doStatsTemplated(Client client, ModifiableSolrParams p) throws Exception {
@ -279,7 +290,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
client.deleteByQuery("*:*", null);
client.add(sdoc("id", "1", cat_s, "A", where_s, "NY", num_d, "4", num_i, "2", super_s, "zodiac", val_b, "true", sparse_s, "one"), null);
client.add(sdoc("id", "2", cat_s, "B", where_s, "NJ", num_d, "-9", num_i, "-5", super_s,"superman", val_b, "false" , multi_ss,"a", "multi_ss","b" ), null);
client.add(sdoc("id", "2", cat_s, "B", where_s, "NJ", num_d, "-9", num_i, "-5", super_s,"superman", val_b, "false" , multi_ss,"a", multi_ss,"b" ), null);
client.add(sdoc("id", "3"), null);
client.commit();
client.add(sdoc("id", "4", cat_s, "A", where_s, "NJ", num_d, "2", num_i, "3", super_s,"spiderman" , multi_ss, "b"), null);
@ -493,7 +504,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
// test missing with stats
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{f1:{terms:{field:${sparse_s}, missing:true, facet:{x:'sum(num_d)'} }}}"
, "json.facet", "{f1:{terms:{field:${sparse_s}, missing:true, facet:{x:'sum(${num_d})'} }}}"
)
, "facets=={ 'count':6, " +
"'f1':{ 'buckets':[{val:one, count:1, x:4.0}, {val:two, count:1, x:11.0}], missing:{count:4, x:-12.0} } } "
@ -501,7 +512,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
// test that the missing bucket is not affected by any prefix
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{f1:{terms:{field:${sparse_s}, missing:true, prefix:on, facet:{x:'sum(num_d)'} }}}"
, "json.facet", "{f1:{terms:{field:${sparse_s}, missing:true, prefix:on, facet:{x:'sum(${num_d})'} }}}"
)
, "facets=={ 'count':6, " +
"'f1':{ 'buckets':[{val:one, count:1, x:4.0}], missing:{count:4, x:-12.0} } } "
@ -509,7 +520,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
// test missing with prefix that doesn't exist
client.testJQ(params(p, "q", "*:*"
, "json.facet", "{f1:{terms:{field:${sparse_s}, missing:true, prefix:ppp, facet:{x:'sum(num_d)'} }}}"
, "json.facet", "{f1:{terms:{field:${sparse_s}, missing:true, prefix:ppp, facet:{x:'sum(${num_d})'} }}}"
)
, "facets=={ 'count':6, " +
"'f1':{ 'buckets':[], missing:{count:4, x:-12.0} } } "
@ -665,7 +676,7 @@ public class TestJsonFacets extends SolrTestCaseHS {
client.testJQ(params(p, "q", "*:*"
// , "json.facet", "{f1:{terms:{field:'${cat_s}', sort:'n1 desc', facet:{n1:'sum(${num_d})'} }}" +
// " , f2:{terms:{field:'${cat_s}', sort:'n1 asc', facet:{n1:'sum(${num_d})'} }} }"
, "facet","true", "facet.version", "2", "facet.field","{!key=f1}${cat_s}", "f.f1.facet.sort","n1 desc", "facet.stat","n1:sum(num_d)"
, "facet","true", "facet.version", "2", "facet.field","{!key=f1}${cat_s}", "f.f1.facet.sort","n1 desc", "facet.stat","n1:sum(${num_d})"
, "facet.field","{!key=f2}${cat_s}", "f.f1.facet.sort","n1 asc"
)
, "facets=={ 'count':6, " +