mirror of https://github.com/apache/lucene.git
SOLR-7446: simplify missing bucket handling
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1692304 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d01866b8fc
commit
fbd467ad76
|
@ -53,7 +53,6 @@ import org.apache.solr.common.util.SimpleOrderedMap;
|
||||||
import org.apache.solr.schema.FieldType;
|
import org.apache.solr.schema.FieldType;
|
||||||
import org.apache.solr.schema.SchemaField;
|
import org.apache.solr.schema.SchemaField;
|
||||||
import org.apache.solr.schema.TrieField;
|
import org.apache.solr.schema.TrieField;
|
||||||
import org.apache.solr.search.DocIterator;
|
|
||||||
import org.apache.solr.search.DocSet;
|
import org.apache.solr.search.DocSet;
|
||||||
import org.apache.solr.search.HashDocSet;
|
import org.apache.solr.search.HashDocSet;
|
||||||
import org.apache.solr.search.SolrIndexSearcher;
|
import org.apache.solr.search.SolrIndexSearcher;
|
||||||
|
@ -174,7 +173,6 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
|
||||||
SlotAcc[] otherAccs; // Accumulators that do not need to be calculated across all buckets.
|
SlotAcc[] otherAccs; // Accumulators that do not need to be calculated across all buckets.
|
||||||
|
|
||||||
SpecialSlotAcc allBucketsAcc; // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist.
|
SpecialSlotAcc allBucketsAcc; // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist.
|
||||||
SpecialSlotAcc missingAcc; // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist.
|
|
||||||
|
|
||||||
|
|
||||||
FacetFieldProcessor(FacetContext fcontext, FacetField freq, SchemaField sf) {
|
FacetFieldProcessor(FacetContext fcontext, FacetField freq, SchemaField sf) {
|
||||||
|
@ -502,7 +500,6 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
|
||||||
int maxSlots;
|
int maxSlots;
|
||||||
|
|
||||||
int allBucketsSlot = -1; // slot for the primary Accs (countAcc, collectAcc)
|
int allBucketsSlot = -1; // slot for the primary Accs (countAcc, collectAcc)
|
||||||
int missingSlot = -1;
|
|
||||||
|
|
||||||
public FacetFieldProcessorFCBase(FacetContext fcontext, FacetField freq, SchemaField sf) {
|
public FacetFieldProcessorFCBase(FacetContext fcontext, FacetField freq, SchemaField sf) {
|
||||||
super(fcontext, freq, sf);
|
super(fcontext, freq, sf);
|
||||||
|
@ -538,9 +535,6 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
|
||||||
if (freq.allBuckets) {
|
if (freq.allBuckets) {
|
||||||
allBucketsSlot = maxSlots++;
|
allBucketsSlot = maxSlots++;
|
||||||
}
|
}
|
||||||
if (freq.missing) {
|
|
||||||
missingSlot = maxSlots++;
|
|
||||||
}
|
|
||||||
|
|
||||||
createCollectAcc(nDocs, maxSlots);
|
createCollectAcc(nDocs, maxSlots);
|
||||||
|
|
||||||
|
@ -548,11 +542,6 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
|
||||||
allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
|
allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (freq.missing) {
|
|
||||||
// TODO: optimize case when missingSlot can be contiguous with other slots
|
|
||||||
missingAcc = new SpecialSlotAcc(fcontext, collectAcc, missingSlot, otherAccs, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
collectDocs();
|
collectDocs();
|
||||||
|
|
||||||
return findTopSlots();
|
return findTopSlots();
|
||||||
|
@ -587,7 +576,7 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
|
||||||
};
|
};
|
||||||
|
|
||||||
Slot bottom = null;
|
Slot bottom = null;
|
||||||
for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
|
for (int i = 0; i < nTerms; i++) {
|
||||||
// screen out buckets not matching mincount immediately (i.e. don't even increment numBuckets)
|
// screen out buckets not matching mincount immediately (i.e. don't even increment numBuckets)
|
||||||
if (effectiveMincount > 0 && countAcc.getCount(i) < effectiveMincount) {
|
if (effectiveMincount > 0 && countAcc.getCount(i) < effectiveMincount) {
|
||||||
continue;
|
continue;
|
||||||
|
@ -672,29 +661,8 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
|
||||||
|
|
||||||
if (freq.missing) {
|
if (freq.missing) {
|
||||||
SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
|
SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
|
||||||
fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field));
|
fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
|
||||||
res.add("missing", missingBucket);
|
res.add("missing", missingBucket);
|
||||||
|
|
||||||
/*** TODO - OPTIMIZE
|
|
||||||
DocSet missingDocSet = null;
|
|
||||||
if (startTermIndex == -1) {
|
|
||||||
fillBucket(missingBucket, countAcc.getCount(0), null);
|
|
||||||
} else {
|
|
||||||
missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
|
|
||||||
// an extra slot was added to the end for this missing bucket
|
|
||||||
countAcc.incrementCount(nTerms, missingDocSet.size());
|
|
||||||
collect(missingDocSet, nTerms);
|
|
||||||
addStats(missingBucket, nTerms);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (freq.getSubFacets().size() > 0) {
|
|
||||||
// TODO: we can do better than this!
|
|
||||||
if (missingDocSet == null) {
|
|
||||||
missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
|
|
||||||
}
|
|
||||||
processSubs(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), missingDocSet);
|
|
||||||
}
|
|
||||||
***/
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return res;
|
return res;
|
||||||
|
@ -751,9 +719,6 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
|
||||||
endTermIndex = (int)si.getValueCount();
|
endTermIndex = (int)si.getValueCount();
|
||||||
}
|
}
|
||||||
|
|
||||||
// optimize collecting the "missing" bucket when startTermindex is 0 (since the "missing" ord is -1)
|
|
||||||
startTermIndex = startTermIndex==0 && freq.missing ? -1 : startTermIndex;
|
|
||||||
|
|
||||||
nTerms = endTermIndex - startTermIndex;
|
nTerms = endTermIndex - startTermIndex;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -809,6 +774,7 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
|
||||||
int doc;
|
int doc;
|
||||||
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
int segOrd = singleDv.getOrd(doc);
|
int segOrd = singleDv.getOrd(doc);
|
||||||
|
if (segOrd < 0) continue;
|
||||||
collect(doc, segOrd, toGlobal);
|
collect(doc, segOrd, toGlobal);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -817,11 +783,8 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
|
||||||
int doc;
|
int doc;
|
||||||
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
|
||||||
multiDv.setDocument(doc);
|
multiDv.setDocument(doc);
|
||||||
int segOrd = (int)multiDv.nextOrd();
|
|
||||||
collect(doc, segOrd, toGlobal); // collect anything the first time (even -1 for missing)
|
|
||||||
if (segOrd < 0) continue;
|
|
||||||
for(;;) {
|
for(;;) {
|
||||||
segOrd = (int)multiDv.nextOrd();
|
int segOrd = (int)multiDv.nextOrd();
|
||||||
if (segOrd < 0) break;
|
if (segOrd < 0) break;
|
||||||
collect(doc, segOrd, toGlobal);
|
collect(doc, segOrd, toGlobal);
|
||||||
}
|
}
|
||||||
|
@ -837,8 +800,7 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
|
||||||
if (collectAcc != null) {
|
if (collectAcc != null) {
|
||||||
collectAcc.collect(doc, arrIdx);
|
collectAcc.collect(doc, arrIdx);
|
||||||
}
|
}
|
||||||
// since this can be called for missing, we need to ensure it's currently not.
|
if (allBucketsAcc != null) {
|
||||||
if (allBucketsAcc != null && ord >= 0) {
|
|
||||||
allBucketsAcc.collect(doc, arrIdx);
|
allBucketsAcc.collect(doc, arrIdx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -32,6 +32,7 @@ import org.apache.lucene.util.PriorityQueue;
|
||||||
import org.apache.solr.common.util.SimpleOrderedMap;
|
import org.apache.solr.common.util.SimpleOrderedMap;
|
||||||
import org.apache.solr.schema.SchemaField;
|
import org.apache.solr.schema.SchemaField;
|
||||||
import org.apache.solr.search.DocIterator;
|
import org.apache.solr.search.DocIterator;
|
||||||
|
import org.apache.solr.search.DocSetCollector;
|
||||||
|
|
||||||
class FacetFieldProcessorNumeric extends FacetFieldProcessor {
|
class FacetFieldProcessorNumeric extends FacetFieldProcessor {
|
||||||
static int MAXIMUM_STARTING_TABLE_SIZE=1024; // must be a power of two, non-final to support setting by tests
|
static int MAXIMUM_STARTING_TABLE_SIZE=1024; // must be a power of two, non-final to support setting by tests
|
||||||
|
@ -137,10 +138,8 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
|
||||||
super(fcontext, freq, sf);
|
super(fcontext, freq, sf);
|
||||||
}
|
}
|
||||||
|
|
||||||
int missingSlot = -1;
|
|
||||||
int allBucketsSlot = -1;
|
int allBucketsSlot = -1;
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void process() throws IOException {
|
public void process() throws IOException {
|
||||||
super.process();
|
super.process();
|
||||||
|
@ -148,18 +147,14 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void doRehash(LongCounts table) {
|
private void doRehash(LongCounts table) {
|
||||||
if (collectAcc == null && missingAcc == null && allBucketsAcc == null) return;
|
if (collectAcc == null && allBucketsAcc == null) return;
|
||||||
|
|
||||||
// Our "count" acc is backed by the hash table and will already be rehashed
|
// Our "count" acc is backed by the hash table and will already be rehashed
|
||||||
// otherAccs don't need to be rehashed
|
// otherAccs don't need to be rehashed
|
||||||
|
|
||||||
int newTableSize = table.numSlots();
|
int newTableSize = table.numSlots();
|
||||||
int numSlots = newTableSize;
|
int numSlots = newTableSize;
|
||||||
final int oldMissingSlot = missingSlot;
|
|
||||||
final int oldAllBucketsSlot = allBucketsSlot;
|
final int oldAllBucketsSlot = allBucketsSlot;
|
||||||
if (oldMissingSlot >= 0) {
|
|
||||||
missingSlot = numSlots++;
|
|
||||||
}
|
|
||||||
if (oldAllBucketsSlot >= 0) {
|
if (oldAllBucketsSlot >= 0) {
|
||||||
allBucketsSlot = numSlots++;
|
allBucketsSlot = numSlots++;
|
||||||
}
|
}
|
||||||
|
@ -178,9 +173,6 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
|
||||||
if (oldSlot < mapping.length) {
|
if (oldSlot < mapping.length) {
|
||||||
return mapping[oldSlot];
|
return mapping[oldSlot];
|
||||||
}
|
}
|
||||||
if (oldSlot == oldMissingSlot) {
|
|
||||||
return missingSlot;
|
|
||||||
}
|
|
||||||
if (oldSlot == oldAllBucketsSlot) {
|
if (oldSlot == oldAllBucketsSlot) {
|
||||||
return allBucketsSlot;
|
return allBucketsSlot;
|
||||||
}
|
}
|
||||||
|
@ -192,9 +184,6 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
|
||||||
if (collectAcc != null) {
|
if (collectAcc != null) {
|
||||||
collectAcc.resize(resizer);
|
collectAcc.resize(resizer);
|
||||||
}
|
}
|
||||||
if (missingAcc != null) {
|
|
||||||
missingAcc.resize(resizer);
|
|
||||||
}
|
|
||||||
if (allBucketsAcc != null) {
|
if (allBucketsAcc != null) {
|
||||||
allBucketsAcc.resize(resizer);
|
allBucketsAcc.resize(resizer);
|
||||||
}
|
}
|
||||||
|
@ -225,9 +214,7 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
|
||||||
|
|
||||||
int numMissing = 0;
|
int numMissing = 0;
|
||||||
|
|
||||||
if (freq.missing) {
|
|
||||||
missingSlot = numSlots++;
|
|
||||||
}
|
|
||||||
if (freq.allBuckets) {
|
if (freq.allBuckets) {
|
||||||
allBucketsSlot = numSlots++;
|
allBucketsSlot = numSlots++;
|
||||||
}
|
}
|
||||||
|
@ -302,11 +289,6 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
|
||||||
allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
|
allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (freq.missing) {
|
|
||||||
// TODO: optimize case when missingSlot can be contiguous with other slots
|
|
||||||
missingAcc = new SpecialSlotAcc(fcontext, collectAcc, missingSlot, otherAccs, 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
NumericDocValues values = null;
|
NumericDocValues values = null;
|
||||||
Bits docsWithField = null;
|
Bits docsWithField = null;
|
||||||
|
|
||||||
|
@ -335,11 +317,7 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
|
||||||
|
|
||||||
int segDoc = doc - segBase;
|
int segDoc = doc - segBase;
|
||||||
long val = values.get(segDoc);
|
long val = values.get(segDoc);
|
||||||
if (val == 0 && !docsWithField.get(segDoc)) {
|
if (val != 0 || docsWithField.get(segDoc)) {
|
||||||
if (missingAcc != null) {
|
|
||||||
missingAcc.collect(segDoc, -1);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
int slot = table.add(val); // this can trigger a rehash
|
int slot = table.add(val); // this can trigger a rehash
|
||||||
|
|
||||||
// countAcc.incrementCount(slot, 1);
|
// countAcc.incrementCount(slot, 1);
|
||||||
|
@ -428,7 +406,7 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
|
||||||
// TODO: it would be more efficient to build up a missing DocSet if we need it here anyway.
|
// TODO: it would be more efficient to build up a missing DocSet if we need it here anyway.
|
||||||
|
|
||||||
SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
|
SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
|
||||||
fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field));
|
fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
|
||||||
res.add("missing", missingBucket);
|
res.add("missing", missingBucket);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -283,15 +283,16 @@ public class FacetProcessor<FacetRequestT extends FacetRequest> {
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
public void fillBucket(SimpleOrderedMap<Object> bucket, Query q) throws IOException {
|
public void fillBucket(SimpleOrderedMap<Object> bucket, Query q, DocSet result) throws IOException {
|
||||||
boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0;
|
boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0;
|
||||||
|
|
||||||
// TODO: always collect counts or not???
|
// TODO: always collect counts or not???
|
||||||
|
|
||||||
DocSet result = null;
|
|
||||||
int count;
|
int count;
|
||||||
|
|
||||||
if (needDocSet) {
|
if (result != null) {
|
||||||
|
count = result.size();
|
||||||
|
} else if (needDocSet) {
|
||||||
if (q == null) {
|
if (q == null) {
|
||||||
result = fcontext.base;
|
result = fcontext.base;
|
||||||
// result.incref(); // OFF-HEAP
|
// result.incref(); // OFF-HEAP
|
||||||
|
|
|
@ -54,7 +54,7 @@ class FacetQueryProcessor extends FacetProcessor<FacetQuery> {
|
||||||
public void process() throws IOException {
|
public void process() throws IOException {
|
||||||
super.process();
|
super.process();
|
||||||
response = new SimpleOrderedMap<>();
|
response = new SimpleOrderedMap<>();
|
||||||
fillBucket(response, freq.q);
|
fillBucket(response, freq.q, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -336,7 +336,7 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
|
||||||
}
|
}
|
||||||
|
|
||||||
Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper);
|
Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper);
|
||||||
fillBucket(bucket, rangeQ);
|
fillBucket(bucket, rangeQ, null);
|
||||||
|
|
||||||
return bucket;
|
return bucket;
|
||||||
}
|
}
|
||||||
|
|
|
@ -394,7 +394,7 @@ public class UnInvertedField extends DocTermOrds {
|
||||||
|
|
||||||
|
|
||||||
public void collectDocs(FacetFieldProcessorUIF processor) throws IOException {
|
public void collectDocs(FacetFieldProcessorUIF processor) throws IOException {
|
||||||
if (processor.collectAcc==null && processor.missingAcc == null && processor.allBucketsAcc == null && processor.startTermIndex == 0 && processor.endTermIndex >= numTermsInField) {
|
if (processor.collectAcc==null && processor.allBucketsAcc == null && processor.startTermIndex == 0 && processor.endTermIndex >= numTermsInField) {
|
||||||
getCounts(processor, processor.countAcc);
|
getCounts(processor, processor.countAcc);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue