mirror of https://github.com/apache/lucene.git
SOLR-7446: simplify missing bucket handling
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1692304 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent d01866b8fc
commit fbd467ad76
@@ -53,7 +53,6 @@ import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.schema.TrieField;
 import org.apache.solr.search.DocIterator;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.search.HashDocSet;
 import org.apache.solr.search.SolrIndexSearcher;
@@ -174,7 +173,6 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
   SlotAcc[] otherAccs;  // Accumulators that do not need to be calculated across all buckets.

   SpecialSlotAcc allBucketsAcc;  // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist.
-  SpecialSlotAcc missingAcc;  // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist.


   FacetFieldProcessor(FacetContext fcontext, FacetField freq, SchemaField sf) {
@@ -502,7 +500,6 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
   int maxSlots;

   int allBucketsSlot = -1;  // slot for the primary Accs (countAcc, collectAcc)
-  int missingSlot = -1;

   public FacetFieldProcessorFCBase(FacetContext fcontext, FacetField freq, SchemaField sf) {
     super(fcontext, freq, sf);
@@ -538,9 +535,6 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
     if (freq.allBuckets) {
       allBucketsSlot = maxSlots++;
     }
-    if (freq.missing) {
-      missingSlot = maxSlots++;
-    }

     createCollectAcc(nDocs, maxSlots);
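With the dedicated missing slot gone, slot accounting in the field-cache processor reduces to one slot per term bucket plus an optional allBuckets slot at the end. A minimal sketch of the resulting layout logic, assuming maxSlots starts at the term count (the excerpt does not show its initialization):

    int maxSlots = nTerms;           // one slot per candidate term bucket
    int allBucketsSlot = -1;
    if (freq.allBuckets) {
      allBucketsSlot = maxSlots++;   // single extra slot appended after the terms
    }
    createCollectAcc(nDocs, maxSlots);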
@@ -548,11 +542,6 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
       allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
     }

-    if (freq.missing) {
-      // TODO: optimize case when missingSlot can be contiguous with other slots
-      missingAcc = new SpecialSlotAcc(fcontext, collectAcc, missingSlot, otherAccs, 1);
-    }
-
     collectDocs();

     return findTopSlots();
@@ -587,7 +576,7 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
     };

     Slot bottom = null;
-    for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
+    for (int i = 0; i < nTerms; i++) {
       // screen out buckets not matching mincount immediately (i.e. don't even increment numBuckets)
       if (effectiveMincount > 0 && countAcc.getCount(i) < effectiveMincount) {
         continue;
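Previously, when the missing bucket was in play, the "missing" ordinal (-1) was folded into the slot range by shifting startTermIndex to -1, so slot 0 held the missing count and the top-slot scan had to start at 1 in that case. With missing now handled by a separate query, every slot is a real term bucket and the loop can start at 0 unconditionally.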
@@ -672,29 +661,8 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {

     if (freq.missing) {
       SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
-      fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field));
+      fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
       res.add("missing", missingBucket);
-
-      /*** TODO - OPTIMIZE
-      DocSet missingDocSet = null;
-      if (startTermIndex == -1) {
-        fillBucket(missingBucket, countAcc.getCount(0), null);
-      } else {
-        missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
-        // an extra slot was added to the end for this missing bucket
-        countAcc.incrementCount(nTerms, missingDocSet.size());
-        collect(missingDocSet, nTerms);
-        addStats(missingBucket, nTerms);
-      }
-
-      if (freq.getSubFacets().size() > 0) {
-        // TODO: we can do better than this!
-        if (missingDocSet == null) {
-          missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
-        }
-        processSubs(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), missingDocSet);
-      }
-      ***/
     }

     return res;
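The simplified path computes the missing bucket with an ordinary query that matches documents lacking any value in the field, then reuses the regular fillBucket machinery. getFieldMissingQuery itself is not part of this excerpt; a minimal sketch of one way such a query can be built (an assumption, not necessarily the Solr implementation):

    import org.apache.lucene.search.BooleanClause;
    import org.apache.lucene.search.BooleanQuery;
    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.search.Query;
    import org.apache.solr.schema.SchemaField;
    import org.apache.solr.search.SolrIndexSearcher;

    static Query fieldMissingQuery(SolrIndexSearcher searcher, String fieldName) {
      SchemaField sf = searcher.getSchema().getField(fieldName);
      // documents with *some* value in the field: an open-ended range query
      Query hasVal = sf.getType().getRangeQuery(null, sf, null, null, false, false);
      BooleanQuery.Builder bq = new BooleanQuery.Builder();
      bq.add(new MatchAllDocsQuery(), BooleanClause.Occur.MUST);
      bq.add(hasVal, BooleanClause.Occur.MUST_NOT);  // ...minus those that have one
      return bq.build();
    }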
@@ -751,9 +719,6 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
       endTermIndex = (int)si.getValueCount();
     }

-    // optimize collecting the "missing" bucket when startTermIndex is 0 (since the "missing" ord is -1)
-    startTermIndex = startTermIndex==0 && freq.missing ? -1 : startTermIndex;
-
     nTerms = endTermIndex - startTermIndex;
   }
@@ -809,6 +774,7 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
       int doc;
       while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
         int segOrd = singleDv.getOrd(doc);
+        if (segOrd < 0) continue;
         collect(doc, segOrd, toGlobal);
       }
     }
@@ -817,11 +783,8 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
       int doc;
       while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
         multiDv.setDocument(doc);
-        int segOrd = (int)multiDv.nextOrd();
-        collect(doc, segOrd, toGlobal); // collect anything the first time (even -1 for missing)
-        if (segOrd < 0) continue;
         for(;;) {
-          segOrd = (int)multiDv.nextOrd();
+          int segOrd = (int)multiDv.nextOrd();
           if (segOrd < 0) break;
           collect(doc, segOrd, toGlobal);
         }
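Both doc-values loops now rely on the same convention of the Lucene 5.x API: SortedDocValues.getOrd(doc) returns -1 for a document with no value, and SortedSetDocValues.nextOrd() returns NO_MORE_ORDS (-1) immediately for such a document. Skipping negative ordinals therefore keeps missing documents out of the accumulators entirely, instead of routing them into a special slot. The multi-valued walk collapses to:

    // sketch of the new shape of the multi-valued walk
    multiDv.setDocument(doc);
    for (;;) {
      int segOrd = (int) multiDv.nextOrd();    // NO_MORE_ORDS == -1
      if (segOrd < 0) break;                   // exhausted, or doc has no values
      collect(doc, segOrd, toGlobal);          // only real ordinals reach collect()
    }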
@@ -837,8 +800,7 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
       if (collectAcc != null) {
         collectAcc.collect(doc, arrIdx);
       }
-      // since this can be called for missing, we need to ensure it's currently not.
-      if (allBucketsAcc != null && ord >= 0) {
+      if (allBucketsAcc != null) {
         allBucketsAcc.collect(doc, arrIdx);
       }
     }

@@ -32,6 +32,7 @@ import org.apache.lucene.util.PriorityQueue;
 import org.apache.solr.common.util.SimpleOrderedMap;
 import org.apache.solr.schema.SchemaField;
 import org.apache.solr.search.DocIterator;
+import org.apache.solr.search.DocSetCollector;

 class FacetFieldProcessorNumeric extends FacetFieldProcessor {
   static int MAXIMUM_STARTING_TABLE_SIZE=1024;  // must be a power of two, non-final to support setting by tests
@@ -137,10 +138,8 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
     super(fcontext, freq, sf);
   }

-  int missingSlot = -1;
   int allBucketsSlot = -1;

   @Override
   public void process() throws IOException {
     super.process();
@@ -148,18 +147,14 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
   }

   private void doRehash(LongCounts table) {
-    if (collectAcc == null && missingAcc == null && allBucketsAcc == null) return;
+    if (collectAcc == null && allBucketsAcc == null) return;

     // Our "count" acc is backed by the hash table and will already be rehashed
     // otherAccs don't need to be rehashed

     int newTableSize = table.numSlots();
     int numSlots = newTableSize;
-    final int oldMissingSlot = missingSlot;
     final int oldAllBucketsSlot = allBucketsSlot;
-    if (oldMissingSlot >= 0) {
-      missingSlot = numSlots++;
-    }
     if (oldAllBucketsSlot >= 0) {
       allBucketsSlot = numSlots++;
     }
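The rehash path keeps the same shape for the one special slot that remains: ordinary slots move according to the hash table's mapping, while allBucketsSlot is reassigned to a fresh slot past the end of the new table. A condensed illustration of the remap rule the resizer applies (a sketch, not the Solr source):

    // oldSlot -> newSlot translation applied while resizing accumulators
    int remap(int oldSlot, int[] mapping, int oldAllBucketsSlot, int newAllBucketsSlot) {
      if (oldSlot < mapping.length) {
        return mapping[oldSlot];       // regular hash-table slot, moved by the rehash
      }
      if (oldSlot == oldAllBucketsSlot) {
        return newAllBucketsSlot;      // the only special slot left after this commit
      }
      return -1;                       // no other special slots remain
    }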
@@ -178,9 +173,6 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
         if (oldSlot < mapping.length) {
           return mapping[oldSlot];
         }
-        if (oldSlot == oldMissingSlot) {
-          return missingSlot;
-        }
         if (oldSlot == oldAllBucketsSlot) {
           return allBucketsSlot;
         }
@@ -192,9 +184,6 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
     if (collectAcc != null) {
       collectAcc.resize(resizer);
     }
-    if (missingAcc != null) {
-      missingAcc.resize(resizer);
-    }
     if (allBucketsAcc != null) {
       allBucketsAcc.resize(resizer);
     }
@@ -225,9 +214,7 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {

     int numMissing = 0;

-    if (freq.missing) {
-      missingSlot = numSlots++;
-    }
     if (freq.allBuckets) {
       allBucketsSlot = numSlots++;
     }
@@ -302,11 +289,6 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
       allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
     }

-    if (freq.missing) {
-      // TODO: optimize case when missingSlot can be contiguous with other slots
-      missingAcc = new SpecialSlotAcc(fcontext, collectAcc, missingSlot, otherAccs, 1);
-    }
-
     NumericDocValues values = null;
     Bits docsWithField = null;
@@ -335,11 +317,7 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {

       int segDoc = doc - segBase;
       long val = values.get(segDoc);
-      if (val == 0 && !docsWithField.get(segDoc)) {
-        if (missingAcc != null) {
-          missingAcc.collect(segDoc, -1);
-        }
-      } else {
+      if (val != 0 && docsWithField.get(segDoc)) {
         int slot = table.add(val);  // this can trigger a rehash

         // countAcc.incrementCount(slot, 1);
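A NumericDocValues source reads as 0 for a document that has no value, so 0 alone is ambiguous and a separate Bits instance (docsWithField) is needed to tell a stored zero from a missing value. In isolation, the test being dropped from the hot loop is:

    long val = values.get(segDoc);
    boolean missing = (val == 0) && !docsWithField.get(segDoc);  // a plain 0 is ambiguous
    if (!missing) {
      int slot = table.add(val);  // hash the raw long; may trigger a rehash
    }

The new code keeps only the "value present" branch, since the missing bucket is now filled by a query after collection.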
@@ -428,7 +406,7 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
       // TODO: it would be more efficient to build up a missing DocSet if we need it here anyway.

       SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
-      fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field));
+      fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
       res.add("missing", missingBucket);
     }

@@ -283,15 +283,16 @@ public class FacetProcessor<FacetRequestT extends FacetRequest> {
   }

-  public void fillBucket(SimpleOrderedMap<Object> bucket, Query q) throws IOException {
+  public void fillBucket(SimpleOrderedMap<Object> bucket, Query q, DocSet result) throws IOException {
     boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0;

     // TODO: always collect counts or not???

-    DocSet result = null;
     int count;

-    if (needDocSet) {
+    if (result != null) {
+      count = result.size();
+    } else if (needDocSet) {
       if (q == null) {
         result = fcontext.base;
         // result.incref(); // OFF-HEAP
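The extra DocSet parameter lets a caller that has already materialized the bucket's document set pass it in, so fillBucket takes the count from result.size() instead of executing the query again; every caller updated in this commit still passes null. A hypothetical call with a precomputed set (variable names are illustrative, not from the source):

    SimpleOrderedMap<Object> bucket = new SimpleOrderedMap<>();
    DocSet already = fcontext.searcher.getDocSet(q, fcontext.base);  // computed earlier for another purpose
    fillBucket(bucket, q, already);  // count = already.size(); no second query execution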
@@ -54,7 +54,7 @@ class FacetQueryProcessor extends FacetProcessor<FacetQuery> {
   public void process() throws IOException {
     super.process();
     response = new SimpleOrderedMap<>();
-    fillBucket(response, freq.q);
+    fillBucket(response, freq.q, null);
   }

@@ -336,7 +336,7 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
     }

     Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high == null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper);
-    fillBucket(bucket, rangeQ);
+    fillBucket(bucket, rangeQ, null);

     return bucket;
   }

@@ -394,7 +394,7 @@ public class UnInvertedField extends DocTermOrds {

   public void collectDocs(FacetFieldProcessorUIF processor) throws IOException {
-    if (processor.collectAcc==null && processor.missingAcc == null && processor.allBucketsAcc == null && processor.startTermIndex == 0 && processor.endTermIndex >= numTermsInField) {
+    if (processor.collectAcc==null && processor.allBucketsAcc == null && processor.startTermIndex == 0 && processor.endTermIndex >= numTermsInField) {
       getCounts(processor, processor.countAcc);
       return;
     }
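For context, the option these code paths serve is the missing flag of the JSON Facet API (SOLR-7446): a terms facet of roughly the shape json.facet={ categories : { type:terms, field:cat, missing:true } } (field name illustrative) gets one extra "missing" bucket, which after this commit is computed by running the field-missing query through fillBucket rather than by reserving a slot during collection.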