SOLR-7446: simplify missing bucket handling

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1692304 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2015-07-22 17:54:36 +00:00
parent d01866b8fc
commit fbd467ad76
6 changed files with 17 additions and 76 deletions

View File

@ -53,7 +53,6 @@ import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TrieField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.search.HashDocSet;
import org.apache.solr.search.SolrIndexSearcher;
@ -174,7 +173,6 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
SlotAcc[] otherAccs; // Accumulators that do not need to be calculated across all buckets.
SpecialSlotAcc allBucketsAcc; // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist.
SpecialSlotAcc missingAcc; // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist.
FacetFieldProcessor(FacetContext fcontext, FacetField freq, SchemaField sf) {
@ -502,7 +500,6 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
int maxSlots;
int allBucketsSlot = -1; // slot for the primary Accs (countAcc, collectAcc)
int missingSlot = -1;
public FacetFieldProcessorFCBase(FacetContext fcontext, FacetField freq, SchemaField sf) {
super(fcontext, freq, sf);
@ -538,9 +535,6 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
if (freq.allBuckets) {
allBucketsSlot = maxSlots++;
}
if (freq.missing) {
missingSlot = maxSlots++;
}
createCollectAcc(nDocs, maxSlots);
@ -548,11 +542,6 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
}
if (freq.missing) {
// TODO: optimize case when missingSlot can be contiguous with other slots
missingAcc = new SpecialSlotAcc(fcontext, collectAcc, missingSlot, otherAccs, 1);
}
collectDocs();
return findTopSlots();
@ -587,7 +576,7 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
};
Slot bottom = null;
for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) {
for (int i = 0; i < nTerms; i++) {
// screen out buckets not matching mincount immediately (i.e. don't even increment numBuckets)
if (effectiveMincount > 0 && countAcc.getCount(i) < effectiveMincount) {
continue;
@ -672,29 +661,8 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
if (freq.missing) {
SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field));
fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
res.add("missing", missingBucket);
/*** TODO - OPTIMIZE
DocSet missingDocSet = null;
if (startTermIndex == -1) {
fillBucket(missingBucket, countAcc.getCount(0), null);
} else {
missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
// an extra slot was added to the end for this missing bucket
countAcc.incrementCount(nTerms, missingDocSet.size());
collect(missingDocSet, nTerms);
addStats(missingBucket, nTerms);
}
if (freq.getSubFacets().size() > 0) {
// TODO: we can do better than this!
if (missingDocSet == null) {
missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
}
processSubs(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), missingDocSet);
}
***/
}
return res;
@ -751,9 +719,6 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
endTermIndex = (int)si.getValueCount();
}
// optimize collecting the "missing" bucket when startTermindex is 0 (since the "missing" ord is -1)
startTermIndex = startTermIndex==0 && freq.missing ? -1 : startTermIndex;
nTerms = endTermIndex - startTermIndex;
}
@ -809,6 +774,7 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
int doc;
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
int segOrd = singleDv.getOrd(doc);
if (segOrd < 0) continue;
collect(doc, segOrd, toGlobal);
}
}
@ -817,11 +783,8 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
int doc;
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
multiDv.setDocument(doc);
int segOrd = (int)multiDv.nextOrd();
collect(doc, segOrd, toGlobal); // collect anything the first time (even -1 for missing)
if (segOrd < 0) continue;
for(;;) {
segOrd = (int)multiDv.nextOrd();
int segOrd = (int)multiDv.nextOrd();
if (segOrd < 0) break;
collect(doc, segOrd, toGlobal);
}
@ -837,8 +800,7 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
if (collectAcc != null) {
collectAcc.collect(doc, arrIdx);
}
// since this can be called for missing, we need to ensure it's currently not.
if (allBucketsAcc != null && ord >= 0) {
if (allBucketsAcc != null) {
allBucketsAcc.collect(doc, arrIdx);
}
}

View File

@ -32,6 +32,7 @@ import org.apache.lucene.util.PriorityQueue;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSetCollector;
class FacetFieldProcessorNumeric extends FacetFieldProcessor {
static int MAXIMUM_STARTING_TABLE_SIZE=1024; // must be a power of two, non-final to support setting by tests
@ -137,10 +138,8 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
super(fcontext, freq, sf);
}
int missingSlot = -1;
int allBucketsSlot = -1;
@Override
public void process() throws IOException {
super.process();
@ -148,18 +147,14 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
}
private void doRehash(LongCounts table) {
if (collectAcc == null && missingAcc == null && allBucketsAcc == null) return;
if (collectAcc == null && allBucketsAcc == null) return;
// Our "count" acc is backed by the hash table and will already be rehashed
// otherAccs don't need to be rehashed
int newTableSize = table.numSlots();
int numSlots = newTableSize;
final int oldMissingSlot = missingSlot;
final int oldAllBucketsSlot = allBucketsSlot;
if (oldMissingSlot >= 0) {
missingSlot = numSlots++;
}
if (oldAllBucketsSlot >= 0) {
allBucketsSlot = numSlots++;
}
@ -178,9 +173,6 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
if (oldSlot < mapping.length) {
return mapping[oldSlot];
}
if (oldSlot == oldMissingSlot) {
return missingSlot;
}
if (oldSlot == oldAllBucketsSlot) {
return allBucketsSlot;
}
@ -192,9 +184,6 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
if (collectAcc != null) {
collectAcc.resize(resizer);
}
if (missingAcc != null) {
missingAcc.resize(resizer);
}
if (allBucketsAcc != null) {
allBucketsAcc.resize(resizer);
}
@ -225,9 +214,7 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
int numMissing = 0;
if (freq.missing) {
missingSlot = numSlots++;
}
if (freq.allBuckets) {
allBucketsSlot = numSlots++;
}
@ -302,11 +289,6 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
}
if (freq.missing) {
// TODO: optimize case when missingSlot can be contiguous with other slots
missingAcc = new SpecialSlotAcc(fcontext, collectAcc, missingSlot, otherAccs, 1);
}
NumericDocValues values = null;
Bits docsWithField = null;
@ -335,11 +317,7 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
int segDoc = doc - segBase;
long val = values.get(segDoc);
if (val == 0 && !docsWithField.get(segDoc)) {
if (missingAcc != null) {
missingAcc.collect(segDoc, -1);
}
} else {
if (val != 0 && docsWithField.get(segDoc)) {
int slot = table.add(val); // this can trigger a rehash rehash
// countAcc.incrementCount(slot, 1);
@ -428,7 +406,7 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
// TODO: it would be more efficient to buid up a missing DocSet if we need it here anyway.
SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field));
fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
res.add("missing", missingBucket);
}

View File

@ -283,15 +283,16 @@ public class FacetProcessor<FacetRequestT extends FacetRequest> {
}
public void fillBucket(SimpleOrderedMap<Object> bucket, Query q) throws IOException {
public void fillBucket(SimpleOrderedMap<Object> bucket, Query q, DocSet result) throws IOException {
boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0;
// TODO: always collect counts or not???
DocSet result = null;
int count;
if (needDocSet) {
if (result != null) {
count = result.size();
} else if (needDocSet) {
if (q == null) {
result = fcontext.base;
// result.incref(); // OFF-HEAP

View File

@ -54,7 +54,7 @@ class FacetQueryProcessor extends FacetProcessor<FacetQuery> {
public void process() throws IOException {
super.process();
response = new SimpleOrderedMap<>();
fillBucket(response, freq.q);
fillBucket(response, freq.q, null);
}

View File

@ -336,7 +336,7 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
}
Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper);
fillBucket(bucket, rangeQ);
fillBucket(bucket, rangeQ, null);
return bucket;
}

View File

@ -394,7 +394,7 @@ public class UnInvertedField extends DocTermOrds {
public void collectDocs(FacetFieldProcessorUIF processor) throws IOException {
if (processor.collectAcc==null && processor.missingAcc == null && processor.allBucketsAcc == null && processor.startTermIndex == 0 && processor.endTermIndex >= numTermsInField) {
if (processor.collectAcc==null && processor.allBucketsAcc == null && processor.startTermIndex == 0 && processor.endTermIndex >= numTermsInField) {
getCounts(processor, processor.countAcc);
return;
}