SOLR-7446: simplify missing bucket handling

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1692304 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2015-07-22 17:54:36 +00:00
parent d01866b8fc
commit fbd467ad76
6 changed files with 17 additions and 76 deletions

View File

@ -53,7 +53,6 @@ import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.FieldType; import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.TrieField; import org.apache.solr.schema.TrieField;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet; import org.apache.solr.search.DocSet;
import org.apache.solr.search.HashDocSet; import org.apache.solr.search.HashDocSet;
import org.apache.solr.search.SolrIndexSearcher; import org.apache.solr.search.SolrIndexSearcher;
@ -174,7 +173,6 @@ abstract class FacetFieldProcessor extends FacetProcessor<FacetField> {
SlotAcc[] otherAccs; // Accumulators that do not need to be calculated across all buckets. SlotAcc[] otherAccs; // Accumulators that do not need to be calculated across all buckets.
SpecialSlotAcc allBucketsAcc; // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist. SpecialSlotAcc allBucketsAcc; // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist.
SpecialSlotAcc missingAcc; // this can internally refer to otherAccs and/or collectAcc. setNextReader should be called on otherAccs directly if they exist.
FacetFieldProcessor(FacetContext fcontext, FacetField freq, SchemaField sf) { FacetFieldProcessor(FacetContext fcontext, FacetField freq, SchemaField sf) {
@ -502,7 +500,6 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
int maxSlots; int maxSlots;
int allBucketsSlot = -1; // slot for the primary Accs (countAcc, collectAcc) int allBucketsSlot = -1; // slot for the primary Accs (countAcc, collectAcc)
int missingSlot = -1;
public FacetFieldProcessorFCBase(FacetContext fcontext, FacetField freq, SchemaField sf) { public FacetFieldProcessorFCBase(FacetContext fcontext, FacetField freq, SchemaField sf) {
super(fcontext, freq, sf); super(fcontext, freq, sf);
@ -538,9 +535,6 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
if (freq.allBuckets) { if (freq.allBuckets) {
allBucketsSlot = maxSlots++; allBucketsSlot = maxSlots++;
} }
if (freq.missing) {
missingSlot = maxSlots++;
}
createCollectAcc(nDocs, maxSlots); createCollectAcc(nDocs, maxSlots);
@ -548,11 +542,6 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0); allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
} }
if (freq.missing) {
// TODO: optimize case when missingSlot can be contiguous with other slots
missingAcc = new SpecialSlotAcc(fcontext, collectAcc, missingSlot, otherAccs, 1);
}
collectDocs(); collectDocs();
return findTopSlots(); return findTopSlots();
@ -587,7 +576,7 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
}; };
Slot bottom = null; Slot bottom = null;
for (int i = (startTermIndex == -1) ? 1 : 0; i < nTerms; i++) { for (int i = 0; i < nTerms; i++) {
// screen out buckets not matching mincount immediately (i.e. don't even increment numBuckets) // screen out buckets not matching mincount immediately (i.e. don't even increment numBuckets)
if (effectiveMincount > 0 && countAcc.getCount(i) < effectiveMincount) { if (effectiveMincount > 0 && countAcc.getCount(i) < effectiveMincount) {
continue; continue;
@ -672,29 +661,8 @@ abstract class FacetFieldProcessorFCBase extends FacetFieldProcessor {
if (freq.missing) { if (freq.missing) {
SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>(); SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field)); fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
res.add("missing", missingBucket); res.add("missing", missingBucket);
/*** TODO - OPTIMIZE
DocSet missingDocSet = null;
if (startTermIndex == -1) {
fillBucket(missingBucket, countAcc.getCount(0), null);
} else {
missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
// an extra slot was added to the end for this missing bucket
countAcc.incrementCount(nTerms, missingDocSet.size());
collect(missingDocSet, nTerms);
addStats(missingBucket, nTerms);
}
if (freq.getSubFacets().size() > 0) {
// TODO: we can do better than this!
if (missingDocSet == null) {
missingDocSet = getFieldMissing(fcontext.searcher, fcontext.base, freq.field);
}
processSubs(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), missingDocSet);
}
***/
} }
return res; return res;
@ -751,9 +719,6 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
endTermIndex = (int)si.getValueCount(); endTermIndex = (int)si.getValueCount();
} }
// optimize collecting the "missing" bucket when startTermindex is 0 (since the "missing" ord is -1)
startTermIndex = startTermIndex==0 && freq.missing ? -1 : startTermIndex;
nTerms = endTermIndex - startTermIndex; nTerms = endTermIndex - startTermIndex;
} }
@ -809,6 +774,7 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
int doc; int doc;
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
int segOrd = singleDv.getOrd(doc); int segOrd = singleDv.getOrd(doc);
if (segOrd < 0) continue;
collect(doc, segOrd, toGlobal); collect(doc, segOrd, toGlobal);
} }
} }
@ -817,11 +783,8 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
int doc; int doc;
while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { while ((doc = disi.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
multiDv.setDocument(doc); multiDv.setDocument(doc);
int segOrd = (int)multiDv.nextOrd();
collect(doc, segOrd, toGlobal); // collect anything the first time (even -1 for missing)
if (segOrd < 0) continue;
for(;;) { for(;;) {
segOrd = (int)multiDv.nextOrd(); int segOrd = (int)multiDv.nextOrd();
if (segOrd < 0) break; if (segOrd < 0) break;
collect(doc, segOrd, toGlobal); collect(doc, segOrd, toGlobal);
} }
@ -837,8 +800,7 @@ class FacetFieldProcessorDV extends FacetFieldProcessorFCBase {
if (collectAcc != null) { if (collectAcc != null) {
collectAcc.collect(doc, arrIdx); collectAcc.collect(doc, arrIdx);
} }
// since this can be called for missing, we need to ensure it's currently not. if (allBucketsAcc != null) {
if (allBucketsAcc != null && ord >= 0) {
allBucketsAcc.collect(doc, arrIdx); allBucketsAcc.collect(doc, arrIdx);
} }
} }

View File

@ -32,6 +32,7 @@ import org.apache.lucene.util.PriorityQueue;
import org.apache.solr.common.util.SimpleOrderedMap; import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.schema.SchemaField; import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.DocIterator; import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSetCollector;
class FacetFieldProcessorNumeric extends FacetFieldProcessor { class FacetFieldProcessorNumeric extends FacetFieldProcessor {
static int MAXIMUM_STARTING_TABLE_SIZE=1024; // must be a power of two, non-final to support setting by tests static int MAXIMUM_STARTING_TABLE_SIZE=1024; // must be a power of two, non-final to support setting by tests
@ -137,10 +138,8 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
super(fcontext, freq, sf); super(fcontext, freq, sf);
} }
int missingSlot = -1;
int allBucketsSlot = -1; int allBucketsSlot = -1;
@Override @Override
public void process() throws IOException { public void process() throws IOException {
super.process(); super.process();
@ -148,18 +147,14 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
} }
private void doRehash(LongCounts table) { private void doRehash(LongCounts table) {
if (collectAcc == null && missingAcc == null && allBucketsAcc == null) return; if (collectAcc == null && allBucketsAcc == null) return;
// Our "count" acc is backed by the hash table and will already be rehashed // Our "count" acc is backed by the hash table and will already be rehashed
// otherAccs don't need to be rehashed // otherAccs don't need to be rehashed
int newTableSize = table.numSlots(); int newTableSize = table.numSlots();
int numSlots = newTableSize; int numSlots = newTableSize;
final int oldMissingSlot = missingSlot;
final int oldAllBucketsSlot = allBucketsSlot; final int oldAllBucketsSlot = allBucketsSlot;
if (oldMissingSlot >= 0) {
missingSlot = numSlots++;
}
if (oldAllBucketsSlot >= 0) { if (oldAllBucketsSlot >= 0) {
allBucketsSlot = numSlots++; allBucketsSlot = numSlots++;
} }
@ -178,9 +173,6 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
if (oldSlot < mapping.length) { if (oldSlot < mapping.length) {
return mapping[oldSlot]; return mapping[oldSlot];
} }
if (oldSlot == oldMissingSlot) {
return missingSlot;
}
if (oldSlot == oldAllBucketsSlot) { if (oldSlot == oldAllBucketsSlot) {
return allBucketsSlot; return allBucketsSlot;
} }
@ -192,9 +184,6 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
if (collectAcc != null) { if (collectAcc != null) {
collectAcc.resize(resizer); collectAcc.resize(resizer);
} }
if (missingAcc != null) {
missingAcc.resize(resizer);
}
if (allBucketsAcc != null) { if (allBucketsAcc != null) {
allBucketsAcc.resize(resizer); allBucketsAcc.resize(resizer);
} }
@ -225,9 +214,7 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
int numMissing = 0; int numMissing = 0;
if (freq.missing) {
missingSlot = numSlots++;
}
if (freq.allBuckets) { if (freq.allBuckets) {
allBucketsSlot = numSlots++; allBucketsSlot = numSlots++;
} }
@ -302,11 +289,6 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0); allBucketsAcc = new SpecialSlotAcc(fcontext, collectAcc, allBucketsSlot, otherAccs, 0);
} }
if (freq.missing) {
// TODO: optimize case when missingSlot can be contiguous with other slots
missingAcc = new SpecialSlotAcc(fcontext, collectAcc, missingSlot, otherAccs, 1);
}
NumericDocValues values = null; NumericDocValues values = null;
Bits docsWithField = null; Bits docsWithField = null;
@ -335,11 +317,7 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
int segDoc = doc - segBase; int segDoc = doc - segBase;
long val = values.get(segDoc); long val = values.get(segDoc);
if (val == 0 && !docsWithField.get(segDoc)) { if (val != 0 && docsWithField.get(segDoc)) {
if (missingAcc != null) {
missingAcc.collect(segDoc, -1);
}
} else {
int slot = table.add(val); // this can trigger a rehash rehash int slot = table.add(val); // this can trigger a rehash rehash
// countAcc.incrementCount(slot, 1); // countAcc.incrementCount(slot, 1);
@ -428,7 +406,7 @@ class FacetFieldProcessorNumeric extends FacetFieldProcessor {
// TODO: it would be more efficient to buid up a missing DocSet if we need it here anyway. // TODO: it would be more efficient to buid up a missing DocSet if we need it here anyway.
SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>(); SimpleOrderedMap<Object> missingBucket = new SimpleOrderedMap<>();
fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field)); fillBucket(missingBucket, getFieldMissingQuery(fcontext.searcher, freq.field), null);
res.add("missing", missingBucket); res.add("missing", missingBucket);
} }

View File

@ -283,15 +283,16 @@ public class FacetProcessor<FacetRequestT extends FacetRequest> {
} }
public void fillBucket(SimpleOrderedMap<Object> bucket, Query q) throws IOException { public void fillBucket(SimpleOrderedMap<Object> bucket, Query q, DocSet result) throws IOException {
boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0; boolean needDocSet = freq.getFacetStats().size() > 0 || freq.getSubFacets().size() > 0;
// TODO: always collect counts or not??? // TODO: always collect counts or not???
DocSet result = null;
int count; int count;
if (needDocSet) { if (result != null) {
count = result.size();
} else if (needDocSet) {
if (q == null) { if (q == null) {
result = fcontext.base; result = fcontext.base;
// result.incref(); // OFF-HEAP // result.incref(); // OFF-HEAP

View File

@ -54,7 +54,7 @@ class FacetQueryProcessor extends FacetProcessor<FacetQuery> {
public void process() throws IOException { public void process() throws IOException {
super.process(); super.process();
response = new SimpleOrderedMap<>(); response = new SimpleOrderedMap<>();
fillBucket(response, freq.q); fillBucket(response, freq.q, null);
} }

View File

@ -336,7 +336,7 @@ class FacetRangeProcessor extends FacetProcessor<FacetRange> {
} }
Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper); Query rangeQ = sf.getType().getRangeQuery(null, sf, range.low == null ? null : calc.formatValue(range.low), range.high==null ? null : calc.formatValue(range.high), range.includeLower, range.includeUpper);
fillBucket(bucket, rangeQ); fillBucket(bucket, rangeQ, null);
return bucket; return bucket;
} }

View File

@ -394,7 +394,7 @@ public class UnInvertedField extends DocTermOrds {
public void collectDocs(FacetFieldProcessorUIF processor) throws IOException { public void collectDocs(FacetFieldProcessorUIF processor) throws IOException {
if (processor.collectAcc==null && processor.missingAcc == null && processor.allBucketsAcc == null && processor.startTermIndex == 0 && processor.endTermIndex >= numTermsInField) { if (processor.collectAcc==null && processor.allBucketsAcc == null && processor.startTermIndex == 0 && processor.endTermIndex >= numTermsInField) {
getCounts(processor, processor.countAcc); getCounts(processor, processor.countAcc);
return; return;
} }