LUCENE-4980: fix several issues when mixing range and non-range facets with DrillSideways

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1480025 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-05-07 19:09:38 +00:00
parent d9712d9aa9
commit 27ef1eb203
10 changed files with 305 additions and 32 deletions

View File

@ -112,6 +112,10 @@ Bug Fixes
* LUCENE-949: AnalyzingQueryParser can't work with leading wildcards.
(Tim Allison, Robert Muir, Steve Rowe)
* LUCENE-4980: Fix issues preventing mixing of RangeFacetRequest and
non-RangeFacetRequest when using DrillSideways. (Mike McCandless,
Shai Erera)
Optimizations

View File

@ -48,7 +48,7 @@ public class MultiAssociationsFacetsAggregator implements FacetsAggregator {
* Creates a new {@link MultiAssociationsFacetsAggregator} over the given
* aggregators. The mapping is used by
* {@link #rollupValues(FacetRequest, int, int[], int[], FacetArrays)} to
* rollup the values of the speicfic category by the corresponding
* rollup the values of the specific category by the corresponding
* {@link FacetsAggregator}. However, since each {@link FacetsAggregator}
* handles the associations of a specific type, which could cover multiple
* categories, the aggregation is done on the unique set of aggregators, which

View File

@ -119,7 +119,7 @@ public class RangeAccumulator extends FacetsAccumulator {
}
@Override
protected boolean requiresDocScores() {
public boolean requiresDocScores() {
return false;
}
}

View File

@ -0,0 +1,117 @@
package org.apache.lucene.facet.range;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultsHandler;
import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.search.FacetsAggregator;
import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.IndexReader;
/** Takes multiple facet requests and if necessary splits
* them between the normal {@link FacetsAccumulator} and a
* {@link RangeAccumulator} */
public class RangeFacetsAccumulatorWrapper extends FacetsAccumulator {
// TODO: somehow handle SortedSetDVAccumulator as
// well... but it's tricky because SSDV just uses an
// "ordinary" flat CountFacetRequest so we can't switch
// based on that.
private final FacetsAccumulator accumulator;
private final RangeAccumulator rangeAccumulator;
public static FacetsAccumulator create(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) {
return create(fsp, indexReader, taxoReader, new FacetArrays(taxoReader.getSize()));
}
public static FacetsAccumulator create(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader, FacetArrays arrays) {
List<FacetRequest> rangeRequests = new ArrayList<FacetRequest>();
List<FacetRequest> nonRangeRequests = new ArrayList<FacetRequest>();
for(FacetRequest fr : fsp.facetRequests) {
if (fr instanceof RangeFacetRequest) {
rangeRequests.add(fr);
} else {
nonRangeRequests.add(fr);
}
}
if (rangeRequests.isEmpty()) {
return new FacetsAccumulator(fsp, indexReader, taxoReader, arrays);
} else if (nonRangeRequests.isEmpty()) {
return new RangeAccumulator(fsp, indexReader);
} else {
FacetsAccumulator accumulator = new FacetsAccumulator(new FacetSearchParams(fsp.indexingParams, nonRangeRequests), indexReader, taxoReader, arrays);
RangeAccumulator rangeAccumulator = new RangeAccumulator(new FacetSearchParams(fsp.indexingParams, rangeRequests), indexReader);
return new RangeFacetsAccumulatorWrapper(accumulator, rangeAccumulator, fsp);
}
}
private RangeFacetsAccumulatorWrapper(FacetsAccumulator accumulator, RangeAccumulator rangeAccumulator, FacetSearchParams fsp) {
super(fsp, accumulator.indexReader, accumulator.taxonomyReader);
this.accumulator = accumulator;
this.rangeAccumulator = rangeAccumulator;
}
@Override
public FacetsAggregator getAggregator() {
throw new UnsupportedOperationException();
}
@Override
protected FacetResultsHandler createFacetResultsHandler(FacetRequest fr) {
throw new UnsupportedOperationException();
}
@Override
protected Set<CategoryListParams> getCategoryLists() {
throw new UnsupportedOperationException();
}
@Override
public boolean requiresDocScores() {
return accumulator.requiresDocScores();
}
public List<FacetResult> accumulate(List<MatchingDocs> matchingDocs) throws IOException {
List<FacetResult> results = accumulator.accumulate(matchingDocs);
List<FacetResult> rangeResults = rangeAccumulator.accumulate(matchingDocs);
int aUpto = 0;
int raUpto = 0;
List<FacetResult> merged = new ArrayList<FacetResult>();
for(FacetRequest fr : searchParams.facetRequests) {
if (fr instanceof RangeFacetRequest) {
merged.add(rangeResults.get(raUpto++));
} else {
merged.add(results.get(aUpto++));
}
}
return merged;
}
}

View File

@ -21,7 +21,6 @@ import java.io.IOException;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.lucene.facet.params.CategoryListParams;
import org.apache.lucene.facet.params.FacetIndexingParams;
@ -90,7 +89,7 @@ public final class DrillDownQuery extends Query {
}
/** Used by DrillSideways */
DrillDownQuery(FacetIndexingParams fip, Query baseQuery, List<Query> clauses) {
DrillDownQuery(FacetIndexingParams fip, Query baseQuery, List<Query> clauses, Map<String,Integer> drillDownDims) {
this.fip = fip;
this.query = new BooleanQuery(true);
if (baseQuery != null) {
@ -98,21 +97,8 @@ public final class DrillDownQuery extends Query {
}
for(Query clause : clauses) {
query.add(clause, Occur.MUST);
drillDownDims.put(getDim(clause), drillDownDims.size());
}
}
String getDim(Query clause) {
assert clause instanceof ConstantScoreQuery;
clause = ((ConstantScoreQuery) clause).getQuery();
assert clause instanceof TermQuery || clause instanceof BooleanQuery;
String term;
if (clause instanceof TermQuery) {
term = ((TermQuery) clause).getTerm().text();
} else {
term = ((TermQuery) ((BooleanQuery) clause).getClauses()[0].getQuery()).getTerm().text();
}
return term.split(Pattern.quote(Character.toString(fip.getFacetDelimChar())), 2)[0];
this.drillDownDims.putAll(drillDownDims);
}
/**

View File

@ -20,6 +20,7 @@ package org.apache.lucene.facet.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
@ -94,6 +95,11 @@ public class DrillSideways {
BooleanClause[] clauses = in.getBooleanQuery().getClauses();
Map<String,Integer> drillDownDims = in.getDims();
String[] dimsByIndex = new String[drillDownDims.size()];
for(Map.Entry<String,Integer> ent : drillDownDims.entrySet()) {
dimsByIndex[ent.getValue()] = ent.getKey();
}
int startClause;
if (clauses.length == drillDownDims.size()) {
startClause = 0;
@ -107,13 +113,15 @@ public class DrillSideways {
// baseQuery:
List<Query> nonFacetClauses = new ArrayList<Query>();
List<Query> facetClauses = new ArrayList<Query>();
Map<String,Integer> dimToIndex = new LinkedHashMap<String,Integer>();
for(int i=startClause;i<clauses.length;i++) {
Query q = clauses[i].getQuery();
String dim = in.getDim(q);
String dim = dimsByIndex[i-startClause];
if (!facetDims.contains(dim)) {
nonFacetClauses.add(q);
} else {
facetClauses.add(q);
dimToIndex.put(dim, dimToIndex.size());
}
}
@ -127,7 +135,7 @@ public class DrillSideways {
newBaseQuery.add(q, BooleanClause.Occur.MUST);
}
return new DrillDownQuery(fsp.indexingParams, newBaseQuery, facetClauses);
return new DrillDownQuery(fsp.indexingParams, newBaseQuery, facetClauses, dimToIndex);
} else {
// No change:
return in;
@ -157,6 +165,20 @@ public class DrillSideways {
return new DrillSidewaysResult(c.getFacetResults(), null);
}
List<FacetRequest> ddRequests = new ArrayList<FacetRequest>();
for(FacetRequest fr : fsp.facetRequests) {
assert fr.categoryPath.length > 0;
if (!drillDownDims.containsKey(fr.categoryPath.components[0])) {
ddRequests.add(fr);
}
}
FacetSearchParams fsp2;
if (!ddRequests.isEmpty()) {
fsp2 = new FacetSearchParams(fsp.indexingParams, ddRequests);
} else {
fsp2 = null;
}
BooleanQuery ddq = query.getBooleanQuery();
BooleanClause[] clauses = ddq.getClauses();
@ -173,7 +195,7 @@ public class DrillSideways {
startClause = 1;
}
FacetsCollector drillDownCollector = FacetsCollector.create(getDrillDownAccumulator(fsp));
FacetsCollector drillDownCollector = fsp2 == null ? null : FacetsCollector.create(getDrillDownAccumulator(fsp2));
FacetsCollector[] drillSidewaysCollectors = new FacetsCollector[drillDownDims.size()];
@ -225,6 +247,8 @@ public class DrillSideways {
break;
}
}
} else {
useCollectorMethod = true;
}
}
}
@ -246,6 +270,7 @@ public class DrillSideways {
List<FacetResult> mergedResults = new ArrayList<FacetResult>();
int[] requestUpto = new int[drillDownDims.size()];
int ddUpto = 0;
for(int i=0;i<fsp.facetRequests.size();i++) {
FacetRequest fr = fsp.facetRequests.get(i);
assert fr.categoryPath.length > 0;
@ -260,7 +285,7 @@ public class DrillSideways {
//System.out.println("get DD results");
}
//System.out.println("add dd results " + i);
mergedResults.add(drillDownResults.get(i));
mergedResults.add(drillDownResults.get(ddUpto++));
} else {
// Drill sideways dim:
int dim = dimIndex.intValue();
@ -359,7 +384,7 @@ public class DrillSideways {
subQuery.setMinimumNumberShouldMatch(minShouldMatch);
//System.out.println("EXE " + topQuery);
// System.out.println("EXE " + topQuery);
// Collects against the passed-in
// drillDown/SidewaysCollectors as a side effect:

View File

@ -82,7 +82,9 @@ class DrillSidewaysCollector extends Collector {
// drillDown collector:
//System.out.println(" hit " + drillDownCollector);
hitCollector.collect(doc);
drillDownCollector.collect(doc);
if (drillDownCollector != null) {
drillDownCollector.collect(doc);
}
// Also collect across all drill-sideways counts so
// we "merge in" drill-down counts for this
@ -98,21 +100,28 @@ class DrillSidewaysCollector extends Collector {
}
} else {
boolean found = false;
for(int i=0;i<subScorers.length;i++) {
if (subScorers[i] == null) {
// This segment did not have any docs with this
// drill-down field & value:
continue;
drillSidewaysCollectors[i].collect(doc);
assert allMatchesFrom(i+1, doc);
found = true;
break;
}
int subDoc = subScorers[i].docID();
//System.out.println(" sub: " + subDoc);
//System.out.println(" i=" + i + " sub: " + subDoc);
if (subDoc != doc) {
//System.out.println(" +ds[" + i + "]");
assert subDoc > doc: "subDoc=" + subDoc + " doc=" + doc;
drillSidewaysCollectors[i].collect(doc);
assert allMatchesFrom(i+1, doc);
found = true;
break;
}
}
assert found;
}
}
@ -134,8 +143,11 @@ class DrillSidewaysCollector extends Collector {
@Override
public void setNextReader(AtomicReaderContext leaf) throws IOException {
//System.out.println("DS.setNextReader reader=" + leaf.reader());
hitCollector.setNextReader(leaf);
drillDownCollector.setNextReader(leaf);
if (drillDownCollector != null) {
drillDownCollector.setNextReader(leaf);
}
for(Collector dsc : drillSidewaysCollectors) {
dsc.setNextReader(leaf);
}
@ -166,7 +178,9 @@ class DrillSidewaysCollector extends Collector {
Arrays.fill(subScorers, null);
findScorers(scorer);
hitCollector.setScorer(scorer);
drillDownCollector.setScorer(scorer);
if (drillDownCollector != null) {
drillDownCollector.setScorer(scorer);
}
for(Collector dsc : drillSidewaysCollectors) {
dsc.setScorer(scorer);
}

View File

@ -63,8 +63,10 @@ class DrillSidewaysScorer extends Scorer {
//}
//System.out.println("score r=" + context.reader());
collector.setScorer(this);
drillDownCollector.setScorer(this);
drillDownCollector.setNextReader(context);
if (drillDownCollector != null) {
drillDownCollector.setScorer(this);
drillDownCollector.setNextReader(context);
}
for(DocsEnumsAndFreq dim : dims) {
dim.sidewaysCollector.setScorer(this);
dim.sidewaysCollector.setNextReader(context);
@ -393,7 +395,9 @@ class DrillSidewaysScorer extends Scorer {
//}
collector.collect(collectDocID);
drillDownCollector.collect(collectDocID);
if (drillDownCollector != null) {
drillDownCollector.collect(collectDocID);
}
// TODO: we could "fix" faceting of the sideways counts
// to do this "union" (of the drill down hits) in the

View File

@ -196,7 +196,7 @@ public class FacetsAccumulator {
return res;
}
protected boolean requiresDocScores() {
public boolean requiresDocScores() {
return getAggregator().requiresDocScores();
}
}

View File

@ -17,7 +17,10 @@ package org.apache.lucene.facet.range;
* limitations under the License.
*/
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
@ -29,18 +32,30 @@ import org.apache.lucene.document.LongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.FacetTestUtils;
import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.search.CountFacetRequest;
import org.apache.lucene.facet.search.DrillDownQuery;
import org.apache.lucene.facet.search.DrillSideways.DrillSidewaysResult;
import org.apache.lucene.facet.search.DrillSideways;
import org.apache.lucene.facet.search.FacetRequest;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetResultNode;
import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.search.FacetsCollector;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util._TestUtil;
public class TestRangeAccumulator extends FacetTestCase {
@ -81,6 +96,114 @@ public class TestRangeAccumulator extends FacetTestCase {
d.close();
}
/** Tests single request that mixes Range and non-Range
* faceting, with DrillSideways. */
public void testMixedRangeAndNonRange() throws Exception {
Directory d = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), d);
Directory td = newDirectory();
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, IndexWriterConfig.OpenMode.CREATE);
FacetFields ff = new FacetFields(tw);
for(long l=0;l<100;l++) {
Document doc = new Document();
// For computing range facet counts:
doc.add(new NumericDocValuesField("field", l));
// For drill down by numeric range:
doc.add(new LongField("field", l, Field.Store.NO));
CategoryPath cp;
if ((l&3) == 0) {
cp = new CategoryPath("dim", "a");
} else {
cp = new CategoryPath("dim", "b");
}
ff.addFields(doc, Collections.singletonList(cp));
w.addDocument(doc);
}
IndexReader r = w.getReader();
w.close();
final TaxonomyReader tr = new DirectoryTaxonomyReader(tw);
tw.close();
IndexSearcher s = newSearcher(r);
final FacetSearchParams fsp = new FacetSearchParams(
new CountFacetRequest(new CategoryPath("dim"), 2),
new RangeFacetRequest<LongRange>("field",
new LongRange("less than 10", 0L, true, 10L, false),
new LongRange("less than or equal to 10", 0L, true, 10L, true),
new LongRange("over 90", 90L, false, 100L, false),
new LongRange("90 or above", 90L, true, 100L, false),
new LongRange("over 1000", 1000L, false, Long.MAX_VALUE, false)));
final Set<String> dimSeen = new HashSet<String>();
DrillSideways ds = new DrillSideways(s, tr) {
@Override
protected FacetsAccumulator getDrillDownAccumulator(FacetSearchParams fsp) {
checkSeen(fsp);
return RangeFacetsAccumulatorWrapper.create(fsp, searcher.getIndexReader(), tr);
}
@Override
protected FacetsAccumulator getDrillSidewaysAccumulator(String dim, FacetSearchParams fsp) {
checkSeen(fsp);
return RangeFacetsAccumulatorWrapper.create(fsp, searcher.getIndexReader(), tr);
}
private void checkSeen(FacetSearchParams fsp) {
// Each dim should should up only once, across
// both drillDown and drillSideways requests:
for(FacetRequest fr : fsp.facetRequests) {
String dim = fr.categoryPath.components[0];
assertFalse("dim " + dim + " already seen", dimSeen.contains(dim));
dimSeen.add(dim);
}
}
@Override
protected boolean scoreSubDocsAtOnce() {
return random().nextBoolean();
}
};
// First search, no drill downs:
DrillDownQuery ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT, new MatchAllDocsQuery());
DrillSidewaysResult dsr = ds.search(null, ddq, 10, fsp);
assertEquals(100, dsr.hits.totalHits);
assertEquals(2, dsr.facetResults.size());
assertEquals("dim (0)\n b (75)\n a (25)\n", FacetTestUtils.toSimpleString(dsr.facetResults.get(0)));
assertEquals("field (0)\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", FacetTestUtils.toSimpleString(dsr.facetResults.get(1)));
// Second search, drill down on dim=b:
ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT, new MatchAllDocsQuery());
ddq.add(new CategoryPath("dim", "b"));
dimSeen.clear();
dsr = ds.search(null, ddq, 10, fsp);
assertEquals(75, dsr.hits.totalHits);
assertEquals(2, dsr.facetResults.size());
assertEquals("dim (0)\n b (75)\n a (25)\n", FacetTestUtils.toSimpleString(dsr.facetResults.get(0)));
assertEquals("field (0)\n less than 10 (7)\n less than or equal to 10 (8)\n over 90 (7)\n 90 or above (8)\n over 1000 (0)\n", FacetTestUtils.toSimpleString(dsr.facetResults.get(1)));
// Third search, drill down on "less than or equal to 10":
ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT, new MatchAllDocsQuery());
ddq.add("field", NumericRangeQuery.newLongRange("field", 0L, 10L, true, true));
dimSeen.clear();
dsr = ds.search(null, ddq, 10, fsp);
assertEquals(11, dsr.hits.totalHits);
assertEquals(2, dsr.facetResults.size());
assertEquals("dim (0)\n b (8)\n a (3)\n", FacetTestUtils.toSimpleString(dsr.facetResults.get(0)));
assertEquals("field (0)\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", FacetTestUtils.toSimpleString(dsr.facetResults.get(1)));
IOUtils.close(tr, td, r, d);
}
public void testBasicDouble() throws Exception {
Directory d = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), d);