From bb1164b2fc524138700573e793f0da849b4028dd Mon Sep 17 00:00:00 2001 From: Shai Erera Date: Mon, 29 Jul 2013 13:07:30 +0000 Subject: [PATCH] LUCENE-4985: Make it easier to mix different kinds of FacetRequests git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1508043 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 7 + .../demo/facet/AssociationsFacetsExample.java | 30 +-- .../lucene/demo/facet/RangeFacetsExample.java | 13 +- .../facet/SimpleSortedSetFacetsExample.java | 4 +- ...a => SumFloatAssociationFacetRequest.java} | 11 +- ...ava => SumIntAssociationFacetRequest.java} | 11 +- .../facet/params/FacetSearchParams.java | 10 +- .../lucene/facet/range/RangeAccumulator.java | 31 +-- .../lucene/facet/range/RangeFacetRequest.java | 7 + .../range/RangeFacetsAccumulatorWrapper.java | 117 --------- .../apache/lucene/facet/sampling/Sampler.java | 7 + .../facet/search/CountFacetRequest.java | 7 + .../search/CountingFacetsAggregator.java | 13 + .../lucene/facet/search/DrillSideways.java | 34 ++- .../lucene/facet/search/FacetRequest.java | 11 + .../lucene/facet/search/FacetResult.java | 6 + .../facet/search/FacetsAccumulator.java | 245 ++++++++---------- .../lucene/facet/search/FacetsCollector.java | 2 +- .../search/FastCountingFacetsAggregator.java | 18 -- .../facet/search/MultiFacetsAccumulator.java | 69 +++++ .../MultiFacetsAggregator.java} | 21 +- .../search/PerCategoryListAggregator.java | 3 + .../search/StandardFacetsAccumulator.java | 18 +- .../facet/search/SumScoreFacetRequest.java | 6 + .../search/TaxonomyFacetsAccumulator.java | 225 ++++++++++++++++ .../SortedSetDocValuesAccumulator.java | 245 +++++++++--------- .../AssociationsFacetRequestTest.java | 39 +-- .../facet/range/TestRangeAccumulator.java | 206 +++++++++++---- .../search/CountingFacetsAggregatorTest.java | 4 +- .../lucene/facet/search/FacetResultTest.java | 2 +- .../facet/search/TestDrillSideways.java | 18 +- .../facet/search/TestFacetsCollector.java | 42 +-- 
.../TestSortedSetDocValuesFacets.java | 4 +- 33 files changed, 879 insertions(+), 607 deletions(-) rename lucene/facet/src/java/org/apache/lucene/facet/associations/{AssociationFloatSumFacetRequest.java => SumFloatAssociationFacetRequest.java} (79%) rename lucene/facet/src/java/org/apache/lucene/facet/associations/{AssociationIntSumFacetRequest.java => SumIntAssociationFacetRequest.java} (79%) delete mode 100644 lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetsAccumulatorWrapper.java create mode 100644 lucene/facet/src/java/org/apache/lucene/facet/search/MultiFacetsAccumulator.java rename lucene/facet/src/java/org/apache/lucene/facet/{associations/MultiAssociationsFacetsAggregator.java => search/MultiFacetsAggregator.java} (80%) create mode 100644 lucene/facet/src/java/org/apache/lucene/facet/search/TaxonomyFacetsAccumulator.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 7c52f2d7c34..6166848b597 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -70,6 +70,13 @@ New features * LUCENE-5091: SpanNotQuery can now be configured with pre and post slop to act as a hypothetical SpanNotNearQuery. (Tim Allison via David Smiley) +* LUCENE-4985: FacetsAccumulator.create() is now able to create a + MultiFacetsAccumulator over a mixed set of facet requests. MultiFacetsAccumulator + allows wrapping multiple FacetsAccumulators, allowing to easily mix + existing and custom ones. TaxonomyFacetsAccumulator supports any + FacetRequest which implements createFacetsAggregator and was indexed + using the taxonomy index. (Shai Erera) + Bug Fixes * LUCENE-5116: IndexWriter.addIndexes(IndexReader...) 
should drop empty (or all diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/AssociationsFacetsExample.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/AssociationsFacetsExample.java index 78e7adcfe07..fdfd5fd4d8b 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/facet/AssociationsFacetsExample.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/AssociationsFacetsExample.java @@ -1,27 +1,20 @@ package org.apache.lucene.demo.facet; import java.io.IOException; -import java.util.HashMap; import java.util.List; -import java.util.Map; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.facet.associations.AssociationFloatSumFacetRequest; -import org.apache.lucene.facet.associations.AssociationIntSumFacetRequest; import org.apache.lucene.facet.associations.AssociationsFacetFields; import org.apache.lucene.facet.associations.CategoryAssociation; import org.apache.lucene.facet.associations.CategoryAssociationsContainer; import org.apache.lucene.facet.associations.CategoryFloatAssociation; import org.apache.lucene.facet.associations.CategoryIntAssociation; -import org.apache.lucene.facet.associations.MultiAssociationsFacetsAggregator; -import org.apache.lucene.facet.associations.SumFloatAssociationFacetsAggregator; -import org.apache.lucene.facet.associations.SumIntAssociationFacetsAggregator; +import org.apache.lucene.facet.associations.SumFloatAssociationFacetRequest; +import org.apache.lucene.facet.associations.SumIntAssociationFacetRequest; import org.apache.lucene.facet.index.FacetFields; import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.search.FacetResult; -import org.apache.lucene.facet.search.FacetsAccumulator; -import org.apache.lucene.facet.search.FacetsAggregator; import org.apache.lucene.facet.search.FacetsCollector; import org.apache.lucene.facet.taxonomy.CategoryPath; import 
org.apache.lucene.facet.taxonomy.TaxonomyReader; @@ -135,22 +128,9 @@ public class AssociationsFacetsExample { CategoryPath tags = new CategoryPath("tags"); CategoryPath genre = new CategoryPath("genre"); - FacetSearchParams fsp = new FacetSearchParams( - new AssociationIntSumFacetRequest(tags, 10), - new AssociationFloatSumFacetRequest(genre, 10)); - - // every category has a different type of association, so use chain their - // respective aggregators. - final Map aggregators = new HashMap(); - aggregators.put(tags, new SumIntAssociationFacetsAggregator()); - aggregators.put(genre, new SumFloatAssociationFacetsAggregator()); - FacetsAccumulator fa = new FacetsAccumulator(fsp, indexReader, taxoReader) { - @Override - public FacetsAggregator getAggregator() { - return new MultiAssociationsFacetsAggregator(aggregators); - } - }; - FacetsCollector fc = FacetsCollector.create(fa); + FacetSearchParams fsp = new FacetSearchParams(new SumIntAssociationFacetRequest(tags, 10), + new SumFloatAssociationFacetRequest(genre, 10)); + FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/RangeFacetsExample.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/RangeFacetsExample.java index 183828fe873..021429f95cd 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/facet/RangeFacetsExample.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/RangeFacetsExample.java @@ -27,7 +27,6 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.LongField; import org.apache.lucene.document.NumericDocValuesField; import org.apache.lucene.facet.params.FacetIndexingParams; -import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.range.LongRange; import org.apache.lucene.facet.range.RangeAccumulator; import 
org.apache.lucene.facet.range.RangeFacetRequest; @@ -80,13 +79,12 @@ public class RangeFacetsExample implements Closeable { /** User runs a query and counts facets. */ public List search() throws IOException { - FacetSearchParams fsp = new FacetSearchParams( - new RangeFacetRequest("timestamp", - new LongRange("Past hour", nowSec-3600, true, nowSec, true), - new LongRange("Past six hours", nowSec-6*3600, true, nowSec, true), - new LongRange("Past day", nowSec-24*3600, true, nowSec, true))); + RangeFacetRequest rangeFacetRequest = new RangeFacetRequest("timestamp", + new LongRange("Past hour", nowSec-3600, true, nowSec, true), + new LongRange("Past six hours", nowSec-6*3600, true, nowSec, true), + new LongRange("Past day", nowSec-24*3600, true, nowSec, true)); // Aggregatses the facet counts - FacetsCollector fc = FacetsCollector.create(new RangeAccumulator(fsp, searcher.getIndexReader())); + FacetsCollector fc = FacetsCollector.create(new RangeAccumulator(rangeFacetRequest)); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally @@ -112,6 +110,7 @@ public class RangeFacetsExample implements Closeable { return searcher.search(q, 10); } + @Override public void close() throws IOException { searcher.getIndexReader().close(); indexDir.close(); diff --git a/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java b/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java index 351dc6caef1..3cd7c1542ce 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/facet/SimpleSortedSetFacetsExample.java @@ -91,7 +91,7 @@ public class SimpleSortedSetFacetsExample { new CountFacetRequest(new CategoryPath("Author"), 10)); // Aggregatses the facet counts - FacetsCollector fc = FacetsCollector.create(new SortedSetDocValuesAccumulator(fsp, state)); + FacetsCollector fc = 
FacetsCollector.create(new SortedSetDocValuesAccumulator(state, fsp)); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally @@ -117,7 +117,7 @@ public class SimpleSortedSetFacetsExample { FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("Author"), 10)); DrillDownQuery q = new DrillDownQuery(fsp.indexingParams, new MatchAllDocsQuery()); q.add(new CategoryPath("Publish Year/2010", '/')); - FacetsCollector fc = FacetsCollector.create(new SortedSetDocValuesAccumulator(fsp, state)); + FacetsCollector fc = FacetsCollector.create(new SortedSetDocValuesAccumulator(state, fsp)); searcher.search(q, fc); // Retrieve results diff --git a/lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationFloatSumFacetRequest.java b/lucene/facet/src/java/org/apache/lucene/facet/associations/SumFloatAssociationFacetRequest.java similarity index 79% rename from lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationFloatSumFacetRequest.java rename to lucene/facet/src/java/org/apache/lucene/facet/associations/SumFloatAssociationFacetRequest.java index 8255b0d0753..b400146303f 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationFloatSumFacetRequest.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/associations/SumFloatAssociationFacetRequest.java @@ -1,7 +1,9 @@ package org.apache.lucene.facet.associations; +import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.search.FacetArrays; import org.apache.lucene.facet.search.FacetRequest; +import org.apache.lucene.facet.search.FacetsAggregator; import org.apache.lucene.facet.taxonomy.CategoryPath; /* @@ -27,16 +29,21 @@ import org.apache.lucene.facet.taxonomy.CategoryPath; * * @lucene.experimental */ -public class AssociationFloatSumFacetRequest extends FacetRequest { +public class SumFloatAssociationFacetRequest extends FacetRequest { /** * Create a float 
association facet request for a given node in the * taxonomy. */ - public AssociationFloatSumFacetRequest(CategoryPath path, int num) { + public SumFloatAssociationFacetRequest(CategoryPath path, int num) { super(path, num); } + @Override + public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) { + return new SumFloatAssociationFacetsAggregator(); + } + @Override public double getValueOf(FacetArrays arrays, int ordinal) { return arrays.getFloatArray()[ordinal]; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationIntSumFacetRequest.java b/lucene/facet/src/java/org/apache/lucene/facet/associations/SumIntAssociationFacetRequest.java similarity index 79% rename from lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationIntSumFacetRequest.java rename to lucene/facet/src/java/org/apache/lucene/facet/associations/SumIntAssociationFacetRequest.java index 2c94c3c77af..0ae698318e5 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/associations/AssociationIntSumFacetRequest.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/associations/SumIntAssociationFacetRequest.java @@ -1,7 +1,9 @@ package org.apache.lucene.facet.associations; +import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.search.FacetArrays; import org.apache.lucene.facet.search.FacetRequest; +import org.apache.lucene.facet.search.FacetsAggregator; import org.apache.lucene.facet.taxonomy.CategoryPath; /* @@ -27,16 +29,21 @@ import org.apache.lucene.facet.taxonomy.CategoryPath; * * @lucene.experimental */ -public class AssociationIntSumFacetRequest extends FacetRequest { +public class SumIntAssociationFacetRequest extends FacetRequest { /** * Create an integer association facet request for a given node in the * taxonomy. 
*/ - public AssociationIntSumFacetRequest(CategoryPath path, int num) { + public SumIntAssociationFacetRequest(CategoryPath path, int num) { super(path, num); } + @Override + public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) { + return new SumIntAssociationFacetsAggregator(); + } + @Override public FacetArraysSource getFacetArraysSource() { return FacetArraysSource.INT; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/params/FacetSearchParams.java b/lucene/facet/src/java/org/apache/lucene/facet/params/FacetSearchParams.java index f0a9805b014..c4bfc224da5 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/params/FacetSearchParams.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/params/FacetSearchParams.java @@ -23,13 +23,9 @@ import org.apache.lucene.facet.search.FacetRequest; */ /** - * Defines parameters that are needed for faceted search. The list of - * {@link FacetRequest facet requests} denotes the facets for which aggregated - * should be done. - *

- * One can pass {@link FacetIndexingParams} in order to tell the search code how - * to read the facets information. Note that you must use the same - * {@link FacetIndexingParams} that were used for indexing. + * Defines parameters that are needed for faceted search: the list of facet + * {@link FacetRequest facet requests} which should be aggregated as well as the + * {@link FacetIndexingParams indexing params} that were used to index them. * * @lucene.experimental */ diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java b/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java index fffcb77d8e9..0a07e92d1a7 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/range/RangeAccumulator.java @@ -19,6 +19,7 @@ package org.apache.lucene.facet.range; import java.io.IOException; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.apache.lucene.facet.params.FacetSearchParams; @@ -26,10 +27,8 @@ import org.apache.lucene.facet.search.FacetRequest; import org.apache.lucene.facet.search.FacetResult; import org.apache.lucene.facet.search.FacetResultNode; import org.apache.lucene.facet.search.FacetsAccumulator; -import org.apache.lucene.facet.search.FacetsAggregator; import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; import org.apache.lucene.facet.taxonomy.CategoryPath; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.NumericDocValues; /** Uses a {@link NumericDocValues} and accumulates @@ -51,30 +50,26 @@ public class RangeAccumulator extends FacetsAccumulator { final List requests = new ArrayList(); - public RangeAccumulator(FacetSearchParams fsp, IndexReader reader) { - super(fsp, reader, null, null); - - for(FacetRequest fr : fsp.facetRequests) { - + public RangeAccumulator(FacetRequest... 
facetRequests) { + this(Arrays.asList(facetRequests)); + } + + public RangeAccumulator(List facetRequests) { + super(new FacetSearchParams(facetRequests)); + for (FacetRequest fr : facetRequests) { if (!(fr instanceof RangeFacetRequest)) { - throw new IllegalArgumentException("only RangeFacetRequest is supported; got " + fsp.facetRequests.get(0).getClass()); + throw new IllegalArgumentException("this accumulator only supports RangeFacetRequest; got " + fr); } if (fr.categoryPath.length != 1) { throw new IllegalArgumentException("only flat (dimension only) CategoryPath is allowed"); } - + RangeFacetRequest rfr = (RangeFacetRequest) fr; - - requests.add(new RangeSet(rfr.ranges, rfr.categoryPath.components[0])); + requests.add(new RangeSet(rfr.ranges, fr.categoryPath.components[0])); } } - @Override - public FacetsAggregator getAggregator() { - throw new UnsupportedOperationException(); - } - @Override public List accumulate(List matchingDocs) throws IOException { @@ -82,7 +77,7 @@ public class RangeAccumulator extends FacetsAccumulator { // faster to do MachingDocs on the inside) ... see // patches on LUCENE-4965): List results = new ArrayList(); - for(int i=0;i extends FacetRequest { throw new UnsupportedOperationException(); } + @Override + public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) { + return null; + } + @Override public double getValueOf(FacetArrays arrays, int ordinal) { throw new UnsupportedOperationException(); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetsAccumulatorWrapper.java b/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetsAccumulatorWrapper.java deleted file mode 100644 index ef108f3edea..00000000000 --- a/lucene/facet/src/java/org/apache/lucene/facet/range/RangeFacetsAccumulatorWrapper.java +++ /dev/null @@ -1,117 +0,0 @@ -package org.apache.lucene.facet.range; - -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; - -import org.apache.lucene.facet.params.CategoryListParams; -import org.apache.lucene.facet.params.FacetSearchParams; -import org.apache.lucene.facet.search.FacetArrays; -import org.apache.lucene.facet.search.FacetRequest; -import org.apache.lucene.facet.search.FacetResult; -import org.apache.lucene.facet.search.FacetResultsHandler; -import org.apache.lucene.facet.search.FacetsAccumulator; -import org.apache.lucene.facet.search.FacetsAggregator; -import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; -import org.apache.lucene.facet.taxonomy.TaxonomyReader; -import org.apache.lucene.index.IndexReader; - -/** Takes multiple facet requests and if necessary splits - * them between the normal {@link FacetsAccumulator} and a - * {@link RangeAccumulator} */ -public class RangeFacetsAccumulatorWrapper extends FacetsAccumulator { - // TODO: somehow handle SortedSetDVAccumulator as - // well... but it's tricky because SSDV just uses an - // "ordinary" flat CountFacetRequest so we can't switch - // based on that. 
- private final FacetsAccumulator accumulator; - private final RangeAccumulator rangeAccumulator; - - public static FacetsAccumulator create(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) { - return create(fsp, indexReader, taxoReader, new FacetArrays(taxoReader.getSize())); - } - - public static FacetsAccumulator create(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader, FacetArrays arrays) { - List rangeRequests = new ArrayList(); - List nonRangeRequests = new ArrayList(); - for(FacetRequest fr : fsp.facetRequests) { - if (fr instanceof RangeFacetRequest) { - rangeRequests.add(fr); - } else { - nonRangeRequests.add(fr); - } - } - - if (rangeRequests.isEmpty()) { - return new FacetsAccumulator(fsp, indexReader, taxoReader, arrays); - } else if (nonRangeRequests.isEmpty()) { - return new RangeAccumulator(fsp, indexReader); - } else { - FacetsAccumulator accumulator = new FacetsAccumulator(new FacetSearchParams(fsp.indexingParams, nonRangeRequests), indexReader, taxoReader, arrays); - RangeAccumulator rangeAccumulator = new RangeAccumulator(new FacetSearchParams(fsp.indexingParams, rangeRequests), indexReader); - return new RangeFacetsAccumulatorWrapper(accumulator, rangeAccumulator, fsp); - } - } - - private RangeFacetsAccumulatorWrapper(FacetsAccumulator accumulator, RangeAccumulator rangeAccumulator, FacetSearchParams fsp) { - super(fsp, accumulator.indexReader, accumulator.taxonomyReader); - this.accumulator = accumulator; - this.rangeAccumulator = rangeAccumulator; - } - - @Override - public FacetsAggregator getAggregator() { - throw new UnsupportedOperationException(); - } - - @Override - protected FacetResultsHandler createFacetResultsHandler(FacetRequest fr) { - throw new UnsupportedOperationException(); - } - - @Override - protected Set getCategoryLists() { - throw new UnsupportedOperationException(); - } - - @Override - public boolean requiresDocScores() { - return accumulator.requiresDocScores(); - } - - 
public List accumulate(List matchingDocs) throws IOException { - List results = accumulator.accumulate(matchingDocs); - List rangeResults = rangeAccumulator.accumulate(matchingDocs); - - int aUpto = 0; - int raUpto = 0; - List merged = new ArrayList(); - for(FacetRequest fr : searchParams.facetRequests) { - if (fr instanceof RangeFacetRequest) { - merged.add(rangeResults.get(raUpto++)); - } else { - merged.add(results.get(aUpto++)); - } - } - - return merged; - } -} diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java b/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java index ec39ef7a649..047fdce793e 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/sampling/Sampler.java @@ -4,12 +4,14 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.search.Aggregator; import org.apache.lucene.facet.search.FacetArrays; import org.apache.lucene.facet.search.FacetRequest; import org.apache.lucene.facet.search.FacetResult; import org.apache.lucene.facet.search.FacetResultNode; +import org.apache.lucene.facet.search.FacetsAggregator; import org.apache.lucene.facet.search.ScoredDocIDs; import org.apache.lucene.facet.taxonomy.TaxonomyReader; @@ -214,6 +216,11 @@ public abstract class Sampler { setSortOrder(orig.getSortOrder()); } + @Override + public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) { + return orig.createFacetsAggregator(fip); + } + @Override public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) throws IOException { diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/CountFacetRequest.java b/lucene/facet/src/java/org/apache/lucene/facet/search/CountFacetRequest.java index 
9519230b6c9..40ad3a6304e 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/CountFacetRequest.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/CountFacetRequest.java @@ -1,6 +1,7 @@ package org.apache.lucene.facet.search; import org.apache.lucene.facet.complements.ComplementCountingAggregator; +import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyReader; @@ -32,6 +33,7 @@ public class CountFacetRequest extends FacetRequest { super(path, num); } + // TODO nuke Aggregator and move this logic to StandardFacetsAccumulator -- it should only be used for counting @Override public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) { // we rely on that, if needed, result is cleared by arrays! @@ -42,6 +44,11 @@ public class CountFacetRequest extends FacetRequest { return new CountingAggregator(a); } + @Override + public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) { + return CountingFacetsAggregator.create(fip.getCategoryListParams(categoryPath)); + } + @Override public double getValueOf(FacetArrays arrays, int ordinal) { return arrays.getIntArray()[ordinal]; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java b/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java index 277f32d4a5e..26c3fe1c9b6 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsAggregator.java @@ -2,6 +2,7 @@ package org.apache.lucene.facet.search; import java.io.IOException; +import org.apache.lucene.facet.encoding.DGapVInt8IntDecoder; import org.apache.lucene.facet.params.CategoryListParams; import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; import org.apache.lucene.util.IntsRef; @@ -34,6 +35,18 @@ 
import org.apache.lucene.util.IntsRef; */ public class CountingFacetsAggregator extends IntRollupFacetsAggregator { + /** + * Returns a {@link FacetsAggregator} suitable for counting categories given + * the {@link CategoryListParams}. + */ + public static FacetsAggregator create(CategoryListParams clp) { + if (clp.createEncoder().createMatchingDecoder().getClass() == DGapVInt8IntDecoder.class) { + return new FastCountingFacetsAggregator(); + } else { + return new CountingFacetsAggregator(); + } + } + private final IntsRef ordinals = new IntsRef(32); @Override diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java b/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java index f3318edd784..b2654ce6eb7 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/DrillSideways.java @@ -25,7 +25,10 @@ import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.lucene.facet.index.FacetFields; import org.apache.lucene.facet.params.FacetSearchParams; +import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetFields; +import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; @@ -70,11 +73,26 @@ public class DrillSideways { protected final IndexSearcher searcher; protected final TaxonomyReader taxoReader; - - /** Create a new {@code DrillSideways} instance. */ + protected final SortedSetDocValuesReaderState state; + + /** + * Create a new {@code DrillSideways} instance, assuming the categories were + * indexed with {@link FacetFields}. 
+ */ public DrillSideways(IndexSearcher searcher, TaxonomyReader taxoReader) { this.searcher = searcher; this.taxoReader = taxoReader; + this.state = null; + } + + /** + * Create a new {@code DrillSideways} instance, assuming the categories were + * indexed with {@link SortedSetDocValuesFacetFields}. + */ + public DrillSideways(IndexSearcher searcher, SortedSetDocValuesReaderState state) { + this.searcher = searcher; + this.taxoReader = null; + this.state = state; } /** Moves any drill-downs that don't have a corresponding @@ -440,13 +458,21 @@ public class DrillSideways { /** Override this to use a custom drill-down {@link * FacetsAccumulator}. */ protected FacetsAccumulator getDrillDownAccumulator(FacetSearchParams fsp) throws IOException { - return FacetsAccumulator.create(fsp, searcher.getIndexReader(), taxoReader); + if (taxoReader != null) { + return FacetsAccumulator.create(fsp, searcher.getIndexReader(), taxoReader, null); + } else { + return FacetsAccumulator.create(fsp, state, null); + } } /** Override this to use a custom drill-sideways {@link * FacetsAccumulator}. 
*/ protected FacetsAccumulator getDrillSidewaysAccumulator(String dim, FacetSearchParams fsp) throws IOException { - return FacetsAccumulator.create(fsp, searcher.getIndexReader(), taxoReader); + if (taxoReader != null) { + return FacetsAccumulator.create(fsp, searcher.getIndexReader(), taxoReader, null); + } else { + return FacetsAccumulator.create(fsp, state, null); + } } /** Override this and return true if your collector diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/FacetRequest.java b/lucene/facet/src/java/org/apache/lucene/facet/search/FacetRequest.java index b63e621896a..fd675281c33 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/FacetRequest.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/FacetRequest.java @@ -3,6 +3,8 @@ package org.apache.lucene.facet.search; import java.io.IOException; import org.apache.lucene.facet.params.CategoryListParams.OrdinalPolicy; +import org.apache.lucene.facet.params.FacetIndexingParams; +import org.apache.lucene.facet.range.RangeFacetRequest; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyReader; @@ -139,6 +141,15 @@ public abstract class FacetRequest { "you should override FacetsAccumulator to return the proper FacetsAggregator"); } + /** + * Returns the {@link FacetsAggregator} which can aggregate the categories of + * this facet request. The aggregator is expected to aggregate category values + * into {@link FacetArrays}. If the facet request does not support that, e.g. + * {@link RangeFacetRequest}, it can return {@code null}. Note though that + * such requests require a dedicated {@link FacetsAccumulator}. 
+ */ + public abstract FacetsAggregator createFacetsAggregator(FacetIndexingParams fip); + @Override public boolean equals(Object o) { if (o instanceof FacetRequest) { diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java b/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java index d21fbf63486..0c588eabc16 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java @@ -7,6 +7,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.util.CollectionUtil; @@ -152,6 +153,11 @@ public class FacetResult { } } FacetRequest dummy = new FacetRequest(min, frs.get(0).getFacetRequest().numResults) { + @Override + public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) { + throw new UnsupportedOperationException("not supported by this request"); + } + @Override public double getValueOf(FacetArrays arrays, int idx) { throw new UnsupportedOperationException("not supported by this request"); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java b/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java index ce0dd33bce1..1400a03f245 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsAccumulator.java @@ -2,20 +2,15 @@ package org.apache.lucene.facet.search; import java.io.IOException; import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; import java.util.List; -import java.util.Set; -import org.apache.lucene.facet.encoding.DGapVInt8IntDecoder; -import org.apache.lucene.facet.params.CategoryListParams; +import 
org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.params.FacetSearchParams; -import org.apache.lucene.facet.params.CategoryListParams.OrdinalPolicy; -import org.apache.lucene.facet.search.FacetRequest.FacetArraysSource; -import org.apache.lucene.facet.search.FacetRequest.ResultMode; -import org.apache.lucene.facet.search.FacetRequest.SortOrder; +import org.apache.lucene.facet.range.RangeAccumulator; +import org.apache.lucene.facet.range.RangeFacetRequest; import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; -import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays; +import org.apache.lucene.facet.sortedset.SortedSetDocValuesAccumulator; +import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.IndexReader; @@ -37,48 +32,117 @@ import org.apache.lucene.index.IndexReader; */ /** - * Driver for Accumulating facets of faceted search requests over given - * documents. + * Accumulates the facets defined in the {@link FacetSearchParams}. * * @lucene.experimental */ -public class FacetsAccumulator { +public abstract class FacetsAccumulator { - public final TaxonomyReader taxonomyReader; - public final IndexReader indexReader; - public final FacetArrays facetArrays; - public FacetSearchParams searchParams; + // TODO this should be final, but currently SamplingAccumulator modifies the params. + // need to review the class and if it's resolved, make it final + public /*final*/ FacetSearchParams searchParams; - /** - * Initializes the accumulator with the given search params, index reader and - * taxonomy reader. This constructor creates the default {@link FacetArrays}, - * which do not support reuse. If you want to use {@link ReusingFacetArrays}, - * you should use the - * {@link #FacetsAccumulator(FacetSearchParams, IndexReader, TaxonomyReader, FacetArrays)} - * constructor. 
- */ - public FacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) { - this(searchParams, indexReader, taxonomyReader, new FacetArrays(taxonomyReader.getSize())); + /** Constructor with the given search params. */ + protected FacetsAccumulator(FacetSearchParams fsp) { + this.searchParams = fsp; } /** - * Creates an appropriate {@link FacetsAccumulator}, - * returning {@link FacetsAccumulator} when all requests - * are {@link CountFacetRequest} and only one partition is - * in use, otherwise {@link StandardFacetsAccumulator}. + * Creates a {@link FacetsAccumulator} for the given facet requests. This + * method supports {@link RangeAccumulator} and + * {@link TaxonomyFacetsAccumulator} by dividing the facet requests into + * {@link RangeFacetRequest} and the rest. + *

+ * If both types of facet requests are used, it returns a + * {@link MultiFacetsAccumulator} and the facet results returned from + * {@link #accumulate(List)} may not be in the same order as the given facet + * requests. + * + * @param fsp + * the search params define the facet requests and the + * {@link FacetIndexingParams} + * @param indexReader + * the {@link IndexReader} used for search + * @param taxoReader + * the {@link TaxonomyReader} used for search + * @param arrays + * the {@link FacetArrays} which the accumulator should use to store + * the categories weights in. Can be {@code null}. */ - public static FacetsAccumulator create(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) { + public static FacetsAccumulator create(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader, + FacetArrays arrays) { if (fsp.indexingParams.getPartitionSize() != Integer.MAX_VALUE) { - return new StandardFacetsAccumulator(fsp, indexReader, taxoReader); + return new StandardFacetsAccumulator(fsp, indexReader, taxoReader, arrays); } + List rangeRequests = new ArrayList(); + List nonRangeRequests = new ArrayList(); for (FacetRequest fr : fsp.facetRequests) { - if (!(fr instanceof CountFacetRequest)) { - return new StandardFacetsAccumulator(fsp, indexReader, taxoReader); + if (fr instanceof RangeFacetRequest) { + rangeRequests.add(fr); + } else { + nonRangeRequests.add(fr); + } + } + + if (rangeRequests.isEmpty()) { + return new TaxonomyFacetsAccumulator(fsp, indexReader, taxoReader, arrays); + } else if (nonRangeRequests.isEmpty()) { + return new RangeAccumulator(rangeRequests); + } else { + FacetSearchParams searchParams = new FacetSearchParams(fsp.indexingParams, nonRangeRequests); + FacetsAccumulator accumulator = new TaxonomyFacetsAccumulator(searchParams, indexReader, taxoReader, arrays); + RangeAccumulator rangeAccumulator = new RangeAccumulator(rangeRequests); + return MultiFacetsAccumulator.wrap(accumulator, 
rangeAccumulator); + } + } + + /** + * Creates a {@link FacetsAccumulator} for the given facet requests. This + * method supports {@link RangeAccumulator} and + * {@link SortedSetDocValuesAccumulator} by dividing the facet requests into + * {@link RangeFacetRequest} and the rest. + *

+ * If both types of facet requests are used, it returns a + * {@link MultiFacetsAccumulator} and the facet results returned from + * {@link #accumulate(List)} may not be in the same order as the given facet + * requests. + * + * @param fsp + * the search params define the facet requests and the + * {@link FacetIndexingParams} + * @param state + * the {@link SortedSetDocValuesReaderState} needed for accumulating + * the categories + * @param arrays + * the {@link FacetArrays} which the accumulator should use to + * store the categories weights in. Can be {@code null}. + */ + public static FacetsAccumulator create(FacetSearchParams fsp, SortedSetDocValuesReaderState state, FacetArrays arrays) throws IOException { + if (fsp.indexingParams.getPartitionSize() != Integer.MAX_VALUE) { + throw new IllegalArgumentException("only default partition size is supported by this method: " + fsp.indexingParams.getPartitionSize()); + } + + List rangeRequests = new ArrayList(); + List nonRangeRequests = new ArrayList(); + for (FacetRequest fr : fsp.facetRequests) { + if (fr instanceof RangeFacetRequest) { + rangeRequests.add(fr); + } else { + nonRangeRequests.add(fr); } } - return new FacetsAccumulator(fsp, indexReader, taxoReader); + if (rangeRequests.isEmpty()) { + return new SortedSetDocValuesAccumulator(state, fsp, arrays); + } else if (nonRangeRequests.isEmpty()) { + return new RangeAccumulator(rangeRequests); + } else { + FacetSearchParams searchParams = new FacetSearchParams(fsp.indexingParams, nonRangeRequests); + FacetsAccumulator accumulator = new SortedSetDocValuesAccumulator(state, searchParams, arrays); + RangeAccumulator rangeAccumulator = new RangeAccumulator(rangeRequests); + return MultiFacetsAccumulator.wrap(accumulator, rangeAccumulator); + } } /** Returns an empty {@link FacetResult}. 
*/ @@ -88,69 +152,6 @@ public class FacetsAccumulator { return new FacetResult(fr, root, 0); } - /** - * Initializes the accumulator with the given parameters as well as - * {@link FacetArrays}. Note that the accumulator doesn't call - * {@link FacetArrays#free()}. If you require that (only makes sense if you - * use {@link ReusingFacetArrays}, you should do it after you've finished with - * the accumulator. - */ - public FacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, TaxonomyReader taxonomyReader, - FacetArrays facetArrays) { - this.facetArrays = facetArrays; - this.indexReader = indexReader; - this.taxonomyReader = taxonomyReader; - this.searchParams = searchParams; - } - - /** - * Returns the {@link FacetsAggregator} to use for aggregating the categories - * found in the result documents. The default implementation returns - * {@link CountingFacetsAggregator}, or {@link FastCountingFacetsAggregator} - * if all categories can be decoded with {@link DGapVInt8IntDecoder}. - */ - public FacetsAggregator getAggregator() { - if (FastCountingFacetsAggregator.verifySearchParams(searchParams)) { - return new FastCountingFacetsAggregator(); - } else { - return new CountingFacetsAggregator(); - } - } - - /** - * Creates a {@link FacetResultsHandler} that matches the given - * {@link FacetRequest}. 
- */ - protected FacetResultsHandler createFacetResultsHandler(FacetRequest fr) { - if (fr.getDepth() == 1 && fr.getSortOrder() == SortOrder.DESCENDING) { - FacetArraysSource fas = fr.getFacetArraysSource(); - if (fas == FacetArraysSource.INT) { - return new IntFacetResultsHandler(taxonomyReader, fr, facetArrays); - } - - if (fas == FacetArraysSource.FLOAT) { - return new FloatFacetResultsHandler(taxonomyReader, fr, facetArrays); - } - } - - if (fr.getResultMode() == ResultMode.PER_NODE_IN_TREE) { - return new TopKInEachNodeHandler(taxonomyReader, fr, facetArrays); - } - return new TopKFacetResultsHandler(taxonomyReader, fr, facetArrays); - } - - protected Set getCategoryLists() { - if (searchParams.indexingParams.getAllCategoryListParams().size() == 1) { - return Collections.singleton(searchParams.indexingParams.getCategoryListParams(null)); - } - - HashSet clps = new HashSet(); - for (FacetRequest fr : searchParams.facetRequests) { - clps.add(searchParams.indexingParams.getCategoryListParams(fr.categoryPath)); - } - return clps; - } - /** * Used by {@link FacetsCollector} to build the list of {@link FacetResult * facet results} that match the {@link FacetRequest facet requests} that were @@ -159,44 +160,12 @@ public class FacetsAccumulator { * @param matchingDocs * the documents that matched the query, per-segment. 
*/ - public List accumulate(List matchingDocs) throws IOException { - // aggregate facets per category list (usually onle one category list) - FacetsAggregator aggregator = getAggregator(); - for (CategoryListParams clp : getCategoryLists()) { - for (MatchingDocs md : matchingDocs) { - aggregator.aggregate(md, clp, facetArrays); - } - } - - ParallelTaxonomyArrays arrays = taxonomyReader.getParallelTaxonomyArrays(); - - // compute top-K - final int[] children = arrays.children(); - final int[] siblings = arrays.siblings(); - List res = new ArrayList(); - for (FacetRequest fr : searchParams.facetRequests) { - int rootOrd = taxonomyReader.getOrdinal(fr.categoryPath); - if (rootOrd == TaxonomyReader.INVALID_ORDINAL) { // category does not exist - // Add empty FacetResult - res.add(emptyResult(rootOrd, fr)); - continue; - } - CategoryListParams clp = searchParams.indexingParams.getCategoryListParams(fr.categoryPath); - if (fr.categoryPath.length > 0) { // someone might ask to aggregate the ROOT category - OrdinalPolicy ordinalPolicy = clp.getOrdinalPolicy(fr.categoryPath.components[0]); - if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) { - // rollup values - aggregator.rollupValues(fr, rootOrd, children, siblings, facetArrays); - } - } - - FacetResultsHandler frh = createFacetResultsHandler(fr); - res.add(frh.compute()); - } - return res; - } + public abstract List accumulate(List matchingDocs) throws IOException; - public boolean requiresDocScores() { - return getAggregator().requiresDocScores(); - } + /** + * Used by {@link FacetsCollector} to determine if document scores need to be + * collected in addition to matching documents. 
+ */ + public abstract boolean requiresDocScores(); + } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java b/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java index c7ba6116359..f27d3e2aaaf 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java @@ -167,7 +167,7 @@ public abstract class FacetsCollector extends Collector { * FacetsAccumulator} from {@link FacetsAccumulator#create}. */ public static FacetsCollector create(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) { - return create(FacetsAccumulator.create(fsp, indexReader, taxoReader)); + return create(FacetsAccumulator.create(fsp, indexReader, taxoReader, null)); } /** diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java b/lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java index 252eb4ca565..fdb81a60cca 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java @@ -5,7 +5,6 @@ import java.io.IOException; import org.apache.lucene.facet.encoding.DGapVInt8IntDecoder; import org.apache.lucene.facet.encoding.DGapVInt8IntEncoder; import org.apache.lucene.facet.params.CategoryListParams; -import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.util.BytesRef; @@ -40,23 +39,6 @@ public final class FastCountingFacetsAggregator extends IntRollupFacetsAggregato private final BytesRef buf = new BytesRef(32); - /** - * Asserts that this {@link FacetsCollector} can handle the given - * {@link FacetSearchParams}. Returns {@code null} if true, otherwise an error - * message. 
- */ - final static boolean verifySearchParams(FacetSearchParams fsp) { - // verify that all category lists were encoded with DGapVInt - for (FacetRequest fr : fsp.facetRequests) { - CategoryListParams clp = fsp.indexingParams.getCategoryListParams(fr.categoryPath); - if (clp.createEncoder().createMatchingDecoder().getClass() != DGapVInt8IntDecoder.class) { - return false; - } - } - - return true; - } - @Override public final void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException { diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/MultiFacetsAccumulator.java b/lucene/facet/src/java/org/apache/lucene/facet/search/MultiFacetsAccumulator.java new file mode 100644 index 00000000000..e82f76bf828 --- /dev/null +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/MultiFacetsAccumulator.java @@ -0,0 +1,69 @@ +package org.apache.lucene.facet.search; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +import org.apache.lucene.facet.params.FacetSearchParams; +import org.apache.lucene.facet.search.FacetResult; +import org.apache.lucene.facet.search.FacetsAccumulator; +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; + +/** + * Wraps multiple {@link FacetsAccumulator} and returns a merged list of + * {@link FacetResult}, in the order the accumulators were given. + */ +public class MultiFacetsAccumulator extends FacetsAccumulator { + + private final FacetsAccumulator[] accumulators; + + /** Wraps the given {@link FacetsAccumulator accumulators}. */ + public static FacetsAccumulator wrap(FacetsAccumulator... accumulators) { + if (accumulators.length == 1) { + return accumulators[0]; + } else { + return new MultiFacetsAccumulator(accumulators); + } + } + + private MultiFacetsAccumulator(FacetsAccumulator... accumulators) { + super((FacetSearchParams) null); + this.accumulators = accumulators; + } + + @Override + public boolean requiresDocScores() { + for (FacetsAccumulator fa : accumulators) { + if (fa.requiresDocScores()) { + return true; + } + } + return false; + } + + @Override + public List accumulate(List matchingDocs) throws IOException { + List merged = new ArrayList(); + for (FacetsAccumulator fa : accumulators) { + merged.addAll(fa.accumulate(matchingDocs)); + } + return merged; + } +} diff --git a/lucene/facet/src/java/org/apache/lucene/facet/associations/MultiAssociationsFacetsAggregator.java b/lucene/facet/src/java/org/apache/lucene/facet/search/MultiFacetsAggregator.java similarity index 80% rename from lucene/facet/src/java/org/apache/lucene/facet/associations/MultiAssociationsFacetsAggregator.java rename to lucene/facet/src/java/org/apache/lucene/facet/search/MultiFacetsAggregator.java index 3004ad65e7a..cfcc881b1d2 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/associations/MultiAssociationsFacetsAggregator.java +++ 
b/lucene/facet/src/java/org/apache/lucene/facet/search/MultiFacetsAggregator.java @@ -1,4 +1,4 @@ -package org.apache.lucene.facet.associations; +package org.apache.lucene.facet.search; import java.io.IOException; import java.util.ArrayList; @@ -39,22 +39,21 @@ import org.apache.lucene.facet.taxonomy.CategoryPath; * * @lucene.experimental */ -public class MultiAssociationsFacetsAggregator implements FacetsAggregator { +public class MultiFacetsAggregator implements FacetsAggregator { private final Map categoryAggregators; private final List aggregators; /** - * Creates a new {@link MultiAssociationsFacetsAggregator} over the given - * aggregators. The mapping is used by - * {@link #rollupValues(FacetRequest, int, int[], int[], FacetArrays)} to - * rollup the values of the specific category by the corresponding - * {@link FacetsAggregator}. However, since each {@link FacetsAggregator} - * handles the associations of a specific type, which could cover multiple - * categories, the aggregation is done on the unique set of aggregators, which - * are identified by their class. + * Constructor. + *

+ * The mapping is used to rollup the values of the specific category by the + * corresponding {@link FacetsAggregator}. It is ok to pass different + * {@link FacetsAggregator} instances for each {@link CategoryPath} - the + * constructor ensures that each aggregator type (determined by its + * class) is invoked only once. */ - public MultiAssociationsFacetsAggregator(Map aggregators) { + public MultiFacetsAggregator(Map aggregators) { this.categoryAggregators = aggregators; // make sure that each FacetsAggregator class is invoked only once, or diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/PerCategoryListAggregator.java b/lucene/facet/src/java/org/apache/lucene/facet/search/PerCategoryListAggregator.java index 790dba62b50..fb2add4440f 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/PerCategoryListAggregator.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/PerCategoryListAggregator.java @@ -27,6 +27,9 @@ import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; /** * A {@link FacetsAggregator} which invokes the proper aggregator per * {@link CategoryListParams}. + * {@link #rollupValues(FacetRequest, int, int[], int[], FacetArrays)} is + * delegated to the proper aggregator which handles the + * {@link CategoryListParams} the given {@link FacetRequest} belongs to. */ public class PerCategoryListAggregator implements FacetsAggregator { diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java b/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java index 97e57cb8b33..f1ecb539b29 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsAccumulator.java @@ -43,7 +43,7 @@ import org.apache.lucene.util.IntsRef; */ /** - * Standard implementation for {@link FacetsAccumulator}, utilizing partitions to save on memory. 
+ * Standard implementation for {@link TaxonomyFacetsAccumulator}, utilizing partitions to save on memory. *

* Why partitions? Because if there are say 100M categories out of which * only top K are required, we must first compute value for all 100M categories @@ -64,7 +64,7 @@ import org.apache.lucene.util.IntsRef; * * @lucene.experimental */ -public class StandardFacetsAccumulator extends FacetsAccumulator { +public class StandardFacetsAccumulator extends TaxonomyFacetsAccumulator { private static final Logger logger = Logger.getLogger(StandardFacetsAccumulator.class.getName()); @@ -96,15 +96,18 @@ public class StandardFacetsAccumulator extends FacetsAccumulator { private double complementThreshold = DEFAULT_COMPLEMENT_THRESHOLD; - public StandardFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, + private static FacetArrays createFacetArrays(FacetSearchParams searchParams, TaxonomyReader taxoReader) { + return new FacetArrays(PartitionsUtils.partitionSize(searchParams.indexingParams, taxoReader)); + } + + public StandardFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) { - this(searchParams, indexReader, taxonomyReader, new FacetArrays( - PartitionsUtils.partitionSize(searchParams.indexingParams, taxonomyReader))); + this(searchParams, indexReader, taxonomyReader, null); } public StandardFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, TaxonomyReader taxonomyReader, FacetArrays facetArrays) { - super(searchParams, indexReader, taxonomyReader, facetArrays); + super(searchParams, indexReader, taxonomyReader, facetArrays == null ? 
createFacetArrays(searchParams, taxonomyReader) : facetArrays); // can only be computed later when docids size is known isUsingComplements = false; @@ -126,8 +129,7 @@ public class StandardFacetsAccumulator extends FacetsAccumulator { if (isUsingComplements) { try { - totalFacetCounts = TotalFacetCountsCache.getSingleton().getTotalCounts(indexReader, taxonomyReader, - searchParams.indexingParams); + totalFacetCounts = TotalFacetCountsCache.getSingleton().getTotalCounts(indexReader, taxonomyReader, searchParams.indexingParams); if (totalFacetCounts != null) { docids = ScoredDocIdsUtils.getComplementSet(docids, indexReader); } else { diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/SumScoreFacetRequest.java b/lucene/facet/src/java/org/apache/lucene/facet/search/SumScoreFacetRequest.java index 7a08a124069..9eba2ac6a79 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/SumScoreFacetRequest.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/SumScoreFacetRequest.java @@ -1,5 +1,6 @@ package org.apache.lucene.facet.search; +import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyReader; @@ -33,6 +34,11 @@ public class SumScoreFacetRequest extends FacetRequest { super(path, num); } + @Override + public FacetsAggregator createFacetsAggregator(FacetIndexingParams fip) { + return new SumScoreFacetsAggregator(); + } + @Override public Aggregator createAggregator(boolean useComplements, FacetArrays arrays, TaxonomyReader taxonomy) { assert !useComplements : "complements are not supported by this FacetRequest"; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/TaxonomyFacetsAccumulator.java b/lucene/facet/src/java/org/apache/lucene/facet/search/TaxonomyFacetsAccumulator.java new file mode 100644 index 00000000000..2ee39ae7c73 --- /dev/null +++ 
b/lucene/facet/src/java/org/apache/lucene/facet/search/TaxonomyFacetsAccumulator.java @@ -0,0 +1,225 @@ +package org.apache.lucene.facet.search; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.lucene.facet.params.CategoryListParams; +import org.apache.lucene.facet.params.CategoryListParams.OrdinalPolicy; +import org.apache.lucene.facet.params.FacetSearchParams; +import org.apache.lucene.facet.search.FacetRequest.FacetArraysSource; +import org.apache.lucene.facet.search.FacetRequest.ResultMode; +import org.apache.lucene.facet.search.FacetRequest.SortOrder; +import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.index.IndexReader; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A {@link FacetsAccumulator} suitable for accumulating categories that were + * indexed into a taxonomy index. 
+ * + * @lucene.experimental + */ +public class TaxonomyFacetsAccumulator extends FacetsAccumulator { + + public final TaxonomyReader taxonomyReader; + public final IndexReader indexReader; + public final FacetArrays facetArrays; + + /** + * Initializes the accumulator with the given search params, index reader and + * taxonomy reader. This constructor creates the default {@link FacetArrays}, + * which do not support reuse. If you want to use {@link ReusingFacetArrays}, + * you should use the + * {@link #TaxonomyFacetsAccumulator(FacetSearchParams, IndexReader, TaxonomyReader, FacetArrays)} + * constructor. + */ + public TaxonomyFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, + TaxonomyReader taxonomyReader) { + this(searchParams, indexReader, taxonomyReader, null); + } + + /** + * Initializes the accumulator with the given parameters as well as + * {@link FacetArrays}. Note that the accumulator doesn't call + * {@link FacetArrays#free()}. If you require that (only makes sense if you + * use {@link ReusingFacetArrays}), you should do it after you've finished with + * the accumulator. + */ + public TaxonomyFacetsAccumulator(FacetSearchParams searchParams, IndexReader indexReader, + TaxonomyReader taxonomyReader, FacetArrays facetArrays) { + super(searchParams); + this.facetArrays = facetArrays == null ? new FacetArrays(taxonomyReader.getSize()) : facetArrays; + this.indexReader = indexReader; + this.taxonomyReader = taxonomyReader; + } + + /** Group all requests that belong to the same {@link CategoryListParams}. 
*/ + protected Map> groupRequests() { + if (searchParams.indexingParams.getAllCategoryListParams().size() == 1) { + return Collections.singletonMap(searchParams.indexingParams.getCategoryListParams(null), searchParams.facetRequests); + } + + HashMap> requestsPerCLP = new HashMap>(); + for (FacetRequest fr : searchParams.facetRequests) { + CategoryListParams clp = searchParams.indexingParams.getCategoryListParams(fr.categoryPath); + List requests = requestsPerCLP.get(clp); + if (requests == null) { + requests = new ArrayList(); + requestsPerCLP.put(clp, requests); + } + requests.add(fr); + } + return requestsPerCLP; + } + + /** + * Returns the {@link FacetsAggregator} to use for aggregating the categories + * found in the result documents. + */ + public FacetsAggregator getAggregator() { + Map> requestsPerCLP = groupRequests(); + + // optimize for all-CountFacetRequest and single category list (common case) + if (requestsPerCLP.size() == 1) { + boolean allCount = true; + for (FacetRequest fr : searchParams.facetRequests) { + if (!(fr instanceof CountFacetRequest)) { + allCount = false; + break; + } + } + if (allCount) { + return requestsPerCLP.values().iterator().next().get(0).createFacetsAggregator(searchParams.indexingParams); + } + } + + // If we're here it means the facet requests are spread across multiple + // category lists, or there are multiple types of facet requests, or both. + // Therefore create a per-CategoryList mapping of FacetsAggregators. 
+ Map perCLPAggregator = new HashMap(); + for (Entry> e : requestsPerCLP.entrySet()) { + CategoryListParams clp = e.getKey(); + List requests = e.getValue(); + Map,FacetsAggregator> aggClasses = new HashMap,FacetsAggregator>(); + Map perCategoryAggregator = new HashMap(); + for (FacetRequest fr : requests) { + FacetsAggregator fa = fr.createFacetsAggregator(searchParams.indexingParams); + if (fa == null) { + throw new IllegalArgumentException("this accumulator only supports requests that create a FacetsAggregator: " + fr); + } + Class faClass = fa.getClass(); + if (!aggClasses.containsKey(faClass)) { + aggClasses.put(faClass, fa); + } else { + fa = aggClasses.get(faClass); + } + perCategoryAggregator.put(fr.categoryPath, fa); + } + + if (aggClasses.size() == 1) { // only one type of facet request + perCLPAggregator.put(clp, aggClasses.values().iterator().next()); + } else { + perCLPAggregator.put(clp, new MultiFacetsAggregator(perCategoryAggregator)); + } + } + + return new PerCategoryListAggregator(perCLPAggregator, searchParams.indexingParams); + } + + /** + * Creates a {@link FacetResultsHandler} that matches the given + * {@link FacetRequest}. 
+ */ + protected FacetResultsHandler createFacetResultsHandler(FacetRequest fr) { + if (fr.getDepth() == 1 && fr.getSortOrder() == SortOrder.DESCENDING) { + FacetArraysSource fas = fr.getFacetArraysSource(); + if (fas == FacetArraysSource.INT) { + return new IntFacetResultsHandler(taxonomyReader, fr, facetArrays); + } + + if (fas == FacetArraysSource.FLOAT) { + return new FloatFacetResultsHandler(taxonomyReader, fr, facetArrays); + } + } + + if (fr.getResultMode() == ResultMode.PER_NODE_IN_TREE) { + return new TopKInEachNodeHandler(taxonomyReader, fr, facetArrays); + } + return new TopKFacetResultsHandler(taxonomyReader, fr, facetArrays); + } + + /** + * Used by {@link FacetsCollector} to build the list of {@link FacetResult + * facet results} that match the {@link FacetRequest facet requests} that were + * given in the constructor. + * + * @param matchingDocs + * the documents that matched the query, per-segment. + */ + @Override + public List accumulate(List matchingDocs) throws IOException { + // aggregate facets per category list (usually only one category list) + FacetsAggregator aggregator = getAggregator(); + for (CategoryListParams clp : groupRequests().keySet()) { + for (MatchingDocs md : matchingDocs) { + aggregator.aggregate(md, clp, facetArrays); + } + } + + ParallelTaxonomyArrays arrays = taxonomyReader.getParallelTaxonomyArrays(); + + // compute top-K + final int[] children = arrays.children(); + final int[] siblings = arrays.siblings(); + List res = new ArrayList(); + for (FacetRequest fr : searchParams.facetRequests) { + int rootOrd = taxonomyReader.getOrdinal(fr.categoryPath); + if (rootOrd == TaxonomyReader.INVALID_ORDINAL) { // category does not exist + // Add empty FacetResult + res.add(emptyResult(rootOrd, fr)); + continue; + } + CategoryListParams clp = searchParams.indexingParams.getCategoryListParams(fr.categoryPath); + if (fr.categoryPath.length > 0) { // someone might ask to aggregate the ROOT category + OrdinalPolicy ordinalPolicy = 
clp.getOrdinalPolicy(fr.categoryPath.components[0]); + if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) { + // rollup values + aggregator.rollupValues(fr, rootOrd, children, siblings, facetArrays); + } + } + + FacetResultsHandler frh = createFacetResultsHandler(fr); + res.add(frh.compute()); + } + return res; + } + + @Override + public boolean requiresDocScores() { + return getAggregator().requiresDocScores(); + } +} diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesAccumulator.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesAccumulator.java index 108be03f5ca..52bcec8e5ac 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesAccumulator.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesAccumulator.java @@ -32,19 +32,19 @@ import org.apache.lucene.facet.search.FacetRequest; import org.apache.lucene.facet.search.FacetResult; import org.apache.lucene.facet.search.FacetResultNode; import org.apache.lucene.facet.search.FacetsAccumulator; -import org.apache.lucene.facet.search.FacetsAggregator; import org.apache.lucene.facet.search.FacetsCollector.MatchingDocs; +import org.apache.lucene.facet.search.TaxonomyFacetsAccumulator; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PriorityQueue; -/** A {@link FacetsAccumulator} that uses previously +/** A {@link TaxonomyFacetsAccumulator} that uses previously * indexed {@link SortedSetDocValuesFacetFields} to perform faceting, * without require a 
separate taxonomy index. Faceting is * a bit slower (~25%), and there is added cost on every @@ -57,25 +57,34 @@ public class SortedSetDocValuesAccumulator extends FacetsAccumulator { final SortedSetDocValuesReaderState state; final SortedSetDocValues dv; final String field; - - public SortedSetDocValuesAccumulator(FacetSearchParams fsp, SortedSetDocValuesReaderState state) throws IOException { - super(fsp, null, null, new FacetArrays(state.getSize())); + final FacetArrays facetArrays; + + /** Constructor with the given facet search params. */ + public SortedSetDocValuesAccumulator(SortedSetDocValuesReaderState state, FacetSearchParams fsp) + throws IOException { + this(state, fsp, null); + } + + public SortedSetDocValuesAccumulator(SortedSetDocValuesReaderState state, FacetSearchParams fsp, FacetArrays arrays) + throws IOException { + super(fsp); this.state = state; this.field = state.getField(); + this.facetArrays = arrays == null ? new FacetArrays(state.getSize()) : arrays; dv = state.getDocValues(); // Check params: - for(FacetRequest request : fsp.facetRequests) { - if (!(request instanceof CountFacetRequest)) { - throw new IllegalArgumentException("this collector only supports CountFacetRequest; got " + request); + for (FacetRequest fr : fsp.facetRequests) { + if (!(fr instanceof CountFacetRequest)) { + throw new IllegalArgumentException("this accumulator only supports CountFacetRequest; got " + fr); } - if (request.categoryPath.length != 1) { - throw new IllegalArgumentException("this collector only supports depth 1 CategoryPath; got " + request.categoryPath); + if (fr.categoryPath.length != 1) { + throw new IllegalArgumentException("this accumulator only supports 1-level CategoryPath; got " + fr.categoryPath); } - if (request.getDepth() != 1) { - throw new IllegalArgumentException("this collector only supports depth=1; got " + request.getDepth()); + if (fr.getDepth() != 1) { + throw new IllegalArgumentException("this accumulator only supports depth=1; got " 
+ fr.getDepth()); } - String dim = request.categoryPath.components[0]; + String dim = fr.categoryPath.components[0]; SortedSetDocValuesReaderState.OrdRange ordRange = state.getOrdRange(dim); if (ordRange == null) { @@ -84,105 +93,6 @@ public class SortedSetDocValuesAccumulator extends FacetsAccumulator { } } - @Override - public FacetsAggregator getAggregator() { - - return new FacetsAggregator() { - - @Override - public void aggregate(MatchingDocs matchingDocs, CategoryListParams clp, FacetArrays facetArrays) throws IOException { - - AtomicReader reader = matchingDocs.context.reader(); - - // LUCENE-5090: make sure the provided reader context "matches" - // the top-level reader passed to the - // SortedSetDocValuesReaderState, else cryptic - // AIOOBE can happen: - if (ReaderUtil.getTopLevelContext(matchingDocs.context).reader() != state.origReader) { - throw new IllegalStateException("the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader"); - } - - SortedSetDocValues segValues = reader.getSortedSetDocValues(field); - if (segValues == null) { - return; - } - - final int[] counts = facetArrays.getIntArray(); - final int maxDoc = reader.maxDoc(); - assert maxDoc == matchingDocs.bits.length(); - - if (dv instanceof MultiSortedSetDocValues) { - MultiDocValues.OrdinalMap ordinalMap = ((MultiSortedSetDocValues) dv).mapping; - int segOrd = matchingDocs.context.ord; - - int numSegOrds = (int) segValues.getValueCount(); - - if (matchingDocs.totalHits < numSegOrds/10) { - // Remap every ord to global ord as we iterate: - int doc = 0; - while (doc < maxDoc && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) { - segValues.setDocument(doc); - int term = (int) segValues.nextOrd(); - while (term != SortedSetDocValues.NO_MORE_ORDS) { - counts[(int) ordinalMap.getGlobalOrd(segOrd, term)]++; - term = (int) segValues.nextOrd(); - } - ++doc; - } - } 
else { - - // First count in seg-ord space: - final int[] segCounts = new int[numSegOrds]; - int doc = 0; - while (doc < maxDoc && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) { - segValues.setDocument(doc); - int term = (int) segValues.nextOrd(); - while (term != SortedSetDocValues.NO_MORE_ORDS) { - segCounts[term]++; - term = (int) segValues.nextOrd(); - } - ++doc; - } - - // Then, migrate to global ords: - for(int ord=0;ord { public TopCountPQ(int topN) { @@ -201,14 +111,105 @@ public class SortedSetDocValuesAccumulator extends FacetsAccumulator { } } + static class SortedSetAggregator { + + private final SortedSetDocValuesReaderState state; + private final String field; + private final SortedSetDocValues dv; + + public SortedSetAggregator(String field, SortedSetDocValuesReaderState state, SortedSetDocValues dv) { + this.field = field; + this.state = state; + this.dv = dv; + } + + public void aggregate(MatchingDocs matchingDocs, FacetArrays facetArrays) throws IOException { + + AtomicReader reader = matchingDocs.context.reader(); + + // LUCENE-5090: make sure the provided reader context "matches" + // the top-level reader passed to the + // SortedSetDocValuesReaderState, else cryptic + // AIOOBE can happen: + if (ReaderUtil.getTopLevelContext(matchingDocs.context).reader() != state.origReader) { + throw new IllegalStateException("the SortedSetDocValuesReaderState provided to this class does not match the reader being searched; you must create a new SortedSetDocValuesReaderState every time you open a new IndexReader"); + } + + SortedSetDocValues segValues = reader.getSortedSetDocValues(field); + if (segValues == null) { + return; + } + + final int[] counts = facetArrays.getIntArray(); + final int maxDoc = reader.maxDoc(); + assert maxDoc == matchingDocs.bits.length(); + + if (dv instanceof MultiSortedSetDocValues) { + MultiDocValues.OrdinalMap ordinalMap = ((MultiSortedSetDocValues) dv).mapping; + int segOrd = matchingDocs.context.ord; + + int numSegOrds = 
(int) segValues.getValueCount(); + + if (matchingDocs.totalHits < numSegOrds/10) { + // Remap every ord to global ord as we iterate: + int doc = 0; + while (doc < maxDoc && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) { + segValues.setDocument(doc); + int term = (int) segValues.nextOrd(); + while (term != SortedSetDocValues.NO_MORE_ORDS) { + counts[(int) ordinalMap.getGlobalOrd(segOrd, term)]++; + term = (int) segValues.nextOrd(); + } + ++doc; + } + } else { + + // First count in seg-ord space: + final int[] segCounts = new int[numSegOrds]; + int doc = 0; + while (doc < maxDoc && (doc = matchingDocs.bits.nextSetBit(doc)) != -1) { + segValues.setDocument(doc); + int term = (int) segValues.nextOrd(); + while (term != SortedSetDocValues.NO_MORE_ORDS) { + segCounts[term]++; + term = (int) segValues.nextOrd(); + } + ++doc; + } + + // Then, migrate to global ords: + for(int ord=0;ord accumulate(List matchingDocs) throws IOException { - FacetsAggregator aggregator = getAggregator(); - for (CategoryListParams clp : getCategoryLists()) { - for (MatchingDocs md : matchingDocs) { - aggregator.aggregate(md, clp, facetArrays); - } + SortedSetAggregator aggregator = new SortedSetAggregator(field, state, dv); + for (MatchingDocs md : matchingDocs) { + aggregator.aggregate(md, facetArrays); } // compute top-K @@ -218,7 +219,7 @@ public class SortedSetDocValuesAccumulator extends FacetsAccumulator { BytesRef scratch = new BytesRef(); - for(FacetRequest request : searchParams.facetRequests) { + for (FacetRequest request : searchParams.facetRequests) { String dim = request.categoryPath.components[0]; SortedSetDocValuesReaderState.OrdRange ordRange = state.getOrdRange(dim); // checked in ctor: @@ -315,4 +316,10 @@ public class SortedSetDocValuesAccumulator extends FacetsAccumulator { return results; } + + @Override + public boolean requiresDocScores() { + return false; + } + } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/associations/AssociationsFacetRequestTest.java 
b/lucene/facet/src/test/org/apache/lucene/facet/associations/AssociationsFacetRequestTest.java index 6d8f608d76a..54babc6d4da 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/associations/AssociationsFacetRequestTest.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/associations/AssociationsFacetRequestTest.java @@ -1,8 +1,6 @@ package org.apache.lucene.facet.associations; -import java.util.HashMap; import java.util.List; -import java.util.Map; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockTokenizer; @@ -10,9 +8,9 @@ import org.apache.lucene.document.Document; import org.apache.lucene.facet.FacetTestCase; import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.search.FacetResult; -import org.apache.lucene.facet.search.FacetsAccumulator; import org.apache.lucene.facet.search.FacetsAggregator; import org.apache.lucene.facet.search.FacetsCollector; +import org.apache.lucene.facet.search.TaxonomyFacetsAccumulator; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; @@ -103,12 +101,12 @@ public class AssociationsFacetRequestTest extends FacetTestCase { // facet requests for two facets FacetSearchParams fsp = new FacetSearchParams( - new AssociationIntSumFacetRequest(aint, 10), - new AssociationIntSumFacetRequest(bint, 10)); + new SumIntAssociationFacetRequest(aint, 10), + new SumIntAssociationFacetRequest(bint, 10)); Query q = new MatchAllDocsQuery(); - FacetsAccumulator fa = new FacetsAccumulator(fsp, reader, taxo) { + TaxonomyFacetsAccumulator fa = new TaxonomyFacetsAccumulator(fsp, reader, taxo) { @Override public FacetsAggregator getAggregator() { return new SumIntAssociationFacetsAggregator(); @@ -135,12 +133,12 @@ public class AssociationsFacetRequestTest extends FacetTestCase { // facet requests for two facets FacetSearchParams fsp = new 
FacetSearchParams( - new AssociationFloatSumFacetRequest(afloat, 10), - new AssociationFloatSumFacetRequest(bfloat, 10)); + new SumFloatAssociationFacetRequest(afloat, 10), + new SumFloatAssociationFacetRequest(bfloat, 10)); Query q = new MatchAllDocsQuery(); - FacetsAccumulator fa = new FacetsAccumulator(fsp, reader, taxo) { + TaxonomyFacetsAccumulator fa = new TaxonomyFacetsAccumulator(fsp, reader, taxo) { @Override public FacetsAggregator getAggregator() { return new SumFloatAssociationFacetsAggregator(); @@ -167,27 +165,14 @@ public class AssociationsFacetRequestTest extends FacetTestCase { // facet requests for two facets FacetSearchParams fsp = new FacetSearchParams( - new AssociationIntSumFacetRequest(aint, 10), - new AssociationIntSumFacetRequest(bint, 10), - new AssociationFloatSumFacetRequest(afloat, 10), - new AssociationFloatSumFacetRequest(bfloat, 10)); + new SumIntAssociationFacetRequest(aint, 10), + new SumIntAssociationFacetRequest(bint, 10), + new SumFloatAssociationFacetRequest(afloat, 10), + new SumFloatAssociationFacetRequest(bfloat, 10)); Query q = new MatchAllDocsQuery(); - final SumIntAssociationFacetsAggregator sumInt = new SumIntAssociationFacetsAggregator(); - final SumFloatAssociationFacetsAggregator sumFloat = new SumFloatAssociationFacetsAggregator(); - final Map aggregators = new HashMap(); - aggregators.put(aint, sumInt); - aggregators.put(bint, sumInt); - aggregators.put(afloat, sumFloat); - aggregators.put(bfloat, sumFloat); - FacetsAccumulator fa = new FacetsAccumulator(fsp, reader, taxo) { - @Override - public FacetsAggregator getAggregator() { - return new MultiAssociationsFacetsAggregator(aggregators); - } - }; - FacetsCollector fc = FacetsCollector.create(fa); + FacetsCollector fc = FacetsCollector.create(fsp, reader, taxo); IndexSearcher searcher = newSearcher(reader); searcher.search(q, fc); diff --git a/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeAccumulator.java 
b/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeAccumulator.java index 48738c2e546..92ecb17bcb4 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeAccumulator.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/range/TestRangeAccumulator.java @@ -17,6 +17,7 @@ package org.apache.lucene.facet.range; * limitations under the License. */ +import java.io.IOException; import java.util.Collections; import java.util.HashSet; import java.util.List; @@ -37,13 +38,15 @@ import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.search.CountFacetRequest; import org.apache.lucene.facet.search.DrillDownQuery; -import org.apache.lucene.facet.search.DrillSideways.DrillSidewaysResult; import org.apache.lucene.facet.search.DrillSideways; +import org.apache.lucene.facet.search.DrillSideways.DrillSidewaysResult; import org.apache.lucene.facet.search.FacetRequest; import org.apache.lucene.facet.search.FacetResult; import org.apache.lucene.facet.search.FacetResultNode; import org.apache.lucene.facet.search.FacetsAccumulator; import org.apache.lucene.facet.search.FacetsCollector; +import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetFields; +import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; @@ -74,15 +77,12 @@ public class TestRangeAccumulator extends FacetTestCase { IndexReader r = w.getReader(); w.close(); - FacetSearchParams fsp = new FacetSearchParams( - new RangeFacetRequest("field", - new LongRange("less than 10", 0L, true, 10L, false), - new LongRange("less than or equal to 10", 0L, true, 10L, true), - new LongRange("over 90", 90L, false, 100L, false), - new LongRange("90 or above", 90L, true, 100L, false), - new LongRange("over 1000", 
1000L, false, Long.MAX_VALUE, false))); - - RangeAccumulator a = new RangeAccumulator(fsp, r); + RangeAccumulator a = new RangeAccumulator(new RangeFacetRequest("field", + new LongRange("less than 10", 0L, true, 10L, false), + new LongRange("less than or equal to 10", 0L, true, 10L, true), + new LongRange("over 90", 90L, false, 100L, false), + new LongRange("90 or above", 90L, true, 100L, false), + new LongRange("over 1000", 1000L, false, Long.MAX_VALUE, false))); FacetsCollector fc = FacetsCollector.create(a); @@ -97,15 +97,15 @@ public class TestRangeAccumulator extends FacetTestCase { } /** Tests single request that mixes Range and non-Range - * faceting, with DrillSideways. */ - public void testMixedRangeAndNonRange() throws Exception { + * faceting, with DrillSideways and taxonomy. */ + public void testMixedRangeAndNonRangeTaxonomy() throws Exception { Directory d = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), d); Directory td = newDirectory(); DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(td, IndexWriterConfig.OpenMode.CREATE); FacetFields ff = new FacetFields(tw); - for(long l=0;l<100;l++) { + for (long l = 0; l < 100; l++) { Document doc = new Document(); // For computing range facet counts: doc.add(new NumericDocValuesField("field", l)); @@ -122,7 +122,7 @@ public class TestRangeAccumulator extends FacetTestCase { w.addDocument(doc); } - IndexReader r = w.getReader(); + final IndexReader r = w.getReader(); w.close(); final TaxonomyReader tr = new DirectoryTaxonomyReader(tw); @@ -130,32 +130,32 @@ public class TestRangeAccumulator extends FacetTestCase { IndexSearcher s = newSearcher(r); - final FacetSearchParams fsp = new FacetSearchParams( - new CountFacetRequest(new CategoryPath("dim"), 2), - new RangeFacetRequest("field", - new LongRange("less than 10", 0L, true, 10L, false), - new LongRange("less than or equal to 10", 0L, true, 10L, true), - new LongRange("over 90", 90L, false, 100L, false), - new LongRange("90 or 
above", 90L, true, 100L, false), - new LongRange("over 1000", 1000L, false, Long.MAX_VALUE, false))); - + final CountFacetRequest countRequest = new CountFacetRequest(new CategoryPath("dim"), 2); + final RangeFacetRequest rangeRequest = new RangeFacetRequest("field", + new LongRange("less than 10", 0L, true, 10L, false), + new LongRange("less than or equal to 10", 0L, true, 10L, true), + new LongRange("over 90", 90L, false, 100L, false), + new LongRange("90 or above", 90L, true, 100L, false), + new LongRange("over 1000", 1000L, false, Long.MAX_VALUE, false)); + FacetSearchParams fsp = new FacetSearchParams(countRequest, rangeRequest); + final Set dimSeen = new HashSet(); DrillSideways ds = new DrillSideways(s, tr) { @Override protected FacetsAccumulator getDrillDownAccumulator(FacetSearchParams fsp) { checkSeen(fsp); - return RangeFacetsAccumulatorWrapper.create(fsp, searcher.getIndexReader(), tr); + return FacetsAccumulator.create(fsp, r, tr, null); } @Override protected FacetsAccumulator getDrillSidewaysAccumulator(String dim, FacetSearchParams fsp) { checkSeen(fsp); - return RangeFacetsAccumulatorWrapper.create(fsp, searcher.getIndexReader(), tr); + return FacetsAccumulator.create(fsp, r, tr, null); } private void checkSeen(FacetSearchParams fsp) { - // Each dim should should up only once, across + // Each dim should show up only once, across // both drillDown and drillSideways requests: for(FacetRequest fr : fsp.facetRequests) { String dim = fr.categoryPath.components[0]; @@ -204,6 +204,111 @@ public class TestRangeAccumulator extends FacetTestCase { IOUtils.close(tr, td, r, d); } + /** Tests single request that mixes Range and non-Range + * faceting, with DrillSideways and SortedSet. 
*/ + public void testMixedRangeAndNonRangeSortedSet() throws Exception { + assumeTrue("Test requires SortedSetDV support", defaultCodecSupportsSortedSet()); + Directory d = newDirectory(); + RandomIndexWriter w = new RandomIndexWriter(random(), d); + SortedSetDocValuesFacetFields ff = new SortedSetDocValuesFacetFields(); + + for (long l = 0; l < 100; l++) { + Document doc = new Document(); + // For computing range facet counts: + doc.add(new NumericDocValuesField("field", l)); + // For drill down by numeric range: + doc.add(new LongField("field", l, Field.Store.NO)); + + CategoryPath cp; + if ((l&3) == 0) { + cp = new CategoryPath("dim", "a"); + } else { + cp = new CategoryPath("dim", "b"); + } + ff.addFields(doc, Collections.singletonList(cp)); + w.addDocument(doc); + } + + final IndexReader r = w.getReader(); + w.close(); + + IndexSearcher s = newSearcher(r); + final SortedSetDocValuesReaderState state = new SortedSetDocValuesReaderState(s.getIndexReader()); + + final CountFacetRequest countRequest = new CountFacetRequest(new CategoryPath("dim"), 2); + final RangeFacetRequest rangeRequest = new RangeFacetRequest("field", + new LongRange("less than 10", 0L, true, 10L, false), + new LongRange("less than or equal to 10", 0L, true, 10L, true), + new LongRange("over 90", 90L, false, 100L, false), + new LongRange("90 or above", 90L, true, 100L, false), + new LongRange("over 1000", 1000L, false, Long.MAX_VALUE, false)); + FacetSearchParams fsp = new FacetSearchParams(countRequest, rangeRequest); + + final Set dimSeen = new HashSet(); + + DrillSideways ds = new DrillSideways(s, state) { + @Override + protected FacetsAccumulator getDrillDownAccumulator(FacetSearchParams fsp) throws IOException { + checkSeen(fsp); + return FacetsAccumulator.create(fsp, state, null); + } + + @Override + protected FacetsAccumulator getDrillSidewaysAccumulator(String dim, FacetSearchParams fsp) throws IOException { + checkSeen(fsp); + return FacetsAccumulator.create(fsp, state, null); + } + + 
private void checkSeen(FacetSearchParams fsp) { + // Each dim should show up only once, across + // both drillDown and drillSideways requests: + for(FacetRequest fr : fsp.facetRequests) { + String dim = fr.categoryPath.components[0]; + assertFalse("dim " + dim + " already seen", dimSeen.contains(dim)); + dimSeen.add(dim); + } + } + + @Override + protected boolean scoreSubDocsAtOnce() { + return random().nextBoolean(); + } + }; + + // First search, no drill downs: + DrillDownQuery ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT, new MatchAllDocsQuery()); + DrillSidewaysResult dsr = ds.search(null, ddq, 10, fsp); + + assertEquals(100, dsr.hits.totalHits); + assertEquals(2, dsr.facetResults.size()); + assertEquals("dim (0)\n b (75)\n a (25)\n", FacetTestUtils.toSimpleString(dsr.facetResults.get(0))); + assertEquals("field (0)\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", FacetTestUtils.toSimpleString(dsr.facetResults.get(1))); + + // Second search, drill down on dim=b: + ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT, new MatchAllDocsQuery()); + ddq.add(new CategoryPath("dim", "b")); + dimSeen.clear(); + dsr = ds.search(null, ddq, 10, fsp); + + assertEquals(75, dsr.hits.totalHits); + assertEquals(2, dsr.facetResults.size()); + assertEquals("dim (0)\n b (75)\n a (25)\n", FacetTestUtils.toSimpleString(dsr.facetResults.get(0))); + assertEquals("field (0)\n less than 10 (7)\n less than or equal to 10 (8)\n over 90 (7)\n 90 or above (8)\n over 1000 (0)\n", FacetTestUtils.toSimpleString(dsr.facetResults.get(1))); + + // Third search, drill down on "less than or equal to 10": + ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT, new MatchAllDocsQuery()); + ddq.add("field", NumericRangeQuery.newLongRange("field", 0L, 10L, true, true)); + dimSeen.clear(); + dsr = ds.search(null, ddq, 10, fsp); + + assertEquals(11, dsr.hits.totalHits); + assertEquals(2, dsr.facetResults.size()); + assertEquals("dim (0)\n b (8)\n 
a (3)\n", FacetTestUtils.toSimpleString(dsr.facetResults.get(0))); + assertEquals("field (0)\n less than 10 (10)\n less than or equal to 10 (11)\n over 90 (9)\n 90 or above (10)\n over 1000 (0)\n", FacetTestUtils.toSimpleString(dsr.facetResults.get(1))); + + IOUtils.close(r, d); + } + public void testBasicDouble() throws Exception { Directory d = newDirectory(); RandomIndexWriter w = new RandomIndexWriter(random(), d); @@ -211,22 +316,19 @@ public class TestRangeAccumulator extends FacetTestCase { DoubleDocValuesField field = new DoubleDocValuesField("field", 0.0); doc.add(field); for(long l=0;l<100;l++) { - field.setDoubleValue((double) l); + field.setDoubleValue(l); w.addDocument(doc); } IndexReader r = w.getReader(); w.close(); - FacetSearchParams fsp = new FacetSearchParams( - new RangeFacetRequest("field", - new DoubleRange("less than 10", 0.0, true, 10.0, false), - new DoubleRange("less than or equal to 10", 0.0, true, 10.0, true), - new DoubleRange("over 90", 90.0, false, 100.0, false), - new DoubleRange("90 or above", 90.0, true, 100.0, false), - new DoubleRange("over 1000", 1000.0, false, Double.POSITIVE_INFINITY, false))); - - RangeAccumulator a = new RangeAccumulator(fsp, r); + RangeAccumulator a = new RangeAccumulator(new RangeFacetRequest("field", + new DoubleRange("less than 10", 0.0, true, 10.0, false), + new DoubleRange("less than or equal to 10", 0.0, true, 10.0, true), + new DoubleRange("over 90", 90.0, false, 100.0, false), + new DoubleRange("90 or above", 90.0, true, 100.0, false), + new DoubleRange("over 1000", 1000.0, false, Double.POSITIVE_INFINITY, false))); FacetsCollector fc = FacetsCollector.create(a); @@ -247,22 +349,19 @@ public class TestRangeAccumulator extends FacetTestCase { FloatDocValuesField field = new FloatDocValuesField("field", 0.0f); doc.add(field); for(long l=0;l<100;l++) { - field.setFloatValue((float) l); + field.setFloatValue(l); w.addDocument(doc); } IndexReader r = w.getReader(); w.close(); - FacetSearchParams fsp = 
new FacetSearchParams( - new RangeFacetRequest("field", - new FloatRange("less than 10", 0.0f, true, 10.0f, false), - new FloatRange("less than or equal to 10", 0.0f, true, 10.0f, true), - new FloatRange("over 90", 90.0f, false, 100.0f, false), - new FloatRange("90 or above", 90.0f, true, 100.0f, false), - new FloatRange("over 1000", 1000.0f, false, Float.POSITIVE_INFINITY, false))); - - RangeAccumulator a = new RangeAccumulator(fsp, r); + RangeAccumulator a = new RangeAccumulator(new RangeFacetRequest("field", + new FloatRange("less than 10", 0.0f, true, 10.0f, false), + new FloatRange("less than or equal to 10", 0.0f, true, 10.0f, true), + new FloatRange("over 90", 90.0f, false, 100.0f, false), + new FloatRange("90 or above", 90.0f, true, 100.0f, false), + new FloatRange("over 1000", 1000.0f, false, Float.POSITIVE_INFINITY, false))); FacetsCollector fc = FacetsCollector.create(a); @@ -335,8 +434,7 @@ public class TestRangeAccumulator extends FacetTestCase { } } - FacetSearchParams fsp = new FacetSearchParams(new RangeFacetRequest("field", ranges)); - FacetsCollector fc = FacetsCollector.create(new RangeAccumulator(fsp, r)); + FacetsCollector fc = FacetsCollector.create(new RangeAccumulator(new RangeFacetRequest("field", ranges))); s.search(new MatchAllDocsQuery(), fc); List results = fc.getFacetResults(); assertEquals(1, results.size()); @@ -350,7 +448,7 @@ public class TestRangeAccumulator extends FacetTestCase { assertEquals("field/r" + rangeID, subNode.label.toString('/')); assertEquals(expectedCounts[rangeID], (int) subNode.value); - LongRange range = (LongRange) ((RangeFacetRequest) results.get(0).getFacetRequest()).ranges[rangeID]; + LongRange range = (LongRange) ((RangeFacetRequest) results.get(0).getFacetRequest()).ranges[rangeID]; // Test drill-down: DrillDownQuery ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT); @@ -422,8 +520,7 @@ public class TestRangeAccumulator extends FacetTestCase { } } - FacetSearchParams fsp = new FacetSearchParams(new 
RangeFacetRequest("field", ranges)); - FacetsCollector fc = FacetsCollector.create(new RangeAccumulator(fsp, r)); + FacetsCollector fc = FacetsCollector.create(new RangeAccumulator(new RangeFacetRequest("field", ranges))); s.search(new MatchAllDocsQuery(), fc); List results = fc.getFacetResults(); assertEquals(1, results.size()); @@ -437,7 +534,7 @@ public class TestRangeAccumulator extends FacetTestCase { assertEquals("field/r" + rangeID, subNode.label.toString('/')); assertEquals(expectedCounts[rangeID], (int) subNode.value); - FloatRange range = (FloatRange) ((RangeFacetRequest) results.get(0).getFacetRequest()).ranges[rangeID]; + FloatRange range = (FloatRange) ((RangeFacetRequest) results.get(0).getFacetRequest()).ranges[rangeID]; // Test drill-down: DrillDownQuery ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT); @@ -509,8 +606,7 @@ public class TestRangeAccumulator extends FacetTestCase { } } - FacetSearchParams fsp = new FacetSearchParams(new RangeFacetRequest("field", ranges)); - FacetsCollector fc = FacetsCollector.create(new RangeAccumulator(fsp, r)); + FacetsCollector fc = FacetsCollector.create(new RangeAccumulator(new RangeFacetRequest("field", ranges))); s.search(new MatchAllDocsQuery(), fc); List results = fc.getFacetResults(); assertEquals(1, results.size()); @@ -524,7 +620,7 @@ public class TestRangeAccumulator extends FacetTestCase { assertEquals("field/r" + rangeID, subNode.label.toString('/')); assertEquals(expectedCounts[rangeID], (int) subNode.value); - DoubleRange range = (DoubleRange) ((RangeFacetRequest) results.get(0).getFacetRequest()).ranges[rangeID]; + DoubleRange range = (DoubleRange) ((RangeFacetRequest) results.get(0).getFacetRequest()).ranges[rangeID]; // Test drill-down: DrillDownQuery ddq = new DrillDownQuery(FacetIndexingParams.DEFAULT); diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsAggregatorTest.java b/lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsAggregatorTest.java 
index cb0cbeac6c9..9110c6cd49d 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsAggregatorTest.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsAggregatorTest.java @@ -269,7 +269,7 @@ public class CountingFacetsAggregatorTest extends FacetTestCase { IOUtils.close(indexWriter, taxoWriter); } - private FacetsAccumulator randomAccumulator(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) { + private TaxonomyFacetsAccumulator randomAccumulator(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) { final FacetsAggregator aggregator; double val = random().nextDouble(); if (val < 0.6) { @@ -279,7 +279,7 @@ public class CountingFacetsAggregatorTest extends FacetTestCase { } else { aggregator = new CachedOrdsCountingFacetsAggregator(); } - return new FacetsAccumulator(fsp, indexReader, taxoReader) { + return new TaxonomyFacetsAccumulator(fsp, indexReader, taxoReader) { @Override public FacetsAggregator getAggregator() { return aggregator; diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/FacetResultTest.java b/lucene/facet/src/test/org/apache/lucene/facet/search/FacetResultTest.java index 994c75aff30..5d38db2b7f1 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/FacetResultTest.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/FacetResultTest.java @@ -116,7 +116,7 @@ public class FacetResultTest extends FacetTestCase { @Override protected FacetsAccumulator getDrillSidewaysAccumulator(String dim, FacetSearchParams fsp) throws IOException { FacetsAccumulator fa = super.getDrillSidewaysAccumulator(dim, fsp); - dimArrays.put(dim, fa.facetArrays); + dimArrays.put(dim, ((TaxonomyFacetsAccumulator) fa).facetArrays); return fa; } }; diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java b/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java index 5f6d6b08629..25b8e4cfce5 
100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/TestDrillSideways.java @@ -41,7 +41,6 @@ import org.apache.lucene.facet.index.FacetFields; import org.apache.lucene.facet.params.FacetIndexingParams; import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.search.DrillSideways.DrillSidewaysResult; -import org.apache.lucene.facet.sortedset.SortedSetDocValuesAccumulator; import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetFields; import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState; import org.apache.lucene.facet.taxonomy.CategoryPath; @@ -62,8 +61,8 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField.Type; import org.apache.lucene.search.SortField; +import org.apache.lucene.search.SortField.Type; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; @@ -336,6 +335,8 @@ public class TestDrillSideways extends FacetTestCase { String id; String contentToken; + public Doc() {} + // -1 if the doc is missing this dim, else the index // -into the values for this dim: int[] dims; @@ -790,17 +791,7 @@ public class TestDrillSideways extends FacetTestCase { Sort sort = new Sort(new SortField("id", SortField.Type.STRING)); DrillSideways ds; if (doUseDV) { - ds = new DrillSideways(s, null) { - @Override - protected FacetsAccumulator getDrillDownAccumulator(FacetSearchParams fsp) throws IOException { - return new SortedSetDocValuesAccumulator(fsp, sortedSetDVState); - } - - @Override - protected FacetsAccumulator getDrillSidewaysAccumulator(String dim, FacetSearchParams fsp) throws IOException { - return new SortedSetDocValuesAccumulator(fsp, sortedSetDVState); - } - }; + ds = new DrillSideways(s, 
sortedSetDVState); } else { ds = new DrillSideways(s, tr); } @@ -881,6 +872,7 @@ public class TestDrillSideways extends FacetTestCase { List hits; int[][] counts; int[] uniqueCounts; + public SimpleFacetResult() {} } private int[] getTopNOrds(final int[] counts, final String[] values, int topN) { diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java b/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java index bf011b42835..4225fc0db84 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsCollector.java @@ -3,9 +3,7 @@ package org.apache.lucene.facet.search; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; -import java.util.HashMap; import java.util.List; -import java.util.Map; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; @@ -90,7 +88,7 @@ public class TestFacetsCollector extends FacetTestCase { DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir); FacetSearchParams sParams = new FacetSearchParams(new SumScoreFacetRequest(new CategoryPath("a"), 10)); - FacetsAccumulator fa = new FacetsAccumulator(sParams, r, taxo) { + TaxonomyFacetsAccumulator fa = new TaxonomyFacetsAccumulator(sParams, r, taxo) { @Override public FacetsAggregator getAggregator() { return new SumScoreFacetsAggregator(); @@ -181,18 +179,7 @@ public class TestFacetsCollector extends FacetTestCase { new CountFacetRequest(new CategoryPath("a"), 10), new SumScoreFacetRequest(new CategoryPath("b"), 10)); - Map aggregators = new HashMap(); - aggregators.put(fip.getCategoryListParams(new CategoryPath("a")), new FastCountingFacetsAggregator()); - aggregators.put(fip.getCategoryListParams(new CategoryPath("b")), new SumScoreFacetsAggregator()); - final FacetsAggregator aggregator = new PerCategoryListAggregator(aggregators, fip); - FacetsAccumulator 
fa = new FacetsAccumulator(sParams, r, taxo) { - @Override - public FacetsAggregator getAggregator() { - return aggregator; - } - }; - - FacetsCollector fc = FacetsCollector.create(fa); + FacetsCollector fc = FacetsCollector.create(sParams, r, taxo); TopScoreDocCollector topDocs = TopScoreDocCollector.create(10, false); newSearcher(r).search(new MatchAllDocsQuery(), MultiCollector.wrap(fc, topDocs)); @@ -231,7 +218,7 @@ public class TestFacetsCollector extends FacetTestCase { FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(CategoryPath.EMPTY, 10)); - final FacetsAccumulator fa = random().nextBoolean() ? new FacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo); + final TaxonomyFacetsAccumulator fa = random().nextBoolean() ? new TaxonomyFacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo); FacetsCollector fc = FacetsCollector.create(fa); newSearcher(r).search(new MatchAllDocsQuery(), fc); @@ -265,7 +252,7 @@ public class TestFacetsCollector extends FacetTestCase { FacetSearchParams fsp = new FacetSearchParams( new CountFacetRequest(new CategoryPath("a"), 10), new CountFacetRequest(new CategoryPath("b"), 10)); - final FacetsAccumulator fa = random().nextBoolean() ? new FacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo); + final TaxonomyFacetsAccumulator fa = random().nextBoolean() ? new TaxonomyFacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo); final FacetsCollector fc = FacetsCollector.create(fa); newSearcher(r).search(new MatchAllDocsQuery(), fc); @@ -297,7 +284,7 @@ public class TestFacetsCollector extends FacetTestCase { FacetSearchParams fsp = new FacetSearchParams( new CountFacetRequest(new CategoryPath("a"), 10), new CountFacetRequest(new CategoryPath("b"), 10)); - final FacetsAccumulator fa = random().nextBoolean() ? 
new FacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo); + final TaxonomyFacetsAccumulator fa = random().nextBoolean() ? new TaxonomyFacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo); final FacetsCollector fc = FacetsCollector.create(fa); // this should populate the cached results, but doing search should clear the cache fc.getFacetResults(); @@ -338,7 +325,7 @@ public class TestFacetsCollector extends FacetTestCase { // assert IntFacetResultHandler FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("a"), 10)); - FacetsAccumulator fa = random().nextBoolean() ? new FacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo); + TaxonomyFacetsAccumulator fa = random().nextBoolean() ? new TaxonomyFacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo); FacetsCollector fc = FacetsCollector.create(fa); newSearcher(r).search(new MatchAllDocsQuery(), fc); assertTrue("invalid ordinal for child node: 0", 0 != fc.getFacetResults().get(0).getFacetResultNode().subResults.get(0).ordinal); @@ -346,7 +333,7 @@ public class TestFacetsCollector extends FacetTestCase { // assert IntFacetResultHandler fsp = new FacetSearchParams(new SumScoreFacetRequest(new CategoryPath("a"), 10)); if (random().nextBoolean()) { - fa = new FacetsAccumulator(fsp, r, taxo) { + fa = new TaxonomyFacetsAccumulator(fsp, r, taxo) { @Override public FacetsAggregator getAggregator() { return new SumScoreFacetsAggregator(); @@ -387,7 +374,7 @@ public class TestFacetsCollector extends FacetTestCase { CountFacetRequest cfr = new CountFacetRequest(new CategoryPath("a"), 2); cfr.setResultMode(random().nextBoolean() ? ResultMode.GLOBAL_FLAT : ResultMode.PER_NODE_IN_TREE); FacetSearchParams fsp = new FacetSearchParams(cfr); - final FacetsAccumulator fa = random().nextBoolean() ? 
new FacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo); + final TaxonomyFacetsAccumulator fa = random().nextBoolean() ? new TaxonomyFacetsAccumulator(fsp, r, taxo) : new StandardFacetsAccumulator(fsp, r, taxo); FacetsCollector fc = FacetsCollector.create(fa); newSearcher(r).search(new MatchAllDocsQuery(), fc); @@ -426,15 +413,15 @@ public class TestFacetsCollector extends FacetTestCase { } final Sampler sampler = new RandomSampler(sampleParams, random()); - FacetsAccumulator[] accumulators = new FacetsAccumulator[] { - new FacetsAccumulator(fsp, indexReader, taxoReader), + TaxonomyFacetsAccumulator[] accumulators = new TaxonomyFacetsAccumulator[] { + new TaxonomyFacetsAccumulator(fsp, indexReader, taxoReader), new StandardFacetsAccumulator(fsp, indexReader, taxoReader), new SamplingAccumulator(sampler, fsp, indexReader, taxoReader), new AdaptiveFacetsAccumulator(fsp, indexReader, taxoReader), new SamplingWrapper(new StandardFacetsAccumulator(fsp, indexReader, taxoReader), sampler) }; - for (FacetsAccumulator fa : accumulators) { + for (TaxonomyFacetsAccumulator fa : accumulators) { FacetsCollector fc = FacetsCollector.create(fa); searcher.search(new MatchAllDocsQuery(), fc); List facetResults = fc.getFacetResults(); @@ -444,20 +431,19 @@ public class TestFacetsCollector extends FacetTestCase { try { // SortedSetDocValuesAccumulator cannot even be created in such state - assertNull(new SortedSetDocValuesAccumulator(fsp, new SortedSetDocValuesReaderState(indexReader))); + assertNull(new SortedSetDocValuesAccumulator(new SortedSetDocValuesReaderState(indexReader), fsp)); // if this ever changes, make sure FacetResultNode is labeled correctly fail("should not have succeeded to execute a request over a category which wasn't indexed as SortedSetDVField"); } catch (IllegalArgumentException e) { // expected } - fsp = new FacetSearchParams(new RangeFacetRequest("f", new LongRange("grr", 0, true, 1, true))); - RangeAccumulator ra = new 
RangeAccumulator(fsp, indexReader); + RangeAccumulator ra = new RangeAccumulator(new RangeFacetRequest("f", new LongRange("grr", 0, true, 1, true))); FacetsCollector fc = FacetsCollector.create(ra); searcher.search(new MatchAllDocsQuery(), fc); List facetResults = fc.getFacetResults(); assertNotNull(facetResults); - assertEquals("incorrect label returned for RangeAccumulator", fsp.facetRequests.get(0).categoryPath, facetResults.get(0).getFacetResultNode().label); + assertEquals("incorrect label returned for RangeAccumulator", new CategoryPath("f"), facetResults.get(0).getFacetResultNode().label); IOUtils.close(indexReader, taxoReader); diff --git a/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java b/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java index 55db5ad723b..008a0070273 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/sortedset/TestSortedSetDocValuesFacets.java @@ -112,7 +112,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { //SortedSetDocValuesCollector c = new SortedSetDocValuesCollector(state); //SortedSetDocValuesCollectorMergeBySeg c = new SortedSetDocValuesCollectorMergeBySeg(state); - FacetsCollector c = FacetsCollector.create(new SortedSetDocValuesAccumulator(fsp, state)); + FacetsCollector c = FacetsCollector.create(new SortedSetDocValuesAccumulator(state, fsp)); searcher.search(new MatchAllDocsQuery(), c); @@ -177,7 +177,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { FacetSearchParams fsp = new FacetSearchParams(requests); - FacetsCollector c = FacetsCollector.create(new SortedSetDocValuesAccumulator(fsp, state)); + FacetsCollector c = FacetsCollector.create(new SortedSetDocValuesAccumulator(state, fsp)); searcher.search(new MatchAllDocsQuery(), c);