From 42256baec586dbfeeb397dcdcd4f9a010deab398 Mon Sep 17 00:00:00 2001 From: Shai Erera Date: Wed, 30 Jan 2013 13:51:38 +0000 Subject: [PATCH] LUCENE-4715: Add OrdinalPolicy.ALL_BUT_DIMENSION git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1440416 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 5 ++ .../facet/index/CountingListBuilder.java | 22 ++++-- .../index/params/CategoryListParams.java | 75 ++++++++++++++----- .../params/PerDimensionOrdinalPolicy.java | 55 ++++++++++++++ .../facet/search/CountingFacetsCollector.java | 44 +++++------ .../lucene/facet/search/FacetsCollector.java | 10 ++- .../facet/search/StandardFacetsCollector.java | 12 +++ .../lucene/facet/util/ScoredDocIdsUtils.java | 1 + .../apache/lucene/facet/FacetTestBase.java | 75 +++++++++++++------ .../facet/example/TestMultiCLExample.java | 1 - .../TestFacetsPayloadMigrationReader.java | 22 +++++- .../lucene/facet/search/BaseTestTopK.java | 13 +--- .../search/CountingFacetsCollectorTest.java | 27 +++---- .../lucene/facet/search/TestDemoFacets.java | 6 +- .../TestFacetsAccumulatorWithComplement.java | 8 +- .../search/TestMultipleCategoryLists.java | 4 - .../search/TestSameRequestAccumulation.java | 10 ++- .../search/TestScoredDocIdCollector.java | 16 ++-- .../search/TestStandardFacetsAccumulator.java | 1 - .../TestTopKInEachNodeResultHandler.java | 3 - .../facet/search/TestTopKResultsHandler.java | 47 +++++++----- .../search/TestTopKResultsHandlerRandom.java | 16 ++-- .../facet/search/TestTotalFacetCounts.java | 12 +++ .../search/TestTotalFacetCountsCache.java | 2 +- .../search/sampling/BaseSampleTestTopK.java | 14 ++-- 25 files changed, 346 insertions(+), 155 deletions(-) create mode 100644 lucene/facet/src/java/org/apache/lucene/facet/index/params/PerDimensionOrdinalPolicy.java diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 7069db7c1d9..3abd8b9bb5c 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -61,6 +61,11 @@ Optimizations * LUCENE-4690: 
Performance improvements and non-hashing versions of NumericUtils.*ToPrefixCoded() (yonik) +* LUCENE-4715: CategoryListParams.getOrdinalPolicy now allows to return a + different OrdinalPolicy per dimension, to better tune how you index + facets. Also added OrdinalPolicy.ALL_BUT_DIMENSION. + (Shai Erera, Michael McCandless) + New Features * LUCENE-4686: New specialized DGapVInt8IntEncoder for facets (now the diff --git a/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java b/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java index d96d85af59a..e41ca4bb958 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/index/CountingListBuilder.java @@ -3,6 +3,7 @@ package org.apache.lucene.facet.index; import java.io.IOException; import java.util.Collections; import java.util.HashMap; +import java.util.Iterator; import java.util.Map; import java.util.Map.Entry; @@ -115,12 +116,12 @@ public class CountingListBuilder implements CategoryListBuilder { private final OrdinalsEncoder ordinalsEncoder; private final TaxonomyWriter taxoWriter; - private final OrdinalPolicy ordinalPolicy; + private final CategoryListParams clp; public CountingListBuilder(CategoryListParams categoryListParams, FacetIndexingParams indexingParams, TaxonomyWriter taxoWriter) { this.taxoWriter = taxoWriter; - this.ordinalPolicy = categoryListParams.getOrdinalPolicy(); + this.clp = categoryListParams; if (indexingParams.getPartitionSize() == Integer.MAX_VALUE) { ordinalsEncoder = new NoPartitionsOrdinalsEncoder(categoryListParams); } else { @@ -141,16 +142,23 @@ public class CountingListBuilder implements CategoryListBuilder { */ @Override public Map build(IntsRef ordinals, Iterable categories) throws IOException { - int upto = ordinals.length; // since we add ordinals to IntsRef, iterate upto original length - - if (ordinalPolicy == OrdinalPolicy.ALL_PARENTS) { // add all 
parents too - for (int i = 0; i < upto; i++) { - int ordinal = ordinals.ints[i]; + int upto = ordinals.length; // since we may add ordinals to IntsRef, iterate upto original length + + Iterator iter = categories.iterator(); + for (int i = 0; i < upto; i++) { + int ordinal = ordinals.ints[i]; + CategoryPath cp = iter.next(); + OrdinalPolicy op = clp.getOrdinalPolicy(cp.components[0]); + if (op != OrdinalPolicy.NO_PARENTS) { + // need to add parents too int parent = taxoWriter.getParent(ordinal); while (parent > 0) { ordinals.ints[ordinals.length++] = parent; parent = taxoWriter.getParent(parent); } + if (op == OrdinalPolicy.ALL_BUT_DIMENSION) { // discard the last added parent, which is the dimension + ordinals.length--; + } } } return ordinalsEncoder.encode(ordinals); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java b/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java index 08c1747ed7b..dcf8ed191a2 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/index/params/CategoryListParams.java @@ -4,6 +4,7 @@ import java.io.IOException; import org.apache.lucene.facet.search.CategoryListIterator; import org.apache.lucene.facet.search.DocValuesCategoryListIterator; +import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.util.PartitionsUtils; import org.apache.lucene.util.encoding.DGapVInt8IntEncoder; import org.apache.lucene.util.encoding.IntDecoder; @@ -35,25 +36,61 @@ import org.apache.lucene.util.encoding.UniqueValuesIntEncoder; */ public class CategoryListParams { - /** OrdinalPolicy defines which ordinals are encoded for every document. */ + /** + * Defines which category ordinals are encoded for every document. This also + * affects how category ordinals are aggregated, check the different policies + * for more details. 
+ */ public static enum OrdinalPolicy { /** - * Encodes only the ordinal of leaf nodes. That is, the category A/B/C will - * not encode the ordinals of A and A/B. + * Encodes only the ordinals of leaf nodes. That is, for the category A/B/C, + * the ordinals of A and A/B will not be encoded. This policy is efficient + * for hierarchical dimensions, as it reduces the number of ordinals that + * are visited per document. During faceted search, this policy behaves + * exactly like {@link #ALL_PARENTS}, and the counts of all path components + * will be computed as well. * *

* NOTE: this {@link OrdinalPolicy} requires a special collector or - * accumulator, which will fix the parents' counts, unless you are not - * interested in the parents counts. + * accumulator, which will fix the parents' counts. + * + *

+ * NOTE: since only leaf nodes are encoded for the document, you + * should use this policy when the same document doesn't share two + * categories that have a mutual parent, or otherwise the counts will be + * wrong (the mutual parent will be over-counted). For example, if a + * document has the categories A/B/C and A/B/D, then with this policy the + * counts of "A" and "B" will be 2, which is wrong. If you intend to index + * hierarchical dimensions, with more than one category per document, you + * should use either {@link #ALL_PARENTS} or {@link #ALL_BUT_DIMENSION}. */ NO_PARENTS, /** * Encodes the ordinals of all path components. That is, the category A/B/C - * will encode the ordinals of A and A/B as well. This is the default - * {@link OrdinalPolicy}. + * will encode the ordinals of A and A/B as well. If you don't require the + * dimension's count during search, consider using + * {@link #ALL_BUT_DIMENSION}. */ - ALL_PARENTS + ALL_PARENTS, + + /** + * Encodes the ordinals of all path components except the dimension. The + * dimension of a category is defined to be the first component in + * {@link CategoryPath#components}. For the category A/B/C, the ordinal of + * A/B will be encoded as well, however not the ordinal of A. + * + *

+ * NOTE: when facets are aggregated, this policy behaves exactly like + * {@link #ALL_PARENTS}, except that the dimension is never counted. I.e. if + * you ask to count the facet "A", then while in {@link #ALL_PARENTS} you + * will get counts for "A" and its children, with this policy you + * will get counts for only its children. This policy is the default + * one, and makes sense for using with flat dimensions, whenever your + * application does not require the dimension's count. Otherwise, use + * {@link #ALL_PARENTS}. + */ + ALL_BUT_DIMENSION } /** The default field used to store the facets information. */ @@ -63,7 +100,7 @@ public class CategoryListParams { * The default {@link OrdinalPolicy} that's used when encoding a document's * category ordinals. */ - public static final OrdinalPolicy DEFAULT_ORDINAL_POLICY = OrdinalPolicy.ALL_PARENTS; + public static final OrdinalPolicy DEFAULT_ORDINAL_POLICY = OrdinalPolicy.ALL_BUT_DIMENSION; public final String field; @@ -115,19 +152,15 @@ public class CategoryListParams { return false; } CategoryListParams other = (CategoryListParams) o; - if (this.hashCode != other.hashCode) { + if (hashCode != other.hashCode) { return false; } - - // The above hashcodes might equal each other in the case of a collision, - // so at this point only directly term equality testing will settle - // the equality test. return field.equals(other.field); } @Override public int hashCode() { - return this.hashCode; + return hashCode; } /** Create the {@link CategoryListIterator} for the specified partition. */ @@ -137,14 +170,18 @@ public class CategoryListParams { return new DocValuesCategoryListIterator(docValuesField, createEncoder().createMatchingDecoder()); } - /** Returns the {@link OrdinalPolicy} to use for this {@link CategoryListParams}. */ - public OrdinalPolicy getOrdinalPolicy() { + /** + * Returns the {@link OrdinalPolicy} to use for the given dimension. 
This + * {@link CategoryListParams} always returns {@link #DEFAULT_ORDINAL_POLICY} + * for all dimensions. + */ + public OrdinalPolicy getOrdinalPolicy(String dimension) { return DEFAULT_ORDINAL_POLICY; } @Override public String toString() { - return "field=" + field + " encoder=" + createEncoder() + " ordinalPolicy=" + getOrdinalPolicy(); + return "field=" + field + " encoder=" + createEncoder() + " ordinalPolicy=" + getOrdinalPolicy(null); } -} \ No newline at end of file +} diff --git a/lucene/facet/src/java/org/apache/lucene/facet/index/params/PerDimensionOrdinalPolicy.java b/lucene/facet/src/java/org/apache/lucene/facet/index/params/PerDimensionOrdinalPolicy.java new file mode 100644 index 00000000000..c91b1a69ccf --- /dev/null +++ b/lucene/facet/src/java/org/apache/lucene/facet/index/params/PerDimensionOrdinalPolicy.java @@ -0,0 +1,55 @@ +package org.apache.lucene.facet.index.params; + +import java.util.Map; + +import org.apache.lucene.facet.taxonomy.CategoryPath; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * A {@link CategoryListParams} which allows controlling the + * {@link CategoryListParams.OrdinalPolicy} used for each dimension. 
The + * dimension is specified as the first component in + * {@link CategoryPath#components}. + */ +public class PerDimensionOrdinalPolicy extends CategoryListParams { + + private final Map policies; + private final OrdinalPolicy defaultOP; + + public PerDimensionOrdinalPolicy(Map policies) { + this(policies, DEFAULT_ORDINAL_POLICY); + } + + public PerDimensionOrdinalPolicy(Map policies, OrdinalPolicy defaultOP) { + this.defaultOP = defaultOP; + this.policies = policies; + } + + @Override + public OrdinalPolicy getOrdinalPolicy(String dimension) { + OrdinalPolicy op = policies.get(dimension); + return op == null ? defaultOP : op; + } + + @Override + public String toString() { + return super.toString() + " policies=" + policies; + } + +} diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java b/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java index 97eff9930de..461b1aeadf7 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/CountingFacetsCollector.java @@ -85,7 +85,7 @@ import org.apache.lucene.util.encoding.DGapVInt8IntDecoder; public class CountingFacetsCollector extends FacetsCollector { private final FacetSearchParams fsp; - private final OrdinalPolicy ordinalPolicy; + private final CategoryListParams clp; private final TaxonomyReader taxoReader; private final BytesRef buf = new BytesRef(32); private final FacetArrays facetArrays; @@ -107,8 +107,7 @@ public class CountingFacetsCollector extends FacetsCollector { assert assertParams(fsp) == null : assertParams(fsp); this.fsp = fsp; - CategoryListParams clp = fsp.indexingParams.getCategoryListParams(fsp.facetRequests.get(0).categoryPath); - this.ordinalPolicy = clp.getOrdinalPolicy(); + this.clp = fsp.indexingParams.getCategoryListParams(fsp.facetRequests.get(0).categoryPath); this.facetsField = clp.field; this.taxoReader = taxoReader; 
this.facetArrays = facetArrays; @@ -217,21 +216,21 @@ public class CountingFacetsCollector extends FacetsCollector { } } - private void countParents(int[] parents) { - // counts[0] is the count of ROOT, which we don't care about and counts[1] - // can only update counts[0], so we don't bother to visit it too. also, - // since parents always have lower ordinals than their children, we traverse - // the array backwards. this also allows us to update just the immediate - // parent's count (actually, otherwise it would be a mistake). - for (int i = counts.length - 1; i > 1; i--) { - int count = counts[i]; - if (count > 0) { - int parent = parents[i]; - if (parent != 0) { - counts[parent] += count; - } - } + /** + * Computes the counts of ordinals under the given ordinal's tree, by + * recursively going down to leaf nodes and rolling up their counts (called + * only when categories are indexed with OrdinalPolicy.NO_PARENTS). + */ + private int rollupCounts(int ordinal, int[] children, int[] siblings) { + int count = 0; + while (ordinal != TaxonomyReader.INVALID_ORDINAL) { + int childCount = counts[ordinal]; + childCount += rollupCounts(children[ordinal], children, siblings); + counts[ordinal] = childCount; + count += childCount; + ordinal = siblings[ordinal]; } + return count; } @Override @@ -242,11 +241,6 @@ public class CountingFacetsCollector extends FacetsCollector { ParallelTaxonomyArrays arrays = taxoReader.getParallelTaxonomyArrays(); - if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) { - // need to count parents - countParents(arrays.parents()); - } - // compute top-K final int[] children = arrays.children(); final int[] siblings = arrays.siblings(); @@ -256,6 +250,12 @@ public class CountingFacetsCollector extends FacetsCollector { if (rootOrd == TaxonomyReader.INVALID_ORDINAL) { // category does not exist continue; } + OrdinalPolicy ordinalPolicy = clp.getOrdinalPolicy(fr.categoryPath.components[0]); + if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) { + // need 
to count parents + counts[rootOrd] += rollupCounts(children[rootOrd], children, siblings); + } + FacetResultNode root = new FacetResultNode(); root.ordinal = rootOrd; root.label = fr.categoryPath; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java b/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java index ad8a44d454e..1809d0c4cf5 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/FacetsCollector.java @@ -43,13 +43,21 @@ public abstract class FacetsCollector extends Collector { * Returns the most optimized {@link FacetsCollector} for the given search * parameters. The returned {@link FacetsCollector} is guaranteed to satisfy * the requested parameters. + * + * @throws IllegalArgumentException + * if there is no built-in collector that can satisfy the search + * parameters. */ public static FacetsCollector create(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) { if (CountingFacetsCollector.assertParams(fsp) == null) { return new CountingFacetsCollector(fsp, taxoReader); } - return new StandardFacetsCollector(fsp, indexReader, taxoReader); + if (StandardFacetsCollector.assertParams(fsp) == null) { + return new StandardFacetsCollector(fsp, indexReader, taxoReader); + } + + throw new IllegalArgumentException("None of the built-in FacetsCollectors can handle the given search params"); } /** diff --git a/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsCollector.java b/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsCollector.java index b93320d055b..5147b35bf2f 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsCollector.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/search/StandardFacetsCollector.java @@ -49,6 +49,17 @@ public class StandardFacetsCollector extends FacetsCollector { private List results; private Object 
resultsGuard; + static String assertParams(FacetSearchParams fsp) { + // make sure none of the categories in the given FacetRequests was indexed with NO_PARENTS + for (FacetRequest fr : fsp.facetRequests) { + CategoryListParams clp = fsp.indexingParams.getCategoryListParams(fr.categoryPath); + if (clp.getOrdinalPolicy(fr.categoryPath.components[0]) == OrdinalPolicy.NO_PARENTS) { + return "this collector does not support aggregating categories that were indexed with OrdinalPolicy.NO_PARENTS"; + } + } + return null; + } + /** * Create a collector for accumulating facets while collecting documents * during search. @@ -62,6 +73,7 @@ public class StandardFacetsCollector extends FacetsCollector { * taxonomy containing the facets. */ public StandardFacetsCollector(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) { + assert assertParams(facetSearchParams) == null : assertParams(facetSearchParams); facetsAccumulator = initFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader); scoreDocIdCollector = initScoredDocCollector(facetSearchParams, indexReader, taxonomyReader); resultsGuard = new Object(); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java b/lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java index 923045ff2d9..917c4ab1ddb 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/util/ScoredDocIdsUtils.java @@ -74,6 +74,7 @@ public class ScoredDocIdsUtils { /** Clear all deleted documents from a given open-bit-set according to a given reader */ private static void clearDeleted(final IndexReader reader, final FixedBitSet set) throws IOException { + // TODO use BitsFilteredDocIdSet? 
// If there are no deleted docs if (!reader.hasDeletions()) { diff --git a/lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java b/lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java index 91df84c6a0f..2ad38f01e78 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/FacetTestBase.java @@ -6,6 +6,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.HashSet; +import java.util.Iterator; import java.util.List; import java.util.Map; @@ -17,6 +18,7 @@ import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.facet.index.FacetFields; import org.apache.lucene.facet.index.params.CategoryListParams; +import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy; import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.search.params.FacetRequest; import org.apache.lucene.facet.search.params.FacetSearchParams; @@ -44,6 +46,7 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util._TestUtil; +import org.apache.lucene.util.collections.IntToObjectMap; import org.junit.AfterClass; import org.junit.BeforeClass; @@ -73,7 +76,8 @@ public abstract class FacetTestBase extends FacetTestCase { SearchTaxoDirPair() {} } - private static HashMap dirsPerPartitionSize; + private static IntToObjectMap dirsPerPartitionSize; + private static IntToObjectMap fipPerPartitionSize; private static File TEST_DIR; /** Documents text field. 
*/ @@ -91,12 +95,15 @@ public abstract class FacetTestBase extends FacetTestCase { @BeforeClass public static void beforeClassFacetTestBase() { TEST_DIR = _TestUtil.getTempDir("facets"); - dirsPerPartitionSize = new HashMap(); + dirsPerPartitionSize = new IntToObjectMap(); + fipPerPartitionSize = new IntToObjectMap(); } @AfterClass public static void afterClassFacetTestBase() throws Exception { - for (SearchTaxoDirPair pair : dirsPerPartitionSize.values()) { + Iterator iter = dirsPerPartitionSize.iterator(); + while (iter.hasNext()) { + SearchTaxoDirPair pair = iter.next(); IOUtils.close(pair.searchDir, pair.taxoDir); } } @@ -128,20 +135,16 @@ public abstract class FacetTestBase extends FacetTestCase { return DEFAULT_CONTENT[doc]; } - /** Prepare index (in RAM) with single partition */ - protected final void initIndex() throws Exception { - initIndex(Integer.MAX_VALUE); - } - - /** Prepare index (in RAM) with some documents and some facets */ - protected final void initIndex(int partitionSize) throws Exception { - initIndex(partitionSize, false); + /** Prepare index (in RAM) with some documents and some facets. */ + protected final void initIndex(FacetIndexingParams fip) throws Exception { + initIndex(false, fip); } - /** Prepare index (in RAM/Disk) with some documents and some facets */ - protected final void initIndex(int partitionSize, boolean forceDisk) throws Exception { + /** Prepare index (in RAM/Disk) with some documents and some facets. 
*/ + protected final void initIndex(boolean forceDisk, FacetIndexingParams fip) throws Exception { + int partitionSize = fip.getPartitionSize(); if (VERBOSE) { - System.out.println("Partition Size: " + partitionSize+" forceDisk: "+forceDisk); + System.out.println("Partition Size: " + partitionSize + " forceDisk: "+forceDisk); } SearchTaxoDirPair pair = dirsPerPartitionSize.get(Integer.valueOf(partitionSize)); @@ -158,7 +161,7 @@ public abstract class FacetTestBase extends FacetTestCase { RandomIndexWriter iw = new RandomIndexWriter(random(), pair.searchDir, getIndexWriterConfig(getAnalyzer())); TaxonomyWriter taxo = new DirectoryTaxonomyWriter(pair.taxoDir, OpenMode.CREATE); - populateIndex(iw, taxo, getFacetIndexingParams(partitionSize)); + populateIndex(iw, taxo, fip); // commit changes (taxonomy prior to search index for consistency) taxo.commit(); @@ -182,14 +185,40 @@ public abstract class FacetTestBase extends FacetTestCase { /** Returns a {@link FacetIndexingParams} per the given partition size. */ protected FacetIndexingParams getFacetIndexingParams(final int partSize) { - // several of our encoders don't support the value 0, - // which is one of the values encoded when dealing w/ partitions. - return new FacetIndexingParams() { - @Override - public int getPartitionSize() { - return partSize; - } - }; + return getFacetIndexingParams(partSize, false); + } + + /** + * Returns a {@link FacetIndexingParams} per the given partition size. If + * requested, then {@link OrdinalPolicy} will be set to + * {@link OrdinalPolicy#ALL_PARENTS}, otherwise it will randomize. + */ + protected FacetIndexingParams getFacetIndexingParams(final int partSize, final boolean forceAllParents) { + FacetIndexingParams fip = fipPerPartitionSize.get(partSize); + if (fip == null) { + // randomize OrdinalPolicy. Since not all Collectors / Accumulators + // support NO_PARENTS, don't include it. + // TODO: once all code paths support NO_PARENTS, randomize it too. 
+ CategoryListParams randomOP = new CategoryListParams() { + final OrdinalPolicy op = random().nextBoolean() ? OrdinalPolicy.ALL_BUT_DIMENSION : OrdinalPolicy.ALL_PARENTS; + @Override + public OrdinalPolicy getOrdinalPolicy(String dimension) { + return forceAllParents ? OrdinalPolicy.ALL_PARENTS : op; + } + }; + + // several of our encoders don't support the value 0, + // which is one of the values encoded when dealing w/ partitions, + // therefore don't randomize the encoder. + fip = new FacetIndexingParams(randomOP) { + @Override + public int getPartitionSize() { + return partSize; + } + }; + fipPerPartitionSize.put(partSize, fip); + } + return fip; } /** diff --git a/lucene/facet/src/test/org/apache/lucene/facet/example/TestMultiCLExample.java b/lucene/facet/src/test/org/apache/lucene/facet/example/TestMultiCLExample.java index 357ffb12dbd..55038df7f9f 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/example/TestMultiCLExample.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/example/TestMultiCLExample.java @@ -45,7 +45,6 @@ public class TestMultiCLExample extends LuceneTestCase { assertNotNull("Result should not be null", result); FacetResultNode node = result.getFacetResultNode(); assertEquals("Invalid label", "5", node.label.toString()); - assertEquals("Invalid value", 2.0, node.value, 0.0); assertEquals("Invalid # of subresults", 3, node.subResults.size()); Iterator subResults = node.subResults.iterator(); diff --git a/lucene/facet/src/test/org/apache/lucene/facet/index/TestFacetsPayloadMigrationReader.java b/lucene/facet/src/test/org/apache/lucene/facet/index/TestFacetsPayloadMigrationReader.java index f45aba5c4f3..9c5c477bbf3 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/index/TestFacetsPayloadMigrationReader.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/index/TestFacetsPayloadMigrationReader.java @@ -25,6 +25,8 @@ import org.apache.lucene.facet.FacetTestCase; import 
org.apache.lucene.facet.index.params.CategoryListParams; import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.index.params.PerDimensionIndexingParams; +import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy; +import org.apache.lucene.facet.index.params.PerDimensionOrdinalPolicy; import org.apache.lucene.facet.search.CategoryListIterator; import org.apache.lucene.facet.search.DrillDown; import org.apache.lucene.facet.search.FacetsCollector; @@ -368,9 +370,23 @@ public class TestFacetsPayloadMigrationReader extends FacetTestCase { // set custom CLP fields for two dimensions and use the default ($facets) for the other two HashMap params = new HashMap(); - params.put(new CategoryPath(DIMENSIONS[0]), new CategoryListParams(DIMENSIONS[0])); - params.put(new CategoryPath(DIMENSIONS[1]), new CategoryListParams(DIMENSIONS[1])); - FacetIndexingParams fip = new PerDimensionIndexingParams(params) { + params.put(new CategoryPath(DIMENSIONS[0]), new CategoryListParams(DIMENSIONS[0]) { + @Override + public OrdinalPolicy getOrdinalPolicy(String dimension) { + return OrdinalPolicy.ALL_PARENTS; + } + }); + params.put(new CategoryPath(DIMENSIONS[1]), new CategoryListParams(DIMENSIONS[1]) { + @Override + public OrdinalPolicy getOrdinalPolicy(String dimension) { + return OrdinalPolicy.ALL_PARENTS; + } + }); + + HashMap policies = new HashMap(); + policies.put(DIMENSIONS[2], OrdinalPolicy.ALL_PARENTS); + policies.put(DIMENSIONS[3], OrdinalPolicy.ALL_PARENTS); + FacetIndexingParams fip = new PerDimensionIndexingParams(params, new PerDimensionOrdinalPolicy(policies)) { @Override public int getPartitionSize() { return partitionSize; diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/BaseTestTopK.java b/lucene/facet/src/test/org/apache/lucene/facet/search/BaseTestTopK.java index 45e2322bbd5..f9f58d9b879 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/BaseTestTopK.java +++ 
b/lucene/facet/src/test/org/apache/lucene/facet/search/BaseTestTopK.java @@ -51,10 +51,9 @@ public abstract class BaseTestTopK extends FacetTestBase { private int nextInt; @Override - protected void populateIndex(RandomIndexWriter iw, TaxonomyWriter taxo, - FacetIndexingParams iParams) throws IOException { + protected void populateIndex(RandomIndexWriter iw, TaxonomyWriter taxo, FacetIndexingParams fip) throws IOException { currDoc = -1; - super.populateIndex(iw, taxo, iParams); + super.populateIndex(iw, taxo, fip); } /** prepare the next random int */ @@ -94,17 +93,13 @@ public abstract class BaseTestTopK extends FacetTestBase { return Arrays.asList(cp); } - protected FacetSearchParams searchParamsWithRequests(int numResults) { - return searchParamsWithRequests(numResults, Integer.MAX_VALUE); - } - - protected FacetSearchParams searchParamsWithRequests(int numResults, int partitionSize) { + protected FacetSearchParams searchParamsWithRequests(int numResults, FacetIndexingParams fip) { List facetRequests = new ArrayList(); facetRequests.add(new CountFacetRequest(new CategoryPath("a"), numResults)); facetRequests.add(new CountFacetRequest(new CategoryPath("a", "1"), numResults)); facetRequests.add(new CountFacetRequest(new CategoryPath("a", "1", "10"), numResults)); facetRequests.add(new CountFacetRequest(new CategoryPath("a", "2", "26", "267"), numResults)); - return getFacetSearchParams(facetRequests, getFacetIndexingParams(partitionSize)); + return getFacetSearchParams(facetRequests, fip); } @Override diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java b/lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java index 93dc41fcaf0..f235a5aef04 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/CountingFacetsCollectorTest.java @@ -16,8 +16,9 @@ import 
org.apache.lucene.document.StringField; import org.apache.lucene.facet.FacetTestCase; import org.apache.lucene.facet.index.FacetFields; import org.apache.lucene.facet.index.params.CategoryListParams; +import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy; import org.apache.lucene.facet.index.params.FacetIndexingParams; -import org.apache.lucene.facet.index.params.PerDimensionIndexingParams; +import org.apache.lucene.facet.index.params.PerDimensionOrdinalPolicy; import org.apache.lucene.facet.search.params.CountFacetRequest; import org.apache.lucene.facet.search.params.FacetRequest; import org.apache.lucene.facet.search.params.FacetRequest.SortBy; @@ -146,13 +147,11 @@ public class CountingFacetsCollectorTest extends FacetTestCase { termExpectedCounts.put(cp, termExpectedCounts.get(cp) + 1); } } - // add 1 to each dimension - allExpectedCounts.put(CP_A, allExpectedCounts.get(CP_A) + 1); + // add 1 to each NO_PARENTS dimension allExpectedCounts.put(CP_B, allExpectedCounts.get(CP_B) + 1); allExpectedCounts.put(CP_C, allExpectedCounts.get(CP_C) + 1); allExpectedCounts.put(CP_D, allExpectedCounts.get(CP_D) + 1); if (updateTermExpectedCounts) { - termExpectedCounts.put(CP_A, termExpectedCounts.get(CP_A) + 1); termExpectedCounts.put(CP_B, termExpectedCounts.get(CP_B) + 1); termExpectedCounts.put(CP_C, termExpectedCounts.get(CP_C) + 1); termExpectedCounts.put(CP_D, termExpectedCounts.get(CP_D) + 1); @@ -252,19 +251,13 @@ public class CountingFacetsCollectorTest extends FacetTestCase { conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); // prevent merges, so we can control the index segments IndexWriter indexWriter = new IndexWriter(indexDir, conf); TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); - CategoryListParams allParents = new CategoryListParams(); - CategoryListParams noParents = new CategoryListParams("no_parents") { - @Override - public OrdinalPolicy getOrdinalPolicy() { - return OrdinalPolicy.NO_PARENTS; - } - }; - Map 
params = new HashMap(); - params.put(CP_A, allParents); - params.put(CP_B, allParents); - params.put(CP_C, noParents); - params.put(CP_D, noParents); - fip = new PerDimensionIndexingParams(params); + + Map policies = new HashMap(); + policies.put(CP_B.components[0], OrdinalPolicy.ALL_PARENTS); + policies.put(CP_C.components[0], OrdinalPolicy.NO_PARENTS); + policies.put(CP_D.components[0], OrdinalPolicy.NO_PARENTS); + CategoryListParams clp = new PerDimensionOrdinalPolicy(policies); + fip = new FacetIndexingParams(clp); allExpectedCounts = newCounts(); termExpectedCounts = newCounts(); diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java b/lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java index a4f53e7da0a..1e402cc096c 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/TestDemoFacets.java @@ -104,9 +104,9 @@ public class TestDemoFacets extends FacetTestCase { // Retrieve & verify results: List results = c.getFacetResults(); assertEquals(2, results.size()); - assertEquals("Publish Date (5)\n 2012 (2)\n 2010 (2)\n 1999 (1)\n", + assertEquals("Publish Date (0)\n 2012 (2)\n 2010 (2)\n 1999 (1)\n", FacetTestUtils.toSimpleString(results.get(0))); - assertEquals("Author (5)\n Lisa (2)\n Frank (1)\n Susan (1)\n Bob (1)\n", + assertEquals("Author (0)\n Lisa (2)\n Frank (1)\n Susan (1)\n Bob (1)\n", FacetTestUtils.toSimpleString(results.get(1))); @@ -117,7 +117,7 @@ public class TestDemoFacets extends FacetTestCase { searcher.search(q2, c); results = c.getFacetResults(); assertEquals(1, results.size()); - assertEquals("Author (2)\n Lisa (1)\n Bob (1)\n", + assertEquals("Author (0)\n Lisa (1)\n Bob (1)\n", FacetTestUtils.toSimpleString(results.get(0))); // Smoke test PrintTaxonomyStats: diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsAccumulatorWithComplement.java 
b/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsAccumulatorWithComplement.java index 13fc3d18dcf..a93d2611148 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsAccumulatorWithComplement.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/TestFacetsAccumulatorWithComplement.java @@ -14,6 +14,7 @@ import org.junit.Before; import org.junit.Test; import org.apache.lucene.facet.FacetTestBase; +import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.search.FacetsAccumulator; import org.apache.lucene.facet.search.ScoredDocIDs; import org.apache.lucene.facet.search.ScoredDocIdCollector; @@ -48,11 +49,14 @@ import org.apache.lucene.facet.taxonomy.CategoryPath; */ public class TestFacetsAccumulatorWithComplement extends FacetTestBase { + private FacetIndexingParams fip; + @Override @Before public void setUp() throws Exception { super.setUp(); - initIndex(); + fip = getFacetIndexingParams(Integer.MAX_VALUE); + initIndex(fip); } @Override @@ -125,7 +129,7 @@ public class TestFacetsAccumulatorWithComplement extends FacetTestBase { /** compute facets with certain facet requests and docs */ private List findFacets(ScoredDocIDs sDocids, boolean withComplement) throws IOException { - FacetSearchParams fsp = new FacetSearchParams(getFacetIndexingParams(Integer.MAX_VALUE), new CountFacetRequest(new CategoryPath("root","a"), 10)); + FacetSearchParams fsp = new FacetSearchParams(fip, new CountFacetRequest(new CategoryPath("root","a"), 10)); FacetsAccumulator fAccumulator = new StandardFacetsAccumulator(fsp, indexReader, taxoReader); fAccumulator.setComplementThreshold( diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/TestMultipleCategoryLists.java b/lucene/facet/src/test/org/apache/lucene/facet/search/TestMultipleCategoryLists.java index d5b5bd29b24..26f8bd75310 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/TestMultipleCategoryLists.java +++ 
b/lucene/facet/src/test/org/apache/lucene/facet/search/TestMultipleCategoryLists.java @@ -274,7 +274,6 @@ public class TestMultipleCategoryLists extends FacetTestCase { Iterable subResults = resNode.subResults; Iterator subIter = subResults.iterator(); - checkResult(resNode, "Band", 5.0); checkResult(subIter.next(), "Band/Rock & Pop", 4.0); checkResult(subIter.next(), "Band/Punk", 1.0); @@ -283,7 +282,6 @@ public class TestMultipleCategoryLists extends FacetTestCase { subResults = resNode.subResults; subIter = subResults.iterator(); - checkResult(resNode, "Band", 5.0); checkResult(subIter.next(), "Band/Rock & Pop", 4.0); checkResult(subIter.next(), "Band/Rock & Pop/Dave Matthews Band", 1.0); checkResult(subIter.next(), "Band/Rock & Pop/REM", 1.0); @@ -297,7 +295,6 @@ public class TestMultipleCategoryLists extends FacetTestCase { subResults = resNode.subResults; subIter = subResults.iterator(); - checkResult(resNode, "Author", 3.0); checkResult(subIter.next(), "Author/Kurt Vonnegut", 1.0); checkResult(subIter.next(), "Author/Stephen King", 1.0); checkResult(subIter.next(), "Author/Mark Twain", 1.0); @@ -307,7 +304,6 @@ public class TestMultipleCategoryLists extends FacetTestCase { subResults = resNode.subResults; subIter = subResults.iterator(); - checkResult(resNode, "Band/Rock & Pop", 4.0); checkResult(subIter.next(), "Band/Rock & Pop/Dave Matthews Band", 1.0); checkResult(subIter.next(), "Band/Rock & Pop/REM", 1.0); checkResult(subIter.next(), "Band/Rock & Pop/U2", 1.0); diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/TestSameRequestAccumulation.java b/lucene/facet/src/test/org/apache/lucene/facet/search/TestSameRequestAccumulation.java index a63e6108e00..8575d32ca08 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/TestSameRequestAccumulation.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/TestSameRequestAccumulation.java @@ -3,6 +3,7 @@ package org.apache.lucene.facet.search; import java.util.List; import 
org.apache.lucene.facet.FacetTestBase; +import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.search.FacetsCollector; import org.apache.lucene.facet.search.params.CountFacetRequest; import org.apache.lucene.facet.search.params.FacetSearchParams; @@ -31,18 +32,21 @@ import org.junit.Before; public class TestSameRequestAccumulation extends FacetTestBase { + private FacetIndexingParams fip; + @Override @Before public void setUp() throws Exception { super.setUp(); - initIndex(); + fip = getFacetIndexingParams(Integer.MAX_VALUE); + initIndex(fip); } // Following LUCENE-4461 - ensure requesting the (exact) same request more // than once does not alter the results public void testTwoSameRequests() throws Exception { final CountFacetRequest facetRequest = new CountFacetRequest(new CategoryPath("root"), 10); - FacetSearchParams fsp = new FacetSearchParams(facetRequest); + FacetSearchParams fsp = new FacetSearchParams(fip, facetRequest); FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader); searcher.search(new MatchAllDocsQuery(), fc); @@ -50,7 +54,7 @@ public class TestSameRequestAccumulation extends FacetTestBase { final String expected = fc.getFacetResults().get(0).toString(); // now add the same facet request with duplicates (same instance and same one) - fsp = new FacetSearchParams(facetRequest, facetRequest, new CountFacetRequest(new CategoryPath("root"), 10)); + fsp = new FacetSearchParams(fip, facetRequest, facetRequest, new CountFacetRequest(new CategoryPath("root"), 10)); // make sure the search params holds 3 requests now assertEquals(3, fsp.facetRequests.size()); diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/TestScoredDocIdCollector.java b/lucene/facet/src/test/org/apache/lucene/facet/search/TestScoredDocIdCollector.java index bac10b19659..03d099e5210 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/TestScoredDocIdCollector.java +++ 
b/lucene/facet/src/test/org/apache/lucene/facet/search/TestScoredDocIdCollector.java @@ -5,6 +5,7 @@ import java.util.Arrays; import java.util.List; import org.apache.lucene.facet.FacetTestBase; +import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.search.params.CountFacetRequest; import org.apache.lucene.facet.search.params.FacetSearchParams; import org.apache.lucene.facet.search.params.ScoreFacetRequest; @@ -37,11 +38,14 @@ import org.junit.Test; /** Test ScoredDocIdCollector. */ public class TestScoredDocIdCollector extends FacetTestBase { + private FacetIndexingParams fip; + @Override @Before public void setUp() throws Exception { super.setUp(); - initIndex(); + fip = getFacetIndexingParams(Integer.MAX_VALUE); + initIndex(fip); } @Override @@ -73,8 +77,8 @@ public class TestScoredDocIdCollector extends FacetTestBase { // verify by facet values CategoryPath cp = new CategoryPath("root","a"); - FacetSearchParams countFSP = new FacetSearchParams(getFacetIndexingParams(Integer.MAX_VALUE), new CountFacetRequest(cp, 10)); - FacetSearchParams scoreFSP = new FacetSearchParams(getFacetIndexingParams(Integer.MAX_VALUE), new ScoreFacetRequest(cp, 10)); + FacetSearchParams countFSP = new FacetSearchParams(fip, new CountFacetRequest(cp, 10)); + FacetSearchParams scoreFSP = new FacetSearchParams(fip, new ScoreFacetRequest(cp, 10)); List countRes = findFacets(scoredDocIDs, countFSP); List scoreRes = findFacets(scoredDocIDs, scoreFSP); @@ -101,10 +105,8 @@ public class TestScoredDocIdCollector extends FacetTestBase { } // compute facets with certain facet requests and docs - private List findFacets(ScoredDocIDs sDocids, - FacetSearchParams facetSearchParams) throws IOException { - FacetsAccumulator fAccumulator = new StandardFacetsAccumulator( - facetSearchParams, indexReader, taxoReader); + private List findFacets(ScoredDocIDs sDocids, FacetSearchParams facetSearchParams) throws IOException { + FacetsAccumulator fAccumulator = new 
StandardFacetsAccumulator(facetSearchParams, indexReader, taxoReader); List res = fAccumulator.accumulate(sDocids); // Results are ready, printing them... diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/TestStandardFacetsAccumulator.java b/lucene/facet/src/test/org/apache/lucene/facet/search/TestStandardFacetsAccumulator.java index 06e09480458..9bc3dc1cc0e 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/TestStandardFacetsAccumulator.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/TestStandardFacetsAccumulator.java @@ -113,7 +113,6 @@ public class TestStandardFacetsAccumulator extends FacetTestCase { List results = fc.getFacetResults(); assertEquals("received too many facet results", 1, results.size()); FacetResultNode frn = results.get(0).getFacetResultNode(); - assertEquals("wrong weight for \"A\"", 4, (int) frn.value); assertEquals("wrong number of children", 2, frn.subResults.size()); for (FacetResultNode node : frn.subResults) { assertEquals("wrong weight for child " + node.label, 2, (int) node.value); diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java b/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java index 856e36f94a4..c5621cb9f64 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKInEachNodeResultHandler.java @@ -181,7 +181,6 @@ public class TestTopKInEachNodeResultHandler extends FacetTestCase { boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]); assertEquals(9, fr.getNumValidDescendants()); FacetResultNode parentRes = fr.getFacetResultNode(); - assertEquals(16.0, parentRes.value, Double.MIN_VALUE); assertEquals(2, parentRes.subResults.size()); // two nodes sorted by descending values: a/b with 8 and a/c with 6 // a/b has two children a/b/2 with value 3, and a/b/1 
with value 2. @@ -217,7 +216,6 @@ public class TestTopKInEachNodeResultHandler extends FacetTestCase { hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]); assertEquals(9, fr.getNumValidDescendants()); parentRes = fr.getFacetResultNode(); - assertEquals(16.0, parentRes.value, Double.MIN_VALUE); assertEquals(2, parentRes.subResults.size()); // two nodes sorted by descending values: a/b with 8 and a/c with 6 // a/b has two children a/b/2 with value 3, and a/b/1 with value 2. @@ -234,7 +232,6 @@ public class TestTopKInEachNodeResultHandler extends FacetTestCase { hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]); assertEquals(4, fr.getNumValidDescendants(), 4); parentRes = fr.getFacetResultNode(); - assertEquals(16.0, parentRes.value, Double.MIN_VALUE); assertEquals(2, parentRes.subResults.size()); // two nodes sorted by descending values: // a/b with value 8 and a/c with value 6 diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandler.java b/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandler.java index a619529e7e5..a82a19c6559 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandler.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandler.java @@ -4,6 +4,8 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy; +import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.search.params.CountFacetRequest; import org.apache.lucene.facet.search.params.FacetRequest; import org.apache.lucene.facet.search.params.FacetRequest.ResultMode; @@ -73,7 +75,9 @@ public class TestTopKResultsHandler extends BaseTestTopK { @Test public void testSimple() throws Exception { for (int partitionSize : partitionSizes) { - initIndex(partitionSize); + FacetIndexingParams fip = 
getFacetIndexingParams(partitionSize); + OrdinalPolicy op = fip.getCategoryListParams(null).getOrdinalPolicy(null); + initIndex(fip); List facetRequests = new ArrayList(); facetRequests.add(new CountFacetRequest(new CategoryPath("a"), 100)); @@ -87,8 +91,8 @@ public class TestTopKResultsHandler extends BaseTestTopK { facetRequests.add(new CountFacetRequest(new CategoryPath("a", "c"), 100)); // do different facet counts and compare to control - FacetSearchParams sParams = getFacetSearchParams(facetRequests, getFacetIndexingParams(partitionSize)); - + FacetSearchParams sParams = getFacetSearchParams(facetRequests, fip); + FacetsCollector fc = new StandardFacetsCollector(sParams, indexReader, taxoReader) { @Override protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) { @@ -100,17 +104,21 @@ public class TestTopKResultsHandler extends BaseTestTopK { searcher.search(new MatchAllDocsQuery(), fc); List facetResults = fc.getFacetResults(); - + FacetResult fr = facetResults.get(0); FacetResultNode parentRes = fr.getFacetResultNode(); - assertEquals(13.0, parentRes.value, Double.MIN_VALUE); + if (op == OrdinalPolicy.ALL_PARENTS) { + assertEquals(13.0, parentRes.value, Double.MIN_VALUE); + } FacetResultNode[] frn = resultNodesAsArray(parentRes); assertEquals(7.0, frn[0].value, Double.MIN_VALUE); assertEquals(6.0, frn[1].value, Double.MIN_VALUE); fr = facetResults.get(1); parentRes = fr.getFacetResultNode(); - assertEquals(13.0, parentRes.value, Double.MIN_VALUE); + if (op == OrdinalPolicy.ALL_PARENTS) { + assertEquals(13.0, parentRes.value, Double.MIN_VALUE); + } frn = resultNodesAsArray(parentRes); assertEquals(7.0, frn[0].value, Double.MIN_VALUE); assertEquals(6.0, frn[1].value, Double.MIN_VALUE); @@ -121,7 +129,9 @@ public class TestTopKResultsHandler extends BaseTestTopK { fr = facetResults.get(2); parentRes = fr.getFacetResultNode(); - assertEquals(7.0, parentRes.value, 
Double.MIN_VALUE); + if (op == OrdinalPolicy.ALL_PARENTS) { + assertEquals(7.0, parentRes.value, Double.MIN_VALUE); + } frn = resultNodesAsArray(parentRes); assertEquals(2.0, frn[0].value, Double.MIN_VALUE); assertEquals(2.0, frn[1].value, Double.MIN_VALUE); @@ -130,13 +140,17 @@ public class TestTopKResultsHandler extends BaseTestTopK { fr = facetResults.get(3); parentRes = fr.getFacetResultNode(); - assertEquals(2.0, parentRes.value, Double.MIN_VALUE); + if (op == OrdinalPolicy.ALL_PARENTS) { + assertEquals(2.0, parentRes.value, Double.MIN_VALUE); + } frn = resultNodesAsArray(parentRes); assertEquals(0, frn.length); fr = facetResults.get(4); parentRes = fr.getFacetResultNode(); - assertEquals(6.0, parentRes.value, Double.MIN_VALUE); + if (op == OrdinalPolicy.ALL_PARENTS) { + assertEquals(6.0, parentRes.value, Double.MIN_VALUE); + } frn = resultNodesAsArray(parentRes); assertEquals(1.0, frn[0].value, Double.MIN_VALUE); closeAll(); @@ -149,12 +163,12 @@ public class TestTopKResultsHandler extends BaseTestTopK { @Test public void testGetMaxIntFacets() throws Exception { for (int partitionSize : partitionSizes) { - initIndex(partitionSize); + FacetIndexingParams fip = getFacetIndexingParams(partitionSize); + initIndex(fip); // do different facet counts and compare to control CategoryPath path = new CategoryPath("a", "b"); - FacetSearchParams sParams = getFacetSearchParams(getFacetIndexingParams(partitionSize), - new CountFacetRequest(path, Integer.MAX_VALUE)); + FacetSearchParams sParams = getFacetSearchParams(fip, new CountFacetRequest(path, Integer.MAX_VALUE)); FacetsCollector fc = new StandardFacetsCollector(sParams, indexReader, taxoReader) { @Override @@ -174,7 +188,7 @@ public class TestTopKResultsHandler extends BaseTestTopK { // As a control base results, ask for top-1000 results FacetSearchParams sParams2 = getFacetSearchParams( - getFacetIndexingParams(partitionSize), new CountFacetRequest(path, Integer.MAX_VALUE)); + fip, new CountFacetRequest(path, 
Integer.MAX_VALUE)); FacetsCollector fc2 = new StandardFacetsCollector(sParams2, indexReader, taxoReader) { @Override @@ -207,12 +221,11 @@ public class TestTopKResultsHandler extends BaseTestTopK { @Test public void testSimpleSearchForNonexistentFacet() throws Exception { for (int partitionSize : partitionSizes) { - initIndex(partitionSize); + FacetIndexingParams fip = getFacetIndexingParams(partitionSize); + initIndex(fip); CategoryPath path = new CategoryPath("Miau Hattulla"); - FacetSearchParams sParams = getFacetSearchParams( - getFacetIndexingParams(partitionSize), - new CountFacetRequest(path, 10)); + FacetSearchParams sParams = getFacetSearchParams(fip, new CountFacetRequest(path, 10)); FacetsCollector fc = FacetsCollector.create(sParams, indexReader, taxoReader); diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandlerRandom.java b/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandlerRandom.java index 25094f0af5e..083f52306f1 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandlerRandom.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/TestTopKResultsHandlerRandom.java @@ -4,6 +4,7 @@ import java.io.IOException; import java.util.HashMap; import java.util.List; +import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.search.params.FacetSearchParams; import org.apache.lucene.facet.search.results.FacetResult; import org.apache.lucene.facet.search.results.FacetResultNode; @@ -32,10 +33,10 @@ import org.junit.Test; public class TestTopKResultsHandlerRandom extends BaseTestTopK { - private List countFacets(int partitionSize, int numResults, final boolean doComplement) + private List countFacets(FacetIndexingParams fip, int numResults, final boolean doComplement) throws IOException { Query q = new MatchAllDocsQuery(); - FacetSearchParams facetSearchParams = searchParamsWithRequests(numResults, partitionSize); + 
FacetSearchParams facetSearchParams = searchParamsWithRequests(numResults, fip); FacetsCollector fc = new StandardFacetsCollector(facetSearchParams, indexReader, taxoReader) { @Override protected FacetsAccumulator initFacetsAccumulator( @@ -59,7 +60,8 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK { @Test public void testTopCountsOrder() throws Exception { for (int partitionSize : partitionSizes) { - initIndex(partitionSize); + FacetIndexingParams fip = getFacetIndexingParams(partitionSize); + initIndex(fip); /* * Try out faceted search in it's most basic form (no sampling nor complement @@ -67,7 +69,7 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK { * being indexed, and later on an "over-all" faceted search is performed. The * results are checked against the DF of each facet by itself */ - List facetResults = countFacets(partitionSize, 100000, false); + List facetResults = countFacets(fip, 100000, false); assertCountsAndCardinality(facetCountsTruth(), facetResults); /* @@ -77,10 +79,10 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK { * place in here. The results are checked against the a regular (a.k.a * no-complement, no-sampling) faceted search with the same parameters. 
*/ - facetResults = countFacets(partitionSize, 100000, true); + facetResults = countFacets(fip, 100000, true); assertCountsAndCardinality(facetCountsTruth(), facetResults); - List allFacetResults = countFacets(partitionSize, 100000, false); + List allFacetResults = countFacets(fip, 100000, false); HashMap all = new HashMap(); int maxNumNodes = 0; @@ -108,7 +110,7 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK { if (VERBOSE) { System.out.println("------- verify for "+n+" top results"); } - List someResults = countFacets(partitionSize, n, false); + List someResults = countFacets(fip, n, false); k = 0; for (FacetResult fr : someResults) { FacetResultNode topResNode = fr.getFacetResultNode(); diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCounts.java b/lucene/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCounts.java index 82388b7fabb..9c5ad268c9d 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCounts.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCounts.java @@ -8,7 +8,9 @@ import org.apache.lucene.facet.FacetTestCase; import org.apache.lucene.facet.FacetTestUtils; import org.apache.lucene.facet.FacetTestUtils.IndexTaxonomyReaderPair; import org.apache.lucene.facet.FacetTestUtils.IndexTaxonomyWriterPair; +import org.apache.lucene.facet.index.params.CategoryListParams; import org.apache.lucene.facet.index.params.FacetIndexingParams; +import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.store.Directory; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util._TestUtil; @@ -59,6 +61,16 @@ public class TestTotalFacetCounts extends FacetTestCase { public int getPartitionSize() { return partitionSize; } + + @Override + public CategoryListParams getCategoryListParams(CategoryPath category) { + return new CategoryListParams() { + @Override + public OrdinalPolicy getOrdinalPolicy(String dimension) { + return 
OrdinalPolicy.ALL_PARENTS; + } + }; + } }; // The counts that the TotalFacetCountsArray should have after adding // the below facets to the index. diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCountsCache.java b/lucene/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCountsCache.java index a767a8d06dd..55198032f0c 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCountsCache.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/TestTotalFacetCountsCache.java @@ -87,7 +87,7 @@ public class TestTotalFacetCountsCache extends FacetTestCase { /** Utility method to add a document and facets to an index/taxonomy. */ static void addFacets(FacetIndexingParams iParams, IndexWriter iw, - TaxonomyWriter tw, String... strings) throws IOException { + TaxonomyWriter tw, String... strings) throws IOException { Document doc = new Document(); FacetFields facetFields = new FacetFields(tw, iParams); facetFields.addFields(doc, Collections.singletonList(new CategoryPath(strings))); diff --git a/lucene/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java b/lucene/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java index d7de185c0b6..aca59444346 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/search/sampling/BaseSampleTestTopK.java @@ -3,6 +3,7 @@ package org.apache.lucene.facet.search.sampling; import java.util.List; import java.util.Random; +import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.search.BaseTestTopK; import org.apache.lucene.facet.search.FacetsAccumulator; import org.apache.lucene.facet.search.FacetsCollector; @@ -46,8 +47,8 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK { protected static final int RETRIES = 10; @Override - protected FacetSearchParams 
searchParamsWithRequests(int numResults, int partitionSize) { - FacetSearchParams res = super.searchParamsWithRequests(numResults, partitionSize); + protected FacetSearchParams searchParamsWithRequests(int numResults, FacetIndexingParams fip) { + FacetSearchParams res = super.searchParamsWithRequests(numResults, fip); for (FacetRequest req : res.facetRequests) { // randomize the way we aggregate results if (random().nextBoolean()) { @@ -71,20 +72,23 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK { boolean useRandomSampler = random().nextBoolean(); for (int partitionSize : partitionSizes) { try { - initIndex(partitionSize); + // complements return counts for all ordinals, so force ALL_PARENTS indexing + // so that it's easier to compare + FacetIndexingParams fip = getFacetIndexingParams(partitionSize, true); + initIndex(fip); // Get all of the documents and run the query, then do different // facet counts and compare to control Query q = new TermQuery(new Term(CONTENT_FIELD, BETA)); // 90% of the docs ScoredDocIdCollector docCollector = ScoredDocIdCollector.create(indexReader.maxDoc(), false); - FacetSearchParams expectedSearchParams = searchParamsWithRequests(K, partitionSize); + FacetSearchParams expectedSearchParams = searchParamsWithRequests(K, fip); FacetsCollector fc = FacetsCollector.create(expectedSearchParams, indexReader, taxoReader); searcher.search(q, MultiCollector.wrap(docCollector, fc)); List expectedResults = fc.getFacetResults(); - FacetSearchParams samplingSearchParams = searchParamsWithRequests(K, partitionSize); + FacetSearchParams samplingSearchParams = searchParamsWithRequests(K, fip); // try several times in case of failure, because the test has a chance to fail // if the top K facets are not sufficiently common with the sample set