LUCENE-4715: Add OrdinalPolicy.ALL_BUT_DIMENSION

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1440416 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2013-01-30 13:51:38 +00:00
parent 913b168255
commit 42256baec5
25 changed files with 346 additions and 155 deletions

View File

@ -61,6 +61,11 @@ Optimizations
* LUCENE-4690: Performance improvements and non-hashing versions * LUCENE-4690: Performance improvements and non-hashing versions
of NumericUtils.*ToPrefixCoded() (yonik) of NumericUtils.*ToPrefixCoded() (yonik)
* LUCENE-4715: CategoryListParams.getOrdinalPolicy can now return a
different OrdinalPolicy per dimension, to better tune how you index
facets. Also added OrdinalPolicy.ALL_BUT_DIMENSION.
(Shai Erera, Michael McCandless)
New Features New Features
* LUCENE-4686: New specialized DGapVInt8IntEncoder for facets (now the * LUCENE-4686: New specialized DGapVInt8IntEncoder for facets (now the

View File

@ -3,6 +3,7 @@ package org.apache.lucene.facet.index;
import java.io.IOException; import java.io.IOException;
import java.util.Collections; import java.util.Collections;
import java.util.HashMap; import java.util.HashMap;
import java.util.Iterator;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
@ -115,12 +116,12 @@ public class CountingListBuilder implements CategoryListBuilder {
private final OrdinalsEncoder ordinalsEncoder; private final OrdinalsEncoder ordinalsEncoder;
private final TaxonomyWriter taxoWriter; private final TaxonomyWriter taxoWriter;
private final OrdinalPolicy ordinalPolicy; private final CategoryListParams clp;
public CountingListBuilder(CategoryListParams categoryListParams, FacetIndexingParams indexingParams, public CountingListBuilder(CategoryListParams categoryListParams, FacetIndexingParams indexingParams,
TaxonomyWriter taxoWriter) { TaxonomyWriter taxoWriter) {
this.taxoWriter = taxoWriter; this.taxoWriter = taxoWriter;
this.ordinalPolicy = categoryListParams.getOrdinalPolicy(); this.clp = categoryListParams;
if (indexingParams.getPartitionSize() == Integer.MAX_VALUE) { if (indexingParams.getPartitionSize() == Integer.MAX_VALUE) {
ordinalsEncoder = new NoPartitionsOrdinalsEncoder(categoryListParams); ordinalsEncoder = new NoPartitionsOrdinalsEncoder(categoryListParams);
} else { } else {
@ -141,16 +142,23 @@ public class CountingListBuilder implements CategoryListBuilder {
*/ */
@Override @Override
public Map<String,BytesRef> build(IntsRef ordinals, Iterable<CategoryPath> categories) throws IOException { public Map<String,BytesRef> build(IntsRef ordinals, Iterable<CategoryPath> categories) throws IOException {
int upto = ordinals.length; // since we add ordinals to IntsRef, iterate upto original length int upto = ordinals.length; // since we may add ordinals to IntsRef, iterate upto original length
if (ordinalPolicy == OrdinalPolicy.ALL_PARENTS) { // add all parents too Iterator<CategoryPath> iter = categories.iterator();
for (int i = 0; i < upto; i++) { for (int i = 0; i < upto; i++) {
int ordinal = ordinals.ints[i]; int ordinal = ordinals.ints[i];
CategoryPath cp = iter.next();
OrdinalPolicy op = clp.getOrdinalPolicy(cp.components[0]);
if (op != OrdinalPolicy.NO_PARENTS) {
// need to add parents too
int parent = taxoWriter.getParent(ordinal); int parent = taxoWriter.getParent(ordinal);
while (parent > 0) { while (parent > 0) {
ordinals.ints[ordinals.length++] = parent; ordinals.ints[ordinals.length++] = parent;
parent = taxoWriter.getParent(parent); parent = taxoWriter.getParent(parent);
} }
if (op == OrdinalPolicy.ALL_BUT_DIMENSION) { // discard the last added parent, which is the dimension
ordinals.length--;
}
} }
} }
return ordinalsEncoder.encode(ordinals); return ordinalsEncoder.encode(ordinals);

View File

@ -4,6 +4,7 @@ import java.io.IOException;
import org.apache.lucene.facet.search.CategoryListIterator; import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.facet.search.DocValuesCategoryListIterator; import org.apache.lucene.facet.search.DocValuesCategoryListIterator;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.util.PartitionsUtils; import org.apache.lucene.facet.util.PartitionsUtils;
import org.apache.lucene.util.encoding.DGapVInt8IntEncoder; import org.apache.lucene.util.encoding.DGapVInt8IntEncoder;
import org.apache.lucene.util.encoding.IntDecoder; import org.apache.lucene.util.encoding.IntDecoder;
@ -35,25 +36,61 @@ import org.apache.lucene.util.encoding.UniqueValuesIntEncoder;
*/ */
public class CategoryListParams { public class CategoryListParams {
/** OrdinalPolicy defines which ordinals are encoded for every document. */ /**
* Defines which category ordinals are encoded for every document. This also
* affects how category ordinals are aggregated, check the different policies
* for more details.
*/
public static enum OrdinalPolicy { public static enum OrdinalPolicy {
/** /**
* Encodes only the ordinal of leaf nodes. That is, the category A/B/C will * Encodes only the ordinals of leaf nodes. That is, for the category A/B/C,
* not encode the ordinals of A and A/B. * the ordinals of A and A/B will not be encoded. This policy is efficient
* for hierarchical dimensions, as it reduces the number of ordinals that
* are visited per document. During faceted search, this policy behaves
* exactly like {@link #ALL_PARENTS}, and the counts of all path components
* will be computed as well.
* *
* <p> * <p>
* <b>NOTE:</b> this {@link OrdinalPolicy} requires a special collector or * <b>NOTE:</b> this {@link OrdinalPolicy} requires a special collector or
* accumulator, which will fix the parents' counts, unless you are not * accumulator, which will fix the parents' counts.
* interested in the parents counts. *
* <p>
* <b>NOTE:</b> since only leaf nodes are encoded for the document, you
* should use this policy when the same document doesn't share two
* categories that have a mutual parent, or otherwise the counts will be
* wrong (the mutual parent will be over-counted). For example, if a
* document has the categories A/B/C and A/B/D, then with this policy the
* counts of "A" and "B" will be 2, which is wrong. If you intend to index
* hierarchical dimensions, with more than one category per document, you
* should use either {@link #ALL_PARENTS} or {@link #ALL_BUT_DIMENSION}.
*/ */
NO_PARENTS, NO_PARENTS,
/** /**
* Encodes the ordinals of all path components. That is, the category A/B/C * Encodes the ordinals of all path components. That is, the category A/B/C
* will encode the ordinals of A and A/B as well. This is the default * will encode the ordinals of A and A/B as well. If you don't require the
* {@link OrdinalPolicy}. * dimension's count during search, consider using
* {@link #ALL_BUT_DIMENSION}.
*/ */
ALL_PARENTS ALL_PARENTS,
/**
* Encodes the ordinals of all path components except the dimension. The
* dimension of a category is defined to be the first component in
* {@link CategoryPath#components}. For the category A/B/C, the ordinal of
* A/B will be encoded as well, however not the ordinal of A.
*
* <p>
* <b>NOTE:</b> when facets are aggregated, this policy behaves exactly like
* {@link #ALL_PARENTS}, except that the dimension is never counted. I.e. if
* you ask to count the facet "A", then while in {@link #ALL_PARENTS} you
* will get counts for "A" <u>and its children</u>, with this policy you
* will get counts for <u>only its children</u>. This policy is the default
* one, and makes sense for using with flat dimensions, whenever your
* application does not require the dimension's count. Otherwise, use
* {@link #ALL_PARENTS}.
*/
ALL_BUT_DIMENSION
} }
/** The default field used to store the facets information. */ /** The default field used to store the facets information. */
@ -63,7 +100,7 @@ public class CategoryListParams {
* The default {@link OrdinalPolicy} that's used when encoding a document's * The default {@link OrdinalPolicy} that's used when encoding a document's
* category ordinals. * category ordinals.
*/ */
public static final OrdinalPolicy DEFAULT_ORDINAL_POLICY = OrdinalPolicy.ALL_PARENTS; public static final OrdinalPolicy DEFAULT_ORDINAL_POLICY = OrdinalPolicy.ALL_BUT_DIMENSION;
public final String field; public final String field;
@ -115,19 +152,15 @@ public class CategoryListParams {
return false; return false;
} }
CategoryListParams other = (CategoryListParams) o; CategoryListParams other = (CategoryListParams) o;
if (this.hashCode != other.hashCode) { if (hashCode != other.hashCode) {
return false; return false;
} }
// The above hashcodes might equal each other in the case of a collision,
// so at this point only directly term equality testing will settle
// the equality test.
return field.equals(other.field); return field.equals(other.field);
} }
@Override @Override
public int hashCode() { public int hashCode() {
return this.hashCode; return hashCode;
} }
/** Create the {@link CategoryListIterator} for the specified partition. */ /** Create the {@link CategoryListIterator} for the specified partition. */
@ -137,14 +170,18 @@ public class CategoryListParams {
return new DocValuesCategoryListIterator(docValuesField, createEncoder().createMatchingDecoder()); return new DocValuesCategoryListIterator(docValuesField, createEncoder().createMatchingDecoder());
} }
/** Returns the {@link OrdinalPolicy} to use for this {@link CategoryListParams}. */ /**
public OrdinalPolicy getOrdinalPolicy() { * Returns the {@link OrdinalPolicy} to use for the given dimension. This
* {@link CategoryListParams} always returns {@link #DEFAULT_ORDINAL_POLICY}
* for all dimensions.
*/
public OrdinalPolicy getOrdinalPolicy(String dimension) {
return DEFAULT_ORDINAL_POLICY; return DEFAULT_ORDINAL_POLICY;
} }
@Override @Override
public String toString() { public String toString() {
return "field=" + field + " encoder=" + createEncoder() + " ordinalPolicy=" + getOrdinalPolicy(); return "field=" + field + " encoder=" + createEncoder() + " ordinalPolicy=" + getOrdinalPolicy(null);
} }
} }

View File

@ -0,0 +1,55 @@
package org.apache.lucene.facet.index.params;
import java.util.Map;
import org.apache.lucene.facet.taxonomy.CategoryPath;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
 * A {@link CategoryListParams} which allows controlling the
 * {@link CategoryListParams.OrdinalPolicy} used for each dimension. The
 * dimension is specified as the first component in
 * {@link CategoryPath#components}.
 */
public class PerDimensionOrdinalPolicy extends CategoryListParams {

  /** Explicit dimension-to-policy mappings; stored by reference, not copied. */
  private final Map<String,OrdinalPolicy> policies;

  /** Policy returned for any dimension that has no explicit mapping. */
  private final OrdinalPolicy defaultOP;

  /**
   * Uses the given per-dimension policies, and
   * {@link CategoryListParams#DEFAULT_ORDINAL_POLICY} for every dimension
   * that is not mapped.
   *
   * @param policies
   *          maps a dimension to the {@link OrdinalPolicy} to use for it
   */
  public PerDimensionOrdinalPolicy(Map<String,OrdinalPolicy> policies) {
    this(policies, DEFAULT_ORDINAL_POLICY);
  }

  /**
   * Uses the given per-dimension policies, and {@code defaultOP} for every
   * dimension that is not mapped.
   *
   * @param policies
   *          maps a dimension to the {@link OrdinalPolicy} to use for it
   * @param defaultOP
   *          the policy returned for dimensions without a mapping
   */
  public PerDimensionOrdinalPolicy(Map<String,OrdinalPolicy> policies, OrdinalPolicy defaultOP) {
    this.defaultOP = defaultOP;
    this.policies = policies;
  }

  /**
   * Returns the {@link OrdinalPolicy} mapped to the given dimension, or the
   * default policy if the dimension has no mapping or is {@code null}.
   */
  @Override
  public OrdinalPolicy getOrdinalPolicy(String dimension) {
    // The inherited toString() asks for the policy of a null dimension. Maps
    // that reject null keys would throw NullPointerException from get(null),
    // so answer with the default policy without consulting the map.
    if (dimension == null) {
      return defaultOP;
    }
    OrdinalPolicy op = policies.get(dimension);
    return op == null ? defaultOP : op;
  }

  @Override
  public String toString() {
    return super.toString() + " policies=" + policies;
  }

}

View File

@ -85,7 +85,7 @@ import org.apache.lucene.util.encoding.DGapVInt8IntDecoder;
public class CountingFacetsCollector extends FacetsCollector { public class CountingFacetsCollector extends FacetsCollector {
private final FacetSearchParams fsp; private final FacetSearchParams fsp;
private final OrdinalPolicy ordinalPolicy; private final CategoryListParams clp;
private final TaxonomyReader taxoReader; private final TaxonomyReader taxoReader;
private final BytesRef buf = new BytesRef(32); private final BytesRef buf = new BytesRef(32);
private final FacetArrays facetArrays; private final FacetArrays facetArrays;
@ -107,8 +107,7 @@ public class CountingFacetsCollector extends FacetsCollector {
assert assertParams(fsp) == null : assertParams(fsp); assert assertParams(fsp) == null : assertParams(fsp);
this.fsp = fsp; this.fsp = fsp;
CategoryListParams clp = fsp.indexingParams.getCategoryListParams(fsp.facetRequests.get(0).categoryPath); this.clp = fsp.indexingParams.getCategoryListParams(fsp.facetRequests.get(0).categoryPath);
this.ordinalPolicy = clp.getOrdinalPolicy();
this.facetsField = clp.field; this.facetsField = clp.field;
this.taxoReader = taxoReader; this.taxoReader = taxoReader;
this.facetArrays = facetArrays; this.facetArrays = facetArrays;
@ -217,21 +216,21 @@ public class CountingFacetsCollector extends FacetsCollector {
} }
} }
private void countParents(int[] parents) { /**
// counts[0] is the count of ROOT, which we don't care about and counts[1] * Computes the counts of ordinals under the given ordinal's tree, by
// can only update counts[0], so we don't bother to visit it too. also, * recursively going down to leaf nodes and rolling up their counts (called
// since parents always have lower ordinals than their children, we traverse * only for categories that were indexed with OrdinalPolicy.NO_PARENTS).
// the array backwards. this also allows us to update just the immediate */
// parent's count (actually, otherwise it would be a mistake). private int rollupCounts(int ordinal, int[] children, int[] siblings) {
for (int i = counts.length - 1; i > 1; i--) { int count = 0;
int count = counts[i]; while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
if (count > 0) { int childCount = counts[ordinal];
int parent = parents[i]; childCount += rollupCounts(children[ordinal], children, siblings);
if (parent != 0) { counts[ordinal] = childCount;
counts[parent] += count; count += childCount;
} ordinal = siblings[ordinal];
}
} }
return count;
} }
@Override @Override
@ -242,11 +241,6 @@ public class CountingFacetsCollector extends FacetsCollector {
ParallelTaxonomyArrays arrays = taxoReader.getParallelTaxonomyArrays(); ParallelTaxonomyArrays arrays = taxoReader.getParallelTaxonomyArrays();
if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) {
// need to count parents
countParents(arrays.parents());
}
// compute top-K // compute top-K
final int[] children = arrays.children(); final int[] children = arrays.children();
final int[] siblings = arrays.siblings(); final int[] siblings = arrays.siblings();
@ -256,6 +250,12 @@ public class CountingFacetsCollector extends FacetsCollector {
if (rootOrd == TaxonomyReader.INVALID_ORDINAL) { // category does not exist if (rootOrd == TaxonomyReader.INVALID_ORDINAL) { // category does not exist
continue; continue;
} }
OrdinalPolicy ordinalPolicy = clp.getOrdinalPolicy(fr.categoryPath.components[0]);
if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) {
// need to count parents
counts[rootOrd] += rollupCounts(children[rootOrd], children, siblings);
}
FacetResultNode root = new FacetResultNode(); FacetResultNode root = new FacetResultNode();
root.ordinal = rootOrd; root.ordinal = rootOrd;
root.label = fr.categoryPath; root.label = fr.categoryPath;

View File

@ -43,13 +43,21 @@ public abstract class FacetsCollector extends Collector {
* Returns the most optimized {@link FacetsCollector} for the given search * Returns the most optimized {@link FacetsCollector} for the given search
* parameters. The returned {@link FacetsCollector} is guaranteed to satisfy * parameters. The returned {@link FacetsCollector} is guaranteed to satisfy
* the requested parameters. * the requested parameters.
*
* @throws IllegalArgumentException
* if there is no built-in collector that can satisfy the search
* parameters.
*/ */
public static FacetsCollector create(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) { public static FacetsCollector create(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) {
if (CountingFacetsCollector.assertParams(fsp) == null) { if (CountingFacetsCollector.assertParams(fsp) == null) {
return new CountingFacetsCollector(fsp, taxoReader); return new CountingFacetsCollector(fsp, taxoReader);
} }
return new StandardFacetsCollector(fsp, indexReader, taxoReader); if (StandardFacetsCollector.assertParams(fsp) == null) {
return new StandardFacetsCollector(fsp, indexReader, taxoReader);
}
throw new IllegalArgumentException("None of the built-in FacetsCollectors can handle the given search params");
} }
/** /**

View File

@ -49,6 +49,17 @@ public class StandardFacetsCollector extends FacetsCollector {
private List<FacetResult> results; private List<FacetResult> results;
private Object resultsGuard; private Object resultsGuard;
/**
 * Verifies that this collector can aggregate every request in the given
 * search params.
 *
 * @return {@code null} if all requests are supported, otherwise a message
 *         describing why they are not
 */
static String assertParams(FacetSearchParams fsp) {
  // a request is unsupported if its dimension was indexed with NO_PARENTS
  for (FacetRequest request : fsp.facetRequests) {
    CategoryListParams listParams = fsp.indexingParams.getCategoryListParams(request.categoryPath);
    String dimension = request.categoryPath.components[0];
    OrdinalPolicy policy = listParams.getOrdinalPolicy(dimension);
    if (policy == OrdinalPolicy.NO_PARENTS) {
      return "this collector does not support aggregating categories that were indexed with OrdinalPolicy.NO_PARENTS";
    }
  }
  return null;
}
/** /**
* Create a collector for accumulating facets while collecting documents * Create a collector for accumulating facets while collecting documents
* during search. * during search.
@ -62,6 +73,7 @@ public class StandardFacetsCollector extends FacetsCollector {
* taxonomy containing the facets. * taxonomy containing the facets.
*/ */
public StandardFacetsCollector(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) { public StandardFacetsCollector(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) {
assert assertParams(facetSearchParams) == null : assertParams(facetSearchParams);
facetsAccumulator = initFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader); facetsAccumulator = initFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
scoreDocIdCollector = initScoredDocCollector(facetSearchParams, indexReader, taxonomyReader); scoreDocIdCollector = initScoredDocCollector(facetSearchParams, indexReader, taxonomyReader);
resultsGuard = new Object(); resultsGuard = new Object();

View File

@ -74,6 +74,7 @@ public class ScoredDocIdsUtils {
/** Clear all deleted documents from a given open-bit-set according to a given reader */ /** Clear all deleted documents from a given open-bit-set according to a given reader */
private static void clearDeleted(final IndexReader reader, final FixedBitSet set) throws IOException { private static void clearDeleted(final IndexReader reader, final FixedBitSet set) throws IOException {
// TODO use BitsFilteredDocIdSet?
// If there are no deleted docs // If there are no deleted docs
if (!reader.hasDeletions()) { if (!reader.hasDeletions()) {

View File

@ -6,6 +6,7 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.HashSet; import java.util.HashSet;
import java.util.Iterator;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@ -17,6 +18,7 @@ import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField; import org.apache.lucene.document.TextField;
import org.apache.lucene.facet.index.FacetFields; import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.index.params.CategoryListParams; import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.params.FacetRequest; import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams; import org.apache.lucene.facet.search.params.FacetSearchParams;
@ -44,6 +46,7 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs; import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util._TestUtil; import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.collections.IntToObjectMap;
import org.junit.AfterClass; import org.junit.AfterClass;
import org.junit.BeforeClass; import org.junit.BeforeClass;
@ -73,7 +76,8 @@ public abstract class FacetTestBase extends FacetTestCase {
SearchTaxoDirPair() {} SearchTaxoDirPair() {}
} }
private static HashMap<Integer, SearchTaxoDirPair> dirsPerPartitionSize; private static IntToObjectMap<SearchTaxoDirPair> dirsPerPartitionSize;
private static IntToObjectMap<FacetIndexingParams> fipPerPartitionSize;
private static File TEST_DIR; private static File TEST_DIR;
/** Documents text field. */ /** Documents text field. */
@ -91,12 +95,15 @@ public abstract class FacetTestBase extends FacetTestCase {
@BeforeClass @BeforeClass
public static void beforeClassFacetTestBase() { public static void beforeClassFacetTestBase() {
TEST_DIR = _TestUtil.getTempDir("facets"); TEST_DIR = _TestUtil.getTempDir("facets");
dirsPerPartitionSize = new HashMap<Integer, FacetTestBase.SearchTaxoDirPair>(); dirsPerPartitionSize = new IntToObjectMap<FacetTestBase.SearchTaxoDirPair>();
fipPerPartitionSize = new IntToObjectMap<FacetIndexingParams>();
} }
@AfterClass @AfterClass
public static void afterClassFacetTestBase() throws Exception { public static void afterClassFacetTestBase() throws Exception {
for (SearchTaxoDirPair pair : dirsPerPartitionSize.values()) { Iterator<SearchTaxoDirPair> iter = dirsPerPartitionSize.iterator();
while (iter.hasNext()) {
SearchTaxoDirPair pair = iter.next();
IOUtils.close(pair.searchDir, pair.taxoDir); IOUtils.close(pair.searchDir, pair.taxoDir);
} }
} }
@ -128,20 +135,16 @@ public abstract class FacetTestBase extends FacetTestCase {
return DEFAULT_CONTENT[doc]; return DEFAULT_CONTENT[doc];
} }
/** Prepare index (in RAM) with single partition */ /** Prepare index (in RAM) with some documents and some facets. */
protected final void initIndex() throws Exception { protected final void initIndex(FacetIndexingParams fip) throws Exception {
initIndex(Integer.MAX_VALUE); initIndex(false, fip);
}
/** Prepare index (in RAM) with some documents and some facets */
protected final void initIndex(int partitionSize) throws Exception {
initIndex(partitionSize, false);
} }
/** Prepare index (in RAM/Disk) with some documents and some facets */ /** Prepare index (in RAM/Disk) with some documents and some facets. */
protected final void initIndex(int partitionSize, boolean forceDisk) throws Exception { protected final void initIndex(boolean forceDisk, FacetIndexingParams fip) throws Exception {
int partitionSize = fip.getPartitionSize();
if (VERBOSE) { if (VERBOSE) {
System.out.println("Partition Size: " + partitionSize+" forceDisk: "+forceDisk); System.out.println("Partition Size: " + partitionSize + " forceDisk: "+forceDisk);
} }
SearchTaxoDirPair pair = dirsPerPartitionSize.get(Integer.valueOf(partitionSize)); SearchTaxoDirPair pair = dirsPerPartitionSize.get(Integer.valueOf(partitionSize));
@ -158,7 +161,7 @@ public abstract class FacetTestBase extends FacetTestCase {
RandomIndexWriter iw = new RandomIndexWriter(random(), pair.searchDir, getIndexWriterConfig(getAnalyzer())); RandomIndexWriter iw = new RandomIndexWriter(random(), pair.searchDir, getIndexWriterConfig(getAnalyzer()));
TaxonomyWriter taxo = new DirectoryTaxonomyWriter(pair.taxoDir, OpenMode.CREATE); TaxonomyWriter taxo = new DirectoryTaxonomyWriter(pair.taxoDir, OpenMode.CREATE);
populateIndex(iw, taxo, getFacetIndexingParams(partitionSize)); populateIndex(iw, taxo, fip);
// commit changes (taxonomy prior to search index for consistency) // commit changes (taxonomy prior to search index for consistency)
taxo.commit(); taxo.commit();
@ -182,14 +185,40 @@ public abstract class FacetTestBase extends FacetTestCase {
/** Returns a {@link FacetIndexingParams} per the given partition size. */ /** Returns a {@link FacetIndexingParams} per the given partition size. */
protected FacetIndexingParams getFacetIndexingParams(final int partSize) { protected FacetIndexingParams getFacetIndexingParams(final int partSize) {
// several of our encoders don't support the value 0, return getFacetIndexingParams(partSize, false);
// which is one of the values encoded when dealing w/ partitions. }
return new FacetIndexingParams() {
@Override /**
public int getPartitionSize() { * Returns a {@link FacetIndexingParams} per the given partition size. If
return partSize; * requested, then {@link OrdinalPolicy} will be set to
} * {@link OrdinalPolicy#ALL_PARENTS}, otherwise it will randomize.
}; */
protected FacetIndexingParams getFacetIndexingParams(final int partSize, final boolean forceAllParents) {
FacetIndexingParams fip = fipPerPartitionSize.get(partSize);
if (fip == null) {
// randomize OrdinalPolicy. Since not all Collectors / Accumulators
// support NO_PARENTS, don't include it.
// TODO: once all code paths support NO_PARENTS, randomize it too.
CategoryListParams randomOP = new CategoryListParams() {
final OrdinalPolicy op = random().nextBoolean() ? OrdinalPolicy.ALL_BUT_DIMENSION : OrdinalPolicy.ALL_PARENTS;
@Override
public OrdinalPolicy getOrdinalPolicy(String dimension) {
return forceAllParents ? OrdinalPolicy.ALL_PARENTS : op;
}
};
// several of our encoders don't support the value 0,
// which is one of the values encoded when dealing w/ partitions,
// therefore don't randomize the encoder.
fip = new FacetIndexingParams(randomOP) {
@Override
public int getPartitionSize() {
return partSize;
}
};
fipPerPartitionSize.put(partSize, fip);
}
return fip;
} }
/** /**

View File

@ -45,7 +45,6 @@ public class TestMultiCLExample extends LuceneTestCase {
assertNotNull("Result should not be null", result); assertNotNull("Result should not be null", result);
FacetResultNode node = result.getFacetResultNode(); FacetResultNode node = result.getFacetResultNode();
assertEquals("Invalid label", "5", node.label.toString()); assertEquals("Invalid label", "5", node.label.toString());
assertEquals("Invalid value", 2.0, node.value, 0.0);
assertEquals("Invalid # of subresults", 3, node.subResults.size()); assertEquals("Invalid # of subresults", 3, node.subResults.size());
Iterator<? extends FacetResultNode> subResults = node.subResults.iterator(); Iterator<? extends FacetResultNode> subResults = node.subResults.iterator();

View File

@ -25,6 +25,8 @@ import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.index.params.CategoryListParams; import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.index.params.PerDimensionIndexingParams; import org.apache.lucene.facet.index.params.PerDimensionIndexingParams;
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.index.params.PerDimensionOrdinalPolicy;
import org.apache.lucene.facet.search.CategoryListIterator; import org.apache.lucene.facet.search.CategoryListIterator;
import org.apache.lucene.facet.search.DrillDown; import org.apache.lucene.facet.search.DrillDown;
import org.apache.lucene.facet.search.FacetsCollector; import org.apache.lucene.facet.search.FacetsCollector;
@ -368,9 +370,23 @@ public class TestFacetsPayloadMigrationReader extends FacetTestCase {
// set custom CLP fields for two dimensions and use the default ($facets) for the other two // set custom CLP fields for two dimensions and use the default ($facets) for the other two
HashMap<CategoryPath,CategoryListParams> params = new HashMap<CategoryPath,CategoryListParams>(); HashMap<CategoryPath,CategoryListParams> params = new HashMap<CategoryPath,CategoryListParams>();
params.put(new CategoryPath(DIMENSIONS[0]), new CategoryListParams(DIMENSIONS[0])); params.put(new CategoryPath(DIMENSIONS[0]), new CategoryListParams(DIMENSIONS[0]) {
params.put(new CategoryPath(DIMENSIONS[1]), new CategoryListParams(DIMENSIONS[1])); @Override
FacetIndexingParams fip = new PerDimensionIndexingParams(params) { public OrdinalPolicy getOrdinalPolicy(String dimension) {
return OrdinalPolicy.ALL_PARENTS;
}
});
params.put(new CategoryPath(DIMENSIONS[1]), new CategoryListParams(DIMENSIONS[1]) {
@Override
public OrdinalPolicy getOrdinalPolicy(String dimension) {
return OrdinalPolicy.ALL_PARENTS;
}
});
HashMap<String,OrdinalPolicy> policies = new HashMap<String,CategoryListParams.OrdinalPolicy>();
policies.put(DIMENSIONS[2], OrdinalPolicy.ALL_PARENTS);
policies.put(DIMENSIONS[3], OrdinalPolicy.ALL_PARENTS);
FacetIndexingParams fip = new PerDimensionIndexingParams(params, new PerDimensionOrdinalPolicy(policies)) {
@Override @Override
public int getPartitionSize() { public int getPartitionSize() {
return partitionSize; return partitionSize;

View File

@ -51,10 +51,9 @@ public abstract class BaseTestTopK extends FacetTestBase {
private int nextInt; private int nextInt;
@Override @Override
protected void populateIndex(RandomIndexWriter iw, TaxonomyWriter taxo, protected void populateIndex(RandomIndexWriter iw, TaxonomyWriter taxo, FacetIndexingParams fip) throws IOException {
FacetIndexingParams iParams) throws IOException {
currDoc = -1; currDoc = -1;
super.populateIndex(iw, taxo, iParams); super.populateIndex(iw, taxo, fip);
} }
/** prepare the next random int */ /** prepare the next random int */
@ -94,17 +93,13 @@ public abstract class BaseTestTopK extends FacetTestBase {
return Arrays.asList(cp); return Arrays.asList(cp);
} }
protected FacetSearchParams searchParamsWithRequests(int numResults) { protected FacetSearchParams searchParamsWithRequests(int numResults, FacetIndexingParams fip) {
return searchParamsWithRequests(numResults, Integer.MAX_VALUE);
}
protected FacetSearchParams searchParamsWithRequests(int numResults, int partitionSize) {
List<FacetRequest> facetRequests = new ArrayList<FacetRequest>(); List<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
facetRequests.add(new CountFacetRequest(new CategoryPath("a"), numResults)); facetRequests.add(new CountFacetRequest(new CategoryPath("a"), numResults));
facetRequests.add(new CountFacetRequest(new CategoryPath("a", "1"), numResults)); facetRequests.add(new CountFacetRequest(new CategoryPath("a", "1"), numResults));
facetRequests.add(new CountFacetRequest(new CategoryPath("a", "1", "10"), numResults)); facetRequests.add(new CountFacetRequest(new CategoryPath("a", "1", "10"), numResults));
facetRequests.add(new CountFacetRequest(new CategoryPath("a", "2", "26", "267"), numResults)); facetRequests.add(new CountFacetRequest(new CategoryPath("a", "2", "26", "267"), numResults));
return getFacetSearchParams(facetRequests, getFacetIndexingParams(partitionSize)); return getFacetSearchParams(facetRequests, fip);
} }
@Override @Override

View File

@ -16,8 +16,9 @@ import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.FacetTestCase; import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.index.FacetFields; import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.index.params.CategoryListParams; import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.index.params.PerDimensionIndexingParams; import org.apache.lucene.facet.index.params.PerDimensionOrdinalPolicy;
import org.apache.lucene.facet.search.params.CountFacetRequest; import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest; import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest.SortBy; import org.apache.lucene.facet.search.params.FacetRequest.SortBy;
@ -146,13 +147,11 @@ public class CountingFacetsCollectorTest extends FacetTestCase {
termExpectedCounts.put(cp, termExpectedCounts.get(cp) + 1); termExpectedCounts.put(cp, termExpectedCounts.get(cp) + 1);
} }
} }
// add 1 to each dimension // add 1 to each NO_PARENTS dimension
allExpectedCounts.put(CP_A, allExpectedCounts.get(CP_A) + 1);
allExpectedCounts.put(CP_B, allExpectedCounts.get(CP_B) + 1); allExpectedCounts.put(CP_B, allExpectedCounts.get(CP_B) + 1);
allExpectedCounts.put(CP_C, allExpectedCounts.get(CP_C) + 1); allExpectedCounts.put(CP_C, allExpectedCounts.get(CP_C) + 1);
allExpectedCounts.put(CP_D, allExpectedCounts.get(CP_D) + 1); allExpectedCounts.put(CP_D, allExpectedCounts.get(CP_D) + 1);
if (updateTermExpectedCounts) { if (updateTermExpectedCounts) {
termExpectedCounts.put(CP_A, termExpectedCounts.get(CP_A) + 1);
termExpectedCounts.put(CP_B, termExpectedCounts.get(CP_B) + 1); termExpectedCounts.put(CP_B, termExpectedCounts.get(CP_B) + 1);
termExpectedCounts.put(CP_C, termExpectedCounts.get(CP_C) + 1); termExpectedCounts.put(CP_C, termExpectedCounts.get(CP_C) + 1);
termExpectedCounts.put(CP_D, termExpectedCounts.get(CP_D) + 1); termExpectedCounts.put(CP_D, termExpectedCounts.get(CP_D) + 1);
@ -252,19 +251,13 @@ public class CountingFacetsCollectorTest extends FacetTestCase {
conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); // prevent merges, so we can control the index segments conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); // prevent merges, so we can control the index segments
IndexWriter indexWriter = new IndexWriter(indexDir, conf); IndexWriter indexWriter = new IndexWriter(indexDir, conf);
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
CategoryListParams allParents = new CategoryListParams();
CategoryListParams noParents = new CategoryListParams("no_parents") { Map<String,OrdinalPolicy> policies = new HashMap<String,CategoryListParams.OrdinalPolicy>();
@Override policies.put(CP_B.components[0], OrdinalPolicy.ALL_PARENTS);
public OrdinalPolicy getOrdinalPolicy() { policies.put(CP_C.components[0], OrdinalPolicy.NO_PARENTS);
return OrdinalPolicy.NO_PARENTS; policies.put(CP_D.components[0], OrdinalPolicy.NO_PARENTS);
} CategoryListParams clp = new PerDimensionOrdinalPolicy(policies);
}; fip = new FacetIndexingParams(clp);
Map<CategoryPath,CategoryListParams> params = new HashMap<CategoryPath,CategoryListParams>();
params.put(CP_A, allParents);
params.put(CP_B, allParents);
params.put(CP_C, noParents);
params.put(CP_D, noParents);
fip = new PerDimensionIndexingParams(params);
allExpectedCounts = newCounts(); allExpectedCounts = newCounts();
termExpectedCounts = newCounts(); termExpectedCounts = newCounts();

View File

@ -104,9 +104,9 @@ public class TestDemoFacets extends FacetTestCase {
// Retrieve & verify results: // Retrieve & verify results:
List<FacetResult> results = c.getFacetResults(); List<FacetResult> results = c.getFacetResults();
assertEquals(2, results.size()); assertEquals(2, results.size());
assertEquals("Publish Date (5)\n 2012 (2)\n 2010 (2)\n 1999 (1)\n", assertEquals("Publish Date (0)\n 2012 (2)\n 2010 (2)\n 1999 (1)\n",
FacetTestUtils.toSimpleString(results.get(0))); FacetTestUtils.toSimpleString(results.get(0)));
assertEquals("Author (5)\n Lisa (2)\n Frank (1)\n Susan (1)\n Bob (1)\n", assertEquals("Author (0)\n Lisa (2)\n Frank (1)\n Susan (1)\n Bob (1)\n",
FacetTestUtils.toSimpleString(results.get(1))); FacetTestUtils.toSimpleString(results.get(1)));
@ -117,7 +117,7 @@ public class TestDemoFacets extends FacetTestCase {
searcher.search(q2, c); searcher.search(q2, c);
results = c.getFacetResults(); results = c.getFacetResults();
assertEquals(1, results.size()); assertEquals(1, results.size());
assertEquals("Author (2)\n Lisa (1)\n Bob (1)\n", assertEquals("Author (0)\n Lisa (1)\n Bob (1)\n",
FacetTestUtils.toSimpleString(results.get(0))); FacetTestUtils.toSimpleString(results.get(0)));
// Smoke test PrintTaxonomyStats: // Smoke test PrintTaxonomyStats:

View File

@ -14,6 +14,7 @@ import org.junit.Before;
import org.junit.Test; import org.junit.Test;
import org.apache.lucene.facet.FacetTestBase; import org.apache.lucene.facet.FacetTestBase;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.FacetsAccumulator; import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.search.ScoredDocIDs; import org.apache.lucene.facet.search.ScoredDocIDs;
import org.apache.lucene.facet.search.ScoredDocIdCollector; import org.apache.lucene.facet.search.ScoredDocIdCollector;
@ -48,11 +49,14 @@ import org.apache.lucene.facet.taxonomy.CategoryPath;
*/ */
public class TestFacetsAccumulatorWithComplement extends FacetTestBase { public class TestFacetsAccumulatorWithComplement extends FacetTestBase {
private FacetIndexingParams fip;
@Override @Override
@Before @Before
public void setUp() throws Exception { public void setUp() throws Exception {
super.setUp(); super.setUp();
initIndex(); fip = getFacetIndexingParams(Integer.MAX_VALUE);
initIndex(fip);
} }
@Override @Override
@ -125,7 +129,7 @@ public class TestFacetsAccumulatorWithComplement extends FacetTestBase {
/** compute facets with certain facet requests and docs */ /** compute facets with certain facet requests and docs */
private List<FacetResult> findFacets(ScoredDocIDs sDocids, boolean withComplement) throws IOException { private List<FacetResult> findFacets(ScoredDocIDs sDocids, boolean withComplement) throws IOException {
FacetSearchParams fsp = new FacetSearchParams(getFacetIndexingParams(Integer.MAX_VALUE), new CountFacetRequest(new CategoryPath("root","a"), 10)); FacetSearchParams fsp = new FacetSearchParams(fip, new CountFacetRequest(new CategoryPath("root","a"), 10));
FacetsAccumulator fAccumulator = new StandardFacetsAccumulator(fsp, indexReader, taxoReader); FacetsAccumulator fAccumulator = new StandardFacetsAccumulator(fsp, indexReader, taxoReader);
fAccumulator.setComplementThreshold( fAccumulator.setComplementThreshold(

View File

@ -274,7 +274,6 @@ public class TestMultipleCategoryLists extends FacetTestCase {
Iterable<? extends FacetResultNode> subResults = resNode.subResults; Iterable<? extends FacetResultNode> subResults = resNode.subResults;
Iterator<? extends FacetResultNode> subIter = subResults.iterator(); Iterator<? extends FacetResultNode> subIter = subResults.iterator();
checkResult(resNode, "Band", 5.0);
checkResult(subIter.next(), "Band/Rock & Pop", 4.0); checkResult(subIter.next(), "Band/Rock & Pop", 4.0);
checkResult(subIter.next(), "Band/Punk", 1.0); checkResult(subIter.next(), "Band/Punk", 1.0);
@ -283,7 +282,6 @@ public class TestMultipleCategoryLists extends FacetTestCase {
subResults = resNode.subResults; subResults = resNode.subResults;
subIter = subResults.iterator(); subIter = subResults.iterator();
checkResult(resNode, "Band", 5.0);
checkResult(subIter.next(), "Band/Rock & Pop", 4.0); checkResult(subIter.next(), "Band/Rock & Pop", 4.0);
checkResult(subIter.next(), "Band/Rock & Pop/Dave Matthews Band", 1.0); checkResult(subIter.next(), "Band/Rock & Pop/Dave Matthews Band", 1.0);
checkResult(subIter.next(), "Band/Rock & Pop/REM", 1.0); checkResult(subIter.next(), "Band/Rock & Pop/REM", 1.0);
@ -297,7 +295,6 @@ public class TestMultipleCategoryLists extends FacetTestCase {
subResults = resNode.subResults; subResults = resNode.subResults;
subIter = subResults.iterator(); subIter = subResults.iterator();
checkResult(resNode, "Author", 3.0);
checkResult(subIter.next(), "Author/Kurt Vonnegut", 1.0); checkResult(subIter.next(), "Author/Kurt Vonnegut", 1.0);
checkResult(subIter.next(), "Author/Stephen King", 1.0); checkResult(subIter.next(), "Author/Stephen King", 1.0);
checkResult(subIter.next(), "Author/Mark Twain", 1.0); checkResult(subIter.next(), "Author/Mark Twain", 1.0);
@ -307,7 +304,6 @@ public class TestMultipleCategoryLists extends FacetTestCase {
subResults = resNode.subResults; subResults = resNode.subResults;
subIter = subResults.iterator(); subIter = subResults.iterator();
checkResult(resNode, "Band/Rock & Pop", 4.0);
checkResult(subIter.next(), "Band/Rock & Pop/Dave Matthews Band", 1.0); checkResult(subIter.next(), "Band/Rock & Pop/Dave Matthews Band", 1.0);
checkResult(subIter.next(), "Band/Rock & Pop/REM", 1.0); checkResult(subIter.next(), "Band/Rock & Pop/REM", 1.0);
checkResult(subIter.next(), "Band/Rock & Pop/U2", 1.0); checkResult(subIter.next(), "Band/Rock & Pop/U2", 1.0);

View File

@ -3,6 +3,7 @@ package org.apache.lucene.facet.search;
import java.util.List; import java.util.List;
import org.apache.lucene.facet.FacetTestBase; import org.apache.lucene.facet.FacetTestBase;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.FacetsCollector; import org.apache.lucene.facet.search.FacetsCollector;
import org.apache.lucene.facet.search.params.CountFacetRequest; import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams; import org.apache.lucene.facet.search.params.FacetSearchParams;
@ -31,18 +32,21 @@ import org.junit.Before;
public class TestSameRequestAccumulation extends FacetTestBase { public class TestSameRequestAccumulation extends FacetTestBase {
private FacetIndexingParams fip;
@Override @Override
@Before @Before
public void setUp() throws Exception { public void setUp() throws Exception {
super.setUp(); super.setUp();
initIndex(); fip = getFacetIndexingParams(Integer.MAX_VALUE);
initIndex(fip);
} }
// Following LUCENE-4461 - ensure requesting the (exact) same request more // Following LUCENE-4461 - ensure requesting the (exact) same request more
// than once does not alter the results // than once does not alter the results
public void testTwoSameRequests() throws Exception { public void testTwoSameRequests() throws Exception {
final CountFacetRequest facetRequest = new CountFacetRequest(new CategoryPath("root"), 10); final CountFacetRequest facetRequest = new CountFacetRequest(new CategoryPath("root"), 10);
FacetSearchParams fsp = new FacetSearchParams(facetRequest); FacetSearchParams fsp = new FacetSearchParams(fip, facetRequest);
FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader); FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);
searcher.search(new MatchAllDocsQuery(), fc); searcher.search(new MatchAllDocsQuery(), fc);
@ -50,7 +54,7 @@ public class TestSameRequestAccumulation extends FacetTestBase {
final String expected = fc.getFacetResults().get(0).toString(); final String expected = fc.getFacetResults().get(0).toString();
// now add the same facet request with duplicates (same instance and same one) // now add the same facet request with duplicates (same instance and same one)
fsp = new FacetSearchParams(facetRequest, facetRequest, new CountFacetRequest(new CategoryPath("root"), 10)); fsp = new FacetSearchParams(fip, facetRequest, facetRequest, new CountFacetRequest(new CategoryPath("root"), 10));
// make sure the search params holds 3 requests now // make sure the search params holds 3 requests now
assertEquals(3, fsp.facetRequests.size()); assertEquals(3, fsp.facetRequests.size());

View File

@ -5,6 +5,7 @@ import java.util.Arrays;
import java.util.List; import java.util.List;
import org.apache.lucene.facet.FacetTestBase; import org.apache.lucene.facet.FacetTestBase;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.params.CountFacetRequest; import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams; import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.params.ScoreFacetRequest; import org.apache.lucene.facet.search.params.ScoreFacetRequest;
@ -37,11 +38,14 @@ import org.junit.Test;
/** Test ScoredDocIdCollector. */ /** Test ScoredDocIdCollector. */
public class TestScoredDocIdCollector extends FacetTestBase { public class TestScoredDocIdCollector extends FacetTestBase {
private FacetIndexingParams fip;
@Override @Override
@Before @Before
public void setUp() throws Exception { public void setUp() throws Exception {
super.setUp(); super.setUp();
initIndex(); fip = getFacetIndexingParams(Integer.MAX_VALUE);
initIndex(fip);
} }
@Override @Override
@ -73,8 +77,8 @@ public class TestScoredDocIdCollector extends FacetTestBase {
// verify by facet values // verify by facet values
CategoryPath cp = new CategoryPath("root","a"); CategoryPath cp = new CategoryPath("root","a");
FacetSearchParams countFSP = new FacetSearchParams(getFacetIndexingParams(Integer.MAX_VALUE), new CountFacetRequest(cp, 10)); FacetSearchParams countFSP = new FacetSearchParams(fip, new CountFacetRequest(cp, 10));
FacetSearchParams scoreFSP = new FacetSearchParams(getFacetIndexingParams(Integer.MAX_VALUE), new ScoreFacetRequest(cp, 10)); FacetSearchParams scoreFSP = new FacetSearchParams(fip, new ScoreFacetRequest(cp, 10));
List<FacetResult> countRes = findFacets(scoredDocIDs, countFSP); List<FacetResult> countRes = findFacets(scoredDocIDs, countFSP);
List<FacetResult> scoreRes = findFacets(scoredDocIDs, scoreFSP); List<FacetResult> scoreRes = findFacets(scoredDocIDs, scoreFSP);
@ -101,10 +105,8 @@ public class TestScoredDocIdCollector extends FacetTestBase {
} }
// compute facets with certain facet requests and docs // compute facets with certain facet requests and docs
private List<FacetResult> findFacets(ScoredDocIDs sDocids, private List<FacetResult> findFacets(ScoredDocIDs sDocids, FacetSearchParams facetSearchParams) throws IOException {
FacetSearchParams facetSearchParams) throws IOException { FacetsAccumulator fAccumulator = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxoReader);
FacetsAccumulator fAccumulator = new StandardFacetsAccumulator(
facetSearchParams, indexReader, taxoReader);
List<FacetResult> res = fAccumulator.accumulate(sDocids); List<FacetResult> res = fAccumulator.accumulate(sDocids);
// Results are ready, printing them... // Results are ready, printing them...

View File

@ -113,7 +113,6 @@ public class TestStandardFacetsAccumulator extends FacetTestCase {
List<FacetResult> results = fc.getFacetResults(); List<FacetResult> results = fc.getFacetResults();
assertEquals("received too many facet results", 1, results.size()); assertEquals("received too many facet results", 1, results.size());
FacetResultNode frn = results.get(0).getFacetResultNode(); FacetResultNode frn = results.get(0).getFacetResultNode();
assertEquals("wrong weight for \"A\"", 4, (int) frn.value);
assertEquals("wrong number of children", 2, frn.subResults.size()); assertEquals("wrong number of children", 2, frn.subResults.size());
for (FacetResultNode node : frn.subResults) { for (FacetResultNode node : frn.subResults) {
assertEquals("wrong weight for child " + node.label, 2, (int) node.value); assertEquals("wrong weight for child " + node.label, 2, (int) node.value);

View File

@ -181,7 +181,6 @@ public class TestTopKInEachNodeResultHandler extends FacetTestCase {
boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]); boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
assertEquals(9, fr.getNumValidDescendants()); assertEquals(9, fr.getNumValidDescendants());
FacetResultNode parentRes = fr.getFacetResultNode(); FacetResultNode parentRes = fr.getFacetResultNode();
assertEquals(16.0, parentRes.value, Double.MIN_VALUE);
assertEquals(2, parentRes.subResults.size()); assertEquals(2, parentRes.subResults.size());
// two nodes sorted by descending values: a/b with 8 and a/c with 6 // two nodes sorted by descending values: a/b with 8 and a/c with 6
// a/b has two children a/b/2 with value 3, and a/b/1 with value 2. // a/b has two children a/b/2 with value 3, and a/b/1 with value 2.
@ -217,7 +216,6 @@ public class TestTopKInEachNodeResultHandler extends FacetTestCase {
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]); hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
assertEquals(9, fr.getNumValidDescendants()); assertEquals(9, fr.getNumValidDescendants());
parentRes = fr.getFacetResultNode(); parentRes = fr.getFacetResultNode();
assertEquals(16.0, parentRes.value, Double.MIN_VALUE);
assertEquals(2, parentRes.subResults.size()); assertEquals(2, parentRes.subResults.size());
// two nodes sorted by descending values: a/b with 8 and a/c with 6 // two nodes sorted by descending values: a/b with 8 and a/c with 6
// a/b has two children a/b/2 with value 3, and a/b/1 with value 2. // a/b has two children a/b/2 with value 3, and a/b/1 with value 2.
@ -234,7 +232,6 @@ public class TestTopKInEachNodeResultHandler extends FacetTestCase {
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]); hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
assertEquals(4, fr.getNumValidDescendants(), 4); assertEquals(4, fr.getNumValidDescendants(), 4);
parentRes = fr.getFacetResultNode(); parentRes = fr.getFacetResultNode();
assertEquals(16.0, parentRes.value, Double.MIN_VALUE);
assertEquals(2, parentRes.subResults.size()); assertEquals(2, parentRes.subResults.size());
// two nodes sorted by descending values: // two nodes sorted by descending values:
// a/b with value 8 and a/c with value 6 // a/b with value 8 and a/c with value 6

View File

@ -4,6 +4,8 @@ import java.util.ArrayList;
import java.util.Arrays; import java.util.Arrays;
import java.util.List; import java.util.List;
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.params.CountFacetRequest; import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest; import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest.ResultMode; import org.apache.lucene.facet.search.params.FacetRequest.ResultMode;
@ -73,7 +75,9 @@ public class TestTopKResultsHandler extends BaseTestTopK {
@Test @Test
public void testSimple() throws Exception { public void testSimple() throws Exception {
for (int partitionSize : partitionSizes) { for (int partitionSize : partitionSizes) {
initIndex(partitionSize); FacetIndexingParams fip = getFacetIndexingParams(partitionSize);
OrdinalPolicy op = fip.getCategoryListParams(null).getOrdinalPolicy(null);
initIndex(fip);
List<FacetRequest> facetRequests = new ArrayList<FacetRequest>(); List<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
facetRequests.add(new CountFacetRequest(new CategoryPath("a"), 100)); facetRequests.add(new CountFacetRequest(new CategoryPath("a"), 100));
@ -87,8 +91,8 @@ public class TestTopKResultsHandler extends BaseTestTopK {
facetRequests.add(new CountFacetRequest(new CategoryPath("a", "c"), 100)); facetRequests.add(new CountFacetRequest(new CategoryPath("a", "c"), 100));
// do different facet counts and compare to control // do different facet counts and compare to control
FacetSearchParams sParams = getFacetSearchParams(facetRequests, getFacetIndexingParams(partitionSize)); FacetSearchParams sParams = getFacetSearchParams(facetRequests, fip);
FacetsCollector fc = new StandardFacetsCollector(sParams, indexReader, taxoReader) { FacetsCollector fc = new StandardFacetsCollector(sParams, indexReader, taxoReader) {
@Override @Override
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) { protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) {
@ -100,17 +104,21 @@ public class TestTopKResultsHandler extends BaseTestTopK {
searcher.search(new MatchAllDocsQuery(), fc); searcher.search(new MatchAllDocsQuery(), fc);
List<FacetResult> facetResults = fc.getFacetResults(); List<FacetResult> facetResults = fc.getFacetResults();
FacetResult fr = facetResults.get(0); FacetResult fr = facetResults.get(0);
FacetResultNode parentRes = fr.getFacetResultNode(); FacetResultNode parentRes = fr.getFacetResultNode();
assertEquals(13.0, parentRes.value, Double.MIN_VALUE); if (op == OrdinalPolicy.ALL_PARENTS) {
assertEquals(13.0, parentRes.value, Double.MIN_VALUE);
}
FacetResultNode[] frn = resultNodesAsArray(parentRes); FacetResultNode[] frn = resultNodesAsArray(parentRes);
assertEquals(7.0, frn[0].value, Double.MIN_VALUE); assertEquals(7.0, frn[0].value, Double.MIN_VALUE);
assertEquals(6.0, frn[1].value, Double.MIN_VALUE); assertEquals(6.0, frn[1].value, Double.MIN_VALUE);
fr = facetResults.get(1); fr = facetResults.get(1);
parentRes = fr.getFacetResultNode(); parentRes = fr.getFacetResultNode();
assertEquals(13.0, parentRes.value, Double.MIN_VALUE); if (op == OrdinalPolicy.ALL_PARENTS) {
assertEquals(13.0, parentRes.value, Double.MIN_VALUE);
}
frn = resultNodesAsArray(parentRes); frn = resultNodesAsArray(parentRes);
assertEquals(7.0, frn[0].value, Double.MIN_VALUE); assertEquals(7.0, frn[0].value, Double.MIN_VALUE);
assertEquals(6.0, frn[1].value, Double.MIN_VALUE); assertEquals(6.0, frn[1].value, Double.MIN_VALUE);
@ -121,7 +129,9 @@ public class TestTopKResultsHandler extends BaseTestTopK {
fr = facetResults.get(2); fr = facetResults.get(2);
parentRes = fr.getFacetResultNode(); parentRes = fr.getFacetResultNode();
assertEquals(7.0, parentRes.value, Double.MIN_VALUE); if (op == OrdinalPolicy.ALL_PARENTS) {
assertEquals(7.0, parentRes.value, Double.MIN_VALUE);
}
frn = resultNodesAsArray(parentRes); frn = resultNodesAsArray(parentRes);
assertEquals(2.0, frn[0].value, Double.MIN_VALUE); assertEquals(2.0, frn[0].value, Double.MIN_VALUE);
assertEquals(2.0, frn[1].value, Double.MIN_VALUE); assertEquals(2.0, frn[1].value, Double.MIN_VALUE);
@ -130,13 +140,17 @@ public class TestTopKResultsHandler extends BaseTestTopK {
fr = facetResults.get(3); fr = facetResults.get(3);
parentRes = fr.getFacetResultNode(); parentRes = fr.getFacetResultNode();
assertEquals(2.0, parentRes.value, Double.MIN_VALUE); if (op == OrdinalPolicy.ALL_PARENTS) {
assertEquals(2.0, parentRes.value, Double.MIN_VALUE);
}
frn = resultNodesAsArray(parentRes); frn = resultNodesAsArray(parentRes);
assertEquals(0, frn.length); assertEquals(0, frn.length);
fr = facetResults.get(4); fr = facetResults.get(4);
parentRes = fr.getFacetResultNode(); parentRes = fr.getFacetResultNode();
assertEquals(6.0, parentRes.value, Double.MIN_VALUE); if (op == OrdinalPolicy.ALL_PARENTS) {
assertEquals(6.0, parentRes.value, Double.MIN_VALUE);
}
frn = resultNodesAsArray(parentRes); frn = resultNodesAsArray(parentRes);
assertEquals(1.0, frn[0].value, Double.MIN_VALUE); assertEquals(1.0, frn[0].value, Double.MIN_VALUE);
closeAll(); closeAll();
@ -149,12 +163,12 @@ public class TestTopKResultsHandler extends BaseTestTopK {
@Test @Test
public void testGetMaxIntFacets() throws Exception { public void testGetMaxIntFacets() throws Exception {
for (int partitionSize : partitionSizes) { for (int partitionSize : partitionSizes) {
initIndex(partitionSize); FacetIndexingParams fip = getFacetIndexingParams(partitionSize);
initIndex(fip);
// do different facet counts and compare to control // do different facet counts and compare to control
CategoryPath path = new CategoryPath("a", "b"); CategoryPath path = new CategoryPath("a", "b");
FacetSearchParams sParams = getFacetSearchParams(getFacetIndexingParams(partitionSize), FacetSearchParams sParams = getFacetSearchParams(fip, new CountFacetRequest(path, Integer.MAX_VALUE));
new CountFacetRequest(path, Integer.MAX_VALUE));
FacetsCollector fc = new StandardFacetsCollector(sParams, indexReader, taxoReader) { FacetsCollector fc = new StandardFacetsCollector(sParams, indexReader, taxoReader) {
@Override @Override
@ -174,7 +188,7 @@ public class TestTopKResultsHandler extends BaseTestTopK {
// As a control base results, ask for top-1000 results // As a control base results, ask for top-1000 results
FacetSearchParams sParams2 = getFacetSearchParams( FacetSearchParams sParams2 = getFacetSearchParams(
getFacetIndexingParams(partitionSize), new CountFacetRequest(path, Integer.MAX_VALUE)); fip, new CountFacetRequest(path, Integer.MAX_VALUE));
FacetsCollector fc2 = new StandardFacetsCollector(sParams2, indexReader, taxoReader) { FacetsCollector fc2 = new StandardFacetsCollector(sParams2, indexReader, taxoReader) {
@Override @Override
@ -207,12 +221,11 @@ public class TestTopKResultsHandler extends BaseTestTopK {
@Test @Test
public void testSimpleSearchForNonexistentFacet() throws Exception { public void testSimpleSearchForNonexistentFacet() throws Exception {
for (int partitionSize : partitionSizes) { for (int partitionSize : partitionSizes) {
initIndex(partitionSize); FacetIndexingParams fip = getFacetIndexingParams(partitionSize);
initIndex(fip);
CategoryPath path = new CategoryPath("Miau Hattulla"); CategoryPath path = new CategoryPath("Miau Hattulla");
FacetSearchParams sParams = getFacetSearchParams( FacetSearchParams sParams = getFacetSearchParams(fip, new CountFacetRequest(path, 10));
getFacetIndexingParams(partitionSize),
new CountFacetRequest(path, 10));
FacetsCollector fc = FacetsCollector.create(sParams, indexReader, taxoReader); FacetsCollector fc = FacetsCollector.create(sParams, indexReader, taxoReader);

View File

@ -4,6 +4,7 @@ import java.io.IOException;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.params.FacetSearchParams; import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult; import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode; import org.apache.lucene.facet.search.results.FacetResultNode;
@ -32,10 +33,10 @@ import org.junit.Test;
public class TestTopKResultsHandlerRandom extends BaseTestTopK { public class TestTopKResultsHandlerRandom extends BaseTestTopK {
private List<FacetResult> countFacets(int partitionSize, int numResults, final boolean doComplement) private List<FacetResult> countFacets(FacetIndexingParams fip, int numResults, final boolean doComplement)
throws IOException { throws IOException {
Query q = new MatchAllDocsQuery(); Query q = new MatchAllDocsQuery();
FacetSearchParams facetSearchParams = searchParamsWithRequests(numResults, partitionSize); FacetSearchParams facetSearchParams = searchParamsWithRequests(numResults, fip);
FacetsCollector fc = new StandardFacetsCollector(facetSearchParams, indexReader, taxoReader) { FacetsCollector fc = new StandardFacetsCollector(facetSearchParams, indexReader, taxoReader) {
@Override @Override
protected FacetsAccumulator initFacetsAccumulator( protected FacetsAccumulator initFacetsAccumulator(
@ -59,7 +60,8 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK {
@Test @Test
public void testTopCountsOrder() throws Exception { public void testTopCountsOrder() throws Exception {
for (int partitionSize : partitionSizes) { for (int partitionSize : partitionSizes) {
initIndex(partitionSize); FacetIndexingParams fip = getFacetIndexingParams(partitionSize);
initIndex(fip);
/* /*
* Try out faceted search in its most basic form (no sampling nor complement * Try out faceted search in its most basic form (no sampling nor complement
@ -67,7 +69,7 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK {
* being indexed, and later on an "over-all" faceted search is performed. The * being indexed, and later on an "over-all" faceted search is performed. The
* results are checked against the DF of each facet by itself * results are checked against the DF of each facet by itself
*/ */
List<FacetResult> facetResults = countFacets(partitionSize, 100000, false); List<FacetResult> facetResults = countFacets(fip, 100000, false);
assertCountsAndCardinality(facetCountsTruth(), facetResults); assertCountsAndCardinality(facetCountsTruth(), facetResults);
/* /*
@ -77,10 +79,10 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK {
* place in here. The results are checked against a regular (a.k.a * place in here. The results are checked against a regular (a.k.a
* no-complement, no-sampling) faceted search with the same parameters. * no-complement, no-sampling) faceted search with the same parameters.
*/ */
facetResults = countFacets(partitionSize, 100000, true); facetResults = countFacets(fip, 100000, true);
assertCountsAndCardinality(facetCountsTruth(), facetResults); assertCountsAndCardinality(facetCountsTruth(), facetResults);
List<FacetResult> allFacetResults = countFacets(partitionSize, 100000, false); List<FacetResult> allFacetResults = countFacets(fip, 100000, false);
HashMap<String,Integer> all = new HashMap<String,Integer>(); HashMap<String,Integer> all = new HashMap<String,Integer>();
int maxNumNodes = 0; int maxNumNodes = 0;
@ -108,7 +110,7 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK {
if (VERBOSE) { if (VERBOSE) {
System.out.println("------- verify for "+n+" top results"); System.out.println("------- verify for "+n+" top results");
} }
List<FacetResult> someResults = countFacets(partitionSize, n, false); List<FacetResult> someResults = countFacets(fip, n, false);
k = 0; k = 0;
for (FacetResult fr : someResults) { for (FacetResult fr : someResults) {
FacetResultNode topResNode = fr.getFacetResultNode(); FacetResultNode topResNode = fr.getFacetResultNode();

View File

@ -8,7 +8,9 @@ import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.FacetTestUtils; import org.apache.lucene.facet.FacetTestUtils;
import org.apache.lucene.facet.FacetTestUtils.IndexTaxonomyReaderPair; import org.apache.lucene.facet.FacetTestUtils.IndexTaxonomyReaderPair;
import org.apache.lucene.facet.FacetTestUtils.IndexTaxonomyWriterPair; import org.apache.lucene.facet.FacetTestUtils.IndexTaxonomyWriterPair;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams; import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util._TestUtil; import org.apache.lucene.util._TestUtil;
@ -59,6 +61,16 @@ public class TestTotalFacetCounts extends FacetTestCase {
public int getPartitionSize() { public int getPartitionSize() {
return partitionSize; return partitionSize;
} }
/**
 * Supplies the category-list params used for the given category.
 * The {@code category} argument is not consulted: every call builds a fresh
 * default-constructed CategoryListParams whose ordinal policy is overridden below.
 */
@Override
public CategoryListParams getCategoryListParams(CategoryPath category) {
return new CategoryListParams() {
// Answer ALL_PARENTS for any dimension name, ignoring the argument.
// NOTE(review): presumably this pins the policy so the expected total counts
// checked later in the test include parent categories - confirm against the assertions.
@Override
public OrdinalPolicy getOrdinalPolicy(String dimension) {
return OrdinalPolicy.ALL_PARENTS;
}
};
}
}; };
// The counts that the TotalFacetCountsArray should have after adding // The counts that the TotalFacetCountsArray should have after adding
// the below facets to the index. // the below facets to the index.

View File

@ -87,7 +87,7 @@ public class TestTotalFacetCountsCache extends FacetTestCase {
/** Utility method to add a document and facets to an index/taxonomy. */ /** Utility method to add a document and facets to an index/taxonomy. */
static void addFacets(FacetIndexingParams iParams, IndexWriter iw, static void addFacets(FacetIndexingParams iParams, IndexWriter iw,
TaxonomyWriter tw, String... strings) throws IOException { TaxonomyWriter tw, String... strings) throws IOException {
Document doc = new Document(); Document doc = new Document();
FacetFields facetFields = new FacetFields(tw, iParams); FacetFields facetFields = new FacetFields(tw, iParams);
facetFields.addFields(doc, Collections.singletonList(new CategoryPath(strings))); facetFields.addFields(doc, Collections.singletonList(new CategoryPath(strings)));

View File

@ -3,6 +3,7 @@ package org.apache.lucene.facet.search.sampling;
import java.util.List; import java.util.List;
import java.util.Random; import java.util.Random;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.BaseTestTopK; import org.apache.lucene.facet.search.BaseTestTopK;
import org.apache.lucene.facet.search.FacetsAccumulator; import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.search.FacetsCollector; import org.apache.lucene.facet.search.FacetsCollector;
@ -46,8 +47,8 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
protected static final int RETRIES = 10; protected static final int RETRIES = 10;
@Override @Override
protected FacetSearchParams searchParamsWithRequests(int numResults, int partitionSize) { protected FacetSearchParams searchParamsWithRequests(int numResults, FacetIndexingParams fip) {
FacetSearchParams res = super.searchParamsWithRequests(numResults, partitionSize); FacetSearchParams res = super.searchParamsWithRequests(numResults, fip);
for (FacetRequest req : res.facetRequests) { for (FacetRequest req : res.facetRequests) {
// randomize the way we aggregate results // randomize the way we aggregate results
if (random().nextBoolean()) { if (random().nextBoolean()) {
@ -71,20 +72,23 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
boolean useRandomSampler = random().nextBoolean(); boolean useRandomSampler = random().nextBoolean();
for (int partitionSize : partitionSizes) { for (int partitionSize : partitionSizes) {
try { try {
initIndex(partitionSize); // complements return counts for all ordinals, so force ALL_PARENTS indexing
// so that it's easier to compare
FacetIndexingParams fip = getFacetIndexingParams(partitionSize, true);
initIndex(fip);
// Get all of the documents and run the query, then do different // Get all of the documents and run the query, then do different
// facet counts and compare to control // facet counts and compare to control
Query q = new TermQuery(new Term(CONTENT_FIELD, BETA)); // 90% of the docs Query q = new TermQuery(new Term(CONTENT_FIELD, BETA)); // 90% of the docs
ScoredDocIdCollector docCollector = ScoredDocIdCollector.create(indexReader.maxDoc(), false); ScoredDocIdCollector docCollector = ScoredDocIdCollector.create(indexReader.maxDoc(), false);
FacetSearchParams expectedSearchParams = searchParamsWithRequests(K, partitionSize); FacetSearchParams expectedSearchParams = searchParamsWithRequests(K, fip);
FacetsCollector fc = FacetsCollector.create(expectedSearchParams, indexReader, taxoReader); FacetsCollector fc = FacetsCollector.create(expectedSearchParams, indexReader, taxoReader);
searcher.search(q, MultiCollector.wrap(docCollector, fc)); searcher.search(q, MultiCollector.wrap(docCollector, fc));
List<FacetResult> expectedResults = fc.getFacetResults(); List<FacetResult> expectedResults = fc.getFacetResults();
FacetSearchParams samplingSearchParams = searchParamsWithRequests(K, partitionSize); FacetSearchParams samplingSearchParams = searchParamsWithRequests(K, fip);
// try several times in case of failure, because the test has a chance to fail // try several times in case of failure, because the test has a chance to fail
// if the top K facets are not sufficiently common with the sample set // if the top K facets are not sufficiently common with the sample set