mirror of https://github.com/apache/lucene.git
LUCENE-4715: Add OrdinalPolicy.ALL_BUT_DIMENSION
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1440416 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
913b168255
commit
42256baec5
|
@ -61,6 +61,11 @@ Optimizations
|
|||
* LUCENE-4690: Performance improvements and non-hashing versions
|
||||
of NumericUtils.*ToPrefixCoded() (yonik)
|
||||
|
||||
* LUCENE-4715: CategoryListParams.getOrdinalPolicy now allows returning a
|
||||
different OrdinalPolicy per dimension, to better tune how you index
|
||||
facets. Also added OrdinalPolicy.ALL_BUT_DIMENSION.
|
||||
(Shai Erera, Michael McCandless)
|
||||
|
||||
New Features
|
||||
|
||||
* LUCENE-4686: New specialized DGapVInt8IntEncoder for facets (now the
|
||||
|
|
|
@ -3,6 +3,7 @@ package org.apache.lucene.facet.index;
|
|||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
|
@ -115,12 +116,12 @@ public class CountingListBuilder implements CategoryListBuilder {
|
|||
|
||||
private final OrdinalsEncoder ordinalsEncoder;
|
||||
private final TaxonomyWriter taxoWriter;
|
||||
private final OrdinalPolicy ordinalPolicy;
|
||||
private final CategoryListParams clp;
|
||||
|
||||
public CountingListBuilder(CategoryListParams categoryListParams, FacetIndexingParams indexingParams,
|
||||
TaxonomyWriter taxoWriter) {
|
||||
this.taxoWriter = taxoWriter;
|
||||
this.ordinalPolicy = categoryListParams.getOrdinalPolicy();
|
||||
this.clp = categoryListParams;
|
||||
if (indexingParams.getPartitionSize() == Integer.MAX_VALUE) {
|
||||
ordinalsEncoder = new NoPartitionsOrdinalsEncoder(categoryListParams);
|
||||
} else {
|
||||
|
@ -141,16 +142,23 @@ public class CountingListBuilder implements CategoryListBuilder {
|
|||
*/
|
||||
@Override
|
||||
public Map<String,BytesRef> build(IntsRef ordinals, Iterable<CategoryPath> categories) throws IOException {
|
||||
int upto = ordinals.length; // since we add ordinals to IntsRef, iterate upto original length
|
||||
|
||||
if (ordinalPolicy == OrdinalPolicy.ALL_PARENTS) { // add all parents too
|
||||
for (int i = 0; i < upto; i++) {
|
||||
int ordinal = ordinals.ints[i];
|
||||
int upto = ordinals.length; // since we may add ordinals to IntsRef, iterate upto original length
|
||||
|
||||
Iterator<CategoryPath> iter = categories.iterator();
|
||||
for (int i = 0; i < upto; i++) {
|
||||
int ordinal = ordinals.ints[i];
|
||||
CategoryPath cp = iter.next();
|
||||
OrdinalPolicy op = clp.getOrdinalPolicy(cp.components[0]);
|
||||
if (op != OrdinalPolicy.NO_PARENTS) {
|
||||
// need to add parents too
|
||||
int parent = taxoWriter.getParent(ordinal);
|
||||
while (parent > 0) {
|
||||
ordinals.ints[ordinals.length++] = parent;
|
||||
parent = taxoWriter.getParent(parent);
|
||||
}
|
||||
if (op == OrdinalPolicy.ALL_BUT_DIMENSION) { // discard the last added parent, which is the dimension
|
||||
ordinals.length--;
|
||||
}
|
||||
}
|
||||
}
|
||||
return ordinalsEncoder.encode(ordinals);
|
||||
|
|
|
@ -4,6 +4,7 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.facet.search.CategoryListIterator;
|
||||
import org.apache.lucene.facet.search.DocValuesCategoryListIterator;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.util.PartitionsUtils;
|
||||
import org.apache.lucene.util.encoding.DGapVInt8IntEncoder;
|
||||
import org.apache.lucene.util.encoding.IntDecoder;
|
||||
|
@ -35,25 +36,61 @@ import org.apache.lucene.util.encoding.UniqueValuesIntEncoder;
|
|||
*/
|
||||
public class CategoryListParams {
|
||||
|
||||
/** OrdinalPolicy defines which ordinals are encoded for every document. */
|
||||
/**
|
||||
* Defines which category ordinals are encoded for every document. This also
|
||||
* affects how category ordinals are aggregated, check the different policies
|
||||
* for more details.
|
||||
*/
|
||||
public static enum OrdinalPolicy {
|
||||
/**
|
||||
* Encodes only the ordinal of leaf nodes. That is, the category A/B/C will
|
||||
* not encode the ordinals of A and A/B.
|
||||
* Encodes only the ordinals of leaf nodes. That is, for the category A/B/C,
|
||||
* the ordinals of A and A/B will not be encoded. This policy is efficient
|
||||
* for hierarchical dimensions, as it reduces the number of ordinals that
|
||||
* are visited per document. During faceted search, this policy behaves
|
||||
* exactly like {@link #ALL_PARENTS}, and the counts of all path components
|
||||
* will be computed as well.
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> this {@link OrdinalPolicy} requires a special collector or
|
||||
* accumulator, which will fix the parents' counts, unless you are not
|
||||
* interested in the parents counts.
|
||||
* accumulator, which will fix the parents' counts.
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> since only leaf nodes are encoded for the document, you
|
||||
* should use this policy when the same document doesn't share two
|
||||
* categories that have a mutual parent, or otherwise the counts will be
|
||||
* wrong (the mutual parent will be over-counted). For example, if a
|
||||
* document has the categories A/B/C and A/B/D, then with this policy the
|
||||
* counts of "A" and "B" will be 2, which is wrong. If you intend to index
|
||||
* hierarchical dimensions, with more than one category per document, you
|
||||
* should use either {@link #ALL_PARENTS} or {@link #ALL_BUT_DIMENSION}.
|
||||
*/
|
||||
NO_PARENTS,
|
||||
|
||||
/**
|
||||
* Encodes the ordinals of all path components. That is, the category A/B/C
|
||||
* will encode the ordinals of A and A/B as well. This is the default
|
||||
* {@link OrdinalPolicy}.
|
||||
* will encode the ordinals of A and A/B as well. If you don't require the
|
||||
* dimension's count during search, consider using
|
||||
* {@link #ALL_BUT_DIMENSION}.
|
||||
*/
|
||||
ALL_PARENTS
|
||||
ALL_PARENTS,
|
||||
|
||||
/**
|
||||
* Encodes the ordinals of all path components except the dimension. The
|
||||
* dimension of a category is defined to be the first component in
|
||||
* {@link CategoryPath#components}. For the category A/B/C, the ordinal of
|
||||
* A/B will be encoded as well, however not the ordinal of A.
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> when facets are aggregated, this policy behaves exactly like
|
||||
* {@link #ALL_PARENTS}, except that the dimension is never counted. I.e. if
|
||||
* you ask to count the facet "A", then while in {@link #ALL_PARENTS} you
|
||||
* will get counts for "A" <u>and its children</u>, with this policy you
|
||||
* will get counts for <u>only its children</u>. This policy is the default
|
||||
* one, and makes sense for using with flat dimensions, whenever your
|
||||
* application does not require the dimension's count. Otherwise, use
|
||||
* {@link #ALL_PARENTS}.
|
||||
*/
|
||||
ALL_BUT_DIMENSION
|
||||
}
|
||||
|
||||
/** The default field used to store the facets information. */
|
||||
|
@ -63,7 +100,7 @@ public class CategoryListParams {
|
|||
* The default {@link OrdinalPolicy} that's used when encoding a document's
|
||||
* category ordinals.
|
||||
*/
|
||||
public static final OrdinalPolicy DEFAULT_ORDINAL_POLICY = OrdinalPolicy.ALL_PARENTS;
|
||||
public static final OrdinalPolicy DEFAULT_ORDINAL_POLICY = OrdinalPolicy.ALL_BUT_DIMENSION;
|
||||
|
||||
public final String field;
|
||||
|
||||
|
@ -115,19 +152,15 @@ public class CategoryListParams {
|
|||
return false;
|
||||
}
|
||||
CategoryListParams other = (CategoryListParams) o;
|
||||
if (this.hashCode != other.hashCode) {
|
||||
if (hashCode != other.hashCode) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// The above hashcodes might equal each other in the case of a collision,
|
||||
// so at this point only directly term equality testing will settle
|
||||
// the equality test.
|
||||
return field.equals(other.field);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return this.hashCode;
|
||||
return hashCode;
|
||||
}
|
||||
|
||||
/** Create the {@link CategoryListIterator} for the specified partition. */
|
||||
|
@ -137,14 +170,18 @@ public class CategoryListParams {
|
|||
return new DocValuesCategoryListIterator(docValuesField, createEncoder().createMatchingDecoder());
|
||||
}
|
||||
|
||||
/** Returns the {@link OrdinalPolicy} to use for this {@link CategoryListParams}. */
|
||||
public OrdinalPolicy getOrdinalPolicy() {
|
||||
/**
|
||||
* Returns the {@link OrdinalPolicy} to use for the given dimension. This
|
||||
* {@link CategoryListParams} always returns {@link #DEFAULT_ORDINAL_POLICY}
|
||||
* for all dimensions.
|
||||
*/
|
||||
public OrdinalPolicy getOrdinalPolicy(String dimension) {
|
||||
return DEFAULT_ORDINAL_POLICY;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "field=" + field + " encoder=" + createEncoder() + " ordinalPolicy=" + getOrdinalPolicy();
|
||||
return "field=" + field + " encoder=" + createEncoder() + " ordinalPolicy=" + getOrdinalPolicy(null);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,55 @@
|
|||
package org.apache.lucene.facet.index.params;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A {@link CategoryListParams} which allow controlling the
|
||||
* {@link CategoryListParams.OrdinalPolicy} used for each dimension. The
|
||||
* dimension is specified as the first component in
|
||||
* {@link CategoryPath#components}.
|
||||
*/
|
||||
public class PerDimensionOrdinalPolicy extends CategoryListParams {
|
||||
|
||||
private final Map<String,OrdinalPolicy> policies;
|
||||
private final OrdinalPolicy defaultOP;
|
||||
|
||||
public PerDimensionOrdinalPolicy(Map<String,OrdinalPolicy> policies) {
|
||||
this(policies, DEFAULT_ORDINAL_POLICY);
|
||||
}
|
||||
|
||||
public PerDimensionOrdinalPolicy(Map<String,OrdinalPolicy> policies, OrdinalPolicy defaultOP) {
|
||||
this.defaultOP = defaultOP;
|
||||
this.policies = policies;
|
||||
}
|
||||
|
||||
@Override
|
||||
public OrdinalPolicy getOrdinalPolicy(String dimension) {
|
||||
OrdinalPolicy op = policies.get(dimension);
|
||||
return op == null ? defaultOP : op;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return super.toString() + " policies=" + policies;
|
||||
}
|
||||
|
||||
}
|
|
@ -85,7 +85,7 @@ import org.apache.lucene.util.encoding.DGapVInt8IntDecoder;
|
|||
public class CountingFacetsCollector extends FacetsCollector {
|
||||
|
||||
private final FacetSearchParams fsp;
|
||||
private final OrdinalPolicy ordinalPolicy;
|
||||
private final CategoryListParams clp;
|
||||
private final TaxonomyReader taxoReader;
|
||||
private final BytesRef buf = new BytesRef(32);
|
||||
private final FacetArrays facetArrays;
|
||||
|
@ -107,8 +107,7 @@ public class CountingFacetsCollector extends FacetsCollector {
|
|||
assert assertParams(fsp) == null : assertParams(fsp);
|
||||
|
||||
this.fsp = fsp;
|
||||
CategoryListParams clp = fsp.indexingParams.getCategoryListParams(fsp.facetRequests.get(0).categoryPath);
|
||||
this.ordinalPolicy = clp.getOrdinalPolicy();
|
||||
this.clp = fsp.indexingParams.getCategoryListParams(fsp.facetRequests.get(0).categoryPath);
|
||||
this.facetsField = clp.field;
|
||||
this.taxoReader = taxoReader;
|
||||
this.facetArrays = facetArrays;
|
||||
|
@ -217,21 +216,21 @@ public class CountingFacetsCollector extends FacetsCollector {
|
|||
}
|
||||
}
|
||||
|
||||
private void countParents(int[] parents) {
|
||||
// counts[0] is the count of ROOT, which we don't care about and counts[1]
|
||||
// can only update counts[0], so we don't bother to visit it too. also,
|
||||
// since parents always have lower ordinals than their children, we traverse
|
||||
// the array backwards. this also allows us to update just the immediate
|
||||
// parent's count (actually, otherwise it would be a mistake).
|
||||
for (int i = counts.length - 1; i > 1; i--) {
|
||||
int count = counts[i];
|
||||
if (count > 0) {
|
||||
int parent = parents[i];
|
||||
if (parent != 0) {
|
||||
counts[parent] += count;
|
||||
}
|
||||
}
|
||||
/**
|
||||
* Computes the counts of ordinals under the given ordinal's tree, by
|
||||
* recursively going down to leaf nodes and rollin up their counts (called
|
||||
* only with categories are indexing with OrdinalPolicy.NO_PARENTS).
|
||||
*/
|
||||
private int rollupCounts(int ordinal, int[] children, int[] siblings) {
|
||||
int count = 0;
|
||||
while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
|
||||
int childCount = counts[ordinal];
|
||||
childCount += rollupCounts(children[ordinal], children, siblings);
|
||||
counts[ordinal] = childCount;
|
||||
count += childCount;
|
||||
ordinal = siblings[ordinal];
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -242,11 +241,6 @@ public class CountingFacetsCollector extends FacetsCollector {
|
|||
|
||||
ParallelTaxonomyArrays arrays = taxoReader.getParallelTaxonomyArrays();
|
||||
|
||||
if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) {
|
||||
// need to count parents
|
||||
countParents(arrays.parents());
|
||||
}
|
||||
|
||||
// compute top-K
|
||||
final int[] children = arrays.children();
|
||||
final int[] siblings = arrays.siblings();
|
||||
|
@ -256,6 +250,12 @@ public class CountingFacetsCollector extends FacetsCollector {
|
|||
if (rootOrd == TaxonomyReader.INVALID_ORDINAL) { // category does not exist
|
||||
continue;
|
||||
}
|
||||
OrdinalPolicy ordinalPolicy = clp.getOrdinalPolicy(fr.categoryPath.components[0]);
|
||||
if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) {
|
||||
// need to count parents
|
||||
counts[rootOrd] += rollupCounts(children[rootOrd], children, siblings);
|
||||
}
|
||||
|
||||
FacetResultNode root = new FacetResultNode();
|
||||
root.ordinal = rootOrd;
|
||||
root.label = fr.categoryPath;
|
||||
|
|
|
@ -43,13 +43,21 @@ public abstract class FacetsCollector extends Collector {
|
|||
* Returns the most optimized {@link FacetsCollector} for the given search
|
||||
* parameters. The returned {@link FacetsCollector} is guaranteed to satisfy
|
||||
* the requested parameters.
|
||||
*
|
||||
* @throws IllegalArgumentException
|
||||
* if there is no built-in collector that can satisfy the search
|
||||
* parameters.
|
||||
*/
|
||||
public static FacetsCollector create(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) {
|
||||
if (CountingFacetsCollector.assertParams(fsp) == null) {
|
||||
return new CountingFacetsCollector(fsp, taxoReader);
|
||||
}
|
||||
|
||||
return new StandardFacetsCollector(fsp, indexReader, taxoReader);
|
||||
if (StandardFacetsCollector.assertParams(fsp) == null) {
|
||||
return new StandardFacetsCollector(fsp, indexReader, taxoReader);
|
||||
}
|
||||
|
||||
throw new IllegalArgumentException("None of the built-in FacetsCollectors can handle the given search params");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -49,6 +49,17 @@ public class StandardFacetsCollector extends FacetsCollector {
|
|||
private List<FacetResult> results;
|
||||
private Object resultsGuard;
|
||||
|
||||
static String assertParams(FacetSearchParams fsp) {
|
||||
// make sure none of the categories in the given FacetRequests was indexed with NO_PARENTS
|
||||
for (FacetRequest fr : fsp.facetRequests) {
|
||||
CategoryListParams clp = fsp.indexingParams.getCategoryListParams(fr.categoryPath);
|
||||
if (clp.getOrdinalPolicy(fr.categoryPath.components[0]) == OrdinalPolicy.NO_PARENTS) {
|
||||
return "this collector does not support aggregating categories that were indexed with OrdinalPolicy.NO_PARENTS";
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a collector for accumulating facets while collecting documents
|
||||
* during search.
|
||||
|
@ -62,6 +73,7 @@ public class StandardFacetsCollector extends FacetsCollector {
|
|||
* taxonomy containing the facets.
|
||||
*/
|
||||
public StandardFacetsCollector(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) {
|
||||
assert assertParams(facetSearchParams) == null : assertParams(facetSearchParams);
|
||||
facetsAccumulator = initFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
|
||||
scoreDocIdCollector = initScoredDocCollector(facetSearchParams, indexReader, taxonomyReader);
|
||||
resultsGuard = new Object();
|
||||
|
|
|
@ -74,6 +74,7 @@ public class ScoredDocIdsUtils {
|
|||
|
||||
/** Clear all deleted documents from a given open-bit-set according to a given reader */
|
||||
private static void clearDeleted(final IndexReader reader, final FixedBitSet set) throws IOException {
|
||||
// TODO use BitsFilteredDocIdSet?
|
||||
|
||||
// If there are no deleted docs
|
||||
if (!reader.hasDeletions()) {
|
||||
|
|
|
@ -6,6 +6,7 @@ import java.util.ArrayList;
|
|||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
|
@ -17,6 +18,7 @@ import org.apache.lucene.document.Field;
|
|||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.facet.index.FacetFields;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
|
@ -44,6 +46,7 @@ import org.apache.lucene.util.Bits;
|
|||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.apache.lucene.util.collections.IntToObjectMap;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
|
@ -73,7 +76,8 @@ public abstract class FacetTestBase extends FacetTestCase {
|
|||
SearchTaxoDirPair() {}
|
||||
}
|
||||
|
||||
private static HashMap<Integer, SearchTaxoDirPair> dirsPerPartitionSize;
|
||||
private static IntToObjectMap<SearchTaxoDirPair> dirsPerPartitionSize;
|
||||
private static IntToObjectMap<FacetIndexingParams> fipPerPartitionSize;
|
||||
private static File TEST_DIR;
|
||||
|
||||
/** Documents text field. */
|
||||
|
@ -91,12 +95,15 @@ public abstract class FacetTestBase extends FacetTestCase {
|
|||
@BeforeClass
|
||||
public static void beforeClassFacetTestBase() {
|
||||
TEST_DIR = _TestUtil.getTempDir("facets");
|
||||
dirsPerPartitionSize = new HashMap<Integer, FacetTestBase.SearchTaxoDirPair>();
|
||||
dirsPerPartitionSize = new IntToObjectMap<FacetTestBase.SearchTaxoDirPair>();
|
||||
fipPerPartitionSize = new IntToObjectMap<FacetIndexingParams>();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void afterClassFacetTestBase() throws Exception {
|
||||
for (SearchTaxoDirPair pair : dirsPerPartitionSize.values()) {
|
||||
Iterator<SearchTaxoDirPair> iter = dirsPerPartitionSize.iterator();
|
||||
while (iter.hasNext()) {
|
||||
SearchTaxoDirPair pair = iter.next();
|
||||
IOUtils.close(pair.searchDir, pair.taxoDir);
|
||||
}
|
||||
}
|
||||
|
@ -128,20 +135,16 @@ public abstract class FacetTestBase extends FacetTestCase {
|
|||
return DEFAULT_CONTENT[doc];
|
||||
}
|
||||
|
||||
/** Prepare index (in RAM) with single partition */
|
||||
protected final void initIndex() throws Exception {
|
||||
initIndex(Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
/** Prepare index (in RAM) with some documents and some facets */
|
||||
protected final void initIndex(int partitionSize) throws Exception {
|
||||
initIndex(partitionSize, false);
|
||||
/** Prepare index (in RAM) with some documents and some facets. */
|
||||
protected final void initIndex(FacetIndexingParams fip) throws Exception {
|
||||
initIndex(false, fip);
|
||||
}
|
||||
|
||||
/** Prepare index (in RAM/Disk) with some documents and some facets */
|
||||
protected final void initIndex(int partitionSize, boolean forceDisk) throws Exception {
|
||||
/** Prepare index (in RAM/Disk) with some documents and some facets. */
|
||||
protected final void initIndex(boolean forceDisk, FacetIndexingParams fip) throws Exception {
|
||||
int partitionSize = fip.getPartitionSize();
|
||||
if (VERBOSE) {
|
||||
System.out.println("Partition Size: " + partitionSize+" forceDisk: "+forceDisk);
|
||||
System.out.println("Partition Size: " + partitionSize + " forceDisk: "+forceDisk);
|
||||
}
|
||||
|
||||
SearchTaxoDirPair pair = dirsPerPartitionSize.get(Integer.valueOf(partitionSize));
|
||||
|
@ -158,7 +161,7 @@ public abstract class FacetTestBase extends FacetTestCase {
|
|||
RandomIndexWriter iw = new RandomIndexWriter(random(), pair.searchDir, getIndexWriterConfig(getAnalyzer()));
|
||||
TaxonomyWriter taxo = new DirectoryTaxonomyWriter(pair.taxoDir, OpenMode.CREATE);
|
||||
|
||||
populateIndex(iw, taxo, getFacetIndexingParams(partitionSize));
|
||||
populateIndex(iw, taxo, fip);
|
||||
|
||||
// commit changes (taxonomy prior to search index for consistency)
|
||||
taxo.commit();
|
||||
|
@ -182,14 +185,40 @@ public abstract class FacetTestBase extends FacetTestCase {
|
|||
|
||||
/** Returns a {@link FacetIndexingParams} per the given partition size. */
|
||||
protected FacetIndexingParams getFacetIndexingParams(final int partSize) {
|
||||
// several of our encoders don't support the value 0,
|
||||
// which is one of the values encoded when dealing w/ partitions.
|
||||
return new FacetIndexingParams() {
|
||||
@Override
|
||||
public int getPartitionSize() {
|
||||
return partSize;
|
||||
}
|
||||
};
|
||||
return getFacetIndexingParams(partSize, false);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a {@link FacetIndexingParams} per the given partition size. If
|
||||
* requested, then {@link OrdinalPolicy} will be set to
|
||||
* {@link OrdinalPolicy#ALL_PARENTS}, otherwise it will randomize.
|
||||
*/
|
||||
protected FacetIndexingParams getFacetIndexingParams(final int partSize, final boolean forceAllParents) {
|
||||
FacetIndexingParams fip = fipPerPartitionSize.get(partSize);
|
||||
if (fip == null) {
|
||||
// randomize OrdinalPolicy. Since not all Collectors / Accumulators
|
||||
// support NO_PARENTS, don't include it.
|
||||
// TODO: once all code paths support NO_PARENTS, randomize it too.
|
||||
CategoryListParams randomOP = new CategoryListParams() {
|
||||
final OrdinalPolicy op = random().nextBoolean() ? OrdinalPolicy.ALL_BUT_DIMENSION : OrdinalPolicy.ALL_PARENTS;
|
||||
@Override
|
||||
public OrdinalPolicy getOrdinalPolicy(String dimension) {
|
||||
return forceAllParents ? OrdinalPolicy.ALL_PARENTS : op;
|
||||
}
|
||||
};
|
||||
|
||||
// several of our encoders don't support the value 0,
|
||||
// which is one of the values encoded when dealing w/ partitions,
|
||||
// therefore don't randomize the encoder.
|
||||
fip = new FacetIndexingParams(randomOP) {
|
||||
@Override
|
||||
public int getPartitionSize() {
|
||||
return partSize;
|
||||
}
|
||||
};
|
||||
fipPerPartitionSize.put(partSize, fip);
|
||||
}
|
||||
return fip;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -45,7 +45,6 @@ public class TestMultiCLExample extends LuceneTestCase {
|
|||
assertNotNull("Result should not be null", result);
|
||||
FacetResultNode node = result.getFacetResultNode();
|
||||
assertEquals("Invalid label", "5", node.label.toString());
|
||||
assertEquals("Invalid value", 2.0, node.value, 0.0);
|
||||
assertEquals("Invalid # of subresults", 3, node.subResults.size());
|
||||
|
||||
Iterator<? extends FacetResultNode> subResults = node.subResults.iterator();
|
||||
|
|
|
@ -25,6 +25,8 @@ import org.apache.lucene.facet.FacetTestCase;
|
|||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.index.params.PerDimensionIndexingParams;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.params.PerDimensionOrdinalPolicy;
|
||||
import org.apache.lucene.facet.search.CategoryListIterator;
|
||||
import org.apache.lucene.facet.search.DrillDown;
|
||||
import org.apache.lucene.facet.search.FacetsCollector;
|
||||
|
@ -368,9 +370,23 @@ public class TestFacetsPayloadMigrationReader extends FacetTestCase {
|
|||
|
||||
// set custom CLP fields for two dimensions and use the default ($facets) for the other two
|
||||
HashMap<CategoryPath,CategoryListParams> params = new HashMap<CategoryPath,CategoryListParams>();
|
||||
params.put(new CategoryPath(DIMENSIONS[0]), new CategoryListParams(DIMENSIONS[0]));
|
||||
params.put(new CategoryPath(DIMENSIONS[1]), new CategoryListParams(DIMENSIONS[1]));
|
||||
FacetIndexingParams fip = new PerDimensionIndexingParams(params) {
|
||||
params.put(new CategoryPath(DIMENSIONS[0]), new CategoryListParams(DIMENSIONS[0]) {
|
||||
@Override
|
||||
public OrdinalPolicy getOrdinalPolicy(String dimension) {
|
||||
return OrdinalPolicy.ALL_PARENTS;
|
||||
}
|
||||
});
|
||||
params.put(new CategoryPath(DIMENSIONS[1]), new CategoryListParams(DIMENSIONS[1]) {
|
||||
@Override
|
||||
public OrdinalPolicy getOrdinalPolicy(String dimension) {
|
||||
return OrdinalPolicy.ALL_PARENTS;
|
||||
}
|
||||
});
|
||||
|
||||
HashMap<String,OrdinalPolicy> policies = new HashMap<String,CategoryListParams.OrdinalPolicy>();
|
||||
policies.put(DIMENSIONS[2], OrdinalPolicy.ALL_PARENTS);
|
||||
policies.put(DIMENSIONS[3], OrdinalPolicy.ALL_PARENTS);
|
||||
FacetIndexingParams fip = new PerDimensionIndexingParams(params, new PerDimensionOrdinalPolicy(policies)) {
|
||||
@Override
|
||||
public int getPartitionSize() {
|
||||
return partitionSize;
|
||||
|
|
|
@ -51,10 +51,9 @@ public abstract class BaseTestTopK extends FacetTestBase {
|
|||
private int nextInt;
|
||||
|
||||
@Override
|
||||
protected void populateIndex(RandomIndexWriter iw, TaxonomyWriter taxo,
|
||||
FacetIndexingParams iParams) throws IOException {
|
||||
protected void populateIndex(RandomIndexWriter iw, TaxonomyWriter taxo, FacetIndexingParams fip) throws IOException {
|
||||
currDoc = -1;
|
||||
super.populateIndex(iw, taxo, iParams);
|
||||
super.populateIndex(iw, taxo, fip);
|
||||
}
|
||||
|
||||
/** prepare the next random int */
|
||||
|
@ -94,17 +93,13 @@ public abstract class BaseTestTopK extends FacetTestBase {
|
|||
return Arrays.asList(cp);
|
||||
}
|
||||
|
||||
protected FacetSearchParams searchParamsWithRequests(int numResults) {
|
||||
return searchParamsWithRequests(numResults, Integer.MAX_VALUE);
|
||||
}
|
||||
|
||||
protected FacetSearchParams searchParamsWithRequests(int numResults, int partitionSize) {
|
||||
protected FacetSearchParams searchParamsWithRequests(int numResults, FacetIndexingParams fip) {
|
||||
List<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
|
||||
facetRequests.add(new CountFacetRequest(new CategoryPath("a"), numResults));
|
||||
facetRequests.add(new CountFacetRequest(new CategoryPath("a", "1"), numResults));
|
||||
facetRequests.add(new CountFacetRequest(new CategoryPath("a", "1", "10"), numResults));
|
||||
facetRequests.add(new CountFacetRequest(new CategoryPath("a", "2", "26", "267"), numResults));
|
||||
return getFacetSearchParams(facetRequests, getFacetIndexingParams(partitionSize));
|
||||
return getFacetSearchParams(facetRequests, fip);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -16,8 +16,9 @@ import org.apache.lucene.document.StringField;
|
|||
import org.apache.lucene.facet.FacetTestCase;
|
||||
import org.apache.lucene.facet.index.FacetFields;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.index.params.PerDimensionIndexingParams;
|
||||
import org.apache.lucene.facet.index.params.PerDimensionOrdinalPolicy;
|
||||
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest.SortBy;
|
||||
|
@ -146,13 +147,11 @@ public class CountingFacetsCollectorTest extends FacetTestCase {
|
|||
termExpectedCounts.put(cp, termExpectedCounts.get(cp) + 1);
|
||||
}
|
||||
}
|
||||
// add 1 to each dimension
|
||||
allExpectedCounts.put(CP_A, allExpectedCounts.get(CP_A) + 1);
|
||||
// add 1 to each NO_PARENTS dimension
|
||||
allExpectedCounts.put(CP_B, allExpectedCounts.get(CP_B) + 1);
|
||||
allExpectedCounts.put(CP_C, allExpectedCounts.get(CP_C) + 1);
|
||||
allExpectedCounts.put(CP_D, allExpectedCounts.get(CP_D) + 1);
|
||||
if (updateTermExpectedCounts) {
|
||||
termExpectedCounts.put(CP_A, termExpectedCounts.get(CP_A) + 1);
|
||||
termExpectedCounts.put(CP_B, termExpectedCounts.get(CP_B) + 1);
|
||||
termExpectedCounts.put(CP_C, termExpectedCounts.get(CP_C) + 1);
|
||||
termExpectedCounts.put(CP_D, termExpectedCounts.get(CP_D) + 1);
|
||||
|
@ -252,19 +251,13 @@ public class CountingFacetsCollectorTest extends FacetTestCase {
|
|||
conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); // prevent merges, so we can control the index segments
|
||||
IndexWriter indexWriter = new IndexWriter(indexDir, conf);
|
||||
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
|
||||
CategoryListParams allParents = new CategoryListParams();
|
||||
CategoryListParams noParents = new CategoryListParams("no_parents") {
|
||||
@Override
|
||||
public OrdinalPolicy getOrdinalPolicy() {
|
||||
return OrdinalPolicy.NO_PARENTS;
|
||||
}
|
||||
};
|
||||
Map<CategoryPath,CategoryListParams> params = new HashMap<CategoryPath,CategoryListParams>();
|
||||
params.put(CP_A, allParents);
|
||||
params.put(CP_B, allParents);
|
||||
params.put(CP_C, noParents);
|
||||
params.put(CP_D, noParents);
|
||||
fip = new PerDimensionIndexingParams(params);
|
||||
|
||||
Map<String,OrdinalPolicy> policies = new HashMap<String,CategoryListParams.OrdinalPolicy>();
|
||||
policies.put(CP_B.components[0], OrdinalPolicy.ALL_PARENTS);
|
||||
policies.put(CP_C.components[0], OrdinalPolicy.NO_PARENTS);
|
||||
policies.put(CP_D.components[0], OrdinalPolicy.NO_PARENTS);
|
||||
CategoryListParams clp = new PerDimensionOrdinalPolicy(policies);
|
||||
fip = new FacetIndexingParams(clp);
|
||||
|
||||
allExpectedCounts = newCounts();
|
||||
termExpectedCounts = newCounts();
|
||||
|
|
|
@ -104,9 +104,9 @@ public class TestDemoFacets extends FacetTestCase {
|
|||
// Retrieve & verify results:
|
||||
List<FacetResult> results = c.getFacetResults();
|
||||
assertEquals(2, results.size());
|
||||
assertEquals("Publish Date (5)\n 2012 (2)\n 2010 (2)\n 1999 (1)\n",
|
||||
assertEquals("Publish Date (0)\n 2012 (2)\n 2010 (2)\n 1999 (1)\n",
|
||||
FacetTestUtils.toSimpleString(results.get(0)));
|
||||
assertEquals("Author (5)\n Lisa (2)\n Frank (1)\n Susan (1)\n Bob (1)\n",
|
||||
assertEquals("Author (0)\n Lisa (2)\n Frank (1)\n Susan (1)\n Bob (1)\n",
|
||||
FacetTestUtils.toSimpleString(results.get(1)));
|
||||
|
||||
|
||||
|
@ -117,7 +117,7 @@ public class TestDemoFacets extends FacetTestCase {
|
|||
searcher.search(q2, c);
|
||||
results = c.getFacetResults();
|
||||
assertEquals(1, results.size());
|
||||
assertEquals("Author (2)\n Lisa (1)\n Bob (1)\n",
|
||||
assertEquals("Author (0)\n Lisa (1)\n Bob (1)\n",
|
||||
FacetTestUtils.toSimpleString(results.get(0)));
|
||||
|
||||
// Smoke test PrintTaxonomyStats:
|
||||
|
|
|
@ -14,6 +14,7 @@ import org.junit.Before;
|
|||
import org.junit.Test;
|
||||
|
||||
import org.apache.lucene.facet.FacetTestBase;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.search.FacetsAccumulator;
|
||||
import org.apache.lucene.facet.search.ScoredDocIDs;
|
||||
import org.apache.lucene.facet.search.ScoredDocIdCollector;
|
||||
|
@ -48,11 +49,14 @@ import org.apache.lucene.facet.taxonomy.CategoryPath;
|
|||
*/
|
||||
public class TestFacetsAccumulatorWithComplement extends FacetTestBase {
|
||||
|
||||
private FacetIndexingParams fip;
|
||||
|
||||
@Override
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
initIndex();
|
||||
fip = getFacetIndexingParams(Integer.MAX_VALUE);
|
||||
initIndex(fip);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -125,7 +129,7 @@ public class TestFacetsAccumulatorWithComplement extends FacetTestBase {
|
|||
|
||||
/** compute facets with certain facet requests and docs */
|
||||
private List<FacetResult> findFacets(ScoredDocIDs sDocids, boolean withComplement) throws IOException {
|
||||
FacetSearchParams fsp = new FacetSearchParams(getFacetIndexingParams(Integer.MAX_VALUE), new CountFacetRequest(new CategoryPath("root","a"), 10));
|
||||
FacetSearchParams fsp = new FacetSearchParams(fip, new CountFacetRequest(new CategoryPath("root","a"), 10));
|
||||
FacetsAccumulator fAccumulator = new StandardFacetsAccumulator(fsp, indexReader, taxoReader);
|
||||
|
||||
fAccumulator.setComplementThreshold(
|
||||
|
|
|
@ -274,7 +274,6 @@ public class TestMultipleCategoryLists extends FacetTestCase {
|
|||
Iterable<? extends FacetResultNode> subResults = resNode.subResults;
|
||||
Iterator<? extends FacetResultNode> subIter = subResults.iterator();
|
||||
|
||||
checkResult(resNode, "Band", 5.0);
|
||||
checkResult(subIter.next(), "Band/Rock & Pop", 4.0);
|
||||
checkResult(subIter.next(), "Band/Punk", 1.0);
|
||||
|
||||
|
@ -283,7 +282,6 @@ public class TestMultipleCategoryLists extends FacetTestCase {
|
|||
subResults = resNode.subResults;
|
||||
subIter = subResults.iterator();
|
||||
|
||||
checkResult(resNode, "Band", 5.0);
|
||||
checkResult(subIter.next(), "Band/Rock & Pop", 4.0);
|
||||
checkResult(subIter.next(), "Band/Rock & Pop/Dave Matthews Band", 1.0);
|
||||
checkResult(subIter.next(), "Band/Rock & Pop/REM", 1.0);
|
||||
|
@ -297,7 +295,6 @@ public class TestMultipleCategoryLists extends FacetTestCase {
|
|||
subResults = resNode.subResults;
|
||||
subIter = subResults.iterator();
|
||||
|
||||
checkResult(resNode, "Author", 3.0);
|
||||
checkResult(subIter.next(), "Author/Kurt Vonnegut", 1.0);
|
||||
checkResult(subIter.next(), "Author/Stephen King", 1.0);
|
||||
checkResult(subIter.next(), "Author/Mark Twain", 1.0);
|
||||
|
@ -307,7 +304,6 @@ public class TestMultipleCategoryLists extends FacetTestCase {
|
|||
subResults = resNode.subResults;
|
||||
subIter = subResults.iterator();
|
||||
|
||||
checkResult(resNode, "Band/Rock & Pop", 4.0);
|
||||
checkResult(subIter.next(), "Band/Rock & Pop/Dave Matthews Band", 1.0);
|
||||
checkResult(subIter.next(), "Band/Rock & Pop/REM", 1.0);
|
||||
checkResult(subIter.next(), "Band/Rock & Pop/U2", 1.0);
|
||||
|
|
|
@ -3,6 +3,7 @@ package org.apache.lucene.facet.search;
|
|||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.facet.FacetTestBase;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.search.FacetsCollector;
|
||||
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
|
@ -31,18 +32,21 @@ import org.junit.Before;
|
|||
|
||||
public class TestSameRequestAccumulation extends FacetTestBase {
|
||||
|
||||
private FacetIndexingParams fip;
|
||||
|
||||
@Override
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
initIndex();
|
||||
fip = getFacetIndexingParams(Integer.MAX_VALUE);
|
||||
initIndex(fip);
|
||||
}
|
||||
|
||||
// Following LUCENE-4461 - ensure requesting the (exact) same request more
|
||||
// than once does not alter the results
|
||||
public void testTwoSameRequests() throws Exception {
|
||||
final CountFacetRequest facetRequest = new CountFacetRequest(new CategoryPath("root"), 10);
|
||||
FacetSearchParams fsp = new FacetSearchParams(facetRequest);
|
||||
FacetSearchParams fsp = new FacetSearchParams(fip, facetRequest);
|
||||
|
||||
FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);
|
||||
searcher.search(new MatchAllDocsQuery(), fc);
|
||||
|
@ -50,7 +54,7 @@ public class TestSameRequestAccumulation extends FacetTestBase {
|
|||
final String expected = fc.getFacetResults().get(0).toString();
|
||||
|
||||
// now add the same facet request with duplicates (same instance and same one)
|
||||
fsp = new FacetSearchParams(facetRequest, facetRequest, new CountFacetRequest(new CategoryPath("root"), 10));
|
||||
fsp = new FacetSearchParams(fip, facetRequest, facetRequest, new CountFacetRequest(new CategoryPath("root"), 10));
|
||||
|
||||
// make sure the search params holds 3 requests now
|
||||
assertEquals(3, fsp.facetRequests.size());
|
||||
|
|
|
@ -5,6 +5,7 @@ import java.util.Arrays;
|
|||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.facet.FacetTestBase;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.params.ScoreFacetRequest;
|
||||
|
@ -37,11 +38,14 @@ import org.junit.Test;
|
|||
/** Test ScoredDocIdCollector. */
|
||||
public class TestScoredDocIdCollector extends FacetTestBase {
|
||||
|
||||
private FacetIndexingParams fip;
|
||||
|
||||
@Override
|
||||
@Before
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
initIndex();
|
||||
fip = getFacetIndexingParams(Integer.MAX_VALUE);
|
||||
initIndex(fip);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -73,8 +77,8 @@ public class TestScoredDocIdCollector extends FacetTestBase {
|
|||
|
||||
// verify by facet values
|
||||
CategoryPath cp = new CategoryPath("root","a");
|
||||
FacetSearchParams countFSP = new FacetSearchParams(getFacetIndexingParams(Integer.MAX_VALUE), new CountFacetRequest(cp, 10));
|
||||
FacetSearchParams scoreFSP = new FacetSearchParams(getFacetIndexingParams(Integer.MAX_VALUE), new ScoreFacetRequest(cp, 10));
|
||||
FacetSearchParams countFSP = new FacetSearchParams(fip, new CountFacetRequest(cp, 10));
|
||||
FacetSearchParams scoreFSP = new FacetSearchParams(fip, new ScoreFacetRequest(cp, 10));
|
||||
|
||||
List<FacetResult> countRes = findFacets(scoredDocIDs, countFSP);
|
||||
List<FacetResult> scoreRes = findFacets(scoredDocIDs, scoreFSP);
|
||||
|
@ -101,10 +105,8 @@ public class TestScoredDocIdCollector extends FacetTestBase {
|
|||
}
|
||||
|
||||
// compute facets with certain facet requests and docs
|
||||
private List<FacetResult> findFacets(ScoredDocIDs sDocids,
|
||||
FacetSearchParams facetSearchParams) throws IOException {
|
||||
FacetsAccumulator fAccumulator = new StandardFacetsAccumulator(
|
||||
facetSearchParams, indexReader, taxoReader);
|
||||
private List<FacetResult> findFacets(ScoredDocIDs sDocids, FacetSearchParams facetSearchParams) throws IOException {
|
||||
FacetsAccumulator fAccumulator = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxoReader);
|
||||
List<FacetResult> res = fAccumulator.accumulate(sDocids);
|
||||
|
||||
// Results are ready, printing them...
|
||||
|
|
|
@ -113,7 +113,6 @@ public class TestStandardFacetsAccumulator extends FacetTestCase {
|
|||
List<FacetResult> results = fc.getFacetResults();
|
||||
assertEquals("received too many facet results", 1, results.size());
|
||||
FacetResultNode frn = results.get(0).getFacetResultNode();
|
||||
assertEquals("wrong weight for \"A\"", 4, (int) frn.value);
|
||||
assertEquals("wrong number of children", 2, frn.subResults.size());
|
||||
for (FacetResultNode node : frn.subResults) {
|
||||
assertEquals("wrong weight for child " + node.label, 2, (int) node.value);
|
||||
|
|
|
@ -181,7 +181,6 @@ public class TestTopKInEachNodeResultHandler extends FacetTestCase {
|
|||
boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
|
||||
assertEquals(9, fr.getNumValidDescendants());
|
||||
FacetResultNode parentRes = fr.getFacetResultNode();
|
||||
assertEquals(16.0, parentRes.value, Double.MIN_VALUE);
|
||||
assertEquals(2, parentRes.subResults.size());
|
||||
// two nodes sorted by descending values: a/b with 8 and a/c with 6
|
||||
// a/b has two children a/b/2 with value 3, and a/b/1 with value 2.
|
||||
|
@ -217,7 +216,6 @@ public class TestTopKInEachNodeResultHandler extends FacetTestCase {
|
|||
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
|
||||
assertEquals(9, fr.getNumValidDescendants());
|
||||
parentRes = fr.getFacetResultNode();
|
||||
assertEquals(16.0, parentRes.value, Double.MIN_VALUE);
|
||||
assertEquals(2, parentRes.subResults.size());
|
||||
// two nodes sorted by descending values: a/b with 8 and a/c with 6
|
||||
// a/b has two children a/b/2 with value 3, and a/b/1 with value 2.
|
||||
|
@ -234,7 +232,6 @@ public class TestTopKInEachNodeResultHandler extends FacetTestCase {
|
|||
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
|
||||
assertEquals(4, fr.getNumValidDescendants(), 4);
|
||||
parentRes = fr.getFacetResultNode();
|
||||
assertEquals(16.0, parentRes.value, Double.MIN_VALUE);
|
||||
assertEquals(2, parentRes.subResults.size());
|
||||
// two nodes sorted by descending values:
|
||||
// a/b with value 8 and a/c with value 6
|
||||
|
|
|
@ -4,6 +4,8 @@ import java.util.ArrayList;
|
|||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest.ResultMode;
|
||||
|
@ -73,7 +75,9 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
|||
@Test
|
||||
public void testSimple() throws Exception {
|
||||
for (int partitionSize : partitionSizes) {
|
||||
initIndex(partitionSize);
|
||||
FacetIndexingParams fip = getFacetIndexingParams(partitionSize);
|
||||
OrdinalPolicy op = fip.getCategoryListParams(null).getOrdinalPolicy(null);
|
||||
initIndex(fip);
|
||||
|
||||
List<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
|
||||
facetRequests.add(new CountFacetRequest(new CategoryPath("a"), 100));
|
||||
|
@ -87,8 +91,8 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
|||
facetRequests.add(new CountFacetRequest(new CategoryPath("a", "c"), 100));
|
||||
|
||||
// do different facet counts and compare to control
|
||||
FacetSearchParams sParams = getFacetSearchParams(facetRequests, getFacetIndexingParams(partitionSize));
|
||||
|
||||
FacetSearchParams sParams = getFacetSearchParams(facetRequests, fip);
|
||||
|
||||
FacetsCollector fc = new StandardFacetsCollector(sParams, indexReader, taxoReader) {
|
||||
@Override
|
||||
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) {
|
||||
|
@ -100,17 +104,21 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
|||
|
||||
searcher.search(new MatchAllDocsQuery(), fc);
|
||||
List<FacetResult> facetResults = fc.getFacetResults();
|
||||
|
||||
|
||||
FacetResult fr = facetResults.get(0);
|
||||
FacetResultNode parentRes = fr.getFacetResultNode();
|
||||
assertEquals(13.0, parentRes.value, Double.MIN_VALUE);
|
||||
if (op == OrdinalPolicy.ALL_PARENTS) {
|
||||
assertEquals(13.0, parentRes.value, Double.MIN_VALUE);
|
||||
}
|
||||
FacetResultNode[] frn = resultNodesAsArray(parentRes);
|
||||
assertEquals(7.0, frn[0].value, Double.MIN_VALUE);
|
||||
assertEquals(6.0, frn[1].value, Double.MIN_VALUE);
|
||||
|
||||
fr = facetResults.get(1);
|
||||
parentRes = fr.getFacetResultNode();
|
||||
assertEquals(13.0, parentRes.value, Double.MIN_VALUE);
|
||||
if (op == OrdinalPolicy.ALL_PARENTS) {
|
||||
assertEquals(13.0, parentRes.value, Double.MIN_VALUE);
|
||||
}
|
||||
frn = resultNodesAsArray(parentRes);
|
||||
assertEquals(7.0, frn[0].value, Double.MIN_VALUE);
|
||||
assertEquals(6.0, frn[1].value, Double.MIN_VALUE);
|
||||
|
@ -121,7 +129,9 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
|||
|
||||
fr = facetResults.get(2);
|
||||
parentRes = fr.getFacetResultNode();
|
||||
assertEquals(7.0, parentRes.value, Double.MIN_VALUE);
|
||||
if (op == OrdinalPolicy.ALL_PARENTS) {
|
||||
assertEquals(7.0, parentRes.value, Double.MIN_VALUE);
|
||||
}
|
||||
frn = resultNodesAsArray(parentRes);
|
||||
assertEquals(2.0, frn[0].value, Double.MIN_VALUE);
|
||||
assertEquals(2.0, frn[1].value, Double.MIN_VALUE);
|
||||
|
@ -130,13 +140,17 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
|||
|
||||
fr = facetResults.get(3);
|
||||
parentRes = fr.getFacetResultNode();
|
||||
assertEquals(2.0, parentRes.value, Double.MIN_VALUE);
|
||||
if (op == OrdinalPolicy.ALL_PARENTS) {
|
||||
assertEquals(2.0, parentRes.value, Double.MIN_VALUE);
|
||||
}
|
||||
frn = resultNodesAsArray(parentRes);
|
||||
assertEquals(0, frn.length);
|
||||
|
||||
fr = facetResults.get(4);
|
||||
parentRes = fr.getFacetResultNode();
|
||||
assertEquals(6.0, parentRes.value, Double.MIN_VALUE);
|
||||
if (op == OrdinalPolicy.ALL_PARENTS) {
|
||||
assertEquals(6.0, parentRes.value, Double.MIN_VALUE);
|
||||
}
|
||||
frn = resultNodesAsArray(parentRes);
|
||||
assertEquals(1.0, frn[0].value, Double.MIN_VALUE);
|
||||
closeAll();
|
||||
|
@ -149,12 +163,12 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
|||
@Test
|
||||
public void testGetMaxIntFacets() throws Exception {
|
||||
for (int partitionSize : partitionSizes) {
|
||||
initIndex(partitionSize);
|
||||
FacetIndexingParams fip = getFacetIndexingParams(partitionSize);
|
||||
initIndex(fip);
|
||||
|
||||
// do different facet counts and compare to control
|
||||
CategoryPath path = new CategoryPath("a", "b");
|
||||
FacetSearchParams sParams = getFacetSearchParams(getFacetIndexingParams(partitionSize),
|
||||
new CountFacetRequest(path, Integer.MAX_VALUE));
|
||||
FacetSearchParams sParams = getFacetSearchParams(fip, new CountFacetRequest(path, Integer.MAX_VALUE));
|
||||
|
||||
FacetsCollector fc = new StandardFacetsCollector(sParams, indexReader, taxoReader) {
|
||||
@Override
|
||||
|
@ -174,7 +188,7 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
|||
|
||||
// As a control base results, ask for top-1000 results
|
||||
FacetSearchParams sParams2 = getFacetSearchParams(
|
||||
getFacetIndexingParams(partitionSize), new CountFacetRequest(path, Integer.MAX_VALUE));
|
||||
fip, new CountFacetRequest(path, Integer.MAX_VALUE));
|
||||
|
||||
FacetsCollector fc2 = new StandardFacetsCollector(sParams2, indexReader, taxoReader) {
|
||||
@Override
|
||||
|
@ -207,12 +221,11 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
|||
@Test
|
||||
public void testSimpleSearchForNonexistentFacet() throws Exception {
|
||||
for (int partitionSize : partitionSizes) {
|
||||
initIndex(partitionSize);
|
||||
FacetIndexingParams fip = getFacetIndexingParams(partitionSize);
|
||||
initIndex(fip);
|
||||
|
||||
CategoryPath path = new CategoryPath("Miau Hattulla");
|
||||
FacetSearchParams sParams = getFacetSearchParams(
|
||||
getFacetIndexingParams(partitionSize),
|
||||
new CountFacetRequest(path, 10));
|
||||
FacetSearchParams sParams = getFacetSearchParams(fip, new CountFacetRequest(path, 10));
|
||||
|
||||
FacetsCollector fc = FacetsCollector.create(sParams, indexReader, taxoReader);
|
||||
|
||||
|
|
|
@ -4,6 +4,7 @@ import java.io.IOException;
|
|||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.search.results.FacetResultNode;
|
||||
|
@ -32,10 +33,10 @@ import org.junit.Test;
|
|||
|
||||
public class TestTopKResultsHandlerRandom extends BaseTestTopK {
|
||||
|
||||
private List<FacetResult> countFacets(int partitionSize, int numResults, final boolean doComplement)
|
||||
private List<FacetResult> countFacets(FacetIndexingParams fip, int numResults, final boolean doComplement)
|
||||
throws IOException {
|
||||
Query q = new MatchAllDocsQuery();
|
||||
FacetSearchParams facetSearchParams = searchParamsWithRequests(numResults, partitionSize);
|
||||
FacetSearchParams facetSearchParams = searchParamsWithRequests(numResults, fip);
|
||||
FacetsCollector fc = new StandardFacetsCollector(facetSearchParams, indexReader, taxoReader) {
|
||||
@Override
|
||||
protected FacetsAccumulator initFacetsAccumulator(
|
||||
|
@ -59,7 +60,8 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK {
|
|||
@Test
|
||||
public void testTopCountsOrder() throws Exception {
|
||||
for (int partitionSize : partitionSizes) {
|
||||
initIndex(partitionSize);
|
||||
FacetIndexingParams fip = getFacetIndexingParams(partitionSize);
|
||||
initIndex(fip);
|
||||
|
||||
/*
|
||||
* Try out faceted search in it's most basic form (no sampling nor complement
|
||||
|
@ -67,7 +69,7 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK {
|
|||
* being indexed, and later on an "over-all" faceted search is performed. The
|
||||
* results are checked against the DF of each facet by itself
|
||||
*/
|
||||
List<FacetResult> facetResults = countFacets(partitionSize, 100000, false);
|
||||
List<FacetResult> facetResults = countFacets(fip, 100000, false);
|
||||
assertCountsAndCardinality(facetCountsTruth(), facetResults);
|
||||
|
||||
/*
|
||||
|
@ -77,10 +79,10 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK {
|
|||
* place in here. The results are checked against the a regular (a.k.a
|
||||
* no-complement, no-sampling) faceted search with the same parameters.
|
||||
*/
|
||||
facetResults = countFacets(partitionSize, 100000, true);
|
||||
facetResults = countFacets(fip, 100000, true);
|
||||
assertCountsAndCardinality(facetCountsTruth(), facetResults);
|
||||
|
||||
List<FacetResult> allFacetResults = countFacets(partitionSize, 100000, false);
|
||||
List<FacetResult> allFacetResults = countFacets(fip, 100000, false);
|
||||
|
||||
HashMap<String,Integer> all = new HashMap<String,Integer>();
|
||||
int maxNumNodes = 0;
|
||||
|
@ -108,7 +110,7 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK {
|
|||
if (VERBOSE) {
|
||||
System.out.println("------- verify for "+n+" top results");
|
||||
}
|
||||
List<FacetResult> someResults = countFacets(partitionSize, n, false);
|
||||
List<FacetResult> someResults = countFacets(fip, n, false);
|
||||
k = 0;
|
||||
for (FacetResult fr : someResults) {
|
||||
FacetResultNode topResNode = fr.getFacetResultNode();
|
||||
|
|
|
@ -8,7 +8,9 @@ import org.apache.lucene.facet.FacetTestCase;
|
|||
import org.apache.lucene.facet.FacetTestUtils;
|
||||
import org.apache.lucene.facet.FacetTestUtils.IndexTaxonomyReaderPair;
|
||||
import org.apache.lucene.facet.FacetTestUtils.IndexTaxonomyWriterPair;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
@ -59,6 +61,16 @@ public class TestTotalFacetCounts extends FacetTestCase {
|
|||
public int getPartitionSize() {
|
||||
return partitionSize;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CategoryListParams getCategoryListParams(CategoryPath category) {
|
||||
return new CategoryListParams() {
|
||||
@Override
|
||||
public OrdinalPolicy getOrdinalPolicy(String dimension) {
|
||||
return OrdinalPolicy.ALL_PARENTS;
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
// The counts that the TotalFacetCountsArray should have after adding
|
||||
// the below facets to the index.
|
||||
|
|
|
@ -87,7 +87,7 @@ public class TestTotalFacetCountsCache extends FacetTestCase {
|
|||
|
||||
/** Utility method to add a document and facets to an index/taxonomy. */
|
||||
static void addFacets(FacetIndexingParams iParams, IndexWriter iw,
|
||||
TaxonomyWriter tw, String... strings) throws IOException {
|
||||
TaxonomyWriter tw, String... strings) throws IOException {
|
||||
Document doc = new Document();
|
||||
FacetFields facetFields = new FacetFields(tw, iParams);
|
||||
facetFields.addFields(doc, Collections.singletonList(new CategoryPath(strings)));
|
||||
|
|
|
@ -3,6 +3,7 @@ package org.apache.lucene.facet.search.sampling;
|
|||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.search.BaseTestTopK;
|
||||
import org.apache.lucene.facet.search.FacetsAccumulator;
|
||||
import org.apache.lucene.facet.search.FacetsCollector;
|
||||
|
@ -46,8 +47,8 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
|
|||
protected static final int RETRIES = 10;
|
||||
|
||||
@Override
|
||||
protected FacetSearchParams searchParamsWithRequests(int numResults, int partitionSize) {
|
||||
FacetSearchParams res = super.searchParamsWithRequests(numResults, partitionSize);
|
||||
protected FacetSearchParams searchParamsWithRequests(int numResults, FacetIndexingParams fip) {
|
||||
FacetSearchParams res = super.searchParamsWithRequests(numResults, fip);
|
||||
for (FacetRequest req : res.facetRequests) {
|
||||
// randomize the way we aggregate results
|
||||
if (random().nextBoolean()) {
|
||||
|
@ -71,20 +72,23 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
|
|||
boolean useRandomSampler = random().nextBoolean();
|
||||
for (int partitionSize : partitionSizes) {
|
||||
try {
|
||||
initIndex(partitionSize);
|
||||
// complements return counts for all ordinals, so force ALL_PARENTS indexing
|
||||
// so that it's easier to compare
|
||||
FacetIndexingParams fip = getFacetIndexingParams(partitionSize, true);
|
||||
initIndex(fip);
|
||||
// Get all of the documents and run the query, then do different
|
||||
// facet counts and compare to control
|
||||
Query q = new TermQuery(new Term(CONTENT_FIELD, BETA)); // 90% of the docs
|
||||
ScoredDocIdCollector docCollector = ScoredDocIdCollector.create(indexReader.maxDoc(), false);
|
||||
|
||||
FacetSearchParams expectedSearchParams = searchParamsWithRequests(K, partitionSize);
|
||||
FacetSearchParams expectedSearchParams = searchParamsWithRequests(K, fip);
|
||||
FacetsCollector fc = FacetsCollector.create(expectedSearchParams, indexReader, taxoReader);
|
||||
|
||||
searcher.search(q, MultiCollector.wrap(docCollector, fc));
|
||||
|
||||
List<FacetResult> expectedResults = fc.getFacetResults();
|
||||
|
||||
FacetSearchParams samplingSearchParams = searchParamsWithRequests(K, partitionSize);
|
||||
FacetSearchParams samplingSearchParams = searchParamsWithRequests(K, fip);
|
||||
|
||||
// try several times in case of failure, because the test has a chance to fail
|
||||
// if the top K facets are not sufficiently common with the sample set
|
||||
|
|
Loading…
Reference in New Issue