mirror of https://github.com/apache/lucene.git
LUCENE-4715: Add OrdinalPolicy.ALL_BUT_DIMENSION
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1440416 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
913b168255
commit
42256baec5
|
@ -61,6 +61,11 @@ Optimizations
|
||||||
* LUCENE-4690: Performance improvements and non-hashing versions
|
* LUCENE-4690: Performance improvements and non-hashing versions
|
||||||
of NumericUtils.*ToPrefixCoded() (yonik)
|
of NumericUtils.*ToPrefixCoded() (yonik)
|
||||||
|
|
||||||
|
* LUCENE-4715: CategoryListParams.getOrdinalPolicy now allows returning a
|
||||||
|
different OrdinalPolicy per dimension, to better tune how you index
|
||||||
|
facets. Also added OrdinalPolicy.ALL_BUT_DIMENSION.
|
||||||
|
(Shai Erera, Michael McCandless)
|
||||||
|
|
||||||
New Features
|
New Features
|
||||||
|
|
||||||
* LUCENE-4686: New specialized DGapVInt8IntEncoder for facets (now the
|
* LUCENE-4686: New specialized DGapVInt8IntEncoder for facets (now the
|
||||||
|
|
|
@ -3,6 +3,7 @@ package org.apache.lucene.facet.index;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.util.Collections;
|
import java.util.Collections;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
import java.util.Iterator;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Map.Entry;
|
import java.util.Map.Entry;
|
||||||
|
|
||||||
|
@ -115,12 +116,12 @@ public class CountingListBuilder implements CategoryListBuilder {
|
||||||
|
|
||||||
private final OrdinalsEncoder ordinalsEncoder;
|
private final OrdinalsEncoder ordinalsEncoder;
|
||||||
private final TaxonomyWriter taxoWriter;
|
private final TaxonomyWriter taxoWriter;
|
||||||
private final OrdinalPolicy ordinalPolicy;
|
private final CategoryListParams clp;
|
||||||
|
|
||||||
public CountingListBuilder(CategoryListParams categoryListParams, FacetIndexingParams indexingParams,
|
public CountingListBuilder(CategoryListParams categoryListParams, FacetIndexingParams indexingParams,
|
||||||
TaxonomyWriter taxoWriter) {
|
TaxonomyWriter taxoWriter) {
|
||||||
this.taxoWriter = taxoWriter;
|
this.taxoWriter = taxoWriter;
|
||||||
this.ordinalPolicy = categoryListParams.getOrdinalPolicy();
|
this.clp = categoryListParams;
|
||||||
if (indexingParams.getPartitionSize() == Integer.MAX_VALUE) {
|
if (indexingParams.getPartitionSize() == Integer.MAX_VALUE) {
|
||||||
ordinalsEncoder = new NoPartitionsOrdinalsEncoder(categoryListParams);
|
ordinalsEncoder = new NoPartitionsOrdinalsEncoder(categoryListParams);
|
||||||
} else {
|
} else {
|
||||||
|
@ -141,16 +142,23 @@ public class CountingListBuilder implements CategoryListBuilder {
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public Map<String,BytesRef> build(IntsRef ordinals, Iterable<CategoryPath> categories) throws IOException {
|
public Map<String,BytesRef> build(IntsRef ordinals, Iterable<CategoryPath> categories) throws IOException {
|
||||||
int upto = ordinals.length; // since we add ordinals to IntsRef, iterate upto original length
|
int upto = ordinals.length; // since we may add ordinals to IntsRef, iterate upto original length
|
||||||
|
|
||||||
if (ordinalPolicy == OrdinalPolicy.ALL_PARENTS) { // add all parents too
|
Iterator<CategoryPath> iter = categories.iterator();
|
||||||
for (int i = 0; i < upto; i++) {
|
for (int i = 0; i < upto; i++) {
|
||||||
int ordinal = ordinals.ints[i];
|
int ordinal = ordinals.ints[i];
|
||||||
|
CategoryPath cp = iter.next();
|
||||||
|
OrdinalPolicy op = clp.getOrdinalPolicy(cp.components[0]);
|
||||||
|
if (op != OrdinalPolicy.NO_PARENTS) {
|
||||||
|
// need to add parents too
|
||||||
int parent = taxoWriter.getParent(ordinal);
|
int parent = taxoWriter.getParent(ordinal);
|
||||||
while (parent > 0) {
|
while (parent > 0) {
|
||||||
ordinals.ints[ordinals.length++] = parent;
|
ordinals.ints[ordinals.length++] = parent;
|
||||||
parent = taxoWriter.getParent(parent);
|
parent = taxoWriter.getParent(parent);
|
||||||
}
|
}
|
||||||
|
if (op == OrdinalPolicy.ALL_BUT_DIMENSION) { // discard the last added parent, which is the dimension
|
||||||
|
ordinals.length--;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return ordinalsEncoder.encode(ordinals);
|
return ordinalsEncoder.encode(ordinals);
|
||||||
|
|
|
@ -4,6 +4,7 @@ import java.io.IOException;
|
||||||
|
|
||||||
import org.apache.lucene.facet.search.CategoryListIterator;
|
import org.apache.lucene.facet.search.CategoryListIterator;
|
||||||
import org.apache.lucene.facet.search.DocValuesCategoryListIterator;
|
import org.apache.lucene.facet.search.DocValuesCategoryListIterator;
|
||||||
|
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||||
import org.apache.lucene.facet.util.PartitionsUtils;
|
import org.apache.lucene.facet.util.PartitionsUtils;
|
||||||
import org.apache.lucene.util.encoding.DGapVInt8IntEncoder;
|
import org.apache.lucene.util.encoding.DGapVInt8IntEncoder;
|
||||||
import org.apache.lucene.util.encoding.IntDecoder;
|
import org.apache.lucene.util.encoding.IntDecoder;
|
||||||
|
@ -35,25 +36,61 @@ import org.apache.lucene.util.encoding.UniqueValuesIntEncoder;
|
||||||
*/
|
*/
|
||||||
public class CategoryListParams {
|
public class CategoryListParams {
|
||||||
|
|
||||||
/** OrdinalPolicy defines which ordinals are encoded for every document. */
|
/**
|
||||||
|
* Defines which category ordinals are encoded for every document. This also
|
||||||
|
* affects how category ordinals are aggregated, check the different policies
|
||||||
|
* for more details.
|
||||||
|
*/
|
||||||
public static enum OrdinalPolicy {
|
public static enum OrdinalPolicy {
|
||||||
/**
|
/**
|
||||||
* Encodes only the ordinal of leaf nodes. That is, the category A/B/C will
|
* Encodes only the ordinals of leaf nodes. That is, for the category A/B/C,
|
||||||
* not encode the ordinals of A and A/B.
|
* the ordinals of A and A/B will not be encoded. This policy is efficient
|
||||||
|
* for hierarchical dimensions, as it reduces the number of ordinals that
|
||||||
|
* are visited per document. During faceted search, this policy behaves
|
||||||
|
* exactly like {@link #ALL_PARENTS}, and the counts of all path components
|
||||||
|
* will be computed as well.
|
||||||
*
|
*
|
||||||
* <p>
|
* <p>
|
||||||
* <b>NOTE:</b> this {@link OrdinalPolicy} requires a special collector or
|
* <b>NOTE:</b> this {@link OrdinalPolicy} requires a special collector or
|
||||||
* accumulator, which will fix the parents' counts, unless you are not
|
* accumulator, which will fix the parents' counts.
|
||||||
* interested in the parents counts.
|
*
|
||||||
|
* <p>
|
||||||
|
* <b>NOTE:</b> since only leaf nodes are encoded for the document, you
|
||||||
|
* should use this policy when the same document doesn't share two
|
||||||
|
* categories that have a mutual parent, or otherwise the counts will be
|
||||||
|
* wrong (the mutual parent will be over-counted). For example, if a
|
||||||
|
* document has the categories A/B/C and A/B/D, then with this policy the
|
||||||
|
* counts of "A" and "B" will be 2, which is wrong. If you intend to index
|
||||||
|
* hierarchical dimensions, with more than one category per document, you
|
||||||
|
* should use either {@link #ALL_PARENTS} or {@link #ALL_BUT_DIMENSION}.
|
||||||
*/
|
*/
|
||||||
NO_PARENTS,
|
NO_PARENTS,
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Encodes the ordinals of all path components. That is, the category A/B/C
|
* Encodes the ordinals of all path components. That is, the category A/B/C
|
||||||
* will encode the ordinals of A and A/B as well. This is the default
|
* will encode the ordinals of A and A/B as well. If you don't require the
|
||||||
* {@link OrdinalPolicy}.
|
* dimension's count during search, consider using
|
||||||
|
* {@link #ALL_BUT_DIMENSION}.
|
||||||
*/
|
*/
|
||||||
ALL_PARENTS
|
ALL_PARENTS,
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Encodes the ordinals of all path components except the dimension. The
|
||||||
|
* dimension of a category is defined to be the first component in
|
||||||
|
* {@link CategoryPath#components}. For the category A/B/C, the ordinal of
|
||||||
|
* A/B will be encoded as well, however not the ordinal of A.
|
||||||
|
*
|
||||||
|
* <p>
|
||||||
|
* <b>NOTE:</b> when facets are aggregated, this policy behaves exactly like
|
||||||
|
* {@link #ALL_PARENTS}, except that the dimension is never counted. I.e. if
|
||||||
|
* you ask to count the facet "A", then while in {@link #ALL_PARENTS} you
|
||||||
|
* will get counts for "A" <u>and its children</u>, with this policy you
|
||||||
|
* will get counts for <u>only its children</u>. This policy is the default
|
||||||
|
* one, and makes sense for use with flat dimensions, whenever your
|
||||||
|
* application does not require the dimension's count. Otherwise, use
|
||||||
|
* {@link #ALL_PARENTS}.
|
||||||
|
*/
|
||||||
|
ALL_BUT_DIMENSION
|
||||||
}
|
}
|
||||||
|
|
||||||
/** The default field used to store the facets information. */
|
/** The default field used to store the facets information. */
|
||||||
|
@ -63,7 +100,7 @@ public class CategoryListParams {
|
||||||
* The default {@link OrdinalPolicy} that's used when encoding a document's
|
* The default {@link OrdinalPolicy} that's used when encoding a document's
|
||||||
* category ordinals.
|
* category ordinals.
|
||||||
*/
|
*/
|
||||||
public static final OrdinalPolicy DEFAULT_ORDINAL_POLICY = OrdinalPolicy.ALL_PARENTS;
|
public static final OrdinalPolicy DEFAULT_ORDINAL_POLICY = OrdinalPolicy.ALL_BUT_DIMENSION;
|
||||||
|
|
||||||
public final String field;
|
public final String field;
|
||||||
|
|
||||||
|
@ -115,19 +152,15 @@ public class CategoryListParams {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
CategoryListParams other = (CategoryListParams) o;
|
CategoryListParams other = (CategoryListParams) o;
|
||||||
if (this.hashCode != other.hashCode) {
|
if (hashCode != other.hashCode) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// The above hashcodes might equal each other in the case of a collision,
|
|
||||||
// so at this point only directly term equality testing will settle
|
|
||||||
// the equality test.
|
|
||||||
return field.equals(other.field);
|
return field.equals(other.field);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return this.hashCode;
|
return hashCode;
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Create the {@link CategoryListIterator} for the specified partition. */
|
/** Create the {@link CategoryListIterator} for the specified partition. */
|
||||||
|
@ -137,14 +170,18 @@ public class CategoryListParams {
|
||||||
return new DocValuesCategoryListIterator(docValuesField, createEncoder().createMatchingDecoder());
|
return new DocValuesCategoryListIterator(docValuesField, createEncoder().createMatchingDecoder());
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Returns the {@link OrdinalPolicy} to use for this {@link CategoryListParams}. */
|
/**
|
||||||
public OrdinalPolicy getOrdinalPolicy() {
|
* Returns the {@link OrdinalPolicy} to use for the given dimension. This
|
||||||
|
* {@link CategoryListParams} always returns {@link #DEFAULT_ORDINAL_POLICY}
|
||||||
|
* for all dimensions.
|
||||||
|
*/
|
||||||
|
public OrdinalPolicy getOrdinalPolicy(String dimension) {
|
||||||
return DEFAULT_ORDINAL_POLICY;
|
return DEFAULT_ORDINAL_POLICY;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return "field=" + field + " encoder=" + createEncoder() + " ordinalPolicy=" + getOrdinalPolicy();
|
return "field=" + field + " encoder=" + createEncoder() + " ordinalPolicy=" + getOrdinalPolicy(null);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,55 @@
|
||||||
|
package org.apache.lucene.facet.index.params;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A {@link CategoryListParams} which allows controlling the
|
||||||
|
* {@link CategoryListParams.OrdinalPolicy} used for each dimension. The
|
||||||
|
* dimension is specified as the first component in
|
||||||
|
* {@link CategoryPath#components}.
|
||||||
|
*/
|
||||||
|
public class PerDimensionOrdinalPolicy extends CategoryListParams {
|
||||||
|
|
||||||
|
private final Map<String,OrdinalPolicy> policies;
|
||||||
|
private final OrdinalPolicy defaultOP;
|
||||||
|
|
||||||
|
public PerDimensionOrdinalPolicy(Map<String,OrdinalPolicy> policies) {
|
||||||
|
this(policies, DEFAULT_ORDINAL_POLICY);
|
||||||
|
}
|
||||||
|
|
||||||
|
public PerDimensionOrdinalPolicy(Map<String,OrdinalPolicy> policies, OrdinalPolicy defaultOP) {
|
||||||
|
this.defaultOP = defaultOP;
|
||||||
|
this.policies = policies;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public OrdinalPolicy getOrdinalPolicy(String dimension) {
|
||||||
|
OrdinalPolicy op = policies.get(dimension);
|
||||||
|
return op == null ? defaultOP : op;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
return super.toString() + " policies=" + policies;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -85,7 +85,7 @@ import org.apache.lucene.util.encoding.DGapVInt8IntDecoder;
|
||||||
public class CountingFacetsCollector extends FacetsCollector {
|
public class CountingFacetsCollector extends FacetsCollector {
|
||||||
|
|
||||||
private final FacetSearchParams fsp;
|
private final FacetSearchParams fsp;
|
||||||
private final OrdinalPolicy ordinalPolicy;
|
private final CategoryListParams clp;
|
||||||
private final TaxonomyReader taxoReader;
|
private final TaxonomyReader taxoReader;
|
||||||
private final BytesRef buf = new BytesRef(32);
|
private final BytesRef buf = new BytesRef(32);
|
||||||
private final FacetArrays facetArrays;
|
private final FacetArrays facetArrays;
|
||||||
|
@ -107,8 +107,7 @@ public class CountingFacetsCollector extends FacetsCollector {
|
||||||
assert assertParams(fsp) == null : assertParams(fsp);
|
assert assertParams(fsp) == null : assertParams(fsp);
|
||||||
|
|
||||||
this.fsp = fsp;
|
this.fsp = fsp;
|
||||||
CategoryListParams clp = fsp.indexingParams.getCategoryListParams(fsp.facetRequests.get(0).categoryPath);
|
this.clp = fsp.indexingParams.getCategoryListParams(fsp.facetRequests.get(0).categoryPath);
|
||||||
this.ordinalPolicy = clp.getOrdinalPolicy();
|
|
||||||
this.facetsField = clp.field;
|
this.facetsField = clp.field;
|
||||||
this.taxoReader = taxoReader;
|
this.taxoReader = taxoReader;
|
||||||
this.facetArrays = facetArrays;
|
this.facetArrays = facetArrays;
|
||||||
|
@ -217,21 +216,21 @@ public class CountingFacetsCollector extends FacetsCollector {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void countParents(int[] parents) {
|
/**
|
||||||
// counts[0] is the count of ROOT, which we don't care about and counts[1]
|
* Computes the counts of ordinals under the given ordinal's tree, by
|
||||||
// can only update counts[0], so we don't bother to visit it too. also,
|
* recursively going down to leaf nodes and rolling up their counts (called
|
||||||
// since parents always have lower ordinals than their children, we traverse
|
* only when categories are indexed with OrdinalPolicy.NO_PARENTS).
|
||||||
// the array backwards. this also allows us to update just the immediate
|
*/
|
||||||
// parent's count (actually, otherwise it would be a mistake).
|
private int rollupCounts(int ordinal, int[] children, int[] siblings) {
|
||||||
for (int i = counts.length - 1; i > 1; i--) {
|
int count = 0;
|
||||||
int count = counts[i];
|
while (ordinal != TaxonomyReader.INVALID_ORDINAL) {
|
||||||
if (count > 0) {
|
int childCount = counts[ordinal];
|
||||||
int parent = parents[i];
|
childCount += rollupCounts(children[ordinal], children, siblings);
|
||||||
if (parent != 0) {
|
counts[ordinal] = childCount;
|
||||||
counts[parent] += count;
|
count += childCount;
|
||||||
}
|
ordinal = siblings[ordinal];
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -242,11 +241,6 @@ public class CountingFacetsCollector extends FacetsCollector {
|
||||||
|
|
||||||
ParallelTaxonomyArrays arrays = taxoReader.getParallelTaxonomyArrays();
|
ParallelTaxonomyArrays arrays = taxoReader.getParallelTaxonomyArrays();
|
||||||
|
|
||||||
if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) {
|
|
||||||
// need to count parents
|
|
||||||
countParents(arrays.parents());
|
|
||||||
}
|
|
||||||
|
|
||||||
// compute top-K
|
// compute top-K
|
||||||
final int[] children = arrays.children();
|
final int[] children = arrays.children();
|
||||||
final int[] siblings = arrays.siblings();
|
final int[] siblings = arrays.siblings();
|
||||||
|
@ -256,6 +250,12 @@ public class CountingFacetsCollector extends FacetsCollector {
|
||||||
if (rootOrd == TaxonomyReader.INVALID_ORDINAL) { // category does not exist
|
if (rootOrd == TaxonomyReader.INVALID_ORDINAL) { // category does not exist
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
OrdinalPolicy ordinalPolicy = clp.getOrdinalPolicy(fr.categoryPath.components[0]);
|
||||||
|
if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) {
|
||||||
|
// need to count parents
|
||||||
|
counts[rootOrd] += rollupCounts(children[rootOrd], children, siblings);
|
||||||
|
}
|
||||||
|
|
||||||
FacetResultNode root = new FacetResultNode();
|
FacetResultNode root = new FacetResultNode();
|
||||||
root.ordinal = rootOrd;
|
root.ordinal = rootOrd;
|
||||||
root.label = fr.categoryPath;
|
root.label = fr.categoryPath;
|
||||||
|
|
|
@ -43,13 +43,21 @@ public abstract class FacetsCollector extends Collector {
|
||||||
* Returns the most optimized {@link FacetsCollector} for the given search
|
* Returns the most optimized {@link FacetsCollector} for the given search
|
||||||
* parameters. The returned {@link FacetsCollector} is guaranteed to satisfy
|
* parameters. The returned {@link FacetsCollector} is guaranteed to satisfy
|
||||||
* the requested parameters.
|
* the requested parameters.
|
||||||
|
*
|
||||||
|
* @throws IllegalArgumentException
|
||||||
|
* if there is no built-in collector that can satisfy the search
|
||||||
|
* parameters.
|
||||||
*/
|
*/
|
||||||
public static FacetsCollector create(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) {
|
public static FacetsCollector create(FacetSearchParams fsp, IndexReader indexReader, TaxonomyReader taxoReader) {
|
||||||
if (CountingFacetsCollector.assertParams(fsp) == null) {
|
if (CountingFacetsCollector.assertParams(fsp) == null) {
|
||||||
return new CountingFacetsCollector(fsp, taxoReader);
|
return new CountingFacetsCollector(fsp, taxoReader);
|
||||||
}
|
}
|
||||||
|
|
||||||
return new StandardFacetsCollector(fsp, indexReader, taxoReader);
|
if (StandardFacetsCollector.assertParams(fsp) == null) {
|
||||||
|
return new StandardFacetsCollector(fsp, indexReader, taxoReader);
|
||||||
|
}
|
||||||
|
|
||||||
|
throw new IllegalArgumentException("None of the built-in FacetsCollectors can handle the given search params");
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -49,6 +49,17 @@ public class StandardFacetsCollector extends FacetsCollector {
|
||||||
private List<FacetResult> results;
|
private List<FacetResult> results;
|
||||||
private Object resultsGuard;
|
private Object resultsGuard;
|
||||||
|
|
||||||
|
static String assertParams(FacetSearchParams fsp) {
|
||||||
|
// make sure none of the categories in the given FacetRequests was indexed with NO_PARENTS
|
||||||
|
for (FacetRequest fr : fsp.facetRequests) {
|
||||||
|
CategoryListParams clp = fsp.indexingParams.getCategoryListParams(fr.categoryPath);
|
||||||
|
if (clp.getOrdinalPolicy(fr.categoryPath.components[0]) == OrdinalPolicy.NO_PARENTS) {
|
||||||
|
return "this collector does not support aggregating categories that were indexed with OrdinalPolicy.NO_PARENTS";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Create a collector for accumulating facets while collecting documents
|
* Create a collector for accumulating facets while collecting documents
|
||||||
* during search.
|
* during search.
|
||||||
|
@ -62,6 +73,7 @@ public class StandardFacetsCollector extends FacetsCollector {
|
||||||
* taxonomy containing the facets.
|
* taxonomy containing the facets.
|
||||||
*/
|
*/
|
||||||
public StandardFacetsCollector(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) {
|
public StandardFacetsCollector(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) {
|
||||||
|
assert assertParams(facetSearchParams) == null : assertParams(facetSearchParams);
|
||||||
facetsAccumulator = initFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
|
facetsAccumulator = initFacetsAccumulator(facetSearchParams, indexReader, taxonomyReader);
|
||||||
scoreDocIdCollector = initScoredDocCollector(facetSearchParams, indexReader, taxonomyReader);
|
scoreDocIdCollector = initScoredDocCollector(facetSearchParams, indexReader, taxonomyReader);
|
||||||
resultsGuard = new Object();
|
resultsGuard = new Object();
|
||||||
|
|
|
@ -74,6 +74,7 @@ public class ScoredDocIdsUtils {
|
||||||
|
|
||||||
/** Clear all deleted documents from a given open-bit-set according to a given reader */
|
/** Clear all deleted documents from a given open-bit-set according to a given reader */
|
||||||
private static void clearDeleted(final IndexReader reader, final FixedBitSet set) throws IOException {
|
private static void clearDeleted(final IndexReader reader, final FixedBitSet set) throws IOException {
|
||||||
|
// TODO use BitsFilteredDocIdSet?
|
||||||
|
|
||||||
// If there are no deleted docs
|
// If there are no deleted docs
|
||||||
if (!reader.hasDeletions()) {
|
if (!reader.hasDeletions()) {
|
||||||
|
|
|
@ -6,6 +6,7 @@ import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.HashSet;
|
import java.util.HashSet;
|
||||||
|
import java.util.Iterator;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
|
@ -17,6 +18,7 @@ import org.apache.lucene.document.Field;
|
||||||
import org.apache.lucene.document.TextField;
|
import org.apache.lucene.document.TextField;
|
||||||
import org.apache.lucene.facet.index.FacetFields;
|
import org.apache.lucene.facet.index.FacetFields;
|
||||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||||
|
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
|
||||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||||
|
@ -44,6 +46,7 @@ import org.apache.lucene.util.Bits;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
|
||||||
import org.apache.lucene.util._TestUtil;
|
import org.apache.lucene.util._TestUtil;
|
||||||
|
import org.apache.lucene.util.collections.IntToObjectMap;
|
||||||
import org.junit.AfterClass;
|
import org.junit.AfterClass;
|
||||||
import org.junit.BeforeClass;
|
import org.junit.BeforeClass;
|
||||||
|
|
||||||
|
@ -73,7 +76,8 @@ public abstract class FacetTestBase extends FacetTestCase {
|
||||||
SearchTaxoDirPair() {}
|
SearchTaxoDirPair() {}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static HashMap<Integer, SearchTaxoDirPair> dirsPerPartitionSize;
|
private static IntToObjectMap<SearchTaxoDirPair> dirsPerPartitionSize;
|
||||||
|
private static IntToObjectMap<FacetIndexingParams> fipPerPartitionSize;
|
||||||
private static File TEST_DIR;
|
private static File TEST_DIR;
|
||||||
|
|
||||||
/** Documents text field. */
|
/** Documents text field. */
|
||||||
|
@ -91,12 +95,15 @@ public abstract class FacetTestBase extends FacetTestCase {
|
||||||
@BeforeClass
|
@BeforeClass
|
||||||
public static void beforeClassFacetTestBase() {
|
public static void beforeClassFacetTestBase() {
|
||||||
TEST_DIR = _TestUtil.getTempDir("facets");
|
TEST_DIR = _TestUtil.getTempDir("facets");
|
||||||
dirsPerPartitionSize = new HashMap<Integer, FacetTestBase.SearchTaxoDirPair>();
|
dirsPerPartitionSize = new IntToObjectMap<FacetTestBase.SearchTaxoDirPair>();
|
||||||
|
fipPerPartitionSize = new IntToObjectMap<FacetIndexingParams>();
|
||||||
}
|
}
|
||||||
|
|
||||||
@AfterClass
|
@AfterClass
|
||||||
public static void afterClassFacetTestBase() throws Exception {
|
public static void afterClassFacetTestBase() throws Exception {
|
||||||
for (SearchTaxoDirPair pair : dirsPerPartitionSize.values()) {
|
Iterator<SearchTaxoDirPair> iter = dirsPerPartitionSize.iterator();
|
||||||
|
while (iter.hasNext()) {
|
||||||
|
SearchTaxoDirPair pair = iter.next();
|
||||||
IOUtils.close(pair.searchDir, pair.taxoDir);
|
IOUtils.close(pair.searchDir, pair.taxoDir);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -128,20 +135,16 @@ public abstract class FacetTestBase extends FacetTestCase {
|
||||||
return DEFAULT_CONTENT[doc];
|
return DEFAULT_CONTENT[doc];
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Prepare index (in RAM) with single partition */
|
/** Prepare index (in RAM) with some documents and some facets. */
|
||||||
protected final void initIndex() throws Exception {
|
protected final void initIndex(FacetIndexingParams fip) throws Exception {
|
||||||
initIndex(Integer.MAX_VALUE);
|
initIndex(false, fip);
|
||||||
}
|
|
||||||
|
|
||||||
/** Prepare index (in RAM) with some documents and some facets */
|
|
||||||
protected final void initIndex(int partitionSize) throws Exception {
|
|
||||||
initIndex(partitionSize, false);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/** Prepare index (in RAM/Disk) with some documents and some facets */
|
/** Prepare index (in RAM/Disk) with some documents and some facets. */
|
||||||
protected final void initIndex(int partitionSize, boolean forceDisk) throws Exception {
|
protected final void initIndex(boolean forceDisk, FacetIndexingParams fip) throws Exception {
|
||||||
|
int partitionSize = fip.getPartitionSize();
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("Partition Size: " + partitionSize+" forceDisk: "+forceDisk);
|
System.out.println("Partition Size: " + partitionSize + " forceDisk: "+forceDisk);
|
||||||
}
|
}
|
||||||
|
|
||||||
SearchTaxoDirPair pair = dirsPerPartitionSize.get(Integer.valueOf(partitionSize));
|
SearchTaxoDirPair pair = dirsPerPartitionSize.get(Integer.valueOf(partitionSize));
|
||||||
|
@ -158,7 +161,7 @@ public abstract class FacetTestBase extends FacetTestCase {
|
||||||
RandomIndexWriter iw = new RandomIndexWriter(random(), pair.searchDir, getIndexWriterConfig(getAnalyzer()));
|
RandomIndexWriter iw = new RandomIndexWriter(random(), pair.searchDir, getIndexWriterConfig(getAnalyzer()));
|
||||||
TaxonomyWriter taxo = new DirectoryTaxonomyWriter(pair.taxoDir, OpenMode.CREATE);
|
TaxonomyWriter taxo = new DirectoryTaxonomyWriter(pair.taxoDir, OpenMode.CREATE);
|
||||||
|
|
||||||
populateIndex(iw, taxo, getFacetIndexingParams(partitionSize));
|
populateIndex(iw, taxo, fip);
|
||||||
|
|
||||||
// commit changes (taxonomy prior to search index for consistency)
|
// commit changes (taxonomy prior to search index for consistency)
|
||||||
taxo.commit();
|
taxo.commit();
|
||||||
|
@ -182,14 +185,40 @@ public abstract class FacetTestBase extends FacetTestCase {
|
||||||
|
|
||||||
/** Returns a {@link FacetIndexingParams} per the given partition size. */
|
/** Returns a {@link FacetIndexingParams} per the given partition size. */
|
||||||
protected FacetIndexingParams getFacetIndexingParams(final int partSize) {
|
protected FacetIndexingParams getFacetIndexingParams(final int partSize) {
|
||||||
// several of our encoders don't support the value 0,
|
return getFacetIndexingParams(partSize, false);
|
||||||
// which is one of the values encoded when dealing w/ partitions.
|
}
|
||||||
return new FacetIndexingParams() {
|
|
||||||
@Override
|
/**
|
||||||
public int getPartitionSize() {
|
* Returns a {@link FacetIndexingParams} per the given partition size. If
|
||||||
return partSize;
|
* requested, then {@link OrdinalPolicy} will be set to
|
||||||
}
|
* {@link OrdinalPolicy#ALL_PARENTS}, otherwise it will randomize.
|
||||||
};
|
*/
|
||||||
|
protected FacetIndexingParams getFacetIndexingParams(final int partSize, final boolean forceAllParents) {
|
||||||
|
FacetIndexingParams fip = fipPerPartitionSize.get(partSize);
|
||||||
|
if (fip == null) {
|
||||||
|
// randomize OrdinalPolicy. Since not all Collectors / Accumulators
|
||||||
|
// support NO_PARENTS, don't include it.
|
||||||
|
// TODO: once all code paths support NO_PARENTS, randomize it too.
|
||||||
|
CategoryListParams randomOP = new CategoryListParams() {
|
||||||
|
final OrdinalPolicy op = random().nextBoolean() ? OrdinalPolicy.ALL_BUT_DIMENSION : OrdinalPolicy.ALL_PARENTS;
|
||||||
|
@Override
|
||||||
|
public OrdinalPolicy getOrdinalPolicy(String dimension) {
|
||||||
|
return forceAllParents ? OrdinalPolicy.ALL_PARENTS : op;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// several of our encoders don't support the value 0,
|
||||||
|
// which is one of the values encoded when dealing w/ partitions,
|
||||||
|
// therefore don't randomize the encoder.
|
||||||
|
fip = new FacetIndexingParams(randomOP) {
|
||||||
|
@Override
|
||||||
|
public int getPartitionSize() {
|
||||||
|
return partSize;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
fipPerPartitionSize.put(partSize, fip);
|
||||||
|
}
|
||||||
|
return fip;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -45,7 +45,6 @@ public class TestMultiCLExample extends LuceneTestCase {
|
||||||
assertNotNull("Result should not be null", result);
|
assertNotNull("Result should not be null", result);
|
||||||
FacetResultNode node = result.getFacetResultNode();
|
FacetResultNode node = result.getFacetResultNode();
|
||||||
assertEquals("Invalid label", "5", node.label.toString());
|
assertEquals("Invalid label", "5", node.label.toString());
|
||||||
assertEquals("Invalid value", 2.0, node.value, 0.0);
|
|
||||||
assertEquals("Invalid # of subresults", 3, node.subResults.size());
|
assertEquals("Invalid # of subresults", 3, node.subResults.size());
|
||||||
|
|
||||||
Iterator<? extends FacetResultNode> subResults = node.subResults.iterator();
|
Iterator<? extends FacetResultNode> subResults = node.subResults.iterator();
|
||||||
|
|
|
@ -25,6 +25,8 @@ import org.apache.lucene.facet.FacetTestCase;
|
||||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||||
import org.apache.lucene.facet.index.params.PerDimensionIndexingParams;
|
import org.apache.lucene.facet.index.params.PerDimensionIndexingParams;
|
||||||
|
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
|
||||||
|
import org.apache.lucene.facet.index.params.PerDimensionOrdinalPolicy;
|
||||||
import org.apache.lucene.facet.search.CategoryListIterator;
|
import org.apache.lucene.facet.search.CategoryListIterator;
|
||||||
import org.apache.lucene.facet.search.DrillDown;
|
import org.apache.lucene.facet.search.DrillDown;
|
||||||
import org.apache.lucene.facet.search.FacetsCollector;
|
import org.apache.lucene.facet.search.FacetsCollector;
|
||||||
|
@ -368,9 +370,23 @@ public class TestFacetsPayloadMigrationReader extends FacetTestCase {
|
||||||
|
|
||||||
// set custom CLP fields for two dimensions and use the default ($facets) for the other two
|
// set custom CLP fields for two dimensions and use the default ($facets) for the other two
|
||||||
HashMap<CategoryPath,CategoryListParams> params = new HashMap<CategoryPath,CategoryListParams>();
|
HashMap<CategoryPath,CategoryListParams> params = new HashMap<CategoryPath,CategoryListParams>();
|
||||||
params.put(new CategoryPath(DIMENSIONS[0]), new CategoryListParams(DIMENSIONS[0]));
|
params.put(new CategoryPath(DIMENSIONS[0]), new CategoryListParams(DIMENSIONS[0]) {
|
||||||
params.put(new CategoryPath(DIMENSIONS[1]), new CategoryListParams(DIMENSIONS[1]));
|
@Override
|
||||||
FacetIndexingParams fip = new PerDimensionIndexingParams(params) {
|
public OrdinalPolicy getOrdinalPolicy(String dimension) {
|
||||||
|
return OrdinalPolicy.ALL_PARENTS;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
params.put(new CategoryPath(DIMENSIONS[1]), new CategoryListParams(DIMENSIONS[1]) {
|
||||||
|
@Override
|
||||||
|
public OrdinalPolicy getOrdinalPolicy(String dimension) {
|
||||||
|
return OrdinalPolicy.ALL_PARENTS;
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
HashMap<String,OrdinalPolicy> policies = new HashMap<String,CategoryListParams.OrdinalPolicy>();
|
||||||
|
policies.put(DIMENSIONS[2], OrdinalPolicy.ALL_PARENTS);
|
||||||
|
policies.put(DIMENSIONS[3], OrdinalPolicy.ALL_PARENTS);
|
||||||
|
FacetIndexingParams fip = new PerDimensionIndexingParams(params, new PerDimensionOrdinalPolicy(policies)) {
|
||||||
@Override
|
@Override
|
||||||
public int getPartitionSize() {
|
public int getPartitionSize() {
|
||||||
return partitionSize;
|
return partitionSize;
|
||||||
|
|
|
@ -51,10 +51,9 @@ public abstract class BaseTestTopK extends FacetTestBase {
|
||||||
private int nextInt;
|
private int nextInt;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected void populateIndex(RandomIndexWriter iw, TaxonomyWriter taxo,
|
protected void populateIndex(RandomIndexWriter iw, TaxonomyWriter taxo, FacetIndexingParams fip) throws IOException {
|
||||||
FacetIndexingParams iParams) throws IOException {
|
|
||||||
currDoc = -1;
|
currDoc = -1;
|
||||||
super.populateIndex(iw, taxo, iParams);
|
super.populateIndex(iw, taxo, fip);
|
||||||
}
|
}
|
||||||
|
|
||||||
/** prepare the next random int */
|
/** prepare the next random int */
|
||||||
|
@ -94,17 +93,13 @@ public abstract class BaseTestTopK extends FacetTestBase {
|
||||||
return Arrays.asList(cp);
|
return Arrays.asList(cp);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected FacetSearchParams searchParamsWithRequests(int numResults) {
|
protected FacetSearchParams searchParamsWithRequests(int numResults, FacetIndexingParams fip) {
|
||||||
return searchParamsWithRequests(numResults, Integer.MAX_VALUE);
|
|
||||||
}
|
|
||||||
|
|
||||||
protected FacetSearchParams searchParamsWithRequests(int numResults, int partitionSize) {
|
|
||||||
List<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
|
List<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
|
||||||
facetRequests.add(new CountFacetRequest(new CategoryPath("a"), numResults));
|
facetRequests.add(new CountFacetRequest(new CategoryPath("a"), numResults));
|
||||||
facetRequests.add(new CountFacetRequest(new CategoryPath("a", "1"), numResults));
|
facetRequests.add(new CountFacetRequest(new CategoryPath("a", "1"), numResults));
|
||||||
facetRequests.add(new CountFacetRequest(new CategoryPath("a", "1", "10"), numResults));
|
facetRequests.add(new CountFacetRequest(new CategoryPath("a", "1", "10"), numResults));
|
||||||
facetRequests.add(new CountFacetRequest(new CategoryPath("a", "2", "26", "267"), numResults));
|
facetRequests.add(new CountFacetRequest(new CategoryPath("a", "2", "26", "267"), numResults));
|
||||||
return getFacetSearchParams(facetRequests, getFacetIndexingParams(partitionSize));
|
return getFacetSearchParams(facetRequests, fip);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -16,8 +16,9 @@ import org.apache.lucene.document.StringField;
|
||||||
import org.apache.lucene.facet.FacetTestCase;
|
import org.apache.lucene.facet.FacetTestCase;
|
||||||
import org.apache.lucene.facet.index.FacetFields;
|
import org.apache.lucene.facet.index.FacetFields;
|
||||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||||
|
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
|
||||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||||
import org.apache.lucene.facet.index.params.PerDimensionIndexingParams;
|
import org.apache.lucene.facet.index.params.PerDimensionOrdinalPolicy;
|
||||||
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
||||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||||
import org.apache.lucene.facet.search.params.FacetRequest.SortBy;
|
import org.apache.lucene.facet.search.params.FacetRequest.SortBy;
|
||||||
|
@ -146,13 +147,11 @@ public class CountingFacetsCollectorTest extends FacetTestCase {
|
||||||
termExpectedCounts.put(cp, termExpectedCounts.get(cp) + 1);
|
termExpectedCounts.put(cp, termExpectedCounts.get(cp) + 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// add 1 to each dimension
|
// add 1 to each NO_PARENTS dimension
|
||||||
allExpectedCounts.put(CP_A, allExpectedCounts.get(CP_A) + 1);
|
|
||||||
allExpectedCounts.put(CP_B, allExpectedCounts.get(CP_B) + 1);
|
allExpectedCounts.put(CP_B, allExpectedCounts.get(CP_B) + 1);
|
||||||
allExpectedCounts.put(CP_C, allExpectedCounts.get(CP_C) + 1);
|
allExpectedCounts.put(CP_C, allExpectedCounts.get(CP_C) + 1);
|
||||||
allExpectedCounts.put(CP_D, allExpectedCounts.get(CP_D) + 1);
|
allExpectedCounts.put(CP_D, allExpectedCounts.get(CP_D) + 1);
|
||||||
if (updateTermExpectedCounts) {
|
if (updateTermExpectedCounts) {
|
||||||
termExpectedCounts.put(CP_A, termExpectedCounts.get(CP_A) + 1);
|
|
||||||
termExpectedCounts.put(CP_B, termExpectedCounts.get(CP_B) + 1);
|
termExpectedCounts.put(CP_B, termExpectedCounts.get(CP_B) + 1);
|
||||||
termExpectedCounts.put(CP_C, termExpectedCounts.get(CP_C) + 1);
|
termExpectedCounts.put(CP_C, termExpectedCounts.get(CP_C) + 1);
|
||||||
termExpectedCounts.put(CP_D, termExpectedCounts.get(CP_D) + 1);
|
termExpectedCounts.put(CP_D, termExpectedCounts.get(CP_D) + 1);
|
||||||
|
@ -252,19 +251,13 @@ public class CountingFacetsCollectorTest extends FacetTestCase {
|
||||||
conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); // prevent merges, so we can control the index segments
|
conf.setMergePolicy(NoMergePolicy.COMPOUND_FILES); // prevent merges, so we can control the index segments
|
||||||
IndexWriter indexWriter = new IndexWriter(indexDir, conf);
|
IndexWriter indexWriter = new IndexWriter(indexDir, conf);
|
||||||
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
|
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
|
||||||
CategoryListParams allParents = new CategoryListParams();
|
|
||||||
CategoryListParams noParents = new CategoryListParams("no_parents") {
|
Map<String,OrdinalPolicy> policies = new HashMap<String,CategoryListParams.OrdinalPolicy>();
|
||||||
@Override
|
policies.put(CP_B.components[0], OrdinalPolicy.ALL_PARENTS);
|
||||||
public OrdinalPolicy getOrdinalPolicy() {
|
policies.put(CP_C.components[0], OrdinalPolicy.NO_PARENTS);
|
||||||
return OrdinalPolicy.NO_PARENTS;
|
policies.put(CP_D.components[0], OrdinalPolicy.NO_PARENTS);
|
||||||
}
|
CategoryListParams clp = new PerDimensionOrdinalPolicy(policies);
|
||||||
};
|
fip = new FacetIndexingParams(clp);
|
||||||
Map<CategoryPath,CategoryListParams> params = new HashMap<CategoryPath,CategoryListParams>();
|
|
||||||
params.put(CP_A, allParents);
|
|
||||||
params.put(CP_B, allParents);
|
|
||||||
params.put(CP_C, noParents);
|
|
||||||
params.put(CP_D, noParents);
|
|
||||||
fip = new PerDimensionIndexingParams(params);
|
|
||||||
|
|
||||||
allExpectedCounts = newCounts();
|
allExpectedCounts = newCounts();
|
||||||
termExpectedCounts = newCounts();
|
termExpectedCounts = newCounts();
|
||||||
|
|
|
@ -104,9 +104,9 @@ public class TestDemoFacets extends FacetTestCase {
|
||||||
// Retrieve & verify results:
|
// Retrieve & verify results:
|
||||||
List<FacetResult> results = c.getFacetResults();
|
List<FacetResult> results = c.getFacetResults();
|
||||||
assertEquals(2, results.size());
|
assertEquals(2, results.size());
|
||||||
assertEquals("Publish Date (5)\n 2012 (2)\n 2010 (2)\n 1999 (1)\n",
|
assertEquals("Publish Date (0)\n 2012 (2)\n 2010 (2)\n 1999 (1)\n",
|
||||||
FacetTestUtils.toSimpleString(results.get(0)));
|
FacetTestUtils.toSimpleString(results.get(0)));
|
||||||
assertEquals("Author (5)\n Lisa (2)\n Frank (1)\n Susan (1)\n Bob (1)\n",
|
assertEquals("Author (0)\n Lisa (2)\n Frank (1)\n Susan (1)\n Bob (1)\n",
|
||||||
FacetTestUtils.toSimpleString(results.get(1)));
|
FacetTestUtils.toSimpleString(results.get(1)));
|
||||||
|
|
||||||
|
|
||||||
|
@ -117,7 +117,7 @@ public class TestDemoFacets extends FacetTestCase {
|
||||||
searcher.search(q2, c);
|
searcher.search(q2, c);
|
||||||
results = c.getFacetResults();
|
results = c.getFacetResults();
|
||||||
assertEquals(1, results.size());
|
assertEquals(1, results.size());
|
||||||
assertEquals("Author (2)\n Lisa (1)\n Bob (1)\n",
|
assertEquals("Author (0)\n Lisa (1)\n Bob (1)\n",
|
||||||
FacetTestUtils.toSimpleString(results.get(0)));
|
FacetTestUtils.toSimpleString(results.get(0)));
|
||||||
|
|
||||||
// Smoke test PrintTaxonomyStats:
|
// Smoke test PrintTaxonomyStats:
|
||||||
|
|
|
@ -14,6 +14,7 @@ import org.junit.Before;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
|
|
||||||
import org.apache.lucene.facet.FacetTestBase;
|
import org.apache.lucene.facet.FacetTestBase;
|
||||||
|
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||||
import org.apache.lucene.facet.search.FacetsAccumulator;
|
import org.apache.lucene.facet.search.FacetsAccumulator;
|
||||||
import org.apache.lucene.facet.search.ScoredDocIDs;
|
import org.apache.lucene.facet.search.ScoredDocIDs;
|
||||||
import org.apache.lucene.facet.search.ScoredDocIdCollector;
|
import org.apache.lucene.facet.search.ScoredDocIdCollector;
|
||||||
|
@ -48,11 +49,14 @@ import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||||
*/
|
*/
|
||||||
public class TestFacetsAccumulatorWithComplement extends FacetTestBase {
|
public class TestFacetsAccumulatorWithComplement extends FacetTestBase {
|
||||||
|
|
||||||
|
private FacetIndexingParams fip;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@Before
|
@Before
|
||||||
public void setUp() throws Exception {
|
public void setUp() throws Exception {
|
||||||
super.setUp();
|
super.setUp();
|
||||||
initIndex();
|
fip = getFacetIndexingParams(Integer.MAX_VALUE);
|
||||||
|
initIndex(fip);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -125,7 +129,7 @@ public class TestFacetsAccumulatorWithComplement extends FacetTestBase {
|
||||||
|
|
||||||
/** compute facets with certain facet requests and docs */
|
/** compute facets with certain facet requests and docs */
|
||||||
private List<FacetResult> findFacets(ScoredDocIDs sDocids, boolean withComplement) throws IOException {
|
private List<FacetResult> findFacets(ScoredDocIDs sDocids, boolean withComplement) throws IOException {
|
||||||
FacetSearchParams fsp = new FacetSearchParams(getFacetIndexingParams(Integer.MAX_VALUE), new CountFacetRequest(new CategoryPath("root","a"), 10));
|
FacetSearchParams fsp = new FacetSearchParams(fip, new CountFacetRequest(new CategoryPath("root","a"), 10));
|
||||||
FacetsAccumulator fAccumulator = new StandardFacetsAccumulator(fsp, indexReader, taxoReader);
|
FacetsAccumulator fAccumulator = new StandardFacetsAccumulator(fsp, indexReader, taxoReader);
|
||||||
|
|
||||||
fAccumulator.setComplementThreshold(
|
fAccumulator.setComplementThreshold(
|
||||||
|
|
|
@ -274,7 +274,6 @@ public class TestMultipleCategoryLists extends FacetTestCase {
|
||||||
Iterable<? extends FacetResultNode> subResults = resNode.subResults;
|
Iterable<? extends FacetResultNode> subResults = resNode.subResults;
|
||||||
Iterator<? extends FacetResultNode> subIter = subResults.iterator();
|
Iterator<? extends FacetResultNode> subIter = subResults.iterator();
|
||||||
|
|
||||||
checkResult(resNode, "Band", 5.0);
|
|
||||||
checkResult(subIter.next(), "Band/Rock & Pop", 4.0);
|
checkResult(subIter.next(), "Band/Rock & Pop", 4.0);
|
||||||
checkResult(subIter.next(), "Band/Punk", 1.0);
|
checkResult(subIter.next(), "Band/Punk", 1.0);
|
||||||
|
|
||||||
|
@ -283,7 +282,6 @@ public class TestMultipleCategoryLists extends FacetTestCase {
|
||||||
subResults = resNode.subResults;
|
subResults = resNode.subResults;
|
||||||
subIter = subResults.iterator();
|
subIter = subResults.iterator();
|
||||||
|
|
||||||
checkResult(resNode, "Band", 5.0);
|
|
||||||
checkResult(subIter.next(), "Band/Rock & Pop", 4.0);
|
checkResult(subIter.next(), "Band/Rock & Pop", 4.0);
|
||||||
checkResult(subIter.next(), "Band/Rock & Pop/Dave Matthews Band", 1.0);
|
checkResult(subIter.next(), "Band/Rock & Pop/Dave Matthews Band", 1.0);
|
||||||
checkResult(subIter.next(), "Band/Rock & Pop/REM", 1.0);
|
checkResult(subIter.next(), "Band/Rock & Pop/REM", 1.0);
|
||||||
|
@ -297,7 +295,6 @@ public class TestMultipleCategoryLists extends FacetTestCase {
|
||||||
subResults = resNode.subResults;
|
subResults = resNode.subResults;
|
||||||
subIter = subResults.iterator();
|
subIter = subResults.iterator();
|
||||||
|
|
||||||
checkResult(resNode, "Author", 3.0);
|
|
||||||
checkResult(subIter.next(), "Author/Kurt Vonnegut", 1.0);
|
checkResult(subIter.next(), "Author/Kurt Vonnegut", 1.0);
|
||||||
checkResult(subIter.next(), "Author/Stephen King", 1.0);
|
checkResult(subIter.next(), "Author/Stephen King", 1.0);
|
||||||
checkResult(subIter.next(), "Author/Mark Twain", 1.0);
|
checkResult(subIter.next(), "Author/Mark Twain", 1.0);
|
||||||
|
@ -307,7 +304,6 @@ public class TestMultipleCategoryLists extends FacetTestCase {
|
||||||
subResults = resNode.subResults;
|
subResults = resNode.subResults;
|
||||||
subIter = subResults.iterator();
|
subIter = subResults.iterator();
|
||||||
|
|
||||||
checkResult(resNode, "Band/Rock & Pop", 4.0);
|
|
||||||
checkResult(subIter.next(), "Band/Rock & Pop/Dave Matthews Band", 1.0);
|
checkResult(subIter.next(), "Band/Rock & Pop/Dave Matthews Band", 1.0);
|
||||||
checkResult(subIter.next(), "Band/Rock & Pop/REM", 1.0);
|
checkResult(subIter.next(), "Band/Rock & Pop/REM", 1.0);
|
||||||
checkResult(subIter.next(), "Band/Rock & Pop/U2", 1.0);
|
checkResult(subIter.next(), "Band/Rock & Pop/U2", 1.0);
|
||||||
|
|
|
@ -3,6 +3,7 @@ package org.apache.lucene.facet.search;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.facet.FacetTestBase;
|
import org.apache.lucene.facet.FacetTestBase;
|
||||||
|
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||||
import org.apache.lucene.facet.search.FacetsCollector;
|
import org.apache.lucene.facet.search.FacetsCollector;
|
||||||
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
||||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||||
|
@ -31,18 +32,21 @@ import org.junit.Before;
|
||||||
|
|
||||||
public class TestSameRequestAccumulation extends FacetTestBase {
|
public class TestSameRequestAccumulation extends FacetTestBase {
|
||||||
|
|
||||||
|
private FacetIndexingParams fip;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@Before
|
@Before
|
||||||
public void setUp() throws Exception {
|
public void setUp() throws Exception {
|
||||||
super.setUp();
|
super.setUp();
|
||||||
initIndex();
|
fip = getFacetIndexingParams(Integer.MAX_VALUE);
|
||||||
|
initIndex(fip);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Following LUCENE-4461 - ensure requesting the (exact) same request more
|
// Following LUCENE-4461 - ensure requesting the (exact) same request more
|
||||||
// than once does not alter the results
|
// than once does not alter the results
|
||||||
public void testTwoSameRequests() throws Exception {
|
public void testTwoSameRequests() throws Exception {
|
||||||
final CountFacetRequest facetRequest = new CountFacetRequest(new CategoryPath("root"), 10);
|
final CountFacetRequest facetRequest = new CountFacetRequest(new CategoryPath("root"), 10);
|
||||||
FacetSearchParams fsp = new FacetSearchParams(facetRequest);
|
FacetSearchParams fsp = new FacetSearchParams(fip, facetRequest);
|
||||||
|
|
||||||
FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);
|
FacetsCollector fc = FacetsCollector.create(fsp, indexReader, taxoReader);
|
||||||
searcher.search(new MatchAllDocsQuery(), fc);
|
searcher.search(new MatchAllDocsQuery(), fc);
|
||||||
|
@ -50,7 +54,7 @@ public class TestSameRequestAccumulation extends FacetTestBase {
|
||||||
final String expected = fc.getFacetResults().get(0).toString();
|
final String expected = fc.getFacetResults().get(0).toString();
|
||||||
|
|
||||||
// now add the same facet request with duplicates (same instance and same one)
|
// now add the same facet request with duplicates (same instance and same one)
|
||||||
fsp = new FacetSearchParams(facetRequest, facetRequest, new CountFacetRequest(new CategoryPath("root"), 10));
|
fsp = new FacetSearchParams(fip, facetRequest, facetRequest, new CountFacetRequest(new CategoryPath("root"), 10));
|
||||||
|
|
||||||
// make sure the search params holds 3 requests now
|
// make sure the search params holds 3 requests now
|
||||||
assertEquals(3, fsp.facetRequests.size());
|
assertEquals(3, fsp.facetRequests.size());
|
||||||
|
|
|
@ -5,6 +5,7 @@ import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
import org.apache.lucene.facet.FacetTestBase;
|
import org.apache.lucene.facet.FacetTestBase;
|
||||||
|
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||||
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
||||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||||
import org.apache.lucene.facet.search.params.ScoreFacetRequest;
|
import org.apache.lucene.facet.search.params.ScoreFacetRequest;
|
||||||
|
@ -37,11 +38,14 @@ import org.junit.Test;
|
||||||
/** Test ScoredDocIdCollector. */
|
/** Test ScoredDocIdCollector. */
|
||||||
public class TestScoredDocIdCollector extends FacetTestBase {
|
public class TestScoredDocIdCollector extends FacetTestBase {
|
||||||
|
|
||||||
|
private FacetIndexingParams fip;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
@Before
|
@Before
|
||||||
public void setUp() throws Exception {
|
public void setUp() throws Exception {
|
||||||
super.setUp();
|
super.setUp();
|
||||||
initIndex();
|
fip = getFacetIndexingParams(Integer.MAX_VALUE);
|
||||||
|
initIndex(fip);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -73,8 +77,8 @@ public class TestScoredDocIdCollector extends FacetTestBase {
|
||||||
|
|
||||||
// verify by facet values
|
// verify by facet values
|
||||||
CategoryPath cp = new CategoryPath("root","a");
|
CategoryPath cp = new CategoryPath("root","a");
|
||||||
FacetSearchParams countFSP = new FacetSearchParams(getFacetIndexingParams(Integer.MAX_VALUE), new CountFacetRequest(cp, 10));
|
FacetSearchParams countFSP = new FacetSearchParams(fip, new CountFacetRequest(cp, 10));
|
||||||
FacetSearchParams scoreFSP = new FacetSearchParams(getFacetIndexingParams(Integer.MAX_VALUE), new ScoreFacetRequest(cp, 10));
|
FacetSearchParams scoreFSP = new FacetSearchParams(fip, new ScoreFacetRequest(cp, 10));
|
||||||
|
|
||||||
List<FacetResult> countRes = findFacets(scoredDocIDs, countFSP);
|
List<FacetResult> countRes = findFacets(scoredDocIDs, countFSP);
|
||||||
List<FacetResult> scoreRes = findFacets(scoredDocIDs, scoreFSP);
|
List<FacetResult> scoreRes = findFacets(scoredDocIDs, scoreFSP);
|
||||||
|
@ -101,10 +105,8 @@ public class TestScoredDocIdCollector extends FacetTestBase {
|
||||||
}
|
}
|
||||||
|
|
||||||
// compute facets with certain facet requests and docs
|
// compute facets with certain facet requests and docs
|
||||||
private List<FacetResult> findFacets(ScoredDocIDs sDocids,
|
private List<FacetResult> findFacets(ScoredDocIDs sDocids, FacetSearchParams facetSearchParams) throws IOException {
|
||||||
FacetSearchParams facetSearchParams) throws IOException {
|
FacetsAccumulator fAccumulator = new StandardFacetsAccumulator(facetSearchParams, indexReader, taxoReader);
|
||||||
FacetsAccumulator fAccumulator = new StandardFacetsAccumulator(
|
|
||||||
facetSearchParams, indexReader, taxoReader);
|
|
||||||
List<FacetResult> res = fAccumulator.accumulate(sDocids);
|
List<FacetResult> res = fAccumulator.accumulate(sDocids);
|
||||||
|
|
||||||
// Results are ready, printing them...
|
// Results are ready, printing them...
|
||||||
|
|
|
@ -113,7 +113,6 @@ public class TestStandardFacetsAccumulator extends FacetTestCase {
|
||||||
List<FacetResult> results = fc.getFacetResults();
|
List<FacetResult> results = fc.getFacetResults();
|
||||||
assertEquals("received too many facet results", 1, results.size());
|
assertEquals("received too many facet results", 1, results.size());
|
||||||
FacetResultNode frn = results.get(0).getFacetResultNode();
|
FacetResultNode frn = results.get(0).getFacetResultNode();
|
||||||
assertEquals("wrong weight for \"A\"", 4, (int) frn.value);
|
|
||||||
assertEquals("wrong number of children", 2, frn.subResults.size());
|
assertEquals("wrong number of children", 2, frn.subResults.size());
|
||||||
for (FacetResultNode node : frn.subResults) {
|
for (FacetResultNode node : frn.subResults) {
|
||||||
assertEquals("wrong weight for child " + node.label, 2, (int) node.value);
|
assertEquals("wrong weight for child " + node.label, 2, (int) node.value);
|
||||||
|
|
|
@ -181,7 +181,6 @@ public class TestTopKInEachNodeResultHandler extends FacetTestCase {
|
||||||
boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
|
boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
|
||||||
assertEquals(9, fr.getNumValidDescendants());
|
assertEquals(9, fr.getNumValidDescendants());
|
||||||
FacetResultNode parentRes = fr.getFacetResultNode();
|
FacetResultNode parentRes = fr.getFacetResultNode();
|
||||||
assertEquals(16.0, parentRes.value, Double.MIN_VALUE);
|
|
||||||
assertEquals(2, parentRes.subResults.size());
|
assertEquals(2, parentRes.subResults.size());
|
||||||
// two nodes sorted by descending values: a/b with 8 and a/c with 6
|
// two nodes sorted by descending values: a/b with 8 and a/c with 6
|
||||||
// a/b has two children a/b/2 with value 3, and a/b/1 with value 2.
|
// a/b has two children a/b/2 with value 3, and a/b/1 with value 2.
|
||||||
|
@ -217,7 +216,6 @@ public class TestTopKInEachNodeResultHandler extends FacetTestCase {
|
||||||
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
|
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
|
||||||
assertEquals(9, fr.getNumValidDescendants());
|
assertEquals(9, fr.getNumValidDescendants());
|
||||||
parentRes = fr.getFacetResultNode();
|
parentRes = fr.getFacetResultNode();
|
||||||
assertEquals(16.0, parentRes.value, Double.MIN_VALUE);
|
|
||||||
assertEquals(2, parentRes.subResults.size());
|
assertEquals(2, parentRes.subResults.size());
|
||||||
// two nodes sorted by descending values: a/b with 8 and a/c with 6
|
// two nodes sorted by descending values: a/b with 8 and a/c with 6
|
||||||
// a/b has two children a/b/2 with value 3, and a/b/1 with value 2.
|
// a/b has two children a/b/2 with value 3, and a/b/1 with value 2.
|
||||||
|
@ -234,7 +232,6 @@ public class TestTopKInEachNodeResultHandler extends FacetTestCase {
|
||||||
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
|
hasDoctor |= "Doctor".equals(fr.getFacetRequest().categoryPath.components[0]);
|
||||||
assertEquals(4, fr.getNumValidDescendants(), 4);
|
assertEquals(4, fr.getNumValidDescendants(), 4);
|
||||||
parentRes = fr.getFacetResultNode();
|
parentRes = fr.getFacetResultNode();
|
||||||
assertEquals(16.0, parentRes.value, Double.MIN_VALUE);
|
|
||||||
assertEquals(2, parentRes.subResults.size());
|
assertEquals(2, parentRes.subResults.size());
|
||||||
// two nodes sorted by descending values:
|
// two nodes sorted by descending values:
|
||||||
// a/b with value 8 and a/c with value 6
|
// a/b with value 8 and a/c with value 6
|
||||||
|
|
|
@ -4,6 +4,8 @@ import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
|
||||||
|
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||||
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
||||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||||
import org.apache.lucene.facet.search.params.FacetRequest.ResultMode;
|
import org.apache.lucene.facet.search.params.FacetRequest.ResultMode;
|
||||||
|
@ -73,7 +75,9 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
||||||
@Test
|
@Test
|
||||||
public void testSimple() throws Exception {
|
public void testSimple() throws Exception {
|
||||||
for (int partitionSize : partitionSizes) {
|
for (int partitionSize : partitionSizes) {
|
||||||
initIndex(partitionSize);
|
FacetIndexingParams fip = getFacetIndexingParams(partitionSize);
|
||||||
|
OrdinalPolicy op = fip.getCategoryListParams(null).getOrdinalPolicy(null);
|
||||||
|
initIndex(fip);
|
||||||
|
|
||||||
List<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
|
List<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
|
||||||
facetRequests.add(new CountFacetRequest(new CategoryPath("a"), 100));
|
facetRequests.add(new CountFacetRequest(new CategoryPath("a"), 100));
|
||||||
|
@ -87,8 +91,8 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
||||||
facetRequests.add(new CountFacetRequest(new CategoryPath("a", "c"), 100));
|
facetRequests.add(new CountFacetRequest(new CategoryPath("a", "c"), 100));
|
||||||
|
|
||||||
// do different facet counts and compare to control
|
// do different facet counts and compare to control
|
||||||
FacetSearchParams sParams = getFacetSearchParams(facetRequests, getFacetIndexingParams(partitionSize));
|
FacetSearchParams sParams = getFacetSearchParams(facetRequests, fip);
|
||||||
|
|
||||||
FacetsCollector fc = new StandardFacetsCollector(sParams, indexReader, taxoReader) {
|
FacetsCollector fc = new StandardFacetsCollector(sParams, indexReader, taxoReader) {
|
||||||
@Override
|
@Override
|
||||||
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) {
|
protected FacetsAccumulator initFacetsAccumulator(FacetSearchParams facetSearchParams, IndexReader indexReader, TaxonomyReader taxonomyReader) {
|
||||||
|
@ -100,17 +104,21 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
||||||
|
|
||||||
searcher.search(new MatchAllDocsQuery(), fc);
|
searcher.search(new MatchAllDocsQuery(), fc);
|
||||||
List<FacetResult> facetResults = fc.getFacetResults();
|
List<FacetResult> facetResults = fc.getFacetResults();
|
||||||
|
|
||||||
FacetResult fr = facetResults.get(0);
|
FacetResult fr = facetResults.get(0);
|
||||||
FacetResultNode parentRes = fr.getFacetResultNode();
|
FacetResultNode parentRes = fr.getFacetResultNode();
|
||||||
assertEquals(13.0, parentRes.value, Double.MIN_VALUE);
|
if (op == OrdinalPolicy.ALL_PARENTS) {
|
||||||
|
assertEquals(13.0, parentRes.value, Double.MIN_VALUE);
|
||||||
|
}
|
||||||
FacetResultNode[] frn = resultNodesAsArray(parentRes);
|
FacetResultNode[] frn = resultNodesAsArray(parentRes);
|
||||||
assertEquals(7.0, frn[0].value, Double.MIN_VALUE);
|
assertEquals(7.0, frn[0].value, Double.MIN_VALUE);
|
||||||
assertEquals(6.0, frn[1].value, Double.MIN_VALUE);
|
assertEquals(6.0, frn[1].value, Double.MIN_VALUE);
|
||||||
|
|
||||||
fr = facetResults.get(1);
|
fr = facetResults.get(1);
|
||||||
parentRes = fr.getFacetResultNode();
|
parentRes = fr.getFacetResultNode();
|
||||||
assertEquals(13.0, parentRes.value, Double.MIN_VALUE);
|
if (op == OrdinalPolicy.ALL_PARENTS) {
|
||||||
|
assertEquals(13.0, parentRes.value, Double.MIN_VALUE);
|
||||||
|
}
|
||||||
frn = resultNodesAsArray(parentRes);
|
frn = resultNodesAsArray(parentRes);
|
||||||
assertEquals(7.0, frn[0].value, Double.MIN_VALUE);
|
assertEquals(7.0, frn[0].value, Double.MIN_VALUE);
|
||||||
assertEquals(6.0, frn[1].value, Double.MIN_VALUE);
|
assertEquals(6.0, frn[1].value, Double.MIN_VALUE);
|
||||||
|
@ -121,7 +129,9 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
||||||
|
|
||||||
fr = facetResults.get(2);
|
fr = facetResults.get(2);
|
||||||
parentRes = fr.getFacetResultNode();
|
parentRes = fr.getFacetResultNode();
|
||||||
assertEquals(7.0, parentRes.value, Double.MIN_VALUE);
|
if (op == OrdinalPolicy.ALL_PARENTS) {
|
||||||
|
assertEquals(7.0, parentRes.value, Double.MIN_VALUE);
|
||||||
|
}
|
||||||
frn = resultNodesAsArray(parentRes);
|
frn = resultNodesAsArray(parentRes);
|
||||||
assertEquals(2.0, frn[0].value, Double.MIN_VALUE);
|
assertEquals(2.0, frn[0].value, Double.MIN_VALUE);
|
||||||
assertEquals(2.0, frn[1].value, Double.MIN_VALUE);
|
assertEquals(2.0, frn[1].value, Double.MIN_VALUE);
|
||||||
|
@ -130,13 +140,17 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
||||||
|
|
||||||
fr = facetResults.get(3);
|
fr = facetResults.get(3);
|
||||||
parentRes = fr.getFacetResultNode();
|
parentRes = fr.getFacetResultNode();
|
||||||
assertEquals(2.0, parentRes.value, Double.MIN_VALUE);
|
if (op == OrdinalPolicy.ALL_PARENTS) {
|
||||||
|
assertEquals(2.0, parentRes.value, Double.MIN_VALUE);
|
||||||
|
}
|
||||||
frn = resultNodesAsArray(parentRes);
|
frn = resultNodesAsArray(parentRes);
|
||||||
assertEquals(0, frn.length);
|
assertEquals(0, frn.length);
|
||||||
|
|
||||||
fr = facetResults.get(4);
|
fr = facetResults.get(4);
|
||||||
parentRes = fr.getFacetResultNode();
|
parentRes = fr.getFacetResultNode();
|
||||||
assertEquals(6.0, parentRes.value, Double.MIN_VALUE);
|
if (op == OrdinalPolicy.ALL_PARENTS) {
|
||||||
|
assertEquals(6.0, parentRes.value, Double.MIN_VALUE);
|
||||||
|
}
|
||||||
frn = resultNodesAsArray(parentRes);
|
frn = resultNodesAsArray(parentRes);
|
||||||
assertEquals(1.0, frn[0].value, Double.MIN_VALUE);
|
assertEquals(1.0, frn[0].value, Double.MIN_VALUE);
|
||||||
closeAll();
|
closeAll();
|
||||||
|
@ -149,12 +163,12 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
||||||
@Test
|
@Test
|
||||||
public void testGetMaxIntFacets() throws Exception {
|
public void testGetMaxIntFacets() throws Exception {
|
||||||
for (int partitionSize : partitionSizes) {
|
for (int partitionSize : partitionSizes) {
|
||||||
initIndex(partitionSize);
|
FacetIndexingParams fip = getFacetIndexingParams(partitionSize);
|
||||||
|
initIndex(fip);
|
||||||
|
|
||||||
// do different facet counts and compare to control
|
// do different facet counts and compare to control
|
||||||
CategoryPath path = new CategoryPath("a", "b");
|
CategoryPath path = new CategoryPath("a", "b");
|
||||||
FacetSearchParams sParams = getFacetSearchParams(getFacetIndexingParams(partitionSize),
|
FacetSearchParams sParams = getFacetSearchParams(fip, new CountFacetRequest(path, Integer.MAX_VALUE));
|
||||||
new CountFacetRequest(path, Integer.MAX_VALUE));
|
|
||||||
|
|
||||||
FacetsCollector fc = new StandardFacetsCollector(sParams, indexReader, taxoReader) {
|
FacetsCollector fc = new StandardFacetsCollector(sParams, indexReader, taxoReader) {
|
||||||
@Override
|
@Override
|
||||||
|
@ -174,7 +188,7 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
||||||
|
|
||||||
// As a control base results, ask for top-1000 results
|
// As a control base results, ask for top-1000 results
|
||||||
FacetSearchParams sParams2 = getFacetSearchParams(
|
FacetSearchParams sParams2 = getFacetSearchParams(
|
||||||
getFacetIndexingParams(partitionSize), new CountFacetRequest(path, Integer.MAX_VALUE));
|
fip, new CountFacetRequest(path, Integer.MAX_VALUE));
|
||||||
|
|
||||||
FacetsCollector fc2 = new StandardFacetsCollector(sParams2, indexReader, taxoReader) {
|
FacetsCollector fc2 = new StandardFacetsCollector(sParams2, indexReader, taxoReader) {
|
||||||
@Override
|
@Override
|
||||||
|
@ -207,12 +221,11 @@ public class TestTopKResultsHandler extends BaseTestTopK {
|
||||||
@Test
|
@Test
|
||||||
public void testSimpleSearchForNonexistentFacet() throws Exception {
|
public void testSimpleSearchForNonexistentFacet() throws Exception {
|
||||||
for (int partitionSize : partitionSizes) {
|
for (int partitionSize : partitionSizes) {
|
||||||
initIndex(partitionSize);
|
FacetIndexingParams fip = getFacetIndexingParams(partitionSize);
|
||||||
|
initIndex(fip);
|
||||||
|
|
||||||
CategoryPath path = new CategoryPath("Miau Hattulla");
|
CategoryPath path = new CategoryPath("Miau Hattulla");
|
||||||
FacetSearchParams sParams = getFacetSearchParams(
|
FacetSearchParams sParams = getFacetSearchParams(fip, new CountFacetRequest(path, 10));
|
||||||
getFacetIndexingParams(partitionSize),
|
|
||||||
new CountFacetRequest(path, 10));
|
|
||||||
|
|
||||||
FacetsCollector fc = FacetsCollector.create(sParams, indexReader, taxoReader);
|
FacetsCollector fc = FacetsCollector.create(sParams, indexReader, taxoReader);
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@ import java.io.IOException;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||||
import org.apache.lucene.facet.search.results.FacetResult;
|
import org.apache.lucene.facet.search.results.FacetResult;
|
||||||
import org.apache.lucene.facet.search.results.FacetResultNode;
|
import org.apache.lucene.facet.search.results.FacetResultNode;
|
||||||
|
@ -32,10 +33,10 @@ import org.junit.Test;
|
||||||
|
|
||||||
public class TestTopKResultsHandlerRandom extends BaseTestTopK {
|
public class TestTopKResultsHandlerRandom extends BaseTestTopK {
|
||||||
|
|
||||||
private List<FacetResult> countFacets(int partitionSize, int numResults, final boolean doComplement)
|
private List<FacetResult> countFacets(FacetIndexingParams fip, int numResults, final boolean doComplement)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
Query q = new MatchAllDocsQuery();
|
Query q = new MatchAllDocsQuery();
|
||||||
FacetSearchParams facetSearchParams = searchParamsWithRequests(numResults, partitionSize);
|
FacetSearchParams facetSearchParams = searchParamsWithRequests(numResults, fip);
|
||||||
FacetsCollector fc = new StandardFacetsCollector(facetSearchParams, indexReader, taxoReader) {
|
FacetsCollector fc = new StandardFacetsCollector(facetSearchParams, indexReader, taxoReader) {
|
||||||
@Override
|
@Override
|
||||||
protected FacetsAccumulator initFacetsAccumulator(
|
protected FacetsAccumulator initFacetsAccumulator(
|
||||||
|
@ -59,7 +60,8 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK {
|
||||||
@Test
|
@Test
|
||||||
public void testTopCountsOrder() throws Exception {
|
public void testTopCountsOrder() throws Exception {
|
||||||
for (int partitionSize : partitionSizes) {
|
for (int partitionSize : partitionSizes) {
|
||||||
initIndex(partitionSize);
|
FacetIndexingParams fip = getFacetIndexingParams(partitionSize);
|
||||||
|
initIndex(fip);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Try out faceted search in it's most basic form (no sampling nor complement
|
* Try out faceted search in it's most basic form (no sampling nor complement
|
||||||
|
@ -67,7 +69,7 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK {
|
||||||
* being indexed, and later on an "over-all" faceted search is performed. The
|
* being indexed, and later on an "over-all" faceted search is performed. The
|
||||||
* results are checked against the DF of each facet by itself
|
* results are checked against the DF of each facet by itself
|
||||||
*/
|
*/
|
||||||
List<FacetResult> facetResults = countFacets(partitionSize, 100000, false);
|
List<FacetResult> facetResults = countFacets(fip, 100000, false);
|
||||||
assertCountsAndCardinality(facetCountsTruth(), facetResults);
|
assertCountsAndCardinality(facetCountsTruth(), facetResults);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -77,10 +79,10 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK {
|
||||||
* place in here. The results are checked against the a regular (a.k.a
|
* place in here. The results are checked against the a regular (a.k.a
|
||||||
* no-complement, no-sampling) faceted search with the same parameters.
|
* no-complement, no-sampling) faceted search with the same parameters.
|
||||||
*/
|
*/
|
||||||
facetResults = countFacets(partitionSize, 100000, true);
|
facetResults = countFacets(fip, 100000, true);
|
||||||
assertCountsAndCardinality(facetCountsTruth(), facetResults);
|
assertCountsAndCardinality(facetCountsTruth(), facetResults);
|
||||||
|
|
||||||
List<FacetResult> allFacetResults = countFacets(partitionSize, 100000, false);
|
List<FacetResult> allFacetResults = countFacets(fip, 100000, false);
|
||||||
|
|
||||||
HashMap<String,Integer> all = new HashMap<String,Integer>();
|
HashMap<String,Integer> all = new HashMap<String,Integer>();
|
||||||
int maxNumNodes = 0;
|
int maxNumNodes = 0;
|
||||||
|
@ -108,7 +110,7 @@ public class TestTopKResultsHandlerRandom extends BaseTestTopK {
|
||||||
if (VERBOSE) {
|
if (VERBOSE) {
|
||||||
System.out.println("------- verify for "+n+" top results");
|
System.out.println("------- verify for "+n+" top results");
|
||||||
}
|
}
|
||||||
List<FacetResult> someResults = countFacets(partitionSize, n, false);
|
List<FacetResult> someResults = countFacets(fip, n, false);
|
||||||
k = 0;
|
k = 0;
|
||||||
for (FacetResult fr : someResults) {
|
for (FacetResult fr : someResults) {
|
||||||
FacetResultNode topResNode = fr.getFacetResultNode();
|
FacetResultNode topResNode = fr.getFacetResultNode();
|
||||||
|
|
|
@ -8,7 +8,9 @@ import org.apache.lucene.facet.FacetTestCase;
|
||||||
import org.apache.lucene.facet.FacetTestUtils;
|
import org.apache.lucene.facet.FacetTestUtils;
|
||||||
import org.apache.lucene.facet.FacetTestUtils.IndexTaxonomyReaderPair;
|
import org.apache.lucene.facet.FacetTestUtils.IndexTaxonomyReaderPair;
|
||||||
import org.apache.lucene.facet.FacetTestUtils.IndexTaxonomyWriterPair;
|
import org.apache.lucene.facet.FacetTestUtils.IndexTaxonomyWriterPair;
|
||||||
|
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||||
|
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.util.IOUtils;
|
import org.apache.lucene.util.IOUtils;
|
||||||
import org.apache.lucene.util._TestUtil;
|
import org.apache.lucene.util._TestUtil;
|
||||||
|
@ -59,6 +61,16 @@ public class TestTotalFacetCounts extends FacetTestCase {
|
||||||
public int getPartitionSize() {
|
public int getPartitionSize() {
|
||||||
return partitionSize;
|
return partitionSize;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public CategoryListParams getCategoryListParams(CategoryPath category) {
|
||||||
|
return new CategoryListParams() {
|
||||||
|
@Override
|
||||||
|
public OrdinalPolicy getOrdinalPolicy(String dimension) {
|
||||||
|
return OrdinalPolicy.ALL_PARENTS;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
};
|
};
|
||||||
// The counts that the TotalFacetCountsArray should have after adding
|
// The counts that the TotalFacetCountsArray should have after adding
|
||||||
// the below facets to the index.
|
// the below facets to the index.
|
||||||
|
|
|
@ -87,7 +87,7 @@ public class TestTotalFacetCountsCache extends FacetTestCase {
|
||||||
|
|
||||||
/** Utility method to add a document and facets to an index/taxonomy. */
|
/** Utility method to add a document and facets to an index/taxonomy. */
|
||||||
static void addFacets(FacetIndexingParams iParams, IndexWriter iw,
|
static void addFacets(FacetIndexingParams iParams, IndexWriter iw,
|
||||||
TaxonomyWriter tw, String... strings) throws IOException {
|
TaxonomyWriter tw, String... strings) throws IOException {
|
||||||
Document doc = new Document();
|
Document doc = new Document();
|
||||||
FacetFields facetFields = new FacetFields(tw, iParams);
|
FacetFields facetFields = new FacetFields(tw, iParams);
|
||||||
facetFields.addFields(doc, Collections.singletonList(new CategoryPath(strings)));
|
facetFields.addFields(doc, Collections.singletonList(new CategoryPath(strings)));
|
||||||
|
|
|
@ -3,6 +3,7 @@ package org.apache.lucene.facet.search.sampling;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
|
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||||
import org.apache.lucene.facet.search.BaseTestTopK;
|
import org.apache.lucene.facet.search.BaseTestTopK;
|
||||||
import org.apache.lucene.facet.search.FacetsAccumulator;
|
import org.apache.lucene.facet.search.FacetsAccumulator;
|
||||||
import org.apache.lucene.facet.search.FacetsCollector;
|
import org.apache.lucene.facet.search.FacetsCollector;
|
||||||
|
@ -46,8 +47,8 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
|
||||||
protected static final int RETRIES = 10;
|
protected static final int RETRIES = 10;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
protected FacetSearchParams searchParamsWithRequests(int numResults, int partitionSize) {
|
protected FacetSearchParams searchParamsWithRequests(int numResults, FacetIndexingParams fip) {
|
||||||
FacetSearchParams res = super.searchParamsWithRequests(numResults, partitionSize);
|
FacetSearchParams res = super.searchParamsWithRequests(numResults, fip);
|
||||||
for (FacetRequest req : res.facetRequests) {
|
for (FacetRequest req : res.facetRequests) {
|
||||||
// randomize the way we aggregate results
|
// randomize the way we aggregate results
|
||||||
if (random().nextBoolean()) {
|
if (random().nextBoolean()) {
|
||||||
|
@ -71,20 +72,23 @@ public abstract class BaseSampleTestTopK extends BaseTestTopK {
|
||||||
boolean useRandomSampler = random().nextBoolean();
|
boolean useRandomSampler = random().nextBoolean();
|
||||||
for (int partitionSize : partitionSizes) {
|
for (int partitionSize : partitionSizes) {
|
||||||
try {
|
try {
|
||||||
initIndex(partitionSize);
|
// complements return counts for all ordinals, so force ALL_PARENTS indexing
|
||||||
|
// so that it's easier to compare
|
||||||
|
FacetIndexingParams fip = getFacetIndexingParams(partitionSize, true);
|
||||||
|
initIndex(fip);
|
||||||
// Get all of the documents and run the query, then do different
|
// Get all of the documents and run the query, then do different
|
||||||
// facet counts and compare to control
|
// facet counts and compare to control
|
||||||
Query q = new TermQuery(new Term(CONTENT_FIELD, BETA)); // 90% of the docs
|
Query q = new TermQuery(new Term(CONTENT_FIELD, BETA)); // 90% of the docs
|
||||||
ScoredDocIdCollector docCollector = ScoredDocIdCollector.create(indexReader.maxDoc(), false);
|
ScoredDocIdCollector docCollector = ScoredDocIdCollector.create(indexReader.maxDoc(), false);
|
||||||
|
|
||||||
FacetSearchParams expectedSearchParams = searchParamsWithRequests(K, partitionSize);
|
FacetSearchParams expectedSearchParams = searchParamsWithRequests(K, fip);
|
||||||
FacetsCollector fc = FacetsCollector.create(expectedSearchParams, indexReader, taxoReader);
|
FacetsCollector fc = FacetsCollector.create(expectedSearchParams, indexReader, taxoReader);
|
||||||
|
|
||||||
searcher.search(q, MultiCollector.wrap(docCollector, fc));
|
searcher.search(q, MultiCollector.wrap(docCollector, fc));
|
||||||
|
|
||||||
List<FacetResult> expectedResults = fc.getFacetResults();
|
List<FacetResult> expectedResults = fc.getFacetResults();
|
||||||
|
|
||||||
FacetSearchParams samplingSearchParams = searchParamsWithRequests(K, partitionSize);
|
FacetSearchParams samplingSearchParams = searchParamsWithRequests(K, fip);
|
||||||
|
|
||||||
// try several times in case of failure, because the test has a chance to fail
|
// try several times in case of failure, because the test has a chance to fail
|
||||||
// if the top K facets are not sufficiently common with the sample set
|
// if the top K facets are not sufficiently common with the sample set
|
||||||
|
|
Loading…
Reference in New Issue