mirror of https://github.com/apache/lucene.git
LUCENE-4700: move OrdinalPolicy to CategoryListParams
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1437661 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f13080dd64
commit
f5fa833da7
|
@ -38,6 +38,11 @@ Changes in backwards compatibility policy
|
|||
returns the most optimized collector for the given parameters.
|
||||
(Shai Erera, Michael McCandless)
|
||||
|
||||
* LUCENE-4700: OrdinalPolicy is now per CategoryListParams, and is no longer
|
||||
an interface, but rather an enum with values NO_PARENTS and ALL_PARENTS.
|
||||
PathPolicy was removed, you should extend FacetFields and DrillDownStream
|
||||
to control which categories are added as drill-down terms. (Shai Erera)
|
||||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-4687: BloomFilterPostingsFormat now lazily initializes delegate
|
||||
|
|
|
@ -6,8 +6,8 @@ import java.util.HashMap;
|
|||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
|
@ -120,7 +120,7 @@ public class CountingListBuilder implements CategoryListBuilder {
|
|||
public CountingListBuilder(CategoryListParams categoryListParams, FacetIndexingParams indexingParams,
|
||||
TaxonomyWriter taxoWriter) {
|
||||
this.taxoWriter = taxoWriter;
|
||||
this.ordinalPolicy = indexingParams.getOrdinalPolicy();
|
||||
this.ordinalPolicy = categoryListParams.getOrdinalPolicy();
|
||||
if (indexingParams.getPartitionSize() == Integer.MAX_VALUE) {
|
||||
ordinalsEncoder = new NoPartitionsOrdinalsEncoder(categoryListParams);
|
||||
} else {
|
||||
|
@ -143,14 +143,14 @@ public class CountingListBuilder implements CategoryListBuilder {
|
|||
public Map<String,BytesRef> build(IntsRef ordinals, Iterable<CategoryPath> categories) throws IOException {
|
||||
int upto = ordinals.length; // since we add ordinals to IntsRef, iterate upto original length
|
||||
|
||||
for (int i = 0; i < upto; i++) {
|
||||
int ordinal = ordinals.ints[i];
|
||||
int parent = taxoWriter.getParent(ordinal);
|
||||
while (parent > 0) {
|
||||
if (ordinalPolicy.shouldAdd(parent)) {
|
||||
if (ordinalPolicy == OrdinalPolicy.ALL_PARENTS) { // add all parents too
|
||||
for (int i = 0; i < upto; i++) {
|
||||
int ordinal = ordinals.ints[i];
|
||||
int parent = taxoWriter.getParent(ordinal);
|
||||
while (parent > 0) {
|
||||
ordinals.ints[ordinals.length++] = parent;
|
||||
parent = taxoWriter.getParent(parent);
|
||||
}
|
||||
parent = taxoWriter.getParent(parent);
|
||||
}
|
||||
}
|
||||
return ordinalsEncoder.encode(ordinals);
|
||||
|
|
|
@ -5,7 +5,6 @@ import java.util.Iterator;
|
|||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.facet.index.categorypolicy.PathPolicy;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
|
||||
|
@ -36,7 +35,6 @@ public class DrillDownStream extends TokenStream {
|
|||
private final FacetIndexingParams indexingParams;
|
||||
private final Iterator<CategoryPath> categories;
|
||||
private final CharTermAttribute termAttribute;
|
||||
private final PathPolicy pathPolicy;
|
||||
|
||||
private CategoryPath current;
|
||||
private boolean isParent;
|
||||
|
@ -45,7 +43,6 @@ public class DrillDownStream extends TokenStream {
|
|||
termAttribute = addAttribute(CharTermAttribute.class);
|
||||
this.categories = categories.iterator();
|
||||
this.indexingParams = indexingParams;
|
||||
this.pathPolicy = indexingParams.getPathPolicy();
|
||||
}
|
||||
|
||||
protected void addAdditionalAttributes(CategoryPath category, boolean isParent) {
|
||||
|
@ -71,10 +68,7 @@ public class DrillDownStream extends TokenStream {
|
|||
addAdditionalAttributes(current, isParent);
|
||||
|
||||
// prepare current for next call by trimming the last component (parents)
|
||||
do {
|
||||
// skip all parent categories which are not accepted by PathPolicy
|
||||
current = current.subpath(current.length - 1);
|
||||
} while (!pathPolicy.shouldAdd(current) && current.length > 0);
|
||||
current = current.subpath(current.length - 1);
|
||||
isParent = true;
|
||||
return true;
|
||||
}
|
||||
|
|
|
@ -1,73 +0,0 @@
|
|||
package org.apache.lucene.facet.index.categorypolicy;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Filter out any "top level" category ordinals. <br> {@link #shouldAdd(int)}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class NonTopLevelOrdinalPolicy implements OrdinalPolicy {
|
||||
|
||||
/**
|
||||
* The taxonomyWriter with which the given ordinals' parent is determined.
|
||||
*/
|
||||
private TaxonomyWriter taxonomyWriter;
|
||||
|
||||
/**
|
||||
* Constructs a new non-top-level-ordinal-filter. With a given
|
||||
* taxonomyWriter.
|
||||
*
|
||||
*/
|
||||
public NonTopLevelOrdinalPolicy() {
|
||||
this.taxonomyWriter = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param taxonomyWriter
|
||||
* A relevant taxonomyWriter object, with which ordinals sent to
|
||||
* {@link #shouldAdd(int)} are examined.
|
||||
*/
|
||||
@Override
|
||||
public void init(TaxonomyWriter taxonomyWriter) {
|
||||
this.taxonomyWriter = taxonomyWriter;
|
||||
}
|
||||
|
||||
/**
|
||||
* Filters out ordinal which are ROOT or who's parent is ROOT. In order to
|
||||
* determine if a parent is root, there's a need for
|
||||
* {@link TaxonomyWriter#getParent(int)}.
|
||||
*/
|
||||
@Override
|
||||
public boolean shouldAdd(int ordinal) {
|
||||
if (ordinal > TaxonomyReader.ROOT_ORDINAL) {
|
||||
try {
|
||||
if (this.taxonomyWriter.getParent(ordinal) > TaxonomyReader.ROOT_ORDINAL) {
|
||||
return true;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,44 +0,0 @@
|
|||
package org.apache.lucene.facet.index.categorypolicy;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* This class filters our the ROOT category, and it's direct descendants. For
|
||||
* more information see {@link PathPolicy}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class NonTopLevelPathPolicy implements PathPolicy {
|
||||
|
||||
/**
|
||||
* The shortest path length delivered is two components (root + one child).
|
||||
*/
|
||||
public final int DEFAULT_MINIMAL_SUBPATH_LENGTH = 2;
|
||||
|
||||
/**
|
||||
* Filters out (returns false) CategoryPaths equal or less than
|
||||
* {@link TaxonomyReader#ROOT_ORDINAL}. true otherwise.
|
||||
*/
|
||||
@Override
|
||||
public boolean shouldAdd(CategoryPath categoryPath) {
|
||||
return categoryPath.length >= DEFAULT_MINIMAL_SUBPATH_LENGTH;
|
||||
}
|
||||
}
|
|
@ -1,83 +0,0 @@
|
|||
package org.apache.lucene.facet.index.categorypolicy;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.apache.lucene.facet.search.FacetsAccumulator;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* A policy for adding category parent ordinals to the list of ordinals that are
|
||||
* encoded for a given document. The default {@link #ALL_PARENTS} policy always
|
||||
* adds all parents, where {@link #NO_PARENTS} never adds any parents.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public interface OrdinalPolicy extends Serializable {
|
||||
|
||||
/**
|
||||
* An {@link OrdinalPolicy} which never stores parent ordinals. Useful if you
|
||||
* only want to store the exact categories that were added to the document.
|
||||
* Note that this is a rather expert policy, which requires a matching
|
||||
* {@link FacetsAccumulator} that computes the weight of the parent categories
|
||||
* on-the-fly.
|
||||
*/
|
||||
public static final OrdinalPolicy NO_PARENTS = new OrdinalPolicy() {
|
||||
@Override
|
||||
public boolean shouldAdd(int ordinal) { return false; }
|
||||
|
||||
@Override
|
||||
public void init(TaxonomyWriter taxonomyWriter) {}
|
||||
};
|
||||
|
||||
/**
|
||||
* An {@link OrdinalPolicy} which stores all parent ordinals, except
|
||||
* {@link TaxonomyReader#ROOT_ORDINAL}. This is the default
|
||||
* {@link OrdinalPolicy} and works with the default {@link FacetsAccumulator}.
|
||||
*/
|
||||
public static final OrdinalPolicy ALL_PARENTS = new OrdinalPolicy() {
|
||||
@Override
|
||||
public boolean shouldAdd(int ordinal) { return ordinal > TaxonomyReader.ROOT_ORDINAL; }
|
||||
|
||||
@Override
|
||||
public void init(TaxonomyWriter taxonomyWriter) {}
|
||||
};
|
||||
|
||||
/**
|
||||
* Check whether a given category ordinal should be added to the stream.
|
||||
*
|
||||
* @param ordinal
|
||||
* A given category ordinal which is to be tested for stream
|
||||
* addition.
|
||||
* @return <code>true</code> if the category should be added.
|
||||
* <code>false</code> otherwise.
|
||||
*/
|
||||
public abstract boolean shouldAdd(int ordinal);
|
||||
|
||||
/**
|
||||
* Initialize the policy with a TaxonomyWriter. This method can be
|
||||
* implemented as noop if the ordinal policy is not taxonomy dependent
|
||||
*
|
||||
* @param taxonomyWriter
|
||||
* A relevant taxonomyWriter object, with which ordinals sent to
|
||||
* {@link #shouldAdd(int)} are examined.
|
||||
*/
|
||||
public abstract void init(TaxonomyWriter taxonomyWriter);
|
||||
}
|
|
@ -1,55 +0,0 @@
|
|||
package org.apache.lucene.facet.index.categorypolicy;
|
||||
|
||||
import java.io.Serializable;
|
||||
|
||||
import org.apache.lucene.facet.index.DrillDownStream;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Determines which {@link CategoryPath categories} should be added as terms to
|
||||
* the {@link DrillDownStream}. The default approach is implemented by
|
||||
* {@link #ALL_CATEGORIES}.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public interface PathPolicy extends Serializable {
|
||||
|
||||
/**
|
||||
* A {@link PathPolicy} which adds all {@link CategoryPath} that have at least
|
||||
* one component (i.e. {@link CategoryPath#length} > 0) to the categories
|
||||
* stream.
|
||||
*/
|
||||
public static final PathPolicy ALL_CATEGORIES = new PathPolicy() {
|
||||
@Override
|
||||
public boolean shouldAdd(CategoryPath categoryPath) { return categoryPath.length > 0; }
|
||||
};
|
||||
|
||||
/**
|
||||
* Check whether a given category path should be added to the stream.
|
||||
*
|
||||
* @param categoryPath
|
||||
* A given category path which is to be tested for stream
|
||||
* addition.
|
||||
* @return <code>true</code> if the category path should be added.
|
||||
* <code>false</code> otherwise.
|
||||
*/
|
||||
public abstract boolean shouldAdd(CategoryPath categoryPath);
|
||||
|
||||
}
|
|
@ -1,37 +0,0 @@
|
|||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
-->
|
||||
<html>
|
||||
<head>
|
||||
<title>Policies for indexing categories</title>
|
||||
</head>
|
||||
<body>
|
||||
<h1>Policies for indexing categories</h1>
|
||||
|
||||
There are two kinds of policies:
|
||||
<ul>
|
||||
<li>Path policies are based on the path of the category.</li>
|
||||
<li>Ordinal policies are based on the ordinal of the category.</li>
|
||||
</ul>
|
||||
|
||||
Policies are "consulted" with during indexing, for deciding whether a category should
|
||||
be added to the index or not. The two kinds of policies can be used for different purposes.
|
||||
For example, path policies dictates which categories can participate in a drill-down operation,
|
||||
while ordinal policies affect which can be accumulated (e.g. counted).
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
|
@ -36,9 +36,36 @@ import org.apache.lucene.util.encoding.UniqueValuesIntEncoder;
|
|||
*/
|
||||
public class CategoryListParams implements Serializable {
|
||||
|
||||
/** OrdinalPolicy defines which ordinals are encoded for every document. */
|
||||
public static enum OrdinalPolicy {
|
||||
/**
|
||||
* Encodes only the ordinal of leaf nodes. That is, the category A/B/C will
|
||||
* not encode the ordinals of A and A/B.
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> this {@link OrdinalPolicy} requires a special collector or
|
||||
* accumulator, which will fix the parents' counts, unless you are not
|
||||
* interested in the parents counts.
|
||||
*/
|
||||
NO_PARENTS,
|
||||
|
||||
/**
|
||||
* Encodes the ordinals of all path components. That is, the category A/B/C
|
||||
* will encode the ordinals of A and A/B as well. This is the default
|
||||
* {@link OrdinalPolicy}.
|
||||
*/
|
||||
ALL_PARENTS
|
||||
}
|
||||
|
||||
/** The default field used to store the facets information. */
|
||||
public static final String DEFAULT_FIELD = "$facets";
|
||||
|
||||
/**
|
||||
* The default {@link OrdinalPolicy} that's used when encoding a document's
|
||||
* category ordinals.
|
||||
*/
|
||||
public static final OrdinalPolicy DEFAULT_ORDINAL_POLICY = OrdinalPolicy.ALL_PARENTS;
|
||||
|
||||
public final String field;
|
||||
|
||||
private final int hashCode;
|
||||
|
@ -92,6 +119,7 @@ public class CategoryListParams implements Serializable {
|
|||
if (this.hashCode != other.hashCode) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// The above hashcodes might equal each other in the case of a collision,
|
||||
// so at this point only directly term equality testing will settle
|
||||
// the equality test.
|
||||
|
@ -110,4 +138,9 @@ public class CategoryListParams implements Serializable {
|
|||
return new DocValuesCategoryListIterator(docValuesField, createEncoder().createMatchingDecoder());
|
||||
}
|
||||
|
||||
/** Returns the {@link OrdinalPolicy} to use for this {@link CategoryListParams}. */
|
||||
public OrdinalPolicy getOrdinalPolicy() {
|
||||
return DEFAULT_ORDINAL_POLICY;
|
||||
}
|
||||
|
||||
}
|
|
@ -3,8 +3,7 @@ package org.apache.lucene.facet.index.params;
|
|||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.categorypolicy.PathPolicy;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.search.FacetArrays;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
|
||||
|
@ -47,9 +46,8 @@ public class FacetIndexingParams {
|
|||
protected static final CategoryListParams DEFAULT_CATEGORY_LIST_PARAMS = new CategoryListParams();
|
||||
|
||||
/**
|
||||
* A {@link FacetIndexingParams} which fixes {@link OrdinalPolicy} to
|
||||
* {@link OrdinalPolicy#ALL_PARENTS}. This is a singleton equivalent to new
|
||||
* {@link #FacetIndexingParams()}.
|
||||
* A {@link FacetIndexingParams} which fixes a single
|
||||
* {@link CategoryListParams} with {@link OrdinalPolicy#ALL_PARENTS}.
|
||||
*/
|
||||
public static final FacetIndexingParams ALL_PARENTS = new FacetIndexingParams();
|
||||
|
||||
|
@ -62,8 +60,6 @@ public class FacetIndexingParams {
|
|||
*/
|
||||
public static final char DEFAULT_FACET_DELIM_CHAR = '\uF749';
|
||||
|
||||
private final OrdinalPolicy ordinalPolicy = OrdinalPolicy.ALL_PARENTS;
|
||||
private final PathPolicy pathPolicy = PathPolicy.ALL_CATEGORIES;
|
||||
private final int partitionSize = Integer.MAX_VALUE;
|
||||
|
||||
protected final CategoryListParams clParams;
|
||||
|
@ -130,32 +126,12 @@ public class FacetIndexingParams {
|
|||
return Collections.singletonList(clParams);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link OrdinalPolicy} that is used during indexing. By default
|
||||
* returns {@link OrdinalPolicy#ALL_PARENTS} which means that the full
|
||||
* hierarchy will be stored for every document.
|
||||
*/
|
||||
public OrdinalPolicy getOrdinalPolicy() {
|
||||
return ordinalPolicy;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the {@link PathPolicy} that is used during indexing. By default
|
||||
* returns {@link PathPolicy#ALL_CATEGORIES} which means that the full
|
||||
* hierarchy is added as drill-down terms for every document.
|
||||
*/
|
||||
public PathPolicy getPathPolicy() {
|
||||
return pathPolicy;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
final int prime = 31;
|
||||
int result = 1;
|
||||
result = prime * result + ((clParams == null) ? 0 : clParams.hashCode());
|
||||
result = prime * result + ((ordinalPolicy == null) ? 0 : ordinalPolicy.hashCode());
|
||||
result = prime * result + partitionSize;
|
||||
result = prime * result + ((pathPolicy == null) ? 0 : pathPolicy.hashCode());
|
||||
|
||||
for (CategoryListParams clp : getAllCategoryListParams()) {
|
||||
result ^= clp.hashCode();
|
||||
|
@ -183,23 +159,9 @@ public class FacetIndexingParams {
|
|||
} else if (!clParams.equals(other.clParams)) {
|
||||
return false;
|
||||
}
|
||||
if (ordinalPolicy == null) {
|
||||
if (other.ordinalPolicy != null) {
|
||||
return false;
|
||||
}
|
||||
} else if (!ordinalPolicy.equals(other.ordinalPolicy)) {
|
||||
return false;
|
||||
}
|
||||
if (partitionSize != other.partitionSize) {
|
||||
return false;
|
||||
}
|
||||
if (pathPolicy == null) {
|
||||
if (other.pathPolicy != null) {
|
||||
return false;
|
||||
}
|
||||
} else if (!pathPolicy.equals(other.pathPolicy)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Iterable<CategoryListParams> cLs = getAllCategoryListParams();
|
||||
Iterable<CategoryListParams> otherCLs = other.getAllCategoryListParams();
|
||||
|
|
|
@ -7,8 +7,8 @@ import java.util.HashMap;
|
|||
import java.util.List;
|
||||
import java.util.Map.Entry;
|
||||
|
||||
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
|
@ -83,6 +83,7 @@ import org.apache.lucene.util.encoding.DGapVInt8IntDecoder;
|
|||
public class CountingFacetsCollector extends FacetsCollector {
|
||||
|
||||
private final FacetSearchParams fsp;
|
||||
private final OrdinalPolicy ordinalPolicy;
|
||||
private final TaxonomyReader taxoReader;
|
||||
private final BytesRef buf = new BytesRef(32);
|
||||
private final FacetArrays facetArrays;
|
||||
|
@ -104,10 +105,12 @@ public class CountingFacetsCollector extends FacetsCollector {
|
|||
assert assertParams(fsp) == null : assertParams(fsp);
|
||||
|
||||
this.fsp = fsp;
|
||||
CategoryListParams clp = fsp.indexingParams.getCategoryListParams(null);
|
||||
this.ordinalPolicy = clp.getOrdinalPolicy();
|
||||
this.taxoReader = taxoReader;
|
||||
this.facetArrays = facetArrays;
|
||||
this.counts = facetArrays.getIntArray();
|
||||
this.facetsField = fsp.indexingParams.getCategoryListParams(null).field;
|
||||
this.facetsField = clp.field;
|
||||
this.useDirectSource = useDirectSource;
|
||||
}
|
||||
|
||||
|
@ -230,7 +233,7 @@ public class CountingFacetsCollector extends FacetsCollector {
|
|||
|
||||
ParallelTaxonomyArrays arrays = taxoReader.getParallelTaxonomyArrays();
|
||||
|
||||
if (fsp.indexingParams.getOrdinalPolicy() == OrdinalPolicy.NO_PARENTS) {
|
||||
if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) {
|
||||
// need to count parents
|
||||
countParents(arrays.parents());
|
||||
}
|
||||
|
|
|
@ -3,8 +3,8 @@ package org.apache.lucene.facet.search;
|
|||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetSearchParams;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
|
|
|
@ -1,82 +0,0 @@
|
|||
package org.apache.lucene.facet.index.categorypolicy;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
public class OrdinalPolicyTest extends LuceneTestCase {
|
||||
|
||||
@Test
|
||||
public void testDefaultOrdinalPolicy() {
|
||||
// check ordinal policy
|
||||
OrdinalPolicy ordinalPolicy = OrdinalPolicy.ALL_PARENTS;
|
||||
assertFalse("default ordinal policy should not match root", ordinalPolicy.shouldAdd(TaxonomyReader.ROOT_ORDINAL));
|
||||
for (int i = 0; i < 300; i++) {
|
||||
int ordinal = 1 + random().nextInt(Integer.MAX_VALUE - 1);
|
||||
assertTrue("default ordinal policy should match " + ordinal, ordinalPolicy.shouldAdd(ordinal));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNonTopLevelOrdinalPolicy() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
TaxonomyWriter taxonomy = null;
|
||||
taxonomy = new DirectoryTaxonomyWriter(dir);
|
||||
|
||||
int[] topLevelOrdinals = new int[10];
|
||||
String[] topLevelStrings = new String[10];
|
||||
for (int i = 0; i < 10; i++) {
|
||||
topLevelStrings[i] = Integer.valueOf(random().nextInt(30)).toString();
|
||||
topLevelOrdinals[i] = taxonomy.addCategory(new CategoryPath(topLevelStrings[i]));
|
||||
}
|
||||
int[] nonTopLevelOrdinals = new int[300];
|
||||
for (int i = 0; i < 300; i++) {
|
||||
int nComponents = 2 + random().nextInt(10);
|
||||
String[] components = new String[nComponents];
|
||||
components[0] = topLevelStrings[i % 10];
|
||||
for (int j = 1; j < components.length; j++) {
|
||||
components[j] = (Integer.valueOf(random().nextInt(30))).toString();
|
||||
}
|
||||
nonTopLevelOrdinals[i] = taxonomy.addCategory(new CategoryPath(components));
|
||||
}
|
||||
// check ordinal policy
|
||||
OrdinalPolicy ordinalPolicy = new NonTopLevelOrdinalPolicy();
|
||||
ordinalPolicy.init(taxonomy);
|
||||
assertFalse("top level ordinal policy should not match root", ordinalPolicy.shouldAdd(TaxonomyReader.ROOT_ORDINAL));
|
||||
for (int i = 0; i < 10; i++) {
|
||||
assertFalse("top level ordinal policy should not match " + topLevelOrdinals[i],
|
||||
ordinalPolicy.shouldAdd(topLevelOrdinals[i]));
|
||||
}
|
||||
for (int i = 0; i < 300; i++) {
|
||||
assertTrue("top level ordinal policy should match " + nonTopLevelOrdinals[i],
|
||||
ordinalPolicy.shouldAdd(nonTopLevelOrdinals[i]));
|
||||
}
|
||||
|
||||
// check illegal ordinal
|
||||
assertFalse("Should not add illegal ordinal", ordinalPolicy.shouldAdd(100000));
|
||||
taxonomy.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
|
|
@ -1,88 +0,0 @@
|
|||
package org.apache.lucene.facet.index.categorypolicy;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
public class PathPolicyTest extends LuceneTestCase {
|
||||
|
||||
@Test
|
||||
public void testDefaultPathPolicy() {
|
||||
// check path policy
|
||||
CategoryPath cp = CategoryPath.EMPTY;
|
||||
PathPolicy pathPolicy = PathPolicy.ALL_CATEGORIES;
|
||||
assertFalse("default path policy should not accept root", pathPolicy.shouldAdd(cp));
|
||||
for (int i = 0; i < 300; i++) {
|
||||
int nComponents = 1 + random().nextInt(10);
|
||||
String[] components = new String[nComponents];
|
||||
for (int j = 0; j < components.length; j++) {
|
||||
components[j] = (Integer.valueOf(random().nextInt(30))).toString();
|
||||
}
|
||||
cp = new CategoryPath(components);
|
||||
assertTrue("default path policy should accept " + cp.toString('/'), pathPolicy.shouldAdd(cp));
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNonTopLevelPathPolicy() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
TaxonomyWriter taxonomy = null;
|
||||
taxonomy = new DirectoryTaxonomyWriter(dir);
|
||||
|
||||
CategoryPath[] topLevelPaths = new CategoryPath[10];
|
||||
String[] topLevelStrings = new String[10];
|
||||
for (int i = 0; i < 10; i++) {
|
||||
topLevelStrings[i] = Integer.valueOf(random().nextInt(30)).toString();
|
||||
|
||||
topLevelPaths[i] = new CategoryPath(topLevelStrings[i]);
|
||||
taxonomy.addCategory(topLevelPaths[i]);
|
||||
}
|
||||
CategoryPath[] nonTopLevelPaths = new CategoryPath[300];
|
||||
for (int i = 0; i < 300; i++) {
|
||||
int nComponents = 2 + random().nextInt(10);
|
||||
String[] components = new String[nComponents];
|
||||
components[0] = topLevelStrings[i % 10];
|
||||
for (int j = 1; j < components.length; j++) {
|
||||
components[j] = (Integer.valueOf(random().nextInt(30))).toString();
|
||||
}
|
||||
nonTopLevelPaths[i] = new CategoryPath(components);
|
||||
taxonomy.addCategory(nonTopLevelPaths[i]);
|
||||
}
|
||||
// check ordinal policy
|
||||
PathPolicy pathPolicy = new NonTopLevelPathPolicy();
|
||||
assertFalse("top level path policy should not match root",
|
||||
pathPolicy.shouldAdd(CategoryPath.EMPTY));
|
||||
for (int i = 0; i < 10; i++) {
|
||||
assertFalse("top level path policy should not match "
|
||||
+ topLevelPaths[i],
|
||||
pathPolicy.shouldAdd(topLevelPaths[i]));
|
||||
}
|
||||
for (int i = 0; i < 300; i++) {
|
||||
assertTrue("top level path policy should match "
|
||||
+ nonTopLevelPaths[i],
|
||||
pathPolicy.shouldAdd(nonTopLevelPaths[i]));
|
||||
}
|
||||
taxonomy.close();
|
||||
dir.close();
|
||||
}
|
||||
}
|
|
@ -1,10 +1,7 @@
|
|||
package org.apache.lucene.facet.index.params;
|
||||
|
||||
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.categorypolicy.PathPolicy;
|
||||
import org.apache.lucene.facet.search.DrillDown;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.util.PartitionsUtils;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
@ -66,35 +63,4 @@ public class FacetIndexingParamsTest extends LuceneTestCase {
|
|||
assertEquals("Expected default category list field is " + clp.field, clp.field, dfip.getCategoryListParams(null).field);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCategoryPolicies() {
|
||||
FacetIndexingParams dfip = FacetIndexingParams.ALL_PARENTS;
|
||||
// check path policy
|
||||
CategoryPath cp = CategoryPath.EMPTY;
|
||||
PathPolicy pathPolicy = PathPolicy.ALL_CATEGORIES;
|
||||
assertEquals("path policy does not match default for root", pathPolicy.shouldAdd(cp), dfip.getPathPolicy().shouldAdd(cp));
|
||||
for (int i = 0; i < 30; i++) {
|
||||
int nComponents = random().nextInt(10) + 1;
|
||||
String[] components = new String[nComponents];
|
||||
for (int j = 0; j < components.length; j++) {
|
||||
components[j] = (Integer.valueOf(random().nextInt(30))).toString();
|
||||
}
|
||||
cp = new CategoryPath(components);
|
||||
assertEquals("path policy does not match default for " + cp.toString('/'),
|
||||
pathPolicy.shouldAdd(cp), dfip.getPathPolicy().shouldAdd(cp));
|
||||
}
|
||||
|
||||
// check ordinal policy
|
||||
OrdinalPolicy ordinalPolicy = OrdinalPolicy.ALL_PARENTS;
|
||||
assertEquals("ordinal policy does not match default for root",
|
||||
ordinalPolicy.shouldAdd(TaxonomyReader.ROOT_ORDINAL),
|
||||
dfip.getOrdinalPolicy().shouldAdd(TaxonomyReader.ROOT_ORDINAL));
|
||||
for (int i = 0; i < 30; i++) {
|
||||
int ordinal = random().nextInt();
|
||||
assertEquals("ordinal policy does not match default for " + ordinal,
|
||||
ordinalPolicy.shouldAdd(ordinal),
|
||||
dfip.getOrdinalPolicy().shouldAdd(ordinal));
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -12,7 +12,6 @@ import org.apache.lucene.document.Document;
|
|||
import org.apache.lucene.document.Field.Store;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.facet.index.FacetFields;
|
||||
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
|
||||
import org.apache.lucene.facet.index.params.CategoryListParams;
|
||||
import org.apache.lucene.facet.index.params.FacetIndexingParams;
|
||||
import org.apache.lucene.facet.search.params.CountFacetRequest;
|
||||
|
@ -421,8 +420,13 @@ public class CountingFacetsCollectorTest extends LuceneTestCase {
|
|||
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
|
||||
FacetIndexingParams fip = new FacetIndexingParams() {
|
||||
@Override
|
||||
public OrdinalPolicy getOrdinalPolicy() {
|
||||
return OrdinalPolicy.NO_PARENTS;
|
||||
public CategoryListParams getCategoryListParams(CategoryPath category) {
|
||||
return new CategoryListParams() {
|
||||
@Override
|
||||
public OrdinalPolicy getOrdinalPolicy() {
|
||||
return OrdinalPolicy.NO_PARENTS;
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
FacetFields facetFields = new FacetFields(taxoWriter, fip);
|
||||
|
|
Loading…
Reference in New Issue