LUCENE-4700: move OrdinalPolicy to CategoryListParams

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1437661 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2013-01-23 19:49:43 +00:00
parent f13080dd64
commit f5fa833da7
16 changed files with 64 additions and 559 deletions

View File

@ -38,6 +38,11 @@ Changes in backwards compatibility policy
returns the most optimized collector for the given parameters.
(Shai Erera, Michael McCandless)
* LUCENE-4700: OrdinalPolicy is now per CategoryListParams, and is no longer
an interface, but rather an enum with values NO_PARENTS and ALL_PARENTS.
PathPolicy was removed, you should extend FacetFields and DrillDownStream
to control which categories are added as drill-down terms. (Shai Erera)
Optimizations
* LUCENE-4687: BloomFilterPostingsFormat now lazily initializes delegate

View File

@ -6,8 +6,8 @@ import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
@ -120,7 +120,7 @@ public class CountingListBuilder implements CategoryListBuilder {
public CountingListBuilder(CategoryListParams categoryListParams, FacetIndexingParams indexingParams,
TaxonomyWriter taxoWriter) {
this.taxoWriter = taxoWriter;
this.ordinalPolicy = indexingParams.getOrdinalPolicy();
this.ordinalPolicy = categoryListParams.getOrdinalPolicy();
if (indexingParams.getPartitionSize() == Integer.MAX_VALUE) {
ordinalsEncoder = new NoPartitionsOrdinalsEncoder(categoryListParams);
} else {
@ -143,14 +143,14 @@ public class CountingListBuilder implements CategoryListBuilder {
public Map<String,BytesRef> build(IntsRef ordinals, Iterable<CategoryPath> categories) throws IOException {
int upto = ordinals.length; // since we add ordinals to IntsRef, iterate upto original length
for (int i = 0; i < upto; i++) {
int ordinal = ordinals.ints[i];
int parent = taxoWriter.getParent(ordinal);
while (parent > 0) {
if (ordinalPolicy.shouldAdd(parent)) {
if (ordinalPolicy == OrdinalPolicy.ALL_PARENTS) { // add all parents too
for (int i = 0; i < upto; i++) {
int ordinal = ordinals.ints[i];
int parent = taxoWriter.getParent(ordinal);
while (parent > 0) {
ordinals.ints[ordinals.length++] = parent;
parent = taxoWriter.getParent(parent);
}
parent = taxoWriter.getParent(parent);
}
}
return ordinalsEncoder.encode(ordinals);

View File

@ -5,7 +5,6 @@ import java.util.Iterator;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.facet.index.categorypolicy.PathPolicy;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.taxonomy.CategoryPath;
@ -36,7 +35,6 @@ public class DrillDownStream extends TokenStream {
private final FacetIndexingParams indexingParams;
private final Iterator<CategoryPath> categories;
private final CharTermAttribute termAttribute;
private final PathPolicy pathPolicy;
private CategoryPath current;
private boolean isParent;
@ -45,7 +43,6 @@ public class DrillDownStream extends TokenStream {
termAttribute = addAttribute(CharTermAttribute.class);
this.categories = categories.iterator();
this.indexingParams = indexingParams;
this.pathPolicy = indexingParams.getPathPolicy();
}
protected void addAdditionalAttributes(CategoryPath category, boolean isParent) {
@ -71,10 +68,7 @@ public class DrillDownStream extends TokenStream {
addAdditionalAttributes(current, isParent);
// prepare current for next call by trimming the last component (parents)
do {
// skip all parent categories which are not accepted by PathPolicy
current = current.subpath(current.length - 1);
} while (!pathPolicy.shouldAdd(current) && current.length > 0);
current = current.subpath(current.length - 1);
isParent = true;
return true;
}

View File

@ -1,73 +0,0 @@
package org.apache.lucene.facet.index.categorypolicy;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Filter out any "top level" category ordinals. <br> {@link #shouldAdd(int)}.
*
* @lucene.experimental
*/
public class NonTopLevelOrdinalPolicy implements OrdinalPolicy {
/**
* The taxonomyWriter with which the given ordinals' parent is determined.
*/
private TaxonomyWriter taxonomyWriter;
/**
* Constructs a new non-top-level-ordinal-filter. With a given
* taxonomyWriter.
*
*/
public NonTopLevelOrdinalPolicy() {
this.taxonomyWriter = null;
}
/**
* @param taxonomyWriter
* A relevant taxonomyWriter object, with which ordinals sent to
* {@link #shouldAdd(int)} are examined.
*/
@Override
public void init(TaxonomyWriter taxonomyWriter) {
this.taxonomyWriter = taxonomyWriter;
}
/**
* Filters out ordinal which are ROOT or who's parent is ROOT. In order to
* determine if a parent is root, there's a need for
* {@link TaxonomyWriter#getParent(int)}.
*/
@Override
public boolean shouldAdd(int ordinal) {
if (ordinal > TaxonomyReader.ROOT_ORDINAL) {
try {
if (this.taxonomyWriter.getParent(ordinal) > TaxonomyReader.ROOT_ORDINAL) {
return true;
}
} catch (Exception e) {
return false;
}
}
return false;
}
}

View File

@ -1,44 +0,0 @@
package org.apache.lucene.facet.index.categorypolicy;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* This class filters our the ROOT category, and it's direct descendants. For
* more information see {@link PathPolicy}.
*
* @lucene.experimental
*/
public class NonTopLevelPathPolicy implements PathPolicy {
/**
* The shortest path length delivered is two components (root + one child).
*/
public final int DEFAULT_MINIMAL_SUBPATH_LENGTH = 2;
/**
* Filters out (returns false) CategoryPaths equal or less than
* {@link TaxonomyReader#ROOT_ORDINAL}. true otherwise.
*/
@Override
public boolean shouldAdd(CategoryPath categoryPath) {
return categoryPath.length >= DEFAULT_MINIMAL_SUBPATH_LENGTH;
}
}

View File

@ -1,83 +0,0 @@
package org.apache.lucene.facet.index.categorypolicy;
import java.io.Serializable;
import org.apache.lucene.facet.search.FacetsAccumulator;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* A policy for adding category parent ordinals to the list of ordinals that are
* encoded for a given document. The default {@link #ALL_PARENTS} policy always
* adds all parents, where {@link #NO_PARENTS} never adds any parents.
*
* @lucene.experimental
*/
public interface OrdinalPolicy extends Serializable {
/**
* An {@link OrdinalPolicy} which never stores parent ordinals. Useful if you
* only want to store the exact categories that were added to the document.
* Note that this is a rather expert policy, which requires a matching
* {@link FacetsAccumulator} that computes the weight of the parent categories
* on-the-fly.
*/
public static final OrdinalPolicy NO_PARENTS = new OrdinalPolicy() {
@Override
public boolean shouldAdd(int ordinal) { return false; }
@Override
public void init(TaxonomyWriter taxonomyWriter) {}
};
/**
* An {@link OrdinalPolicy} which stores all parent ordinals, except
* {@link TaxonomyReader#ROOT_ORDINAL}. This is the default
* {@link OrdinalPolicy} and works with the default {@link FacetsAccumulator}.
*/
public static final OrdinalPolicy ALL_PARENTS = new OrdinalPolicy() {
@Override
public boolean shouldAdd(int ordinal) { return ordinal > TaxonomyReader.ROOT_ORDINAL; }
@Override
public void init(TaxonomyWriter taxonomyWriter) {}
};
/**
* Check whether a given category ordinal should be added to the stream.
*
* @param ordinal
* A given category ordinal which is to be tested for stream
* addition.
* @return <code>true</code> if the category should be added.
* <code>false</code> otherwise.
*/
public abstract boolean shouldAdd(int ordinal);
/**
* Initialize the policy with a TaxonomyWriter. This method can be
* implemented as noop if the ordinal policy is not taxonomy dependent
*
* @param taxonomyWriter
* A relevant taxonomyWriter object, with which ordinals sent to
* {@link #shouldAdd(int)} are examined.
*/
public abstract void init(TaxonomyWriter taxonomyWriter);
}

View File

@ -1,55 +0,0 @@
package org.apache.lucene.facet.index.categorypolicy;
import java.io.Serializable;
import org.apache.lucene.facet.index.DrillDownStream;
import org.apache.lucene.facet.taxonomy.CategoryPath;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Determines which {@link CategoryPath categories} should be added as terms to
* the {@link DrillDownStream}. The default approach is implemented by
* {@link #ALL_CATEGORIES}.
*
* @lucene.experimental
*/
public interface PathPolicy extends Serializable {
/**
* A {@link PathPolicy} which adds all {@link CategoryPath} that have at least
* one component (i.e. {@link CategoryPath#length} &gt; 0) to the categories
* stream.
*/
public static final PathPolicy ALL_CATEGORIES = new PathPolicy() {
@Override
public boolean shouldAdd(CategoryPath categoryPath) { return categoryPath.length > 0; }
};
/**
* Check whether a given category path should be added to the stream.
*
* @param categoryPath
* A given category path which is to be tested for stream
* addition.
* @return <code>true</code> if the category path should be added.
* <code>false</code> otherwise.
*/
public abstract boolean shouldAdd(CategoryPath categoryPath);
}

View File

@ -1,37 +0,0 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
<html>
<head>
<title>Policies for indexing categories</title>
</head>
<body>
<h1>Policies for indexing categories</h1>
There are two kinds of policies:
<ul>
<li>Path policies are based on the path of the category.</li>
<li>Ordinal policies are based on the ordinal of the category.</li>
</ul>
Policies are "consulted" with during indexing, for deciding whether a category should
be added to the index or not. The two kinds of policies can be used for different purposes.
For example, path policies dictates which categories can participate in a drill-down operation,
while ordinal policies affect which can be accumulated (e.g. counted).
</body>
</html>

View File

@ -36,9 +36,36 @@ import org.apache.lucene.util.encoding.UniqueValuesIntEncoder;
*/
public class CategoryListParams implements Serializable {
/** OrdinalPolicy defines which ordinals are encoded for every document. */
public static enum OrdinalPolicy {
/**
* Encodes only the ordinal of leaf nodes. That is, the category A/B/C will
* not encode the ordinals of A and A/B.
*
* <p>
* <b>NOTE:</b> this {@link OrdinalPolicy} requires a special collector or
* accumulator, which will fix the parents' counts, unless you are not
* interested in the parents counts.
*/
NO_PARENTS,
/**
* Encodes the ordinals of all path components. That is, the category A/B/C
* will encode the ordinals of A and A/B as well. This is the default
* {@link OrdinalPolicy}.
*/
ALL_PARENTS
}
/** The default field used to store the facets information. */
public static final String DEFAULT_FIELD = "$facets";
/**
* The default {@link OrdinalPolicy} that's used when encoding a document's
* category ordinals.
*/
public static final OrdinalPolicy DEFAULT_ORDINAL_POLICY = OrdinalPolicy.ALL_PARENTS;
public final String field;
private final int hashCode;
@ -92,6 +119,7 @@ public class CategoryListParams implements Serializable {
if (this.hashCode != other.hashCode) {
return false;
}
// The above hashcodes might equal each other in the case of a collision,
// so at this point only directly term equality testing will settle
// the equality test.
@ -110,4 +138,9 @@ public class CategoryListParams implements Serializable {
return new DocValuesCategoryListIterator(docValuesField, createEncoder().createMatchingDecoder());
}
/** Returns the {@link OrdinalPolicy} to use for this {@link CategoryListParams}. */
public OrdinalPolicy getOrdinalPolicy() {
return DEFAULT_ORDINAL_POLICY;
}
}

View File

@ -3,8 +3,7 @@ package org.apache.lucene.facet.index.params;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
import org.apache.lucene.facet.index.categorypolicy.PathPolicy;
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.search.FacetArrays;
import org.apache.lucene.facet.taxonomy.CategoryPath;
@ -47,9 +46,8 @@ public class FacetIndexingParams {
protected static final CategoryListParams DEFAULT_CATEGORY_LIST_PARAMS = new CategoryListParams();
/**
* A {@link FacetIndexingParams} which fixes {@link OrdinalPolicy} to
* {@link OrdinalPolicy#ALL_PARENTS}. This is a singleton equivalent to new
* {@link #FacetIndexingParams()}.
* A {@link FacetIndexingParams} which fixes a single
* {@link CategoryListParams} with {@link OrdinalPolicy#ALL_PARENTS}.
*/
public static final FacetIndexingParams ALL_PARENTS = new FacetIndexingParams();
@ -62,8 +60,6 @@ public class FacetIndexingParams {
*/
public static final char DEFAULT_FACET_DELIM_CHAR = '\uF749';
private final OrdinalPolicy ordinalPolicy = OrdinalPolicy.ALL_PARENTS;
private final PathPolicy pathPolicy = PathPolicy.ALL_CATEGORIES;
private final int partitionSize = Integer.MAX_VALUE;
protected final CategoryListParams clParams;
@ -130,32 +126,12 @@ public class FacetIndexingParams {
return Collections.singletonList(clParams);
}
/**
* Returns the {@link OrdinalPolicy} that is used during indexing. By default
* returns {@link OrdinalPolicy#ALL_PARENTS} which means that the full
* hierarchy will be stored for every document.
*/
public OrdinalPolicy getOrdinalPolicy() {
return ordinalPolicy;
}
/**
* Returns the {@link PathPolicy} that is used during indexing. By default
* returns {@link PathPolicy#ALL_CATEGORIES} which means that the full
* hierarchy is added as drill-down terms for every document.
*/
public PathPolicy getPathPolicy() {
return pathPolicy;
}
@Override
public int hashCode() {
final int prime = 31;
int result = 1;
result = prime * result + ((clParams == null) ? 0 : clParams.hashCode());
result = prime * result + ((ordinalPolicy == null) ? 0 : ordinalPolicy.hashCode());
result = prime * result + partitionSize;
result = prime * result + ((pathPolicy == null) ? 0 : pathPolicy.hashCode());
for (CategoryListParams clp : getAllCategoryListParams()) {
result ^= clp.hashCode();
@ -183,23 +159,9 @@ public class FacetIndexingParams {
} else if (!clParams.equals(other.clParams)) {
return false;
}
if (ordinalPolicy == null) {
if (other.ordinalPolicy != null) {
return false;
}
} else if (!ordinalPolicy.equals(other.ordinalPolicy)) {
return false;
}
if (partitionSize != other.partitionSize) {
return false;
}
if (pathPolicy == null) {
if (other.pathPolicy != null) {
return false;
}
} else if (!pathPolicy.equals(other.pathPolicy)) {
return false;
}
Iterable<CategoryListParams> cLs = getAllCategoryListParams();
Iterable<CategoryListParams> otherCLs = other.getAllCategoryListParams();

View File

@ -7,8 +7,8 @@ import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetRequest;
@ -83,6 +83,7 @@ import org.apache.lucene.util.encoding.DGapVInt8IntDecoder;
public class CountingFacetsCollector extends FacetsCollector {
private final FacetSearchParams fsp;
private final OrdinalPolicy ordinalPolicy;
private final TaxonomyReader taxoReader;
private final BytesRef buf = new BytesRef(32);
private final FacetArrays facetArrays;
@ -104,10 +105,12 @@ public class CountingFacetsCollector extends FacetsCollector {
assert assertParams(fsp) == null : assertParams(fsp);
this.fsp = fsp;
CategoryListParams clp = fsp.indexingParams.getCategoryListParams(null);
this.ordinalPolicy = clp.getOrdinalPolicy();
this.taxoReader = taxoReader;
this.facetArrays = facetArrays;
this.counts = facetArrays.getIntArray();
this.facetsField = fsp.indexingParams.getCategoryListParams(null).field;
this.facetsField = clp.field;
this.useDirectSource = useDirectSource;
}
@ -230,7 +233,7 @@ public class CountingFacetsCollector extends FacetsCollector {
ParallelTaxonomyArrays arrays = taxoReader.getParallelTaxonomyArrays();
if (fsp.indexingParams.getOrdinalPolicy() == OrdinalPolicy.NO_PARENTS) {
if (ordinalPolicy == OrdinalPolicy.NO_PARENTS) {
// need to count parents
countParents(arrays.parents());
}

View File

@ -3,8 +3,8 @@ package org.apache.lucene.facet.search;
import java.io.IOException;
import java.util.List;
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.CategoryListParams.OrdinalPolicy;
import org.apache.lucene.facet.search.params.FacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;

View File

@ -1,82 +0,0 @@
package org.apache.lucene.facet.index.categorypolicy;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class OrdinalPolicyTest extends LuceneTestCase {
@Test
public void testDefaultOrdinalPolicy() {
// check ordinal policy
OrdinalPolicy ordinalPolicy = OrdinalPolicy.ALL_PARENTS;
assertFalse("default ordinal policy should not match root", ordinalPolicy.shouldAdd(TaxonomyReader.ROOT_ORDINAL));
for (int i = 0; i < 300; i++) {
int ordinal = 1 + random().nextInt(Integer.MAX_VALUE - 1);
assertTrue("default ordinal policy should match " + ordinal, ordinalPolicy.shouldAdd(ordinal));
}
}
@Test
public void testNonTopLevelOrdinalPolicy() throws Exception {
Directory dir = newDirectory();
TaxonomyWriter taxonomy = null;
taxonomy = new DirectoryTaxonomyWriter(dir);
int[] topLevelOrdinals = new int[10];
String[] topLevelStrings = new String[10];
for (int i = 0; i < 10; i++) {
topLevelStrings[i] = Integer.valueOf(random().nextInt(30)).toString();
topLevelOrdinals[i] = taxonomy.addCategory(new CategoryPath(topLevelStrings[i]));
}
int[] nonTopLevelOrdinals = new int[300];
for (int i = 0; i < 300; i++) {
int nComponents = 2 + random().nextInt(10);
String[] components = new String[nComponents];
components[0] = topLevelStrings[i % 10];
for (int j = 1; j < components.length; j++) {
components[j] = (Integer.valueOf(random().nextInt(30))).toString();
}
nonTopLevelOrdinals[i] = taxonomy.addCategory(new CategoryPath(components));
}
// check ordinal policy
OrdinalPolicy ordinalPolicy = new NonTopLevelOrdinalPolicy();
ordinalPolicy.init(taxonomy);
assertFalse("top level ordinal policy should not match root", ordinalPolicy.shouldAdd(TaxonomyReader.ROOT_ORDINAL));
for (int i = 0; i < 10; i++) {
assertFalse("top level ordinal policy should not match " + topLevelOrdinals[i],
ordinalPolicy.shouldAdd(topLevelOrdinals[i]));
}
for (int i = 0; i < 300; i++) {
assertTrue("top level ordinal policy should match " + nonTopLevelOrdinals[i],
ordinalPolicy.shouldAdd(nonTopLevelOrdinals[i]));
}
// check illegal ordinal
assertFalse("Should not add illegal ordinal", ordinalPolicy.shouldAdd(100000));
taxonomy.close();
dir.close();
}
}

View File

@ -1,88 +0,0 @@
package org.apache.lucene.facet.index.categorypolicy;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class PathPolicyTest extends LuceneTestCase {
@Test
public void testDefaultPathPolicy() {
// check path policy
CategoryPath cp = CategoryPath.EMPTY;
PathPolicy pathPolicy = PathPolicy.ALL_CATEGORIES;
assertFalse("default path policy should not accept root", pathPolicy.shouldAdd(cp));
for (int i = 0; i < 300; i++) {
int nComponents = 1 + random().nextInt(10);
String[] components = new String[nComponents];
for (int j = 0; j < components.length; j++) {
components[j] = (Integer.valueOf(random().nextInt(30))).toString();
}
cp = new CategoryPath(components);
assertTrue("default path policy should accept " + cp.toString('/'), pathPolicy.shouldAdd(cp));
}
}
@Test
public void testNonTopLevelPathPolicy() throws Exception {
Directory dir = newDirectory();
TaxonomyWriter taxonomy = null;
taxonomy = new DirectoryTaxonomyWriter(dir);
CategoryPath[] topLevelPaths = new CategoryPath[10];
String[] topLevelStrings = new String[10];
for (int i = 0; i < 10; i++) {
topLevelStrings[i] = Integer.valueOf(random().nextInt(30)).toString();
topLevelPaths[i] = new CategoryPath(topLevelStrings[i]);
taxonomy.addCategory(topLevelPaths[i]);
}
CategoryPath[] nonTopLevelPaths = new CategoryPath[300];
for (int i = 0; i < 300; i++) {
int nComponents = 2 + random().nextInt(10);
String[] components = new String[nComponents];
components[0] = topLevelStrings[i % 10];
for (int j = 1; j < components.length; j++) {
components[j] = (Integer.valueOf(random().nextInt(30))).toString();
}
nonTopLevelPaths[i] = new CategoryPath(components);
taxonomy.addCategory(nonTopLevelPaths[i]);
}
// check ordinal policy
PathPolicy pathPolicy = new NonTopLevelPathPolicy();
assertFalse("top level path policy should not match root",
pathPolicy.shouldAdd(CategoryPath.EMPTY));
for (int i = 0; i < 10; i++) {
assertFalse("top level path policy should not match "
+ topLevelPaths[i],
pathPolicy.shouldAdd(topLevelPaths[i]));
}
for (int i = 0; i < 300; i++) {
assertTrue("top level path policy should match "
+ nonTopLevelPaths[i],
pathPolicy.shouldAdd(nonTopLevelPaths[i]));
}
taxonomy.close();
dir.close();
}
}

View File

@ -1,10 +1,7 @@
package org.apache.lucene.facet.index.params;
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
import org.apache.lucene.facet.index.categorypolicy.PathPolicy;
import org.apache.lucene.facet.search.DrillDown;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.util.PartitionsUtils;
import org.apache.lucene.index.Term;
import org.apache.lucene.util.LuceneTestCase;
@ -66,35 +63,4 @@ public class FacetIndexingParamsTest extends LuceneTestCase {
assertEquals("Expected default category list field is " + clp.field, clp.field, dfip.getCategoryListParams(null).field);
}
@Test
public void testCategoryPolicies() {
FacetIndexingParams dfip = FacetIndexingParams.ALL_PARENTS;
// check path policy
CategoryPath cp = CategoryPath.EMPTY;
PathPolicy pathPolicy = PathPolicy.ALL_CATEGORIES;
assertEquals("path policy does not match default for root", pathPolicy.shouldAdd(cp), dfip.getPathPolicy().shouldAdd(cp));
for (int i = 0; i < 30; i++) {
int nComponents = random().nextInt(10) + 1;
String[] components = new String[nComponents];
for (int j = 0; j < components.length; j++) {
components[j] = (Integer.valueOf(random().nextInt(30))).toString();
}
cp = new CategoryPath(components);
assertEquals("path policy does not match default for " + cp.toString('/'),
pathPolicy.shouldAdd(cp), dfip.getPathPolicy().shouldAdd(cp));
}
// check ordinal policy
OrdinalPolicy ordinalPolicy = OrdinalPolicy.ALL_PARENTS;
assertEquals("ordinal policy does not match default for root",
ordinalPolicy.shouldAdd(TaxonomyReader.ROOT_ORDINAL),
dfip.getOrdinalPolicy().shouldAdd(TaxonomyReader.ROOT_ORDINAL));
for (int i = 0; i < 30; i++) {
int ordinal = random().nextInt();
assertEquals("ordinal policy does not match default for " + ordinal,
ordinalPolicy.shouldAdd(ordinal),
dfip.getOrdinalPolicy().shouldAdd(ordinal));
}
}
}

View File

@ -12,7 +12,6 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.index.categorypolicy.OrdinalPolicy;
import org.apache.lucene.facet.index.params.CategoryListParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.params.CountFacetRequest;
@ -421,8 +420,13 @@ public class CountingFacetsCollectorTest extends LuceneTestCase {
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
FacetIndexingParams fip = new FacetIndexingParams() {
@Override
public OrdinalPolicy getOrdinalPolicy() {
return OrdinalPolicy.NO_PARENTS;
public CategoryListParams getCategoryListParams(CategoryPath category) {
return new CategoryListParams() {
@Override
public OrdinalPolicy getOrdinalPolicy() {
return OrdinalPolicy.NO_PARENTS;
}
};
}
};
FacetFields facetFields = new FacetFields(taxoWriter, fip);