LUCENE-5339: more tests, add DimConfig.requireDimCount

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5339@1543803 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-11-20 12:33:36 +00:00
parent c960539c4c
commit 189feaacb6
12 changed files with 260 additions and 253 deletions

4
TODO
View File

@ -1,8 +1,8 @@
nocommit this!
TODO
- re-enable ALL_BUT_DIM somehow?
- this is broken for multi-valued non-hierarchical too
- add sugar apis to do sort-by-score, sort-by-field sort AND collect into SimpleFacetsCollector?
- getSpecificValue for a dim isn't reliable
- we could put more stuff into the "schema", e.g. this field is
sorted-set-DV and that one is taxo?
- standardize on facet or facets (e.g. FacetIndexWriter)

View File

@ -21,24 +21,22 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.index.IndexDocument;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
@ -61,6 +59,13 @@ public class DocumentBuilder {
this.config = config;
}
/** Enforces the single-valued contract: a dimension not configured as
 *  multiValued may appear at most once per document.
 *
 *  @param seenDims dimensions already encountered in this document; the
 *         dimension is recorded into this set as a side effect
 *  @param dim the dimension being added
 *  @throws IllegalArgumentException if {@code dim} was already seen */
private static void checkSeen(Set<String> seenDims, String dim) {
  // Set.add returns false when the element was already present, which
  // combines the contains-check and the insert into one call.
  if (seenDims.add(dim) == false) {
    throw new IllegalArgumentException("dimension \"" + dim + "\" is not multiValued, but it appears more than once in this document");
  }
}
public IndexDocument build(IndexDocument doc) throws IOException {
// Find all FacetFields, collated by the actual field:
Map<String,List<FacetField>> byField = new HashMap<String,List<FacetField>>();
@ -71,10 +76,15 @@ public class DocumentBuilder {
// ... and also all AssociationFacetFields
Map<String,List<AssociationFacetField>> assocByField = new HashMap<String,List<AssociationFacetField>>();
Set<String> seenDims = new HashSet<String>();
for(IndexableField field : doc.indexableFields()) {
if (field.fieldType() == FacetField.TYPE) {
FacetField facetField = (FacetField) field;
FacetsConfig.DimConfig dimConfig = config.getDimConfig(facetField.dim);
if (dimConfig.multiValued == false) {
checkSeen(seenDims, facetField.dim);
}
String indexFieldName = dimConfig.indexFieldName;
List<FacetField> fields = byField.get(indexFieldName);
if (fields == null) {
@ -87,6 +97,9 @@ public class DocumentBuilder {
if (field.fieldType() == SortedSetDocValuesFacetField.TYPE) {
SortedSetDocValuesFacetField facetField = (SortedSetDocValuesFacetField) field;
FacetsConfig.DimConfig dimConfig = config.getDimConfig(facetField.dim);
if (dimConfig.multiValued == false) {
checkSeen(seenDims, facetField.dim);
}
String indexFieldName = dimConfig.indexFieldName;
List<SortedSetDocValuesFacetField> fields = dvByField.get(indexFieldName);
if (fields == null) {
@ -99,8 +112,16 @@ public class DocumentBuilder {
if (field.fieldType() == AssociationFacetField.TYPE) {
AssociationFacetField facetField = (AssociationFacetField) field;
FacetsConfig.DimConfig dimConfig = config.getDimConfig(facetField.dim);
if (dimConfig.multiValued == false) {
checkSeen(seenDims, facetField.dim);
}
if (dimConfig.hierarchical) {
throw new IllegalArgumentException("AssociationFacetField cannot be hierarchical (dim=\"" + facetField.dim + "\")");
}
if (dimConfig.requireDimCount) {
throw new IllegalArgumentException("AssociationFacetField cannot requireDimCount (dim=\"" + facetField.dim + "\")");
}
// nocommit how to use a different default name for assocs?
String indexFieldName = dimConfig.indexFieldName;
List<AssociationFacetField> fields = assocByField.get(indexFieldName);
if (fields == null) {
@ -173,9 +194,6 @@ public class DocumentBuilder {
for(Map.Entry<String,List<FacetField>> ent : byField.entrySet()) {
// nocommit maybe we can somehow catch singleValued
// dim appearing more than once?
String indexFieldName = ent.getKey();
//System.out.println(" fields=" + ent.getValue());
@ -190,10 +208,13 @@ public class DocumentBuilder {
FacetLabel cp = FacetLabel.create(facetField.dim, facetField.path);
int ordinal = taxoWriter.addCategory(cp);
if (ordinals.length == ordinals.ints.length) {
ordinals.grow(ordinals.length+1);
}
ordinals.ints[ordinals.length++] = ordinal;
//System.out.println(" add cp=" + cp);
if (ft.hierarchical && ft.multiValued) {
if (ft.multiValued && (ft.hierarchical || ft.requireDimCount)) {
// Add all parents too:
int parent = taxoWriter.getParent(ordinal);
while (parent > 0) {
@ -203,6 +224,11 @@ public class DocumentBuilder {
ordinals.ints[ordinals.length++] = parent;
parent = taxoWriter.getParent(parent);
}
if (ft.requireDimCount == false) {
// Remove last (dimension) ord:
ordinals.length--;
}
}
// Drill down:

View File

@ -20,22 +20,36 @@ package org.apache.lucene.facet.simple;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
/** By default a dimension is flat and single valued; use
* the setters in this class to change that for any dims */
/** By default a dimension is flat, single valued and does
* not require count for the dimension; use
* the setters in this class to change these settings for
* any dims.
*
* <p><b>NOTE</b>: this configuration is not saved into the
* index, but it's vital, and up to the application to
* ensure, that at search time the provided FacetsConfig
* matches what was used during indexing.
*
* @lucene.experimental */
public class FacetsConfig {
public static final String DEFAULT_INDEX_FIELD_NAME = "$facets";
// nocommit pull DimType into here (shai?)
private final Map<String,DimConfig> fieldTypes = new ConcurrentHashMap<String,DimConfig>();
/** @lucene.internal */
// nocommit expose this to the user, vs the setters?
public static final class DimConfig {
/** True if this dimension is hierarchical. */
boolean hierarchical;
/** True if this dimension is multi-valued. */
boolean multiValued;
/** True if the count/aggregate for the entire dimension
* is required, which is unusual (default is false). */
boolean requireDimCount;
/** Actual field where this dimension's facet labels
* should be indexed */
String indexFieldName = DEFAULT_INDEX_FIELD_NAME;
@ -52,22 +66,31 @@ public class FacetsConfig {
}
// nocommit maybe setDimConfig instead?
public synchronized void setHierarchical(String dimName) {
public synchronized void setHierarchical(String dimName, boolean v) {
DimConfig ft = fieldTypes.get(dimName);
if (ft == null) {
ft = new DimConfig();
fieldTypes.put(dimName, ft);
}
ft.hierarchical = true;
ft.hierarchical = v;
}
public synchronized void setMultiValued(String dimName) {
public synchronized void setMultiValued(String dimName, boolean v) {
DimConfig ft = fieldTypes.get(dimName);
if (ft == null) {
ft = new DimConfig();
fieldTypes.put(dimName, ft);
}
ft.multiValued = true;
ft.multiValued = v;
}
/** Sets whether the count/aggregate for the entire dimension
 *  {@code dimName} must be computed (defaults to false), creating the
 *  per-dimension config entry on first use. */
public synchronized void setRequireDimCount(String dimName, boolean v) {
  DimConfig config = fieldTypes.get(dimName);
  if (config == null) {
    // First setting for this dimension: lazily create its config,
    // mirroring the other setters in this class.
    config = new DimConfig();
    fieldTypes.put(dimName, config);
  }
  config.requireDimCount = v;
}
public synchronized void setIndexFieldName(String dimName, String indexFieldName) {

View File

@ -18,15 +18,11 @@ package org.apache.lucene.facet.simple;
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import org.apache.lucene.facet.simple.SimpleFacetsCollector.MatchingDocs;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
@ -132,7 +128,7 @@ public class FastTaxonomyFacetCounts extends TaxonomyFacets {
return null;
}
TopOrdAndIntQueue q = new TopOrdAndIntQueue(topN);
TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
int bottomCount = 0;
@ -163,8 +159,15 @@ public class FastTaxonomyFacetCounts extends TaxonomyFacets {
return null;
}
if (dimConfig.hierarchical && dimConfig.multiValued) {
totCount = counts[dimOrd];
if (dimConfig.multiValued) {
if (dimConfig.requireDimCount) {
totCount = counts[dimOrd];
} else {
// Our sum'd count is not correct, in general:
totCount = -1;
}
} else {
// Our sum'd dim count is accurate, so we keep it
}
LabelAndValue[] labelValues = new LabelAndValue[q.size()];

View File

@ -18,18 +18,13 @@ package org.apache.lucene.facet.simple;
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import org.apache.lucene.facet.simple.SimpleFacetsCollector.MatchingDocs;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IntsRef;
@ -116,7 +111,7 @@ public class TaxonomyFacetCounts extends TaxonomyFacets {
return null;
}
TopOrdAndIntQueue q = new TopOrdAndIntQueue(topN);
TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
int bottomCount = 0;
@ -147,8 +142,15 @@ public class TaxonomyFacetCounts extends TaxonomyFacets {
return null;
}
if (dimConfig.hierarchical && dimConfig.multiValued) {
totCount = counts[dimOrd];
if (dimConfig.multiValued) {
if (dimConfig.requireDimCount) {
totCount = counts[dimOrd];
} else {
// Our sum'd count is not correct, in general:
totCount = -1;
}
} else {
// Our sum'd dim count is accurate, so we keep it
}
LabelAndValue[] labelValues = new LabelAndValue[q.size()];

View File

@ -18,15 +18,10 @@ package org.apache.lucene.facet.simple;
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import org.apache.lucene.facet.simple.SimpleFacetsCollector.MatchingDocs;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
@ -83,36 +78,6 @@ public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets {
++doc;
}
}
// nocommit we could do this lazily instead:
// Rollup any necessary dims:
// nocommit should we rollup?
/*
for(Map.Entry<String,FacetsConfig.DimConfig> ent : config.getDimConfigs().entrySet()) {
String dim = ent.getKey();
FacetsConfig.DimConfig ft = ent.getValue();
if (ft.hierarchical && ft.multiValued == false) {
int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
// It can be -1 if this field was declared in the
// config but never indexed:
if (dimRootOrd > 0) {
counts[dimRootOrd] += rollup(children[dimRootOrd]);
}
}
}
*/
}
/** Recursively sums the values of {@code ord}, its siblings, and all of
 *  their descendants, writing each node's rolled-up total back into
 *  {@code values}; returns the sum over the whole sibling chain.
 *  Recursion terminates at {@link TaxonomyReader#INVALID_ORDINAL}. */
private float rollup(int ord) {
  // Bug fix: the accumulator was declared "int sum", silently truncating
  // the fractional part of every float childValue added to it (and the
  // int was then widened back to float on return).
  float sum = 0;
  while (ord != TaxonomyReader.INVALID_ORDINAL) {
    float childValue = values[ord] + rollup(children[ord]);
    values[ord] = childValue;
    sum += childValue;
    ord = siblings[ord];
  }
  return sum;
}
/** Return the count for a specific path. Returns -1 if
@ -137,8 +102,7 @@ public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets {
return null;
}
TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(topN);
TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN));
float bottomValue = 0;
int ord = children[dimOrd];
@ -169,12 +133,6 @@ public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets {
return null;
}
/*
if (dimConfig.hierarchical && dimConfig.multiValued) {
totCount = counts[dimOrd];
}
*/
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
for(int i=labelValues.length-1;i>=0;i--) {
TopOrdAndFloatQueue.OrdAndValue ordAndValue = q.pop();

View File

@ -18,15 +18,10 @@ package org.apache.lucene.facet.simple;
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import org.apache.lucene.facet.simple.SimpleFacetsCollector.MatchingDocs;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
@ -83,36 +78,6 @@ public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets {
++doc;
}
}
// nocommit we could do this lazily instead:
// Rollup any necessary dims:
// nocommit should we rollup?
/*
for(Map.Entry<String,FacetsConfig.DimConfig> ent : config.getDimConfigs().entrySet()) {
String dim = ent.getKey();
FacetsConfig.DimConfig ft = ent.getValue();
if (ft.hierarchical && ft.multiValued == false) {
int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
// It can be -1 if this field was declared in the
// config but never indexed:
if (dimRootOrd > 0) {
counts[dimRootOrd] += rollup(children[dimRootOrd]);
}
}
}
*/
}
/** Recursively sums the values of {@code ord}, its siblings, and all of
 *  their descendants, writing each node's rolled-up total back into
 *  {@code values}; returns the sum over the whole sibling chain.
 *  Recursion terminates at {@link TaxonomyReader#INVALID_ORDINAL}. */
private int rollup(int ord) {
  int total = 0;
  // Walk the sibling chain; for each node fold in its own value plus the
  // rolled-up total of its children, and persist that per-node total.
  for (int node = ord; node != TaxonomyReader.INVALID_ORDINAL; node = siblings[node]) {
    int subtreeValue = values[node] + rollup(children[node]);
    values[node] = subtreeValue;
    total += subtreeValue;
  }
  return total;
}
/** Return the count for a specific path. Returns -1 if
@ -137,8 +102,7 @@ public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets {
return null;
}
TopOrdAndIntQueue q = new TopOrdAndIntQueue(topN);
TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
int bottomValue = 0;
int ord = children[dimOrd];
@ -169,13 +133,6 @@ public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets {
return null;
}
/*
FacetsConfig.DimConfig ft = config.getDimConfig(path.components[0]);
if (ft.hierarchical && ft.multiValued) {
totCount = counts[dimOrd];
}
*/
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
for(int i=labelValues.length-1;i>=0;i--) {
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();

View File

@ -18,22 +18,16 @@ package org.apache.lucene.facet.simple;
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.facet.simple.SimpleFacetsCollector.MatchingDocs;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.queries.function.FunctionValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IntsRef;
@ -150,8 +144,7 @@ public class TaxonomyFacetSumValueSource extends TaxonomyFacets {
return null;
}
TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(topN);
TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN));
float bottomValue = 0;
int ord = children[dimOrd];
@ -181,8 +174,15 @@ public class TaxonomyFacetSumValueSource extends TaxonomyFacets {
return null;
}
if (dimConfig.hierarchical && dimConfig.multiValued) {
sumValues = values[dimOrd];
if (dimConfig.multiValued) {
if (dimConfig.requireDimCount) {
sumValues = values[dimOrd];
} else {
// Our sum'd count is not correct, in general:
sumValues = -1;
}
} else {
// Our sum'd dim count is accurate, so we keep it
}
LabelAndValue[] labelValues = new LabelAndValue[q.size()];

View File

@ -17,60 +17,20 @@ package org.apache.lucene.facet.simple;
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.FacetTestUtils;
import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.simple.SimpleDrillSideways.SimpleDrillSidewaysResult;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetFields;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField.Type;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.InfoStream;
import org.apache.lucene.util._TestUtil;
import org.junit.Test;
public class TestSimpleDrillSideways extends FacetTestCase {
@ -87,7 +47,7 @@ public class TestSimpleDrillSideways extends FacetTestCase {
taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
config.setHierarchical("Publish Date");
config.setHierarchical("Publish Date", true);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
DocumentBuilder builder = new DocumentBuilder(taxoWriter, config);

View File

@ -17,19 +17,11 @@ package org.apache.lucene.facet.simple;
* limitations under the License.
*/
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.search.FacetsCollector;
import org.apache.lucene.facet.simple.SortedSetDocValuesFacetCounts;
import org.apache.lucene.facet.simple.SortedSetDocValuesReaderState;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
@ -42,10 +34,12 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
// randomly uses SortedSetDV
public void testBasic() throws Exception {
System.out.println("here: " + defaultCodecSupportsSortedSet());
assumeTrue("Test requires SortedSetDV support", defaultCodecSupportsSortedSet());
Directory dir = newDirectory();
FacetsConfig config = new FacetsConfig();
config.setMultiValued("a", true);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
DocumentBuilder builder = new DocumentBuilder(null, config);
@ -54,6 +48,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase {
doc.add(new SortedSetDocValuesFacetField("a", "bar"));
doc.add(new SortedSetDocValuesFacetField("a", "zoo"));
doc.add(new SortedSetDocValuesFacetField("b", "baz"));
System.out.println("TEST: now add");
writer.addDocument(builder.build(doc));
if (random().nextBoolean()) {
writer.commit();

View File

@ -17,28 +17,18 @@ package org.apache.lucene.facet.simple;
* limitations under the License.
*/
import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetsCollector;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.junit.AfterClass;
@ -68,7 +58,9 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
// Cannot mix ints & floats in the same indexed field:
config.setIndexFieldName("int", "$facets.int");
config.setMultiValued("int", true);
config.setIndexFieldName("float", "$facets.float");
config.setMultiValued("float", true);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
DocumentBuilder builder = new DocumentBuilder(taxoWriter, config);
@ -189,4 +181,46 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
}
IOUtils.close(writer, taxoWriter, dir, taxoDir);
}
/** Verifies that indexing an association facet field for a dimension
 *  configured as hierarchical is rejected: DocumentBuilder throws
 *  IllegalArgumentException, since association facets cannot be
 *  hierarchical. */
public void testNoHierarchy() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
FacetsConfig config = new FacetsConfig();
// Illegal combination under test: hierarchical dim + association field.
config.setHierarchical("a", true);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
DocumentBuilder builder = new DocumentBuilder(taxoWriter, config);
Document doc = new Document();
doc.add(new IntAssociationFacetField(14, "a", "x"));
try {
// build() is expected to detect the misconfiguration and throw.
writer.addDocument(builder.build(doc));
fail("did not hit expected exception");
} catch (IllegalArgumentException exc) {
// expected
}
IOUtils.close(writer, taxoWriter, dir, taxoDir);
}
/** Verifies that indexing an association facet field for a dimension
 *  configured with requireDimCount is rejected: DocumentBuilder throws
 *  IllegalArgumentException, since association facets cannot carry a
 *  dimension-level aggregate. */
public void testRequireDimCount() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
FacetsConfig config = new FacetsConfig();
// Illegal combination under test: requireDimCount + association field.
config.setRequireDimCount("a", true);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
DocumentBuilder builder = new DocumentBuilder(taxoWriter, config);
Document doc = new Document();
doc.add(new IntAssociationFacetField(14, "a", "x"));
try {
// build() is expected to detect the misconfiguration and throw.
writer.addDocument(builder.build(doc));
fail("did not hit expected exception");
} catch (IllegalArgumentException exc) {
// expected
}
IOUtils.close(writer, taxoWriter, dir, taxoDir);
}
}

View File

@ -19,8 +19,6 @@ package org.apache.lucene.facet.simple;
import java.io.ByteArrayOutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
@ -34,8 +32,6 @@ import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.util.PrintTaxonomyStats;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
@ -44,7 +40,6 @@ import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util._TestUtil;
@ -59,7 +54,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
config.setHierarchical("Publish Date");
config.setHierarchical("Publish Date", true);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
DocumentBuilder builder = new DocumentBuilder(taxoWriter, config);
@ -91,11 +86,9 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
// NRT open
IndexSearcher searcher = newSearcher(writer.getReader());
writer.close();
// NRT open
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
taxoWriter.close();
// Aggregate the facet counts:
SimpleFacetsCollector c = new SimpleFacetsCollector();
@ -133,10 +126,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
assertTrue(result.indexOf(" /2012") != -1);
assertTrue(result.indexOf(" /20") != -1);
taxoReader.close();
searcher.getIndexReader().close();
dir.close();
taxoDir.close();
IOUtils.close(writer, taxoWriter, searcher.getIndexReader(), taxoReader, taxoDir, dir);
}
// LUCENE-5333
@ -176,11 +166,9 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
// NRT open
IndexSearcher searcher = newSearcher(writer.getReader());
writer.close();
// NRT open
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
taxoWriter.close();
SimpleFacetsCollector c = new SimpleFacetsCollector();
searcher.search(new MatchAllDocsQuery(), c);
@ -195,10 +183,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
assertEquals("b (2)\n bar1 (1)\n bar2 (1)\n", results.get(1).toString());
assertEquals("c (1)\n baz1 (1)\n", results.get(2).toString());
searcher.getIndexReader().close();
taxoReader.close();
taxoDir.close();
dir.close();
IOUtils.close(writer, taxoWriter, searcher.getIndexReader(), taxoReader, taxoDir, dir);
}
public void testWrongIndexFieldName() throws Exception {
@ -220,11 +205,9 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
// NRT open
IndexSearcher searcher = newSearcher(writer.getReader());
writer.close();
// NRT open
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
taxoWriter.close();
SimpleFacetsCollector c = new SimpleFacetsCollector();
searcher.search(new MatchAllDocsQuery(), c);
@ -259,10 +242,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
// expected
}
searcher.getIndexReader().close();
taxoReader.close();
taxoDir.close();
dir.close();
IOUtils.close(writer, taxoWriter, searcher.getIndexReader(), taxoReader, taxoDir, dir);
}
@ -290,10 +270,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
doc.add(newTextField("field", "text", Field.Store.NO));
doc.add(new FacetField("a", "path"));
writer.addDocument(builder.build(doc));
writer.close();
taxoWriter.close();
dir.close();
taxoDir.close();
IOUtils.close(writer, taxoWriter, dir, taxoDir);
}
public void testMultiValuedHierarchy() throws Exception {
@ -301,8 +278,8 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
Directory taxoDir = newDirectory();
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
config.setHierarchical("a");
config.setMultiValued("a");
config.setHierarchical("a", true);
config.setMultiValued("a", true);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
DocumentBuilder builder = new DocumentBuilder(taxoWriter, config);
@ -314,11 +291,9 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
// NRT open
IndexSearcher searcher = newSearcher(writer.getReader());
writer.close();
// NRT open
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
taxoWriter.close();
// Aggregate the facet counts:
SimpleFacetsCollector c = new SimpleFacetsCollector();
@ -333,10 +308,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
assertEquals(1, result.labelValues.length);
assertEquals(1, result.labelValues[0].value.intValue());
searcher.getIndexReader().close();
taxoReader.close();
dir.close();
taxoDir.close();
IOUtils.close(writer, taxoWriter, searcher.getIndexReader(), taxoReader, dir, taxoDir);
}
public void testLabelWithDelimiter() throws Exception {
@ -346,6 +318,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
config.setMultiValued("dim", true);
DocumentBuilder builder = new DocumentBuilder(taxoWriter, config);
Document doc = new Document();
@ -356,11 +329,9 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
// NRT open
IndexSearcher searcher = newSearcher(writer.getReader());
writer.close();
// NRT open
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
taxoWriter.close();
SimpleFacetsCollector c = new SimpleFacetsCollector();
searcher.search(new MatchAllDocsQuery(), c);
@ -370,65 +341,143 @@ public class TestTaxonomyFacetCounts extends FacetTestCase {
assertEquals(1, facets.getSpecificValue("dim", "test\u001Etwo"));
SimpleFacetResult result = facets.getTopChildren(10, "dim");
assertEquals("dim (2)\n test\u001Fone (1)\n test\u001Etwo (1)\n", result.toString());
IOUtils.close(searcher.getIndexReader(), taxoReader, dir, taxoDir);
assertEquals("dim (-1)\n test\u001Fone (1)\n test\u001Etwo (1)\n", result.toString());
IOUtils.close(writer, taxoWriter, searcher.getIndexReader(), taxoReader, dir, taxoDir);
}
/** Verifies that with requireDimCount enabled, getTopChildren reports a
 *  real dimension-level count (rather than -1) across the flat,
 *  multi-valued, and hierarchical+multi-valued cases. */
public void testRequireDimCount() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
// Three configurations, all requiring the dim count:
// dim  = single-valued flat, dim2 = multi-valued flat,
// dim3 = multi-valued hierarchical.
config.setMultiValued("dim2", true);
config.setMultiValued("dim3", true);
config.setHierarchical("dim3", true);
config.setRequireDimCount("dim", true);
config.setRequireDimCount("dim2", true);
config.setRequireDimCount("dim3", true);
DocumentBuilder builder = new DocumentBuilder(taxoWriter, config);
Document doc = new Document();
doc.add(newTextField("field", "text", Field.Store.NO));
doc.add(new FacetField("dim", "a"));
doc.add(new FacetField("dim2", "a"));
doc.add(new FacetField("dim2", "b"));
doc.add(new FacetField("dim3", "a", "b"));
doc.add(new FacetField("dim3", "a", "c"));
writer.addDocument(builder.build(doc));
// NRT open
IndexSearcher searcher = newSearcher(writer.getReader());
// NRT open
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
SimpleFacetsCollector c = new SimpleFacetsCollector();
searcher.search(new MatchAllDocsQuery(), c);
Facets facets = getFacetCounts(taxoReader, config, c);
// Only one document was indexed, so each dim's count is expected to be
// 1 even where a dim holds multiple values in that document
// (presumably dim counts are per-document — TODO confirm).
assertEquals(1, facets.getTopChildren(10, "dim").value);
assertEquals(1, facets.getTopChildren(10, "dim2").value);
assertEquals(1, facets.getTopChildren(10, "dim3").value);
IOUtils.close(writer, taxoWriter, searcher.getIndexReader(), taxoReader, dir, taxoDir);
}
/*
// LUCENE-4583: make sure if we require > 32 KB for one
// document, we don't hit exc when using Facet42DocValuesFormat
public void testManyFacetsInOneDocument() throws Exception {
  public void testManyFacetsInOneDocument() throws Exception {
    assumeTrue("default Codec doesn't support huge BinaryDocValues", _TestUtil.fieldSupportsHugeBinaryDocValues(FacetsConfig.DEFAULT_INDEX_FIELD_NAME));
    Directory dir = newDirectory();
    Directory taxoDir = newDirectory();
    IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);

    // Writes facet ords to a separate directory from the main index:
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);

    FacetsConfig config = new FacetsConfig();
    config.setMultiValued("dim", true);
    DocumentBuilder builder = new DocumentBuilder(taxoWriter, config);

    int numLabels = _TestUtil.nextInt(random(), 40000, 100000);

    Document doc = new Document();
    doc.add(newTextField("field", "text", Field.Store.NO));
    for (int i = 0; i < numLabels; i++) {
      doc.add(new FacetField("dim", "" + i));
    }
    writer.addDocument(builder.build(doc));

    // NRT open
    IndexSearcher searcher = newSearcher(writer.getReader());

    // NRT open
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);

    SimpleFacetsCollector c = new SimpleFacetsCollector();

    // MatchAllDocsQuery is for "browsing" (counts facets
    // for all non-deleted docs in the index); normally
    // you'd use a "normal" query, and use MultiCollector to
    // wrap collecting the "normal" hits and also facets:
    searcher.search(new MatchAllDocsQuery(), c);

    Facets facets = getFacetCounts(taxoReader, config, c);

    SimpleFacetResult result = facets.getTopChildren(Integer.MAX_VALUE, "dim");
    assertEquals(numLabels, result.labelValues.length);
    Set<String> allLabels = new HashSet<String>();
    for (LabelAndValue labelValue : result.labelValues) {
      allLabels.add(labelValue.label);
      assertEquals(1, labelValue.value.intValue());
    }
    assertEquals(numLabels, allLabels.size());

    IOUtils.close(searcher.getIndexReader(), taxoWriter, writer, taxoReader, dir, taxoDir);
  }
// Make sure we catch when app didn't declare field as
// hierarchical but it was:
public void testDetectHierarchicalField() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
FacetsConfig config = new FacetsConfig();
DocumentBuilder builder = new DocumentBuilder(taxoWriter, config);
Document doc = new Document();
doc.add(newTextField("field", "text", Field.Store.NO));
doc.add(new FacetField("a", "path", "other"));
try {
builder.build(doc);
fail("did not hit expected exception");
} catch (IllegalArgumentException iae) {
// expected
}
IOUtils.close(writer, taxoWriter, dir, taxoDir);
}
// Make sure we catch when app didn't declare field as
// multi-valued but it was:
public void testDetectMultiValuedField() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
FacetsConfig config = new FacetsConfig();
DocumentBuilder builder = new DocumentBuilder(taxoWriter, config);
Document doc = new Document();
doc.add(newTextField("field", "text", Field.Store.NO));
doc.add(new FacetField("a", "path"));
doc.add(new FacetField("a", "path2"));
try {
builder.build(doc);
fail("did not hit expected exception");
} catch (IllegalArgumentException iae) {
// expected
}
IOUtils.close(writer, taxoWriter, dir, taxoDir);
}
*/
}