diff --git a/TODO b/TODO index fae69b2c5a5..1bea89300d1 100644 --- a/TODO +++ b/TODO @@ -1,8 +1,8 @@ nocommit this! TODO - - re-enable ALL_BUT_DIM somehow? - - this is broken for multi-valued non-hierarchical too + - add sugar apis to do sort-by-score, sort-by-field sort AND collect into SimpleFacetsCollector? + - getSpecificValue for a dim isn't reliable - we could put more stuff into the "schema", e.g. this field is sorted-set-DV and that one is taxo? - standardize on facet or facets (e.g. FacetIndexWriter) diff --git a/lucene/facet/src/java/org/apache/lucene/facet/simple/DocumentBuilder.java b/lucene/facet/src/java/org/apache/lucene/facet/simple/DocumentBuilder.java index e1598d889d3..6e94a6d892d 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/simple/DocumentBuilder.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/simple/DocumentBuilder.java @@ -21,24 +21,22 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import org.apache.lucene.document.BinaryDocValuesField; import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; import org.apache.lucene.document.SortedSetDocValuesField; import org.apache.lucene.document.StringField; import org.apache.lucene.facet.taxonomy.FacetLabel; import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.index.IndexDocument; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableFieldType; import org.apache.lucene.index.StorableField; -import org.apache.lucene.store.Directory; import org.apache.lucene.util.ArrayUtil; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IntsRef; @@ -61,6 +59,13 @@ public class DocumentBuilder { this.config = config; } + private static void checkSeen(Set seenDims, String dim) { + if (seenDims.contains(dim)) { + throw new IllegalArgumentException("dimension \"" + dim + "\" is not multiValued, but it appears more than once in this document"); + } + seenDims.add(dim); + } + public IndexDocument build(IndexDocument doc) throws IOException { // Find all FacetFields, collated by the actual field: Map> byField = new HashMap>(); @@ -71,10 +76,15 @@ public class DocumentBuilder { // ... and also all AssociationFacetFields Map> assocByField = new HashMap>(); + Set seenDims = new HashSet(); + for(IndexableField field : doc.indexableFields()) { if (field.fieldType() == FacetField.TYPE) { FacetField facetField = (FacetField) field; FacetsConfig.DimConfig dimConfig = config.getDimConfig(facetField.dim); + if (dimConfig.multiValued == false) { + checkSeen(seenDims, facetField.dim); + } String indexFieldName = dimConfig.indexFieldName; List fields = byField.get(indexFieldName); if (fields == null) { @@ -87,6 +97,9 @@ public class DocumentBuilder { if (field.fieldType() == SortedSetDocValuesFacetField.TYPE) { SortedSetDocValuesFacetField facetField = (SortedSetDocValuesFacetField) field; FacetsConfig.DimConfig dimConfig = config.getDimConfig(facetField.dim); + if (dimConfig.multiValued == false) { + checkSeen(seenDims, facetField.dim); + } String indexFieldName = dimConfig.indexFieldName; List fields = dvByField.get(indexFieldName); if (fields == null) { @@ -99,8 +112,16 @@ public class DocumentBuilder { if (field.fieldType() == AssociationFacetField.TYPE) { AssociationFacetField facetField = (AssociationFacetField) field; FacetsConfig.DimConfig dimConfig = config.getDimConfig(facetField.dim); + if (dimConfig.multiValued == false) { + checkSeen(seenDims, facetField.dim); + } + if (dimConfig.hierarchical) { + throw new IllegalArgumentException("AssociationFacetField cannot be hierarchical (dim=\"" + facetField.dim + "\")"); + } + if (dimConfig.requireDimCount) { + throw new IllegalArgumentException("AssociationFacetField cannot requireDimCount (dim=\"" + facetField.dim + "\")"); + } - // nocommit how to use a different default name for assocs? String indexFieldName = dimConfig.indexFieldName; List fields = assocByField.get(indexFieldName); if (fields == null) { @@ -173,9 +194,6 @@ public class DocumentBuilder { for(Map.Entry> ent : byField.entrySet()) { - // nocommit maybe we can somehow catch singleValued - // dim appearing more than once? - String indexFieldName = ent.getKey(); //System.out.println(" fields=" + ent.getValue()); @@ -190,10 +208,13 @@ public class DocumentBuilder { FacetLabel cp = FacetLabel.create(facetField.dim, facetField.path); int ordinal = taxoWriter.addCategory(cp); + if (ordinals.length == ordinals.ints.length) { + ordinals.grow(ordinals.length+1); + } ordinals.ints[ordinals.length++] = ordinal; //System.out.println(" add cp=" + cp); - if (ft.hierarchical && ft.multiValued) { + if (ft.multiValued && (ft.hierarchical || ft.requireDimCount)) { // Add all parents too: int parent = taxoWriter.getParent(ordinal); while (parent > 0) { @@ -203,6 +224,11 @@ public class DocumentBuilder { ordinals.ints[ordinals.length++] = parent; parent = taxoWriter.getParent(parent); } + + if (ft.requireDimCount == false) { + // Remove last (dimension) ord: + ordinals.length--; + } } // Drill down: diff --git a/lucene/facet/src/java/org/apache/lucene/facet/simple/FacetsConfig.java b/lucene/facet/src/java/org/apache/lucene/facet/simple/FacetsConfig.java index 5c5f4d999a8..744cd1ad90e 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/simple/FacetsConfig.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/simple/FacetsConfig.java @@ -20,22 +20,36 @@ package org.apache.lucene.facet.simple; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; -/** By default a dimension is flat and single valued; use - * the setters in this class to change that for any dims */ +/** By default a dimension is flat, single valued and does + * not require count for the dimension; use + * the setters in this class to change these settings for + * any dims. + * + *

NOTE: this configuration is not saved into the + * index, but it's vital, and up to the application to + * ensure, that at search time the provided FacetsConfig + * matches what was used during indexing. + * + * @lucene.experimental */ public class FacetsConfig { public static final String DEFAULT_INDEX_FIELD_NAME = "$facets"; - // nocommit pull DimType into here (shai?) - private final Map fieldTypes = new ConcurrentHashMap(); /** @lucene.internal */ // nocommit expose this to the user, vs the setters? public static final class DimConfig { + /** True if this dimension is hierarchical. */ boolean hierarchical; + + /** True if this dimension is multi-valued. */ boolean multiValued; + /** True if the count/aggregate for the entire dimension + * is required, which is unusual (default is false). */ + boolean requireDimCount; + /** Actual field where this dimension's facet labels * should be indexed */ String indexFieldName = DEFAULT_INDEX_FIELD_NAME; @@ -52,22 +66,31 @@ public class FacetsConfig { } // nocommit maybe setDimConfig instead? - public synchronized void setHierarchical(String dimName) { + public synchronized void setHierarchical(String dimName, boolean v) { DimConfig ft = fieldTypes.get(dimName); if (ft == null) { ft = new DimConfig(); fieldTypes.put(dimName, ft); } - ft.hierarchical = true; + ft.hierarchical = v; } - public synchronized void setMultiValued(String dimName) { + public synchronized void setMultiValued(String dimName, boolean v) { DimConfig ft = fieldTypes.get(dimName); if (ft == null) { ft = new DimConfig(); fieldTypes.put(dimName, ft); } - ft.multiValued = true; + ft.multiValued = v; + } + + public synchronized void setRequireDimCount(String dimName, boolean v) { + DimConfig ft = fieldTypes.get(dimName); + if (ft == null) { + ft = new DimConfig(); + fieldTypes.put(dimName, ft); + } + ft.requireDimCount = v; } public synchronized void setIndexFieldName(String dimName, String indexFieldName) { diff --git a/lucene/facet/src/java/org/apache/lucene/facet/simple/FastTaxonomyFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/simple/FastTaxonomyFacetCounts.java index ad2ab0fe702..0d0fca5b035 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/simple/FastTaxonomyFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/simple/FastTaxonomyFacetCounts.java @@ -18,15 +18,11 @@ package org.apache.lucene.facet.simple; */ import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; import java.util.List; import java.util.Map; import org.apache.lucene.facet.simple.SimpleFacetsCollector.MatchingDocs; import org.apache.lucene.facet.taxonomy.FacetLabel; -import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.util.BytesRef; @@ -132,7 +128,7 @@ public class FastTaxonomyFacetCounts extends TaxonomyFacets { return null; } - TopOrdAndIntQueue q = new TopOrdAndIntQueue(topN); + TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN)); int bottomCount = 0; @@ -163,8 +159,15 @@ public class FastTaxonomyFacetCounts extends TaxonomyFacets { return null; } - if (dimConfig.hierarchical && dimConfig.multiValued) { - totCount = counts[dimOrd]; + if (dimConfig.multiValued) { + if (dimConfig.requireDimCount) { + totCount = counts[dimOrd]; + } else { + // Our sum'd count is not correct, in general: + totCount = -1; + } + } else { + // Our sum'd dim count is accurate, so we keep it } LabelAndValue[] labelValues = new LabelAndValue[q.size()]; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/simple/TaxonomyFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/simple/TaxonomyFacetCounts.java index 008d3f5db53..c17d9a19ba1 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/simple/TaxonomyFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/simple/TaxonomyFacetCounts.java @@ -18,18 +18,13 @@ package org.apache.lucene.facet.simple; */ import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; import java.util.List; import java.util.Map; import org.apache.lucene.facet.simple.SimpleFacetsCollector.MatchingDocs; import org.apache.lucene.facet.taxonomy.FacetLabel; -import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.BinaryDocValues; -import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.IntsRef; @@ -116,7 +111,7 @@ public class TaxonomyFacetCounts extends TaxonomyFacets { return null; } - TopOrdAndIntQueue q = new TopOrdAndIntQueue(topN); + TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN)); int bottomCount = 0; @@ -147,8 +142,15 @@ public class TaxonomyFacetCounts extends TaxonomyFacets { return null; } - if (dimConfig.hierarchical && dimConfig.multiValued) { - totCount = counts[dimOrd]; + if (dimConfig.multiValued) { + if (dimConfig.requireDimCount) { + totCount = counts[dimOrd]; + } else { + // Our sum'd count is not correct, in general: + totCount = -1; + } + } else { + // Our sum'd dim count is accurate, so we keep it } LabelAndValue[] labelValues = new LabelAndValue[q.size()]; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/simple/TaxonomyFacetSumFloatAssociations.java b/lucene/facet/src/java/org/apache/lucene/facet/simple/TaxonomyFacetSumFloatAssociations.java index ccd7dd3368e..60308e0c3b2 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/simple/TaxonomyFacetSumFloatAssociations.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/simple/TaxonomyFacetSumFloatAssociations.java @@ -18,15 +18,10 @@ package org.apache.lucene.facet.simple; */ import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; import java.util.List; -import java.util.Map; import org.apache.lucene.facet.simple.SimpleFacetsCollector.MatchingDocs; import org.apache.lucene.facet.taxonomy.FacetLabel; -import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.util.BytesRef; @@ -83,36 +78,6 @@ public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets { ++doc; } } - - // nocommit we could do this lazily instead: - - // Rollup any necessary dims: - // nocommit should we rollup? - /* - for(Map.Entry ent : config.getDimConfigs().entrySet()) { - String dim = ent.getKey(); - FacetsConfig.DimConfig ft = ent.getValue(); - if (ft.hierarchical && ft.multiValued == false) { - int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim)); - // It can be -1 if this field was declared in the - // config but never indexed: - if (dimRootOrd > 0) { - counts[dimRootOrd] += rollup(children[dimRootOrd]); - } - } - } - */ - } - - private float rollup(int ord) { - int sum = 0; - while (ord != TaxonomyReader.INVALID_ORDINAL) { - float childValue = values[ord] + rollup(children[ord]); - values[ord] = childValue; - sum += childValue; - ord = siblings[ord]; - } - return sum; } /** Return the count for a specific path. Returns -1 if @@ -137,8 +102,7 @@ public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets { return null; } - TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(topN); - + TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN)); float bottomValue = 0; int ord = children[dimOrd]; @@ -169,12 +133,6 @@ public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets { return null; } - /* - if (dimConfig.hierarchical && dimConfig.multiValued) { - totCount = counts[dimOrd]; - } - */ - LabelAndValue[] labelValues = new LabelAndValue[q.size()]; for(int i=labelValues.length-1;i>=0;i--) { TopOrdAndFloatQueue.OrdAndValue ordAndValue = q.pop(); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/simple/TaxonomyFacetSumIntAssociations.java b/lucene/facet/src/java/org/apache/lucene/facet/simple/TaxonomyFacetSumIntAssociations.java index 2c6365f2892..e9b2f67a92a 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/simple/TaxonomyFacetSumIntAssociations.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/simple/TaxonomyFacetSumIntAssociations.java @@ -18,15 +18,10 @@ package org.apache.lucene.facet.simple; */ import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; import java.util.List; -import java.util.Map; import org.apache.lucene.facet.simple.SimpleFacetsCollector.MatchingDocs; import org.apache.lucene.facet.taxonomy.FacetLabel; -import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.util.BytesRef; @@ -83,36 +78,6 @@ public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets { ++doc; } } - - // nocommit we could do this lazily instead: - - // Rollup any necessary dims: - // nocommit should we rollup? - /* - for(Map.Entry ent : config.getDimConfigs().entrySet()) { - String dim = ent.getKey(); - FacetsConfig.DimConfig ft = ent.getValue(); - if (ft.hierarchical && ft.multiValued == false) { - int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim)); - // It can be -1 if this field was declared in the - // config but never indexed: - if (dimRootOrd > 0) { - counts[dimRootOrd] += rollup(children[dimRootOrd]); - } - } - } - */ - } - - private int rollup(int ord) { - int sum = 0; - while (ord != TaxonomyReader.INVALID_ORDINAL) { - int childValue = values[ord] + rollup(children[ord]); - values[ord] = childValue; - sum += childValue; - ord = siblings[ord]; - } - return sum; } /** Return the count for a specific path. Returns -1 if @@ -137,8 +102,7 @@ public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets { return null; } - TopOrdAndIntQueue q = new TopOrdAndIntQueue(topN); - + TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN)); int bottomValue = 0; int ord = children[dimOrd]; @@ -169,13 +133,6 @@ public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets { return null; } - /* - FacetsConfig.DimConfig ft = config.getDimConfig(path.components[0]); - if (ft.hierarchical && ft.multiValued) { - totCount = counts[dimOrd]; - } - */ - LabelAndValue[] labelValues = new LabelAndValue[q.size()]; for(int i=labelValues.length-1;i>=0;i--) { TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop(); diff --git a/lucene/facet/src/java/org/apache/lucene/facet/simple/TaxonomyFacetSumValueSource.java b/lucene/facet/src/java/org/apache/lucene/facet/simple/TaxonomyFacetSumValueSource.java index a4eb0ac9235..1a081860dbe 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/simple/TaxonomyFacetSumValueSource.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/simple/TaxonomyFacetSumValueSource.java @@ -18,22 +18,16 @@ package org.apache.lucene.facet.simple; */ import java.io.IOException; -import java.util.ArrayList; -import java.util.Collections; -import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; import org.apache.lucene.facet.simple.SimpleFacetsCollector.MatchingDocs; import org.apache.lucene.facet.taxonomy.FacetLabel; -import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays; import org.apache.lucene.facet.taxonomy.TaxonomyReader; -import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.queries.function.FunctionValues; import org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.search.Scorer; -import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.IntsRef; @@ -150,8 +144,7 @@ public class TaxonomyFacetSumValueSource extends TaxonomyFacets { return null; } - TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(topN); - + TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN)); float bottomValue = 0; int ord = children[dimOrd]; @@ -181,8 +174,15 @@ public class TaxonomyFacetSumValueSource extends TaxonomyFacets { return null; } - if (dimConfig.hierarchical && dimConfig.multiValued) { - sumValues = values[dimOrd]; + if (dimConfig.multiValued) { + if (dimConfig.requireDimCount) { + sumValues = values[dimOrd]; + } else { + // Our sum'd count is not correct, in general: + sumValues = -1; + } + } else { + // Our sum'd dim count is accurate, so we keep it } LabelAndValue[] labelValues = new LabelAndValue[q.size()]; diff --git a/lucene/facet/src/test/org/apache/lucene/facet/simple/TestSimpleDrillSideways.java b/lucene/facet/src/test/org/apache/lucene/facet/simple/TestSimpleDrillSideways.java index 845ddd65257..ff28b22c60a 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/simple/TestSimpleDrillSideways.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/simple/TestSimpleDrillSideways.java @@ -17,60 +17,20 @@ package org.apache.lucene.facet.simple; * limitations under the License. */ -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collections; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; -import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.StringField; import org.apache.lucene.facet.FacetTestCase; -import org.apache.lucene.facet.FacetTestUtils; import org.apache.lucene.facet.index.FacetFields; -import org.apache.lucene.facet.params.FacetIndexingParams; -import org.apache.lucene.facet.params.FacetSearchParams; import org.apache.lucene.facet.simple.SimpleDrillSideways.SimpleDrillSidewaysResult; -import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetFields; -import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.DocIdSet; -import org.apache.lucene.search.Filter; import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField.Type; -import org.apache.lucene.search.SortField; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; -import org.apache.lucene.util.Bits; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.FixedBitSet; -import org.apache.lucene.util.IOUtils; -import org.apache.lucene.util.InPlaceMergeSorter; -import org.apache.lucene.util.InfoStream; -import org.apache.lucene.util._TestUtil; -import org.junit.Test; public class TestSimpleDrillSideways extends FacetTestCase { @@ -87,7 +47,7 @@ public class TestSimpleDrillSideways extends FacetTestCase { taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE); FacetsConfig config = new FacetsConfig(); - config.setHierarchical("Publish Date"); + config.setHierarchical("Publish Date", true); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); DocumentBuilder builder = new DocumentBuilder(taxoWriter, config); diff --git a/lucene/facet/src/test/org/apache/lucene/facet/simple/TestSortedSetDocValuesFacets.java b/lucene/facet/src/test/org/apache/lucene/facet/simple/TestSortedSetDocValuesFacets.java index 10ceee34c19..c929540b790 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/simple/TestSortedSetDocValuesFacets.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/simple/TestSortedSetDocValuesFacets.java @@ -17,19 +17,11 @@ package org.apache.lucene.facet.simple; * limitations under the License. */ -import java.util.ArrayList; -import java.util.Collections; import java.util.List; -import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.facet.FacetTestCase; -import org.apache.lucene.facet.search.FacetsCollector; -import org.apache.lucene.facet.simple.SortedSetDocValuesFacetCounts; -import org.apache.lucene.facet.simple.SortedSetDocValuesReaderState; -import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; @@ -42,10 +34,12 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { // randomly uses SortedSetDV public void testBasic() throws Exception { + System.out.println("here: " + defaultCodecSupportsSortedSet()); assumeTrue("Test requires SortedSetDV support", defaultCodecSupportsSortedSet()); Directory dir = newDirectory(); FacetsConfig config = new FacetsConfig(); + config.setMultiValued("a", true); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); DocumentBuilder builder = new DocumentBuilder(null, config); @@ -54,6 +48,7 @@ public class TestSortedSetDocValuesFacets extends FacetTestCase { doc.add(new SortedSetDocValuesFacetField("a", "bar")); doc.add(new SortedSetDocValuesFacetField("a", "zoo")); doc.add(new SortedSetDocValuesFacetField("b", "baz")); + System.out.println("TEST: now add"); writer.addDocument(builder.build(doc)); if (random().nextBoolean()) { writer.commit(); diff --git a/lucene/facet/src/test/org/apache/lucene/facet/simple/TestTaxonomyFacetAssociations.java b/lucene/facet/src/test/org/apache/lucene/facet/simple/TestTaxonomyFacetAssociations.java index 3879fc061e3..24c0560a155 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/simple/TestTaxonomyFacetAssociations.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/simple/TestTaxonomyFacetAssociations.java @@ -17,28 +17,18 @@ package org.apache.lucene.facet.simple; * limitations under the License. */ -import java.util.List; -import org.apache.lucene.analysis.MockAnalyzer; -import org.apache.lucene.analysis.MockTokenizer; import org.apache.lucene.document.Document; import org.apache.lucene.facet.FacetTestCase; -import org.apache.lucene.facet.params.FacetSearchParams; -import org.apache.lucene.facet.search.FacetResult; -import org.apache.lucene.facet.search.FacetsCollector; import org.apache.lucene.facet.taxonomy.FacetLabel; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; -import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.Query; import org.apache.lucene.store.Directory; import org.apache.lucene.util.IOUtils; import org.junit.AfterClass; @@ -68,7 +58,9 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase { // Cannot mix ints & floats in the same indexed field: config.setIndexFieldName("int", "$facets.int"); + config.setMultiValued("int", true); config.setIndexFieldName("float", "$facets.float"); + config.setMultiValued("float", true); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); DocumentBuilder builder = new DocumentBuilder(taxoWriter, config); @@ -189,4 +181,46 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase { } IOUtils.close(writer, taxoWriter, dir, taxoDir); } + + public void testNoHierarchy() throws Exception { + Directory dir = newDirectory(); + Directory taxoDir = newDirectory(); + + TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); + FacetsConfig config = new FacetsConfig(); + config.setHierarchical("a", true); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + + DocumentBuilder builder = new DocumentBuilder(taxoWriter, config); + Document doc = new Document(); + doc.add(new IntAssociationFacetField(14, "a", "x")); + try { + writer.addDocument(builder.build(doc)); + fail("did not hit expected exception"); + } catch (IllegalArgumentException exc) { + // expected + } + IOUtils.close(writer, taxoWriter, dir, taxoDir); + } + + public void testRequireDimCount() throws Exception { + Directory dir = newDirectory(); + Directory taxoDir = newDirectory(); + + TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); + FacetsConfig config = new FacetsConfig(); + config.setRequireDimCount("a", true); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + + DocumentBuilder builder = new DocumentBuilder(taxoWriter, config); + Document doc = new Document(); + doc.add(new IntAssociationFacetField(14, "a", "x")); + try { + writer.addDocument(builder.build(doc)); + fail("did not hit expected exception"); + } catch (IllegalArgumentException exc) { + // expected + } + IOUtils.close(writer, taxoWriter, dir, taxoDir); + } } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/simple/TestTaxonomyFacetCounts.java b/lucene/facet/src/test/org/apache/lucene/facet/simple/TestTaxonomyFacetCounts.java index b659b654b25..776ae9e66b7 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/simple/TestTaxonomyFacetCounts.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/simple/TestTaxonomyFacetCounts.java @@ -19,8 +19,6 @@ package org.apache.lucene.facet.simple; import java.io.ByteArrayOutputStream; import java.io.PrintStream; -import java.util.ArrayList; -import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; @@ -34,8 +32,6 @@ import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; import org.apache.lucene.facet.util.PrintTaxonomyStats; -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.search.IndexSearcher; @@ -44,7 +40,6 @@ import org.apache.lucene.search.similarities.DefaultSimilarity; import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper; import org.apache.lucene.search.similarities.Similarity; import org.apache.lucene.store.Directory; -import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util._TestUtil; @@ -59,7 +54,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE); FacetsConfig config = new FacetsConfig(); - config.setHierarchical("Publish Date"); + config.setHierarchical("Publish Date", true); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); DocumentBuilder builder = new DocumentBuilder(taxoWriter, config); @@ -91,11 +86,9 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { // NRT open IndexSearcher searcher = newSearcher(writer.getReader()); - writer.close(); // NRT open TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); - taxoWriter.close(); // Aggregate the facet counts: SimpleFacetsCollector c = new SimpleFacetsCollector(); @@ -133,10 +126,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { assertTrue(result.indexOf(" /2012") != -1); assertTrue(result.indexOf(" /20") != -1); - taxoReader.close(); - searcher.getIndexReader().close(); - dir.close(); - taxoDir.close(); + IOUtils.close(writer, taxoWriter, searcher.getIndexReader(), taxoReader, taxoDir, dir); } // LUCENE-5333 @@ -176,11 +166,9 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { // NRT open IndexSearcher searcher = newSearcher(writer.getReader()); - writer.close(); // NRT open TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); - taxoWriter.close(); SimpleFacetsCollector c = new SimpleFacetsCollector(); searcher.search(new MatchAllDocsQuery(), c); @@ -195,10 +183,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { assertEquals("b (2)\n bar1 (1)\n bar2 (1)\n", results.get(1).toString()); assertEquals("c (1)\n baz1 (1)\n", results.get(2).toString()); - searcher.getIndexReader().close(); - taxoReader.close(); - taxoDir.close(); - dir.close(); + IOUtils.close(writer, taxoWriter, searcher.getIndexReader(), taxoReader, taxoDir, dir); } public void testWrongIndexFieldName() throws Exception { @@ -220,11 +205,9 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { // NRT open IndexSearcher searcher = newSearcher(writer.getReader()); - writer.close(); // NRT open TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); - taxoWriter.close(); SimpleFacetsCollector c = new SimpleFacetsCollector(); searcher.search(new MatchAllDocsQuery(), c); @@ -259,10 +242,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { // expected } - searcher.getIndexReader().close(); - taxoReader.close(); - taxoDir.close(); - dir.close(); + IOUtils.close(writer, taxoWriter, searcher.getIndexReader(), taxoReader, taxoDir, dir); } @@ -290,10 +270,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { doc.add(newTextField("field", "text", Field.Store.NO)); doc.add(new FacetField("a", "path")); writer.addDocument(builder.build(doc)); - writer.close(); - taxoWriter.close(); - dir.close(); - taxoDir.close(); + IOUtils.close(writer, taxoWriter, dir, taxoDir); } public void testMultiValuedHierarchy() throws Exception { @@ -301,8 +278,8 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { Directory taxoDir = newDirectory(); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE); FacetsConfig config = new FacetsConfig(); - config.setHierarchical("a"); - config.setMultiValued("a"); + config.setHierarchical("a", true); + config.setMultiValued("a", true); RandomIndexWriter writer = new RandomIndexWriter(random(), dir); DocumentBuilder builder = new DocumentBuilder(taxoWriter, config); @@ -314,11 +291,9 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { // NRT open IndexSearcher searcher = newSearcher(writer.getReader()); - writer.close(); // NRT open TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); - taxoWriter.close(); // Aggregate the facet counts: SimpleFacetsCollector c = new SimpleFacetsCollector(); @@ -333,10 +308,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { assertEquals(1, result.labelValues.length); assertEquals(1, result.labelValues[0].value.intValue()); - searcher.getIndexReader().close(); - taxoReader.close(); - dir.close(); - taxoDir.close(); + IOUtils.close(writer, taxoWriter, searcher.getIndexReader(), taxoReader, dir, taxoDir); } public void testLabelWithDelimiter() throws Exception { @@ -346,6 +318,7 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE); FacetsConfig config = new FacetsConfig(); + config.setMultiValued("dim", true); DocumentBuilder builder = new DocumentBuilder(taxoWriter, config); Document doc = new Document(); @@ -356,11 +329,9 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { // NRT open IndexSearcher searcher = newSearcher(writer.getReader()); - writer.close(); // NRT open TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); - taxoWriter.close(); SimpleFacetsCollector c = new SimpleFacetsCollector(); searcher.search(new MatchAllDocsQuery(), c); @@ -370,65 +341,143 @@ public class TestTaxonomyFacetCounts extends FacetTestCase { assertEquals(1, facets.getSpecificValue("dim", "test\u001Etwo")); SimpleFacetResult result = facets.getTopChildren(10, "dim"); - assertEquals("dim (2)\n test\u001Fone (1)\n test\u001Etwo (1)\n", result.toString()); - IOUtils.close(searcher.getIndexReader(), taxoReader, dir, taxoDir); + assertEquals("dim (-1)\n test\u001Fone (1)\n test\u001Etwo (1)\n", result.toString()); + IOUtils.close(writer, taxoWriter, searcher.getIndexReader(), taxoReader, dir, taxoDir); + } + + public void testRequireDimCount() throws Exception { + Directory dir = newDirectory(); + Directory taxoDir = newDirectory(); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE); + + FacetsConfig config = new FacetsConfig(); + config.setMultiValued("dim2", true); + config.setMultiValued("dim3", true); + config.setHierarchical("dim3", true); + config.setRequireDimCount("dim", true); + config.setRequireDimCount("dim2", true); + config.setRequireDimCount("dim3", true); + DocumentBuilder builder = new DocumentBuilder(taxoWriter, config); + + Document doc = new Document(); + doc.add(newTextField("field", "text", Field.Store.NO)); + doc.add(new FacetField("dim", "a")); + doc.add(new FacetField("dim2", "a")); + doc.add(new FacetField("dim2", "b")); + doc.add(new FacetField("dim3", "a", "b")); + doc.add(new FacetField("dim3", "a", "c")); + writer.addDocument(builder.build(doc)); + + // NRT open + IndexSearcher searcher = newSearcher(writer.getReader()); + + // NRT open + TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); + + SimpleFacetsCollector c = new SimpleFacetsCollector(); + searcher.search(new MatchAllDocsQuery(), c); + + Facets facets = getFacetCounts(taxoReader, config, c); + assertEquals(1, facets.getTopChildren(10, "dim").value); + assertEquals(1, facets.getTopChildren(10, "dim2").value); + assertEquals(1, facets.getTopChildren(10, "dim3").value); + IOUtils.close(writer, taxoWriter, searcher.getIndexReader(), taxoReader, dir, taxoDir); } - /* // LUCENE-4583: make sure if we require > 32 KB for one // document, we don't hit exc when using Facet42DocValuesFormat public void testManyFacetsInOneDocument() throws Exception { - assumeTrue("default Codec doesn't support huge BinaryDocValues", _TestUtil.fieldSupportsHugeBinaryDocValues(CategoryListParams.DEFAULT_FIELD)); + assumeTrue("default Codec doesn't support huge BinaryDocValues", _TestUtil.fieldSupportsHugeBinaryDocValues(FacetsConfig.DEFAULT_INDEX_FIELD_NAME)); Directory dir = newDirectory(); Directory taxoDir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())); RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc); DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE); - - FacetFields facetFields = new FacetFields(taxoWriter); + + FacetsConfig config = new FacetsConfig(); + config.setMultiValued("dim", true); + DocumentBuilder builder = new DocumentBuilder(taxoWriter, config); int numLabels = _TestUtil.nextInt(random(), 40000, 100000); Document doc = new Document(); doc.add(newTextField("field", "text", Field.Store.NO)); - List paths = new ArrayList(); for (int i = 0; i < numLabels; i++) { - paths.add(new CategoryPath("dim", "" + i)); + doc.add(new FacetField("dim", "" + i)); } - facetFields.addFields(doc, paths); - writer.addDocument(doc); + writer.addDocument(builder.build(doc)); // NRT open IndexSearcher searcher = newSearcher(writer.getReader()); - writer.close(); // NRT open TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); - taxoWriter.close(); - - FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("dim"), Integer.MAX_VALUE)); // Aggregate the facet counts: - FacetsCollector c = FacetsCollector.create(fsp, searcher.getIndexReader(), taxoReader); + SimpleFacetsCollector c = new SimpleFacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query, and use MultiCollector to // wrap collecting the "normal" hits and also facets: searcher.search(new MatchAllDocsQuery(), c); - List results = c.getFacetResults(); - assertEquals(1, results.size()); - FacetResultNode root = results.get(0).getFacetResultNode(); - assertEquals(numLabels, root.subResults.size()); + Facets facets = getFacetCounts(taxoReader, config, c); + + SimpleFacetResult result = facets.getTopChildren(Integer.MAX_VALUE, "dim"); + assertEquals(numLabels, result.labelValues.length); Set allLabels = new HashSet(); - for (FacetResultNode childNode : root.subResults) { - assertEquals(2, childNode.label.length); - allLabels.add(childNode.label.components[1]); - assertEquals(1, (int) childNode.value); + for (LabelAndValue labelValue : result.labelValues) { + allLabels.add(labelValue.label); + assertEquals(1, labelValue.value.intValue()); } assertEquals(numLabels, allLabels.size()); - IOUtils.close(searcher.getIndexReader(), taxoReader, dir, taxoDir); + IOUtils.close(searcher.getIndexReader(), taxoWriter, writer, taxoReader, dir, taxoDir); + } + + // Make sure we catch when app didn't declare field as + // hierarchical but it was: + public void testDetectHierarchicalField() throws Exception { + Directory dir = newDirectory(); + Directory taxoDir = newDirectory(); + TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + FacetsConfig config = new FacetsConfig(); + DocumentBuilder builder = new DocumentBuilder(taxoWriter, config); + + Document doc = new Document(); + doc.add(newTextField("field", "text", Field.Store.NO)); + doc.add(new FacetField("a", "path", "other")); + try { + builder.build(doc); + fail("did not hit expected exception"); + } catch (IllegalArgumentException iae) { + // expected + } + IOUtils.close(writer, taxoWriter, dir, taxoDir); + } + + // Make sure we catch when app didn't declare field as + // multi-valued but it was: + public void testDetectMultiValuedField() throws Exception { + Directory dir = newDirectory(); + Directory taxoDir = newDirectory(); + TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE); + RandomIndexWriter writer = new RandomIndexWriter(random(), dir); + FacetsConfig config = new FacetsConfig(); + DocumentBuilder builder = new DocumentBuilder(taxoWriter, config); + + Document doc = new Document(); + doc.add(newTextField("field", "text", Field.Store.NO)); + doc.add(new FacetField("a", "path")); + doc.add(new FacetField("a", "path2")); + try { + builder.build(doc); + fail("did not hit expected exception"); + } catch (IllegalArgumentException iae) { + // expected + } + IOUtils.close(writer, taxoWriter, dir, taxoDir); } - */ }