From 458786d0f4133abfed9fc3e2376dff685b737c43 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Wed, 27 Nov 2013 16:34:39 +0000 Subject: [PATCH] LUCENE-5339: factor out base classes for int/float taxonomy aggregates git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5339@1546097 13f79535-47bb-0310-9956-ffa450edef68 --- .../facet/TestAssociationsFacetsExample.java | 4 +- .../lucene/facet/FastTaxonomyFacetCounts.java | 119 +-------------- .../lucene/facet/FloatTaxonomyFacets.java | 137 +++++++++++++++++ .../lucene/facet/IntTaxonomyFacets.java | 141 ++++++++++++++++++ .../lucene/facet/TaxonomyFacetCounts.java | 113 +------------- .../TaxonomyFacetSumFloatAssociations.java | 71 +-------- .../TaxonomyFacetSumIntAssociations.java | 72 +-------- .../facet/TaxonomyFacetSumValueSource.java | 111 +------------- .../facet/TestTaxonomyFacetAssociations.java | 9 +- 9 files changed, 297 insertions(+), 480 deletions(-) create mode 100644 lucene/facet/src/java/org/apache/lucene/facet/FloatTaxonomyFacets.java create mode 100644 lucene/facet/src/java/org/apache/lucene/facet/IntTaxonomyFacets.java diff --git a/lucene/demo/src/test/org/apache/lucene/demo/facet/TestAssociationsFacetsExample.java b/lucene/demo/src/test/org/apache/lucene/demo/facet/TestAssociationsFacetsExample.java index 85133e5233f..810d7798540 100644 --- a/lucene/demo/src/test/org/apache/lucene/demo/facet/TestAssociationsFacetsExample.java +++ b/lucene/demo/src/test/org/apache/lucene/demo/facet/TestAssociationsFacetsExample.java @@ -29,7 +29,7 @@ public class TestAssociationsFacetsExample extends LuceneTestCase { public void testExamples() throws Exception { List res = new AssociationsFacetsExample().runSumAssociations(); assertEquals("Wrong number of results", 2, res.size()); - assertEquals("value=6 childCount=2\n lucene (4)\n solr (2)\n", res.get(0).toString()); - assertEquals("value=1.96 childCount=2\n computing (1.62)\n software (0.34)\n", res.get(1).toString()); + assertEquals("value=-1 childCount=2\n lucene (4)\n solr (2)\n", res.get(0).toString()); + assertEquals("value=-1.0 childCount=2\n computing (1.62)\n software (0.34)\n", res.get(1).toString()); } } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/FastTaxonomyFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/FastTaxonomyFacetCounts.java index d8c002b3e58..6fb61145713 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/FastTaxonomyFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/FastTaxonomyFacetCounts.java @@ -19,10 +19,8 @@ package org.apache.lucene.facet; import java.io.IOException; import java.util.List; -import java.util.Map; import org.apache.lucene.facet.FacetsCollector.MatchingDocs; -import org.apache.lucene.facet.taxonomy.FacetLabel; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.util.BytesRef; @@ -32,8 +30,7 @@ import org.apache.lucene.util.FixedBitSet; * into DocValues was used. * * @lucene.experimental */ -public class FastTaxonomyFacetCounts extends TaxonomyFacets { - private final int[] counts; +public class FastTaxonomyFacetCounts extends IntTaxonomyFacets { /** Create {@code FastTaxonomyFacetCounts}, which also * counts all facet labels. */ @@ -48,7 +45,6 @@ public class FastTaxonomyFacetCounts extends TaxonomyFacets { * field name for certain dimensions. */ public FastTaxonomyFacetCounts(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException { super(indexFieldName, taxoReader, config); - counts = new int[taxoReader.getSize()]; count(fc.getMatchingDocs()); } @@ -76,9 +72,8 @@ public class FastTaxonomyFacetCounts extends TaxonomyFacets { byte b = bytes[offset++]; if (b >= 0) { prev = ord = ((ord << 7) | b) + prev; - assert ord < counts.length: "ord=" + ord + " vs maxOrd=" + counts.length; - //System.out.println(" ord=" + ord); - ++counts[ord]; + assert ord < values.length: "ord=" + ord + " vs maxOrd=" + values.length; + ++values[ord]; ord = 0; } else { ord = (ord << 7) | (b & 0x7F); @@ -88,112 +83,6 @@ public class FastTaxonomyFacetCounts extends TaxonomyFacets { } } - // nocommit we could do this lazily instead: - - // Rollup any necessary dims: - for(Map.Entry ent : config.getDimConfigs().entrySet()) { - String dim = ent.getKey(); - FacetsConfig.DimConfig ft = ent.getValue(); - if (ft.hierarchical && ft.multiValued == false) { - int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim)); - // It can be -1 if this field was declared in the - // config but never indexed: - if (dimRootOrd > 0) { - counts[dimRootOrd] += rollup(children[dimRootOrd]); - } - } - } - } - - private int rollup(int ord) { - int sum = 0; - while (ord != TaxonomyReader.INVALID_ORDINAL) { - int childValue = counts[ord] + rollup(children[ord]); - counts[ord] = childValue; - sum += childValue; - ord = siblings[ord]; - } - return sum; - } - - @Override - public Number getSpecificValue(String dim, String... path) throws IOException { - verifyDim(dim); - int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path)); - if (ord < 0) { - return -1; - } - return counts[ord]; - } - - @Override - public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { - // TODO: can we factor this out? - if (topN <= 0) { - throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")"); - } - FacetsConfig.DimConfig dimConfig = verifyDim(dim); - //System.out.println("ftfc.getTopChildren topN=" + topN); - FacetLabel cp = FacetLabel.create(dim, path); - int dimOrd = taxoReader.getOrdinal(cp); - if (dimOrd == -1) { - //System.out.println("no ord for dim=" + dim + " path=" + path); - return null; - } - - TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN)); - - int bottomCount = 0; - - int ord = children[dimOrd]; - int totCount = 0; - int childCount = 0; - - TopOrdAndIntQueue.OrdAndValue reuse = null; - while(ord != TaxonomyReader.INVALID_ORDINAL) { - //System.out.println(" check ord=" + ord + " label=" + taxoReader.getPath(ord) + " topN=" + topN); - if (counts[ord] > 0) { - totCount += counts[ord]; - childCount++; - if (counts[ord] > bottomCount) { - if (reuse == null) { - reuse = new TopOrdAndIntQueue.OrdAndValue(); - } - reuse.ord = ord; - reuse.value = counts[ord]; - reuse = q.insertWithOverflow(reuse); - if (q.size() == topN) { - bottomCount = q.top().value; - } - } - } - - ord = siblings[ord]; - } - - if (totCount == 0) { - //System.out.println(" no matches"); - return null; - } - - if (dimConfig.multiValued) { - if (dimConfig.requireDimCount) { - totCount = counts[dimOrd]; - } else { - // Our sum'd count is not correct, in general: - totCount = -1; - } - } else { - // Our sum'd dim count is accurate, so we keep it - } - - LabelAndValue[] labelValues = new LabelAndValue[q.size()]; - for(int i=labelValues.length-1;i>=0;i--) { - TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop(); - FacetLabel child = taxoReader.getPath(ordAndValue.ord); - labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value); - } - - return new FacetResult(totCount, labelValues, childCount); + rollup(); } } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/FloatTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/FloatTaxonomyFacets.java new file mode 100644 index 00000000000..b0bf08062a5 --- /dev/null +++ b/lucene/facet/src/java/org/apache/lucene/facet/FloatTaxonomyFacets.java @@ -0,0 +1,137 @@ +package org.apache.lucene.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Map; + +import org.apache.lucene.facet.taxonomy.FacetLabel; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; + +/** Base class for all taxonomy-based facets that aggregate + * to a per-ords float[]. */ + +public abstract class FloatTaxonomyFacets extends TaxonomyFacets { + + protected final float[] values; + + protected FloatTaxonomyFacets(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config) throws IOException { + super(indexFieldName, taxoReader, config); + values = new float[taxoReader.getSize()]; + } + + // nocommit we could do this lazily instead: + protected void rollup() throws IOException { + // Rollup any necessary dims: + for(Map.Entry ent : config.getDimConfigs().entrySet()) { + String dim = ent.getKey(); + FacetsConfig.DimConfig ft = ent.getValue(); + if (ft.hierarchical && ft.multiValued == false) { + int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim)); + assert dimRootOrd > 0; + values[dimRootOrd] += rollup(children[dimRootOrd]); + } + } + } + + private float rollup(int ord) { + float sum = 0; + while (ord != TaxonomyReader.INVALID_ORDINAL) { + float childValue = values[ord] + rollup(children[ord]); + values[ord] = childValue; + sum += childValue; + ord = siblings[ord]; + } + return sum; + } + + @Override + public Number getSpecificValue(String dim, String... path) throws IOException { + verifyDim(dim); + int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path)); + if (ord < 0) { + return -1; + } + return values[ord]; + } + + @Override + public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { + if (topN <= 0) { + throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")"); + } + FacetsConfig.DimConfig dimConfig = verifyDim(dim); + FacetLabel cp = FacetLabel.create(dim, path); + int dimOrd = taxoReader.getOrdinal(cp); + if (dimOrd == -1) { + return null; + } + + TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN)); + float bottomValue = 0; + + int ord = children[dimOrd]; + float sumValues = 0; + int childCount = 0; + + TopOrdAndFloatQueue.OrdAndValue reuse = null; + while(ord != TaxonomyReader.INVALID_ORDINAL) { + if (values[ord] > 0) { + sumValues += values[ord]; + childCount++; + if (values[ord] > bottomValue) { + if (reuse == null) { + reuse = new TopOrdAndFloatQueue.OrdAndValue(); + } + reuse.ord = ord; + reuse.value = values[ord]; + reuse = q.insertWithOverflow(reuse); + if (q.size() == topN) { + bottomValue = q.top().value; + } + } + } + + ord = siblings[ord]; + } + + if (sumValues == 0) { + return null; + } + + if (dimConfig.multiValued) { + if (dimConfig.requireDimCount) { + sumValues = values[dimOrd]; + } else { + // Our sum'd count is not correct, in general: + sumValues = -1; + } + } else { + // Our sum'd dim count is accurate, so we keep it + } + + LabelAndValue[] labelValues = new LabelAndValue[q.size()]; + for(int i=labelValues.length-1;i>=0;i--) { + TopOrdAndFloatQueue.OrdAndValue ordAndValue = q.pop(); + FacetLabel child = taxoReader.getPath(ordAndValue.ord); + labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value); + } + + return new FacetResult(sumValues, labelValues, childCount); + } +} \ No newline at end of file diff --git a/lucene/facet/src/java/org/apache/lucene/facet/IntTaxonomyFacets.java b/lucene/facet/src/java/org/apache/lucene/facet/IntTaxonomyFacets.java new file mode 100644 index 00000000000..92668fea505 --- /dev/null +++ b/lucene/facet/src/java/org/apache/lucene/facet/IntTaxonomyFacets.java @@ -0,0 +1,141 @@ +package org.apache.lucene.facet; + +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.IOException; +import java.util.Map; + +import org.apache.lucene.facet.taxonomy.FacetLabel; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; + +/** Base class for all taxonomy-based facets that aggregate + * to a per-ords int[]. */ + +public abstract class IntTaxonomyFacets extends TaxonomyFacets { + + protected final int[] values; + + protected IntTaxonomyFacets(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config) throws IOException { + super(indexFieldName, taxoReader, config); + values = new int[taxoReader.getSize()]; + } + + // nocommit we could do this lazily instead: + protected void rollup() throws IOException { + // Rollup any necessary dims: + for(Map.Entry ent : config.getDimConfigs().entrySet()) { + String dim = ent.getKey(); + FacetsConfig.DimConfig ft = ent.getValue(); + if (ft.hierarchical && ft.multiValued == false) { + int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim)); + // It can be -1 if this field was declared in the + // config but never indexed: + if (dimRootOrd > 0) { + values[dimRootOrd] += rollup(children[dimRootOrd]); + } + } + } + } + + private int rollup(int ord) { + int sum = 0; + while (ord != TaxonomyReader.INVALID_ORDINAL) { + int childValue = values[ord] + rollup(children[ord]); + values[ord] = childValue; + sum += childValue; + ord = siblings[ord]; + } + return sum; + } + + @Override + public Number getSpecificValue(String dim, String... path) throws IOException { + verifyDim(dim); + int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path)); + if (ord < 0) { + return -1; + } + return values[ord]; + } + + @Override + public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { + if (topN <= 0) { + throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")"); + } + FacetsConfig.DimConfig dimConfig = verifyDim(dim); + FacetLabel cp = FacetLabel.create(dim, path); + int dimOrd = taxoReader.getOrdinal(cp); + if (dimOrd == -1) { + return null; + } + + TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN)); + + int bottomValue = 0; + + int ord = children[dimOrd]; + int totValue = 0; + int childCount = 0; + + TopOrdAndIntQueue.OrdAndValue reuse = null; + while(ord != TaxonomyReader.INVALID_ORDINAL) { + if (values[ord] > 0) { + totValue += values[ord]; + childCount++; + if (values[ord] > bottomValue) { + if (reuse == null) { + reuse = new TopOrdAndIntQueue.OrdAndValue(); + } + reuse.ord = ord; + reuse.value = values[ord]; + reuse = q.insertWithOverflow(reuse); + if (q.size() == topN) { + bottomValue = q.top().value; + } + } + } + + ord = siblings[ord]; + } + + if (totValue == 0) { + return null; + } + + if (dimConfig.multiValued) { + if (dimConfig.requireDimCount) { + totValue = values[dimOrd]; + } else { + // Our sum'd value is not correct, in general: + totValue = -1; + } + } else { + // Our sum'd dim value is accurate, so we keep it + } + + LabelAndValue[] labelValues = new LabelAndValue[q.size()]; + for(int i=labelValues.length-1;i>=0;i--) { + TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop(); + FacetLabel child = taxoReader.getPath(ordAndValue.ord); + labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value); + } + + return new FacetResult(totValue, labelValues, childCount); + } +} \ No newline at end of file diff --git a/lucene/facet/src/java/org/apache/lucene/facet/TaxonomyFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/TaxonomyFacetCounts.java index 925735a659b..84037aa458d 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/TaxonomyFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/TaxonomyFacetCounts.java @@ -19,10 +19,8 @@ package org.apache.lucene.facet; import java.io.IOException; import java.util.List; -import java.util.Map; import org.apache.lucene.facet.FacetsCollector.MatchingDocs; -import org.apache.lucene.facet.taxonomy.FacetLabel; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.util.FixedBitSet; @@ -35,9 +33,8 @@ import org.apache.lucene.util.IntsRef; * @lucene.experimental */ // nocommit remove & add specialized Cached variation only? -public class TaxonomyFacetCounts extends TaxonomyFacets { +public class TaxonomyFacetCounts extends IntTaxonomyFacets { private final OrdinalsReader ordinalsReader; - private final int[] counts; /** Create {@code TaxonomyFacetCounts}, which also * counts all facet labels. Use this for a non-default @@ -46,7 +43,6 @@ public class TaxonomyFacetCounts extends TaxonomyFacets { public TaxonomyFacetCounts(OrdinalsReader ordinalsReader, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException { super(ordinalsReader.getIndexFieldName(), taxoReader, config); this.ordinalsReader = ordinalsReader; - counts = new int[taxoReader.getSize()]; count(fc.getMatchingDocs()); } @@ -61,115 +57,12 @@ public class TaxonomyFacetCounts extends TaxonomyFacets { while (doc < length && (doc = bits.nextSetBit(doc)) != -1) { ords.get(doc, scratch); for(int i=0;i ent : config.getDimConfigs().entrySet()) { - String dim = ent.getKey(); - FacetsConfig.DimConfig ft = ent.getValue(); - if (ft.hierarchical && ft.multiValued == false) { - int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim)); - // It can be -1 if this field was declared in the - // config but never indexed: - if (dimRootOrd > 0) { - counts[dimRootOrd] += rollup(children[dimRootOrd]); - } - } - } - } - - private int rollup(int ord) { - int sum = 0; - while (ord != TaxonomyReader.INVALID_ORDINAL) { - int childValue = counts[ord] + rollup(children[ord]); - counts[ord] = childValue; - sum += childValue; - ord = siblings[ord]; - } - return sum; - } - - @Override - public Number getSpecificValue(String dim, String... path) throws IOException { - verifyDim(dim); - int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path)); - if (ord < 0) { - return -1; - } - return counts[ord]; - } - - @Override - public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { - // TODO: can we factor this out? - if (topN <= 0) { - throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")"); - } - FacetsConfig.DimConfig dimConfig = verifyDim(dim); - FacetLabel cp = FacetLabel.create(dim, path); - int dimOrd = taxoReader.getOrdinal(cp); - if (dimOrd == -1) { - //System.out.println("no ord for path=" + path); - return null; - } - - TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN)); - - int bottomCount = 0; - - int ord = children[dimOrd]; - int totCount = 0; - int childCount = 0; - - TopOrdAndIntQueue.OrdAndValue reuse = null; - while(ord != TaxonomyReader.INVALID_ORDINAL) { - if (counts[ord] > 0) { - totCount += counts[ord]; - childCount++; - if (counts[ord] > bottomCount) { - if (reuse == null) { - reuse = new TopOrdAndIntQueue.OrdAndValue(); - } - reuse.ord = ord; - reuse.value = counts[ord]; - reuse = q.insertWithOverflow(reuse); - if (q.size() == topN) { - bottomCount = q.top().value; - } - } - } - - ord = siblings[ord]; - } - - if (totCount == 0) { - return null; - } - - if (dimConfig.multiValued) { - if (dimConfig.requireDimCount) { - totCount = counts[dimOrd]; - } else { - // Our sum'd count is not correct, in general: - totCount = -1; - } - } else { - // Our sum'd dim count is accurate, so we keep it - } - - LabelAndValue[] labelValues = new LabelAndValue[q.size()]; - for(int i=labelValues.length-1;i>=0;i--) { - TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop(); - FacetLabel child = taxoReader.getPath(ordAndValue.ord); - labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value); - } - - return new FacetResult(totCount, labelValues, childCount); + rollup(); } } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/TaxonomyFacetSumFloatAssociations.java b/lucene/facet/src/java/org/apache/lucene/facet/TaxonomyFacetSumFloatAssociations.java index 75994d6ebda..5966a723668 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/TaxonomyFacetSumFloatAssociations.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/TaxonomyFacetSumFloatAssociations.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.util.List; import org.apache.lucene.facet.FacetsCollector.MatchingDocs; -import org.apache.lucene.facet.taxonomy.FacetLabel; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.util.BytesRef; @@ -32,8 +31,7 @@ import org.apache.lucene.util.FixedBitSet; * encoding. * * @lucene.experimental */ -public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets { - private final float[] values; +public class TaxonomyFacetSumFloatAssociations extends FloatTaxonomyFacets { /** Create {@code TaxonomyFacetSumFloatAssociations} against * the default index field. */ @@ -45,7 +43,6 @@ public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets { * the specified index field. */ public TaxonomyFacetSumFloatAssociations(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException { super(indexFieldName, taxoReader, config); - values = new float[taxoReader.getSize()]; sumValues(fc.getMatchingDocs()); } @@ -86,71 +83,7 @@ public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets { ++doc; } } - } - @Override - public Number getSpecificValue(String dim, String... path) throws IOException { - verifyDim(dim); - int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path)); - if (ord < 0) { - return -1; - } - return values[ord]; - } - - @Override - public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { - // TODO: can we factor this out? - if (topN <= 0) { - throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")"); - } - FacetsConfig.DimConfig dimConfig = verifyDim(dim); - FacetLabel cp = FacetLabel.create(dim, path); - int dimOrd = taxoReader.getOrdinal(cp); - if (dimOrd == -1) { - //System.out.println("no ord for path=" + path); - return null; - } - - TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN)); - float bottomValue = 0; - - int ord = children[dimOrd]; - float sumValue = 0; - int childCount = 0; - TopOrdAndFloatQueue.OrdAndValue reuse = null; - while(ord != TaxonomyReader.INVALID_ORDINAL) { - if (values[ord] > 0) { - sumValue += values[ord]; - childCount++; - if (values[ord] > bottomValue) { - if (reuse == null) { - reuse = new TopOrdAndFloatQueue.OrdAndValue(); - } - reuse.ord = ord; - reuse.value = values[ord]; - reuse = q.insertWithOverflow(reuse); - if (q.size() == topN) { - bottomValue = q.top().value; - } - } - } - - ord = siblings[ord]; - } - - if (sumValue == 0) { - //System.out.println("totCount=0 for path=" + path); - return null; - } - - LabelAndValue[] labelValues = new LabelAndValue[q.size()]; - for(int i=labelValues.length-1;i>=0;i--) { - TopOrdAndFloatQueue.OrdAndValue ordAndValue = q.pop(); - FacetLabel child = taxoReader.getPath(ordAndValue.ord); - labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value); - } - - return new FacetResult(sumValue, labelValues, childCount); + rollup(); } } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/TaxonomyFacetSumIntAssociations.java b/lucene/facet/src/java/org/apache/lucene/facet/TaxonomyFacetSumIntAssociations.java index 208e7186db0..1279aae7d4c 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/TaxonomyFacetSumIntAssociations.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/TaxonomyFacetSumIntAssociations.java @@ -21,7 +21,6 @@ import java.io.IOException; import java.util.List; import org.apache.lucene.facet.FacetsCollector.MatchingDocs; -import org.apache.lucene.facet.taxonomy.FacetLabel; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.util.BytesRef; @@ -32,8 +31,7 @@ import org.apache.lucene.util.FixedBitSet; * encoding. * * @lucene.experimental */ -public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets { - private final int[] values; +public class TaxonomyFacetSumIntAssociations extends IntTaxonomyFacets { /** Create {@code TaxonomyFacetSumIntAssociations} against * the default index field. */ @@ -45,7 +43,6 @@ public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets { * the specified index field. */ public TaxonomyFacetSumIntAssociations(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException { super(indexFieldName, taxoReader, config); - values = new int[taxoReader.getSize()]; sumValues(fc.getMatchingDocs()); } @@ -86,72 +83,7 @@ public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets { ++doc; } } - } - @Override - public Number getSpecificValue(String dim, String... path) throws IOException { - verifyDim(dim); - int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path)); - if (ord < 0) { - return -1; - } - return values[ord]; - } - - @Override - public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { - // TODO: can we factor this out? - if (topN <= 0) { - throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")"); - } - verifyDim(dim); - FacetLabel cp = FacetLabel.create(dim, path); - int dimOrd = taxoReader.getOrdinal(cp); - if (dimOrd == -1) { - //System.out.println("no ord for path=" + path); - return null; - } - - TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN)); - int bottomValue = 0; - - int ord = children[dimOrd]; - long sumValue = 0; - int childCount = 0; - - TopOrdAndIntQueue.OrdAndValue reuse = null; - while(ord != TaxonomyReader.INVALID_ORDINAL) { - if (values[ord] > 0) { - sumValue += values[ord]; - childCount++; - if (values[ord] > bottomValue) { - if (reuse == null) { - reuse = new TopOrdAndIntQueue.OrdAndValue(); - } - reuse.ord = ord; - reuse.value = values[ord]; - reuse = q.insertWithOverflow(reuse); - if (q.size() == topN) { - bottomValue = q.top().value; - } - } - } - - ord = siblings[ord]; - } - - if (sumValue == 0) { - //System.out.println("totCount=0 for path=" + path); - return null; - } - - LabelAndValue[] labelValues = new LabelAndValue[q.size()]; - for(int i=labelValues.length-1;i>=0;i--) { - TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop(); - FacetLabel child = taxoReader.getPath(ordAndValue.ord); - labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value); - } - - return new FacetResult(sumValue, labelValues, childCount); + rollup(); } } diff --git a/lucene/facet/src/java/org/apache/lucene/facet/TaxonomyFacetSumValueSource.java b/lucene/facet/src/java/org/apache/lucene/facet/TaxonomyFacetSumValueSource.java index 2bdaa952e94..a4406f8ffa9 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/TaxonomyFacetSumValueSource.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/TaxonomyFacetSumValueSource.java @@ -23,7 +23,6 @@ import java.util.List; import java.util.Map; import org.apache.lucene.facet.FacetsCollector.MatchingDocs; -import org.apache.lucene.facet.taxonomy.FacetLabel; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.queries.function.FunctionValues; @@ -33,12 +32,11 @@ import org.apache.lucene.search.Scorer; import org.apache.lucene.util.FixedBitSet; import org.apache.lucene.util.IntsRef; -/** Aggregates sum of values from a {@link ValueSource}, for - * each facet label. +/** Aggregates sum of values from {@link + * ValueSource#doubleValue}, for each facet label. * * @lucene.experimental */ -public class TaxonomyFacetSumValueSource extends TaxonomyFacets { - private final float[] values; +public class TaxonomyFacetSumValueSource extends FloatTaxonomyFacets { private final OrdinalsReader ordinalsReader; /** Aggreggates float facet values from the provided @@ -58,7 +56,6 @@ public class TaxonomyFacetSumValueSource extends TaxonomyFacets { FacetsConfig config, FacetsCollector fc, ValueSource valueSource) throws IOException { super(ordinalsReader.getIndexFieldName(), taxoReader, config); this.ordinalsReader = ordinalsReader; - values = new float[taxoReader.getSize()]; sumValues(fc.getMatchingDocs(), fc.getKeepScores(), valueSource); } @@ -105,107 +102,7 @@ public class TaxonomyFacetSumValueSource extends TaxonomyFacets { } } - // nocommit we could do this lazily instead: - - // Rollup any necessary dims: - for(Map.Entry ent : config.getDimConfigs().entrySet()) { - String dim = ent.getKey(); - FacetsConfig.DimConfig ft = ent.getValue(); - if (ft.hierarchical && ft.multiValued == false) { - int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim)); - assert dimRootOrd > 0; - values[dimRootOrd] += rollup(children[dimRootOrd]); - } - } - } - - private float rollup(int ord) { - float sum = 0; - while (ord != TaxonomyReader.INVALID_ORDINAL) { - float childValue = values[ord] + rollup(children[ord]); - values[ord] = childValue; - sum += childValue; - ord = siblings[ord]; - } - return sum; - } - - @Override - public Number getSpecificValue(String dim, String... path) throws IOException { - verifyDim(dim); - int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path)); - if (ord < 0) { - return -1; - } - return values[ord]; - } - - @Override - public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException { - // TODO: can we factor this out? - if (topN <= 0) { - throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")"); - } - FacetsConfig.DimConfig dimConfig = verifyDim(dim); - FacetLabel cp = FacetLabel.create(dim, path); - int dimOrd = taxoReader.getOrdinal(cp); - if (dimOrd == -1) { - System.out.println(" no dim ord " + dim); - return null; - } - - TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN)); - float bottomValue = 0; - - int ord = children[dimOrd]; - float sumValues = 0; - int childCount = 0; - - TopOrdAndFloatQueue.OrdAndValue reuse = null; - while(ord != TaxonomyReader.INVALID_ORDINAL) { - if (values[ord] > 0) { - sumValues += values[ord]; - childCount++; - if (values[ord] > bottomValue) { - if (reuse == null) { - reuse = new TopOrdAndFloatQueue.OrdAndValue(); - } - reuse.ord = ord; - reuse.value = values[ord]; - reuse = q.insertWithOverflow(reuse); - if (q.size() == topN) { - bottomValue = q.top().value; - } - } - } - - ord = siblings[ord]; - } - - if (sumValues == 0) { - System.out.println(" no sum"); - return null; - } - - if (dimConfig.multiValued) { - if (dimConfig.requireDimCount) { - sumValues = values[dimOrd]; - } else { - // Our sum'd count is not correct, in general: - sumValues = -1; - } - } else { - // Our sum'd dim count is accurate, so we keep it - } - - LabelAndValue[] labelValues = new LabelAndValue[q.size()]; - for(int i=labelValues.length-1;i>=0;i--) { - TopOrdAndFloatQueue.OrdAndValue ordAndValue = q.pop(); - FacetLabel child = taxoReader.getPath(ordAndValue.ord); - labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value); - } - - return new FacetResult(sumValues, labelValues, childCount); + rollup(); } /** {@link ValueSource} that returns the score for each diff --git a/lucene/facet/src/test/org/apache/lucene/facet/TestTaxonomyFacetAssociations.java b/lucene/facet/src/test/org/apache/lucene/facet/TestTaxonomyFacetAssociations.java index bc41861a6cb..730a48313b4 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/TestTaxonomyFacetAssociations.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/TestTaxonomyFacetAssociations.java @@ -19,7 +19,6 @@ package org.apache.lucene.facet; import org.apache.lucene.document.Document; -import org.apache.lucene.facet.FacetTestCase; import org.apache.lucene.facet.taxonomy.FacetLabel; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.TaxonomyWriter; @@ -42,10 +41,6 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase { private static Directory taxoDir; private static TaxonomyReader taxoReader; - private static final FacetLabel aint = new FacetLabel("int", "a"); - private static final FacetLabel bint = new FacetLabel("int", "b"); - private static final FacetLabel afloat = new FacetLabel("float", "a"); - private static final FacetLabel bfloat = new FacetLabel("float", "b"); private static FacetsConfig config; @BeforeClass @@ -107,7 +102,7 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase { searcher.search(new MatchAllDocsQuery(), fc); Facets facets = new TaxonomyFacetSumIntAssociations("$facets.int", taxoReader, config, fc); - assertEquals("value=350 childCount=2\n a (200)\n b (150)\n", facets.getTopChildren(10, "int").toString()); + assertEquals("value=-1 childCount=2\n a (200)\n b (150)\n", facets.getTopChildren(10, "int").toString()); assertEquals("Wrong count for category 'a'!", 200, facets.getSpecificValue("int", "a").intValue()); assertEquals("Wrong count for category 'b'!", 150, facets.getSpecificValue("int", "b").intValue()); } @@ -119,7 +114,7 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase { searcher.search(new MatchAllDocsQuery(), fc); Facets facets = new TaxonomyFacetSumFloatAssociations("$facets.float", taxoReader, config, fc); - assertEquals("value=59.999996 childCount=2\n a (50.0)\n b (9.999995)\n", facets.getTopChildren(10, "float").toString()); + assertEquals("value=-1.0 childCount=2\n a (50.0)\n b (9.999995)\n", facets.getTopChildren(10, "float").toString()); assertEquals("Wrong count for category 'a'!", 50f, facets.getSpecificValue("float", "a").floatValue(), 0.00001); assertEquals("Wrong count for category 'b'!", 10f, facets.getSpecificValue("float", "b").floatValue(), 0.00001); }