LUCENE-5339: factor out base classes for int/float taxonomy aggregates

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5339@1546097 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-11-27 16:34:39 +00:00
parent 825a02356d
commit 458786d0f4
9 changed files with 297 additions and 480 deletions

View File

@ -29,7 +29,7 @@ public class TestAssociationsFacetsExample extends LuceneTestCase {
public void testExamples() throws Exception {
List<FacetResult> res = new AssociationsFacetsExample().runSumAssociations();
assertEquals("Wrong number of results", 2, res.size());
assertEquals("value=6 childCount=2\n lucene (4)\n solr (2)\n", res.get(0).toString());
assertEquals("value=1.96 childCount=2\n computing (1.62)\n software (0.34)\n", res.get(1).toString());
assertEquals("value=-1 childCount=2\n lucene (4)\n solr (2)\n", res.get(0).toString());
assertEquals("value=-1.0 childCount=2\n computing (1.62)\n software (0.34)\n", res.get(1).toString());
}
}

View File

@ -19,10 +19,8 @@ package org.apache.lucene.facet;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
@ -32,8 +30,7 @@ import org.apache.lucene.util.FixedBitSet;
* into DocValues was used.
*
* @lucene.experimental */
public class FastTaxonomyFacetCounts extends TaxonomyFacets {
private final int[] counts;
public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
/** Create {@code FastTaxonomyFacetCounts}, which also
* counts all facet labels. */
@ -48,7 +45,6 @@ public class FastTaxonomyFacetCounts extends TaxonomyFacets {
* field name for certain dimensions. */
public FastTaxonomyFacetCounts(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
super(indexFieldName, taxoReader, config);
counts = new int[taxoReader.getSize()];
count(fc.getMatchingDocs());
}
@ -76,9 +72,8 @@ public class FastTaxonomyFacetCounts extends TaxonomyFacets {
byte b = bytes[offset++];
if (b >= 0) {
prev = ord = ((ord << 7) | b) + prev;
assert ord < counts.length: "ord=" + ord + " vs maxOrd=" + counts.length;
//System.out.println(" ord=" + ord);
++counts[ord];
assert ord < values.length: "ord=" + ord + " vs maxOrd=" + values.length;
++values[ord];
ord = 0;
} else {
ord = (ord << 7) | (b & 0x7F);
@ -88,112 +83,6 @@ public class FastTaxonomyFacetCounts extends TaxonomyFacets {
}
}
// nocommit we could do this lazily instead:
// Rollup any necessary dims:
for(Map.Entry<String,FacetsConfig.DimConfig> ent : config.getDimConfigs().entrySet()) {
String dim = ent.getKey();
FacetsConfig.DimConfig ft = ent.getValue();
if (ft.hierarchical && ft.multiValued == false) {
int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
// It can be -1 if this field was declared in the
// config but never indexed:
if (dimRootOrd > 0) {
counts[dimRootOrd] += rollup(children[dimRootOrd]);
}
}
}
}
private int rollup(int ord) {
int sum = 0;
while (ord != TaxonomyReader.INVALID_ORDINAL) {
int childValue = counts[ord] + rollup(children[ord]);
counts[ord] = childValue;
sum += childValue;
ord = siblings[ord];
}
return sum;
}
@Override
public Number getSpecificValue(String dim, String... path) throws IOException {
verifyDim(dim);
int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path));
if (ord < 0) {
return -1;
}
return counts[ord];
}
@Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
// TODO: can we factor this out?
if (topN <= 0) {
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
}
FacetsConfig.DimConfig dimConfig = verifyDim(dim);
//System.out.println("ftfc.getTopChildren topN=" + topN);
FacetLabel cp = FacetLabel.create(dim, path);
int dimOrd = taxoReader.getOrdinal(cp);
if (dimOrd == -1) {
//System.out.println("no ord for dim=" + dim + " path=" + path);
return null;
}
TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
int bottomCount = 0;
int ord = children[dimOrd];
int totCount = 0;
int childCount = 0;
TopOrdAndIntQueue.OrdAndValue reuse = null;
while(ord != TaxonomyReader.INVALID_ORDINAL) {
//System.out.println(" check ord=" + ord + " label=" + taxoReader.getPath(ord) + " topN=" + topN);
if (counts[ord] > 0) {
totCount += counts[ord];
childCount++;
if (counts[ord] > bottomCount) {
if (reuse == null) {
reuse = new TopOrdAndIntQueue.OrdAndValue();
}
reuse.ord = ord;
reuse.value = counts[ord];
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomCount = q.top().value;
}
}
}
ord = siblings[ord];
}
if (totCount == 0) {
//System.out.println(" no matches");
return null;
}
if (dimConfig.multiValued) {
if (dimConfig.requireDimCount) {
totCount = counts[dimOrd];
} else {
// Our sum'd count is not correct, in general:
totCount = -1;
}
} else {
// Our sum'd dim count is accurate, so we keep it
}
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
for(int i=labelValues.length-1;i>=0;i--) {
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
FacetLabel child = taxoReader.getPath(ordAndValue.ord);
labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value);
}
return new FacetResult(totCount, labelValues, childCount);
rollup();
}
}

View File

@ -0,0 +1,137 @@
package org.apache.lucene.facet;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
/** Base class for all taxonomy-based facets that aggregate
 *  to a per-ords float[]. Subclasses fill {@link #values}
 *  and may then call {@link #rollup()}. */
public abstract class FloatTaxonomyFacets extends TaxonomyFacets {

  /** Aggregated value for each taxonomy ordinal (array index == ordinal). */
  protected final float[] values;

  /** Sole constructor; allocates one {@code float} slot per
   *  ordinal reported by {@code taxoReader.getSize()}. */
  protected FloatTaxonomyFacets(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config) throws IOException {
    super(indexFieldName, taxoReader, config);
    values = new float[taxoReader.getSize()];
  }

  // nocommit we could do this lazily instead:

  /** Rolls up child values into their ancestors, for every
   *  configured dimension that is hierarchical and not
   *  multi-valued. */
  protected void rollup() throws IOException {
    // Rollup any necessary dims:
    for(Map.Entry<String,FacetsConfig.DimConfig> ent : config.getDimConfigs().entrySet()) {
      String dim = ent.getKey();
      FacetsConfig.DimConfig ft = ent.getValue();
      if (ft.hierarchical && ft.multiValued == false) {
        int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
        // It can be -1 if this field was declared in the
        // config but never indexed; skip it rather than
        // indexing values[-1] (same guard as IntTaxonomyFacets):
        if (dimRootOrd > 0) {
          values[dimRootOrd] += rollup(children[dimRootOrd]);
        }
      }
    }
  }

  /** Recursively rolls up {@code ord} and all of its siblings:
   *  each visited ordinal's value becomes its own value plus
   *  the rolled-up total of its children.
   *
   *  @return the sum of the rolled-up values of {@code ord}
   *  and its siblings */
  private float rollup(int ord) {
    float sum = 0;
    while (ord != TaxonomyReader.INVALID_ORDINAL) {
      float childValue = values[ord] + rollup(children[ord]);
      values[ord] = childValue;
      sum += childValue;
      ord = siblings[ord];
    }
    return sum;
  }

  /** Returns the aggregated value for the exact label
   *  {@code dim/path}, or {@code -1} if the taxonomy has no
   *  ordinal for that label. */
  @Override
  public Number getSpecificValue(String dim, String... path) throws IOException {
    verifyDim(dim);
    int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path));
    if (ord < 0) {
      // Deliberately an if/return and not a ternary: a ternary
      // would promote this int -1 to float and change the
      // boxed type callers receive.
      return -1;
    }
    return values[ord];
  }

  /** Returns up to {@code topN} immediate children of
   *  {@code dim/path} with the highest values. Only children
   *  whose value is {@code > 0} are considered.
   *
   *  @return the result, or {@code null} if the label has no
   *  ordinal or the considered children sum to zero
   *  @throws IllegalArgumentException if {@code topN <= 0} */
  @Override
  public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
    if (topN <= 0) {
      throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
    }
    FacetsConfig.DimConfig dimConfig = verifyDim(dim);
    FacetLabel cp = FacetLabel.create(dim, path);
    int dimOrd = taxoReader.getOrdinal(cp);
    if (dimOrd == -1) {
      return null;
    }

    TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN));
    // Smallest value currently in a full queue; children at or
    // below it are not offered to the queue:
    float bottomValue = 0;

    int ord = children[dimOrd];
    float sumValues = 0;
    int childCount = 0;

    TopOrdAndFloatQueue.OrdAndValue reuse = null;
    while(ord != TaxonomyReader.INVALID_ORDINAL) {
      if (values[ord] > 0) {
        sumValues += values[ord];
        childCount++;
        if (values[ord] > bottomValue) {
          if (reuse == null) {
            reuse = new TopOrdAndFloatQueue.OrdAndValue();
          }
          reuse.ord = ord;
          reuse.value = values[ord];
          // Whatever the queue hands back is recycled for the
          // next candidate:
          reuse = q.insertWithOverflow(reuse);
          if (q.size() == topN) {
            bottomValue = q.top().value;
          }
        }
      }

      ord = siblings[ord];
    }

    if (sumValues == 0) {
      return null;
    }

    if (dimConfig.multiValued) {
      if (dimConfig.requireDimCount) {
        sumValues = values[dimOrd];
      } else {
        // Our sum'd value is not correct, in general:
        sumValues = -1;
      }
    } else {
      // Our sum'd dim value is accurate, so we keep it
    }

    // Fill from the last slot backwards as entries are popped:
    LabelAndValue[] labelValues = new LabelAndValue[q.size()];
    for(int i=labelValues.length-1;i>=0;i--) {
      TopOrdAndFloatQueue.OrdAndValue ordAndValue = q.pop();
      FacetLabel child = taxoReader.getPath(ordAndValue.ord);
      // components[cp.length] is the child's own label, i.e.
      // the first component below the requested path:
      labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value);
    }

    return new FacetResult(sumValues, labelValues, childCount);
  }
}

View File

@ -0,0 +1,141 @@
package org.apache.lucene.facet;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Map;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
/** Common base for taxonomy-based facet implementations whose
 *  per-ordinal aggregate is held in an {@code int[]}. */
public abstract class IntTaxonomyFacets extends TaxonomyFacets {

  /** One aggregated value per taxonomy ordinal. */
  protected final int[] values;

  /** Sole constructor; sizes {@link #values} from
   *  {@code taxoReader.getSize()}. */
  protected IntTaxonomyFacets(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config) throws IOException {
    super(indexFieldName, taxoReader, config);
    values = new int[taxoReader.getSize()];
  }

  // nocommit we could do this lazily instead:

  /** For each configured dimension that is hierarchical and
   *  single-valued, folds descendant values up into their
   *  ancestors. */
  protected void rollup() throws IOException {
    for (Map.Entry<String,FacetsConfig.DimConfig> entry : config.getDimConfigs().entrySet()) {
      FacetsConfig.DimConfig dimConfig = entry.getValue();
      if (!dimConfig.hierarchical || dimConfig.multiValued) {
        // Only hierarchical, single-valued dims are rolled up
        continue;
      }
      int rootOrd = taxoReader.getOrdinal(new FacetLabel(entry.getKey()));
      // The ordinal is -1 when the dim was declared in the
      // config but never indexed:
      if (rootOrd > 0) {
        values[rootOrd] += rollup(children[rootOrd]);
      }
    }
  }

  /** Walks the sibling chain starting at {@code ord}, adding to
   *  each node the rolled-up total of its children.
   *
   *  @return the combined (post-rollup) value of the chain */
  private int rollup(int ord) {
    int total = 0;
    for (int cur = ord; cur != TaxonomyReader.INVALID_ORDINAL; cur = siblings[cur]) {
      values[cur] += rollup(children[cur]);
      total += values[cur];
    }
    return total;
  }

  /** Looks up the aggregated value of the label
   *  {@code dim/path}; yields {@code -1} when the taxonomy has
   *  no ordinal for it. */
  @Override
  public Number getSpecificValue(String dim, String... path) throws IOException {
    verifyDim(dim);
    int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path));
    return ord < 0 ? -1 : values[ord];
  }

  /** Collects the {@code topN} highest-valued immediate
   *  children of {@code dim/path}, ignoring children whose
   *  value is not positive.
   *
   *  @return the result, or {@code null} when the label is
   *  unknown or the positive child values sum to zero
   *  @throws IllegalArgumentException if {@code topN <= 0} */
  @Override
  public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
    if (topN <= 0) {
      throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
    }
    FacetsConfig.DimConfig dimConfig = verifyDim(dim);
    FacetLabel parentLabel = FacetLabel.create(dim, path);
    int parentOrd = taxoReader.getOrdinal(parentLabel);
    if (parentOrd == -1) {
      return null;
    }

    TopOrdAndIntQueue queue = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
    TopOrdAndIntQueue.OrdAndValue spare = null;
    int threshold = 0;   // smallest value held once the queue is full
    int total = 0;
    int childCount = 0;

    for (int child = children[parentOrd]; child != TaxonomyReader.INVALID_ORDINAL; child = siblings[child]) {
      int value = values[child];
      if (value <= 0) {
        continue;
      }
      total += value;
      childCount++;
      if (value <= threshold) {
        // Cannot beat what is already queued
        continue;
      }
      if (spare == null) {
        spare = new TopOrdAndIntQueue.OrdAndValue();
      }
      spare.ord = child;
      spare.value = value;
      // The queue's return value becomes the next scratch entry
      spare = queue.insertWithOverflow(spare);
      if (queue.size() == topN) {
        threshold = queue.top().value;
      }
    }

    if (total == 0) {
      return null;
    }

    // For a single-valued dim the summed total is kept as is.
    // For a multi-valued dim the sum is not correct in general,
    // so use the dim's own value if it was required, else -1.
    if (dimConfig.multiValued) {
      total = dimConfig.requireDimCount ? values[parentOrd] : -1;
    }

    LabelAndValue[] labelValues = new LabelAndValue[queue.size()];
    int slot = labelValues.length;
    while (--slot >= 0) {
      // Entries are popped into the array from the back
      TopOrdAndIntQueue.OrdAndValue popped = queue.pop();
      FacetLabel childLabel = taxoReader.getPath(popped.ord);
      labelValues[slot] = new LabelAndValue(childLabel.components[parentLabel.length], popped.value);
    }

    return new FacetResult(total, labelValues, childCount);
  }
}

View File

@ -19,10 +19,8 @@ package org.apache.lucene.facet;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.FixedBitSet;
@ -35,9 +33,8 @@ import org.apache.lucene.util.IntsRef;
* @lucene.experimental */
// nocommit remove & add specialized Cached variation only?
public class TaxonomyFacetCounts extends TaxonomyFacets {
public class TaxonomyFacetCounts extends IntTaxonomyFacets {
private final OrdinalsReader ordinalsReader;
private final int[] counts;
/** Create {@code TaxonomyFacetCounts}, which also
* counts all facet labels. Use this for a non-default
@ -46,7 +43,6 @@ public class TaxonomyFacetCounts extends TaxonomyFacets {
public TaxonomyFacetCounts(OrdinalsReader ordinalsReader, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
super(ordinalsReader.getIndexFieldName(), taxoReader, config);
this.ordinalsReader = ordinalsReader;
counts = new int[taxoReader.getSize()];
count(fc.getMatchingDocs());
}
@ -61,115 +57,12 @@ public class TaxonomyFacetCounts extends TaxonomyFacets {
while (doc < length && (doc = bits.nextSetBit(doc)) != -1) {
ords.get(doc, scratch);
for(int i=0;i<scratch.length;i++) {
counts[scratch.ints[scratch.offset+i]]++;
values[scratch.ints[scratch.offset+i]]++;
}
++doc;
}
}
// nocommit we could do this lazily instead:
// Rollup any necessary dims:
for(Map.Entry<String,FacetsConfig.DimConfig> ent : config.getDimConfigs().entrySet()) {
String dim = ent.getKey();
FacetsConfig.DimConfig ft = ent.getValue();
if (ft.hierarchical && ft.multiValued == false) {
int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
// It can be -1 if this field was declared in the
// config but never indexed:
if (dimRootOrd > 0) {
counts[dimRootOrd] += rollup(children[dimRootOrd]);
}
}
}
}
private int rollup(int ord) {
int sum = 0;
while (ord != TaxonomyReader.INVALID_ORDINAL) {
int childValue = counts[ord] + rollup(children[ord]);
counts[ord] = childValue;
sum += childValue;
ord = siblings[ord];
}
return sum;
}
@Override
public Number getSpecificValue(String dim, String... path) throws IOException {
verifyDim(dim);
int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path));
if (ord < 0) {
return -1;
}
return counts[ord];
}
@Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
// TODO: can we factor this out?
if (topN <= 0) {
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
}
FacetsConfig.DimConfig dimConfig = verifyDim(dim);
FacetLabel cp = FacetLabel.create(dim, path);
int dimOrd = taxoReader.getOrdinal(cp);
if (dimOrd == -1) {
//System.out.println("no ord for path=" + path);
return null;
}
TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
int bottomCount = 0;
int ord = children[dimOrd];
int totCount = 0;
int childCount = 0;
TopOrdAndIntQueue.OrdAndValue reuse = null;
while(ord != TaxonomyReader.INVALID_ORDINAL) {
if (counts[ord] > 0) {
totCount += counts[ord];
childCount++;
if (counts[ord] > bottomCount) {
if (reuse == null) {
reuse = new TopOrdAndIntQueue.OrdAndValue();
}
reuse.ord = ord;
reuse.value = counts[ord];
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomCount = q.top().value;
}
}
}
ord = siblings[ord];
}
if (totCount == 0) {
return null;
}
if (dimConfig.multiValued) {
if (dimConfig.requireDimCount) {
totCount = counts[dimOrd];
} else {
// Our sum'd count is not correct, in general:
totCount = -1;
}
} else {
// Our sum'd dim count is accurate, so we keep it
}
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
for(int i=labelValues.length-1;i>=0;i--) {
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
FacetLabel child = taxoReader.getPath(ordAndValue.ord);
labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value);
}
return new FacetResult(totCount, labelValues, childCount);
rollup();
}
}

View File

@ -21,7 +21,6 @@ import java.io.IOException;
import java.util.List;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
@ -32,8 +31,7 @@ import org.apache.lucene.util.FixedBitSet;
* encoding.
*
* @lucene.experimental */
public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets {
private final float[] values;
public class TaxonomyFacetSumFloatAssociations extends FloatTaxonomyFacets {
/** Create {@code TaxonomyFacetSumFloatAssociations} against
* the default index field. */
@ -45,7 +43,6 @@ public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets {
* the specified index field. */
public TaxonomyFacetSumFloatAssociations(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
super(indexFieldName, taxoReader, config);
values = new float[taxoReader.getSize()];
sumValues(fc.getMatchingDocs());
}
@ -86,71 +83,7 @@ public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets {
++doc;
}
}
}
@Override
public Number getSpecificValue(String dim, String... path) throws IOException {
verifyDim(dim);
int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path));
if (ord < 0) {
return -1;
}
return values[ord];
}
@Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
// TODO: can we factor this out?
if (topN <= 0) {
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
}
FacetsConfig.DimConfig dimConfig = verifyDim(dim);
FacetLabel cp = FacetLabel.create(dim, path);
int dimOrd = taxoReader.getOrdinal(cp);
if (dimOrd == -1) {
//System.out.println("no ord for path=" + path);
return null;
}
TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN));
float bottomValue = 0;
int ord = children[dimOrd];
float sumValue = 0;
int childCount = 0;
TopOrdAndFloatQueue.OrdAndValue reuse = null;
while(ord != TaxonomyReader.INVALID_ORDINAL) {
if (values[ord] > 0) {
sumValue += values[ord];
childCount++;
if (values[ord] > bottomValue) {
if (reuse == null) {
reuse = new TopOrdAndFloatQueue.OrdAndValue();
}
reuse.ord = ord;
reuse.value = values[ord];
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomValue = q.top().value;
}
}
}
ord = siblings[ord];
}
if (sumValue == 0) {
//System.out.println("totCount=0 for path=" + path);
return null;
}
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
for(int i=labelValues.length-1;i>=0;i--) {
TopOrdAndFloatQueue.OrdAndValue ordAndValue = q.pop();
FacetLabel child = taxoReader.getPath(ordAndValue.ord);
labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value);
}
return new FacetResult(sumValue, labelValues, childCount);
rollup();
}
}

View File

@ -21,7 +21,6 @@ import java.io.IOException;
import java.util.List;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
@ -32,8 +31,7 @@ import org.apache.lucene.util.FixedBitSet;
* encoding.
*
* @lucene.experimental */
public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets {
private final int[] values;
public class TaxonomyFacetSumIntAssociations extends IntTaxonomyFacets {
/** Create {@code TaxonomyFacetSumIntAssociations} against
* the default index field. */
@ -45,7 +43,6 @@ public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets {
* the specified index field. */
public TaxonomyFacetSumIntAssociations(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
super(indexFieldName, taxoReader, config);
values = new int[taxoReader.getSize()];
sumValues(fc.getMatchingDocs());
}
@ -86,72 +83,7 @@ public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets {
++doc;
}
}
}
@Override
public Number getSpecificValue(String dim, String... path) throws IOException {
verifyDim(dim);
int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path));
if (ord < 0) {
return -1;
}
return values[ord];
}
@Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
// TODO: can we factor this out?
if (topN <= 0) {
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
}
verifyDim(dim);
FacetLabel cp = FacetLabel.create(dim, path);
int dimOrd = taxoReader.getOrdinal(cp);
if (dimOrd == -1) {
//System.out.println("no ord for path=" + path);
return null;
}
TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
int bottomValue = 0;
int ord = children[dimOrd];
long sumValue = 0;
int childCount = 0;
TopOrdAndIntQueue.OrdAndValue reuse = null;
while(ord != TaxonomyReader.INVALID_ORDINAL) {
if (values[ord] > 0) {
sumValue += values[ord];
childCount++;
if (values[ord] > bottomValue) {
if (reuse == null) {
reuse = new TopOrdAndIntQueue.OrdAndValue();
}
reuse.ord = ord;
reuse.value = values[ord];
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomValue = q.top().value;
}
}
}
ord = siblings[ord];
}
if (sumValue == 0) {
//System.out.println("totCount=0 for path=" + path);
return null;
}
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
for(int i=labelValues.length-1;i>=0;i--) {
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
FacetLabel child = taxoReader.getPath(ordAndValue.ord);
labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value);
}
return new FacetResult(sumValue, labelValues, childCount);
rollup();
}
}

View File

@ -23,7 +23,6 @@ import java.util.List;
import java.util.Map;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.queries.function.FunctionValues;
@ -33,12 +32,11 @@ import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.FixedBitSet;
import org.apache.lucene.util.IntsRef;
/** Aggregates sum of values from a {@link ValueSource}, for
* each facet label.
/** Aggregates sum of values from {@link
* ValueSource#doubleValue}, for each facet label.
*
* @lucene.experimental */
public class TaxonomyFacetSumValueSource extends TaxonomyFacets {
private final float[] values;
public class TaxonomyFacetSumValueSource extends FloatTaxonomyFacets {
private final OrdinalsReader ordinalsReader;
/** Aggreggates float facet values from the provided
@ -58,7 +56,6 @@ public class TaxonomyFacetSumValueSource extends TaxonomyFacets {
FacetsConfig config, FacetsCollector fc, ValueSource valueSource) throws IOException {
super(ordinalsReader.getIndexFieldName(), taxoReader, config);
this.ordinalsReader = ordinalsReader;
values = new float[taxoReader.getSize()];
sumValues(fc.getMatchingDocs(), fc.getKeepScores(), valueSource);
}
@ -105,107 +102,7 @@ public class TaxonomyFacetSumValueSource extends TaxonomyFacets {
}
}
// nocommit we could do this lazily instead:
// Rollup any necessary dims:
for(Map.Entry<String,FacetsConfig.DimConfig> ent : config.getDimConfigs().entrySet()) {
String dim = ent.getKey();
FacetsConfig.DimConfig ft = ent.getValue();
if (ft.hierarchical && ft.multiValued == false) {
int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
assert dimRootOrd > 0;
values[dimRootOrd] += rollup(children[dimRootOrd]);
}
}
}
private float rollup(int ord) {
float sum = 0;
while (ord != TaxonomyReader.INVALID_ORDINAL) {
float childValue = values[ord] + rollup(children[ord]);
values[ord] = childValue;
sum += childValue;
ord = siblings[ord];
}
return sum;
}
@Override
public Number getSpecificValue(String dim, String... path) throws IOException {
verifyDim(dim);
int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path));
if (ord < 0) {
return -1;
}
return values[ord];
}
@Override
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
// TODO: can we factor this out?
if (topN <= 0) {
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
}
FacetsConfig.DimConfig dimConfig = verifyDim(dim);
FacetLabel cp = FacetLabel.create(dim, path);
int dimOrd = taxoReader.getOrdinal(cp);
if (dimOrd == -1) {
System.out.println(" no dim ord " + dim);
return null;
}
TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN));
float bottomValue = 0;
int ord = children[dimOrd];
float sumValues = 0;
int childCount = 0;
TopOrdAndFloatQueue.OrdAndValue reuse = null;
while(ord != TaxonomyReader.INVALID_ORDINAL) {
if (values[ord] > 0) {
sumValues += values[ord];
childCount++;
if (values[ord] > bottomValue) {
if (reuse == null) {
reuse = new TopOrdAndFloatQueue.OrdAndValue();
}
reuse.ord = ord;
reuse.value = values[ord];
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomValue = q.top().value;
}
}
}
ord = siblings[ord];
}
if (sumValues == 0) {
System.out.println(" no sum");
return null;
}
if (dimConfig.multiValued) {
if (dimConfig.requireDimCount) {
sumValues = values[dimOrd];
} else {
// Our sum'd count is not correct, in general:
sumValues = -1;
}
} else {
// Our sum'd dim count is accurate, so we keep it
}
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
for(int i=labelValues.length-1;i>=0;i--) {
TopOrdAndFloatQueue.OrdAndValue ordAndValue = q.pop();
FacetLabel child = taxoReader.getPath(ordAndValue.ord);
labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value);
}
return new FacetResult(sumValues, labelValues, childCount);
rollup();
}
/** {@link ValueSource} that returns the score for each

View File

@ -19,7 +19,6 @@ package org.apache.lucene.facet;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
@ -42,10 +41,6 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
private static Directory taxoDir;
private static TaxonomyReader taxoReader;
private static final FacetLabel aint = new FacetLabel("int", "a");
private static final FacetLabel bint = new FacetLabel("int", "b");
private static final FacetLabel afloat = new FacetLabel("float", "a");
private static final FacetLabel bfloat = new FacetLabel("float", "b");
private static FacetsConfig config;
@BeforeClass
@ -107,7 +102,7 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
searcher.search(new MatchAllDocsQuery(), fc);
Facets facets = new TaxonomyFacetSumIntAssociations("$facets.int", taxoReader, config, fc);
assertEquals("value=350 childCount=2\n a (200)\n b (150)\n", facets.getTopChildren(10, "int").toString());
assertEquals("value=-1 childCount=2\n a (200)\n b (150)\n", facets.getTopChildren(10, "int").toString());
assertEquals("Wrong count for category 'a'!", 200, facets.getSpecificValue("int", "a").intValue());
assertEquals("Wrong count for category 'b'!", 150, facets.getSpecificValue("int", "b").intValue());
}
@ -119,7 +114,7 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
searcher.search(new MatchAllDocsQuery(), fc);
Facets facets = new TaxonomyFacetSumFloatAssociations("$facets.float", taxoReader, config, fc);
assertEquals("value=59.999996 childCount=2\n a (50.0)\n b (9.999995)\n", facets.getTopChildren(10, "float").toString());
assertEquals("value=-1.0 childCount=2\n a (50.0)\n b (9.999995)\n", facets.getTopChildren(10, "float").toString());
assertEquals("Wrong count for category 'a'!", 50f, facets.getSpecificValue("float", "a").floatValue(), 0.00001);
assertEquals("Wrong count for category 'b'!", 10f, facets.getSpecificValue("float", "b").floatValue(), 0.00001);
}