mirror of https://github.com/apache/lucene.git
LUCENE-5339: factor out base classes for int/float taxonomy aggregates
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5339@1546097 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
825a02356d
commit
458786d0f4
|
@ -29,7 +29,7 @@ public class TestAssociationsFacetsExample extends LuceneTestCase {
|
|||
public void testExamples() throws Exception {
|
||||
List<FacetResult> res = new AssociationsFacetsExample().runSumAssociations();
|
||||
assertEquals("Wrong number of results", 2, res.size());
|
||||
assertEquals("value=6 childCount=2\n lucene (4)\n solr (2)\n", res.get(0).toString());
|
||||
assertEquals("value=1.96 childCount=2\n computing (1.62)\n software (0.34)\n", res.get(1).toString());
|
||||
assertEquals("value=-1 childCount=2\n lucene (4)\n solr (2)\n", res.get(0).toString());
|
||||
assertEquals("value=-1.0 childCount=2\n computing (1.62)\n software (0.34)\n", res.get(1).toString());
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,10 +19,8 @@ package org.apache.lucene.facet;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
|
||||
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -32,8 +30,7 @@ import org.apache.lucene.util.FixedBitSet;
|
|||
* into DocValues was used.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public class FastTaxonomyFacetCounts extends TaxonomyFacets {
|
||||
private final int[] counts;
|
||||
public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
|
||||
|
||||
/** Create {@code FastTaxonomyFacetCounts}, which also
|
||||
* counts all facet labels. */
|
||||
|
@ -48,7 +45,6 @@ public class FastTaxonomyFacetCounts extends TaxonomyFacets {
|
|||
* field name for certain dimensions. */
|
||||
public FastTaxonomyFacetCounts(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
|
||||
super(indexFieldName, taxoReader, config);
|
||||
counts = new int[taxoReader.getSize()];
|
||||
count(fc.getMatchingDocs());
|
||||
}
|
||||
|
||||
|
@ -76,9 +72,8 @@ public class FastTaxonomyFacetCounts extends TaxonomyFacets {
|
|||
byte b = bytes[offset++];
|
||||
if (b >= 0) {
|
||||
prev = ord = ((ord << 7) | b) + prev;
|
||||
assert ord < counts.length: "ord=" + ord + " vs maxOrd=" + counts.length;
|
||||
//System.out.println(" ord=" + ord);
|
||||
++counts[ord];
|
||||
assert ord < values.length: "ord=" + ord + " vs maxOrd=" + values.length;
|
||||
++values[ord];
|
||||
ord = 0;
|
||||
} else {
|
||||
ord = (ord << 7) | (b & 0x7F);
|
||||
|
@ -88,112 +83,6 @@ public class FastTaxonomyFacetCounts extends TaxonomyFacets {
|
|||
}
|
||||
}
|
||||
|
||||
// nocommit we could do this lazily instead:
|
||||
|
||||
// Rollup any necessary dims:
|
||||
for(Map.Entry<String,FacetsConfig.DimConfig> ent : config.getDimConfigs().entrySet()) {
|
||||
String dim = ent.getKey();
|
||||
FacetsConfig.DimConfig ft = ent.getValue();
|
||||
if (ft.hierarchical && ft.multiValued == false) {
|
||||
int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
|
||||
// It can be -1 if this field was declared in the
|
||||
// config but never indexed:
|
||||
if (dimRootOrd > 0) {
|
||||
counts[dimRootOrd] += rollup(children[dimRootOrd]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int rollup(int ord) {
|
||||
int sum = 0;
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
int childValue = counts[ord] + rollup(children[ord]);
|
||||
counts[ord] = childValue;
|
||||
sum += childValue;
|
||||
ord = siblings[ord];
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
verifyDim(dim);
|
||||
int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path));
|
||||
if (ord < 0) {
|
||||
return -1;
|
||||
}
|
||||
return counts[ord];
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
// TODO: can we factor this out?
|
||||
if (topN <= 0) {
|
||||
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
|
||||
}
|
||||
FacetsConfig.DimConfig dimConfig = verifyDim(dim);
|
||||
//System.out.println("ftfc.getTopChildren topN=" + topN);
|
||||
FacetLabel cp = FacetLabel.create(dim, path);
|
||||
int dimOrd = taxoReader.getOrdinal(cp);
|
||||
if (dimOrd == -1) {
|
||||
//System.out.println("no ord for dim=" + dim + " path=" + path);
|
||||
return null;
|
||||
}
|
||||
|
||||
TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
|
||||
|
||||
int bottomCount = 0;
|
||||
|
||||
int ord = children[dimOrd];
|
||||
int totCount = 0;
|
||||
int childCount = 0;
|
||||
|
||||
TopOrdAndIntQueue.OrdAndValue reuse = null;
|
||||
while(ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
//System.out.println(" check ord=" + ord + " label=" + taxoReader.getPath(ord) + " topN=" + topN);
|
||||
if (counts[ord] > 0) {
|
||||
totCount += counts[ord];
|
||||
childCount++;
|
||||
if (counts[ord] > bottomCount) {
|
||||
if (reuse == null) {
|
||||
reuse = new TopOrdAndIntQueue.OrdAndValue();
|
||||
}
|
||||
reuse.ord = ord;
|
||||
reuse.value = counts[ord];
|
||||
reuse = q.insertWithOverflow(reuse);
|
||||
if (q.size() == topN) {
|
||||
bottomCount = q.top().value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ord = siblings[ord];
|
||||
}
|
||||
|
||||
if (totCount == 0) {
|
||||
//System.out.println(" no matches");
|
||||
return null;
|
||||
}
|
||||
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
totCount = counts[dimOrd];
|
||||
} else {
|
||||
// Our sum'd count is not correct, in general:
|
||||
totCount = -1;
|
||||
}
|
||||
} else {
|
||||
// Our sum'd dim count is accurate, so we keep it
|
||||
}
|
||||
|
||||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
for(int i=labelValues.length-1;i>=0;i--) {
|
||||
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
|
||||
FacetLabel child = taxoReader.getPath(ordAndValue.ord);
|
||||
labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value);
|
||||
}
|
||||
|
||||
return new FacetResult(totCount, labelValues, childCount);
|
||||
rollup();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,137 @@
|
|||
package org.apache.lucene.facet;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
|
||||
/** Base class for all taxonomy-based facets that aggregate
|
||||
* to a per-ords float[]. */
|
||||
|
||||
public abstract class FloatTaxonomyFacets extends TaxonomyFacets {
|
||||
|
||||
protected final float[] values;
|
||||
|
||||
protected FloatTaxonomyFacets(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config) throws IOException {
|
||||
super(indexFieldName, taxoReader, config);
|
||||
values = new float[taxoReader.getSize()];
|
||||
}
|
||||
|
||||
// nocommit we could do this lazily instead:
|
||||
protected void rollup() throws IOException {
|
||||
// Rollup any necessary dims:
|
||||
for(Map.Entry<String,FacetsConfig.DimConfig> ent : config.getDimConfigs().entrySet()) {
|
||||
String dim = ent.getKey();
|
||||
FacetsConfig.DimConfig ft = ent.getValue();
|
||||
if (ft.hierarchical && ft.multiValued == false) {
|
||||
int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
|
||||
assert dimRootOrd > 0;
|
||||
values[dimRootOrd] += rollup(children[dimRootOrd]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private float rollup(int ord) {
|
||||
float sum = 0;
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
float childValue = values[ord] + rollup(children[ord]);
|
||||
values[ord] = childValue;
|
||||
sum += childValue;
|
||||
ord = siblings[ord];
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
verifyDim(dim);
|
||||
int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path));
|
||||
if (ord < 0) {
|
||||
return -1;
|
||||
}
|
||||
return values[ord];
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
if (topN <= 0) {
|
||||
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
|
||||
}
|
||||
FacetsConfig.DimConfig dimConfig = verifyDim(dim);
|
||||
FacetLabel cp = FacetLabel.create(dim, path);
|
||||
int dimOrd = taxoReader.getOrdinal(cp);
|
||||
if (dimOrd == -1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN));
|
||||
float bottomValue = 0;
|
||||
|
||||
int ord = children[dimOrd];
|
||||
float sumValues = 0;
|
||||
int childCount = 0;
|
||||
|
||||
TopOrdAndFloatQueue.OrdAndValue reuse = null;
|
||||
while(ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
if (values[ord] > 0) {
|
||||
sumValues += values[ord];
|
||||
childCount++;
|
||||
if (values[ord] > bottomValue) {
|
||||
if (reuse == null) {
|
||||
reuse = new TopOrdAndFloatQueue.OrdAndValue();
|
||||
}
|
||||
reuse.ord = ord;
|
||||
reuse.value = values[ord];
|
||||
reuse = q.insertWithOverflow(reuse);
|
||||
if (q.size() == topN) {
|
||||
bottomValue = q.top().value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ord = siblings[ord];
|
||||
}
|
||||
|
||||
if (sumValues == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
sumValues = values[dimOrd];
|
||||
} else {
|
||||
// Our sum'd count is not correct, in general:
|
||||
sumValues = -1;
|
||||
}
|
||||
} else {
|
||||
// Our sum'd dim count is accurate, so we keep it
|
||||
}
|
||||
|
||||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
for(int i=labelValues.length-1;i>=0;i--) {
|
||||
TopOrdAndFloatQueue.OrdAndValue ordAndValue = q.pop();
|
||||
FacetLabel child = taxoReader.getPath(ordAndValue.ord);
|
||||
labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value);
|
||||
}
|
||||
|
||||
return new FacetResult(sumValues, labelValues, childCount);
|
||||
}
|
||||
}
|
|
@ -0,0 +1,141 @@
|
|||
package org.apache.lucene.facet;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
|
||||
/** Base class for all taxonomy-based facets that aggregate
|
||||
* to a per-ords int[]. */
|
||||
|
||||
public abstract class IntTaxonomyFacets extends TaxonomyFacets {
|
||||
|
||||
protected final int[] values;
|
||||
|
||||
protected IntTaxonomyFacets(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config) throws IOException {
|
||||
super(indexFieldName, taxoReader, config);
|
||||
values = new int[taxoReader.getSize()];
|
||||
}
|
||||
|
||||
// nocommit we could do this lazily instead:
|
||||
protected void rollup() throws IOException {
|
||||
// Rollup any necessary dims:
|
||||
for(Map.Entry<String,FacetsConfig.DimConfig> ent : config.getDimConfigs().entrySet()) {
|
||||
String dim = ent.getKey();
|
||||
FacetsConfig.DimConfig ft = ent.getValue();
|
||||
if (ft.hierarchical && ft.multiValued == false) {
|
||||
int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
|
||||
// It can be -1 if this field was declared in the
|
||||
// config but never indexed:
|
||||
if (dimRootOrd > 0) {
|
||||
values[dimRootOrd] += rollup(children[dimRootOrd]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int rollup(int ord) {
|
||||
int sum = 0;
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
int childValue = values[ord] + rollup(children[ord]);
|
||||
values[ord] = childValue;
|
||||
sum += childValue;
|
||||
ord = siblings[ord];
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
verifyDim(dim);
|
||||
int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path));
|
||||
if (ord < 0) {
|
||||
return -1;
|
||||
}
|
||||
return values[ord];
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
if (topN <= 0) {
|
||||
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
|
||||
}
|
||||
FacetsConfig.DimConfig dimConfig = verifyDim(dim);
|
||||
FacetLabel cp = FacetLabel.create(dim, path);
|
||||
int dimOrd = taxoReader.getOrdinal(cp);
|
||||
if (dimOrd == -1) {
|
||||
return null;
|
||||
}
|
||||
|
||||
TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
|
||||
|
||||
int bottomValue = 0;
|
||||
|
||||
int ord = children[dimOrd];
|
||||
int totValue = 0;
|
||||
int childCount = 0;
|
||||
|
||||
TopOrdAndIntQueue.OrdAndValue reuse = null;
|
||||
while(ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
if (values[ord] > 0) {
|
||||
totValue += values[ord];
|
||||
childCount++;
|
||||
if (values[ord] > bottomValue) {
|
||||
if (reuse == null) {
|
||||
reuse = new TopOrdAndIntQueue.OrdAndValue();
|
||||
}
|
||||
reuse.ord = ord;
|
||||
reuse.value = values[ord];
|
||||
reuse = q.insertWithOverflow(reuse);
|
||||
if (q.size() == topN) {
|
||||
bottomValue = q.top().value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ord = siblings[ord];
|
||||
}
|
||||
|
||||
if (totValue == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
totValue = values[dimOrd];
|
||||
} else {
|
||||
// Our sum'd value is not correct, in general:
|
||||
totValue = -1;
|
||||
}
|
||||
} else {
|
||||
// Our sum'd dim value is accurate, so we keep it
|
||||
}
|
||||
|
||||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
for(int i=labelValues.length-1;i>=0;i--) {
|
||||
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
|
||||
FacetLabel child = taxoReader.getPath(ordAndValue.ord);
|
||||
labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value);
|
||||
}
|
||||
|
||||
return new FacetResult(totValue, labelValues, childCount);
|
||||
}
|
||||
}
|
|
@ -19,10 +19,8 @@ package org.apache.lucene.facet;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
|
||||
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.util.FixedBitSet;
|
||||
|
@ -35,9 +33,8 @@ import org.apache.lucene.util.IntsRef;
|
|||
* @lucene.experimental */
|
||||
|
||||
// nocommit remove & add specialized Cached variation only?
|
||||
public class TaxonomyFacetCounts extends TaxonomyFacets {
|
||||
public class TaxonomyFacetCounts extends IntTaxonomyFacets {
|
||||
private final OrdinalsReader ordinalsReader;
|
||||
private final int[] counts;
|
||||
|
||||
/** Create {@code TaxonomyFacetCounts}, which also
|
||||
* counts all facet labels. Use this for a non-default
|
||||
|
@ -46,7 +43,6 @@ public class TaxonomyFacetCounts extends TaxonomyFacets {
|
|||
public TaxonomyFacetCounts(OrdinalsReader ordinalsReader, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
|
||||
super(ordinalsReader.getIndexFieldName(), taxoReader, config);
|
||||
this.ordinalsReader = ordinalsReader;
|
||||
counts = new int[taxoReader.getSize()];
|
||||
count(fc.getMatchingDocs());
|
||||
}
|
||||
|
||||
|
@ -61,115 +57,12 @@ public class TaxonomyFacetCounts extends TaxonomyFacets {
|
|||
while (doc < length && (doc = bits.nextSetBit(doc)) != -1) {
|
||||
ords.get(doc, scratch);
|
||||
for(int i=0;i<scratch.length;i++) {
|
||||
counts[scratch.ints[scratch.offset+i]]++;
|
||||
values[scratch.ints[scratch.offset+i]]++;
|
||||
}
|
||||
++doc;
|
||||
}
|
||||
}
|
||||
|
||||
// nocommit we could do this lazily instead:
|
||||
|
||||
// Rollup any necessary dims:
|
||||
for(Map.Entry<String,FacetsConfig.DimConfig> ent : config.getDimConfigs().entrySet()) {
|
||||
String dim = ent.getKey();
|
||||
FacetsConfig.DimConfig ft = ent.getValue();
|
||||
if (ft.hierarchical && ft.multiValued == false) {
|
||||
int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
|
||||
// It can be -1 if this field was declared in the
|
||||
// config but never indexed:
|
||||
if (dimRootOrd > 0) {
|
||||
counts[dimRootOrd] += rollup(children[dimRootOrd]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private int rollup(int ord) {
|
||||
int sum = 0;
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
int childValue = counts[ord] + rollup(children[ord]);
|
||||
counts[ord] = childValue;
|
||||
sum += childValue;
|
||||
ord = siblings[ord];
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
verifyDim(dim);
|
||||
int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path));
|
||||
if (ord < 0) {
|
||||
return -1;
|
||||
}
|
||||
return counts[ord];
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
// TODO: can we factor this out?
|
||||
if (topN <= 0) {
|
||||
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
|
||||
}
|
||||
FacetsConfig.DimConfig dimConfig = verifyDim(dim);
|
||||
FacetLabel cp = FacetLabel.create(dim, path);
|
||||
int dimOrd = taxoReader.getOrdinal(cp);
|
||||
if (dimOrd == -1) {
|
||||
//System.out.println("no ord for path=" + path);
|
||||
return null;
|
||||
}
|
||||
|
||||
TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
|
||||
|
||||
int bottomCount = 0;
|
||||
|
||||
int ord = children[dimOrd];
|
||||
int totCount = 0;
|
||||
int childCount = 0;
|
||||
|
||||
TopOrdAndIntQueue.OrdAndValue reuse = null;
|
||||
while(ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
if (counts[ord] > 0) {
|
||||
totCount += counts[ord];
|
||||
childCount++;
|
||||
if (counts[ord] > bottomCount) {
|
||||
if (reuse == null) {
|
||||
reuse = new TopOrdAndIntQueue.OrdAndValue();
|
||||
}
|
||||
reuse.ord = ord;
|
||||
reuse.value = counts[ord];
|
||||
reuse = q.insertWithOverflow(reuse);
|
||||
if (q.size() == topN) {
|
||||
bottomCount = q.top().value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ord = siblings[ord];
|
||||
}
|
||||
|
||||
if (totCount == 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
totCount = counts[dimOrd];
|
||||
} else {
|
||||
// Our sum'd count is not correct, in general:
|
||||
totCount = -1;
|
||||
}
|
||||
} else {
|
||||
// Our sum'd dim count is accurate, so we keep it
|
||||
}
|
||||
|
||||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
for(int i=labelValues.length-1;i>=0;i--) {
|
||||
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
|
||||
FacetLabel child = taxoReader.getPath(ordAndValue.ord);
|
||||
labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value);
|
||||
}
|
||||
|
||||
return new FacetResult(totCount, labelValues, childCount);
|
||||
rollup();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,7 +21,6 @@ import java.io.IOException;
|
|||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
|
||||
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -32,8 +31,7 @@ import org.apache.lucene.util.FixedBitSet;
|
|||
* encoding.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets {
|
||||
private final float[] values;
|
||||
public class TaxonomyFacetSumFloatAssociations extends FloatTaxonomyFacets {
|
||||
|
||||
/** Create {@code TaxonomyFacetSumFloatAssociations} against
|
||||
* the default index field. */
|
||||
|
@ -45,7 +43,6 @@ public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets {
|
|||
* the specified index field. */
|
||||
public TaxonomyFacetSumFloatAssociations(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
|
||||
super(indexFieldName, taxoReader, config);
|
||||
values = new float[taxoReader.getSize()];
|
||||
sumValues(fc.getMatchingDocs());
|
||||
}
|
||||
|
||||
|
@ -86,71 +83,7 @@ public class TaxonomyFacetSumFloatAssociations extends TaxonomyFacets {
|
|||
++doc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
verifyDim(dim);
|
||||
int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path));
|
||||
if (ord < 0) {
|
||||
return -1;
|
||||
}
|
||||
return values[ord];
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
// TODO: can we factor this out?
|
||||
if (topN <= 0) {
|
||||
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
|
||||
}
|
||||
FacetsConfig.DimConfig dimConfig = verifyDim(dim);
|
||||
FacetLabel cp = FacetLabel.create(dim, path);
|
||||
int dimOrd = taxoReader.getOrdinal(cp);
|
||||
if (dimOrd == -1) {
|
||||
//System.out.println("no ord for path=" + path);
|
||||
return null;
|
||||
}
|
||||
|
||||
TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN));
|
||||
float bottomValue = 0;
|
||||
|
||||
int ord = children[dimOrd];
|
||||
float sumValue = 0;
|
||||
int childCount = 0;
|
||||
TopOrdAndFloatQueue.OrdAndValue reuse = null;
|
||||
while(ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
if (values[ord] > 0) {
|
||||
sumValue += values[ord];
|
||||
childCount++;
|
||||
if (values[ord] > bottomValue) {
|
||||
if (reuse == null) {
|
||||
reuse = new TopOrdAndFloatQueue.OrdAndValue();
|
||||
}
|
||||
reuse.ord = ord;
|
||||
reuse.value = values[ord];
|
||||
reuse = q.insertWithOverflow(reuse);
|
||||
if (q.size() == topN) {
|
||||
bottomValue = q.top().value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ord = siblings[ord];
|
||||
}
|
||||
|
||||
if (sumValue == 0) {
|
||||
//System.out.println("totCount=0 for path=" + path);
|
||||
return null;
|
||||
}
|
||||
|
||||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
for(int i=labelValues.length-1;i>=0;i--) {
|
||||
TopOrdAndFloatQueue.OrdAndValue ordAndValue = q.pop();
|
||||
FacetLabel child = taxoReader.getPath(ordAndValue.ord);
|
||||
labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value);
|
||||
}
|
||||
|
||||
return new FacetResult(sumValue, labelValues, childCount);
|
||||
rollup();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -21,7 +21,6 @@ import java.io.IOException;
|
|||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
|
||||
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.index.BinaryDocValues;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -32,8 +31,7 @@ import org.apache.lucene.util.FixedBitSet;
|
|||
* encoding.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets {
|
||||
private final int[] values;
|
||||
public class TaxonomyFacetSumIntAssociations extends IntTaxonomyFacets {
|
||||
|
||||
/** Create {@code TaxonomyFacetSumIntAssociations} against
|
||||
* the default index field. */
|
||||
|
@ -45,7 +43,6 @@ public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets {
|
|||
* the specified index field. */
|
||||
public TaxonomyFacetSumIntAssociations(String indexFieldName, TaxonomyReader taxoReader, FacetsConfig config, FacetsCollector fc) throws IOException {
|
||||
super(indexFieldName, taxoReader, config);
|
||||
values = new int[taxoReader.getSize()];
|
||||
sumValues(fc.getMatchingDocs());
|
||||
}
|
||||
|
||||
|
@ -86,72 +83,7 @@ public class TaxonomyFacetSumIntAssociations extends TaxonomyFacets {
|
|||
++doc;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
verifyDim(dim);
|
||||
int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path));
|
||||
if (ord < 0) {
|
||||
return -1;
|
||||
}
|
||||
return values[ord];
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
// TODO: can we factor this out?
|
||||
if (topN <= 0) {
|
||||
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
|
||||
}
|
||||
verifyDim(dim);
|
||||
FacetLabel cp = FacetLabel.create(dim, path);
|
||||
int dimOrd = taxoReader.getOrdinal(cp);
|
||||
if (dimOrd == -1) {
|
||||
//System.out.println("no ord for path=" + path);
|
||||
return null;
|
||||
}
|
||||
|
||||
TopOrdAndIntQueue q = new TopOrdAndIntQueue(Math.min(taxoReader.getSize(), topN));
|
||||
int bottomValue = 0;
|
||||
|
||||
int ord = children[dimOrd];
|
||||
long sumValue = 0;
|
||||
int childCount = 0;
|
||||
|
||||
TopOrdAndIntQueue.OrdAndValue reuse = null;
|
||||
while(ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
if (values[ord] > 0) {
|
||||
sumValue += values[ord];
|
||||
childCount++;
|
||||
if (values[ord] > bottomValue) {
|
||||
if (reuse == null) {
|
||||
reuse = new TopOrdAndIntQueue.OrdAndValue();
|
||||
}
|
||||
reuse.ord = ord;
|
||||
reuse.value = values[ord];
|
||||
reuse = q.insertWithOverflow(reuse);
|
||||
if (q.size() == topN) {
|
||||
bottomValue = q.top().value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ord = siblings[ord];
|
||||
}
|
||||
|
||||
if (sumValue == 0) {
|
||||
//System.out.println("totCount=0 for path=" + path);
|
||||
return null;
|
||||
}
|
||||
|
||||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
for(int i=labelValues.length-1;i>=0;i--) {
|
||||
TopOrdAndIntQueue.OrdAndValue ordAndValue = q.pop();
|
||||
FacetLabel child = taxoReader.getPath(ordAndValue.ord);
|
||||
labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value);
|
||||
}
|
||||
|
||||
return new FacetResult(sumValue, labelValues, childCount);
|
||||
rollup();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,7 +23,6 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
|
||||
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.queries.function.FunctionValues;
|
||||
|
@ -33,12 +32,11 @@ import org.apache.lucene.search.Scorer;
|
|||
import org.apache.lucene.util.FixedBitSet;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
|
||||
/** Aggregates sum of values from a {@link ValueSource}, for
|
||||
* each facet label.
|
||||
/** Aggregates sum of values from {@link
|
||||
* ValueSource#doubleValue}, for each facet label.
|
||||
*
|
||||
* @lucene.experimental */
|
||||
public class TaxonomyFacetSumValueSource extends TaxonomyFacets {
|
||||
private final float[] values;
|
||||
public class TaxonomyFacetSumValueSource extends FloatTaxonomyFacets {
|
||||
private final OrdinalsReader ordinalsReader;
|
||||
|
||||
/** Aggregates float facet values from the provided
|
||||
|
@ -58,7 +56,6 @@ public class TaxonomyFacetSumValueSource extends TaxonomyFacets {
|
|||
FacetsConfig config, FacetsCollector fc, ValueSource valueSource) throws IOException {
|
||||
super(ordinalsReader.getIndexFieldName(), taxoReader, config);
|
||||
this.ordinalsReader = ordinalsReader;
|
||||
values = new float[taxoReader.getSize()];
|
||||
sumValues(fc.getMatchingDocs(), fc.getKeepScores(), valueSource);
|
||||
}
|
||||
|
||||
|
@ -105,107 +102,7 @@ public class TaxonomyFacetSumValueSource extends TaxonomyFacets {
|
|||
}
|
||||
}
|
||||
|
||||
// nocommit we could do this lazily instead:
|
||||
|
||||
// Rollup any necessary dims:
|
||||
for(Map.Entry<String,FacetsConfig.DimConfig> ent : config.getDimConfigs().entrySet()) {
|
||||
String dim = ent.getKey();
|
||||
FacetsConfig.DimConfig ft = ent.getValue();
|
||||
if (ft.hierarchical && ft.multiValued == false) {
|
||||
int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
|
||||
assert dimRootOrd > 0;
|
||||
values[dimRootOrd] += rollup(children[dimRootOrd]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private float rollup(int ord) {
|
||||
float sum = 0;
|
||||
while (ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
float childValue = values[ord] + rollup(children[ord]);
|
||||
values[ord] = childValue;
|
||||
sum += childValue;
|
||||
ord = siblings[ord];
|
||||
}
|
||||
return sum;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Number getSpecificValue(String dim, String... path) throws IOException {
|
||||
verifyDim(dim);
|
||||
int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path));
|
||||
if (ord < 0) {
|
||||
return -1;
|
||||
}
|
||||
return values[ord];
|
||||
}
|
||||
|
||||
@Override
|
||||
public FacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
|
||||
// TODO: can we factor this out?
|
||||
if (topN <= 0) {
|
||||
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
|
||||
}
|
||||
FacetsConfig.DimConfig dimConfig = verifyDim(dim);
|
||||
FacetLabel cp = FacetLabel.create(dim, path);
|
||||
int dimOrd = taxoReader.getOrdinal(cp);
|
||||
if (dimOrd == -1) {
|
||||
System.out.println(" no dim ord " + dim);
|
||||
return null;
|
||||
}
|
||||
|
||||
TopOrdAndFloatQueue q = new TopOrdAndFloatQueue(Math.min(taxoReader.getSize(), topN));
|
||||
float bottomValue = 0;
|
||||
|
||||
int ord = children[dimOrd];
|
||||
float sumValues = 0;
|
||||
int childCount = 0;
|
||||
|
||||
TopOrdAndFloatQueue.OrdAndValue reuse = null;
|
||||
while(ord != TaxonomyReader.INVALID_ORDINAL) {
|
||||
if (values[ord] > 0) {
|
||||
sumValues += values[ord];
|
||||
childCount++;
|
||||
if (values[ord] > bottomValue) {
|
||||
if (reuse == null) {
|
||||
reuse = new TopOrdAndFloatQueue.OrdAndValue();
|
||||
}
|
||||
reuse.ord = ord;
|
||||
reuse.value = values[ord];
|
||||
reuse = q.insertWithOverflow(reuse);
|
||||
if (q.size() == topN) {
|
||||
bottomValue = q.top().value;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
ord = siblings[ord];
|
||||
}
|
||||
|
||||
if (sumValues == 0) {
|
||||
System.out.println(" no sum");
|
||||
return null;
|
||||
}
|
||||
|
||||
if (dimConfig.multiValued) {
|
||||
if (dimConfig.requireDimCount) {
|
||||
sumValues = values[dimOrd];
|
||||
} else {
|
||||
// Our sum'd count is not correct, in general:
|
||||
sumValues = -1;
|
||||
}
|
||||
} else {
|
||||
// Our sum'd dim count is accurate, so we keep it
|
||||
}
|
||||
|
||||
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
|
||||
for(int i=labelValues.length-1;i>=0;i--) {
|
||||
TopOrdAndFloatQueue.OrdAndValue ordAndValue = q.pop();
|
||||
FacetLabel child = taxoReader.getPath(ordAndValue.ord);
|
||||
labelValues[i] = new LabelAndValue(child.components[cp.length], ordAndValue.value);
|
||||
}
|
||||
|
||||
return new FacetResult(sumValues, labelValues, childCount);
|
||||
rollup();
|
||||
}
|
||||
|
||||
/** {@link ValueSource} that returns the score for each
|
||||
|
|
|
@ -19,7 +19,6 @@ package org.apache.lucene.facet;
|
|||
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.facet.FacetTestCase;
|
||||
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
|
@ -42,10 +41,6 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
|||
private static Directory taxoDir;
|
||||
private static TaxonomyReader taxoReader;
|
||||
|
||||
private static final FacetLabel aint = new FacetLabel("int", "a");
|
||||
private static final FacetLabel bint = new FacetLabel("int", "b");
|
||||
private static final FacetLabel afloat = new FacetLabel("float", "a");
|
||||
private static final FacetLabel bfloat = new FacetLabel("float", "b");
|
||||
private static FacetsConfig config;
|
||||
|
||||
@BeforeClass
|
||||
|
@ -107,7 +102,7 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
|||
searcher.search(new MatchAllDocsQuery(), fc);
|
||||
|
||||
Facets facets = new TaxonomyFacetSumIntAssociations("$facets.int", taxoReader, config, fc);
|
||||
assertEquals("value=350 childCount=2\n a (200)\n b (150)\n", facets.getTopChildren(10, "int").toString());
|
||||
assertEquals("value=-1 childCount=2\n a (200)\n b (150)\n", facets.getTopChildren(10, "int").toString());
|
||||
assertEquals("Wrong count for category 'a'!", 200, facets.getSpecificValue("int", "a").intValue());
|
||||
assertEquals("Wrong count for category 'b'!", 150, facets.getSpecificValue("int", "b").intValue());
|
||||
}
|
||||
|
@ -119,7 +114,7 @@ public class TestTaxonomyFacetAssociations extends FacetTestCase {
|
|||
searcher.search(new MatchAllDocsQuery(), fc);
|
||||
|
||||
Facets facets = new TaxonomyFacetSumFloatAssociations("$facets.float", taxoReader, config, fc);
|
||||
assertEquals("value=59.999996 childCount=2\n a (50.0)\n b (9.999995)\n", facets.getTopChildren(10, "float").toString());
|
||||
assertEquals("value=-1.0 childCount=2\n a (50.0)\n b (9.999995)\n", facets.getTopChildren(10, "float").toString());
|
||||
assertEquals("Wrong count for category 'a'!", 50f, facets.getSpecificValue("float", "a").floatValue(), 0.00001);
|
||||
assertEquals("Wrong count for category 'b'!", 10f, facets.getSpecificValue("float", "b").floatValue(), 0.00001);
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue