LUCENE-5339: assocations

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene5339@1543047 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-11-18 14:53:53 +00:00
parent a872b82402
commit 18117c0b04
10 changed files with 765 additions and 3 deletions

1
TODO
View File

@ -2,6 +2,7 @@ nocommit this!
TODO
- associations
- make SumFloat too
- cutover taxo writer/reader to pathToString/stringToPath
- wrap an IW instead of extending one? or, FacetDocument?
- re-enable ALL_BUT_DIM somehow?

View File

@ -554,7 +554,7 @@ public class Field implements IndexableField, StorableField {
return analyzer.tokenStream(name(), stringValue());
}
throw new IllegalArgumentException("Field must have either TokenStream, String, Reader or Number value");
throw new IllegalArgumentException("Field must have either TokenStream, String, Reader or Number value; this=" + this);
}
static final class StringTokenStream extends TokenStream {

View File

@ -0,0 +1,78 @@
package org.apache.lucene.facet.simple;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.Arrays;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.util.BytesRef;
/** Associates an arbitrary byte[] with the added facet
* path. */
public class AssociationFacetField extends Field {
static final FieldType TYPE = new FieldType();
static {
TYPE.setIndexed(true);
TYPE.freeze();
}
final String dim;
final String[] path;
final BytesRef assoc;
public AssociationFacetField(BytesRef assoc, String dim, String... path) {
super("dummy", TYPE);
this.dim = dim;
this.assoc = assoc;
if (path.length == 0) {
throw new IllegalArgumentException("path must have at least one element");
}
this.path = path;
}
/** Utility ctor: associates an int value (translates it
* to 4-byte BytesRef). */
public AssociationFacetField(int assoc, String dim, String... path) {
this(intToBytesRef(assoc), dim, path);
}
/** Utility ctor: associates a float value (translates it
* to 4-byte BytesRef). */
public AssociationFacetField(float assoc, String dim, String... path) {
this(floatToBytesRef(assoc), dim, path);
}
private static BytesRef intToBytesRef(int v) {
byte[] bytes = new byte[4];
// big-endian:
bytes[0] = (byte) (v >> 24);
bytes[1] = (byte) (v >> 16);
bytes[2] = (byte) (v >> 8);
bytes[3] = (byte) v;
return new BytesRef(bytes);
}
private static BytesRef floatToBytesRef(float v) {
return intToBytesRef(Float.floatToIntBits(v));
}
@Override
public String toString() {
return "FacetField(dim=" + dim + " path=" + Arrays.toString(path) + ")";
}
}

View File

@ -38,6 +38,7 @@ import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.StorableField;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
@ -66,6 +67,9 @@ public class FacetIndexWriter extends IndexWriter {
// ... and also all SortedSetDocValuesFacetFields:
Map<String,List<SortedSetDocValuesFacetField>> dvByField = new HashMap<String,List<SortedSetDocValuesFacetField>>();
// ... and also all AssociationFacetFields
Map<String,List<AssociationFacetField>> assocByField = new HashMap<String,List<AssociationFacetField>>();
for(IndexableField field : doc.indexableFields()) {
if (field.fieldType() == FacetField.TYPE) {
FacetField facetField = (FacetField) field;
@ -90,6 +94,20 @@ public class FacetIndexWriter extends IndexWriter {
}
fields.add(facetField);
}
if (field.fieldType() == AssociationFacetField.TYPE) {
AssociationFacetField facetField = (AssociationFacetField) field;
FacetsConfig.DimConfig dimConfig = facetsConfig.getDimConfig(field.name());
// nocommit how to use a different default name for assocs?
String indexedFieldName = dimConfig.indexedFieldName;
List<AssociationFacetField> fields = assocByField.get(indexedFieldName);
if (fields == null) {
fields = new ArrayList<AssociationFacetField>();
assocByField.put(indexedFieldName, fields);
}
fields.add(facetField);
}
}
List<Field> addedIndexedFields = new ArrayList<Field>();
@ -97,13 +115,14 @@ public class FacetIndexWriter extends IndexWriter {
processFacetFields(byField, addedIndexedFields, addedStoredFields);
processSSDVFacetFields(dvByField, addedIndexedFields, addedStoredFields);
processAssocFacetFields(assocByField, addedIndexedFields, addedStoredFields);
//System.out.println("add stored: " + addedStoredFields);
final List<IndexableField> allIndexedFields = new ArrayList<IndexableField>();
for(IndexableField field : doc.indexableFields()) {
IndexableFieldType ft = field.fieldType();
if (ft != FacetField.TYPE && ft != SortedSetDocValuesFacetField.TYPE) {
if (ft != FacetField.TYPE && ft != SortedSetDocValuesFacetField.TYPE && ft != AssociationFacetField.TYPE) {
allIndexedFields.add(field);
}
}
@ -200,6 +219,35 @@ public class FacetIndexWriter extends IndexWriter {
}
}
private void processAssocFacetFields(Map<String,List<AssociationFacetField>> byField, List<Field> addedIndexedFields, List<Field> addedStoredFields) throws IOException {
for(Map.Entry<String,List<AssociationFacetField>> ent : byField.entrySet()) {
byte[] bytes = new byte[16];
int upto = 0;
String indexedFieldName = ent.getKey();
for(AssociationFacetField field : ent.getValue()) {
// NOTE: we don't add parents for associations
// nocommit is that right? maybe we are supposed to
// add to taxo writer, and just not index the parent
// ords?
int ordinal = taxoWriter.addCategory(FacetLabel.create(field.dim, field.path));
if (upto + 4 > bytes.length) {
bytes = ArrayUtil.grow(bytes, upto+4);
}
// big-endian:
bytes[upto++] = (byte) (ordinal >> 24);
bytes[upto++] = (byte) (ordinal >> 16);
bytes[upto++] = (byte) (ordinal >> 8);
bytes[upto++] = (byte) ordinal;
if (upto + field.assoc.length > bytes.length) {
bytes = ArrayUtil.grow(bytes, upto+field.assoc.length);
}
System.arraycopy(field.assoc.bytes, field.assoc.offset, bytes, upto, field.assoc.length);
upto += field.assoc.length;
}
addedStoredFields.add(new BinaryDocValuesField(indexedFieldName, new BytesRef(bytes, 0, upto)));
}
}
/** We can open this up if/when we really need
* pluggability on the encoding. */
private final BytesRef dedupAndEncode(IntsRef ordinals) {
@ -314,6 +362,7 @@ public class FacetIndexWriter extends IndexWriter {
buffer[upto++] = ch;
}
}
parts.add(new String(buffer, 0, upto));
assert !lastEscape;
return parts.toArray(new String[parts.size()]);
}

View File

@ -16,7 +16,9 @@ package org.apache.lucene.facet.simple;
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
@ -110,7 +112,7 @@ public final class SortedSetDocValuesReaderState {
dv.lookupOrd(ord, spare);
String[] components = FacetIndexWriter.stringToPath(spare.utf8ToString());
if (components.length != 2) {
throw new IllegalArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + spare.utf8ToString());
throw new IllegalArgumentException("this class can only handle 2 level hierarchy (dim/value); got: " + Arrays.toString(components) + " " + spare.utf8ToString());
}
if (!components[0].equals(lastDim)) {
if (lastDim != null) {

View File

@ -0,0 +1,235 @@
package org.apache.lucene.facet.simple;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import org.apache.lucene.facet.simple.SimpleFacetsCollector.MatchingDocs;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
// nocommit jdoc that this assumes/requires the default encoding
public class SumFloatAssociationFacets extends Facets {
private final FacetsConfig facetsConfig;
private final TaxonomyReader taxoReader;
private final float[] values;
private final String facetsFieldName;
private final int[] children;
private final int[] parents;
private final int[] siblings;
public SumFloatAssociationFacets(TaxonomyReader taxoReader, FacetsConfig facetsConfig, SimpleFacetsCollector fc) throws IOException {
this(FacetsConfig.DEFAULT_INDEXED_FIELD_NAME, taxoReader, facetsConfig, fc);
}
public SumFloatAssociationFacets(String facetsFieldName, TaxonomyReader taxoReader, FacetsConfig facetsConfig, SimpleFacetsCollector fc) throws IOException {
this.facetsFieldName = facetsFieldName;
this.taxoReader = taxoReader;
this.facetsConfig = facetsConfig;
ParallelTaxonomyArrays pta = taxoReader.getParallelTaxonomyArrays();
children = pta.children();
parents = pta.parents();
siblings = pta.siblings();
values = new float[taxoReader.getSize()];
sumValues(fc.getMatchingDocs());
}
private final void sumValues(List<MatchingDocs> matchingDocs) throws IOException {
//System.out.println("count matchingDocs=" + matchingDocs + " facetsField=" + facetsFieldName);
for(MatchingDocs hits : matchingDocs) {
BinaryDocValues dv = hits.context.reader().getBinaryDocValues(facetsFieldName);
if (dv == null) { // this reader does not have DocValues for the requested category list
continue;
}
FixedBitSet bits = hits.bits;
final int length = hits.bits.length();
int doc = 0;
BytesRef scratch = new BytesRef();
//System.out.println("count seg=" + hits.context.reader());
while (doc < length && (doc = bits.nextSetBit(doc)) != -1) {
//System.out.println(" doc=" + doc);
// nocommit use OrdinalsReader? but, add a
// BytesRef getAssociation()?
dv.get(doc, scratch);
byte[] bytes = scratch.bytes;
int end = scratch.offset + scratch.length;
int offset = scratch.offset;
while (offset < end) {
int ord = ((bytes[offset]&0xFF) << 24) |
((bytes[offset+1]&0xFF) << 16) |
((bytes[offset+2]&0xFF) << 8) |
(bytes[offset+3]&0xFF);
offset += 4;
int value = ((bytes[offset]&0xFF) << 24) |
((bytes[offset+1]&0xFF) << 16) |
((bytes[offset+2]&0xFF) << 8) |
(bytes[offset+3]&0xFF);
offset += 4;
values[ord] += Float.intBitsToFloat(value);
}
++doc;
}
}
// nocommit we could do this lazily instead:
// Rollup any necessary dims:
// nocommit should we rollup?
/*
for(Map.Entry<String,FacetsConfig.DimConfig> ent : facetsConfig.getDimConfigs().entrySet()) {
String dim = ent.getKey();
FacetsConfig.DimConfig ft = ent.getValue();
if (ft.hierarchical && ft.multiValued == false) {
int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
// It can be -1 if this field was declared in the
// facetsConfig but never indexed:
if (dimRootOrd > 0) {
counts[dimRootOrd] += rollup(children[dimRootOrd]);
}
}
}
*/
}
private float rollup(int ord) {
int sum = 0;
while (ord != TaxonomyReader.INVALID_ORDINAL) {
float childValue = values[ord] + rollup(children[ord]);
values[ord] = childValue;
sum += childValue;
ord = siblings[ord];
}
return sum;
}
/** Return the count for a specific path. Returns -1 if
* this path doesn't exist, else the count. */
@Override
public Number getSpecificValue(String dim, String... path) throws IOException {
int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path));
if (ord < 0) {
return -1;
}
return values[ord];
}
@Override
public SimpleFacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
FacetLabel cp = FacetLabel.create(dim, path);
int ord = taxoReader.getOrdinal(cp);
if (ord == -1) {
//System.out.println("no ord for path=" + path);
return null;
}
return getTopChildren(cp, ord, topN);
}
private SimpleFacetResult getTopChildren(FacetLabel path, int dimOrd, int topN) throws IOException {
TopOrdValueQueue q = new TopOrdValueQueue(topN);
float bottomValue = 0;
int ord = children[dimOrd];
float sumValue = 0;
TopOrdValueQueue.OrdAndValue reuse = null;
while(ord != TaxonomyReader.INVALID_ORDINAL) {
if (values[ord] > 0) {
sumValue += values[ord];
if (values[ord] > bottomValue) {
if (reuse == null) {
reuse = new TopOrdValueQueue.OrdAndValue();
}
reuse.ord = ord;
reuse.value = values[ord];
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomValue = q.top().value;
}
}
}
ord = siblings[ord];
}
if (sumValue == 0) {
//System.out.println("totCount=0 for path=" + path);
return null;
}
/*
FacetsConfig.DimConfig ft = facetsConfig.getDimConfig(path.components[0]);
// nocommit shouldn't we verify the indexedFieldName
// matches what was passed to our ctor?
if (ft.hierarchical && ft.multiValued) {
totCount = counts[dimOrd];
}
*/
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
for(int i=labelValues.length-1;i>=0;i--) {
TopOrdValueQueue.OrdAndValue ordAndValue = q.pop();
FacetLabel child = taxoReader.getPath(ordAndValue.ord);
labelValues[i] = new LabelAndValue(child.components[path.length], ordAndValue.value);
}
return new SimpleFacetResult(path, sumValue, labelValues);
}
@Override
public List<SimpleFacetResult> getAllDims(int topN) throws IOException {
int ord = children[TaxonomyReader.ROOT_ORDINAL];
List<SimpleFacetResult> results = new ArrayList<SimpleFacetResult>();
while (ord != TaxonomyReader.INVALID_ORDINAL) {
SimpleFacetResult result = getTopChildren(taxoReader.getPath(ord), ord, topN);
if (result != null) {
results.add(result);
}
ord = siblings[ord];
}
// Sort by highest count:
Collections.sort(results,
new Comparator<SimpleFacetResult>() {
@Override
public int compare(SimpleFacetResult a, SimpleFacetResult b) {
if (a.value.intValue() > b.value.intValue()) {
return -1;
} else if (b.value.intValue() > a.value.intValue()) {
return 1;
} else {
// Tie break by dimension
return a.path.components[0].compareTo(b.path.components[0]);
}
}
});
return results;
}
}

View File

@ -0,0 +1,235 @@
package org.apache.lucene.facet.simple;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import org.apache.lucene.facet.simple.SimpleFacetsCollector.MatchingDocs;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
// nocommit jdoc that this assumes/requires the default encoding
public class SumIntAssociationFacets extends Facets {
private final FacetsConfig facetsConfig;
private final TaxonomyReader taxoReader;
private final int[] values;
private final String facetsFieldName;
private final int[] children;
private final int[] parents;
private final int[] siblings;
public SumIntAssociationFacets(TaxonomyReader taxoReader, FacetsConfig facetsConfig, SimpleFacetsCollector fc) throws IOException {
this(FacetsConfig.DEFAULT_INDEXED_FIELD_NAME, taxoReader, facetsConfig, fc);
}
public SumIntAssociationFacets(String facetsFieldName, TaxonomyReader taxoReader, FacetsConfig facetsConfig, SimpleFacetsCollector fc) throws IOException {
this.facetsFieldName = facetsFieldName;
this.taxoReader = taxoReader;
this.facetsConfig = facetsConfig;
ParallelTaxonomyArrays pta = taxoReader.getParallelTaxonomyArrays();
children = pta.children();
parents = pta.parents();
siblings = pta.siblings();
values = new int[taxoReader.getSize()];
sumValues(fc.getMatchingDocs());
}
private final void sumValues(List<MatchingDocs> matchingDocs) throws IOException {
//System.out.println("count matchingDocs=" + matchingDocs + " facetsField=" + facetsFieldName);
for(MatchingDocs hits : matchingDocs) {
BinaryDocValues dv = hits.context.reader().getBinaryDocValues(facetsFieldName);
if (dv == null) { // this reader does not have DocValues for the requested category list
continue;
}
FixedBitSet bits = hits.bits;
final int length = hits.bits.length();
int doc = 0;
BytesRef scratch = new BytesRef();
//System.out.println("count seg=" + hits.context.reader());
while (doc < length && (doc = bits.nextSetBit(doc)) != -1) {
//System.out.println(" doc=" + doc);
// nocommit use OrdinalsReader? but, add a
// BytesRef getAssociation()?
dv.get(doc, scratch);
byte[] bytes = scratch.bytes;
int end = scratch.offset + scratch.length;
int offset = scratch.offset;
while (offset < end) {
int ord = ((bytes[offset]&0xFF) << 24) |
((bytes[offset+1]&0xFF) << 16) |
((bytes[offset+2]&0xFF) << 8) |
(bytes[offset+3]&0xFF);
offset += 4;
int value = ((bytes[offset]&0xFF) << 24) |
((bytes[offset+1]&0xFF) << 16) |
((bytes[offset+2]&0xFF) << 8) |
(bytes[offset+3]&0xFF);
offset += 4;
values[ord] += value;
}
++doc;
}
}
// nocommit we could do this lazily instead:
// Rollup any necessary dims:
// nocommit should we rollup?
/*
for(Map.Entry<String,FacetsConfig.DimConfig> ent : facetsConfig.getDimConfigs().entrySet()) {
String dim = ent.getKey();
FacetsConfig.DimConfig ft = ent.getValue();
if (ft.hierarchical && ft.multiValued == false) {
int dimRootOrd = taxoReader.getOrdinal(new FacetLabel(dim));
// It can be -1 if this field was declared in the
// facetsConfig but never indexed:
if (dimRootOrd > 0) {
counts[dimRootOrd] += rollup(children[dimRootOrd]);
}
}
}
*/
}
private int rollup(int ord) {
int sum = 0;
while (ord != TaxonomyReader.INVALID_ORDINAL) {
int childValue = values[ord] + rollup(children[ord]);
values[ord] = childValue;
sum += childValue;
ord = siblings[ord];
}
return sum;
}
/** Return the count for a specific path. Returns -1 if
* this path doesn't exist, else the count. */
@Override
public Number getSpecificValue(String dim, String... path) throws IOException {
int ord = taxoReader.getOrdinal(FacetLabel.create(dim, path));
if (ord < 0) {
return -1;
}
return values[ord];
}
@Override
public SimpleFacetResult getTopChildren(int topN, String dim, String... path) throws IOException {
FacetLabel cp = FacetLabel.create(dim, path);
int ord = taxoReader.getOrdinal(cp);
if (ord == -1) {
//System.out.println("no ord for path=" + path);
return null;
}
return getTopChildren(cp, ord, topN);
}
private SimpleFacetResult getTopChildren(FacetLabel path, int dimOrd, int topN) throws IOException {
TopOrdCountQueue q = new TopOrdCountQueue(topN);
int bottomValue = 0;
int ord = children[dimOrd];
long sumValue = 0;
TopOrdCountQueue.OrdAndCount reuse = null;
while(ord != TaxonomyReader.INVALID_ORDINAL) {
if (values[ord] > 0) {
sumValue += values[ord];
if (values[ord] > bottomValue) {
if (reuse == null) {
reuse = new TopOrdCountQueue.OrdAndCount();
}
reuse.ord = ord;
reuse.count = values[ord];
reuse = q.insertWithOverflow(reuse);
if (q.size() == topN) {
bottomValue = q.top().count;
}
}
}
ord = siblings[ord];
}
if (sumValue == 0) {
//System.out.println("totCount=0 for path=" + path);
return null;
}
/*
FacetsConfig.DimConfig ft = facetsConfig.getDimConfig(path.components[0]);
// nocommit shouldn't we verify the indexedFieldName
// matches what was passed to our ctor?
if (ft.hierarchical && ft.multiValued) {
totCount = counts[dimOrd];
}
*/
LabelAndValue[] labelValues = new LabelAndValue[q.size()];
for(int i=labelValues.length-1;i>=0;i--) {
TopOrdCountQueue.OrdAndCount ordAndCount = q.pop();
FacetLabel child = taxoReader.getPath(ordAndCount.ord);
labelValues[i] = new LabelAndValue(child.components[path.length], ordAndCount.count);
}
return new SimpleFacetResult(path, sumValue, labelValues);
}
@Override
public List<SimpleFacetResult> getAllDims(int topN) throws IOException {
int ord = children[TaxonomyReader.ROOT_ORDINAL];
List<SimpleFacetResult> results = new ArrayList<SimpleFacetResult>();
while (ord != TaxonomyReader.INVALID_ORDINAL) {
SimpleFacetResult result = getTopChildren(taxoReader.getPath(ord), ord, topN);
if (result != null) {
results.add(result);
}
ord = siblings[ord];
}
// Sort by highest count:
Collections.sort(results,
new Comparator<SimpleFacetResult>() {
@Override
public int compare(SimpleFacetResult a, SimpleFacetResult b) {
if (a.value.intValue() > b.value.intValue()) {
return -1;
} else if (b.value.intValue() > a.value.intValue()) {
return 1;
} else {
// Tie break by dimension
return a.path.components[0].compareTo(b.path.components[0]);
}
}
});
return results;
}
}

View File

@ -37,6 +37,7 @@ import org.apache.lucene.util.IntsRef;
* FastTaxonomyFacetCounts} if you are just using the
* default encoding from {@link BinaryDocValues}. */
// nocommit remove & add specialized Cached variation only?
public class TaxonomyFacetCounts extends Facets {
private final OrdinalsReader ordinalsReader;
private final FacetsConfig facetsConfig;

View File

@ -19,6 +19,8 @@ package org.apache.lucene.facet.simple;
import org.apache.lucene.util.PriorityQueue;
// nocommit rename to TopOrdIntQueue?
/** Keeps highest count results. */
class TopOrdCountQueue extends PriorityQueue<TopOrdCountQueue.OrdAndCount> {

View File

@ -0,0 +1,159 @@
package org.apache.lucene.facet.simple;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.List;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.search.FacetResult;
import org.apache.lucene.facet.search.FacetsCollector;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.junit.AfterClass;
import org.junit.BeforeClass;
/** Test for associations */
public class TestAssociationFacets extends FacetTestCase {
private static Directory dir;
private static IndexReader reader;
private static Directory taxoDir;
private static final FacetLabel aint = new FacetLabel("int", "a");
private static final FacetLabel bint = new FacetLabel("int", "b");
private static final FacetLabel afloat = new FacetLabel("float", "a");
private static final FacetLabel bfloat = new FacetLabel("float", "b");
@BeforeClass
public static void beforeClass() throws Exception {
dir = newDirectory();
taxoDir = newDirectory();
// preparations - index, taxonomy, content
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
IndexWriterConfig iwc = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
IndexWriter writer = new FacetIndexWriter(dir, iwc, taxoWriter, new FacetsConfig());
// index documents, 50% have only 'b' and all have 'a'
for (int i = 0; i < 110; i++) {
Document doc = new Document();
// every 11th document is added empty, this used to cause the association
// aggregators to go into an infinite loop
if (i % 11 != 0) {
doc.add(new AssociationFacetField(2, "int", "a"));
doc.add(new AssociationFacetField(0.5f, "float", "a"));
if (i % 2 == 0) { // 50
doc.add(new AssociationFacetField(3, "int", "b"));
doc.add(new AssociationFacetField(0.2f, "float", "b"));
}
}
writer.addDocument(doc);
}
taxoWriter.close();
reader = DirectoryReader.open(writer, true);
writer.close();
}
@AfterClass
public static void afterClass() throws Exception {
reader.close();
reader = null;
dir.close();
dir = null;
taxoDir.close();
taxoDir = null;
}
public void testIntSumAssociation() throws Exception {
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
SimpleFacetsCollector fc = new SimpleFacetsCollector();
IndexSearcher searcher = newSearcher(reader);
searcher.search(new MatchAllDocsQuery(), fc);
SumIntAssociationFacets facets = new SumIntAssociationFacets(taxoReader, new FacetsConfig(), fc);
assertEquals("Wrong count for category 'a'!", 200, facets.getSpecificValue("int", "a").intValue());
assertEquals("Wrong count for category 'b'!", 150, facets.getSpecificValue("int", "b").intValue());
taxoReader.close();
}
public void testFloatSumAssociation() throws Exception {
DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
SimpleFacetsCollector fc = new SimpleFacetsCollector();
IndexSearcher searcher = newSearcher(reader);
searcher.search(new MatchAllDocsQuery(), fc);
SumFloatAssociationFacets facets = new SumFloatAssociationFacets(taxoReader, new FacetsConfig(), fc);
assertEquals("Wrong count for category 'a'!", 50f, facets.getSpecificValue("float", "a").floatValue(), 0.00001);
assertEquals("Wrong count for category 'b'!", 10f, facets.getSpecificValue("float", "b").floatValue(), 0.00001);
taxoReader.close();
}
/*
public void testDifferentAggregatorsSameCategoryList() throws Exception {
DirectoryTaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDir);
// facet requests for two facets
FacetSearchParams fsp = new FacetSearchParams(
new SumIntAssociationFacetRequest(aint, 10),
new SumIntAssociationFacetRequest(bint, 10),
new SumFloatAssociationFacetRequest(afloat, 10),
new SumFloatAssociationFacetRequest(bfloat, 10));
Query q = new MatchAllDocsQuery();
FacetsCollector fc = FacetsCollector.create(fsp, reader, taxo);
IndexSearcher searcher = newSearcher(reader);
searcher.search(q, fc);
List<FacetResult> res = fc.getFacetResults();
assertEquals("Wrong number of results!", 4, res.size());
assertEquals("Wrong count for category 'a'!", 200, (int) res.get(0).getFacetResultNode().value);
assertEquals("Wrong count for category 'b'!", 150, (int) res.get(1).getFacetResultNode().value);
assertEquals("Wrong count for category 'a'!",50f, (float) res.get(2).getFacetResultNode().value, 0.00001);
assertEquals("Wrong count for category 'b'!",10f, (float) res.get(3).getFacetResultNode().value, 0.00001);
taxo.close();
}
*/
}