Sandbox: Compute facets while collecting (#13568)

This adds a new, ground-up implementation of faceting that computes aggregations while collecting. It has the following advantages over the current faceting module:
1. Allows flexible aggregation logic, not just "counts", in a general way (essentially making what today's "association faceting" offers available beyond taxonomy-based fields).
2. When aggregating beyond "counts," computing association values can be expensive. This implementation lets each value be computed only once and shared across different aggregations.
3. Reduces latency by leveraging concurrency during collection (though potentially at increased overall cost).

This work lives in the sandbox module for now: it is not yet complete (the current faceting module still covers use cases this one doesn't), and it needs time to bake so that API and implementation rough edges can be worked out.
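For orientation, here is a minimal sketch of the intended usage, condensed from the SandboxFacetsExample added in this change. The searcher, FacetsConfig, and taxonomy reader are assumed to be set up as in that demo; results are read back from the CountFacetRecorder via ordinal iterators, as the demo shows further below.

```java
import static org.apache.lucene.facet.FacetsConfig.DEFAULT_INDEX_FIELD_NAME;

import java.io.IOException;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.sandbox.facet.FacetFieldCollectorManager;
import org.apache.lucene.sandbox.facet.cutters.TaxonomyFacetsCutter;
import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;

class FacetsWhileCollectingSketch {
  /** Counts taxonomy facets during collection; the recorder holds per-ordinal counts afterwards. */
  static CountFacetRecorder countFacets(
      IndexSearcher searcher, FacetsConfig config, TaxonomyReader taxoReader) throws IOException {
    TaxonomyFacetsCutter cutter =
        new TaxonomyFacetsCutter(DEFAULT_INDEX_FIELD_NAME, config, taxoReader);
    CountFacetRecorder recorder = new CountFacetRecorder();
    // Aggregation happens inside collect(); there is no separate "counting" pass over the matches.
    searcher.search(new MatchAllDocsQuery(), new FacetFieldCollectorManager<>(cutter, recorder));
    return recorder;
  }
}
```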

---------

Co-authored-by: Egor Potemkin <epotyom@amazon.com>
Co-authored-by: Shradha Shankar <shrdsha@amazon.com>
Co-authored-by: Greg Miller <gsmiller@gmail.com>

View File

@@ -108,6 +108,15 @@ API Changes
* GITHUB#13410: Removed Scorer#getWeight (Sanjay Dutt, Adrien Grand)
* GITHUB#13568: Add DoubleValuesSource#toSortableLongDoubleValuesSource and
MultiDoubleValuesSource#toSortableMultiLongValuesSource methods. (Shradha Shankar)
* GITHUB#13568: Add CollectorOwner class that wraps CollectorManager, and handles list of Collectors and results.
Add IndexSearcher#search method that takes CollectorOwner. (Egor Potemkin)
* GITHUB#13568: Add DrillSideways#search method that supports any collector types for any drill-sideways dimensions
or drill-down. (Egor Potemkin)
* GITHUB#13499: Remove deprecated TopScoreDocCollector + TopFieldCollector methods (#create, #createSharedManager) (Jakub Slowinski)
* GITHUB#13632: CandidateMatcher public matching functions (Bryan Jacobowitz)
@@ -134,6 +143,8 @@ New Features
DocValuesSkipper abstraction. A new flag is added to FieldType.java that configures whether
to create a "skip index" for doc values. (Ignacio Vera)
* GITHUB#13568: Add sandbox facets module to compute facets while collecting. (Egor Potemkin, Shradha Shankar)
* GITHUB#13563: Add levels to doc values skip index. (Ignacio Vera)
* GITHUB#13597: Align doc value skipper interval boundaries when an interval contains a constant
@@ -141,6 +152,7 @@ New Features
* GITHUB#13604: Add Kmeans clustering on vectors (Mayya Sharipova, Jim Ferenczi, Tom Veasey)
Improvements
---------------------

View File

@@ -0,0 +1,834 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.internal.hppc;
import static org.apache.lucene.internal.hppc.HashContainers.*;
import java.util.Arrays;
import java.util.Iterator;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;
/**
* A hash map of <code>int</code> to <code>long</code>, implemented using open addressing with
* linear probing for collision resolution.
*
* <p>Mostly forked and trimmed from com.carrotsearch.hppc.IntLongHashMap
*
* <p>github: https://github.com/carrotsearch/hppc release 0.10.0
*
* @lucene.internal
*/
public class IntLongHashMap
implements Iterable<IntLongHashMap.IntLongCursor>, Accountable, Cloneable {
private static final long BASE_RAM_BYTES_USED =
RamUsageEstimator.shallowSizeOfInstance(IntLongHashMap.class);
/** The array holding keys. */
public int[] keys;
/** The array holding values. */
public long[] values;
/**
* The number of stored keys (assigned key slots), excluding the special "empty" key, if any (use
* {@link #size()} instead).
*
* @see #size()
*/
protected int assigned;
/** Mask for slot scans in {@link #keys}. */
protected int mask;
/** Expand (rehash) {@link #keys} when {@link #assigned} hits this value. */
protected int resizeAt;
/** Special treatment for the "empty slot" key marker. */
protected boolean hasEmptyKey;
/** The load factor for {@link #keys}. */
protected double loadFactor;
/** Seed used to ensure the hash iteration order is different from an iteration to another. */
protected int iterationSeed;
/** New instance with sane defaults. */
public IntLongHashMap() {
this(DEFAULT_EXPECTED_ELEMENTS);
}
/**
* New instance with sane defaults.
*
* @param expectedElements The expected number of elements guaranteed not to cause buffer
* expansion (inclusive).
*/
public IntLongHashMap(int expectedElements) {
this(expectedElements, DEFAULT_LOAD_FACTOR);
}
/**
* New instance with the provided defaults.
*
* @param expectedElements The expected number of elements guaranteed not to cause a rehash
* (inclusive).
* @param loadFactor The load factor for internal buffers. Insane load factors (zero, full
* capacity) are rejected by {@link #verifyLoadFactor(double)}.
*/
public IntLongHashMap(int expectedElements, double loadFactor) {
this.loadFactor = verifyLoadFactor(loadFactor);
iterationSeed = ITERATION_SEED.incrementAndGet();
ensureCapacity(expectedElements);
}
/** Create a hash map from all key-value pairs of another container. */
public IntLongHashMap(IntLongHashMap container) {
this(container.size());
putAll(container);
}
public long put(int key, long value) {
assert assigned < mask + 1;
final int mask = this.mask;
if (((key) == 0)) {
long previousValue = hasEmptyKey ? values[mask + 1] : 0L;
hasEmptyKey = true;
values[mask + 1] = value;
return previousValue;
} else {
final int[] keys = this.keys;
int slot = hashKey(key) & mask;
int existing;
while (!((existing = keys[slot]) == 0)) {
if (((key) == (existing))) {
final long previousValue = values[slot];
values[slot] = value;
return previousValue;
}
slot = (slot + 1) & mask;
}
if (assigned == resizeAt) {
allocateThenInsertThenRehash(slot, key, value);
} else {
keys[slot] = key;
values[slot] = value;
}
assigned++;
return 0L;
}
}
/**
* If the specified key is not already associated with a value, associates it with the given
* value.
*
* @return {@code true} if {@code key} did not exist and {@code value} was placed in the map,
* {@code false} otherwise.
*/
public boolean putIfAbsent(int key, long value) {
int keyIndex = indexOf(key);
if (indexExists(keyIndex)) {
return false;
} else {
indexInsert(keyIndex, key, value);
return true;
}
}
/** Puts all key/value pairs from a given iterable into this map. */
public int putAll(Iterable<? extends IntLongCursor> iterable) {
final int count = size();
for (IntLongCursor c : iterable) {
put(c.key, c.value);
}
return size() - count;
}
/**
* If <code>key</code> exists, <code>putValue</code> is inserted into the map, otherwise any
* existing value is incremented by <code>additionValue</code>.
*
* @param key The key of the value to adjust.
* @param putValue The value to put if <code>key</code> does not exist.
* @param incrementValue The value to add to the existing value if <code>key</code> exists.
* @return Returns the current value associated with <code>key</code> (after changes).
*/
public long putOrAdd(int key, long putValue, long incrementValue) {
assert assigned < mask + 1;
int keyIndex = indexOf(key);
if (indexExists(keyIndex)) {
putValue = values[keyIndex] + incrementValue;
indexReplace(keyIndex, putValue);
} else {
indexInsert(keyIndex, key, putValue);
}
return putValue;
}
/**
* Adds <code>incrementValue</code> to any existing value for the given <code>key</code> or
* inserts <code>incrementValue</code> if <code>key</code> did not previously exist.
*
* @param key The key of the value to adjust.
* @param incrementValue The value to put or add to the existing value if <code>key</code> exists.
* @return Returns the current value associated with <code>key</code> (after changes).
*/
public long addTo(int key, long incrementValue) {
return putOrAdd(key, incrementValue, incrementValue);
}
/**
 * Removes the value at the given key. The default value for the value type is returned if the
 * key does not exist in the map.
*/
public long remove(int key) {
final int mask = this.mask;
if (((key) == 0)) {
if (!hasEmptyKey) {
return 0L;
}
hasEmptyKey = false;
long previousValue = values[mask + 1];
values[mask + 1] = 0L;
return previousValue;
} else {
final int[] keys = this.keys;
int slot = hashKey(key) & mask;
int existing;
while (!((existing = keys[slot]) == 0)) {
if (((key) == (existing))) {
final long previousValue = values[slot];
shiftConflictingKeys(slot);
return previousValue;
}
slot = (slot + 1) & mask;
}
return 0L;
}
}
public long get(int key) {
if (((key) == 0)) {
return hasEmptyKey ? values[mask + 1] : 0L;
} else {
final int[] keys = this.keys;
final int mask = this.mask;
int slot = hashKey(key) & mask;
int existing;
while (!((existing = keys[slot]) == 0)) {
if (((key) == (existing))) {
return values[slot];
}
slot = (slot + 1) & mask;
}
return 0L;
}
}
public long getOrDefault(int key, long defaultValue) {
if (((key) == 0)) {
return hasEmptyKey ? values[mask + 1] : defaultValue;
} else {
final int[] keys = this.keys;
final int mask = this.mask;
int slot = hashKey(key) & mask;
int existing;
while (!((existing = keys[slot]) == 0)) {
if (((key) == (existing))) {
return values[slot];
}
slot = (slot + 1) & mask;
}
return defaultValue;
}
}
public boolean containsKey(int key) {
if (((key) == 0)) {
return hasEmptyKey;
} else {
final int[] keys = this.keys;
final int mask = this.mask;
int slot = hashKey(key) & mask;
int existing;
while (!((existing = keys[slot]) == 0)) {
if (((key) == (existing))) {
return true;
}
slot = (slot + 1) & mask;
}
return false;
}
}
public int indexOf(int key) {
final int mask = this.mask;
if (((key) == 0)) {
return hasEmptyKey ? mask + 1 : ~(mask + 1);
} else {
final int[] keys = this.keys;
int slot = hashKey(key) & mask;
int existing;
while (!((existing = keys[slot]) == 0)) {
if (((key) == (existing))) {
return slot;
}
slot = (slot + 1) & mask;
}
return ~slot;
}
}
public boolean indexExists(int index) {
assert index < 0 || (index >= 0 && index <= mask) || (index == mask + 1 && hasEmptyKey);
return index >= 0;
}
public long indexGet(int index) {
assert index >= 0 : "The index must point at an existing key.";
assert index <= mask || (index == mask + 1 && hasEmptyKey);
return values[index];
}
public long indexReplace(int index, long newValue) {
assert index >= 0 : "The index must point at an existing key.";
assert index <= mask || (index == mask + 1 && hasEmptyKey);
long previousValue = values[index];
values[index] = newValue;
return previousValue;
}
public void indexInsert(int index, int key, long value) {
assert index < 0 : "The index must not point at an existing key.";
index = ~index;
if (((key) == 0)) {
assert index == mask + 1;
values[index] = value;
hasEmptyKey = true;
} else {
assert ((keys[index]) == 0);
if (assigned == resizeAt) {
allocateThenInsertThenRehash(index, key, value);
} else {
keys[index] = key;
values[index] = value;
}
assigned++;
}
}
public long indexRemove(int index) {
assert index >= 0 : "The index must point at an existing key.";
assert index <= mask || (index == mask + 1 && hasEmptyKey);
long previousValue = values[index];
if (index > mask) {
assert index == mask + 1;
hasEmptyKey = false;
values[index] = 0L;
} else {
shiftConflictingKeys(index);
}
return previousValue;
}
public void clear() {
assigned = 0;
hasEmptyKey = false;
Arrays.fill(keys, 0);
}
public void release() {
assigned = 0;
hasEmptyKey = false;
keys = null;
values = null;
ensureCapacity(DEFAULT_EXPECTED_ELEMENTS);
}
public int size() {
return assigned + (hasEmptyKey ? 1 : 0);
}
public boolean isEmpty() {
return size() == 0;
}
@Override
public int hashCode() {
int h = hasEmptyKey ? 0xDEADBEEF : 0;
for (IntLongCursor c : this) {
h += BitMixer.mix(c.key) + BitMixer.mix(c.value);
}
return h;
}
@Override
public boolean equals(Object obj) {
return (this == obj)
|| (obj != null && getClass() == obj.getClass() && equalElements(getClass().cast(obj)));
}
/** Return true if all key/value pairs of the other container exist in this container. */
protected boolean equalElements(IntLongHashMap other) {
if (other.size() != size()) {
return false;
}
for (IntLongCursor c : other) {
int key = c.key;
if (!containsKey(key) || !((c.value) == (get(key)))) {
return false;
}
}
return true;
}
/**
* Ensure this container can hold at least the given number of keys (entries) without resizing its
* buffers.
*
* @param expectedElements The total number of keys, inclusive.
*/
public void ensureCapacity(int expectedElements) {
if (expectedElements > resizeAt || keys == null) {
final int[] prevKeys = this.keys;
final long[] prevValues = this.values;
allocateBuffers(minBufferSize(expectedElements, loadFactor));
if (prevKeys != null && !isEmpty()) {
rehash(prevKeys, prevValues);
}
}
}
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(keys) + RamUsageEstimator.sizeOf(values);
}
/**
* Provides the next iteration seed used to build the iteration starting slot and offset
* increment. This method does not need to be synchronized, what matters is that each thread gets
* a sequence of varying seeds.
*/
protected int nextIterationSeed() {
return iterationSeed = BitMixer.mixPhi(iterationSeed);
}
/** An iterator implementation for {@link #iterator}. */
private final class EntryIterator extends AbstractIterator<IntLongCursor> {
private final IntLongCursor cursor;
private final int increment;
private int index;
private int slot;
public EntryIterator() {
cursor = new IntLongCursor();
int seed = nextIterationSeed();
increment = iterationIncrement(seed);
slot = seed & mask;
}
@Override
protected IntLongCursor fetch() {
final int mask = IntLongHashMap.this.mask;
while (index <= mask) {
int existing;
index++;
slot = (slot + increment) & mask;
if (!((existing = keys[slot]) == 0)) {
cursor.index = slot;
cursor.key = existing;
cursor.value = values[slot];
return cursor;
}
}
if (index == mask + 1 && hasEmptyKey) {
cursor.index = index;
cursor.key = 0;
cursor.value = values[index++];
return cursor;
}
return done();
}
}
@Override
public Iterator<IntLongCursor> iterator() {
return new EntryIterator();
}
/** Returns a specialized view of the keys of this associated container. */
public KeysContainer keys() {
return new KeysContainer();
}
/** A view of the keys inside this hash map. */
public final class KeysContainer implements Iterable<IntCursor> {
@Override
public Iterator<IntCursor> iterator() {
return new KeysIterator();
}
public int size() {
return IntLongHashMap.this.size();
}
public int[] toArray() {
int[] array = new int[size()];
int i = 0;
for (IntCursor cursor : this) {
array[i++] = cursor.value;
}
return array;
}
}
/** An iterator over the set of assigned keys. */
private final class KeysIterator extends AbstractIterator<IntCursor> {
private final IntCursor cursor;
private final int increment;
private int index;
private int slot;
public KeysIterator() {
cursor = new IntCursor();
int seed = nextIterationSeed();
increment = iterationIncrement(seed);
slot = seed & mask;
}
@Override
protected IntCursor fetch() {
final int mask = IntLongHashMap.this.mask;
while (index <= mask) {
int existing;
index++;
slot = (slot + increment) & mask;
if (!((existing = keys[slot]) == 0)) {
cursor.index = slot;
cursor.value = existing;
return cursor;
}
}
if (index == mask + 1 && hasEmptyKey) {
cursor.index = index++;
cursor.value = 0;
return cursor;
}
return done();
}
}
/**
* @return Returns a container with all values stored in this map.
*/
public ValuesContainer values() {
return new ValuesContainer();
}
/** A view over the set of values of this map. */
public final class ValuesContainer implements Iterable<LongCursor> {
@Override
public Iterator<LongCursor> iterator() {
return new ValuesIterator();
}
public long[] toArray() {
long[] array = new long[size()];
int i = 0;
for (LongCursor cursor : this) {
array[i++] = cursor.value;
}
return array;
}
}
/** An iterator over the set of assigned values. */
private final class ValuesIterator extends AbstractIterator<LongCursor> {
private final LongCursor cursor;
private final int increment;
private int index;
private int slot;
public ValuesIterator() {
cursor = new LongCursor();
int seed = nextIterationSeed();
increment = iterationIncrement(seed);
slot = seed & mask;
}
@Override
protected LongCursor fetch() {
final int mask = IntLongHashMap.this.mask;
while (index <= mask) {
index++;
slot = (slot + increment) & mask;
if (!((keys[slot]) == 0)) {
cursor.index = slot;
cursor.value = values[slot];
return cursor;
}
}
if (index == mask + 1 && hasEmptyKey) {
cursor.index = index;
cursor.value = values[index++];
return cursor;
}
return done();
}
}
@Override
public IntLongHashMap clone() {
try {
IntLongHashMap cloned = (IntLongHashMap) super.clone();
cloned.keys = keys.clone();
cloned.values = values.clone();
cloned.hasEmptyKey = hasEmptyKey;
cloned.iterationSeed = ITERATION_SEED.incrementAndGet();
return cloned;
} catch (CloneNotSupportedException e) {
throw new RuntimeException(e);
}
}
/** Convert the contents of this map to a human-friendly string. */
@Override
public String toString() {
final StringBuilder buffer = new StringBuilder();
buffer.append("[");
boolean first = true;
for (IntLongCursor cursor : this) {
if (!first) {
buffer.append(", ");
}
buffer.append(cursor.key);
buffer.append("=>");
buffer.append(cursor.value);
first = false;
}
buffer.append("]");
return buffer.toString();
}
/** Creates a hash map from two index-aligned arrays of key-value pairs. */
public static IntLongHashMap from(int[] keys, long[] values) {
if (keys.length != values.length) {
throw new IllegalArgumentException(
"Arrays of keys and values must have an identical length.");
}
IntLongHashMap map = new IntLongHashMap(keys.length);
for (int i = 0; i < keys.length; i++) {
map.put(keys[i], values[i]);
}
return map;
}
/**
* Returns a hash code for the given key.
*
* <p>The output from this function should evenly distribute keys across the entire integer range.
*/
protected int hashKey(int key) {
assert !((key) == 0); // Handled as a special case (empty slot marker).
return BitMixer.mixPhi(key);
}
/**
* Validate load factor range and return it. Override and suppress if you need insane load
* factors.
*/
protected double verifyLoadFactor(double loadFactor) {
checkLoadFactor(loadFactor, MIN_LOAD_FACTOR, MAX_LOAD_FACTOR);
return loadFactor;
}
/** Rehash from old buffers to new buffers. */
protected void rehash(int[] fromKeys, long[] fromValues) {
assert fromKeys.length == fromValues.length
&& HashContainers.checkPowerOfTwo(fromKeys.length - 1);
// Rehash all stored key/value pairs into the new buffers.
final int[] keys = this.keys;
final long[] values = this.values;
final int mask = this.mask;
int existing;
// Copy the zero element's slot, then rehash everything else.
int from = fromKeys.length - 1;
keys[keys.length - 1] = fromKeys[from];
values[values.length - 1] = fromValues[from];
while (--from >= 0) {
if (!((existing = fromKeys[from]) == 0)) {
int slot = hashKey(existing) & mask;
while (!((keys[slot]) == 0)) {
slot = (slot + 1) & mask;
}
keys[slot] = existing;
values[slot] = fromValues[from];
}
}
}
/**
* Allocate new internal buffers. This method attempts to allocate and assign internal buffers
* atomically (either allocations succeed or not).
*/
protected void allocateBuffers(int arraySize) {
assert Integer.bitCount(arraySize) == 1;
// Ensure no change is done if we hit an OOM.
int[] prevKeys = this.keys;
long[] prevValues = this.values;
try {
int emptyElementSlot = 1;
this.keys = (new int[arraySize + emptyElementSlot]);
this.values = (new long[arraySize + emptyElementSlot]);
} catch (OutOfMemoryError e) {
this.keys = prevKeys;
this.values = prevValues;
throw new BufferAllocationException(
"Not enough memory to allocate buffers for rehashing: %,d -> %,d",
e, this.mask + 1, arraySize);
}
this.resizeAt = expandAtCount(arraySize, loadFactor);
this.mask = arraySize - 1;
}
/**
 * This method is invoked when there is a new key/value pair to be inserted into the buffers but
 * there are not enough empty slots to do so.
*
* <p>New buffers are allocated. If this succeeds, we know we can proceed with rehashing so we
* assign the pending element to the previous buffer (possibly violating the invariant of having
* at least one empty slot) and rehash all keys, substituting new buffers at the end.
*/
protected void allocateThenInsertThenRehash(int slot, int pendingKey, long pendingValue) {
assert assigned == resizeAt && ((keys[slot]) == 0) && !((pendingKey) == 0);
// Try to allocate new buffers first. If we OOM, we leave in a consistent state.
final int[] prevKeys = this.keys;
final long[] prevValues = this.values;
allocateBuffers(nextBufferSize(mask + 1, size(), loadFactor));
assert this.keys.length > prevKeys.length;
// We have succeeded at allocating new data so insert the pending key/value at
// the free slot in the old arrays before rehashing.
prevKeys[slot] = pendingKey;
prevValues[slot] = pendingValue;
// Rehash old keys, including the pending key.
rehash(prevKeys, prevValues);
}
/**
* Shift all the slot-conflicting keys and values allocated to (and including) <code>slot</code>.
*/
protected void shiftConflictingKeys(int gapSlot) {
final int[] keys = this.keys;
final long[] values = this.values;
final int mask = this.mask;
// Perform shifts of conflicting keys to fill in the gap.
int distance = 0;
while (true) {
final int slot = (gapSlot + (++distance)) & mask;
final int existing = keys[slot];
if (((existing) == 0)) {
break;
}
final int idealSlot = hashKey(existing);
final int shift = (slot - idealSlot) & mask;
if (shift >= distance) {
// Entry at this position was originally at or before the gap slot.
// Move the conflict-shifted entry to the gap's position and repeat the procedure
// for any entries to the right of the current position, treating it
// as the new gap.
keys[gapSlot] = existing;
values[gapSlot] = values[slot];
gapSlot = slot;
distance = 0;
}
}
// Mark the last found gap slot without a conflict as empty.
keys[gapSlot] = 0;
values[gapSlot] = 0L;
assigned--;
}
/** Forked from HPPC, holding int index, key and value. */
public static final class IntLongCursor {
/**
* The current key and value's index in the container this cursor belongs to. The meaning of
* this index is defined by the container (usually it will be an index in the underlying storage
* buffer).
*/
public int index;
/** The current key. */
public int key;
/** The current value. */
public long value;
@Override
public String toString() {
return "[cursor, index: " + index + ", key: " + key + ", value: " + value + "]";
}
}
}
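For context, a quick hedged sketch of how the primitive-specialized API added above is typically used (the keys and counts are illustrative only; the class is marked @lucene.internal, so it is intended for use inside Lucene rather than by applications):

```java
import org.apache.lucene.internal.hppc.IntLongHashMap;

class IntLongHashMapSketch {
  static void demo() {
    IntLongHashMap counts = new IntLongHashMap();
    // addTo() inserts the increment for a missing key, or adds it to the existing value.
    counts.addTo(42, 1L);
    counts.addTo(42, 2L); // value for key 42 is now 3
    long fallback = counts.getOrDefault(7, -1L); // -1: key 7 was never added
    // Cursor iteration exposes primitive key/value fields, avoiding boxing.
    for (IntLongHashMap.IntLongCursor c : counts) {
      System.out.println(c.key + " => " + c.value + " (fallback was " + fallback + ")");
    }
  }
}
```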

View File

@@ -0,0 +1,78 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
/**
 * This class wraps a {@link CollectorManager} and owns the collectors the manager creates, so that
 * clients don't have to keep track of the list of collectors or make the collector type (C)
 * compatible when reduce is called. Instances of this class cache the result of {@link
 * CollectorManager#reduce(Collection)}.
 *
 * <p>Note that an instance of this class ignores any {@link Collector} created by calling {@link
 * CollectorManager#newCollector()} directly rather than through {@link #newCollector()}.
*
* @lucene.experimental
*/
public final class CollectorOwner<C extends Collector, T> {
private final CollectorManager<C, T> manager;
private T result;
private boolean reduced;
// TODO: For IndexSearcher, the list doesn't have to be synchronized
// because we create new collectors sequentially. Drill sideways creates new collectors in
// DrillSidewaysQuery#Weight#bulkScorer which is already called concurrently.
// I think making the list synchronized here is not a huge concern, at the same time, do we want
// to do something about it?
// e.g. have boolean property in constructor that makes it threads friendly when set?
private final List<C> collectors = Collections.synchronizedList(new ArrayList<>());
public CollectorOwner(CollectorManager<C, T> manager) {
this.manager = manager;
}
/** Return a new {@link Collector}. This must return a different instance on each call. */
public C newCollector() throws IOException {
C collector = manager.newCollector();
collectors.add(collector);
return collector;
}
public C getCollector(int i) {
return collectors.get(i);
}
/**
* Returns result of {@link CollectorManager#reduce(Collection)}. The result is cached.
*
* <p>This method is NOT threadsafe.
*/
public T getResult() throws IOException {
if (reduced == false) {
result = manager.reduce(collectors);
reduced = true;
}
return result;
}
}
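As a rough sketch of the intended usage (assuming a standard TopScoreDocCollectorManager; the hit counts below are arbitrary), a CollectorOwner can wrap any existing CollectorManager and defer reduction until the result is actually needed:

```java
import java.io.IOException;
import org.apache.lucene.search.CollectorOwner;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.TopScoreDocCollectorManager;

class CollectorOwnerSketch {
  static TopDocs topDocs(IndexSearcher searcher, Query query) throws IOException {
    CollectorOwner<TopScoreDocCollector, TopDocs> owner =
        new CollectorOwner<>(new TopScoreDocCollectorManager(10, 100));
    // The new lower-level IndexSearcher#search(Query, CollectorOwner) collects but never reduces.
    searcher.search(query, owner);
    // getResult() calls CollectorManager#reduce(...) once and caches the reduced TopDocs.
    return owner.getResult();
  }
}
```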

View File

@@ -26,6 +26,7 @@ import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.VectorEncoding;
import org.apache.lucene.search.comparators.DoubleComparator;
import org.apache.lucene.util.NumericUtils;
/**
* Base class for producing {@link DoubleValues}
@@ -115,6 +116,70 @@ public abstract class DoubleValuesSource implements SegmentCacheable {
return new LongDoubleValuesSource(this);
}
/** Convert to {@link LongValuesSource} by calling {@link NumericUtils#doubleToSortableLong} */
public final LongValuesSource toSortableLongDoubleValuesSource() {
return new SortableLongDoubleValuesSource(this);
}
private static class SortableLongDoubleValuesSource extends LongValuesSource {
private final DoubleValuesSource inner;
private SortableLongDoubleValuesSource(DoubleValuesSource inner) {
this.inner = Objects.requireNonNull(inner);
}
@Override
public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
DoubleValues in = inner.getValues(ctx, scores);
return new LongValues() {
@Override
public long longValue() throws IOException {
return NumericUtils.doubleToSortableLong(in.doubleValue());
}
@Override
public boolean advanceExact(int doc) throws IOException {
return in.advanceExact(doc);
}
};
}
@Override
public boolean needsScores() {
return inner.needsScores();
}
@Override
public int hashCode() {
return inner.hashCode();
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
SortableLongDoubleValuesSource that = (SortableLongDoubleValuesSource) o;
return Objects.equals(inner, that.inner);
}
@Override
public String toString() {
return "sortableLong(" + inner.toString() + ")";
}
@Override
public LongValuesSource rewrite(IndexSearcher searcher) throws IOException {
return inner.rewrite(searcher).toLongValuesSource();
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return false;
}
}
private static class LongDoubleValuesSource extends LongValuesSource {
private final DoubleValuesSource inner;

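The sortable-long encoding is presumably what lets double-valued sources feed long-based aggregation machinery: NumericUtils.doubleToSortableLong preserves ordering, and the original double can be recovered afterwards. A tiny illustration with arbitrary values (that LongAggregationsFacetRecorder is the eventual consumer is an assumption based on the imports in SandboxFacetsExample):

```java
import org.apache.lucene.util.NumericUtils;

class SortableLongSketch {
  static void demo() {
    // The encoding is order-preserving: comparing the encoded longs matches comparing the doubles.
    long lo = NumericUtils.doubleToSortableLong(3.5d);
    long hi = NumericUtils.doubleToSortableLong(4.1d);
    assert lo < hi;
    // The exact double round-trips back out once aggregation is done.
    double decoded = NumericUtils.sortableLongToDouble(hi);
    System.out.println(decoded); // 4.1
  }
}
```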
View File

@@ -630,27 +630,42 @@ public class IndexSearcher {
*/
public <C extends Collector, T> T search(Query query, CollectorManager<C, T> collectorManager)
throws IOException {
final C firstCollector = collectorManager.newCollector();
CollectorOwner<C, T> collectorOwner = new CollectorOwner<>(collectorManager);
final C firstCollector = collectorOwner.newCollector();
query = rewrite(query, firstCollector.scoreMode().needsScores());
final Weight weight = createWeight(query, firstCollector.scoreMode(), 1);
return search(weight, collectorManager, firstCollector);
search(weight, collectorOwner, firstCollector);
return collectorOwner.getResult();
}
private <C extends Collector, T> T search(
Weight weight, CollectorManager<C, T> collectorManager, C firstCollector) throws IOException {
/**
* Lower-level search API. Search all leaves using the given {@link CollectorOwner}, without
* calling {@link CollectorOwner#getResult()} so that clients can reduce and read results
* themselves.
*
* <p>Note that this method doesn't return anything - users can access results by calling {@link
* CollectorOwner#getResult()}
*
* @lucene.experimental
*/
public <C extends Collector> void search(Query query, CollectorOwner<C, ?> collectorOwner)
throws IOException {
final C firstCollector = collectorOwner.newCollector();
query = rewrite(query, firstCollector.scoreMode().needsScores());
final Weight weight = createWeight(query, firstCollector.scoreMode(), 1);
search(weight, collectorOwner, firstCollector);
}
private <C extends Collector> void search(
Weight weight, CollectorOwner<C, ?> collectorOwner, C firstCollector) throws IOException {
final LeafSlice[] leafSlices = getSlices();
if (leafSlices.length == 0) {
// there are no segments, nothing to offload to the executor, but we do need to call reduce to
// create some kind of empty result
// there are no segments, nothing to offload to the executor
assert leafContexts.isEmpty();
return collectorManager.reduce(Collections.singletonList(firstCollector));
} else {
final List<C> collectors = new ArrayList<>(leafSlices.length);
collectors.add(firstCollector);
final ScoreMode scoreMode = firstCollector.scoreMode();
for (int i = 1; i < leafSlices.length; ++i) {
final C collector = collectorManager.newCollector();
collectors.add(collector);
final C collector = collectorOwner.newCollector();
if (scoreMode != collector.scoreMode()) {
throw new IllegalStateException(
"CollectorManager does not always produce collectors with the same score mode");
@@ -659,15 +674,14 @@ public class IndexSearcher {
final List<Callable<C>> listTasks = new ArrayList<>(leafSlices.length);
for (int i = 0; i < leafSlices.length; ++i) {
final LeafReaderContext[] leaves = leafSlices[i].leaves;
final C collector = collectors.get(i);
final C collector = collectorOwner.getCollector(i);
listTasks.add(
() -> {
search(Arrays.asList(leaves), weight, collector);
return collector;
});
}
List<C> results = taskExecutor.invokeAll(listTasks);
return collectorManager.reduce(results);
taskExecutor.invokeAll(listTasks);
}
}
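To illustrate the new lower-level entry point: because search(Query, CollectorOwner) never reduces, a caller that already holds its per-query state can skip reduction entirely. A hedged sketch, assuming the same FacetFieldCollectorManager/CountFacetRecorder setup as in SandboxFacetsExample:

```java
import java.io.IOException;
import org.apache.lucene.sandbox.facet.FacetFieldCollectorManager;
import org.apache.lucene.sandbox.facet.cutters.TaxonomyFacetsCutter;
import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder;
import org.apache.lucene.search.CollectorOwner;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

class LowerLevelSearchSketch {
  static CountFacetRecorder collectCounts(
      IndexSearcher searcher, Query query, TaxonomyFacetsCutter cutter) throws IOException {
    CountFacetRecorder recorder = new CountFacetRecorder();
    // Wrap the facet collector manager; the owner keeps each per-slice collector it creates.
    searcher.search(
        query, new CollectorOwner<>(new FacetFieldCollectorManager<>(cutter, recorder)));
    // No getResult() call is needed here: the recorder we created already holds the counts.
    return recorder;
  }
}
```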

View File

@@ -0,0 +1,699 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.internal.hppc;
import com.carrotsearch.randomizedtesting.RandomizedTest;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.junit.After;
import org.junit.Test;
/**
* Tests for {@link IntLongHashMap}.
*
* <p>Mostly forked and trimmed from com.carrotsearch.hppc.IntLongHashMapTest
*
* <p>github: https://github.com/carrotsearch/hppc release: 0.10.0
*/
public class TestIntLongHashMap extends LuceneTestCase {
/* Ready to use key values. */
protected int keyE = 0;
protected int key0 = cast(0), k0 = key0;
protected int key1 = cast(1), k1 = key1;
protected int key2 = cast(2), k2 = key2;
protected int key3 = cast(3), k3 = key3;
protected int key4 = cast(4), k4 = key4;
protected int key5 = cast(5), k5 = key5;
protected int key6 = cast(6), k6 = key6;
protected int key7 = cast(7), k7 = key7;
protected int key8 = cast(8), k8 = key8;
protected int key9 = cast(9), k9 = key9;
protected long value0 = vcast(0);
protected long value1 = vcast(1);
protected long value2 = vcast(2);
protected long value3 = vcast(3);
protected long value4 = vcast(4);
private static int randomIntBetween(int min, int max) {
return min + random().nextInt(max + 1 - min);
}
private final int[] newArray(int... elements) {
return elements;
}
/** Create a new array of a given type and copy the arguments to this array. */
/* */
private final long[] newvArray(long... elements) {
return elements;
}
/** Convert to target type from an integer used to test stuff. */
private int cast(Integer v) {
return v.intValue();
}
/** Convert to target type from an integer used to test stuff. */
private long vcast(int value) {
return (long) value;
}
/** Check if the array's content is identical to a given sequence of elements. */
public static void assertSortedListEquals(int[] array, int... elements) {
assertEquals(elements.length, array.length);
Arrays.sort(array);
Arrays.sort(elements);
assertArrayEquals(elements, array);
}
/** Check if the array's content is identical to a given sequence of elements. */
public static void assertSortedListEquals(long[] array, long... elements) {
assertEquals(elements.length, array.length);
Arrays.sort(array);
assertArrayEquals(elements, array);
}
/** Per-test fresh initialized instance. */
public IntLongHashMap map = newInstance();
protected IntLongHashMap newInstance() {
return new IntLongHashMap();
}
@After
public void checkEmptySlotsUninitialized() {
if (map != null) {
int occupied = 0;
for (int i = 0; i <= map.mask; i++) {
if (((map.keys[i]) == 0)) {
} else {
occupied++;
}
}
assertEquals(occupied, map.assigned);
if (!map.hasEmptyKey) {}
}
}
private void assertSameMap(final IntLongHashMap c1, final IntLongHashMap c2) {
assertEquals(c1.size(), c2.size());
for (IntLongHashMap.IntLongCursor entry : c1) {
assertTrue(c2.containsKey(entry.key));
assertEquals(entry.value, c2.get(entry.key));
}
}
/* */
@Test
public void testEnsureCapacity() {
final AtomicInteger expands = new AtomicInteger();
IntLongHashMap map =
new IntLongHashMap(0) {
@Override
protected void allocateBuffers(int arraySize) {
super.allocateBuffers(arraySize);
expands.incrementAndGet();
}
};
// Add some elements.
final int max = rarely() ? 0 : randomIntBetween(0, 250);
for (int i = 0; i < max; i++) {
map.put(cast(i), value0);
}
final int additions = randomIntBetween(max, max + 5000);
map.ensureCapacity(additions + map.size());
final int before = expands.get();
for (int i = 0; i < additions; i++) {
map.put(cast(i), value0);
}
assertEquals(before, expands.get());
}
@Test
public void testCursorIndexIsValid() {
map.put(keyE, value1);
map.put(key1, value2);
map.put(key2, value3);
for (IntLongHashMap.IntLongCursor c : map) {
assertTrue(map.indexExists(c.index));
assertEquals(c.value, map.indexGet(c.index));
}
}
@Test
public void testIndexMethods() {
map.put(keyE, value1);
map.put(key1, value2);
assertTrue(map.indexOf(keyE) >= 0);
assertTrue(map.indexOf(key1) >= 0);
assertTrue(map.indexOf(key2) < 0);
assertTrue(map.indexExists(map.indexOf(keyE)));
assertTrue(map.indexExists(map.indexOf(key1)));
assertFalse(map.indexExists(map.indexOf(key2)));
assertEquals(value1, map.indexGet(map.indexOf(keyE)));
assertEquals(value2, map.indexGet(map.indexOf(key1)));
expectThrows(
AssertionError.class,
() -> {
map.indexGet(map.indexOf(key2));
});
assertEquals(value1, map.indexReplace(map.indexOf(keyE), value3));
assertEquals(value2, map.indexReplace(map.indexOf(key1), value4));
assertEquals(value3, map.indexGet(map.indexOf(keyE)));
assertEquals(value4, map.indexGet(map.indexOf(key1)));
map.indexInsert(map.indexOf(key2), key2, value1);
assertEquals(value1, map.indexGet(map.indexOf(key2)));
assertEquals(3, map.size());
assertEquals(value3, map.indexRemove(map.indexOf(keyE)));
assertEquals(2, map.size());
assertEquals(value1, map.indexRemove(map.indexOf(key2)));
assertEquals(1, map.size());
assertTrue(map.indexOf(keyE) < 0);
assertTrue(map.indexOf(key1) >= 0);
assertTrue(map.indexOf(key2) < 0);
}
/* */
@Test
public void testCloningConstructor() {
map.put(key1, value1);
map.put(key2, value2);
map.put(key3, value3);
assertSameMap(map, new IntLongHashMap(map));
}
/* */
@Test
public void testFromArrays() {
map.put(key1, value1);
map.put(key2, value2);
map.put(key3, value3);
IntLongHashMap map2 =
IntLongHashMap.from(newArray(key1, key2, key3), newvArray(value1, value2, value3));
assertSameMap(map, map2);
}
@Test
public void testGetOrDefault() {
map.put(key2, value2);
assertTrue(map.containsKey(key2));
map.put(key1, value1);
assertEquals(value1, map.getOrDefault(key1, value3));
assertEquals(value3, map.getOrDefault(key3, value3));
map.remove(key1);
assertEquals(value3, map.getOrDefault(key1, value3));
}
/* */
@Test
public void testPut() {
map.put(key1, value1);
assertTrue(map.containsKey(key1));
assertEquals(value1, map.get(key1));
map.put(key2, 0L);
assertEquals(2, map.size());
assertTrue(map.containsKey(key2));
assertEquals(0L, map.get(key2));
}
/* */
@Test
public void testPutOverExistingKey() {
map.put(key1, value1);
assertEquals(value1, map.put(key1, value3));
assertEquals(value3, map.get(key1));
assertEquals(1, map.size());
assertEquals(value3, map.put(key1, 0L));
assertTrue(map.containsKey(key1));
assertEquals(0L, map.get(key1));
assertEquals(0L, map.put(key1, value1));
assertEquals(value1, map.get(key1));
assertEquals(1, map.size());
}
/* */
@Test
public void testPutWithExpansions() {
final int COUNT = 10000;
final Random rnd = new Random(random().nextLong());
final HashSet<Object> values = new HashSet<Object>();
for (int i = 0; i < COUNT; i++) {
final int v = rnd.nextInt();
final boolean hadKey = values.contains(cast(v));
values.add(cast(v));
assertEquals(hadKey, map.containsKey(cast(v)));
map.put(cast(v), vcast(v));
assertEquals(values.size(), map.size());
}
assertEquals(values.size(), map.size());
}
/* */
@Test
public void testPutAll() {
map.put(key1, value1);
map.put(key2, value1);
IntLongHashMap map2 = newInstance();
map2.put(key2, value2);
map2.put(keyE, value1);
// One new key (keyE).
assertEquals(1, map.putAll(map2));
// Assert the value under key2 has been replaced.
assertEquals(value2, map.get(key2));
// And key3 has been added.
assertEquals(value1, map.get(keyE));
assertEquals(3, map.size());
}
/* */
@Test
public void testPutIfAbsent() {
assertTrue(map.putIfAbsent(key1, value1));
assertFalse(map.putIfAbsent(key1, value2));
assertEquals(value1, map.get(key1));
}
@Test
public void testPutOrAdd() {
assertEquals(value1, map.putOrAdd(key1, value1, value2));
assertEquals(value3, map.putOrAdd(key1, value1, value2));
}
@Test
public void testAddTo() {
assertEquals(value1, map.addTo(key1, value1));
assertEquals(value3, map.addTo(key1, value2));
}
/* */
@Test
public void testRemove() {
map.put(key1, value1);
assertEquals(value1, map.remove(key1));
assertEquals(0L, map.remove(key1));
assertEquals(0, map.size());
// These are internals, but perhaps worth asserting too.
assertEquals(0, map.assigned);
}
/* */
@Test
public void testEmptyKey() {
final int empty = 0;
map.put(empty, value1);
assertEquals(1, map.size());
assertEquals(false, map.isEmpty());
assertEquals(value1, map.get(empty));
assertEquals(value1, map.getOrDefault(empty, value2));
assertEquals(true, map.iterator().hasNext());
assertEquals(empty, map.iterator().next().key);
assertEquals(value1, map.iterator().next().value);
assertEquals(1, map.keys().size());
assertEquals(empty, map.keys().iterator().next().value);
assertEquals(value1, map.values().iterator().next().value);
assertEquals(value1, map.put(empty, 0L));
assertEquals(1, map.size());
assertTrue(map.containsKey(empty));
assertEquals(0L, map.get(empty));
map.remove(empty);
assertEquals(0L, map.get(empty));
assertEquals(0, map.size());
assertEquals(0L, map.put(empty, value1));
assertEquals(value1, map.put(empty, value2));
map.clear();
assertFalse(map.indexExists(map.indexOf(empty)));
assertEquals(0L, map.put(empty, value1));
map.clear();
assertEquals(0L, map.remove(empty));
}
/* */
@Test
public void testMapKeySet() {
map.put(key1, value3);
map.put(key2, value2);
map.put(key3, value1);
assertSortedListEquals(map.keys().toArray(), key1, key2, key3);
}
/* */
@Test
public void testMapKeySetIterator() {
map.put(key1, value3);
map.put(key2, value2);
map.put(key3, value1);
int counted = 0;
for (IntCursor c : map.keys()) {
assertEquals(map.keys[c.index], c.value);
counted++;
}
assertEquals(counted, map.size());
}
/* */
@Test
public void testClear() {
map.put(key1, value1);
map.put(key2, value1);
map.clear();
assertEquals(0, map.size());
// These are internals, but perhaps worth asserting too.
assertEquals(0, map.assigned);
// Check values are cleared.
assertEquals(0L, map.put(key1, value1));
assertEquals(0L, map.remove(key2));
map.clear();
// Check if the map behaves properly upon subsequent use.
testPutWithExpansions();
}
/* */
@Test
public void testRelease() {
map.put(key1, value1);
map.put(key2, value1);
map.release();
assertEquals(0, map.size());
// These are internals, but perhaps worth asserting too.
assertEquals(0, map.assigned);
// Check if the map behaves properly upon subsequent use.
testPutWithExpansions();
}
/* */
@Test
public void testIterable() {
map.put(key1, value1);
map.put(key2, value2);
map.put(key3, value3);
map.remove(key2);
int count = 0;
for (IntLongHashMap.IntLongCursor cursor : map) {
count++;
assertTrue(map.containsKey(cursor.key));
assertEquals(cursor.value, map.get(cursor.key));
assertEquals(cursor.value, map.values[cursor.index]);
assertEquals(cursor.key, map.keys[cursor.index]);
}
assertEquals(count, map.size());
map.clear();
assertFalse(map.iterator().hasNext());
}
/* */
@Test
public void testBug_HPPC73_FullCapacityGet() {
final AtomicInteger reallocations = new AtomicInteger();
final int elements = 0x7F;
map =
new IntLongHashMap(elements, 1f) {
@Override
protected double verifyLoadFactor(double loadFactor) {
// Skip load factor sanity range checking.
return loadFactor;
}
@Override
protected void allocateBuffers(int arraySize) {
super.allocateBuffers(arraySize);
reallocations.incrementAndGet();
}
};
int reallocationsBefore = reallocations.get();
assertEquals(reallocationsBefore, 1);
for (int i = 1; i <= elements; i++) {
map.put(cast(i), value1);
}
// Non-existent key.
int outOfSet = cast(elements + 1);
map.remove(outOfSet);
assertFalse(map.containsKey(outOfSet));
assertEquals(reallocationsBefore, reallocations.get());
// Should not expand because we're replacing an existing element.
map.put(k1, value2);
assertEquals(reallocationsBefore, reallocations.get());
// Remove from a full map.
map.remove(k1);
assertEquals(reallocationsBefore, reallocations.get());
map.put(k1, value2);
// Check expand on "last slot of a full map" condition.
map.put(outOfSet, value1);
assertEquals(reallocationsBefore + 1, reallocations.get());
}
@Test
public void testHashCodeEquals() {
IntLongHashMap l0 = newInstance();
assertEquals(0, l0.hashCode());
assertEquals(l0, newInstance());
IntLongHashMap l1 =
IntLongHashMap.from(newArray(key1, key2, key3), newvArray(value1, value2, value3));
IntLongHashMap l2 =
IntLongHashMap.from(newArray(key2, key1, key3), newvArray(value2, value1, value3));
IntLongHashMap l3 = IntLongHashMap.from(newArray(key1, key2), newvArray(value2, value1));
assertEquals(l1.hashCode(), l2.hashCode());
assertEquals(l1, l2);
assertFalse(l1.equals(l3));
assertFalse(l2.equals(l3));
}
@Test
public void testBug_HPPC37() {
IntLongHashMap l1 = IntLongHashMap.from(newArray(key1), newvArray(value1));
IntLongHashMap l2 = IntLongHashMap.from(newArray(key2), newvArray(value1));
assertFalse(l1.equals(l2));
assertFalse(l2.equals(l1));
}
@Test
public void testEmptyValue() {
assertEquals(0L, map.put(key1, 0L));
assertEquals(0L, map.get(key1));
assertTrue(map.containsKey(key1));
map.remove(key1);
assertFalse(map.containsKey(key1));
assertEquals(0, map.size());
}
/** Runs random insertions/deletions/clearing and compares the results against {@link HashMap}. */
@Test
@SuppressWarnings({"rawtypes", "unchecked"})
public void testAgainstHashMap() {
final Random rnd = RandomizedTest.getRandom();
final HashMap other = new HashMap();
for (int size = 1000; size < 20000; size += 4000) {
other.clear();
map.clear();
for (int round = 0; round < size * 20; round++) {
int key = cast(rnd.nextInt(size));
if (rnd.nextInt(50) == 0) {
key = 0;
}
long value = vcast(rnd.nextInt());
boolean hadOldValue = map.containsKey(key);
if (rnd.nextBoolean()) {
long previousValue;
if (rnd.nextBoolean()) {
int index = map.indexOf(key);
if (map.indexExists(index)) {
previousValue = map.indexReplace(index, value);
} else {
map.indexInsert(index, key, value);
previousValue = 0L;
}
} else {
previousValue = map.put(key, value);
}
assertEquals(
other.put(key, value), ((previousValue) == 0) && !hadOldValue ? null : previousValue);
assertEquals(value, map.get(key));
assertEquals(value, map.indexGet(map.indexOf(key)));
assertTrue(map.containsKey(key));
assertTrue(map.indexExists(map.indexOf(key)));
} else {
assertEquals(other.containsKey(key), map.containsKey(key));
long previousValue =
map.containsKey(key) && rnd.nextBoolean()
? map.indexRemove(map.indexOf(key))
: map.remove(key);
assertEquals(
other.remove(key), ((previousValue) == 0) && !hadOldValue ? null : previousValue);
}
assertEquals(other.size(), map.size());
}
}
}
/*
*
*/
@Test
public void testClone() {
this.map.put(key1, value1);
this.map.put(key2, value2);
this.map.put(key3, value3);
IntLongHashMap cloned = map.clone();
cloned.remove(key1);
assertSortedListEquals(map.keys().toArray(), key1, key2, key3);
assertSortedListEquals(cloned.keys().toArray(), key2, key3);
}
/* */
@Test
public void testMapValues() {
map.put(key1, value3);
map.put(key2, value2);
map.put(key3, value1);
assertSortedListEquals(map.values().toArray(), value1, value2, value3);
map.clear();
map.put(key1, value1);
map.put(key2, value2);
map.put(key3, value2);
assertSortedListEquals(map.values().toArray(), value1, value2, value2);
}
/* */
@Test
public void testMapValuesIterator() {
map.put(key1, value3);
map.put(key2, value2);
map.put(key3, value1);
int counted = 0;
for (LongCursor c : map.values()) {
assertEquals(map.values[c.index], c.value);
counted++;
}
assertEquals(counted, map.size());
}
/* */
@Test
public void testEqualsSameClass() {
IntLongHashMap l1 = newInstance();
l1.put(k1, value0);
l1.put(k2, value1);
l1.put(k3, value2);
IntLongHashMap l2 = new IntLongHashMap(l1);
l2.putAll(l1);
IntLongHashMap l3 = new IntLongHashMap(l2);
l3.putAll(l2);
l3.put(k4, value0);
assertEquals(l1, l2);
assertEquals(l1.hashCode(), l2.hashCode());
assertNotEquals(l1, l3);
}
/* */
@Test
public void testEqualsSubClass() {
class Sub extends IntLongHashMap {}
;
IntLongHashMap l1 = newInstance();
l1.put(k1, value0);
l1.put(k2, value1);
l1.put(k3, value2);
IntLongHashMap l2 = new Sub();
l2.putAll(l1);
l2.put(k4, value3);
IntLongHashMap l3 = new Sub();
l3.putAll(l2);
assertNotEquals(l1, l2);
assertEquals(l2.hashCode(), l3.hashCode());
assertEquals(l2, l3);
}
}

View File

@@ -23,6 +23,7 @@ module org.apache.lucene.demo {
requires org.apache.lucene.queries;
requires org.apache.lucene.queryparser;
requires org.apache.lucene.expressions;
requires org.apache.lucene.sandbox;
exports org.apache.lucene.demo;
exports org.apache.lucene.demo.facet;

View File

@@ -0,0 +1,737 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.demo.facet;
import static org.apache.lucene.facet.FacetsConfig.DEFAULT_INDEX_FIELD_NAME;
import static org.apache.lucene.sandbox.facet.ComparableUtils.byAggregatedValue;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.facet.DrillDownQuery;
import org.apache.lucene.facet.DrillSideways;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.facet.MultiLongValuesSource;
import org.apache.lucene.facet.range.LongRange;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.sandbox.facet.ComparableUtils;
import org.apache.lucene.sandbox.facet.FacetFieldCollector;
import org.apache.lucene.sandbox.facet.FacetFieldCollectorManager;
import org.apache.lucene.sandbox.facet.cutters.TaxonomyFacetsCutter;
import org.apache.lucene.sandbox.facet.cutters.ranges.LongRangeFacetCutter;
import org.apache.lucene.sandbox.facet.iterators.ComparableSupplier;
import org.apache.lucene.sandbox.facet.iterators.OrdinalIterator;
import org.apache.lucene.sandbox.facet.iterators.TaxonomyChildrenOrdinalIterator;
import org.apache.lucene.sandbox.facet.iterators.TopnOrdinalIterator;
import org.apache.lucene.sandbox.facet.labels.RangeOrdToLabel;
import org.apache.lucene.sandbox.facet.labels.TaxonomyOrdLabelBiMap;
import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder;
import org.apache.lucene.sandbox.facet.recorders.LongAggregationsFacetRecorder;
import org.apache.lucene.sandbox.facet.recorders.MultiFacetsRecorder;
import org.apache.lucene.sandbox.facet.recorders.Reducer;
import org.apache.lucene.search.CollectorOwner;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValuesSource;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiCollectorManager;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollectorManager;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
/** Demo for sandbox faceting. */
public class SandboxFacetsExample {
private final Directory indexDir = new ByteBuffersDirectory();
private final Directory taxoDir = new ByteBuffersDirectory();
private final FacetsConfig config = new FacetsConfig();
private SandboxFacetsExample() {
config.setHierarchical("Publish Date", true);
}
/** Build the example index. */
void index() throws IOException {
IndexWriter indexWriter =
new IndexWriter(
indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));
// Writes facet ords to a separate directory from the main index
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
Document doc = new Document();
doc.add(new FacetField("Author", "Bob"));
doc.add(new FacetField("Publish Date", "2010", "10", "15"));
doc.add(new NumericDocValuesField("Price", 10));
doc.add(new NumericDocValuesField("Units", 9));
doc.add(new DoubleDocValuesField("Popularity", 3.5d));
indexWriter.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Lisa"));
doc.add(new FacetField("Publish Date", "2010", "10", "20"));
doc.add(new NumericDocValuesField("Price", 4));
doc.add(new NumericDocValuesField("Units", 2));
doc.add(new DoubleDocValuesField("Popularity", 4.1D));
indexWriter.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Lisa"));
doc.add(new FacetField("Publish Date", "2012", "1", "1"));
doc.add(new NumericDocValuesField("Price", 3));
doc.add(new NumericDocValuesField("Units", 5));
doc.add(new DoubleDocValuesField("Popularity", 3.9D));
indexWriter.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Susan"));
doc.add(new FacetField("Publish Date", "2012", "1", "7"));
doc.add(new NumericDocValuesField("Price", 8));
doc.add(new NumericDocValuesField("Units", 7));
doc.add(new DoubleDocValuesField("Popularity", 4D));
indexWriter.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Frank"));
doc.add(new FacetField("Publish Date", "1999", "5", "5"));
doc.add(new NumericDocValuesField("Price", 9));
doc.add(new NumericDocValuesField("Units", 6));
doc.add(new DoubleDocValuesField("Popularity", 4.9D));
indexWriter.addDocument(config.build(taxoWriter, doc));
IOUtils.close(indexWriter, taxoWriter);
}
/** User runs a query and counts facets only without collecting the matching documents. */
List<FacetResult> facetsOnly() throws IOException {
//// (1) init readers and searcher
DirectoryReader indexReader = DirectoryReader.open(indexDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
//// (2) init collector
TaxonomyFacetsCutter defaultTaxoCutter =
new TaxonomyFacetsCutter(DEFAULT_INDEX_FIELD_NAME, config, taxoReader);
CountFacetRecorder defaultRecorder = new CountFacetRecorder();
FacetFieldCollectorManager<CountFacetRecorder> collectorManager =
new FacetFieldCollectorManager<>(defaultTaxoCutter, defaultRecorder);
//// (2.1) if we need to collect data using multiple different collectors, e.g. taxonomy and
//// ranges, or even two taxonomy facets that use different Category List Field, we can
//// use MultiCollectorManager, e.g.:
// TODO: add a demo for it.
// TaxonomyFacetsCutter publishDateCutter = new
// TaxonomyFacetsCutter(config.getDimConfig("Publish Date"), taxoReader);
// CountFacetRecorder publishDateRecorder = new CountFacetRecorder(false);
// FacetFieldCollectorManager<CountFacetRecorder> publishDateCollectorManager = new
// FacetFieldCollectorManager<>(publishDateCutter, publishDateRecorder);
// MultiCollectorManager drillDownCollectorManager = new
// MultiCollectorManager(authorCollectorManager, publishDateCollectorManager);
// Object[] results = searcher.search(new MatchAllDocsQuery(), drillDownCollectorManager);
//// (3) search
// Search returns the same Recorder we created - so we can ignore results
searcher.search(new MatchAllDocsQuery(), collectorManager);
//// (4) Get top 10 results by count for Author and Publish Date
// This object is used to get topN results by count
ComparableSupplier<ComparableUtils.ByCountComparable> countComparable =
ComparableUtils.byCount(defaultRecorder);
// We don't actually need to use FacetResult, it is up to client what to do with the results.
// Here we just want to demo that we can still do FacetResult as well
List<FacetResult> results = new ArrayList<>(2);
// This object provides labels for ordinals.
TaxonomyOrdLabelBiMap ordLabels = new TaxonomyOrdLabelBiMap(taxoReader);
for (String dimension : List.of("Author", "Publish Date")) {
//// (4.1) Chain two ordinal iterators to get top N children
int dimOrdinal = ordLabels.getOrd(new FacetLabel(dimension));
OrdinalIterator childrenIterator =
new TaxonomyChildrenOrdinalIterator(
defaultRecorder.recordedOrds(),
taxoReader.getParallelTaxonomyArrays().parents(),
dimOrdinal);
OrdinalIterator topByCountOrds =
new TopnOrdinalIterator<>(childrenIterator, countComparable, 10);
// Get array of final ordinals - we need to use all of them to get labels first, and then to
// get counts,
// but OrdinalIterator only allows reading ordinals once.
int[] resultOrdinals = topByCountOrds.toArray();
//// (4.2) Use faceting results
FacetLabel[] labels = ordLabels.getLabels(resultOrdinals);
List<LabelAndValue> labelsAndValues = new ArrayList<>(labels.length);
for (int i = 0; i < resultOrdinals.length; i++) {
labelsAndValues.add(
new LabelAndValue(
labels[i].lastComponent(), defaultRecorder.getCount(resultOrdinals[i])));
}
int dimensionValue = defaultRecorder.getCount(dimOrdinal);
results.add(
new FacetResult(
dimension,
new String[0],
dimensionValue,
labelsAndValues.toArray(new LabelAndValue[0]),
labelsAndValues.size()));
}
IOUtils.close(indexReader, taxoReader);
return results;
}
/**
* User runs a query and counts facets for exclusive ranges without collecting the matching
* documents
*/
List<FacetResult> exclusiveRangesCountFacetsOnly() throws IOException {
DirectoryReader indexReader = DirectoryReader.open(indexDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
MultiLongValuesSource valuesSource = MultiLongValuesSource.fromLongField("Price");
// Exclusive ranges example
LongRange[] inputRanges = new LongRange[2];
inputRanges[0] = new LongRange("0-5", 0, true, 5, true);
inputRanges[1] = new LongRange("5-10", 5, false, 10, true);
LongRangeFacetCutter longRangeFacetCutter =
LongRangeFacetCutter.create(valuesSource, inputRanges);
CountFacetRecorder countRecorder = new CountFacetRecorder();
FacetFieldCollectorManager<CountFacetRecorder> collectorManager =
new FacetFieldCollectorManager<>(longRangeFacetCutter, countRecorder);
searcher.search(new MatchAllDocsQuery(), collectorManager);
RangeOrdToLabel ordToLabels = new RangeOrdToLabel(inputRanges);
ComparableSupplier<ComparableUtils.ByCountComparable> countComparable =
ComparableUtils.byCount(countRecorder);
OrdinalIterator topByCountOrds =
new TopnOrdinalIterator<>(countRecorder.recordedOrds(), countComparable, 10);
List<FacetResult> results = new ArrayList<>(2);
int[] resultOrdinals = topByCountOrds.toArray();
FacetLabel[] labels = ordToLabels.getLabels(resultOrdinals);
List<LabelAndValue> labelsAndValues = new ArrayList<>(labels.length);
for (int i = 0; i < resultOrdinals.length; i++) {
labelsAndValues.add(
new LabelAndValue(labels[i].lastComponent(), countRecorder.getCount(resultOrdinals[i])));
}
results.add(
new FacetResult(
"Price", new String[0], 0, labelsAndValues.toArray(new LabelAndValue[0]), 0));
System.out.println("Computed counts");
IOUtils.close(indexReader);
return results;
}
List<FacetResult> overlappingRangesCountFacetsOnly() throws IOException {
DirectoryReader indexReader = DirectoryReader.open(indexDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
MultiLongValuesSource valuesSource = MultiLongValuesSource.fromLongField("Price");
// overlapping ranges example
LongRange[] inputRanges = new LongRange[2];
inputRanges[0] = new LongRange("0-5", 0, true, 5, true);
inputRanges[1] = new LongRange("0-10", 0, true, 10, true);
LongRangeFacetCutter longRangeFacetCutter =
LongRangeFacetCutter.create(valuesSource, inputRanges);
CountFacetRecorder countRecorder = new CountFacetRecorder();
FacetFieldCollectorManager<CountFacetRecorder> collectorManager =
new FacetFieldCollectorManager<>(longRangeFacetCutter, countRecorder);
searcher.search(new MatchAllDocsQuery(), collectorManager);
RangeOrdToLabel ordToLabels = new RangeOrdToLabel(inputRanges);
ComparableSupplier<ComparableUtils.ByCountComparable> countComparable =
ComparableUtils.byCount(countRecorder);
OrdinalIterator topByCountOrds =
new TopnOrdinalIterator<>(countRecorder.recordedOrds(), countComparable, 10);
List<FacetResult> results = new ArrayList<>(2);
int[] resultOrdinals = topByCountOrds.toArray();
FacetLabel[] labels = ordToLabels.getLabels(resultOrdinals);
List<LabelAndValue> labelsAndValues = new ArrayList<>(labels.length);
for (int i = 0; i < resultOrdinals.length; i++) {
labelsAndValues.add(
new LabelAndValue(labels[i].lastComponent(), countRecorder.getCount(resultOrdinals[i])));
}
results.add(
new FacetResult(
"Price", new String[0], 0, labelsAndValues.toArray(new LabelAndValue[0]), 0));
System.out.println("Computed counts");
IOUtils.close(indexReader);
return results;
}
List<FacetResult> exclusiveRangesAggregationFacets() throws IOException {
DirectoryReader indexReader = DirectoryReader.open(indexDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
MultiLongValuesSource valuesSource = MultiLongValuesSource.fromLongField("Price");
// Exclusive ranges example
LongRange[] inputRanges = new LongRange[2];
inputRanges[0] = new LongRange("0-5", 0, true, 5, true);
inputRanges[1] = new LongRange("5-10", 5, false, 10, true);
LongRangeFacetCutter longRangeFacetCutter =
LongRangeFacetCutter.create(valuesSource, inputRanges);
// initialise the aggregations to be computed - a values source + reducer
LongValuesSource[] longValuesSources = new LongValuesSource[2];
Reducer[] reducers = new Reducer[2];
// popularity:max
longValuesSources[0] = DoubleValuesSource.fromDoubleField("Popularity").toLongValuesSource();
reducers[0] = Reducer.MAX;
// units:sum
longValuesSources[1] = LongValuesSource.fromLongField("Units");
reducers[1] = Reducer.SUM;
LongAggregationsFacetRecorder longAggregationsFacetRecorder =
new LongAggregationsFacetRecorder(longValuesSources, reducers);
CountFacetRecorder countRecorder = new CountFacetRecorder();
// Compute both counts and aggregations
MultiFacetsRecorder multiFacetsRecorder =
new MultiFacetsRecorder(countRecorder, longAggregationsFacetRecorder);
FacetFieldCollectorManager<MultiFacetsRecorder> collectorManager =
new FacetFieldCollectorManager<>(longRangeFacetCutter, multiFacetsRecorder);
searcher.search(new MatchAllDocsQuery(), collectorManager);
RangeOrdToLabel ordToLabels = new RangeOrdToLabel(inputRanges);
// Get recorded ords - use either count/aggregations recorder
OrdinalIterator recordedOrds = longAggregationsFacetRecorder.recordedOrds();
// We don't actually need to use FacetResult; it is up to the client what to do with the results.
// Here we just want to demo that we can still produce FacetResult as well.
List<FacetResult> results = new ArrayList<>(2);
ComparableSupplier<ComparableUtils.ByAggregatedValueComparable> comparableSupplier;
OrdinalIterator topOrds;
int[] resultOrdinals;
FacetLabel[] labels;
List<LabelAndValue> labelsAndValues;
// Sort results by units:sum and tie-break by count
comparableSupplier = byAggregatedValue(countRecorder, longAggregationsFacetRecorder, 1);
topOrds = new TopnOrdinalIterator<>(recordedOrds, comparableSupplier, 10);
resultOrdinals = topOrds.toArray();
labels = ordToLabels.getLabels(resultOrdinals);
labelsAndValues = new ArrayList<>(labels.length);
for (int i = 0; i < resultOrdinals.length; i++) {
labelsAndValues.add(
new LabelAndValue(
labels[i].lastComponent(),
longAggregationsFacetRecorder.getRecordedValue(resultOrdinals[i], 1)));
}
results.add(
new FacetResult(
"Price", new String[0], 0, labelsAndValues.toArray(new LabelAndValue[0]), 0));
// note: previous ordinal iterator was exhausted
recordedOrds = longAggregationsFacetRecorder.recordedOrds();
// Sort results by popularity:max and tie-break by count
comparableSupplier = byAggregatedValue(countRecorder, longAggregationsFacetRecorder, 0);
topOrds = new TopnOrdinalIterator<>(recordedOrds, comparableSupplier, 10);
resultOrdinals = topOrds.toArray();
labels = ordToLabels.getLabels(resultOrdinals);
labelsAndValues = new ArrayList<>(labels.length);
for (int i = 0; i < resultOrdinals.length; i++) {
labelsAndValues.add(
new LabelAndValue(
labels[i].lastComponent(),
longAggregationsFacetRecorder.getRecordedValue(resultOrdinals[i], 0)));
}
results.add(
new FacetResult(
"Price", new String[0], 0, labelsAndValues.toArray(new LabelAndValue[0]), 0));
return results;
}
/** User runs a query and counts facets. */
private List<FacetResult> facetsWithSearch() throws IOException {
//// (1) init readers and searcher
DirectoryReader indexReader = DirectoryReader.open(indexDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
//// (2) init collectors
// Facet collectors
TaxonomyFacetsCutter defaultTaxoCutter =
new TaxonomyFacetsCutter(DEFAULT_INDEX_FIELD_NAME, config, taxoReader);
CountFacetRecorder defaultRecorder = new CountFacetRecorder();
FacetFieldCollectorManager<CountFacetRecorder> taxoFacetsCollectorManager =
new FacetFieldCollectorManager<>(defaultTaxoCutter, defaultRecorder);
// Hits collector
TopScoreDocCollectorManager hitsCollectorManager =
new TopScoreDocCollectorManager(2, Integer.MAX_VALUE);
// Now wrap them with MultiCollectorManager to collect both hits and facets.
MultiCollectorManager collectorManager =
new MultiCollectorManager(hitsCollectorManager, taxoFacetsCollectorManager);
//// (3) search
Object[] results = searcher.search(new MatchAllDocsQuery(), collectorManager);
TopDocs topDocs = (TopDocs) results[0];
System.out.println(
"Search results: totalHits: "
+ topDocs.totalHits
+ ", collected hits: "
+ topDocs.scoreDocs.length);
// FacetFieldCollectorManager returns the same Recorder it was created with, so we can read the
// results from the original recorder and ignore the returned value:
// CountFacetRecorder defaultRecorder = (CountFacetRecorder) results[1];
//// (4) Get top 10 results by count for Author and Publish Date
// This object is used to get topN results by count
ComparableSupplier<ComparableUtils.ByCountComparable> countComparable =
ComparableUtils.byCount(defaultRecorder);
// We don't actually need to use FacetResult; it is up to the client what to do with the results.
// Here we just want to demo that we can still produce FacetResult as well.
List<FacetResult> facetResults = new ArrayList<>(2);
// This object provides labels for ordinals.
TaxonomyOrdLabelBiMap ordLabels = new TaxonomyOrdLabelBiMap(taxoReader);
for (String dimension : List.of("Author", "Publish Date")) {
int dimensionOrdinal = ordLabels.getOrd(new FacetLabel(dimension));
//// (4.1) Chain two ordinal iterators to get top N children
OrdinalIterator childrenIterator =
new TaxonomyChildrenOrdinalIterator(
defaultRecorder.recordedOrds(),
taxoReader.getParallelTaxonomyArrays().parents(),
dimensionOrdinal);
OrdinalIterator topByCountOrds =
new TopnOrdinalIterator<>(childrenIterator, countComparable, 10);
// Get array of final ordinals - we need to use all of them to get labels first, and then to
// get counts,
// but OrdinalIterator only allows reading ordinals once.
int[] resultOrdinals = topByCountOrds.toArray();
//// (4.2) Use faceting results
FacetLabel[] labels = ordLabels.getLabels(resultOrdinals);
List<LabelAndValue> labelsAndValues = new ArrayList<>(labels.length);
for (int i = 0; i < resultOrdinals.length; i++) {
labelsAndValues.add(
new LabelAndValue(
labels[i].lastComponent(), defaultRecorder.getCount(resultOrdinals[i])));
}
int dimensionValue = defaultRecorder.getCount(dimensionOrdinal);
facetResults.add(
new FacetResult(
dimension,
new String[0],
dimensionValue,
labelsAndValues.toArray(new LabelAndValue[0]),
labelsAndValues.size()));
}
IOUtils.close(indexReader, taxoReader);
return facetResults;
}
/** User drills down on 'Publish Date/2010', and we return facets for 'Author' */
FacetResult drillDown() throws IOException {
//// (1) init readers and searcher
DirectoryReader indexReader = DirectoryReader.open(indexDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
//// (2) init collector
TaxonomyFacetsCutter defaultTaxoCutter =
new TaxonomyFacetsCutter(DEFAULT_INDEX_FIELD_NAME, config, taxoReader);
CountFacetRecorder defaultRecorder = new CountFacetRecorder();
FacetFieldCollectorManager<CountFacetRecorder> collectorManager =
new FacetFieldCollectorManager<>(defaultTaxoCutter, defaultRecorder);
DrillDownQuery q = new DrillDownQuery(config);
q.add("Publish Date", "2010");
//// (3) search
// Right now we return the same Recorder we created - so we can ignore results
searcher.search(q, collectorManager);
//// (4) Get top 10 results by count for Author
// This object is used to get topN results by count
ComparableSupplier<ComparableUtils.ByCountComparable> countComparable =
ComparableUtils.byCount(defaultRecorder);
// This object provides labels for ordinals.
TaxonomyOrdLabelBiMap ordLabels = new TaxonomyOrdLabelBiMap(taxoReader);
String dimension = "Author";
//// (4.1) Chain two ordinal iterators to get top N children
int dimOrdinal = ordLabels.getOrd(new FacetLabel(dimension));
OrdinalIterator childrenIterator =
new TaxonomyChildrenOrdinalIterator(
defaultRecorder.recordedOrds(),
taxoReader.getParallelTaxonomyArrays().parents(),
dimOrdinal);
OrdinalIterator topByCountOrds =
new TopnOrdinalIterator<>(childrenIterator, countComparable, 10);
// Get array of final ordinals - we need to use all of them to get labels first, and then to get
// counts,
// but OrdinalIterator only allows reading ordinals once.
int[] resultOrdinals = topByCountOrds.toArray();
//// (4.2) Use faceting results
FacetLabel[] labels = ordLabels.getLabels(resultOrdinals);
List<LabelAndValue> labelsAndValues = new ArrayList<>(labels.length);
for (int i = 0; i < resultOrdinals.length; i++) {
labelsAndValues.add(
new LabelAndValue(
labels[i].lastComponent(), defaultRecorder.getCount(resultOrdinals[i])));
}
IOUtils.close(indexReader, taxoReader);
int dimensionValue = defaultRecorder.getCount(dimOrdinal);
// We don't actually need to use FacetResult; it is up to the client what to do with the results.
// Here we just want to demo that we can still produce FacetResult as well.
return new FacetResult(
dimension,
new String[0],
dimensionValue,
labelsAndValues.toArray(new LabelAndValue[0]),
labelsAndValues.size());
}
/**
* User drills down on 'Publish Date/2010', and we return facets for both 'Publish Date' and
* 'Author', using DrillSideways.
*/
private List<FacetResult> drillSideways() throws IOException {
//// (1) init readers and searcher
DirectoryReader indexReader = DirectoryReader.open(indexDir);
IndexSearcher searcher = new IndexSearcher(indexReader);
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
//// (2) init drill down query and collectors
TaxonomyFacetsCutter defaultTaxoCutter =
new TaxonomyFacetsCutter(DEFAULT_INDEX_FIELD_NAME, config, taxoReader);
CountFacetRecorder drillDownRecorder = new CountFacetRecorder();
FacetFieldCollectorManager<CountFacetRecorder> drillDownCollectorManager =
new FacetFieldCollectorManager<>(defaultTaxoCutter, drillDownRecorder);
DrillDownQuery q = new DrillDownQuery(config);
//// (2.1) add query and collector dimensions
q.add("Publish Date", "2010");
CountFacetRecorder publishDayDimensionRecorder = new CountFacetRecorder();
// Note that it is safe to use the same FacetsCutter here because we create a leaf cutter per
// leaf for each FacetFieldCollectorManager anyway, and leaf cutters are never merged or shared.
FacetFieldCollectorManager<CountFacetRecorder> publishDayDimensionCollectorManager =
new FacetFieldCollectorManager<>(defaultTaxoCutter, publishDayDimensionRecorder);
List<CollectorOwner<FacetFieldCollector, CountFacetRecorder>> drillSidewaysOwners =
List.of(new CollectorOwner<>(publishDayDimensionCollectorManager));
//// (3) search
// Right now we return the same Recorder we created - so we can ignore results
DrillSideways ds = new DrillSideways(searcher, config, taxoReader);
// We must wrap the list of drill sideways owners with unmodifiableList to make generics work.
ds.search(
q,
new CollectorOwner<>(drillDownCollectorManager),
Collections.unmodifiableList(drillSidewaysOwners));
//// (4) Get top 10 results by count for Author
List<FacetResult> facetResults = new ArrayList<>(2);
// This object provides labels for ordinals.
TaxonomyOrdLabelBiMap ordLabels = new TaxonomyOrdLabelBiMap(taxoReader);
// This object is used to get topN results by count
ComparableSupplier<ComparableUtils.ByCountComparable> countComparable =
ComparableUtils.byCount(drillDownRecorder);
//// (4.1) Chain two ordinal iterators to get top N children
int dimOrdinal = ordLabels.getOrd(new FacetLabel("Author"));
OrdinalIterator childrenIterator =
new TaxonomyChildrenOrdinalIterator(
drillDownRecorder.recordedOrds(),
taxoReader.getParallelTaxonomyArrays().parents(),
dimOrdinal);
OrdinalIterator topByCountOrds =
new TopnOrdinalIterator<>(childrenIterator, countComparable, 10);
// Get array of final ordinals - we need to use all of them to get labels first, and then to get
// counts,
// but OrdinalIterator only allows reading ordinals once.
int[] resultOrdinals = topByCountOrds.toArray();
//// (4.2) Use faceting results
FacetLabel[] labels = ordLabels.getLabels(resultOrdinals);
List<LabelAndValue> labelsAndValues = new ArrayList<>(labels.length);
for (int i = 0; i < resultOrdinals.length; i++) {
labelsAndValues.add(
new LabelAndValue(
labels[i].lastComponent(), drillDownRecorder.getCount(resultOrdinals[i])));
}
int dimensionValue = drillDownRecorder.getCount(dimOrdinal);
facetResults.add(
new FacetResult(
"Author",
new String[0],
dimensionValue,
labelsAndValues.toArray(new LabelAndValue[0]),
labelsAndValues.size()));
//// (5) Same process, but for Publish Date drill sideways dimension
countComparable = ComparableUtils.byCount(publishDayDimensionRecorder);
//// (4.1) Chain two ordinal iterators to get top N children
dimOrdinal = ordLabels.getOrd(new FacetLabel("Publish Date"));
childrenIterator =
new TaxonomyChildrenOrdinalIterator(
publishDayDimensionRecorder.recordedOrds(),
taxoReader.getParallelTaxonomyArrays().parents(),
dimOrdinal);
topByCountOrds = new TopnOrdinalIterator<>(childrenIterator, countComparable, 10);
// Get array of final ordinals - we need to use all of them to get labels first, and then to get
// counts,
// but OrdinalIterator only allows reading ordinals once.
resultOrdinals = topByCountOrds.toArray();
//// (4.2) Use faceting results
labels = ordLabels.getLabels(resultOrdinals);
labelsAndValues = new ArrayList<>(labels.length);
for (int i = 0; i < resultOrdinals.length; i++) {
labelsAndValues.add(
new LabelAndValue(
labels[i].lastComponent(), publishDayDimensionRecorder.getCount(resultOrdinals[i])));
}
dimensionValue = publishDayDimensionRecorder.getCount(dimOrdinal);
facetResults.add(
new FacetResult(
"Publish Date",
new String[0],
dimensionValue,
labelsAndValues.toArray(new LabelAndValue[0]),
labelsAndValues.size()));
IOUtils.close(indexReader, taxoReader);
return facetResults;
}
/** Runs the facets-only example. */
public List<FacetResult> runFacetOnly() throws IOException {
index();
return facetsOnly();
}
/** Runs the search example. */
public List<FacetResult> runSearch() throws IOException {
index();
return facetsWithSearch();
}
/** Runs the drill-down example. */
public FacetResult runDrillDown() throws IOException {
index();
return drillDown();
}
/** Runs the drill-sideways example. */
public List<FacetResult> runDrillSideways() throws IOException {
index();
return drillSideways();
}
/** Runs the example of non-overlapping range facets. */
public List<FacetResult> runNonOverlappingRangesCountFacetsOnly() throws IOException {
index();
return exclusiveRangesCountFacetsOnly();
}
/** Runs the example of overlapping range facets. */
public List<FacetResult> runOverlappingRangesCountFacetsOnly() throws IOException {
index();
return overlappingRangesCountFacetsOnly();
}
/** Runs the example of collecting long aggregations for non overlapping range facets. */
public List<FacetResult> runNonOverlappingRangesAggregationFacets() throws IOException {
index();
return exclusiveRangesAggregationFacets();
}
/** Runs the search, drill-down, drill-sideways, and range faceting examples and prints the results. */
public static void main(String[] args) throws Exception {
System.out.println("Facet counting example:");
System.out.println("-----------------------");
SandboxFacetsExample example = new SandboxFacetsExample();
List<FacetResult> results1 = example.runFacetOnly();
System.out.println("Author: " + results1.get(0));
System.out.println("Publish Date: " + results1.get(1));
System.out.println("Facet counting example (combined facets and search):");
System.out.println("-----------------------");
List<FacetResult> results = example.runSearch();
System.out.println("Author: " + results.get(0));
System.out.println("Publish Date: " + results.get(1));
System.out.println("Facet drill-down example (Publish Date/2010):");
System.out.println("---------------------------------------------");
System.out.println("Author: " + example.runDrillDown());
System.out.println("Facet drill-sideways example (Publish Date/2010):");
System.out.println("---------------------------------------------");
for (FacetResult result : example.runDrillSideways()) {
System.out.println(result);
}
System.out.println("Facet counting example with exclusive ranges:");
System.out.println("---------------------------------------------");
for (FacetResult result : example.runNonOverlappingRangesCountFacetsOnly()) {
System.out.println(result);
}
System.out.println("Facet counting example with overlapping ranges:");
System.out.println("---------------------------------------------");
for (FacetResult result : example.runOverlappingRangesCountFacetsOnly()) {
System.out.println(result);
}
System.out.println("Facet aggregation example with exclusive ranges:");
System.out.println("---------------------------------------------");
for (FacetResult result : example.runNonOverlappingRangesAggregationFacets()) {
System.out.println(result);
}
}
}


@ -18,6 +18,7 @@ package org.apache.lucene.facet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -31,6 +32,7 @@ import org.apache.lucene.facet.sortedset.SortedSetDocValuesReaderState;
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.CollectorOwner;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
@ -300,35 +302,25 @@ public class DrillSideways {
}
}
private static class CallableCollector implements Callable<CallableResult> {
private final int pos;
private static class CallableCollector implements Callable<Void> {
private final IndexSearcher searcher;
private final Query query;
private final CollectorManager<?, ?> collectorManager;
private final CollectorOwner<?, ?> collectorOwner;
private CallableCollector(
int pos, IndexSearcher searcher, Query query, CollectorManager<?, ?> collectorManager) {
this.pos = pos;
IndexSearcher searcher, Query query, CollectorOwner<?, ?> collectorOwner) {
this.searcher = searcher;
this.query = query;
this.collectorManager = collectorManager;
this.collectorOwner = collectorOwner;
}
@Override
public CallableResult call() throws Exception {
return new CallableResult(pos, searcher.search(query, collectorManager));
}
}
private static class CallableResult {
private final int pos;
private final Object result;
private CallableResult(int pos, Object result) {
this.pos = pos;
this.result = result;
public Void call() throws Exception {
searcher.search(query, collectorOwner);
// Call getResult to trigger reduce; we don't need to return results because users can access
// them directly from collectorOwner.
collectorOwner.getResult();
return null;
}
}
@ -349,16 +341,125 @@ public class DrillSideways {
public <R> ConcurrentDrillSidewaysResult<R> search(
final DrillDownQuery query, final CollectorManager<?, R> hitCollectorManager)
throws IOException {
if (executor != null) {
return searchConcurrently(query, hitCollectorManager);
// Main query
FacetsCollectorManager drillDownFacetsCollectorManager =
createDrillDownFacetsCollectorManager();
final CollectorOwner<?, ?> mainCollectorOwner;
if (drillDownFacetsCollectorManager != null) {
// Make sure we populate a facet collector corresponding to the base query if desired:
mainCollectorOwner =
new CollectorOwner<>(
new MultiCollectorManager(drillDownFacetsCollectorManager, hitCollectorManager));
} else {
return searchSequentially(query, hitCollectorManager);
mainCollectorOwner = new CollectorOwner<>(hitCollectorManager);
}
// Drill sideways dimensions
final List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners;
if (query.getDims().isEmpty() == false) {
drillSidewaysCollectorOwners = new ArrayList<>(query.getDims().size());
for (int i = 0; i < query.getDims().size(); i++) {
drillSidewaysCollectorOwners.add(
new CollectorOwner<>(createDrillSidewaysFacetsCollectorManager()));
}
} else {
drillSidewaysCollectorOwners = null;
}
// Execute query
if (executor != null) {
searchConcurrently(query, mainCollectorOwner, drillSidewaysCollectorOwners);
} else {
searchSequentially(query, mainCollectorOwner, drillSidewaysCollectorOwners);
}
// Collect results
final FacetsCollector facetsCollectorResult;
final R hitCollectorResult;
if (drillDownFacetsCollectorManager != null) {
// drill down collected using MultiCollector
// Extract the results:
Object[] drillDownResult = (Object[]) mainCollectorOwner.getResult();
facetsCollectorResult = (FacetsCollector) drillDownResult[0];
hitCollectorResult = (R) drillDownResult[1];
} else {
facetsCollectorResult = null;
hitCollectorResult = (R) mainCollectorOwner.getResult();
}
// Getting results for drill sideways dimensions (if any)
final String[] drillSidewaysDims;
final FacetsCollector[] drillSidewaysCollectors;
if (query.getDims().isEmpty() == false) {
drillSidewaysDims = query.getDims().keySet().toArray(new String[0]);
int numDims = query.getDims().size();
assert drillSidewaysCollectorOwners != null;
assert drillSidewaysCollectorOwners.size() == numDims;
drillSidewaysCollectors = new FacetsCollector[numDims];
for (int dim = 0; dim < numDims; dim++) {
drillSidewaysCollectors[dim] =
(FacetsCollector) drillSidewaysCollectorOwners.get(dim).getResult();
}
} else {
drillSidewaysDims = null;
drillSidewaysCollectors = null;
}
return new ConcurrentDrillSidewaysResult<>(
buildFacetsResult(facetsCollectorResult, drillSidewaysCollectors, drillSidewaysDims),
null,
hitCollectorResult,
facetsCollectorResult,
drillSidewaysCollectors,
drillSidewaysDims);
}
/**
* Search using DrillDownQuery with custom collectors. This method can be used with any {@link
* CollectorOwner}s. It doesn't return anything because it is expected that you read results from
* the provided {@link CollectorOwner}s.
*
* <p>To read the results, run {@link CollectorOwner#getResult()} for drill down and all drill
* sideways dimensions.
*
* <p>Note: use {@link Collections#unmodifiableList(List)} to wrap {@code
* drillSidewaysCollectorOwners} to convince the compiler that it is safe to use List here.
*
* <p>Use {@link MultiCollectorManager} wrapped by {@link CollectorOwner} to collect both hits and
* facets for the entire query and/or for drill-sideways dimensions.
*
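* <p>A usage sketch (the cutter, recorders, query and searcher are assumed to exist; the
* cutter/recorder classes live in the sandbox faceting module, see SandboxFacetsExample for a
* complete example):
*
* <pre>{@code
* CountFacetRecorder drillDownRecorder = new CountFacetRecorder();
* CountFacetRecorder sidewaysRecorder = new CountFacetRecorder();
* List<CollectorOwner<FacetFieldCollector, CountFacetRecorder>> sidewaysOwners =
*     List.of(new CollectorOwner<>(new FacetFieldCollectorManager<>(cutter, sidewaysRecorder)));
* drillSideways.search(
*     query,
*     new CollectorOwner<>(new FacetFieldCollectorManager<>(cutter, drillDownRecorder)),
*     Collections.unmodifiableList(sidewaysOwners));
* // read facet counts directly from drillDownRecorder and sidewaysRecorder
* }</pre>
*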
* <p>TODO: Class CollectorOwner was created so that we can ignore CollectorManager type C,
* because we want each dimension to be able to use its own types. Alternatively, we could use a
* typesafe heterogeneous container and provide a CollectorManager type for each dimension to this
* method. For now the CollectorOwner approach seems more intuitive.
*/
public void search(
final DrillDownQuery query,
CollectorOwner<?, ?> drillDownCollectorOwner,
List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners)
throws IOException {
if (drillDownCollectorOwner == null) {
throw new IllegalArgumentException(
"This search method requires client to provide drill down collector manager");
}
if (drillSidewaysCollectorOwners == null) {
if (query.getDims().isEmpty() == false) {
throw new IllegalArgumentException(
"The query requires not null drillSidewaysCollectorOwners");
}
} else if (drillSidewaysCollectorOwners.size() != query.getDims().size()) {
throw new IllegalArgumentException(
"drillSidewaysCollectorOwners size must be equal to number of dimensions in the query.");
}
if (executor != null) {
searchConcurrently(query, drillDownCollectorOwner, drillSidewaysCollectorOwners);
} else {
searchSequentially(query, drillDownCollectorOwner, drillSidewaysCollectorOwners);
}
}
@SuppressWarnings("unchecked")
private <R> ConcurrentDrillSidewaysResult<R> searchSequentially(
final DrillDownQuery query, final CollectorManager<?, R> hitCollectorManager)
private void searchSequentially(
final DrillDownQuery query,
final CollectorOwner<?, ?> drillDownCollectorOwner,
final List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners)
throws IOException {
Map<String, Integer> drillDownDims = query.getDims();
@ -366,28 +467,9 @@ public class DrillSideways {
if (drillDownDims.isEmpty()) {
// There are no drill-down dims, so there is no
// drill-sideways to compute:
FacetsCollectorManager drillDownCollectorManager = createDrillDownFacetsCollectorManager();
FacetsCollector mainFacetsCollector;
R collectorResult;
if (drillDownCollectorManager != null) {
Object[] mainResults =
searcher.search(
query, new MultiCollectorManager(drillDownCollectorManager, hitCollectorManager));
// Extract the results:
mainFacetsCollector = (FacetsCollector) mainResults[0];
collectorResult = (R) mainResults[1];
} else {
mainFacetsCollector = null;
collectorResult = searcher.search(query, hitCollectorManager);
}
return new ConcurrentDrillSidewaysResult<>(
buildFacetsResult(mainFacetsCollector, null, null),
null,
collectorResult,
mainFacetsCollector,
null,
null);
searcher.search(query, drillDownCollectorOwner);
drillDownCollectorOwner.getResult();
return;
}
Query baseQuery = query.getBaseQuery();
@ -398,130 +480,64 @@ public class DrillSideways {
}
Query[] drillDownQueries = query.getDrillDownQueries();
int numDims = drillDownDims.size();
FacetsCollectorManager drillDownCollectorManager = createDrillDownFacetsCollectorManager();
FacetsCollectorManager[] drillSidewaysFacetsCollectorManagers =
new FacetsCollectorManager[numDims];
for (int i = 0; i < numDims; i++) {
drillSidewaysFacetsCollectorManagers[i] = createDrillSidewaysFacetsCollectorManager();
}
DrillSidewaysQuery dsq =
new DrillSidewaysQuery(
baseQuery,
drillDownCollectorManager,
drillSidewaysFacetsCollectorManagers,
// drillDownCollectorOwner,
// Don't pass drill down collector because drill down is collected by IndexSearcher
// itself.
// TODO: deprecate drillDown collection in DrillSidewaysQuery?
null,
drillSidewaysCollectorOwners,
drillDownQueries,
scoreSubDocsAtOnce());
R collectorResult = searcher.search(dsq, hitCollectorManager);
FacetsCollector drillDownCollector;
if (drillDownCollectorManager != null) {
drillDownCollector = drillDownCollectorManager.reduce(dsq.managedDrillDownCollectors);
} else {
drillDownCollector = null;
}
FacetsCollector[] drillSidewaysCollectors = new FacetsCollector[numDims];
int numSlices = dsq.managedDrillSidewaysCollectors.size();
for (int dim = 0; dim < numDims; dim++) {
List<FacetsCollector> facetsCollectorsForDim = new ArrayList<>(numSlices);
for (int slice = 0; slice < numSlices; slice++) {
facetsCollectorsForDim.add(dsq.managedDrillSidewaysCollectors.get(slice)[dim]);
searcher.search(dsq, drillDownCollectorOwner);
// This method doesn't return results because each dimension might have its own result type.
// But we call getResult to trigger result reduction, so that users don't have to worry about
// it.
drillDownCollectorOwner.getResult();
if (drillSidewaysCollectorOwners != null) {
for (CollectorOwner<?, ?> sidewaysOwner : drillSidewaysCollectorOwners) {
sidewaysOwner.getResult();
}
drillSidewaysCollectors[dim] =
drillSidewaysFacetsCollectorManagers[dim].reduce(facetsCollectorsForDim);
}
String[] drillSidewaysDims = drillDownDims.keySet().toArray(new String[0]);
return new ConcurrentDrillSidewaysResult<>(
buildFacetsResult(drillDownCollector, drillSidewaysCollectors, drillSidewaysDims),
null,
collectorResult,
drillDownCollector,
drillSidewaysCollectors,
drillSidewaysDims);
}
@SuppressWarnings("unchecked")
private <R> ConcurrentDrillSidewaysResult<R> searchConcurrently(
final DrillDownQuery query, final CollectorManager<?, R> hitCollectorManager)
private void searchConcurrently(
final DrillDownQuery query,
final CollectorOwner<?, ?> drillDownCollectorOwner,
final List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners)
throws IOException {
final Map<String, Integer> drillDownDims = query.getDims();
final List<CallableCollector> callableCollectors = new ArrayList<>(drillDownDims.size() + 1);
// Add the main DrillDownQuery
FacetsCollectorManager drillDownFacetsCollectorManager =
createDrillDownFacetsCollectorManager();
CollectorManager<?, ?> mainCollectorManager;
if (drillDownFacetsCollectorManager != null) {
// Make sure we populate a facet collector corresponding to the base query if desired:
mainCollectorManager =
new MultiCollectorManager(drillDownFacetsCollectorManager, hitCollectorManager);
} else {
mainCollectorManager = hitCollectorManager;
}
callableCollectors.add(new CallableCollector(-1, searcher, query, mainCollectorManager));
callableCollectors.add(new CallableCollector(searcher, query, drillDownCollectorOwner));
int i = 0;
final Query[] filters = query.getDrillDownQueries();
for (String dim : drillDownDims.keySet())
for (String dim : drillDownDims.keySet()) {
callableCollectors.add(
new CallableCollector(
i++,
searcher,
getDrillDownQuery(query, filters, dim),
createDrillSidewaysFacetsCollectorManager()));
final FacetsCollector mainFacetsCollector;
final FacetsCollector[] facetsCollectors = new FacetsCollector[drillDownDims.size()];
final R collectorResult;
drillSidewaysCollectorOwners.get(i)));
i++;
}
try {
// Run the query pool
final List<Future<CallableResult>> futures = executor.invokeAll(callableCollectors);
final List<Future<Void>> futures = executor.invokeAll(callableCollectors);
// Extract the results
if (drillDownFacetsCollectorManager != null) {
// If we populated a facets collector for the main query, make sure to unpack it properly
final Object[] mainResults = (Object[]) futures.get(0).get().result;
mainFacetsCollector = (FacetsCollector) mainResults[0];
collectorResult = (R) mainResults[1];
} else {
mainFacetsCollector = null;
collectorResult = (R) futures.get(0).get().result;
// Wait for results. We don't read the results as they are collected by CollectorOwners
for (i = 0; i < futures.size(); i++) {
futures.get(i).get();
}
for (i = 1; i < futures.size(); i++) {
final CallableResult result = futures.get(i).get();
facetsCollectors[result.pos] = (FacetsCollector) result.result;
}
// Fill the null results with the mainFacetsCollector
for (i = 0; i < facetsCollectors.length; i++)
if (facetsCollectors[i] == null) facetsCollectors[i] = mainFacetsCollector;
} catch (InterruptedException e) {
throw new ThreadInterruptedException(e);
} catch (ExecutionException e) {
throw new RuntimeException(e);
}
String[] drillSidewaysDims = drillDownDims.keySet().toArray(new String[0]);
// build the facets and return the result
return new ConcurrentDrillSidewaysResult<>(
buildFacetsResult(mainFacetsCollector, facetsCollectors, drillSidewaysDims),
null,
collectorResult,
mainFacetsCollector,
facetsCollectors,
drillSidewaysDims);
}
/**


@ -17,14 +17,14 @@
package org.apache.lucene.facet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BulkScorer;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorOwner;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
@ -45,10 +45,8 @@ class DrillSidewaysQuery extends Query {
final Query baseQuery;
final FacetsCollectorManager drillDownCollectorManager;
final FacetsCollectorManager[] drillSidewaysCollectorManagers;
final List<FacetsCollector> managedDrillDownCollectors;
final List<FacetsCollector[]> managedDrillSidewaysCollectors;
final CollectorOwner<?, ?> drillDownCollectorOwner;
final List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners;
final Query[] drillDownQueries;
@ -56,47 +54,17 @@ class DrillSidewaysQuery extends Query {
/**
* Construct a new {@code DrillSidewaysQuery} that will create new {@link FacetsCollector}s for
* each {@link LeafReaderContext} using the provided {@link FacetsCollectorManager}s. The caller
* can access the created {@link FacetsCollector}s through {@link #managedDrillDownCollectors} and
* {@link #managedDrillSidewaysCollectors}.
* each {@link LeafReaderContext} using the provided {@link FacetsCollectorManager}s.
*/
DrillSidewaysQuery(
Query baseQuery,
FacetsCollectorManager drillDownCollectorManager,
FacetsCollectorManager[] drillSidewaysCollectorManagers,
Query[] drillDownQueries,
boolean scoreSubDocsAtOnce) {
// Note that the "managed" facet collector lists are synchronized here since bulkScorer()
// can be invoked concurrently and needs to remain thread-safe. We're OK with synchronizing
// on the whole list as contention is expected to remain very low:
this(
baseQuery,
drillDownCollectorManager,
drillSidewaysCollectorManagers,
Collections.synchronizedList(new ArrayList<>()),
Collections.synchronizedList(new ArrayList<>()),
drillDownQueries,
scoreSubDocsAtOnce);
}
/**
* Needed for {@link Query#rewrite(IndexSearcher)}. Ensures the same "managed" lists get used
* since {@link DrillSideways} accesses references to these through the original {@code
* DrillSidewaysQuery}.
*/
private DrillSidewaysQuery(
Query baseQuery,
FacetsCollectorManager drillDownCollectorManager,
FacetsCollectorManager[] drillSidewaysCollectorManagers,
List<FacetsCollector> managedDrillDownCollectors,
List<FacetsCollector[]> managedDrillSidewaysCollectors,
CollectorOwner<?, ?> drillDownCollectorOwner,
List<CollectorOwner<?, ?>> drillSidewaysCollectorOwners,
Query[] drillDownQueries,
boolean scoreSubDocsAtOnce) {
this.baseQuery = Objects.requireNonNull(baseQuery);
this.drillDownCollectorManager = drillDownCollectorManager;
this.drillSidewaysCollectorManagers = drillSidewaysCollectorManagers;
this.managedDrillDownCollectors = managedDrillDownCollectors;
this.managedDrillSidewaysCollectors = managedDrillSidewaysCollectors;
this.drillDownCollectorOwner = drillDownCollectorOwner;
this.drillSidewaysCollectorOwners = drillSidewaysCollectorOwners;
this.drillDownQueries = drillDownQueries;
this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
}
@ -121,10 +89,8 @@ class DrillSidewaysQuery extends Query {
} else {
return new DrillSidewaysQuery(
newQuery,
drillDownCollectorManager,
drillSidewaysCollectorManagers,
managedDrillDownCollectors,
managedDrillSidewaysCollectors,
drillDownCollectorOwner,
drillSidewaysCollectorOwners,
drillDownQueries,
scoreSubDocsAtOnce);
}
@ -158,20 +124,15 @@ class DrillSidewaysQuery extends Query {
int drillDownCount = drillDowns.length;
FacetsCollector drillDownCollector;
LeafCollector drillDownLeafCollector;
if (drillDownCollectorManager != null) {
drillDownCollector = drillDownCollectorManager.newCollector();
managedDrillDownCollectors.add(drillDownCollector);
Collector drillDownCollector;
final LeafCollector drillDownLeafCollector;
if (drillDownCollectorOwner != null) {
drillDownCollector = drillDownCollectorOwner.newCollector();
drillDownLeafCollector = drillDownCollector.getLeafCollector(context);
} else {
drillDownCollector = null;
drillDownLeafCollector = null;
}
FacetsCollector[] sidewaysCollectors = new FacetsCollector[drillDownCount];
managedDrillSidewaysCollectors.add(sidewaysCollectors);
DrillSidewaysScorer.DocsAndCost[] dims =
new DrillSidewaysScorer.DocsAndCost[drillDownCount];
@ -183,8 +144,7 @@ class DrillSidewaysQuery extends Query {
scorer = new ConstantScoreScorer(0f, scoreMode, DocIdSetIterator.empty());
}
FacetsCollector sidewaysCollector = drillSidewaysCollectorManagers[dim].newCollector();
sidewaysCollectors[dim] = sidewaysCollector;
Collector sidewaysCollector = drillSidewaysCollectorOwners.get(dim).newCollector();
dims[dim] =
new DrillSidewaysScorer.DocsAndCost(
@ -195,11 +155,11 @@ class DrillSidewaysQuery extends Query {
// a null scorer in this case, but we need to make sure #finish gets called on all facet
// collectors since IndexSearcher won't handle this for us:
if (baseScorerSupplier == null || nullCount > 1) {
if (drillDownCollector != null) {
drillDownCollector.finish();
if (drillDownLeafCollector != null) {
drillDownLeafCollector.finish();
}
for (FacetsCollector fc : sidewaysCollectors) {
fc.finish();
for (DrillSidewaysScorer.DocsAndCost dim : dims) {
dim.sidewaysLeafCollector.finish();
}
return null;
}
@ -252,9 +212,9 @@ class DrillSidewaysQuery extends Query {
final int prime = 31;
int result = classHash();
result = prime * result + Objects.hashCode(baseQuery);
result = prime * result + Objects.hashCode(drillDownCollectorManager);
result = prime * result + Objects.hashCode(drillDownCollectorOwner);
result = prime * result + Arrays.hashCode(drillDownQueries);
result = prime * result + Arrays.hashCode(drillSidewaysCollectorManagers);
result = prime * result + Objects.hashCode(drillSidewaysCollectorOwners);
return result;
}
@ -265,8 +225,8 @@ class DrillSidewaysQuery extends Query {
private boolean equalsTo(DrillSidewaysQuery other) {
return Objects.equals(baseQuery, other.baseQuery)
&& Objects.equals(drillDownCollectorManager, other.drillDownCollectorManager)
&& Objects.equals(drillDownCollectorOwner, other.drillDownCollectorOwner)
&& Arrays.equals(drillDownQueries, other.drillDownQueries)
&& Arrays.equals(drillSidewaysCollectorManagers, other.drillSidewaysCollectorManagers);
&& Objects.equals(drillSidewaysCollectorOwners, other.drillSidewaysCollectorOwners);
}
}


@ -26,6 +26,7 @@ import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SegmentCacheable;
import org.apache.lucene.util.NumericUtils;
/**
* Base class for producing {@link MultiDoubleValues}. See also {@link DoubleValuesSource} for a
@ -118,6 +119,65 @@ public abstract class MultiDoubleValuesSource implements SegmentCacheable {
return new LongDoubleValuesSource(this);
}
/** Convert to a {@link MultiLongValuesSource} using {@link NumericUtils#doubleToSortableLong} */
public final MultiLongValuesSource toSortableMultiLongValuesSource() {
return new SortableMultiLongValuesSource(this);
}
private static class SortableMultiLongValuesSource extends MultiLongValuesSource {
MultiDoubleValuesSource inner;
SortableMultiLongValuesSource(MultiDoubleValuesSource inner) {
this.inner = Objects.requireNonNull(inner);
}
@Override
public boolean isCacheable(LeafReaderContext ctx) {
return inner.isCacheable(ctx);
}
@Override
public MultiLongValues getValues(LeafReaderContext ctx) throws IOException {
MultiDoubleValues doubleValues = inner.getValues(ctx);
return new MultiLongValues() {
@Override
public long getValueCount() {
return doubleValues.getValueCount();
}
@Override
public long nextValue() throws IOException {
return NumericUtils.doubleToSortableLong(doubleValues.nextValue());
}
@Override
public boolean advanceExact(int doc) throws IOException {
return doubleValues.advanceExact(doc);
}
};
}
@Override
public int hashCode() {
return inner.hashCode();
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
SortableMultiLongValuesSource that = (SortableMultiLongValuesSource) o;
return Objects.equals(inner, that.inner);
}
@Override
public String toString() {
return "sortableMultiLong(" + inner.toString() + ")";
}
}
private static class FieldMultiValuedSource extends MultiDoubleValuesSource {
private final String field;
private final LongToDoubleFunction decoder;


@ -178,6 +178,14 @@ public class FacetLabel implements Comparable<FacetLabel> {
}
}
/** Get the last component. */
public String lastComponent() {
if (components.length == 0) {
throw new UnsupportedOperationException("components is empty");
}
return components[components.length - 1];
}
/** Returns a string representation of the path. */
@Override
public String toString() {


@ -22,5 +22,6 @@ description = 'Various third party contributions and new ideas'
dependencies {
moduleApi project(':lucene:core')
moduleApi project(':lucene:queries')
moduleApi project(':lucene:facet')
moduleTestImplementation project(':lucene:test-framework')
}


@ -19,6 +19,7 @@
module org.apache.lucene.sandbox {
requires org.apache.lucene.core;
requires org.apache.lucene.queries;
requires org.apache.lucene.facet;
exports org.apache.lucene.payloads;
exports org.apache.lucene.sandbox.codecs.idversion;
@ -27,6 +28,12 @@ module org.apache.lucene.sandbox {
exports org.apache.lucene.sandbox.queries;
exports org.apache.lucene.sandbox.search;
exports org.apache.lucene.sandbox.index;
exports org.apache.lucene.sandbox.facet;
exports org.apache.lucene.sandbox.facet.recorders;
exports org.apache.lucene.sandbox.facet.cutters.ranges;
exports org.apache.lucene.sandbox.facet.iterators;
exports org.apache.lucene.sandbox.facet.cutters;
exports org.apache.lucene.sandbox.facet.labels;
provides org.apache.lucene.codecs.PostingsFormat with
org.apache.lucene.sandbox.codecs.idversion.IDVersionPostingsFormat;


@ -0,0 +1,261 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.sandbox.facet.cutters.LongValueFacetCutter;
import org.apache.lucene.sandbox.facet.iterators.ComparableSupplier;
import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder;
import org.apache.lucene.sandbox.facet.recorders.LongAggregationsFacetRecorder;
import org.apache.lucene.util.InPlaceMergeSorter;
/**
* Collection of static methods that provide the most common comparables for sandbox faceting. You
* can also use it as an example for creating your own {@link ComparableSupplier} to enable custom
* facet top-n and sorting.
*
* @lucene.experimental
*/
public final class ComparableUtils {
private ComparableUtils() {}
/** {@link ComparableSupplier} to sort by ords (ascending). */
public static ComparableSupplier<ByOrdinalComparable> byOrdinal() {
return new ComparableSupplier<>() {
public void reuseComparable(int ord, ByOrdinalComparable reuse) {
reuse.ord = ord;
}
public ByOrdinalComparable createComparable(int ord) {
ByOrdinalComparable result = new ByOrdinalComparable();
result.ord = ord;
return result;
}
};
}
/** Used for {@link #byOrdinal} result. */
public static class ByOrdinalComparable implements Comparable<ByOrdinalComparable> {
private int ord;
@Override
public int compareTo(ByOrdinalComparable o) {
return Integer.compare(o.ord, ord);
}
}
/**
* {@link ComparableSupplier} to sort ordinals by count (descending) with ord as a tie-break
* (ascending) using provided {@link CountFacetRecorder}.
*/
public static ComparableSupplier<ByCountComparable> byCount(CountFacetRecorder recorder) {
return new ComparableSupplier<>() {
public void reuseComparable(int ord, ByCountComparable reuse) {
reuse.ord = ord;
reuse.count = recorder.getCount(ord);
}
public ByCountComparable createComparable(int ord) {
ByCountComparable result = new ByCountComparable();
result.ord = ord;
result.count = recorder.getCount(ord);
return result;
}
};
}
/** Used for {@link #byCount} result. */
public static class ByCountComparable implements Comparable<ByCountComparable> {
private ByCountComparable() {}
private int count;
private int ord;
@Override
public int compareTo(ByCountComparable o) {
int cmp = Integer.compare(count, o.count);
if (cmp == 0) {
cmp = Integer.compare(o.ord, ord);
}
return cmp;
}
}
/**
* {@link ComparableSupplier} to sort ordinals by long aggregation (descending) with tie-break by
* count (descending) or by ordinal (ascending) using provided {@link CountFacetRecorder} and
* {@link LongAggregationsFacetRecorder}.
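*
* <p>For example, to get the top 10 recorded ordinals by the aggregation with index 1,
* tie-breaking by count (a sketch; both recorders are assumed to have been populated during
* collection):
*
* <pre>{@code
* ComparableSupplier<ByAggregatedValueComparable> supplier =
*     ComparableUtils.byAggregatedValue(countRecorder, longAggregationsRecorder, 1);
* OrdinalIterator topOrds =
*     new TopnOrdinalIterator<>(longAggregationsRecorder.recordedOrds(), supplier, 10);
* }</pre>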
*/
public static ComparableSupplier<ByAggregatedValueComparable> byAggregatedValue(
CountFacetRecorder countRecorder,
LongAggregationsFacetRecorder longAggregationsFacetRecorder,
int aggregationId) {
return new ComparableSupplier<>() {
public void reuseComparable(int ord, ByAggregatedValueComparable reuse) {
reuse.ord = ord;
reuse.secondaryRank = countRecorder.getCount(ord);
reuse.primaryRank = longAggregationsFacetRecorder.getRecordedValue(ord, aggregationId);
}
public ByAggregatedValueComparable createComparable(int ord) {
ByAggregatedValueComparable result = new ByAggregatedValueComparable();
reuseComparable(ord, result);
return result;
}
};
}
/** Used for {@link #byAggregatedValue} result. */
public static class ByAggregatedValueComparable
implements Comparable<ByAggregatedValueComparable> {
private ByAggregatedValueComparable() {}
private int ord;
private int secondaryRank;
private long primaryRank;
@Override
public int compareTo(ByAggregatedValueComparable o) {
int cmp = Long.compare(primaryRank, o.primaryRank);
if (cmp == 0) {
cmp = Integer.compare(secondaryRank, o.secondaryRank);
if (cmp == 0) {
cmp = Integer.compare(o.ord, ord);
}
}
return cmp;
}
}
/**
* {@link ComparableSupplier} to sort ordinals by long value from {@link LongValueFacetCutter}
* (descending).
*/
public static ComparableSupplier<ByLongValueComparable> byLongValue(
LongValueFacetCutter longValueFacetCutter) {
return new ComparableSupplier<>() {
public void reuseComparable(int ord, ByLongValueComparable reuse) {
reuse.value = longValueFacetCutter.getValue(ord);
}
public ByLongValueComparable createComparable(int ord) {
ByLongValueComparable result = new ByLongValueComparable();
result.value = longValueFacetCutter.getValue(ord);
return result;
}
};
}
/** Used for {@link #byLongValue} result. */
public static final class ByLongValueComparable implements Comparable<ByLongValueComparable> {
private ByLongValueComparable() {}
private long value;
@Override
public int compareTo(ByLongValueComparable o) {
return Long.compare(o.value, value);
}
@Override
public boolean equals(Object obj) {
if (obj instanceof ByLongValueComparable other) {
return other.value == value;
}
return false;
}
@Override
public int hashCode() {
return Objects.hash(value);
}
}
/**
* {@link ComparableSupplier} to sort ordinals by count (descending) from {@link
* CountFacetRecorder} with tie-break by long value (ascending) from {@link LongValueFacetCutter}.
*/
public static ComparableSupplier<ByCountAndLongValueComparable> byCount(
CountFacetRecorder countFacetRecorder, LongValueFacetCutter longValueFacetCutter) {
return new ComparableSupplier<>() {
public void reuseComparable(int ord, ByCountAndLongValueComparable reuse) {
reuse.value = longValueFacetCutter.getValue(ord);
reuse.count = countFacetRecorder.getCount(ord);
}
public ByCountAndLongValueComparable createComparable(int ord) {
ByCountAndLongValueComparable result = new ByCountAndLongValueComparable();
reuseComparable(ord, result);
return result;
}
};
}
/** Used for {@link #byCount(CountFacetRecorder, LongValueFacetCutter)} result. */
public static class ByCountAndLongValueComparable
implements Comparable<ByCountAndLongValueComparable> {
private ByCountAndLongValueComparable() {}
private int count;
private long value;
@Override
public int compareTo(ByCountAndLongValueComparable o) {
int cmp = Integer.compare(count, o.count);
if (cmp == 0) {
cmp = Long.compare(o.value, value);
}
return cmp;
}
}
/**
* Sort array of ordinals.
*
* <p>To get top-n ordinals use {@link
* org.apache.lucene.sandbox.facet.iterators.TopnOrdinalIterator} instead.
*
* @param ordinals array of ordinals to sort
* @param comparableSupplier defines sort order
*/
public static <T extends Comparable<T>> void sort(
int[] ordinals, ComparableSupplier<T> comparableSupplier) throws IOException {
List<T> comparables = new ArrayList<>(ordinals.length);
for (int i = 0; i < ordinals.length; i++) {
comparables.add(comparableSupplier.createComparable(ordinals[i]));
}
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
int tmp = ordinals[i];
ordinals[i] = ordinals[j];
ordinals[j] = tmp;
Collections.swap(comparables, i, j);
}
@Override
protected int compare(int i, int j) {
return comparables.get(j).compareTo(comparables.get(i));
}
}.sort(0, ordinals.length);
}
}


@ -0,0 +1,59 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.sandbox.facet.cutters.FacetCutter;
import org.apache.lucene.sandbox.facet.recorders.FacetRecorder;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.ScoreMode;
/**
* {@link Collector} that brings together {@link FacetCutter} and {@link FacetRecorder} to compute
* facets during collection phase.
*
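* <p>Instances are typically created by {@link FacetFieldCollectorManager}; see that class for a
* usage sketch.
*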
* @lucene.experimental
*/
public final class FacetFieldCollector implements Collector {
private final FacetCutter facetCutter;
private final FacetRecorder facetRecorder;
/** Collector for cutter+recorder pair. */
public FacetFieldCollector(FacetCutter facetCutter, FacetRecorder facetRecorder) {
this.facetCutter = facetCutter;
this.facetRecorder = facetRecorder;
}
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
return new FacetFieldLeafCollector(context, facetCutter, facetRecorder);
}
@Override
public ScoreMode scoreMode() {
// TODO: Some FacetRecorders might need scores, e.g. to get associated numeric values, see for
// example TaxonomyFacetFloatAssociations. Not sure if anyone actually uses it, because
// FacetsCollectorManager creates FacetsCollector with keepScores: false. But if someone needs
// it, we can add boolean needScores method to FacetRecorder interface, return
// ScoreMode.COMPLETE here when the method returns true. FacetRecorders#needScores should be
// implemented on a case-by-case basis, e.g. LongAggregationsFacetRecorder can take it as a
// constructor argument, and when it's true call LongValues#getValues with the scores.
return ScoreMode.COMPLETE_NO_SCORES;
}
}


@ -0,0 +1,53 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet;
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.sandbox.facet.cutters.FacetCutter;
import org.apache.lucene.sandbox.facet.recorders.FacetRecorder;
import org.apache.lucene.search.CollectorManager;
/**
* Collector manager for {@link FacetFieldCollector}. Returns the same extension of {@link
* FacetRecorder} that was used to collect results.
*
* @lucene.experimental
*/
public final class FacetFieldCollectorManager<V extends FacetRecorder>
implements CollectorManager<FacetFieldCollector, V> {
private final FacetCutter facetCutter;
private final V facetRecorder;
/** Create collector for a cutter + recorder pair */
public FacetFieldCollectorManager(FacetCutter facetCutter, V facetRecorder) {
this.facetCutter = facetCutter;
this.facetRecorder = facetRecorder;
}
@Override
public FacetFieldCollector newCollector() throws IOException {
return new FacetFieldCollector(facetCutter, facetRecorder);
}
@Override
public V reduce(Collection<FacetFieldCollector> collectors) throws IOException {
facetRecorder.reduce(facetCutter);
return this.facetRecorder;
}
}
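A minimal usage sketch (not part of this diff) of how a cutter + recorder pair is run through this manager. CountFacetRecorder belongs to the recorders package added by this change but is not shown in this excerpt, so its no-arg constructor is an assumption; an open IndexSearcher named searcher is also assumed.

LongValueFacetCutter cutter = new LongValueFacetCutter("popularity");
CountFacetRecorder recorder = new CountFacetRecorder(); // assumed recorder implementation
FacetFieldCollectorManager<CountFacetRecorder> manager =
    new FacetFieldCollectorManager<>(cutter, recorder);
CountFacetRecorder result = searcher.search(new MatchAllDocsQuery(), manager);
// result holds per-ordinal aggregations; cutter.getLabel(ord) maps an ordinal back to its value.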

View File

@ -0,0 +1,76 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.sandbox.facet.cutters.FacetCutter;
import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter;
import org.apache.lucene.sandbox.facet.recorders.FacetRecorder;
import org.apache.lucene.sandbox.facet.recorders.LeafFacetRecorder;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.LeafCollector;
import org.apache.lucene.search.Scorable;
/**
* {@link LeafCollector} that, for each facet ordinal from {@link LeafFacetCutter}, records data with
* {@link LeafFacetRecorder}.
*/
final class FacetFieldLeafCollector implements LeafCollector {
private final LeafReaderContext context;
private final FacetCutter cutter;
private final FacetRecorder recorder;
private LeafFacetCutter leafCutter;
private LeafFacetRecorder leafRecorder;
FacetFieldLeafCollector(LeafReaderContext context, FacetCutter cutter, FacetRecorder recorder) {
this.context = context;
this.cutter = cutter;
this.recorder = recorder;
}
@Override
public void setScorer(Scorable scorer) throws IOException {
// TODO: see comment in FacetFieldCollector#scoreMode
}
@Override
public void collect(int doc) throws IOException {
if (leafCutter == null) {
leafCutter = cutter.createLeafCutter(context);
assert leafRecorder == null;
leafRecorder = recorder.getLeafRecorder(context);
}
if (leafCutter.advanceExact(doc)) {
for (int curOrd = leafCutter.nextOrd();
curOrd != LeafFacetCutter.NO_MORE_ORDS;
curOrd = leafCutter.nextOrd()) {
leafRecorder.record(doc, curOrd);
}
}
}
@Override
public DocIdSetIterator competitiveIterator() throws IOException {
// TODO: any ideas?
// 1. Docs that have values for the index field we are about to facet on
// 2. TK
return LeafCollector.super.competitiveIterator();
}
}

View File

@ -0,0 +1,54 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.cutters;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.sandbox.facet.iterators.OrdinalIterator;
/**
* Creates {@link LeafFacetCutter} for each leaf.
*
* <p>TODO: do we need FacetCutterManager similar to CollectorManager, e.g. is createLeafCutter
* always thread safe?
*
* @lucene.experimental
*/
public interface FacetCutter {
/** Get cutter for the leaf. */
LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException;
/**
* For facets that have hierarchy (levels), return all top level dimension ordinals that require
* rollup.
*
* <p>Rollup is an optimization for facet types that support hierarchy: if a single document
* belongs to at most one node in the hierarchy, we can first record data for these nodes only,
* and then roll up values to parent ordinals.
*
* <p>Default implementation returns null, which means that rollup is not needed.
*/
default OrdinalIterator getOrdinalsToRollup() throws IOException {
return null;
}
/** For facets that have hierarchy (levels), get all children ordinals for given ord. */
default OrdinalIterator getChildrenOrds(int ord) throws IOException {
return null;
}
}

View File

@ -0,0 +1,34 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.cutters;
import java.io.IOException;
import org.apache.lucene.sandbox.facet.iterators.OrdinalIterator;
/**
* Interface to be implemented to cut documents into facets for an index segment (leaf).
*
* <p>When {@link #advanceExact(int)} returns true, {@link #nextOrd()} yields all facet ordinals for
* the current document. It is illegal to call {@link #nextOrd()} if {@link #advanceExact(int)}
* returns false.
*
* @lucene.experimental
*/
public interface LeafFacetCutter extends OrdinalIterator {
/** Advance to the given doc and return true if it has any facet ordinals. */
boolean advanceExact(int doc) throws IOException;
}
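The consumption contract, as used by FacetFieldLeafCollector above: advanceExact must return true before nextOrd may be called, and nextOrd is called until NO_MORE_ORDS. A per-segment sketch:

LeafFacetCutter leafCutter = cutter.createLeafCutter(context); // one per segment
if (leafCutter.advanceExact(doc)) {
  for (int ord = leafCutter.nextOrd();
      ord != LeafFacetCutter.NO_MORE_ORDS;
      ord = leafCutter.nextOrd()) {
    // record data for (doc, ord), e.g. leafRecorder.record(doc, ord)
  }
}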

View File

@ -0,0 +1,187 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.cutters;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.function.IntSupplier;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.internal.hppc.IntLongHashMap;
import org.apache.lucene.internal.hppc.LongIntHashMap;
import org.apache.lucene.sandbox.facet.labels.OrdToLabel;
/**
* {@link FacetCutter} and {@link OrdToLabel} for distinct long values.
*
* <p>TODO: This class is quite inefficient. Will optimise later.
*
* <p>TODO: add support for other value sources e.g: LongValues
*
* @lucene.experimental
*/
public final class LongValueFacetCutter implements FacetCutter, OrdToLabel {
private final String field;
// TODO: consider alternatives if this is a bottleneck
private final LongIntHashMapSyncCompute valueToOrdMap;
private IntLongHashMap ordToValueMap;
private final AtomicInteger maxOrdinal;
/**
* Constructor.
*
* @param field field name to read long values from.
*/
public LongValueFacetCutter(String field) {
this.field = field;
valueToOrdMap = new LongIntHashMapSyncCompute();
ordToValueMap = null;
maxOrdinal = new AtomicInteger(-1);
}
@Override
public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException {
SortedNumericDocValues docValues = DocValues.getSortedNumeric(context.reader(), field);
return new LeafFacetCutter() {
int docValueCount;
long lastDocValue;
int docValueCursor;
@Override
public boolean advanceExact(int doc) throws IOException {
if (docValues.advanceExact(doc)) {
docValueCount = docValues.docValueCount();
docValueCursor = 0;
return true;
}
return false;
}
@Override
public int nextOrd() throws IOException {
while (docValueCursor++ < docValueCount) {
long value = docValues.nextValue();
// SortedNumericDocValues can have duplicates, but values are sorted, so we only need to
// check previous value to remove duplicates
if (docValueCursor == 1 || value != lastDocValue) {
lastDocValue = value;
return valueToOrdMap.computeIfAbsent(value, maxOrdinal::incrementAndGet);
}
}
return NO_MORE_ORDS;
}
};
}
@Override
public FacetLabel getLabel(int ordinal) {
if (ordToValueMap == null) {
buildOrdToValueMap();
}
if (ordToValueMap.containsKey(ordinal)) {
return new FacetLabel(String.valueOf(ordToValueMap.get(ordinal)));
}
assert false
: "ordinal="
+ ordinal
+ ", ordToValueMap.size="
+ ordToValueMap.size()
+ ", valueToOrdMap.size="
+ valueToOrdMap.size();
return null;
}
/**
* Get value by ordinal. Should only be called after collection phase.
*
* <p>TODO: we need it to tie break sort by value. Alternatively we can sort by label (then we
* don't need this method), but we would have to convert FacetLabel to "long" to have the same
* order... Overall, it is probably not important to tie break by value, and we can tie break by
* ord same as for other facets; but for now we don't want to change results order just in case.
*
* @param ordinal facet ordinal.
* @return long value
*/
public long getValue(int ordinal) {
// TODO: do we want to create #finish method that called by #reduce to build the map?
if (ordToValueMap == null) {
buildOrdToValueMap();
}
return ordToValueMap.get(ordinal);
}
private void buildOrdToValueMap() {
ordToValueMap = new IntLongHashMap(valueToOrdMap.size());
for (LongIntHashMap.LongIntCursor cursor : valueToOrdMap) {
ordToValueMap.put(cursor.value, cursor.key);
}
}
@Override
public FacetLabel[] getLabels(int[] ordinals) throws IOException {
FacetLabel[] facetLabels = new FacetLabel[ordinals.length];
for (int i = 0; i < ordinals.length; i++) {
facetLabels[i] = getLabel(ordinals[i]);
}
return facetLabels;
}
/** {@link LongIntHashMap} with threadsafe computeIfAbsent method */
private static class LongIntHashMapSyncCompute extends LongIntHashMap {
private final ReentrantReadWriteLock rwl = new ReentrantReadWriteLock();
private final Lock r = rwl.readLock();
private final Lock w = rwl.writeLock();
/**
* If key exists in the map return its value, otherwise insert value from the value supplier and
* return it.
*
* <p>The method is threadsafe, and it allows concurrent reading from the map, but it locks the
* map to insert a new value as it might require rehashing.
*/
public int computeIfAbsent(long key, IntSupplier valueSupplier) {
r.lock();
int value;
try {
value = super.getOrDefault(key, -1);
} finally {
r.unlock();
}
if (value == -1) {
w.lock();
try {
int index = super.indexOf(key);
if (super.indexExists(index)) {
return super.indexGet(index);
} else {
value = valueSupplier.getAsInt();
super.indexInsert(index, key, value);
return value;
}
} finally {
w.unlock();
}
} else {
return value;
}
}
}
}
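After the collection phase this class doubles as the OrdToLabel for the field. A sketch of resolving recorded ordinals back to values; how ordinals are obtained from the recorder is an assumption here since the recorders package is not shown in this excerpt:

LongValueFacetCutter cutter = new LongValueFacetCutter("popularity");
// ... run a search with new FacetFieldCollectorManager<>(cutter, recorder) ...
int ord = 0; // hypothetical ordinal obtained from the recorder
FacetLabel label = cutter.getLabel(ord); // e.g. new FacetLabel("42")
long value = cutter.getValue(ord);       // e.g. 42L, useful to tie-break sorting by value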

View File

@ -0,0 +1,199 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.cutters;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.sandbox.facet.iterators.OrdinalIterator;
/**
* {@link FacetCutter} for facets that use taxonomy side-car index.
*
* @lucene.experimental
*/
public final class TaxonomyFacetsCutter implements FacetCutter {
private final FacetsConfig facetsConfig;
private final TaxonomyReader taxoReader;
private final String indexFieldName;
private final boolean disableRollup;
private ParallelTaxonomyArrays.IntArray children;
private ParallelTaxonomyArrays.IntArray siblings;
/** Create {@link FacetCutter} for taxonomy facets. */
public TaxonomyFacetsCutter(
String indexFieldName, FacetsConfig facetsConfig, TaxonomyReader taxoReader) {
this(indexFieldName, facetsConfig, taxoReader, false);
}
/**
* Expert: Create {@link FacetCutter} for taxonomy facets.
*
* @param disableRollup if set to true, rollup is disabled. In most cases users should not use it.
* Setting it to true silently leads to incorrect results for dimensions that require rollup.
* At the same time, if you are sure that there are no dimensions that require rollup, setting
* it to true might improve performance.
*/
public TaxonomyFacetsCutter(
String indexFieldName,
FacetsConfig facetsConfig,
TaxonomyReader taxoReader,
boolean disableRollup) {
this.facetsConfig = facetsConfig;
this.indexFieldName = indexFieldName;
this.taxoReader = taxoReader;
this.disableRollup = disableRollup;
}
/**
* Returns int[] mapping each ordinal to its first child; this is a large array and is computed
* (and then saved) the first time this method is invoked.
*/
ParallelTaxonomyArrays.IntArray getChildren() throws IOException {
if (children == null) {
children = taxoReader.getParallelTaxonomyArrays().children();
}
return children;
}
/**
* Returns int[] mapping each ordinal to its next sibling; this is a large array and is computed
* (and then saved) the first time this method is invoked.
*/
ParallelTaxonomyArrays.IntArray getSiblings() throws IOException {
if (siblings == null) {
siblings = taxoReader.getParallelTaxonomyArrays().siblings();
}
return siblings;
}
@Override
public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException {
SortedNumericDocValues multiValued =
DocValues.getSortedNumeric(context.reader(), indexFieldName);
// DocValues.getSortedNumeric never returns null
assert multiValued != null;
// TODO: if multiValued is emptySortedNumeric we can throw CollectionTerminatedException
// in FacetFieldLeafCollector and save some CPU cycles.
TaxonomyLeafFacetCutterMultiValue leafCutter =
new TaxonomyLeafFacetCutterMultiValue(multiValued);
return leafCutter;
// TODO: does unwrapping Single valued make things any faster? We still need to wrap it into
// LeafFacetCutter
// NumericDocValues singleValued = DocValues.unwrapSingleton(multiValued);
}
@Override
public OrdinalIterator getOrdinalsToRollup() throws IOException {
if (disableRollup) {
return null;
}
// Rollup any necessary dims:
Iterator<Map.Entry<String, FacetsConfig.DimConfig>> dimensions =
facetsConfig.getDimConfigs().entrySet().iterator();
ArrayList<FacetLabel> dimsToRollup = new ArrayList<>();
while (dimensions.hasNext()) {
Map.Entry<String, FacetsConfig.DimConfig> ent = dimensions.next();
String dim = ent.getKey();
FacetsConfig.DimConfig ft = ent.getValue();
if (ft.hierarchical && ft.multiValued == false && ft.indexFieldName.equals(indexFieldName)) {
dimsToRollup.add(new FacetLabel(dim));
}
}
int[] dimOrdToRollup = taxoReader.getBulkOrdinals(dimsToRollup.toArray(new FacetLabel[0]));
return new OrdinalIterator() {
int currentIndex = 0;
@Override
public int nextOrd() throws IOException {
for (; currentIndex < dimOrdToRollup.length; currentIndex++) {
// It can be invalid if this field was declared in the
// config but never indexed
if (dimOrdToRollup[currentIndex] != TaxonomyReader.INVALID_ORDINAL) {
return dimOrdToRollup[currentIndex++];
}
}
return NO_MORE_ORDS;
}
};
}
@Override
public OrdinalIterator getChildrenOrds(final int parentOrd) throws IOException {
ParallelTaxonomyArrays.IntArray children = getChildren();
ParallelTaxonomyArrays.IntArray siblings = getSiblings();
return new OrdinalIterator() {
int currentChild = parentOrd;
@Override
public int nextOrd() {
if (currentChild == parentOrd) {
currentChild = children.get(currentChild);
} else {
currentChild = siblings.get(currentChild);
}
if (currentChild != TaxonomyReader.INVALID_ORDINAL) {
return currentChild;
}
return NO_MORE_ORDS;
}
};
}
private static class TaxonomyLeafFacetCutterMultiValue implements LeafFacetCutter {
private final SortedNumericDocValues multiValued;
private int ordsInDoc;
private TaxonomyLeafFacetCutterMultiValue(SortedNumericDocValues multiValued) {
this.multiValued = multiValued;
}
@Override
public int nextOrd() throws IOException {
if (ordsInDoc > 0) {
ordsInDoc--;
return (int) multiValued.nextValue();
}
return LeafFacetCutter.NO_MORE_ORDS;
}
@Override
public boolean advanceExact(int doc) throws IOException {
if (multiValued.advanceExact(doc)) {
ordsInDoc = multiValued.docValueCount();
return true;
}
return false;
}
}
}
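A construction sketch for the taxonomy case, assuming a taxonomy Directory taxoDir, an IndexSearcher searcher and a Query query; the recorder type is an assumption (it lives in the recorders package of this change, not shown in this excerpt):

TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
FacetsConfig config = new FacetsConfig();
TaxonomyFacetsCutter cutter =
    new TaxonomyFacetsCutter(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, config, taxoReader);
CountFacetRecorder recorder = new CountFacetRecorder(); // assumed recorder implementation
searcher.search(query, new FacetFieldCollectorManager<>(cutter, recorder));
// Taxonomy ordinals recorded by the recorder can then be resolved via taxoReader and label helpers.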

View File

@ -0,0 +1,23 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Sandbox faceting: facet cutters, see {@link org.apache.lucene.sandbox.facet.cutters.FacetCutter}
* for details.
*
* @lucene.experimental
*/
package org.apache.lucene.sandbox.facet.cutters;

View File

@ -0,0 +1,90 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.cutters.ranges;
import java.io.IOException;
import org.apache.lucene.facet.MultiDoubleValuesSource;
import org.apache.lucene.facet.MultiLongValuesSource;
import org.apache.lucene.facet.range.DoubleRange;
import org.apache.lucene.facet.range.DoubleRangeFacetCounts;
import org.apache.lucene.facet.range.LongRange;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.sandbox.facet.cutters.FacetCutter;
import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.LongValuesSource;
import org.apache.lucene.util.NumericUtils;
/**
* {@link FacetCutter} for ranges of double values.
*
* <p>Based on {@link DoubleRangeFacetCounts}, this class translates double ranges to long ranges
* using {@link NumericUtils#doubleToSortableLong} and delegates faceting work to a {@link
* LongRangeFacetCutter}.
*
* @lucene.experimental
*/
public final class DoubleRangeFacetCutter implements FacetCutter {
private final LongRangeFacetCutter longRangeFacetCutter;
/** Constructor. */
public DoubleRangeFacetCutter(
MultiDoubleValuesSource multiDoubleValuesSource, DoubleRange[] doubleRanges) {
super();
DoubleValuesSource singleDoubleValuesSource =
MultiDoubleValuesSource.unwrapSingleton(multiDoubleValuesSource);
LongValuesSource singleLongValuesSource;
MultiLongValuesSource multiLongValuesSource;
if (singleDoubleValuesSource != null) {
singleLongValuesSource = singleDoubleValuesSource.toSortableLongDoubleValuesSource();
multiLongValuesSource = null;
} else {
singleLongValuesSource = null;
multiLongValuesSource = multiDoubleValuesSource.toSortableMultiLongValuesSource();
}
LongRange[] longRanges = mapDoubleRangesToSortableLong(doubleRanges);
// TODO: instead of relying on either single value source or multi value source to be null, we
// should create different factory methods for single and multi valued versions and use the
// right one
this.longRangeFacetCutter =
LongRangeFacetCutter.createSingleOrMultiValued(
multiLongValuesSource, singleLongValuesSource, longRanges);
}
@Override
public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException {
return longRangeFacetCutter.createLeafCutter(context);
}
// TODO: it is exactly the same as DoubleRangeFacetCounts#getLongRanges (protected), we should
// dedup
private LongRange[] mapDoubleRangesToSortableLong(DoubleRange[] doubleRanges) {
LongRange[] longRanges = new LongRange[doubleRanges.length];
for (int i = 0; i < longRanges.length; i++) {
DoubleRange dr = doubleRanges[i];
longRanges[i] =
new LongRange(
dr.label,
NumericUtils.doubleToSortableLong(dr.min),
true,
NumericUtils.doubleToSortableLong(dr.max),
true);
}
return longRanges;
}
}
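A construction sketch; DoubleRange and MultiDoubleValuesSource.fromDoubleField come from the existing facet module, and "price" is a hypothetical numeric doc values field:

DoubleRange[] ranges =
    new DoubleRange[] {
      new DoubleRange("under 10", 0.0, true, 10.0, false),
      new DoubleRange("10 or more", 10.0, true, Double.POSITIVE_INFINITY, true)
    };
DoubleRangeFacetCutter cutter =
    new DoubleRangeFacetCutter(MultiDoubleValuesSource.fromDoubleField("price"), ranges);
// Ordinal i returned during collection corresponds to ranges[i].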

View File

@ -0,0 +1,95 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.cutters.ranges;
import java.io.IOException;
import org.apache.lucene.sandbox.facet.iterators.OrdinalIterator;
import org.apache.lucene.util.FixedBitSet;
/**
* A specialised ordinal iterator that supports write (set and clear) operations. Clients can write
* data and freeze the state before reading data from it like any other OrdinalIterator. Instances
* may be reused by clearing the current iterator E.g. LongRangeFacetCutter uses IntervalTracker
* instances to map ranges to ordinals and track per-range data and retrieve recorded ranges for a
* data set.
*
* @lucene.experimental
*/
interface IntervalTracker extends OrdinalIterator {
/** Track information for the seen input interval. */
void set(int i);
/** Clear recorded information on this tracker. */
void clear();
/** Check if any data for the interval has been recorded. */
boolean get(int index);
/** finalise any state before read operations can be performed on this OrdinalIterator */
void freeze();
/**
* Interval Tracker that tracks data for multiple intervals. An interval is recorded only once,
* when data belonging to it is first encountered.
*/
class MultiIntervalTracker implements IntervalTracker {
private FixedBitSet tracker;
private int trackerState;
private int bitFrom;
private int intervalsWithHit;
MultiIntervalTracker(int size) {
tracker = new FixedBitSet(size);
}
@Override
public void set(int i) {
tracker.set(i);
}
@Override
public void clear() {
tracker.clear();
bitFrom = 0;
trackerState = 0;
intervalsWithHit = 0;
}
@Override
public boolean get(int index) {
return tracker.get(index);
}
@Override
public void freeze() {
intervalsWithHit = tracker.cardinality();
}
@Override
public int nextOrd() throws IOException {
if (trackerState == intervalsWithHit) {
return NO_MORE_ORDS;
}
trackerState++;
int nextSetBit = tracker.nextSetBit(bitFrom);
bitFrom = nextSetBit + 1;
return nextSetBit;
}
}
}
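The intended write-then-freeze-then-read cycle, as an internal sketch grounded in the methods above (the interface is package-private, so this only applies within the ranges package):

IntervalTracker tracker = new IntervalTracker.MultiIntervalTracker(8);
tracker.set(2);
tracker.set(5);
tracker.freeze(); // must be called before reading
for (int ord = tracker.nextOrd(); ord != OrdinalIterator.NO_MORE_ORDS; ord = tracker.nextOrd()) {
  // yields 2, then 5
}
tracker.clear(); // reuse the same instance for the next document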

View File

@ -0,0 +1,320 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.cutters.ranges;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.facet.MultiLongValues;
import org.apache.lucene.facet.MultiLongValuesSource;
import org.apache.lucene.facet.range.LongRange;
import org.apache.lucene.sandbox.facet.cutters.FacetCutter;
import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter;
import org.apache.lucene.search.LongValues;
import org.apache.lucene.search.LongValuesSource;
/**
* {@link FacetCutter} for ranges of long values. It's based on LongRangeCounter class.
*
* @lucene.experimental
*/
public abstract class LongRangeFacetCutter implements FacetCutter {
final MultiLongValuesSource valuesSource;
// TODO: refactor - weird that we have both multi and single here.
final LongValuesSource singleValues;
final LongRangeAndPos[] sortedRanges;
final int requestedRangeCount;
final List<InclusiveRange> elementaryIntervals;
/** elementary interval boundaries used for efficient counting (bsearch to find interval) */
final long[] boundaries;
final int[] pos;
// Default interval position, when elementary interval is mapped to this interval
// it is skipped.
static final int SKIP_INTERVAL_POSITION = -1;
/** Create {@link FacetCutter} for provided value source and long ranges. */
static LongRangeFacetCutter createSingleOrMultiValued(
MultiLongValuesSource longValuesSource,
LongValuesSource singleLongValuesSource,
LongRange[] longRanges) {
if (areOverlappingRanges(longRanges)) {
return new OverlappingLongRangeFacetCutter(
longValuesSource, singleLongValuesSource, longRanges);
}
return new NonOverlappingLongRangeFacetCutter(
longValuesSource, singleLongValuesSource, longRanges);
}
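/** Create {@link FacetCutter} for a multi-valued source and the provided long ranges. */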
public static LongRangeFacetCutter create(
MultiLongValuesSource longValuesSource, LongRange[] longRanges) {
return createSingleOrMultiValued(longValuesSource, null, longRanges);
}
// caller handles conversion of Doubles and DoubleRange to Long and LongRange
// ranges need not be sorted
LongRangeFacetCutter(
MultiLongValuesSource longValuesSource,
LongValuesSource singleLongValuesSource,
LongRange[] longRanges) {
super();
valuesSource = longValuesSource;
if (singleLongValuesSource != null) {
singleValues = singleLongValuesSource;
} else {
singleValues = MultiLongValuesSource.unwrapSingleton(valuesSource);
}
sortedRanges = new LongRangeAndPos[longRanges.length];
requestedRangeCount = longRanges.length;
for (int i = 0; i < longRanges.length; i++) {
sortedRanges[i] = new LongRangeAndPos(longRanges[i], i);
}
Arrays.sort(this.sortedRanges, Comparator.comparingLong(r -> r.range.min));
elementaryIntervals = buildElementaryIntervals();
// Keep track of elementary interval boundary ends (for binary search) along with the requested
// range they map back to (and -1 when they map to a "gap" range in case of ExclusiveRanges):
boundaries = new long[elementaryIntervals.size()];
pos = new int[elementaryIntervals.size()];
Arrays.fill(pos, SKIP_INTERVAL_POSITION);
int currRange = 0;
for (int i = 0; i < boundaries.length; i++) {
boundaries[i] = elementaryIntervals.get(i).end;
if (currRange < sortedRanges.length) {
LongRangeAndPos curr = sortedRanges[currRange];
if (boundaries[i] == curr.range.max) {
pos[i] = curr.pos;
currRange++;
}
}
}
}
/**
* Generates non-overlapping intervals that cover requested ranges and gaps in-between. Each
* elementary range refers to a gap, single requested range, or multiple requested ranges when
* they overlap.
*/
abstract List<InclusiveRange> buildElementaryIntervals();
private static boolean areOverlappingRanges(LongRange[] ranges) {
if (ranges.length == 0) {
return false;
}
// Copy before sorting so we don't mess with the caller's original ranges:
// TODO: We're going to do this again in the constructor. Can't we come up with a clever way to
// avoid doing it twice?
LongRange[] sortedRanges = new LongRange[ranges.length];
System.arraycopy(ranges, 0, sortedRanges, 0, ranges.length);
Arrays.sort(sortedRanges, Comparator.comparingLong(r -> r.min));
long previousMax = sortedRanges[0].max;
for (int i = 1; i < sortedRanges.length; i++) {
// Ranges overlap if the next min is <= the previous max (note that LongRange models
// closed ranges, so equal limit points are considered overlapping):
if (sortedRanges[i].min <= previousMax) {
return true;
}
previousMax = sortedRanges[i].max;
}
return false;
}
abstract static class LongRangeMultivaluedLeafFacetCutter implements LeafFacetCutter {
private final MultiLongValues multiLongValues;
private final long[] boundaries;
final int[] pos;
final IntervalTracker elementaryIntervalTracker;
// TODO: we need it only for overlapping ranges, should not handle it in advanceExact for
// exclusive ranges.
IntervalTracker requestedIntervalTracker;
LongRangeMultivaluedLeafFacetCutter(MultiLongValues longValues, long[] boundaries, int[] pos) {
this.multiLongValues = longValues;
this.boundaries = boundaries;
this.pos = pos;
elementaryIntervalTracker = new IntervalTracker.MultiIntervalTracker(boundaries.length);
}
@Override
public boolean advanceExact(int doc) throws IOException {
if (multiLongValues.advanceExact(doc) == false) {
return false;
}
elementaryIntervalTracker.clear();
if (requestedIntervalTracker != null) {
requestedIntervalTracker.clear();
}
long numValues = multiLongValues.getValueCount();
int lastIntervalSeen = -1;
for (int i = 0; i < numValues; i++) {
lastIntervalSeen = processValue(multiLongValues.nextValue(), lastIntervalSeen);
assert lastIntervalSeen >= 0 && lastIntervalSeen < boundaries.length;
elementaryIntervalTracker.set(lastIntervalSeen);
if (lastIntervalSeen == boundaries.length - 1) {
// we've already reached the end of all possible intervals for this doc
break;
}
}
maybeRollUp(requestedIntervalTracker);
elementaryIntervalTracker.freeze();
if (requestedIntervalTracker != null) {
requestedIntervalTracker.freeze();
}
return true;
}
// Returns the index of the elementary interval that v belongs to, or lastIntervalSeen
// if v falls into an interval that has already been seen for this doc.
private int processValue(long v, int lastIntervalSeen) {
int lo = 0, hi = boundaries.length - 1;
if (lastIntervalSeen != -1) {
// this is the multivalued doc case, we need to set lo correctly
if (v <= boundaries[lastIntervalSeen]) {
// we've already counted something for this interval and doc
// we don't need to process v
return lastIntervalSeen;
}
lo = lastIntervalSeen + 1;
if (lo == boundaries.length) {
// we've already counted the last elementary interval. If so, there's nothing
// else to count for this doc
// TODO: does it make sense to return something else?
return lastIntervalSeen;
}
}
int lowerBound = lo;
while (true) {
int mid = (lo + hi) >>> 1;
if (v <= boundaries[mid]) {
if (mid == lowerBound) {
return mid;
} else {
hi = mid - 1;
}
} else if (v > boundaries[mid + 1]) {
lo = mid + 1;
} else {
return mid + 1;
}
}
}
void maybeRollUp(IntervalTracker rollUpInto) {}
}
abstract static class LongRangeSingleValuedLeafFacetCutter implements LeafFacetCutter {
private final LongValues longValues;
private final long[] boundaries;
final int[] pos;
int elementaryIntervalOrd;
IntervalTracker requestedIntervalTracker;
LongRangeSingleValuedLeafFacetCutter(LongValues longValues, long[] boundaries, int[] pos) {
this.longValues = longValues;
this.boundaries = boundaries;
this.pos = pos;
}
@Override
public boolean advanceExact(int doc) throws IOException {
if (longValues.advanceExact(doc) == false) {
return false;
}
if (requestedIntervalTracker != null) {
requestedIntervalTracker.clear();
}
elementaryIntervalOrd = processValue(longValues.longValue());
maybeRollUp(requestedIntervalTracker);
if (requestedIntervalTracker != null) {
requestedIntervalTracker.freeze();
}
return true;
}
// Returns the index of the elementary interval that v belongs to.
private int processValue(long v) {
int lo = 0, hi = boundaries.length - 1;
int lowerBound = lo;
while (true) {
int mid = (lo + hi) >>> 1;
if (v <= boundaries[mid]) {
if (mid == lowerBound) {
return mid;
} else {
hi = mid - 1;
}
} else if (v > boundaries[mid + 1]) {
lo = mid + 1;
} else {
return mid + 1;
}
}
}
void maybeRollUp(IntervalTracker rollUpInto) {}
}
record LongRangeAndPos(LongRange range, int pos) {
@Override
public String toString() {
return "LongRangeAndPos[" + "range=" + range + ", " + "pos=" + pos + ']';
}
}
/**
* Similar to InclusiveRange from LongRangeCounter.
*
* <p>TODO: dedup
*/
record InclusiveRange(long start, long end) {
@Override
public String toString() {
return start + " to " + end;
}
}
}
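A sketch of the public factory for the multi-valued case; LongRange and MultiLongValuesSource.fromLongField come from the existing facet module, and "size" is a hypothetical numeric doc values field:

LongRange[] ranges =
    new LongRange[] {
      new LongRange("small", 0, true, 99, true),
      new LongRange("large", 100, true, Long.MAX_VALUE, true)
    };
LongRangeFacetCutter cutter =
    LongRangeFacetCutter.create(MultiLongValuesSource.fromLongField("size"), ranges);
// Overlapping ranges are detected automatically and handled by the segment-tree based cutter.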

View File

@ -0,0 +1,94 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.cutters.ranges;
import org.apache.lucene.internal.hppc.IntArrayList;
/**
* Holds one node of the segment tree.
*
* <p>TODO: dedup existing LongRangeNode.
*/
final class LongRangeNode {
final LongRangeNode left;
final LongRangeNode right;
// Our range, inclusive:
final long start;
final long end;
// Which range indices to output when a query goes
// through this node:
IntArrayList outputs;
/** add doc * */
LongRangeNode(long start, long end, LongRangeNode left, LongRangeNode right) {
this.start = start;
this.end = end;
this.left = left;
this.right = right;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
toString(sb, 0);
return sb.toString();
}
private static void indent(StringBuilder sb, int depth) {
sb.append(" ".repeat(depth));
}
/** Recursively assigns range outputs to each node. */
public void addOutputs(LongRangeFacetCutter.LongRangeAndPos range) {
if (start >= range.range().min && end <= range.range().max) {
// Our range is fully included in the incoming
// range; add to our output list:
if (outputs == null) {
outputs = new IntArrayList();
}
outputs.add(range.pos());
} else if (left != null) {
assert right != null;
// Recurse:
left.addOutputs(range);
right.addOutputs(range);
}
}
private void toString(StringBuilder sb, int depth) {
indent(sb, depth);
if (left == null) {
assert right == null;
sb.append("leaf: ").append(start).append(" to ").append(end);
} else {
sb.append("node: ").append(start).append(" to ").append(end);
}
if (outputs != null) {
sb.append(" outputs=");
sb.append(outputs);
}
sb.append('\n');
if (left != null) {
assert right != null;
left.toString(sb, depth + 1);
right.toString(sb, depth + 1);
}
}
}

View File

@ -0,0 +1,125 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.cutters.ranges;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.facet.MultiLongValues;
import org.apache.lucene.facet.MultiLongValuesSource;
import org.apache.lucene.facet.range.LongRange;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter;
import org.apache.lucene.search.LongValues;
import org.apache.lucene.search.LongValuesSource;
/** {@link LongRangeFacetCutter} for ranges of long values that don't overlap. */
class NonOverlappingLongRangeFacetCutter extends LongRangeFacetCutter {
NonOverlappingLongRangeFacetCutter(
MultiLongValuesSource longValuesSource,
LongValuesSource singleLongValuesSource,
LongRange[] longRanges) {
super(longValuesSource, singleLongValuesSource, longRanges);
}
/**
* TODO: it's identical to private ExclusiveLongRangeCounter#buildElementaryIntervals, let's
* dedup.
*/
@Override
List<InclusiveRange> buildElementaryIntervals() {
List<InclusiveRange> elementaryIntervals = new ArrayList<>();
long prev = Long.MIN_VALUE;
for (LongRangeAndPos range : sortedRanges) {
if (range.range().min > prev) {
// add a "gap" range preceding requested range if necessary:
elementaryIntervals.add(new InclusiveRange(prev, range.range().min - 1));
}
// add the requested range:
elementaryIntervals.add(new InclusiveRange(range.range().min, range.range().max));
prev = range.range().max + 1;
}
if (elementaryIntervals.isEmpty() == false) {
long lastEnd = elementaryIntervals.get(elementaryIntervals.size() - 1).end();
if (lastEnd < Long.MAX_VALUE) {
elementaryIntervals.add(new InclusiveRange(lastEnd + 1, Long.MAX_VALUE));
}
} else {
// If no ranges were requested, create a single entry from MIN_VALUE to MAX_VALUE:
elementaryIntervals.add(new InclusiveRange(Long.MIN_VALUE, Long.MAX_VALUE));
}
return elementaryIntervals;
}
@Override
public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException {
if (singleValues != null) {
LongValues values = singleValues.getValues(context, null);
return new NonOverlappingLongRangeSingleValueLeafFacetCutter(values, boundaries, pos);
} else {
MultiLongValues values = valuesSource.getValues(context);
return new NonOverlappingLongRangeMultiValueLeafFacetCutter(values, boundaries, pos);
}
}
/**
* TODO: dedup NonOverlappingLongRangeMultiValueLeafFacetCutter and
* NonOverlappingLongRangeSingleValueLeafFacetCutter code - they are similar but they extend
* different base classes.
*/
static class NonOverlappingLongRangeMultiValueLeafFacetCutter
extends LongRangeMultivaluedLeafFacetCutter {
NonOverlappingLongRangeMultiValueLeafFacetCutter(
MultiLongValues longValues, long[] boundaries, int[] pos) {
super(longValues, boundaries, pos);
}
@Override
public int nextOrd() throws IOException {
while (true) {
int ordinal = elementaryIntervalTracker.nextOrd();
if (ordinal == NO_MORE_ORDS) {
return NO_MORE_ORDS;
}
int result = pos[ordinal];
if (result != SKIP_INTERVAL_POSITION) {
return result;
}
}
}
}
static class NonOverlappingLongRangeSingleValueLeafFacetCutter
extends LongRangeSingleValuedLeafFacetCutter {
NonOverlappingLongRangeSingleValueLeafFacetCutter(
LongValues longValues, long[] boundaries, int[] pos) {
super(longValues, boundaries, pos);
}
@Override
public int nextOrd() throws IOException {
if (elementaryIntervalOrd == NO_MORE_ORDS) {
return NO_MORE_ORDS;
}
int result = pos[elementaryIntervalOrd];
elementaryIntervalOrd = NO_MORE_ORDS;
return result != SKIP_INTERVAL_POSITION ? result : NO_MORE_ORDS;
}
}
}

View File

@ -0,0 +1,273 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.cutters.ranges;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.facet.MultiLongValues;
import org.apache.lucene.facet.MultiLongValuesSource;
import org.apache.lucene.facet.range.LongRange;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.internal.hppc.IntCursor;
import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter;
import org.apache.lucene.search.LongValues;
import org.apache.lucene.search.LongValuesSource;
/**
* {@link LongRangeFacetCutter} for ranges of long values that overlap. Uses a segment tree
* optimisation to find all matching ranges for a given value, see <a
* href="https://blog.mikemccandless.com/2013/12/fast-range-faceting-using-segment-trees.html">fast
* range faceting using segment trees</a>.
*/
class OverlappingLongRangeFacetCutter extends LongRangeFacetCutter {
private final LongRangeNode root;
OverlappingLongRangeFacetCutter(
MultiLongValuesSource longValuesSource,
LongValuesSource singleLongValuesSource,
LongRange[] longRanges) {
super(longValuesSource, singleLongValuesSource, longRanges);
// Build binary tree on top of intervals:
root = split(0, elementaryIntervals.size(), elementaryIntervals);
// Set outputs, so we know which range to output for each node in the tree:
for (LongRangeAndPos range : sortedRanges) {
root.addOutputs(range);
}
}
/**
* TODO: it's identical to private OverlappingLongRangeCounter#buildElementaryIntervals, let's
* dedup.
*/
@Override
List<InclusiveRange> buildElementaryIntervals() {
// Maps all range inclusive endpoints to int flags; 1
// = start of interval, 2 = end of interval. We need to
// track the start vs end case separately because if a
// given point is both, then it must be its own
// elementary interval:
Map<Long, Integer> endsMap = new HashMap<>();
endsMap.put(Long.MIN_VALUE, 1);
endsMap.put(Long.MAX_VALUE, 2);
for (LongRangeAndPos rangeAndPos : sortedRanges) {
Integer cur = endsMap.get(rangeAndPos.range().min);
if (cur == null) {
endsMap.put(rangeAndPos.range().min, 1);
} else {
endsMap.put(rangeAndPos.range().min, cur | 1);
}
cur = endsMap.get(rangeAndPos.range().max);
if (cur == null) {
endsMap.put(rangeAndPos.range().max, 2);
} else {
endsMap.put(rangeAndPos.range().max, cur | 2);
}
}
List<Long> endsList = new ArrayList<>(endsMap.keySet());
Collections.sort(endsList);
// Build elementaryIntervals (a 1D Venn diagram):
List<InclusiveRange> elementaryIntervals = new ArrayList<>();
int upto = 1;
long v = endsList.get(0);
long prev;
if (endsMap.get(v) == 3) {
elementaryIntervals.add(new InclusiveRange(v, v));
prev = v + 1;
} else {
prev = v;
}
while (upto < endsList.size()) {
v = endsList.get(upto);
int flags = endsMap.get(v);
if (flags == 3) {
// This point is both an end and a start; we need to
// separate it:
if (v > prev) {
elementaryIntervals.add(new InclusiveRange(prev, v - 1));
}
elementaryIntervals.add(new InclusiveRange(v, v));
prev = v + 1;
} else if (flags == 1) {
// This point is only the start of an interval;
// attach it to next interval:
if (v > prev) {
elementaryIntervals.add(new InclusiveRange(prev, v - 1));
}
prev = v;
} else {
assert flags == 2;
// This point is only the end of an interval; attach
// it to last interval:
elementaryIntervals.add(new InclusiveRange(prev, v));
prev = v + 1;
}
upto++;
}
return elementaryIntervals;
}
private static LongRangeNode split(int start, int end, List<InclusiveRange> elementaryIntervals) {
if (start == end - 1) {
// leaf
InclusiveRange range = elementaryIntervals.get(start);
return new LongRangeNode(range.start(), range.end(), null, null);
} else {
int mid = (start + end) >>> 1;
LongRangeNode left = split(start, mid, elementaryIntervals);
LongRangeNode right = split(mid, end, elementaryIntervals);
return new LongRangeNode(left.start, right.end, left, right);
}
}
@Override
public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException {
if (singleValues != null) {
LongValues values = singleValues.getValues(context, null);
return new OverlappingSingleValuedRangeLeafFacetCutter(
values, boundaries, pos, requestedRangeCount, root);
} else {
MultiLongValues values = valuesSource.getValues(context);
return new OverlappingMultivaluedRangeLeafFacetCutter(
values, boundaries, pos, requestedRangeCount, root);
}
}
/**
* TODO: dedup OverlappingMultivaluedRangeLeafFacetCutter and
* OverlappingSingleValuedRangeLeafFacetCutter code - they are identical but they extend different
* base classes.
*/
static class OverlappingMultivaluedRangeLeafFacetCutter
extends LongRangeMultivaluedLeafFacetCutter {
private final LongRangeNode elementaryIntervalRoot;
private int elementaryIntervalUpto;
OverlappingMultivaluedRangeLeafFacetCutter(
MultiLongValues longValues,
long[] boundaries,
int[] pos,
int requestedRangeCount,
LongRangeNode elementaryIntervalRoot) {
super(longValues, boundaries, pos);
requestedIntervalTracker = new IntervalTracker.MultiIntervalTracker(requestedRangeCount);
this.elementaryIntervalRoot = elementaryIntervalRoot;
}
@Override
void maybeRollUp(IntervalTracker rollUpInto) {
elementaryIntervalUpto = 0;
rollupMultiValued(elementaryIntervalRoot);
}
private boolean rollupMultiValued(LongRangeNode node) {
boolean containedHit;
if (node.left != null) {
containedHit = rollupMultiValued(node.left);
containedHit |= rollupMultiValued(node.right);
} else {
// Leaf:
containedHit = elementaryIntervalTracker.get(elementaryIntervalUpto);
elementaryIntervalUpto++;
}
if (containedHit && node.outputs != null) {
for (IntCursor rangeIndex : node.outputs) {
requestedIntervalTracker.set(rangeIndex.value);
}
}
return containedHit;
}
@Override
public int nextOrd() throws IOException {
if (requestedIntervalTracker == null) {
return NO_MORE_ORDS;
}
return requestedIntervalTracker.nextOrd();
}
}
static class OverlappingSingleValuedRangeLeafFacetCutter
extends LongRangeSingleValuedLeafFacetCutter {
private final LongRangeNode elementaryIntervalRoot;
private int elementaryIntervalUpto;
OverlappingSingleValuedRangeLeafFacetCutter(
LongValues longValues,
long[] boundaries,
int[] pos,
int requestedRangeCount,
LongRangeNode elementaryIntervalRoot) {
super(longValues, boundaries, pos);
requestedIntervalTracker = new IntervalTracker.MultiIntervalTracker(requestedRangeCount);
this.elementaryIntervalRoot = elementaryIntervalRoot;
}
@Override
void maybeRollUp(IntervalTracker rollUpInto) {
// TODO: for single valued we can rollup after collecting all documents, e.g. in reduce
// method. Maybe we can use FacetCutter rollup methods to handle this case too?
elementaryIntervalUpto = 0;
rollupSingleValued(elementaryIntervalRoot);
}
// Note: combined rollUpSingleValued and rollUpMultiValued from OverlappingLongRangeCounter into
// 1 rollUp method
private boolean rollupSingleValued(LongRangeNode node) {
boolean containedHit;
if (node.left != null) {
containedHit = rollupSingleValued(node.left);
containedHit |= rollupSingleValued(node.right);
} else {
// Leaf:
containedHit = elementaryIntervalUpto == elementaryIntervalOrd;
elementaryIntervalUpto++;
}
if (containedHit && node.outputs != null) {
for (IntCursor rangeIndex : node.outputs) {
requestedIntervalTracker.set(rangeIndex.value);
}
}
return containedHit;
}
@Override
public int nextOrd() throws IOException {
if (requestedIntervalTracker == null) {
return NO_MORE_ORDS;
}
return requestedIntervalTracker.nextOrd();
}
}
}

View File

@ -0,0 +1,22 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Sandbox faceting: Range Faceting
*
* @lucene.experimental
*/
package org.apache.lucene.sandbox.facet.cutters.ranges;

View File

@ -0,0 +1,63 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.iterators;
import java.io.IOException;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.sandbox.facet.labels.LabelToOrd;
import org.apache.lucene.sandbox.facet.recorders.FacetRecorder;
/**
* {@link OrdinalIterator} that filters out ordinals from delegate if they are not in the candidate
* set. Can be handy to get results only for specific facets.
*
* @lucene.experimental
*/
public final class CandidateSetOrdinalIterator implements OrdinalIterator {
private final OrdinalIterator candidateOrdinalIterator;
private final FacetRecorder facetRecorder;
/** Constructor. */
public CandidateSetOrdinalIterator(
FacetRecorder facetRecorder, FacetLabel[] candidateLabels, LabelToOrd labelToOrd)
throws IOException {
// TODO: if candidates size >> number of ordinals in facetRecorder, it is more efficient to
// iterate ordinals from FacetRecorder, and check if candidates contain them
if (facetRecorder.isEmpty()) {
// Getting ordinals for labels might be expensive, e.g. it requires reading index for taxonomy
// facets, so we make sure we don't do it for empty facet recorder.
this.candidateOrdinalIterator = OrdinalIterator.EMPTY;
} else {
this.candidateOrdinalIterator =
OrdinalIterator.fromArray(labelToOrd.getOrds(candidateLabels));
}
this.facetRecorder = facetRecorder;
}
@Override
public int nextOrd() throws IOException {
for (int ord = candidateOrdinalIterator.nextOrd();
ord != NO_MORE_ORDS;
ord = candidateOrdinalIterator.nextOrd()) {
if (facetRecorder.contains(ord)) {
return ord;
}
}
return NO_MORE_ORDS;
}
}
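A sketch of restricting results to a fixed set of labels; the recorder and labelToOrd instances are assumed to come from the recorders and labels packages of this change (not shown in this excerpt):

FacetLabel[] candidates =
    new FacetLabel[] {new FacetLabel("Author", "Alice"), new FacetLabel("Author", "Bob")};
OrdinalIterator filtered = new CandidateSetOrdinalIterator(recorder, candidates, labelToOrd);
int[] matchingOrds = filtered.toArray(); // ordinals that were both requested and recorded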

View File

@ -0,0 +1,43 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.iterators;
/**
* Generates a {@link Comparable} for a provided ordinal. For example, it can be used to find top-N
* facet ordinals.
*
* @param <T> something ordinals can be compared by.
* @lucene.experimental
*/
public interface ComparableSupplier<T extends Comparable<T>> {
/**
* For given ordinal, get something it can be compared by.
*
* @param ord ordinal.
* @param reuse object to reuse for building result. Must not be null.
*/
void reuseComparable(int ord, T reuse);
/**
* For given ordinal, create something it can be compared by.
*
* @param ord ordinal.
* @return Comparable.
*/
T createComparable(int ord);
}
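A hypothetical implementation for illustration only: it compares ordinals by counts held in an int[] (the change ships ready-made suppliers elsewhere; this is not one of them):

class CountComparableSupplier
    implements ComparableSupplier<CountComparableSupplier.OrdAndCount> {
  static final class OrdAndCount implements Comparable<OrdAndCount> {
    int ord;
    int count;

    @Override
    public int compareTo(OrdAndCount other) {
      int cmp = Integer.compare(count, other.count);
      // Primary order: count; for equal counts the lower ordinal compares as greater.
      return cmp != 0 ? cmp : Integer.compare(other.ord, ord);
    }
  }

  private final int[] counts; // counts[ord] = aggregated count for ordinal ord

  CountComparableSupplier(int[] counts) {
    this.counts = counts;
  }

  @Override
  public void reuseComparable(int ord, OrdAndCount reuse) {
    reuse.ord = ord;
    reuse.count = counts[ord];
  }

  @Override
  public OrdAndCount createComparable(int ord) {
    OrdAndCount result = new OrdAndCount();
    reuseComparable(ord, result);
    return result;
  }
}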

View File

@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.iterators;
import java.io.IOException;
import org.apache.lucene.internal.hppc.IntArrayList;
/**
* Iterate over ordinals.
*
* @lucene.experimental
*/
public interface OrdinalIterator {
/** This const is returned by nextOrd when there are no more ordinals. */
int NO_MORE_ORDS = -1;
/** Returns next ord or {@link #NO_MORE_ORDS}. */
int nextOrd() throws IOException;
/**
* Convert to int array. Note that after this method is called the original OrdinalIterator is
* exhausted.
*/
default int[] toArray() throws IOException {
IntArrayList resultList = new IntArrayList();
for (int ord = this.nextOrd(); ord != NO_MORE_ORDS; ord = this.nextOrd()) {
resultList.add(ord);
}
return resultList.toArray();
}
/** Convert int array to ordinal iterator. */
static OrdinalIterator fromArray(int[] source) {
return new OrdinalIterator() {
int cursor;
@Override
public int nextOrd() throws IOException {
int ord;
while (cursor < source.length) {
ord = source[cursor++];
// NO_MORE_ORDS should be returned only after we read the entire array.
if (ord != NO_MORE_ORDS) {
return ord;
}
}
return NO_MORE_ORDS;
}
};
}
/** Return empty ordinal iterator */
OrdinalIterator EMPTY = () -> NO_MORE_ORDS;
}
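A minimal consumption sketch (hypothetical values); note that an iterator can only be consumed once:

OrdinalIterator ords = OrdinalIterator.fromArray(new int[] {3, 7, 11});
for (int ord = ords.nextOrd(); ord != OrdinalIterator.NO_MORE_ORDS; ord = ords.nextOrd()) {
  // process ord
}
// or, to drain an iterator in one call: int[] all = anotherIterator.toArray();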


@ -0,0 +1,58 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.iterators;
import java.io.IOException;
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.sandbox.facet.labels.LabelToOrd;
/**
* Facet results selector that returns only children of the selected parent. The order of source
* ordinals is preserved.
*
* @lucene.experimental
*/
public final class TaxonomyChildrenOrdinalIterator implements OrdinalIterator {
// TODO: do we want to have something like ChainOrdinalIterators to chain multiple iterators?
// Or are we fine with chaining them manually every time?
private final OrdinalIterator sourceOrds;
private final ParallelTaxonomyArrays.IntArray parents;
private final int parentOrd;
/** Create */
public TaxonomyChildrenOrdinalIterator(
OrdinalIterator sourceOrds, ParallelTaxonomyArrays.IntArray parents, int parentOrd) {
this.sourceOrds = sourceOrds;
this.parents = parents;
assert parentOrd != LabelToOrd.INVALID_ORD : "Parent Ordinal is not valid";
this.parentOrd = parentOrd;
}
@Override
public int nextOrd() throws IOException {
// TODO: in some cases it might be faster to traverse children of selected parent
// (children/siblings IntArrays) and check if source ords contain them. We can think of some
// heuristics to decide which approach to use on a case-by-case basis? There is a similar comment in
// TaxonomyFacets#getTopChildrenForPath
for (int ord = sourceOrds.nextOrd(); ord != NO_MORE_ORDS; ord = sourceOrds.nextOrd()) {
if (parents.get(ord) == parentOrd) {
return ord;
}
}
return NO_MORE_ORDS;
}
}
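Sketch of how this iterator is typically chained (mirrors the test code later in this commit); countRecorder and taxoReader are assumed to exist:

TaxonomyOrdLabelBiMap ordLabels = new TaxonomyOrdLabelBiMap(taxoReader);
int parentOrd = ordLabels.getOrd(new FacetLabel("Publish Date"));
OrdinalIterator children =
    new TaxonomyChildrenOrdinalIterator(
        countRecorder.recordedOrds(),
        taxoReader.getParallelTaxonomyArrays().parents(),
        parentOrd);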


@ -0,0 +1,113 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.iterators;
import java.io.IOException;
import org.apache.lucene.util.PriorityQueue;
/**
* Class that consumes incoming ordinals, sorts them by the provided Comparable, and returns only
* the top N ordinals.
*
* @lucene.experimental
*/
public final class TopnOrdinalIterator<T extends Comparable<T>> implements OrdinalIterator {
private final ComparableSupplier<T> comparableSupplier;
private final OrdinalIterator sourceOrds;
private final int topN;
private int[] result;
private int currentIndex;
/** Constructor. */
public TopnOrdinalIterator(
OrdinalIterator sourceOrds, ComparableSupplier<T> comparableSupplier, int topN) {
if (topN <= 0) {
throw new IllegalArgumentException("topN must be > 0 (got: " + topN + ")");
}
this.sourceOrds = sourceOrds;
this.comparableSupplier = comparableSupplier;
this.topN = topN;
}
private void getTopN() throws IOException {
assert result == null;
// TODO: current taxonomy implementations limit queue size by taxo reader size too, but it
// probably doesn't make sense for large enough taxonomy indexes?
// e.g. TopOrdAndIntQueue q = new TopComparableQueue(Math.min(taxoReader.getSize(), topN));
// TODO: create queue lazily - skip if first nextOrd is NO_MORE_ORDS ?
TopComparableQueue<T> queue = new TopComparableQueue<>(topN);
OrdComparablePair<T> reuse = null;
for (int ord = sourceOrds.nextOrd(); ord != NO_MORE_ORDS; ord = sourceOrds.nextOrd()) {
if (reuse == null) {
reuse = new OrdComparablePair<>(ord, comparableSupplier.createComparable(ord));
} else {
reuse.ordinal = ord;
comparableSupplier.reuseComparable(ord, reuse.comparable);
}
reuse = queue.insertWithOverflow(reuse);
}
// Now we read the queue into the result array backwards, because the queue pops the least element, not the top.
result = new int[queue.size()];
for (int i = result.length - 1; i >= 0; i--) {
result[i] = queue.pop().ordinal;
}
currentIndex = 0;
}
@Override
public int nextOrd() throws IOException {
if (result == null) {
getTopN();
}
assert result != null;
if (currentIndex >= result.length) {
return NO_MORE_ORDS;
}
return result[currentIndex++];
}
/** Keeps top N results ordered by Comparable. */
private static class TopComparableQueue<T extends Comparable<T>>
extends PriorityQueue<OrdComparablePair<T>> {
/** Sole constructor. */
public TopComparableQueue(int topN) {
super(topN);
}
@Override
protected boolean lessThan(OrdComparablePair<T> a, OrdComparablePair<T> b) {
return a.lessThan(b);
}
}
/** Pair of ordinal and comparable to use in TopComparableQueue */
private static class OrdComparablePair<T extends Comparable<T>> {
int ordinal;
T comparable;
private OrdComparablePair(int ordinal, T comparable) {
this.ordinal = ordinal;
this.comparable = comparable;
}
boolean lessThan(OrdComparablePair<T> other) {
return comparable.compareTo(other.comparable) < 0;
}
}
}
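Sketch of selecting the top 10 of those children by count (ComparableUtils.byCount is part of this change; children and countRecorder come from the previous sketch):

OrdinalIterator topByCount =
    new TopnOrdinalIterator<>(children, ComparableUtils.byCount(countRecorder), 10);
int[] topOrds = topByCount.toArray(); // at most 10 ordinals, highest count first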


@ -0,0 +1,22 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Sandbox faceting: facet ordinals.
*
* @lucene.experimental
*/
package org.apache.lucene.sandbox.facet.iterators;


@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.labels;
import java.io.IOException;
import org.apache.lucene.facet.taxonomy.FacetLabel;
/**
* Label to ord mapping interface.
*
* <p>TODO: move FacetLabel out of taxonomy folder to use it for any facets, not just taxonomy?
*
* <p>TODO: there is some overlap with {@link
* org.apache.lucene.facet.taxonomy.writercache.LabelToOrdinal}, can we reuse something?
*
* @lucene.experimental
*/
public interface LabelToOrd {
/**
* Ordinal to return if facet label doesn't exist in {@link #getOrd(FacetLabel)} and {@link
* #getOrds(FacetLabel[])}
*/
int INVALID_ORD = -1;
/** get ord for one label */
int getOrd(FacetLabel label) throws IOException;
/** get ords for multiple labels */
int[] getOrds(FacetLabel[] labels) throws IOException;
}


@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.labels;
import java.io.IOException;
import org.apache.lucene.facet.taxonomy.FacetLabel;
/**
* Ordinal to label mapping interface.
*
* <p>TODO: move FacetLabel out of taxonomy folder to use it for any facets, not just taxonomy?
*
* @lucene.experimental
*/
public interface OrdToLabel {
/** get label of one ord. TODO: what do we return when the ordinal is not valid? null? */
FacetLabel getLabel(int ordinal) throws IOException;
/** get labels for multiple ords */
FacetLabel[] getLabels(int[] ordinals) throws IOException;
}


@ -0,0 +1,52 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.labels;
import org.apache.lucene.facet.range.Range;
import org.apache.lucene.facet.taxonomy.FacetLabel;
/**
* {@link OrdToLabel} for ranges.
*
* @lucene.experimental
*/
public class RangeOrdToLabel implements OrdToLabel {
private final Range[] ranges;
/** Constructor that takes array of Range objects as input */
public RangeOrdToLabel(Range[] inputRanges) {
ranges = inputRanges;
}
@Override
public FacetLabel getLabel(int ordinal) {
if (ordinal >= 0 && ordinal < ranges.length) {
return new FacetLabel(ranges[ordinal].label);
}
return null;
}
@Override
public FacetLabel[] getLabels(int[] ordinals) {
FacetLabel[] facetLabels = new FacetLabel[ordinals.length];
for (int i = 0; i < ordinals.length; i++) {
facetLabels[i] = getLabel(ordinals[i]);
}
return facetLabels;
}
}
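Sketch: a range ordinal is simply a position in the ranges array, so labels are resolved like this (LongRange is the existing class from the facet ranges package; the ranges themselves are made up for illustration):

LongRange[] ranges = {
  new LongRange("0-10", 0, true, 10, true),
  new LongRange("11-20", 11, true, 20, true)
};
OrdToLabel ordToLabel = new RangeOrdToLabel(ranges);
FacetLabel label = ordToLabel.getLabel(1); // FacetLabel("11-20")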


@ -0,0 +1,57 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.labels;
import java.io.IOException;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
/**
* Map taxonomy labels to ordinals.
*
* @lucene.experimental
*/
public final class TaxonomyOrdLabelBiMap implements OrdToLabel, LabelToOrd {
private final TaxonomyReader taxoReader;
/** Construct */
public TaxonomyOrdLabelBiMap(TaxonomyReader taxoReader) {
this.taxoReader = taxoReader;
}
@Override
public FacetLabel getLabel(int ordinal) throws IOException {
return taxoReader.getPath(ordinal);
}
@Override
public FacetLabel[] getLabels(int[] ordinals) throws IOException {
return taxoReader.getBulkPath(
ordinals.clone()); // Have to clone because getBulkPath shuffles its input array.
}
@Override
public int getOrd(FacetLabel label) throws IOException {
return taxoReader.getOrdinal(label);
}
@Override
public int[] getOrds(FacetLabel[] labels) throws IOException {
return taxoReader.getBulkOrdinals(labels);
}
}
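Usage sketch (taxoReader assumed to exist); the same instance can be used in both directions:

TaxonomyOrdLabelBiMap ordLabels = new TaxonomyOrdLabelBiMap(taxoReader);
int ord = ordLabels.getOrd(new FacetLabel("Author", "Lisa")); // LabelToOrd direction
FacetLabel label = ordLabels.getLabel(ord);                   // OrdToLabel direction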


@ -0,0 +1,23 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Sandbox faceting: facet labels, see {@link org.apache.lucene.sandbox.facet.labels.OrdToLabel} for
* details.
*
* @lucene.experimental
*/
package org.apache.lucene.sandbox.facet.labels;


@ -0,0 +1,30 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Sandbox faceting - Collectors that compute facets.
*
* <p>Facet Ordinals/Ids: Each doc may have different facets and therefore different facet
* ordinals. For example a book can have Author, Publish Date, Page Count etc. as facets. The
* specific value for each of these facets for a book can be mapped to an ordinal. Facet ordinals
* may be common across different book documents.
*
* <p>FacetCutter: Can interpret facets of a specific type for a doc and output all the facet
* ordinals of that type for the doc.
*
* <p>Facet Recorders: record data per ordinal. Some recorders may compute aggregations and record
* per-ordinal data aggregated across an index.
*
* <p>See SandboxFacetsExample for examples.
*
* @lucene.experimental
*/
package org.apache.lucene.sandbox.facet;
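For orientation, a rough end-to-end counting sketch that follows the tests later in this commit (searcher, taxoReader, config and query are assumed to exist; SandboxFacetsExample has complete examples):

TaxonomyFacetsCutter cutter =
    new TaxonomyFacetsCutter(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, config, taxoReader);
CountFacetRecorder recorder = new CountFacetRecorder();
FacetFieldCollectorManager<CountFacetRecorder> collectorManager =
    new FacetFieldCollectorManager<>(cutter, recorder);
searcher.search(query, collectorManager); // counts are computed while collecting
for (int ord : recorder.recordedOrds().toArray()) {
  int count = recorder.getCount(ord); // resolve ord to a label with TaxonomyOrdLabelBiMap if needed
}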


@ -0,0 +1,169 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.recorders;
import static org.apache.lucene.sandbox.facet.iterators.OrdinalIterator.NO_MORE_ORDS;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.internal.hppc.IntCursor;
import org.apache.lucene.internal.hppc.IntIntHashMap;
import org.apache.lucene.sandbox.facet.cutters.FacetCutter;
import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter;
import org.apache.lucene.sandbox.facet.iterators.OrdinalIterator;
/**
* {@link FacetRecorder} to count facets.
*
* <p>TODO: add an option to keep counts in an array, to improve performance for facets with a small
* number of ordinals e.g. range facets. Options: - {@link LeafFacetCutter} can inform {@link
* LeafFacetRecorder} about the expected number of facet ordinals ({@link
* org.apache.lucene.sandbox.facet.FacetFieldCollector} can orchestrate that). If the expected facet
* ord number is below some threshold - use an array instead of a map? - keep the first 100/1k counts
* in an array, the rest - in a map; the limit can also be provided in a constructor? It is similar
* to what LongValuesFacetCounts does today.
*
* <p>TODO: We can also consider collecting 2 (3, 4, ..., can be parameterized) slices into a single
* sync map, which can reduce thread contention compared to a single sync map for all slices; at the
* same time there will be less work for the reduce method. So far reduce wasn't a bottleneck for us,
* but it is definitely not free.
*
* <p>TODO: If we come back to some form of synchronized count maps, we should be more careful what
* we acquire locks for - we used to lock the addTo method itself, but it could be faster if we only
* synchronized after computing the key's hash; or we can lock the entire map only if we need to
* insert a key, and lock a single key otherwise?
*
* @lucene.experimental
*/
public final class CountFacetRecorder implements FacetRecorder {
private IntIntHashMap values;
private final List<IntIntHashMap> perLeafValues;
/** Create. */
public CountFacetRecorder() {
// Has to be synchronizedList as we have one recorder per all slices.
perLeafValues = Collections.synchronizedList(new ArrayList<>());
}
/** Get count for provided ordinal. */
public int getCount(int ord) {
return values.get(ord);
}
@Override
public LeafFacetRecorder getLeafRecorder(LeafReaderContext context) {
// TODO: we are planning to do some experiments with how hash maps are assigned to leaf or slice
// recorders, see other TODOs in this class. When we make the decision, we can collect
// leaf/slice recorders themselves, not the hashmaps?
IntIntHashMap leafValues = new IntIntHashMap();
perLeafValues.add(leafValues);
return new CountLeafFacetRecorder(leafValues);
}
@Override
public OrdinalIterator recordedOrds() {
// TODO: even if this is called before collection has started, we want it to use results from the
// time when nextOrd is first called. Does ordIterator work like that? I've run some tests that
// confirmed the expected behavior, but I'm not sure IntIntMap guarantees that. We should at least
// add a unit test to make sure it always works that way.
Iterator<IntCursor> ordIterator = values.keys().iterator();
return new OrdinalIterator() {
@Override
public int nextOrd() {
if (ordIterator.hasNext()) {
return ordIterator.next().value;
} else {
return NO_MORE_ORDS;
}
}
};
}
@Override
public boolean isEmpty() {
return values.isEmpty();
}
@Override
public void reduce(FacetCutter facetCutter) throws IOException {
boolean firstElement = true;
for (IntIntHashMap leafRecords : perLeafValues) {
if (firstElement) {
values = leafRecords;
firstElement = false;
} else {
for (IntIntHashMap.IntIntCursor elem : leafRecords) {
values.addTo(elem.key, elem.value);
}
}
}
if (firstElement) {
// TODO: do we need empty map by default?
values = new IntIntHashMap();
}
OrdinalIterator dimOrds = facetCutter.getOrdinalsToRollup();
if (dimOrds != null) {
for (int dimOrd = dimOrds.nextOrd(); dimOrd != NO_MORE_ORDS; dimOrd = dimOrds.nextOrd()) {
int rolledUp = rollup(dimOrd, facetCutter);
if (rolledUp > 0) {
values.addTo(dimOrd, rolledUp);
}
}
}
}
@Override
public boolean contains(int ordinal) {
return values.containsKey(ordinal);
}
private int rollup(int ord, FacetCutter facetCutter) throws IOException {
OrdinalIterator childOrds = facetCutter.getChildrenOrds(ord);
int accum = 0;
for (int nextChild = childOrds.nextOrd();
nextChild != NO_MORE_ORDS;
nextChild = childOrds.nextOrd()) {
int rolledUp = rollup(nextChild, facetCutter);
// Don't roll up zeros, so that we don't add ordinals to the map that we don't actually have counts for
if (rolledUp > 0) {
accum += values.addTo(nextChild, rolledUp);
} else {
accum += values.get(nextChild);
}
}
return accum;
}
private static class CountLeafFacetRecorder implements LeafFacetRecorder {
private final IntIntHashMap values;
public CountLeafFacetRecorder(IntIntHashMap values) {
this.values = values;
}
@Override
public void record(int docId, int facetOrd) {
this.values.addTo(facetOrd, 1);
}
}
}
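Sketch of reading a specific count after search (the collector manager reduces the recorder); recorder and taxoReader are assumed to be set up as in the tests later in this commit:

TaxonomyOrdLabelBiMap ordLabels = new TaxonomyOrdLabelBiMap(taxoReader);
int ord = ordLabels.getOrd(new FacetLabel("Author", "Lisa"));
int count = recorder.getCount(ord); // a missing ordinal maps to the default count of 0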


@ -0,0 +1,63 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.recorders;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.sandbox.facet.cutters.FacetCutter;
import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter;
import org.apache.lucene.sandbox.facet.iterators.OrdinalIterator;
/**
* Record data for each facet of each doc.
*
* <p>TODO: In the next iteration we can add an extra layer between FacetRecorder and
* LeafFacetRecorder, e.g. SliceFacetRecorder. The new layer will be created per {@link
* org.apache.lucene.search.Collector}, which means that collecting of multiple leaves (segments)
* within a slice is sequential and can be done into a single non-synchronized map to improve
* performance and reduce memory consumption. We already tried that, but didn't see any performance
* improvement. Given that it also makes lazy leaf recorder init in {@link
* org.apache.lucene.sandbox.facet.FacetFieldCollector} trickier, it was decided to roll back the
* initial attempt and try again later, in the next iteration.
*
* @lucene.experimental
*/
public interface FacetRecorder {
/** Get leaf recorder. */
LeafFacetRecorder getLeafRecorder(LeafReaderContext context) throws IOException;
/** Return an iterator over collected ordinals; it returns {@link OrdinalIterator#NO_MORE_ORDS} when exhausted. */
OrdinalIterator recordedOrds();
/** True if there are no records */
boolean isEmpty();
/**
* Reduce leaf recorder results into this recorder. If {@link FacetCutter#getOrdinalsToRollup()}
* result is not null, it also rolls up values.
*
* <p>After this method is called, it's illegal to add values to recorder, i.e. calling {@link
* #getLeafRecorder} or {@link LeafFacetRecorder#record} on its leaf recorders.
*
* @throws UnsupportedOperationException if {@link FacetCutter#getOrdinalsToRollup()} returns not
* null but this recorder doesn't support rollup.
*/
void reduce(FacetCutter facetCutter) throws IOException;
/** Check if any data was recorded for provided facet ordinal. */
boolean contains(int ordinal);
}


@ -0,0 +1,35 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.recorders;
import java.io.IOException;
/**
* Record data for each facet of each doc of a leaf (segment).
*
* @lucene.experimental
*/
public interface LeafFacetRecorder {
/**
* TODO: Rename: collect? accumulate?
*
* @param docId document ID
* @param facetOrd facet ordinal
*/
void record(int docId, int facetOrd) throws IOException;
}


@ -0,0 +1,207 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.recorders;
import static org.apache.lucene.sandbox.facet.iterators.OrdinalIterator.NO_MORE_ORDS;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.internal.hppc.IntCursor;
import org.apache.lucene.internal.hppc.IntObjectHashMap;
import org.apache.lucene.sandbox.facet.cutters.FacetCutter;
import org.apache.lucene.sandbox.facet.iterators.OrdinalIterator;
import org.apache.lucene.search.LongValues;
import org.apache.lucene.search.LongValuesSource;
/**
* {@link FacetRecorder} that computes multiple long aggregations per facet.
*
* <p>TODO: [premature optimization idea] if instead of one array we keep aggregations in two
* LongVectors (one for the MAX aggregation and one for SUM), we could benefit from SIMD?
*
* @lucene.experimental
*/
public final class LongAggregationsFacetRecorder implements FacetRecorder {
private IntObjectHashMap<long[]> values;
private final List<IntObjectHashMap<long[]>> leafValues;
private final LongValuesSource[] longValuesSources;
private final Reducer[] reducers;
/** Constructor. */
public LongAggregationsFacetRecorder(LongValuesSource[] longValuesSources, Reducer[] reducers) {
assert longValuesSources.length == reducers.length;
this.longValuesSources = longValuesSources;
this.reducers = reducers;
leafValues = Collections.synchronizedList(new ArrayList<>());
}
@Override
public LeafFacetRecorder getLeafRecorder(LeafReaderContext context) throws IOException {
LongValues[] longValues = new LongValues[longValuesSources.length];
for (int i = 0; i < longValuesSources.length; i++) {
longValues[i] = longValuesSources[i].getValues(context, null);
}
IntObjectHashMap<long[]> valuesRecorder = new IntObjectHashMap<>();
leafValues.add(valuesRecorder);
return new LongAggregationsLeafFacetRecorder(longValues, reducers, valuesRecorder);
}
@Override
public OrdinalIterator recordedOrds() {
Iterator<IntCursor> ordIterator = values.keys().iterator();
return new OrdinalIterator() {
@Override
public int nextOrd() throws IOException {
if (ordIterator.hasNext()) {
return ordIterator.next().value;
} else {
return NO_MORE_ORDS;
}
}
};
}
@Override
public boolean isEmpty() {
return values.isEmpty();
}
@Override
public void reduce(FacetCutter facetCutter) throws IOException {
boolean firstElement = true;
for (IntObjectHashMap<long[]> leafValue : leafValues) {
if (firstElement) {
values = leafValue;
firstElement = false;
} else {
for (IntObjectHashMap.IntObjectCursor<long[]> elem : leafValue) {
long[] vals = values.get(elem.key);
if (vals == null) {
values.put(elem.key, elem.value);
} else {
for (int i = 0; i < longValuesSources.length; i++) {
vals[i] = reducers[i].reduce(vals[i], elem.value[i]);
}
}
}
}
}
if (firstElement) {
// TODO: do we need empty map by default?
values = new IntObjectHashMap<>();
}
OrdinalIterator dimOrds = facetCutter.getOrdinalsToRollup();
if (dimOrds != null) {
for (int dimOrd = dimOrds.nextOrd(); dimOrd != NO_MORE_ORDS; dimOrd = dimOrds.nextOrd()) {
rollup(values.get(dimOrd), dimOrd, facetCutter);
}
}
}
@Override
public boolean contains(int ordinal) {
return values.containsKey(ordinal);
}
/**
* Rollup all child values of ord into accum, and return accum. The accum param can be null; in
* that case, if recursive rollup for every child returns null, this method returns null.
* Otherwise, accum is initialized.
*/
private long[] rollup(long[] accum, int ord, FacetCutter facetCutter) throws IOException {
OrdinalIterator childOrds = facetCutter.getChildrenOrds(ord);
for (int nextChild = childOrds.nextOrd();
nextChild != NO_MORE_ORDS;
nextChild = childOrds.nextOrd()) {
long[] current = rollup(values.get(nextChild), nextChild, facetCutter);
if (current != null) {
if (accum == null) {
accum = new long[longValuesSources.length];
values.put(ord, accum);
}
for (int i = 0; i < longValuesSources.length; i++) {
accum[i] = reducers[i].reduce(accum[i], current[i]);
}
}
}
return accum;
}
/** Return aggregated value for facet ordinal and aggregation ID, or zero as default. */
public long getRecordedValue(int ord, int valuesId) {
if (valuesId < 0 || valuesId >= longValuesSources.length) {
throw new IllegalArgumentException("Invalid request for ordinal values");
}
long[] valuesForOrd = values.get(ord);
if (valuesForOrd != null) {
return valuesForOrd[valuesId];
}
// There are a few options for what we can return here, e.g. throw an exception, or return a
// hardcoded or provided default value. It might be a better API to do that instead of returning
// zero, but there are two reasons why I think returning zero is the right compromise:
// 1) the recorder result is a map-like structure, and maps in Java usually return a default value,
// e.g. null or 0, rather than throw an exception when a key is missing.
// 2) Handling all missing value cases correctly might be expensive, e.g. what if only one
// aggregation for the selected facet ordinal is missing, i.e. no docs that belong to this facet
// ordinal have a value to aggregate? To handle that we would have to maintain missing values
// during collection instead of using the default array value - zero. I believe it is excessive and
// most users are not going to use it anyway. Worst case, we can add another public get
// method that handles missing values later.
return 0;
}
private static class LongAggregationsLeafFacetRecorder implements LeafFacetRecorder {
private final LongValues[] longValues;
private final Reducer[] reducers;
private final IntObjectHashMap<long[]> perOrdinalValues;
LongAggregationsLeafFacetRecorder(
LongValues[] longValues, Reducer[] reducers, IntObjectHashMap<long[]> perOrdinalValues) {
this.longValues = longValues;
this.reducers = reducers;
this.perOrdinalValues = perOrdinalValues;
}
@Override
public void record(int docId, int facetOrd) throws IOException {
long[] valuesForOrd = perOrdinalValues.get(facetOrd);
if (valuesForOrd == null) {
valuesForOrd = new long[longValues.length];
perOrdinalValues.put(facetOrd, valuesForOrd);
}
LongValues values;
for (int i = 0; i < longValues.length; i++) {
// TODO: cache advance/longValue results for current doc? Skipped for now as LongValues
// themselves can keep the cache.
values = longValues[i];
if (values.advanceExact(docId)) {
valuesForOrd[i] = reducers[i].reduce(valuesForOrd[i], values.longValue());
}
}
}
}
}
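Configuration sketch, mirroring the test later in this commit: aggregation 0 keeps the max of Popularity, aggregation 1 sums Units; someOrd stands in for an ordinal of interest:

LongValuesSource[] sources = {
  DoubleValuesSource.fromDoubleField("Popularity").toLongValuesSource(), // agg 0: max popularity
  LongValuesSource.fromLongField("Units")                                // agg 1: sum of units
};
Reducer[] reducers = {Reducer.MAX, Reducer.SUM};
LongAggregationsFacetRecorder aggRecorder = new LongAggregationsFacetRecorder(sources, reducers);
// ... collect via FacetFieldCollectorManager, then:
long maxPopularity = aggRecorder.getRecordedValue(someOrd, 0); // someOrd: hypothetical ordinal
long unitsSum = aggRecorder.getRecordedValue(someOrd, 1);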


@ -0,0 +1,87 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.recorders;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.sandbox.facet.cutters.FacetCutter;
import org.apache.lucene.sandbox.facet.iterators.OrdinalIterator;
/**
* {@link FacetRecorder} that contains multiple FacetRecorders.
*
* @lucene.experimental
*/
public final class MultiFacetsRecorder implements FacetRecorder {
private final FacetRecorder[] delegates;
/** Constructor */
public MultiFacetsRecorder(FacetRecorder... delegates) {
this.delegates = delegates;
}
@Override
public LeafFacetRecorder getLeafRecorder(LeafReaderContext context) throws IOException {
LeafFacetRecorder[] leafDelegates = new LeafFacetRecorder[delegates.length];
for (int i = 0; i < delegates.length; i++) {
leafDelegates[i] = delegates[i].getLeafRecorder(context);
}
return new MultiFacetsLeafRecorder(leafDelegates);
}
@Override
public OrdinalIterator recordedOrds() {
throw new UnsupportedOperationException(
"Not supported, call recordedOrds for sub-recorders instead");
}
@Override
public boolean isEmpty() {
throw new UnsupportedOperationException(
"Not supported, call isEmpty for sub-recorders instead");
}
@Override
public void reduce(FacetCutter facetCutter) throws IOException {
for (FacetRecorder recorder : delegates) {
recorder.reduce(facetCutter);
}
}
@Override
public boolean contains(int ordinal) {
throw new UnsupportedOperationException(
"Not supported, call contains for sub-recorders instead");
}
private static final class MultiFacetsLeafRecorder implements LeafFacetRecorder {
private final LeafFacetRecorder[] delegates;
private MultiFacetsLeafRecorder(LeafFacetRecorder[] delegates) {
this.delegates = delegates;
}
@Override
public void record(int docId, int facetOrd) throws IOException {
for (LeafFacetRecorder leafRecorder : delegates) {
leafRecorder.record(docId, facetOrd);
}
}
}
}
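Sketch of collecting counts and long aggregations in one pass (mirrors the test later in this commit; cutter, sources, reducers, searcher and query are assumed to exist):

CountFacetRecorder counts = new CountFacetRecorder();
LongAggregationsFacetRecorder aggregations = new LongAggregationsFacetRecorder(sources, reducers);
MultiFacetsRecorder multiRecorder = new MultiFacetsRecorder(counts, aggregations);
FacetFieldCollectorManager<MultiFacetsRecorder> collectorManager =
    new FacetFieldCollectorManager<>(cutter, multiRecorder);
searcher.search(query, collectorManager);
// Read results from the sub-recorders (counts, aggregations), not from multiRecorder itself.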


@ -0,0 +1,85 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet.recorders;
/**
* Reducer for numeric values.
*
* @lucene.experimental
*/
public interface Reducer {
/** Int values reducer. */
int reduce(int a, int b);
/** Long values reducer. */
long reduce(long a, long b);
/** Float values reducer. */
float reduce(float a, float b);
/** Double values reducer. */
double reduce(double a, double b);
/** Reducer that returns MAX of two values. */
Reducer MAX =
new Reducer() {
@Override
public int reduce(int a, int b) {
return Math.max(a, b);
}
@Override
public long reduce(long a, long b) {
return Math.max(a, b);
}
@Override
public float reduce(float a, float b) {
return Math.max(a, b);
}
@Override
public double reduce(double a, double b) {
return Math.max(a, b);
}
};
/** Reducer that returns SUM of two values. */
Reducer SUM =
new Reducer() {
@Override
public int reduce(int a, int b) {
return Math.addExact(a, b);
}
@Override
public long reduce(long a, long b) {
return Math.addExact(a, b);
}
@Override
public float reduce(float a, float b) {
return a + b;
}
@Override
public double reduce(double a, double b) {
return a + b;
}
};
}
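The built-in reducers cover MAX and SUM; any other aggregation is just another implementation, e.g. this hypothetical MIN:

// Sketch: a reducer that keeps the minimum of two values for every numeric type.
Reducer MIN =
    new Reducer() {
      @Override
      public int reduce(int a, int b) {
        return Math.min(a, b);
      }

      @Override
      public long reduce(long a, long b) {
        return Math.min(a, b);
      }

      @Override
      public float reduce(float a, float b) {
        return Math.min(a, b);
      }

      @Override
      public double reduce(double a, double b) {
        return Math.min(a, b);
      }
    };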


@ -0,0 +1,23 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Sandbox faceting: classes that can record per-ordinal data, e.g. aggregations per facet ordinal
* can be recorded.
*
* @lucene.experimental
*/
package org.apache.lucene.sandbox.facet.recorders;


@ -0,0 +1,198 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.sandbox.facet.iterators.CandidateSetOrdinalIterator;
import org.apache.lucene.sandbox.facet.iterators.ComparableSupplier;
import org.apache.lucene.sandbox.facet.iterators.OrdinalIterator;
import org.apache.lucene.sandbox.facet.iterators.TaxonomyChildrenOrdinalIterator;
import org.apache.lucene.sandbox.facet.iterators.TopnOrdinalIterator;
import org.apache.lucene.sandbox.facet.labels.OrdToLabel;
import org.apache.lucene.sandbox.facet.labels.TaxonomyOrdLabelBiMap;
import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder;
import org.apache.lucene.tests.util.LuceneTestCase;
public abstract class SandboxFacetTestCase extends LuceneTestCase {
// TODO: We don't have access to the overall count for all facets from the count recorder, and we
// can't compute it as a SUM of values for each facet ordinal because we need to respect cases where
// the same doc belongs to multiple facets (e.g. overlapping ranges or multi-value fields). In most
// cases we can already access the value. E.g. for facets with hierarchy (taxonomy or SSDV) we can
// read the value for the parent facet ordinal. I believe the only case that requires code changes is
// range facets. To solve it we can add a parameter to the range FacetCutter to assign/yield a special
// facet ordinal for every document that matches at least one range from the list. Overall,
// sandbox facet tests don't have to use FacetResult, so we can change them to assert facet labels and
// recorded results directly and avoid the need for this constant.
static final int VALUE_CANT_BE_COMPUTED = Integer.MIN_VALUE;
protected void assertNumericValuesEquals(Number a, Number b) {
assertTrue(a.getClass().isInstance(b));
if (a instanceof Float) {
assertEquals(a.floatValue(), b.floatValue(), a.floatValue() / 1e5);
} else if (a instanceof Double) {
assertEquals(a.doubleValue(), b.doubleValue(), a.doubleValue() / 1e5);
} else {
assertEquals(a, b);
}
}
protected void assertFacetResult(
FacetResult result,
String expectedDim,
String[] expectedPath,
int expectedChildCount,
Number expectedValue,
LabelAndValue... expectedChildren) {
assertEquals(expectedDim, result.dim);
assertArrayEquals(expectedPath, result.path);
assertEquals(expectedChildCount, result.childCount);
assertNumericValuesEquals(expectedValue, result.value);
assertEquals(expectedChildren.length, result.labelValues.length);
// assert children equal with no assumption of the children ordering
assertTrue(Arrays.asList(result.labelValues).containsAll(Arrays.asList(expectedChildren)));
}
FacetResult getTopChildrenByCount(
CountFacetRecorder countFacetRecorder,
TaxonomyReader taxoReader,
int topN,
String dimension,
String... path)
throws IOException {
ComparableSupplier<ComparableUtils.ByCountComparable> countComparable =
ComparableUtils.byCount(countFacetRecorder);
TaxonomyOrdLabelBiMap ordLabels = new TaxonomyOrdLabelBiMap(taxoReader);
FacetLabel parentLabel = new FacetLabel(dimension, path);
OrdinalIterator childrenIterator =
new TaxonomyChildrenOrdinalIterator(
countFacetRecorder.recordedOrds(),
taxoReader.getParallelTaxonomyArrays().parents(),
ordLabels.getOrd(parentLabel));
OrdinalIterator topByCountOrds =
new TopnOrdinalIterator<>(childrenIterator, countComparable, topN);
// Get the array of final ordinals - we need to use all of them to get labels first, and then to
// get counts, but OrdinalIterator only allows reading ordinals once.
int[] resultOrdinals = topByCountOrds.toArray();
FacetLabel[] labels = ordLabels.getLabels(resultOrdinals);
List<LabelAndValue> labelsAndValues = new ArrayList<>(labels.length);
int childCount = 0;
for (int i = 0; i < resultOrdinals.length; i++) {
int count = countFacetRecorder.getCount(resultOrdinals[i]);
labelsAndValues.add(new LabelAndValue(labels[i].lastComponent(), count));
childCount++;
}
// int value = countFacetRecorder.getCount(parentOrdinal);
return new FacetResult(
dimension,
path,
VALUE_CANT_BE_COMPUTED,
labelsAndValues.toArray(new LabelAndValue[0]),
childCount);
}
FacetResult getAllChildren(
CountFacetRecorder countFacetRecorder,
TaxonomyReader taxoReader,
String dimension,
String... path)
throws IOException {
TaxonomyOrdLabelBiMap ordLabels = new TaxonomyOrdLabelBiMap(taxoReader);
FacetLabel parentLabel = new FacetLabel(dimension, path);
int parentOrdinal = ordLabels.getOrd(parentLabel);
OrdinalIterator childrenIterator =
new TaxonomyChildrenOrdinalIterator(
countFacetRecorder.recordedOrds(),
taxoReader.getParallelTaxonomyArrays().parents(),
parentOrdinal);
// Get the array of final ordinals - we need to use all of them to get labels first, and then to
// get counts, but OrdinalIterator only allows reading ordinals once.
int[] resultOrdinals = childrenIterator.toArray();
FacetLabel[] labels = ordLabels.getLabels(resultOrdinals);
List<LabelAndValue> labelsAndValues = new ArrayList<>(labels.length);
int childCount = 0;
for (int i = 0; i < resultOrdinals.length; i++) {
int count = countFacetRecorder.getCount(resultOrdinals[i]);
labelsAndValues.add(new LabelAndValue(labels[i].lastComponent(), count));
childCount++;
}
// int value = countFacetRecorder.getCount(parentOrdinal);
return new FacetResult(
dimension,
path,
VALUE_CANT_BE_COMPUTED,
labelsAndValues.toArray(new LabelAndValue[0]),
childCount);
}
FacetResult getAllSortByOrd(
int[] resultOrdinals,
CountFacetRecorder countFacetRecorder,
String dimension,
OrdToLabel ordLabels)
throws IOException {
ComparableUtils.sort(resultOrdinals, ComparableUtils.byOrdinal());
FacetLabel[] labels = ordLabels.getLabels(resultOrdinals);
List<LabelAndValue> labelsAndValues = new ArrayList<>(labels.length);
int childCount = 0;
for (int i = 0; i < resultOrdinals.length; i++) {
int count = countFacetRecorder.getCount(resultOrdinals[i]);
labelsAndValues.add(new LabelAndValue(labels[i].lastComponent(), count));
childCount++;
}
return new FacetResult(
dimension,
new String[0],
VALUE_CANT_BE_COMPUTED,
labelsAndValues.toArray(new LabelAndValue[0]),
childCount);
}
int getSpecificValue(
CountFacetRecorder countFacetRecorder, TaxonomyReader taxoReader, String... path)
throws IOException {
TaxonomyOrdLabelBiMap ordLabels = new TaxonomyOrdLabelBiMap(taxoReader);
FacetLabel label = new FacetLabel(path);
int facetOrd = ordLabels.getOrd(label);
return countFacetRecorder.getCount(facetOrd);
}
int[] getCountsForRecordedCandidates(
CountFacetRecorder countFacetRecorder, TaxonomyReader taxoReader, FacetLabel[] candidates)
throws IOException {
int[] resultOrds =
new CandidateSetOrdinalIterator(
countFacetRecorder, candidates, new TaxonomyOrdLabelBiMap(taxoReader))
.toArray();
int[] counts = new int[resultOrds.length];
for (int i = 0; i < resultOrds.length; i++) {
counts[i] = countFacetRecorder.getCount(resultOrds[i]);
}
return counts;
}
}


@ -0,0 +1,100 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet;
import java.io.IOException;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.sandbox.facet.cutters.FacetCutter;
import org.apache.lucene.sandbox.facet.cutters.LeafFacetCutter;
import org.apache.lucene.sandbox.facet.iterators.CandidateSetOrdinalIterator;
import org.apache.lucene.sandbox.facet.iterators.OrdinalIterator;
import org.apache.lucene.sandbox.facet.labels.LabelToOrd;
import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder;
import org.apache.lucene.sandbox.facet.recorders.FacetRecorder;
import org.apache.lucene.sandbox.facet.recorders.LeafFacetRecorder;
import org.apache.lucene.tests.util.LuceneTestCase;
/** Tests for {@link CandidateSetOrdinalIterator}. */
public class TestCandidateSetOrdinalIterator extends LuceneTestCase {
/** LabelToOrd that parses label's string to get int ordinal */
private LabelToOrd mockLabelToOrd =
new LabelToOrd() {
@Override
public int getOrd(FacetLabel label) {
return Integer.valueOf(label.lastComponent());
}
@Override
public int[] getOrds(FacetLabel[] labels) {
int[] result = new int[labels.length];
for (int i = 0; i < result.length; i++) {
result[i] = getOrd(labels[i]);
}
return result;
}
};
private FacetCutter mockFacetCutter =
new FacetCutter() {
@Override
public LeafFacetCutter createLeafCutter(LeafReaderContext context) throws IOException {
return null;
}
};
public void testBasic() throws IOException {
FacetRecorder recorder = new CountFacetRecorder();
LeafFacetRecorder leafRecorder = recorder.getLeafRecorder(null);
leafRecorder.record(0, 0);
leafRecorder.record(0, 3);
recorder.reduce(mockFacetCutter);
FacetLabel[] candidates =
new FacetLabel[] {
new FacetLabel("0"),
new FacetLabel("1"),
new FacetLabel(String.valueOf(LabelToOrd.INVALID_ORD)),
new FacetLabel("3")
};
// Note that "1" is filtered out as it was never recorded
assertArrayEquals(
new int[] {0, 3},
new CandidateSetOrdinalIterator(recorder, candidates, mockLabelToOrd).toArray());
}
public void testEmptyRecorder() throws IOException {
FacetRecorder recorder = new CountFacetRecorder();
recorder.reduce(mockFacetCutter);
FacetLabel[] candidates =
new FacetLabel[] {
new FacetLabel("0"),
new FacetLabel("1"),
new FacetLabel(String.valueOf(LabelToOrd.INVALID_ORD)),
new FacetLabel("3")
};
// Note that "1" is filtered out as it was never recorded
assertEquals(
OrdinalIterator.NO_MORE_ORDS,
new CandidateSetOrdinalIterator(recorder, candidates, mockLabelToOrd).nextOrd());
}
}


@ -0,0 +1,478 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet;
import static org.apache.lucene.facet.FacetsConfig.DEFAULT_INDEX_FIELD_NAME;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.sandbox.facet.cutters.TaxonomyFacetsCutter;
import org.apache.lucene.sandbox.facet.iterators.CandidateSetOrdinalIterator;
import org.apache.lucene.sandbox.facet.iterators.ComparableSupplier;
import org.apache.lucene.sandbox.facet.iterators.OrdinalIterator;
import org.apache.lucene.sandbox.facet.iterators.TaxonomyChildrenOrdinalIterator;
import org.apache.lucene.sandbox.facet.iterators.TopnOrdinalIterator;
import org.apache.lucene.sandbox.facet.labels.TaxonomyOrdLabelBiMap;
import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder;
import org.apache.lucene.sandbox.facet.recorders.FacetRecorder;
import org.apache.lucene.sandbox.facet.recorders.LongAggregationsFacetRecorder;
import org.apache.lucene.sandbox.facet.recorders.MultiFacetsRecorder;
import org.apache.lucene.sandbox.facet.recorders.Reducer;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.LongValuesSource;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.util.IOUtils;
/** Test for {@link FacetRecorder} */
public class TestFacetRecorders extends SandboxFacetTestCase {
public void testCountAndLongAggregationRecordersBasic() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
// Writes facet ords to a separate directory from the
// main index:
DirectoryTaxonomyWriter taxoWriter =
new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
config.setHierarchical("Publish Date", true);
config.setMultiValued("Publish Date", random().nextBoolean());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new FacetField("Author", "Bob"));
doc.add(new FacetField("Publish Date", "2010", "10", "15"));
doc.add(new NumericDocValuesField("Units", 9));
doc.add(new DoubleDocValuesField("Popularity", 3.5d));
doc.add(new StringField("Availability", "yes", Field.Store.NO));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Lisa"));
doc.add(new FacetField("Publish Date", "2010"));
doc.add(new NumericDocValuesField("Units", 2));
doc.add(new DoubleDocValuesField("Popularity", 4.1D));
doc.add(new StringField("Availability", "yes", Field.Store.NO));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Lisa"));
doc.add(new FacetField("Publish Date", "2012", "1", "1"));
doc.add(new NumericDocValuesField("Units", 5));
doc.add(new DoubleDocValuesField("Popularity", 3.9D));
doc.add(new StringField("Availability", "yes", Field.Store.NO));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Susan"));
doc.add(new FacetField("Publish Date", "2012", "1", "7"));
doc.add(new NumericDocValuesField("Units", 7));
doc.add(new DoubleDocValuesField("Popularity", 4D));
doc.add(new StringField("Availability", "yes", Field.Store.NO));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Frank"));
doc.add(new FacetField("Publish Date", "1999", "5", "5"));
doc.add(new NumericDocValuesField("Units", 6));
doc.add(new DoubleDocValuesField("Popularity", 7.9D));
doc.add(new StringField("Availability", "yes", Field.Store.NO));
writer.addDocument(config.build(taxoWriter, doc));
// Add a document that is not returned by a query
doc = new Document();
doc.add(new FacetField("Author", "John"));
doc.add(new FacetField("Publish Date", "2024", "11", "12"));
doc.add(new NumericDocValuesField("Units", 200));
doc.add(new DoubleDocValuesField("Popularity", 13D));
doc.add(new StringField("Availability", "no", Field.Store.NO));
writer.addDocument(config.build(taxoWriter, doc));
// NRT open
IndexSearcher searcher = newSearcher(writer.getReader());
// NRT open
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
Query query = new TermQuery(new Term("Availability", "yes"));
TaxonomyFacetsCutter defaultTaxoCutter =
new TaxonomyFacetsCutter(DEFAULT_INDEX_FIELD_NAME, config, taxoReader);
LongValuesSource[] longValuesSources = new LongValuesSource[2];
Reducer[] reducers = new Reducer[2];
// popularity:max
longValuesSources[0] = DoubleValuesSource.fromDoubleField("Popularity").toLongValuesSource();
reducers[0] = Reducer.MAX;
// units:sum
longValuesSources[1] = LongValuesSource.fromLongField("Units");
reducers[1] = Reducer.SUM;
LongAggregationsFacetRecorder longAggregationsFacetRecorder =
new LongAggregationsFacetRecorder(longValuesSources, reducers);
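    // Each values source is paired with the reducer at the same index: agg0 keeps the MAX of
    // Popularity and agg1 keeps the SUM of Units for every facet ordinal.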
final CountFacetRecorder countRecorder = new CountFacetRecorder();
// Compute both counts and aggregations
MultiFacetsRecorder multiFacetsRecorder =
new MultiFacetsRecorder(countRecorder, longAggregationsFacetRecorder);
FacetFieldCollectorManager<MultiFacetsRecorder> collectorManager =
new FacetFieldCollectorManager<>(defaultTaxoCutter, multiFacetsRecorder);
searcher.search(query, collectorManager);
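    // A single collection pass populates both recorders through MultiFacetsRecorder, so they
    // should have recorded the same set of ordinals.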
int[] ordsFromCounts = countRecorder.recordedOrds().toArray();
Arrays.sort(ordsFromCounts);
int[] ordsFromAggregations = longAggregationsFacetRecorder.recordedOrds().toArray();
Arrays.sort(ordsFromAggregations);
assertArrayEquals(ordsFromCounts, ordsFromAggregations);
// Retrieve & verify results:
assertEquals(
"dim=Publish Date path=[]\n"
+ " 2010 (2, agg0=4 agg1=11)\n"
+ " 2012 (2, agg0=4 agg1=12)\n"
+ " 1999 (1, agg0=7 agg1=6)\n",
getTopChildrenWithLongAggregations(
countRecorder, taxoReader, 10, 2, longAggregationsFacetRecorder, null, "Publish Date"));
assertEquals(
"dim=Author path=[]\n"
+ " Lisa (2, agg0=4 agg1=7)\n"
+ " Bob (1, agg0=3 agg1=9)\n"
+ " Susan (1, agg0=4 agg1=7)\n"
+ " Frank (1, agg0=7 agg1=6)\n",
getTopChildrenWithLongAggregations(
countRecorder, taxoReader, 10, 2, longAggregationsFacetRecorder, null, "Author"));
assertArrayEquals(
new long[] {11, 6},
getAggregationForRecordedCandidates(
longAggregationsFacetRecorder,
1,
taxoReader,
new FacetLabel[] {
new FacetLabel("Publish Date", "2010"),
// Not in the index - skipped
new FacetLabel("Publish Date", "2025"),
// Not matched by the query - skipped
new FacetLabel("Publish Date", "2024"),
new FacetLabel("Publish Date", "1999"),
}));
assertArrayEquals(
new long[] {7, 6},
getAggregationForRecordedCandidates(
longAggregationsFacetRecorder,
1,
taxoReader,
new FacetLabel[] {
new FacetLabel("Author", "Lisa"),
// Not in the index - skipped
new FacetLabel("Author", "Christofer"),
// Not matched by the query - skipped
new FacetLabel("Author", "John"),
new FacetLabel("Author", "Frank"),
}));
writer.close();
IOUtils.close(taxoWriter, searcher.getIndexReader(), taxoReader, taxoDir, dir);
}
/**
* Test that counts and long aggregations are correct when different index segments have different
* facet ordinals.
*/
public void testCountAndLongAggregationRecordersMultipleSegments() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
// Writes facet ords to a separate directory from the
// main index:
DirectoryTaxonomyWriter taxoWriter =
new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
config.setHierarchical("Publish Date", true);
config.setMultiValued("Publish Date", random().nextBoolean());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new FacetField("Author", "Bob"));
doc.add(new FacetField("Publish Date", "2010", "10", "15"));
doc.add(new NumericDocValuesField("Units", 9));
doc.add(new DoubleDocValuesField("Popularity", 3.5d));
writer.addDocument(config.build(taxoWriter, doc));
writer.commit();
doc = new Document();
doc.add(new FacetField("Author", "Lisa"));
doc.add(new FacetField("Publish Date", "2012", "10", "20"));
doc.add(new NumericDocValuesField("Units", 2));
doc.add(new DoubleDocValuesField("Popularity", 4.1D));
writer.addDocument(config.build(taxoWriter, doc));
writer.commit();
// NRT open
IndexSearcher searcher = newSearcher(writer.getReader());
// NRT open
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
Query query = new MatchAllDocsQuery();
TaxonomyFacetsCutter defaultTaxoCutter =
new TaxonomyFacetsCutter(DEFAULT_INDEX_FIELD_NAME, config, taxoReader);
LongValuesSource[] longValuesSources = new LongValuesSource[2];
Reducer[] reducers = new Reducer[2];
// popularity:max
longValuesSources[0] = DoubleValuesSource.fromDoubleField("Popularity").toLongValuesSource();
reducers[0] = Reducer.MAX;
// units:sum
longValuesSources[1] = LongValuesSource.fromLongField("Units");
reducers[1] = Reducer.SUM;
LongAggregationsFacetRecorder longAggregationsFacetRecorder =
new LongAggregationsFacetRecorder(longValuesSources, reducers);
final CountFacetRecorder countRecorder = new CountFacetRecorder();
// Compute both counts and aggregations
MultiFacetsRecorder multiFacetsRecorder =
new MultiFacetsRecorder(countRecorder, longAggregationsFacetRecorder);
FacetFieldCollectorManager<MultiFacetsRecorder> collectorManager =
new FacetFieldCollectorManager<>(defaultTaxoCutter, multiFacetsRecorder);
searcher.search(query, collectorManager);
// Retrieve & verify results:
assertEquals(
"dim=Publish Date path=[]\n"
+ " 2010 (1, agg0=3 agg1=9)\n"
+ " 2012 (1, agg0=4 agg1=2)\n",
getTopChildrenWithLongAggregations(
countRecorder, taxoReader, 10, 2, longAggregationsFacetRecorder, null, "Publish Date"));
assertEquals(
"dim=Author path=[]\n" + " Bob (1, agg0=3 agg1=9)\n" + " Lisa (1, agg0=4 agg1=2)\n",
getTopChildrenWithLongAggregations(
countRecorder, taxoReader, 10, 2, longAggregationsFacetRecorder, null, "Author"));
writer.close();
IOUtils.close(taxoWriter, searcher.getIndexReader(), taxoReader, taxoDir, dir);
}
public void testSortByLongAggregation() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
// Writes facet ords to a separate directory from the
// main index:
DirectoryTaxonomyWriter taxoWriter =
new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
config.setHierarchical("Publish Date", true);
config.setMultiValued("Publish Date", random().nextBoolean());
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new FacetField("Author", "Bob"));
doc.add(new FacetField("Publish Date", "2010", "10", "15"));
doc.add(new NumericDocValuesField("Units", 9));
doc.add(new DoubleDocValuesField("Popularity", 3.5d));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Lisa"));
doc.add(new FacetField("Publish Date", "2010", "10", "20"));
doc.add(new NumericDocValuesField("Units", 2));
doc.add(new DoubleDocValuesField("Popularity", 4.1D));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Lisa"));
doc.add(new FacetField("Publish Date", "2012", "1", "1"));
doc.add(new NumericDocValuesField("Units", 5));
doc.add(new DoubleDocValuesField("Popularity", 3.9D));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Susan"));
doc.add(new FacetField("Publish Date", "2012", "1", "7"));
doc.add(new NumericDocValuesField("Units", 7));
doc.add(new DoubleDocValuesField("Popularity", 4D));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Frank"));
doc.add(new FacetField("Publish Date", "1999", "5", "5"));
doc.add(new NumericDocValuesField("Units", 6));
doc.add(new DoubleDocValuesField("Popularity", 7.9D));
writer.addDocument(config.build(taxoWriter, doc));
// NRT open
IndexSearcher searcher = newSearcher(writer.getReader());
// NRT open
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
Query query = new MatchAllDocsQuery();
TaxonomyFacetsCutter defaultTaxoCutter =
new TaxonomyFacetsCutter(DEFAULT_INDEX_FIELD_NAME, config, taxoReader);
LongValuesSource[] longValuesSources = new LongValuesSource[2];
Reducer[] reducers = new Reducer[2];
// popularity:max
longValuesSources[0] = DoubleValuesSource.fromDoubleField("Popularity").toLongValuesSource();
reducers[0] = Reducer.MAX;
// units:sum
longValuesSources[1] = LongValuesSource.fromLongField("Units");
reducers[1] = Reducer.SUM;
LongAggregationsFacetRecorder longAggregationsFacetRecorder =
new LongAggregationsFacetRecorder(longValuesSources, reducers);
final CountFacetRecorder countRecorder = new CountFacetRecorder();
// Compute both counts and aggregations
MultiFacetsRecorder multiFacetsRecorder =
new MultiFacetsRecorder(countRecorder, longAggregationsFacetRecorder);
FacetFieldCollectorManager<MultiFacetsRecorder> collectorManager =
new FacetFieldCollectorManager<>(defaultTaxoCutter, multiFacetsRecorder);
searcher.search(query, collectorManager);
// Retrieve & verify results:
assertEquals(
"dim=Publish Date path=[]\n"
+ " 2012 (2, agg0=4 agg1=12)\n"
+ " 2010 (2, agg0=4 agg1=11)\n"
+ " 1999 (1, agg0=7 agg1=6)\n",
getTopChildrenWithLongAggregations(
countRecorder, taxoReader, 10, 2, longAggregationsFacetRecorder, 1, "Publish Date"));
assertEquals(
"dim=Author path=[]\n"
+ " Frank (1, agg0=7 agg1=6)\n"
+ " Lisa (2, agg0=4 agg1=7)\n"
+ " Susan (1, agg0=4 agg1=7)\n"
+ " Bob (1, agg0=3 agg1=9)\n",
getTopChildrenWithLongAggregations(
countRecorder, taxoReader, 10, 2, longAggregationsFacetRecorder, 0, "Author"));
writer.close();
IOUtils.close(taxoWriter, searcher.getIndexReader(), taxoReader, taxoDir, dir);
}
private String getTopChildrenWithLongAggregations(
CountFacetRecorder countFacetRecorder,
TaxonomyReader taxoReader,
int topN,
int numOfAggregations,
LongAggregationsFacetRecorder longAggregationsFacetRecorder,
Integer sortByLongAggregationId,
String dimension,
String... path)
throws IOException {
StringBuilder resultBuilder = new StringBuilder();
resultBuilder.append("dim=");
resultBuilder.append(dimension);
resultBuilder.append(" path=");
resultBuilder.append(Arrays.toString(path));
resultBuilder.append('\n');
TaxonomyOrdLabelBiMap ordLabels = new TaxonomyOrdLabelBiMap(taxoReader);
FacetLabel parentLabel = new FacetLabel(dimension, path);
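    // Keep only recorded ordinals that are direct children of the requested dimension/path,
    // using the taxonomy parents array.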
    OrdinalIterator childrenIterator =
new TaxonomyChildrenOrdinalIterator(
countFacetRecorder.recordedOrds(),
taxoReader.getParallelTaxonomyArrays().parents(),
ordLabels.getOrd(parentLabel));
final int[] resultOrdinals;
if (sortByLongAggregationId != null) {
ComparableSupplier<ComparableUtils.ByAggregatedValueComparable> comparableSupplier =
ComparableUtils.byAggregatedValue(
countFacetRecorder, longAggregationsFacetRecorder, sortByLongAggregationId);
      OrdinalIterator topByAggregationOrds =
          new TopnOrdinalIterator<>(childrenIterator, comparableSupplier, topN);
      resultOrdinals = topByAggregationOrds.toArray();
} else {
ComparableSupplier<ComparableUtils.ByCountComparable> countComparable =
ComparableUtils.byCount(countFacetRecorder);
OrdinalIterator topByCountOrds =
          new TopnOrdinalIterator<>(childrenIterator, countComparable, topN);
resultOrdinals = topByCountOrds.toArray();
}
FacetLabel[] labels = ordLabels.getLabels(resultOrdinals);
for (int i = 0; i < resultOrdinals.length; i++) {
int facetOrdinal = resultOrdinals[i];
int count = countFacetRecorder.getCount(facetOrdinal);
resultBuilder.append(" ");
resultBuilder.append(labels[i].lastComponent());
resultBuilder.append(" (");
resultBuilder.append(count);
resultBuilder.append(", ");
for (int a = 0; a < numOfAggregations; a++) {
resultBuilder.append(" agg");
resultBuilder.append(a);
resultBuilder.append("=");
resultBuilder.append(longAggregationsFacetRecorder.getRecordedValue(facetOrdinal, a));
}
resultBuilder.append(")");
resultBuilder.append("\n");
}
// int value = countFacetRecorder.getCount(parentOrdinal);
return resultBuilder.toString();
}
long[] getAggregationForRecordedCandidates(
LongAggregationsFacetRecorder aggregationsRecorder,
int aggregationId,
TaxonomyReader taxoReader,
FacetLabel[] candidates)
throws IOException {
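    // CandidateSetOrdinalIterator resolves candidate labels to ordinals and keeps only the
    // candidates that were actually recorded during collection.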
int[] resultOrds =
new CandidateSetOrdinalIterator(
aggregationsRecorder, candidates, new TaxonomyOrdLabelBiMap(taxoReader))
.toArray();
long[] result = new long[resultOrds.length];
for (int i = 0; i < resultOrds.length; i++) {
result[i] = aggregationsRecorder.getRecordedValue(resultOrds[i], aggregationId);
}
return result;
}
}


@ -0,0 +1,841 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.facet.FacetResult;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.facet.LongValueFacetCounts;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.sandbox.facet.cutters.LongValueFacetCutter;
import org.apache.lucene.sandbox.facet.iterators.ComparableSupplier;
import org.apache.lucene.sandbox.facet.iterators.OrdinalIterator;
import org.apache.lucene.sandbox.facet.iterators.TopnOrdinalIterator;
import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.TestUtil;
/** Tests long value facets, based on TestLongValueFacetCounts. */
public class TestLongValueFacet extends SandboxFacetTestCase {
public void testBasic() throws Exception {
Directory d = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), d);
for (long l = 0; l < 100; l++) {
Document doc = new Document();
doc.add(new NumericDocValuesField("field", l % 5));
w.addDocument(doc);
}
// Also add Long.MAX_VALUE
Document doc = new Document();
doc.add(new NumericDocValuesField("field", Long.MAX_VALUE));
w.addDocument(doc);
IndexReader r = w.getReader();
w.close();
IndexSearcher s = newSearcher(r);
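    // LongValueFacetCutter maps each distinct long value of the field to a facet ordinal;
    // CountFacetRecorder then counts matching documents per ordinal.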
LongValueFacetCutter longValuesFacetCutter = new LongValueFacetCutter("field");
CountFacetRecorder countRecorder = new CountFacetRecorder();
FacetFieldCollectorManager<CountFacetRecorder> collectorManager =
new FacetFieldCollectorManager<>(longValuesFacetCutter, countRecorder);
s.search(new MatchAllDocsQuery(), collectorManager);
FacetResult result = getAllChildrenSortByValue("field", longValuesFacetCutter, countRecorder);
assertEquals(
"dim=field path=[] value=-2147483648 childCount=6\n 0 (20)\n 1 (20)\n 2 (20)\n 3 (20)\n "
+ "4 (20)\n 9223372036854775807 (1)\n",
result.toString());
FacetResult topChildrenResult =
getTopChildren(2, "field", longValuesFacetCutter, countRecorder);
assertEquals(
"dim=field path=[] value=-2147483648 childCount=2\n 0 (20)\n 1 (20)\n",
topChildrenResult.toString());
assertFacetResult(
getAllChildren("field", longValuesFacetCutter, countRecorder),
"field",
new String[0],
6,
-2147483648,
new LabelAndValue("0", 20),
new LabelAndValue("1", 20),
new LabelAndValue("2", 20),
new LabelAndValue("3", 20),
new LabelAndValue("4", 20),
new LabelAndValue("9223372036854775807", 1));
r.close();
d.close();
}
public void testOnlyBigLongs() throws Exception {
Directory d = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), d);
for (long l = 0; l < 3; l++) {
Document doc = new Document();
doc.add(new NumericDocValuesField("field", Long.MAX_VALUE - l));
w.addDocument(doc);
}
IndexReader r = w.getReader();
w.close();
IndexSearcher s = newSearcher(r);
LongValueFacetCutter longValuesFacetCutter = new LongValueFacetCutter("field");
CountFacetRecorder countRecorder = new CountFacetRecorder();
FacetFieldCollectorManager<CountFacetRecorder> collectorManager =
new FacetFieldCollectorManager<>(longValuesFacetCutter, countRecorder);
s.search(new MatchAllDocsQuery(), collectorManager);
FacetResult result = getAllChildrenSortByValue("field", longValuesFacetCutter, countRecorder);
assertFacetResult(
getAllChildren("field", longValuesFacetCutter, countRecorder),
"field",
new String[0],
3,
-2147483648,
new LabelAndValue("9223372036854775805", 1),
new LabelAndValue("9223372036854775806", 1),
new LabelAndValue("9223372036854775807", 1));
    // Since we have no insight into the value order in the hashMap, we sort labels by value and
    // count in ascending order to compare with expected results
Arrays.sort(
result.labelValues,
Comparator.comparing((LabelAndValue a) -> a.label)
.thenComparingLong(a -> a.value.longValue()));
assertEquals(
"dim=field path=[] value=-2147483648 childCount=3\n 9223372036854775805 (1)\n "
+ "9223372036854775806 (1)\n 9223372036854775807 (1)\n",
result.toString());
r.close();
d.close();
}
public void testRandomSingleValued() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
int docCount = atLeast(1000);
double missingChance = random().nextDouble();
long maxValue;
if (random().nextBoolean()) {
maxValue = random().nextLong() & Long.MAX_VALUE;
} else {
maxValue = random().nextInt(1000);
}
if (VERBOSE) {
System.out.println(
"TEST: valueCount="
+ docCount
+ " valueRange=-"
+ maxValue
+ "-"
+ maxValue
+ " missingChance="
+ missingChance);
}
Long[] values = new Long[docCount];
// int missingCount = 0;
for (int i = 0; i < docCount; i++) {
Document doc = new Document();
doc.add(new IntPoint("id", i));
if (random().nextDouble() > missingChance) {
long value = TestUtil.nextLong(random(), -maxValue, maxValue);
doc.add(new NumericDocValuesField("field", value));
values[i] = value;
} else {
// missingCount++;
}
w.addDocument(doc);
}
IndexReader r = w.getReader();
w.close();
IndexSearcher s = newSearcher(r);
int iters = atLeast(100);
for (int iter = 0; iter < iters; iter++) {
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter);
System.out.println(" test all docs");
}
// all docs
Map<Long, Integer> expected = new HashMap<>();
int expectedChildCount = 0;
for (int i = 0; i < docCount; i++) {
if (values[i] != null) {
Integer curCount = expected.get(values[i]);
if (curCount == null) {
curCount = 0;
expectedChildCount++;
}
expected.put(values[i], curCount + 1);
}
}
List<Map.Entry<Long, Integer>> expectedCounts = new ArrayList<>(expected.entrySet());
// sort by value
expectedCounts.sort(Comparator.comparingLong(Map.Entry::getKey));
LongValueFacetCutter longValuesFacetCutter = new LongValueFacetCutter("field");
CountFacetRecorder countRecorder = new CountFacetRecorder();
FacetFieldCollectorManager<CountFacetRecorder> collectorManager =
new FacetFieldCollectorManager<>(longValuesFacetCutter, countRecorder);
s.search(new MatchAllDocsQuery(), collectorManager);
/* TODO: uncomment and adjust when LongValueFacetCutter supports value sources
if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println(" use value source");
}
if (random().nextBoolean()) {
facetCounts =
new LongValueFacetCounts("field", LongValuesSource.fromLongField("field"), fc);
} else if (random().nextBoolean()) {
facetCounts =
new LongValueFacetCounts("field", MultiLongValuesSource.fromLongField("field"), fc);
} else {
facetCounts =
new LongValueFacetCounts(
"field",
MultiLongValuesSource.fromSingleValued(LongValuesSource.fromLongField("field")),
fc);
}
} else { */
if (VERBOSE) {
System.out.println(" use doc values");
}
FacetResult actual = getAllChildrenSortByValue("field", longValuesFacetCutter, countRecorder);
assertSame(
"all docs, sort facets by value",
expectedCounts,
expectedChildCount,
-2147483648,
// docCount - missingCount,
actual,
Integer.MAX_VALUE);
// test getAllChildren
expectedCounts.sort(
Map.Entry.<Long, Integer>comparingByKey().thenComparingLong(Map.Entry::getValue));
FacetResult allChildren = getAllChildren("field", longValuesFacetCutter, countRecorder);
// sort labels by value, count in ascending order
Arrays.sort(
allChildren.labelValues,
Comparator.comparing((LabelAndValue a) -> a.label)
.thenComparingLong(a -> a.value.longValue()));
assertSame(
"test getAllChildren",
expectedCounts,
expectedChildCount,
-2147483648,
// docCount - missingCount,
actual,
Integer.MAX_VALUE);
// sort by count
expectedCounts.sort(
(a, b) -> {
int cmp = -Integer.compare(a.getValue(), b.getValue());
if (cmp == 0) {
// tie break by value
cmp = Long.compare(a.getKey(), b.getKey());
}
return cmp;
});
int topN;
if (random().nextBoolean()) {
topN = docCount;
} else {
topN = random().nextInt(1, docCount);
}
if (VERBOSE) {
System.out.println(" topN=" + topN);
}
actual = getTopChildren(topN, "field", longValuesFacetCutter, countRecorder);
assertSame(
"all docs, sort facets by count",
expectedCounts,
Math.min(topN, expectedChildCount),
// expectedChildCount,
-2147483648,
// docCount - missingCount,
actual,
topN);
// subset of docs
int minId = random().nextInt(docCount);
int maxId = random().nextInt(docCount);
if (minId > maxId) {
int tmp = minId;
minId = maxId;
maxId = tmp;
}
if (VERBOSE) {
System.out.println(" test id range " + minId + "-" + maxId);
}
longValuesFacetCutter = new LongValueFacetCutter("field");
countRecorder = new CountFacetRecorder();
collectorManager = new FacetFieldCollectorManager<>(longValuesFacetCutter, countRecorder);
s.search(IntPoint.newRangeQuery("id", minId, maxId), collectorManager);
// TODO: uncomment and change longValuesFacetCutter when LongValueFacetCutter supports value
// sources
// if (random().nextBoolean()) {
if (VERBOSE) {
System.out.println(" use doc values");
}
/*} else {
if (VERBOSE) {
System.out.println(" use value source");
}
if (random().nextBoolean()) {
facetCounts =
new LongValueFacetCounts("field", LongValuesSource.fromLongField("field"), fc);
} else if (random().nextBoolean()) {
facetCounts =
new LongValueFacetCounts("field", MultiLongValuesSource.fromLongField("field"), fc);
} else {
facetCounts =
new LongValueFacetCounts(
"field",
MultiLongValuesSource.fromSingleValued(LongValuesSource.fromLongField("field")),
fc);
}
}*/
expected = new HashMap<>();
expectedChildCount = 0;
// int totCount = 0;
for (int i = minId; i <= maxId; i++) {
if (values[i] != null) {
// totCount++;
Integer curCount = expected.get(values[i]);
if (curCount == null) {
expectedChildCount++;
curCount = 0;
}
expected.put(values[i], curCount + 1);
}
}
expectedCounts = new ArrayList<>(expected.entrySet());
// sort by value
expectedCounts.sort(Comparator.comparingLong(Map.Entry::getKey));
actual = getAllChildrenSortByValue("field", longValuesFacetCutter, countRecorder);
assertSame(
"id " + minId + "-" + maxId + ", sort facets by value",
expectedCounts,
expectedChildCount,
-2147483648,
// totCount,
actual,
Integer.MAX_VALUE);
// sort by count
expectedCounts.sort(
(a, b) -> {
int cmp = -Integer.compare(a.getValue(), b.getValue());
if (cmp == 0) {
// tie break by value
cmp = Long.compare(a.getKey(), b.getKey());
}
return cmp;
});
if (random().nextBoolean()) {
topN = docCount;
} else {
topN = random().nextInt(1, docCount);
}
actual = getTopChildren(topN, "field", longValuesFacetCutter, countRecorder);
assertSame(
"id " + minId + "-" + maxId + ", sort facets by count",
expectedCounts,
Math.min(topN, expectedChildCount),
// expectedChildCount,
-2147483648,
// totCount,
actual,
topN);
}
r.close();
dir.close();
}
public void testRandomMultiValued() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
int docCount = atLeast(1000);
double missingChance = random().nextDouble();
// sometimes exercise codec optimizations when a claimed multi valued field is in fact single
// valued:
boolean allSingleValued = rarely();
long maxValue;
if (random().nextBoolean()) {
maxValue = random().nextLong() & Long.MAX_VALUE;
} else {
maxValue = random().nextInt(1000);
}
if (VERBOSE) {
System.out.println(
"TEST: valueCount="
+ docCount
+ " valueRange=-"
+ maxValue
+ "-"
+ maxValue
+ " missingChance="
+ missingChance
+ " allSingleValued="
+ allSingleValued);
}
long[][] values = new long[docCount][];
for (int i = 0; i < docCount; i++) {
Document doc = new Document();
doc.add(new IntPoint("id", i));
if (random().nextDouble() > missingChance) {
if (allSingleValued) {
values[i] = new long[1];
} else {
values[i] = new long[TestUtil.nextInt(random(), 1, 5)];
}
for (int j = 0; j < values[i].length; j++) {
long value = TestUtil.nextLong(random(), -maxValue, maxValue);
values[i][j] = value;
doc.add(new SortedNumericDocValuesField("field", value));
}
if (VERBOSE) {
System.out.println(" doc=" + i + " values=" + Arrays.toString(values[i]));
}
// sort values to enable duplicate detection by comparing with the previous value
Arrays.sort(values[i]);
} else {
if (VERBOSE) {
System.out.println(" doc=" + i + " missing values");
}
}
w.addDocument(doc);
}
IndexReader r = w.getReader();
w.close();
IndexSearcher s = newSearcher(r);
int iters = atLeast(100);
for (int iter = 0; iter < iters; iter++) {
if (VERBOSE) {
System.out.println("\nTEST: iter=" + iter);
System.out.println(" test all docs");
}
// all docs
Map<Long, Integer> expected = new HashMap<>();
// int expectedTotalCount = 0;
for (int i = 0; i < docCount; i++) {
if (values[i] != null && values[i].length > 0) {
// expectedTotalCount++;
setExpectedFrequencies(values[i], expected);
}
}
List<Map.Entry<Long, Integer>> expectedCounts = new ArrayList<>(expected.entrySet());
int expectedChildCount = expected.size();
// sort by value
expectedCounts.sort(Comparator.comparingLong(Map.Entry::getKey));
LongValueFacetCutter longValuesFacetCutter = new LongValueFacetCutter("field");
CountFacetRecorder countRecorder = new CountFacetRecorder();
FacetFieldCollectorManager<CountFacetRecorder> collectorManager =
new FacetFieldCollectorManager<>(longValuesFacetCutter, countRecorder);
s.search(new MatchAllDocsQuery(), collectorManager);
if (VERBOSE) {
System.out.println(" use doc values");
}
// TODO: uncomment and adjust when LongValueFacetCutter supports value sources
/*if (random().nextBoolean()) {
facetCounts = new LongValueFacetCounts("field", fc);
} else {
facetCounts =
new LongValueFacetCounts("field", MultiLongValuesSource.fromLongField("field"), fc);
}*/
FacetResult actual = getAllChildrenSortByValue("field", longValuesFacetCutter, countRecorder);
assertSame(
"all docs, sort facets by value",
expectedCounts,
expectedChildCount,
-2147483648,
// expectedTotalCount,
actual,
Integer.MAX_VALUE);
// test getAllChildren
expectedCounts.sort(
Map.Entry.<Long, Integer>comparingByKey().thenComparingLong(Map.Entry::getValue));
FacetResult allChildren = getAllChildren("field", longValuesFacetCutter, countRecorder);
// sort labels by value, count in ascending order
Arrays.sort(
allChildren.labelValues,
Comparator.comparing((LabelAndValue a) -> a.label)
.thenComparingLong(a -> a.value.longValue()));
assertSame(
"test getAllChildren",
expectedCounts,
expectedChildCount,
-2147483648,
// expectedTotalCount,
actual,
Integer.MAX_VALUE);
// sort by count
expectedCounts.sort(
(a, b) -> {
int cmp = -Integer.compare(a.getValue(), b.getValue());
if (cmp == 0) {
// tie break by value
cmp = Long.compare(a.getKey(), b.getKey());
}
return cmp;
});
int topN;
if (random().nextBoolean()) {
topN = docCount;
} else {
topN = random().nextInt(1, docCount);
}
if (VERBOSE) {
System.out.println(" topN=" + topN);
}
actual = getTopChildren(topN, "field", longValuesFacetCutter, countRecorder);
assertSame(
"all docs, sort facets by count",
expectedCounts,
Math.min(topN, expectedChildCount),
// expectedChildCount,
-2147483648,
// expectedTotalCount,
actual,
topN);
// subset of docs
int minId = random().nextInt(docCount);
int maxId = random().nextInt(docCount);
if (minId > maxId) {
int tmp = minId;
minId = maxId;
maxId = tmp;
}
if (VERBOSE) {
System.out.println(" test id range " + minId + "-" + maxId);
}
longValuesFacetCutter = new LongValueFacetCutter("field");
countRecorder = new CountFacetRecorder();
collectorManager = new FacetFieldCollectorManager<>(longValuesFacetCutter, countRecorder);
s.search(IntPoint.newRangeQuery("id", minId, maxId), collectorManager);
// TODO: uncomment and adjust when LongValueFacetCutter supports value sources
/*if (random().nextBoolean()) {
facetCounts = new LongValueFacetCounts("field", fc);
} else {
facetCounts =
new LongValueFacetCounts("field", MultiLongValuesSource.fromLongField("field"), fc);
}*/
expected = new HashMap<>();
// expectedTotalCount = 0;
for (int i = minId; i <= maxId; i++) {
if (values[i] != null && values[i].length > 0) {
// expectedTotalCount++;
setExpectedFrequencies(values[i], expected);
}
}
expectedCounts = new ArrayList<>(expected.entrySet());
expectedChildCount = expected.size();
// sort by value
expectedCounts.sort(Comparator.comparingLong(Map.Entry::getKey));
actual = getAllChildrenSortByValue("field", longValuesFacetCutter, countRecorder);
assertSame(
"id " + minId + "-" + maxId + ", sort facets by value",
expectedCounts,
expectedChildCount,
-2147483648,
// expectedTotalCount,
actual,
Integer.MAX_VALUE);
// sort by count
expectedCounts.sort(
(a, b) -> {
int cmp = -Integer.compare(a.getValue(), b.getValue());
if (cmp == 0) {
// tie break by value
cmp = Long.compare(a.getKey(), b.getKey());
}
return cmp;
});
if (random().nextBoolean()) {
topN = docCount;
} else {
topN = random().nextInt(1, docCount);
}
actual = getTopChildren(topN, "field", longValuesFacetCutter, countRecorder);
assertSame(
"id " + minId + "-" + maxId + ", sort facets by count",
expectedCounts,
Math.min(expectedChildCount, topN),
// expectedChildCount,
-2147483648,
// expectedTotalCount,
actual,
topN);
}
r.close();
dir.close();
}
private void setExpectedFrequencies(long[] values, Map<Long, Integer> expected) {
long previousValue = 0;
for (int j = 0; j < values.length; j++) {
if (j == 0 || previousValue != values[j]) {
Integer curCount = expected.getOrDefault(values[j], 0);
expected.put(values[j], curCount + 1);
}
previousValue = values[j];
}
}
private static void assertSame(
String desc,
List<Map.Entry<Long, Integer>> expectedCounts,
int expectedChildCount,
int expectedTotalCount,
FacetResult actual,
int topN) {
int expectedTopN = Math.min(topN, expectedCounts.size());
if (VERBOSE) {
System.out.println(" expected topN=" + expectedTopN);
for (int i = 0; i < expectedTopN; i++) {
System.out.println(
" "
+ i
+ ": value="
+ expectedCounts.get(i).getKey()
+ " count="
+ expectedCounts.get(i).getValue());
}
System.out.println(" actual topN=" + actual.labelValues.length);
for (int i = 0; i < actual.labelValues.length; i++) {
System.out.println(
" "
+ i
+ ": value="
+ actual.labelValues[i].label
+ " count="
+ actual.labelValues[i].value);
}
}
assertEquals(desc + ": topN", expectedTopN, actual.labelValues.length);
assertEquals(desc + ": childCount", expectedChildCount, actual.childCount);
assertEquals(desc + ": totCount", expectedTotalCount, actual.value.intValue());
assertTrue(actual.labelValues.length <= topN);
for (int i = 0; i < expectedTopN; i++) {
assertEquals(
desc + ": label[" + i + "]",
Long.toString(expectedCounts.get(i).getKey()),
actual.labelValues[i].label);
assertEquals(
desc + ": counts[" + i + "]",
expectedCounts.get(i).getValue().intValue(),
actual.labelValues[i].value.intValue());
}
}
/**
* LUCENE-9964: Duplicate long values in a document field should only be counted once when using
* SortedNumericDocValuesFields
*/
public void testDuplicateLongValues() throws Exception {
Directory dir = newDirectory();
RandomIndexWriter w = new RandomIndexWriter(random(), dir);
Document doc = new Document();
// these two values are not unique in a document
doc.add(new SortedNumericDocValuesField("field", 42));
doc.add(new SortedNumericDocValuesField("field", 42));
w.addDocument(doc);
doc = new Document();
doc.add(new SortedNumericDocValuesField("field", 43));
doc.add(new SortedNumericDocValuesField("field", 43));
w.addDocument(doc);
IndexReader r = w.getReader();
w.close();
IndexSearcher s = newSearcher(r);
LongValueFacetCutter longValuesFacetCutter = new LongValueFacetCutter("field");
CountFacetRecorder countRecorder = new CountFacetRecorder();
FacetFieldCollectorManager<CountFacetRecorder> collectorManager =
new FacetFieldCollectorManager<>(longValuesFacetCutter, countRecorder);
s.search(new MatchAllDocsQuery(), collectorManager);
FacetResult fr = getAllChildrenSortByValue("field", longValuesFacetCutter, countRecorder);
for (LabelAndValue labelAndValue : fr.labelValues) {
assert labelAndValue.value.equals(1);
}
assertFacetResult(
getAllChildren("field", longValuesFacetCutter, countRecorder),
"field",
new String[0],
2,
-2147483648,
new LabelAndValue("42", 1),
new LabelAndValue("43", 1));
r.close();
dir.close();
}
/**
* Get all results sorted by value, similar to {@link
* LongValueFacetCounts#getAllChildrenSortByValue()}
*/
private FacetResult getAllChildrenSortByValue(
String fieldName,
LongValueFacetCutter longValuesFacetCutter,
CountFacetRecorder countRecorder)
throws IOException {
int[] resultOrdinals = countRecorder.recordedOrds().toArray();
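    // Sort recorded ordinals by the long value each ordinal represents before resolving labels.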
ComparableSupplier<ComparableUtils.ByLongValueComparable> comparableSupplier =
ComparableUtils.byLongValue(longValuesFacetCutter);
ComparableUtils.sort(resultOrdinals, comparableSupplier);
FacetLabel[] labels = longValuesFacetCutter.getLabels(resultOrdinals);
List<LabelAndValue> labelsAndValues = new ArrayList<>(labels.length);
int childCount = 0;
for (int i = 0; i < resultOrdinals.length; i++) {
int count = countRecorder.getCount(resultOrdinals[i]);
labelsAndValues.add(new LabelAndValue(labels[i].lastComponent(), count));
childCount++;
}
// int value = countFacetRecorder.getCount(parentOrdinal);
return new FacetResult(
fieldName,
new String[0],
VALUE_CANT_BE_COMPUTED,
labelsAndValues.toArray(new LabelAndValue[0]),
childCount);
}
/**
* Get top results sorted by count with tie-break by value, similar to {@link
* LongValueFacetCounts#getTopChildren(int, String, String...)}
*/
private FacetResult getTopChildren(
int topN,
String field,
LongValueFacetCutter longValuesFacetCutter,
CountFacetRecorder countRecorder)
throws IOException {
ComparableSupplier<ComparableUtils.ByCountAndLongValueComparable> comparableSupplier =
ComparableUtils.byCount(countRecorder, longValuesFacetCutter);
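    // TopnOrdinalIterator keeps only the topN ordinals, ordered by count with the long value as
    // tie-break.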
OrdinalIterator topByCountOrds =
new TopnOrdinalIterator<>(countRecorder.recordedOrds(), comparableSupplier, topN);
int[] resultOrdinals = topByCountOrds.toArray();
FacetLabel[] labels = longValuesFacetCutter.getLabels(resultOrdinals);
List<LabelAndValue> labelsAndValues = new ArrayList<>(labels.length);
int childCount = 0;
for (int i = 0; i < resultOrdinals.length; i++) {
int count = countRecorder.getCount(resultOrdinals[i]);
labelsAndValues.add(new LabelAndValue(labels[i].lastComponent(), count));
childCount++;
}
// int value = countFacetRecorder.getCount(parentOrdinal);
return new FacetResult(
field,
new String[0],
VALUE_CANT_BE_COMPUTED,
labelsAndValues.toArray(new LabelAndValue[0]),
childCount);
}
/**
* Get all results in no particular order, similar to {@link
* LongValueFacetCounts#getAllChildren(String, String...)}
*/
private FacetResult getAllChildren(
String field, LongValueFacetCutter longValuesFacetCutter, CountFacetRecorder countRecorder)
throws IOException {
int[] resultOrdinals = countRecorder.recordedOrds().toArray();
FacetLabel[] labels = longValuesFacetCutter.getLabels(resultOrdinals);
List<LabelAndValue> labelsAndValues = new ArrayList<>(labels.length);
int childCount = 0;
for (int i = 0; i < resultOrdinals.length; i++) {
int count = countRecorder.getCount(resultOrdinals[i]);
labelsAndValues.add(new LabelAndValue(labels[i].lastComponent(), count));
childCount++;
}
// int value = countFacetRecorder.getCount(parentOrdinal);
return new FacetResult(
field,
new String[0],
VALUE_CANT_BE_COMPUTED,
labelsAndValues.toArray(new LabelAndValue[0]),
childCount);
}
}

File diff suppressed because it is too large


@ -0,0 +1,211 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.sandbox.facet;
import static org.apache.lucene.facet.FacetsConfig.DEFAULT_INDEX_FIELD_NAME;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.DrillDownQuery;
import org.apache.lucene.facet.FacetField;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.LabelAndValue;
import org.apache.lucene.facet.taxonomy.FacetLabel;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.sandbox.facet.cutters.TaxonomyFacetsCutter;
import org.apache.lucene.sandbox.facet.labels.TaxonomyOrdLabelBiMap;
import org.apache.lucene.sandbox.facet.recorders.CountFacetRecorder;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.util.IOUtils;
/** Test for taxonomy facets */
public class TestTaxonomyFacet extends SandboxFacetTestCase {
public void testConstants() {
    // It is essential for TaxonomyOrdLabelBiMap that its invalid ordinal is the same as
    // TaxonomyReader's
assertEquals(TaxonomyOrdLabelBiMap.INVALID_ORD, TaxonomyReader.INVALID_ORDINAL);
}
public void testBasic() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
// Writes facet ords to a separate directory from the
// main index:
DirectoryTaxonomyWriter taxoWriter =
new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
config.setHierarchical("Publish Date", true);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new FacetField("Author", "Bob"));
doc.add(new FacetField("Publish Date", "2010", "10", "15"));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Lisa"));
doc.add(new FacetField("Publish Date", "2010", "10", "20"));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Lisa"));
doc.add(new FacetField("Publish Date", "2012", "1", "1"));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Susan"));
doc.add(new FacetField("Publish Date", "2012", "1", "7"));
writer.addDocument(config.build(taxoWriter, doc));
doc = new Document();
doc.add(new FacetField("Author", "Frank"));
doc.add(new FacetField("Publish Date", "1999", "5", "5"));
writer.addDocument(config.build(taxoWriter, doc));
// NRT open
IndexSearcher searcher = newSearcher(writer.getReader());
// NRT open
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
Query query = new MatchAllDocsQuery();
TaxonomyFacetsCutter defaultTaxoCutter =
new TaxonomyFacetsCutter(DEFAULT_INDEX_FIELD_NAME, config, taxoReader);
final CountFacetRecorder countRecorder = new CountFacetRecorder();
FacetFieldCollectorManager<CountFacetRecorder> collectorManager =
new FacetFieldCollectorManager<>(defaultTaxoCutter, countRecorder);
searcher.search(query, collectorManager);
expectThrows(
IllegalArgumentException.class,
() -> {
getTopChildrenByCount(countRecorder, taxoReader, 0, "Author");
});
// Retrieve & verify results:
assertEquals(
"dim=Publish Date path=[] value=-2147483648 childCount=3\n 2010 (2)\n 2012 (2)\n 1999 (1)\n",
getTopChildrenByCount(countRecorder, taxoReader, 10, "Publish Date").toString());
assertEquals(
"dim=Author path=[] value=-2147483648 childCount=4\n Lisa (2)\n Bob (1)\n Susan (1)\n Frank (1)\n",
getTopChildrenByCount(countRecorder, taxoReader, 10, "Author").toString());
assertFacetResult(
getAllChildren(countRecorder, taxoReader, "Publish Date"),
"Publish Date",
new String[0],
3,
VALUE_CANT_BE_COMPUTED,
new LabelAndValue[] {
new LabelAndValue("1999", 1), new LabelAndValue("2010", 2), new LabelAndValue("2012", 2),
});
assertFacetResult(
getAllChildren(countRecorder, taxoReader, "Author"),
"Author",
new String[0],
4,
VALUE_CANT_BE_COMPUTED,
new LabelAndValue[] {
new LabelAndValue("Bob", 1),
new LabelAndValue("Frank", 1),
new LabelAndValue("Lisa", 2),
new LabelAndValue("Susan", 1),
});
// Now user drills down on Publish Date/2010:
DrillDownQuery q2 = new DrillDownQuery(config);
q2.add("Publish Date", "2010");
final CountFacetRecorder countRecorder2 = new CountFacetRecorder();
collectorManager = new FacetFieldCollectorManager<>(defaultTaxoCutter, countRecorder2);
searcher.search(q2, collectorManager);
assertEquals(
"dim=Author path=[] value=-2147483648 childCount=2\n Bob (1)\n Lisa (1)\n",
getTopChildrenByCount(countRecorder2, taxoReader, 10, "Author").toString());
assertEquals(1, getSpecificValue(countRecorder2, taxoReader, "Author", "Lisa"));
assertArrayEquals(
new int[] {1, 1},
getCountsForRecordedCandidates(
countRecorder2,
taxoReader,
new FacetLabel[] {
new FacetLabel("Author", "Lisa"),
new FacetLabel("Author", "Susan"), // 0 count, filtered out
new FacetLabel("Author", "DoesNotExist"), // Doesn't exist in the index, filtered out
new FacetLabel("Author", "Bob"),
}));
expectThrows(
AssertionError.class,
() -> {
getTopChildrenByCount(countRecorder2, taxoReader, 10, "Non exitent dim");
});
writer.close();
IOUtils.close(taxoWriter, searcher.getIndexReader(), taxoReader, taxoDir, dir);
}
public void testTaxonomyCutterExpertModeDisableRollup() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
DirectoryTaxonomyWriter taxoWriter =
new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.CREATE);
FacetsConfig config = new FacetsConfig();
config.setHierarchical("Publish Date", true);
RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
Document doc = new Document();
doc.add(new FacetField("Publish Date", "2010", "10", "15"));
writer.addDocument(config.build(taxoWriter, doc));
IndexSearcher searcher = newSearcher(writer.getReader());
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
Query query = new MatchAllDocsQuery();
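    // The extra boolean constructor argument enables expert mode that disables rollup, so only
    // the exact leaf ordinal is recorded for the hierarchical dimension.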
TaxonomyFacetsCutter defaultTaxoCutter =
new TaxonomyFacetsCutter(DEFAULT_INDEX_FIELD_NAME, config, taxoReader, true);
final CountFacetRecorder countRecorder = new CountFacetRecorder();
FacetFieldCollectorManager<CountFacetRecorder> collectorManager =
new FacetFieldCollectorManager<>(defaultTaxoCutter, countRecorder);
searcher.search(query, collectorManager);
assertEquals(
"Only leaf value should have been counted when rollup is disabled",
1,
countRecorder.recordedOrds().toArray().length);
writer.close();
IOUtils.close(taxoWriter, searcher.getIndexReader(), taxoReader, taxoDir, dir);
}
}