LUCENE-7905: optimize how OrdinalMap builds its map

Mike McCandless 2017-07-14 11:01:57 -04:00
parent 76363e50d9
commit 3df97d3f0c
27 changed files with 465 additions and 367 deletions

View File

@@ -16,6 +16,12 @@ Changes in Runtime Behavior
======================= Lucene 7.1.0 =======================
(No Changes)
Optimizations
* LUCENE-7905: Optimize how OrdinalMap (used by
SortedSetDocValuesFacetCounts and others) builds its map (Robert
Muir, Adrien Grand, Mike McCandless)
======================= Lucene 7.0.0 =======================
New Features

View File

@@ -30,8 +30,8 @@ import org.apache.lucene.index.EmptyDocValuesProducer;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FilteredTermsEnum;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SegmentWriteState; // javadocs
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;

View File

@@ -18,21 +18,10 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.index.MultiTermsEnum.TermsEnumIndex;
import org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
/**
* A wrapper for CompositeIndexReader providing access to DocValues.
@@ -649,283 +638,6 @@ public class MultiDocValues {
}
}
/** maps per-segment ordinals to/from global ordinal space */
// TODO: we could also have a utility method to merge Terms[] and use size() as a weight when we need it
// TODO: use more efficient packed ints structures?
// TODO: pull this out? it's pretty generic (maps between N ord()-enabled TermsEnums)
public static class OrdinalMap implements Accountable {
private static class SegmentMap implements Accountable {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SegmentMap.class);
/** Build a map from an index into a sorted view of `weights` to an index into `weights`. */
private static int[] map(final long[] weights) {
final int[] newToOld = new int[weights.length];
for (int i = 0; i < weights.length; ++i) {
newToOld[i] = i;
}
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
final int tmp = newToOld[i];
newToOld[i] = newToOld[j];
newToOld[j] = tmp;
}
@Override
protected int compare(int i, int j) {
// j first since we actually want higher weights first
return Long.compare(weights[newToOld[j]], weights[newToOld[i]]);
}
}.sort(0, weights.length);
return newToOld;
}
/** Invert the map. */
private static int[] inverse(int[] map) {
final int[] inverse = new int[map.length];
for (int i = 0; i < map.length; ++i) {
inverse[map[i]] = i;
}
return inverse;
}
private final int[] newToOld, oldToNew;
SegmentMap(long[] weights) {
newToOld = map(weights);
oldToNew = inverse(newToOld);
assert Arrays.equals(newToOld, inverse(oldToNew));
}
int newToOld(int segment) {
return newToOld[segment];
}
int oldToNew(int segment) {
return oldToNew[segment];
}
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(newToOld) + RamUsageEstimator.sizeOf(oldToNew);
}
}
/**
* Create an ordinal map that uses the number of unique values of each
* {@link SortedDocValues} instance as a weight.
* @see #build(IndexReader.CacheKey, TermsEnum[], long[], float)
*/
public static OrdinalMap build(IndexReader.CacheKey owner, SortedDocValues[] values, float acceptableOverheadRatio) throws IOException {
final TermsEnum[] subs = new TermsEnum[values.length];
final long[] weights = new long[values.length];
for (int i = 0; i < values.length; ++i) {
subs[i] = values[i].termsEnum();
weights[i] = values[i].getValueCount();
}
return build(owner, subs, weights, acceptableOverheadRatio);
}
/**
* Create an ordinal map that uses the number of unique values of each
* {@link SortedSetDocValues} instance as a weight.
* @see #build(IndexReader.CacheKey, TermsEnum[], long[], float)
*/
public static OrdinalMap build(IndexReader.CacheKey owner, SortedSetDocValues[] values, float acceptableOverheadRatio) throws IOException {
final TermsEnum[] subs = new TermsEnum[values.length];
final long[] weights = new long[values.length];
for (int i = 0; i < values.length; ++i) {
subs[i] = values[i].termsEnum();
weights[i] = values[i].getValueCount();
}
return build(owner, subs, weights, acceptableOverheadRatio);
}
/**
* Creates an ordinal map that allows mapping ords to/from a merged
* space from <code>subs</code>.
* @param owner a cache key
* @param subs TermsEnums that support {@link TermsEnum#ord()}. They need
not be dense (e.g. can be FilteredTermsEnums).
* @param weights a weight for each sub. This is ideally correlated with
* the number of unique terms that each sub introduces compared
* to the other subs
* @throws IOException if an I/O error occurred.
*/
public static OrdinalMap build(IndexReader.CacheKey owner, TermsEnum subs[], long[] weights, float acceptableOverheadRatio) throws IOException {
if (subs.length != weights.length) {
throw new IllegalArgumentException("subs and weights must have the same length");
}
// enums are not sorted, so let's sort to save memory
final SegmentMap segmentMap = new SegmentMap(weights);
return new OrdinalMap(owner, subs, segmentMap, acceptableOverheadRatio);
}
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OrdinalMap.class);
/** Cache key of whoever asked for this awful thing */
public final IndexReader.CacheKey owner;
// globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the ordinal in the first segment that contains this term
final PackedLongValues globalOrdDeltas;
// globalOrd -> first segment container
final PackedLongValues firstSegments;
// for every segment, segmentOrd -> globalOrd
final LongValues segmentToGlobalOrds[];
// the map from/to segment ids
final SegmentMap segmentMap;
// ram usage
final long ramBytesUsed;
OrdinalMap(IndexReader.CacheKey owner, TermsEnum subs[], SegmentMap segmentMap, float acceptableOverheadRatio) throws IOException {
// create the ordinal mappings by pulling a termsenum over each sub's
// unique terms, and walking a multitermsenum over those
this.owner = owner;
this.segmentMap = segmentMap;
// even though we accept an overhead ratio, we keep these ones with COMPACT
// since they are only used to resolve values given a global ord, which is
// slow anyway
PackedLongValues.Builder globalOrdDeltas = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
PackedLongValues.Builder firstSegments = PackedLongValues.packedBuilder(PackedInts.COMPACT);
final PackedLongValues.Builder[] ordDeltas = new PackedLongValues.Builder[subs.length];
for (int i = 0; i < ordDeltas.length; i++) {
ordDeltas[i] = PackedLongValues.monotonicBuilder(acceptableOverheadRatio);
}
long[] ordDeltaBits = new long[subs.length];
long segmentOrds[] = new long[subs.length];
ReaderSlice slices[] = new ReaderSlice[subs.length];
TermsEnumIndex indexes[] = new TermsEnumIndex[slices.length];
for (int i = 0; i < slices.length; i++) {
slices[i] = new ReaderSlice(0, 0, i);
indexes[i] = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i);
}
MultiTermsEnum mte = new MultiTermsEnum(slices);
mte.reset(indexes);
long globalOrd = 0;
while (mte.next() != null) {
TermsEnumWithSlice matches[] = mte.getMatchArray();
int firstSegmentIndex = Integer.MAX_VALUE;
long globalOrdDelta = Long.MAX_VALUE;
for (int i = 0; i < mte.getMatchCount(); i++) {
int segmentIndex = matches[i].index;
long segmentOrd = matches[i].terms.ord();
long delta = globalOrd - segmentOrd;
// We compute the lowest segment index where the term occurs. In case the
// first segment contains most (or even all) values, this will
// help save significant memory
if (segmentIndex < firstSegmentIndex) {
firstSegmentIndex = segmentIndex;
globalOrdDelta = delta;
}
// for each per-segment ord, map it back to the global term.
while (segmentOrds[segmentIndex] <= segmentOrd) {
ordDeltaBits[segmentIndex] |= delta;
ordDeltas[segmentIndex].add(delta);
segmentOrds[segmentIndex]++;
}
}
// for each unique term, just mark the first segment index/delta where it occurs
assert firstSegmentIndex < segmentOrds.length;
firstSegments.add(firstSegmentIndex);
globalOrdDeltas.add(globalOrdDelta);
globalOrd++;
}
this.firstSegments = firstSegments.build();
this.globalOrdDeltas = globalOrdDeltas.build();
// ordDeltas is typically the bottleneck, so let's see what we can do to make it faster
segmentToGlobalOrds = new LongValues[subs.length];
long ramBytesUsed = BASE_RAM_BYTES_USED + this.globalOrdDeltas.ramBytesUsed()
+ this.firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds)
+ segmentMap.ramBytesUsed();
for (int i = 0; i < ordDeltas.length; ++i) {
final PackedLongValues deltas = ordDeltas[i].build();
if (ordDeltaBits[i] == 0L) {
// segment ords perfectly match global ordinals
// likely in case of low cardinalities and large segments
segmentToGlobalOrds[i] = LongValues.IDENTITY;
} else {
final int bitsRequired = ordDeltaBits[i] < 0 ? 64 : PackedInts.bitsRequired(ordDeltaBits[i]);
final long monotonicBits = deltas.ramBytesUsed() * 8;
final long packedBits = bitsRequired * deltas.size();
if (deltas.size() <= Integer.MAX_VALUE
&& packedBits <= monotonicBits * (1 + acceptableOverheadRatio)) {
// monotonic compression mostly adds overhead, let's keep the mapping in plain packed ints
final int size = (int) deltas.size();
final PackedInts.Mutable newDeltas = PackedInts.getMutable(size, bitsRequired, acceptableOverheadRatio);
final PackedLongValues.Iterator it = deltas.iterator();
for (int ord = 0; ord < size; ++ord) {
newDeltas.set(ord, it.next());
}
assert !it.hasNext();
segmentToGlobalOrds[i] = new LongValues() {
@Override
public long get(long ord) {
return ord + newDeltas.get((int) ord);
}
};
ramBytesUsed += newDeltas.ramBytesUsed();
} else {
segmentToGlobalOrds[i] = new LongValues() {
@Override
public long get(long ord) {
return ord + deltas.get(ord);
}
};
ramBytesUsed += deltas.ramBytesUsed();
}
ramBytesUsed += RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds[i]);
}
}
this.ramBytesUsed = ramBytesUsed;
}
/**
* Given a segment number, return a {@link LongValues} instance that maps
* segment ordinals to global ordinals.
*/
public LongValues getGlobalOrds(int segmentIndex) {
return segmentToGlobalOrds[segmentMap.oldToNew(segmentIndex)];
}
/**
* Given a global ordinal, returns the corresponding ordinal in the first segment
* that contains this term (the segment returned by {@link #getFirstSegmentNumber}).
*/
public long getFirstSegmentOrd(long globalOrd) {
return globalOrd - globalOrdDeltas.get(globalOrd);
}
/**
* Given a global ordinal, returns the index of the first
* segment that contains this term.
*/
public int getFirstSegmentNumber(long globalOrd) {
return segmentMap.newToOld((int) firstSegments.get(globalOrd));
}
/**
* Returns the total number of unique terms in global ord space.
*/
public long getValueCount() {
return globalOrdDeltas.size();
}
@Override
public long ramBytesUsed() {
return ramBytesUsed;
}
@Override
public Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();
resources.add(Accountables.namedAccountable("global ord deltas", globalOrdDeltas));
resources.add(Accountables.namedAccountable("first segments", firstSegments));
resources.add(Accountables.namedAccountable("segment map", segmentMap));
// TODO: would be nice to return actual child segment deltas too, but the optimizations are confusing
return resources;
}
}
/**
* Implements SortedDocValues over n subs, using an OrdinalMap
* @lucene.internal

View File

@@ -166,7 +166,7 @@ final class MultiSorter {
final SortedDocValues sorted = Sorter.getOrWrapSorted(readers.get(i), sortField);
values[i] = sorted;
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT);
OrdinalMap ordinalMap = OrdinalMap.build(null, values, PackedInts.DEFAULT);
final int missingOrd;
if (sortField.getMissingValue() == SortField.STRING_LAST) {
missingOrd = sortField.getReverse() ? Integer.MIN_VALUE : Integer.MAX_VALUE;

View File

@@ -302,6 +302,8 @@ public final class MultiTermsEnum extends TermsEnum {
// gather equal top fields
if (queue.size() > 0) {
// TODO: we could maybe defer this somewhat costly operation until one of the APIs that
// needs to see the top is invoked (docFreq, postings, etc.)
pullTop();
} else {
current = null;

View File

@@ -0,0 +1,368 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.PriorityQueue;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.packed.PackedInts;
import org.apache.lucene.util.packed.PackedLongValues;
/** Maps per-segment ordinals to/from global ordinal space, using a compact packed-ints representation.
*
* <p><b>NOTE</b>: this is a costly operation, as it must merge sort all terms, and may require non-trivial RAM once done. It's better to operate in
* segment-private ordinal space instead when possible.
*
* @lucene.internal */
public class OrdinalMap implements Accountable {
// TODO: we could also have a utility method to merge Terms[] and use size() as a weight when we need it
// TODO: use more efficient packed ints structures?
private static class TermsEnumIndex {
public final static TermsEnumIndex[] EMPTY_ARRAY = new TermsEnumIndex[0];
final int subIndex;
final TermsEnum termsEnum;
BytesRef currentTerm;
public TermsEnumIndex(TermsEnum termsEnum, int subIndex) {
this.termsEnum = termsEnum;
this.subIndex = subIndex;
}
public BytesRef next() throws IOException {
currentTerm = termsEnum.next();
return currentTerm;
}
}
private static class SegmentMap implements Accountable {
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SegmentMap.class);
/** Build a map from an index into a sorted view of `weights` to an index into `weights`. */
private static int[] map(final long[] weights) {
final int[] newToOld = new int[weights.length];
for (int i = 0; i < weights.length; ++i) {
newToOld[i] = i;
}
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
final int tmp = newToOld[i];
newToOld[i] = newToOld[j];
newToOld[j] = tmp;
}
@Override
protected int compare(int i, int j) {
// j first since we actually want higher weights first
return Long.compare(weights[newToOld[j]], weights[newToOld[i]]);
}
}.sort(0, weights.length);
return newToOld;
}
/** Invert the map. */
private static int[] inverse(int[] map) {
final int[] inverse = new int[map.length];
for (int i = 0; i < map.length; ++i) {
inverse[map[i]] = i;
}
return inverse;
}
private final int[] newToOld, oldToNew;
SegmentMap(long[] weights) {
newToOld = map(weights);
oldToNew = inverse(newToOld);
assert Arrays.equals(newToOld, inverse(oldToNew));
}
int newToOld(int segment) {
return newToOld[segment];
}
int oldToNew(int segment) {
return oldToNew[segment];
}
@Override
public long ramBytesUsed() {
return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(newToOld) + RamUsageEstimator.sizeOf(oldToNew);
}
}
/**
* Create an ordinal map that uses the number of unique values of each
* {@link SortedDocValues} instance as a weight.
* @see #build(IndexReader.CacheKey, TermsEnum[], long[], float)
*/
public static OrdinalMap build(IndexReader.CacheKey owner, SortedDocValues[] values, float acceptableOverheadRatio) throws IOException {
final TermsEnum[] subs = new TermsEnum[values.length];
final long[] weights = new long[values.length];
for (int i = 0; i < values.length; ++i) {
subs[i] = values[i].termsEnum();
weights[i] = values[i].getValueCount();
}
return build(owner, subs, weights, acceptableOverheadRatio);
}
/**
* Create an ordinal map that uses the number of unique values of each
* {@link SortedSetDocValues} instance as a weight.
* @see #build(IndexReader.CacheKey, TermsEnum[], long[], float)
*/
public static OrdinalMap build(IndexReader.CacheKey owner, SortedSetDocValues[] values, float acceptableOverheadRatio) throws IOException {
final TermsEnum[] subs = new TermsEnum[values.length];
final long[] weights = new long[values.length];
for (int i = 0; i < values.length; ++i) {
subs[i] = values[i].termsEnum();
weights[i] = values[i].getValueCount();
}
return build(owner, subs, weights, acceptableOverheadRatio);
}
/**
* Creates an ordinal map that allows mapping ords to/from a merged
* space from <code>subs</code>.
* @param owner a cache key
* @param subs TermsEnums that support {@link TermsEnum#ord()}. They need
not be dense (e.g. can be FilteredTermsEnums).
* @param weights a weight for each sub. This is ideally correlated with
* the number of unique terms that each sub introduces compared
* to the other subs
* @throws IOException if an I/O error occurred.
*/
public static OrdinalMap build(IndexReader.CacheKey owner, TermsEnum subs[], long[] weights, float acceptableOverheadRatio) throws IOException {
if (subs.length != weights.length) {
throw new IllegalArgumentException("subs and weights must have the same length");
}
// enums are not sorted, so let's sort to save memory
final SegmentMap segmentMap = new SegmentMap(weights);
return new OrdinalMap(owner, subs, segmentMap, acceptableOverheadRatio);
}
private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OrdinalMap.class);
/** Cache key of whoever asked for this awful thing */
public final IndexReader.CacheKey owner;
// globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the ordinal in the first segment that contains this term
final PackedLongValues globalOrdDeltas;
// globalOrd -> first segment container
final PackedLongValues firstSegments;
// for every segment, segmentOrd -> globalOrd
final LongValues segmentToGlobalOrds[];
// the map from/to segment ids
final SegmentMap segmentMap;
// ram usage
final long ramBytesUsed;
OrdinalMap(IndexReader.CacheKey owner, TermsEnum subs[], SegmentMap segmentMap, float acceptableOverheadRatio) throws IOException {
// create the ordinal mappings by pulling a termsenum over each sub's
// unique terms, and walking a multitermsenum over those
this.owner = owner;
this.segmentMap = segmentMap;
// even though we accept an overhead ratio, we keep these ones with COMPACT
// since they are only used to resolve values given a global ord, which is
// slow anyway
PackedLongValues.Builder globalOrdDeltas = PackedLongValues.monotonicBuilder(PackedInts.COMPACT);
PackedLongValues.Builder firstSegments = PackedLongValues.packedBuilder(PackedInts.COMPACT);
final PackedLongValues.Builder[] ordDeltas = new PackedLongValues.Builder[subs.length];
for (int i = 0; i < ordDeltas.length; i++) {
ordDeltas[i] = PackedLongValues.monotonicBuilder(acceptableOverheadRatio);
}
long[] ordDeltaBits = new long[subs.length];
long[] segmentOrds = new long[subs.length];
// Just merge-sorts by term:
PriorityQueue<TermsEnumIndex> queue = new PriorityQueue<TermsEnumIndex>(subs.length) {
@Override
protected boolean lessThan(TermsEnumIndex a, TermsEnumIndex b) {
return a.currentTerm.compareTo(b.currentTerm) < 0;
}
};
for (int i = 0; i < subs.length; i++) {
TermsEnumIndex sub = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i);
if (sub.next() != null) {
queue.add(sub);
}
}
BytesRefBuilder scratch = new BytesRefBuilder();
long globalOrd = 0;
while (queue.size() != 0) {
TermsEnumIndex top = queue.top();
scratch.copyBytes(top.currentTerm);
int firstSegmentIndex = Integer.MAX_VALUE;
long globalOrdDelta = Long.MAX_VALUE;
// Advance past this term, recording the per-segment ord deltas:
while (true) {
top = queue.top();
long segmentOrd = top.termsEnum.ord();
long delta = globalOrd - segmentOrd;
int segmentIndex = top.subIndex;
// We compute the lowest segment index where the term occurs. In case the
// first segment contains most (or even all) values, this will
// help save significant memory
if (segmentIndex < firstSegmentIndex) {
firstSegmentIndex = segmentIndex;
globalOrdDelta = delta;
}
ordDeltaBits[segmentIndex] |= delta;
// for each per-segment ord, map it back to the global term; the while loop is needed
// in case the incoming TermsEnums don't have compact ordinals (some ordinal values
// are skipped), which can happen e.g. with a FilteredTermsEnum:
assert segmentOrds[segmentIndex] <= segmentOrd;
// TODO: we could specialize this case (the while loop is not needed when the ords
// are compact)
do {
ordDeltas[segmentIndex].add(delta);
segmentOrds[segmentIndex]++;
} while (segmentOrds[segmentIndex] <= segmentOrd);
if (top.next() == null) {
queue.pop();
if (queue.size() == 0) {
break;
}
} else {
queue.updateTop();
}
if (queue.top().currentTerm.equals(scratch.get()) == false) {
break;
}
}
// for each unique term, just mark the first segment index/delta where it occurs
firstSegments.add(firstSegmentIndex);
globalOrdDeltas.add(globalOrdDelta);
globalOrd++;
}
this.firstSegments = firstSegments.build();
this.globalOrdDeltas = globalOrdDeltas.build();
// ordDeltas is typically the bottleneck, so let's see what we can do to make it faster
segmentToGlobalOrds = new LongValues[subs.length];
long ramBytesUsed = BASE_RAM_BYTES_USED + this.globalOrdDeltas.ramBytesUsed()
+ this.firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds)
+ segmentMap.ramBytesUsed();
for (int i = 0; i < ordDeltas.length; ++i) {
final PackedLongValues deltas = ordDeltas[i].build();
if (ordDeltaBits[i] == 0L) {
// segment ords perfectly match global ordinals
// likely in case of low cardinalities and large segments
segmentToGlobalOrds[i] = LongValues.IDENTITY;
} else {
final int bitsRequired = ordDeltaBits[i] < 0 ? 64 : PackedInts.bitsRequired(ordDeltaBits[i]);
final long monotonicBits = deltas.ramBytesUsed() * 8;
final long packedBits = bitsRequired * deltas.size();
if (deltas.size() <= Integer.MAX_VALUE
&& packedBits <= monotonicBits * (1 + acceptableOverheadRatio)) {
// monotonic compression mostly adds overhead, let's keep the mapping in plain packed ints
final int size = (int) deltas.size();
final PackedInts.Mutable newDeltas = PackedInts.getMutable(size, bitsRequired, acceptableOverheadRatio);
final PackedLongValues.Iterator it = deltas.iterator();
for (int ord = 0; ord < size; ++ord) {
newDeltas.set(ord, it.next());
}
assert it.hasNext() == false;
segmentToGlobalOrds[i] = new LongValues() {
@Override
public long get(long ord) {
return ord + newDeltas.get((int) ord);
}
};
ramBytesUsed += newDeltas.ramBytesUsed();
} else {
segmentToGlobalOrds[i] = new LongValues() {
@Override
public long get(long ord) {
return ord + deltas.get(ord);
}
};
ramBytesUsed += deltas.ramBytesUsed();
}
ramBytesUsed += RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds[i]);
}
}
this.ramBytesUsed = ramBytesUsed;
}
/**
* Given a segment number, return a {@link LongValues} instance that maps
* segment ordinals to global ordinals.
*/
public LongValues getGlobalOrds(int segmentIndex) {
return segmentToGlobalOrds[segmentMap.oldToNew(segmentIndex)];
}
/**
* Given a global ordinal, returns the corresponding ordinal in the first segment
* that contains this term (the segment returned by {@link #getFirstSegmentNumber}).
*/
public long getFirstSegmentOrd(long globalOrd) {
return globalOrd - globalOrdDeltas.get(globalOrd);
}
/**
* Given a global ordinal, returns the index of the first
* segment that contains this term.
*/
public int getFirstSegmentNumber(long globalOrd) {
return segmentMap.newToOld((int) firstSegments.get(globalOrd));
}
/**
* Returns the total number of unique terms in global ord space.
*/
public long getValueCount() {
return globalOrdDeltas.size();
}
@Override
public long ramBytesUsed() {
return ramBytesUsed;
}
@Override
public Collection<Accountable> getChildResources() {
List<Accountable> resources = new ArrayList<>();
resources.add(Accountables.namedAccountable("global ord deltas", globalOrdDeltas));
resources.add(Accountables.namedAccountable("first segments", firstSegments));
resources.add(Accountables.namedAccountable("segment map", segmentMap));
// TODO: would be nice to return actual child segment deltas too, but the optimizations are confusing
return resources;
}
}

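For orientation, here is a minimal usage sketch of the relocated class, separate from the diff itself. It follows the OrdinalMap.build(null, values, PackedInts.DEFAULT) pattern used by the TestJoinUtil changes further down; the "category" field and the helper class are hypothetical, and a production caller would normally pass reader.getReaderCacheHelper().getKey() as the cache key instead of null.

import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.packed.PackedInts;

// Hypothetical helper, for illustration only: builds a global ordinal map over
// the per-leaf SortedDocValues of a "category" field, then maps one
// per-segment ordinal into the global ordinal space.
class OrdinalMapSketch {
  static long globalOrdOfSegmentOrdZero(IndexReader reader, int segment) throws IOException {
    SortedDocValues[] values = new SortedDocValues[reader.leaves().size()];
    for (int i = 0; i < values.length; i++) {
      values[i] = DocValues.getSorted(reader.leaves().get(i).reader(), "category");
    }
    // null cache key, as in the tests below; production code would use
    // reader.getReaderCacheHelper().getKey()
    OrdinalMap map = OrdinalMap.build(null, values, PackedInts.DEFAULT);
    LongValues toGlobal = map.getGlobalOrds(segment); // segmentOrd -> globalOrd
    return toGlobal.get(0); // global ordinal of this segment's ordinal 0
  }
}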
View File

@@ -25,7 +25,7 @@ import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongValues;

View File

@@ -42,6 +42,7 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.ConjunctionDISI;
@@ -152,10 +153,10 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
private class CountOneSegment implements Callable<Void> {
final LeafReader leafReader;
final MatchingDocs hits;
final MultiDocValues.OrdinalMap ordinalMap;
final OrdinalMap ordinalMap;
final int segOrd;
public CountOneSegment(LeafReader leafReader, MatchingDocs hits, MultiDocValues.OrdinalMap ordinalMap, int segOrd) {
public CountOneSegment(LeafReader leafReader, MatchingDocs hits, OrdinalMap ordinalMap, int segOrd) {
this.leafReader = leafReader;
this.hits = hits;
this.ordinalMap = ordinalMap;
@@ -240,7 +241,7 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
/** Does all the "real work" of tallying up the counts. */
private final void count(List<MatchingDocs> matchingDocs) throws IOException, InterruptedException {
MultiDocValues.OrdinalMap ordinalMap;
OrdinalMap ordinalMap;
// TODO: is this right? really, we need a way to
// verify that this ordinalMap "matches" the leaves in
@@ -281,7 +282,7 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets {
private final void countAll() throws IOException, InterruptedException {
//System.out.println("ssdv count");
MultiDocValues.OrdinalMap ordinalMap;
OrdinalMap ordinalMap;
// TODO: is this right? really, we need a way to
// verify that this ordinalMap "matches" the leaves in

View File

@@ -31,8 +31,8 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.Accountables;

View File

@@ -37,6 +37,7 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.ConjunctionDISI;
@@ -155,7 +156,7 @@ public class SortedSetDocValuesFacetCounts extends Facets {
return new FacetResult(dim, new String[0], dimCount, labelValues, childCount);
}
private void countOneSegment(MultiDocValues.OrdinalMap ordinalMap, LeafReader reader, int segOrd, MatchingDocs hits) throws IOException {
private void countOneSegment(OrdinalMap ordinalMap, LeafReader reader, int segOrd, MatchingDocs hits) throws IOException {
SortedSetDocValues segValues = reader.getSortedSetDocValues(field);
if (segValues == null) {
// nothing to count
@@ -236,7 +237,7 @@ public class SortedSetDocValuesFacetCounts extends Facets {
private final void count(List<MatchingDocs> matchingDocs) throws IOException {
//System.out.println("ssdv count");
MultiDocValues.OrdinalMap ordinalMap;
OrdinalMap ordinalMap;
// TODO: is this right? really, we need a way to
// verify that this ordinalMap "matches" the leaves in
@@ -267,7 +268,7 @@ public class SortedSetDocValuesFacetCounts extends Facets {
private final void countAll() throws IOException {
//System.out.println("ssdv count");
MultiDocValues.OrdinalMap ordinalMap;
OrdinalMap ordinalMap;
// TODO: is this right? really, we need a way to
// verify that this ordinalMap "matches" the leaves in

View File

@@ -20,7 +20,7 @@ import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.LeafCollector;
@@ -37,9 +37,9 @@ final class GlobalOrdinalsCollector implements Collector {
final String field;
final LongBitSet collectedOrds;
final MultiDocValues.OrdinalMap ordinalMap;
final OrdinalMap ordinalMap;
GlobalOrdinalsCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) {
GlobalOrdinalsCollector(String field, OrdinalMap ordinalMap, long valueCount) {
this.field = field;
this.ordinalMap = ordinalMap;
this.collectedOrds = new LongBitSet(valueCount);

View File

@@ -21,7 +21,7 @@ import java.util.Set;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ConstantScoreWeight;
@@ -41,7 +41,7 @@ final class GlobalOrdinalsQuery extends Query {
// All the ords of matching docs found with OrdinalsCollector.
private final LongBitSet foundOrds;
private final String joinField;
private final MultiDocValues.OrdinalMap globalOrds;
private final OrdinalMap globalOrds;
// Is also an approximation of the docs that will match. Can be all docs that have toField or something more specific.
private final Query toQuery;
@@ -50,7 +50,7 @@ final class GlobalOrdinalsQuery extends Query {
// id of the context rather than the context itself in order not to hold references to index readers
private final Object indexReaderContextId;
GlobalOrdinalsQuery(LongBitSet foundOrds, String joinField, MultiDocValues.OrdinalMap globalOrds, Query toQuery,
GlobalOrdinalsQuery(LongBitSet foundOrds, String joinField, OrdinalMap globalOrds, Query toQuery,
Query fromQuery, Object indexReaderContextId) {
this.foundOrds = foundOrds;
this.joinField = joinField;

View File

@@ -21,7 +21,7 @@ import java.util.Arrays;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.LeafCollector;
@@ -35,13 +35,13 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
final boolean doMinMax;
final int min;
final int max;
final MultiDocValues.OrdinalMap ordinalMap;
final OrdinalMap ordinalMap;
final LongBitSet collectedOrds;
protected final Scores scores;
protected final Occurrences occurrences;
GlobalOrdinalsWithScoreCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, ScoreMode scoreMode, int min, int max) {
GlobalOrdinalsWithScoreCollector(String field, OrdinalMap ordinalMap, long valueCount, ScoreMode scoreMode, int min, int max) {
if (valueCount > Integer.MAX_VALUE) {
// We simply don't support more than Integer.MAX_VALUE ids
throw new IllegalStateException("Can't collect more than [" + Integer.MAX_VALUE + "] ids");
@@ -168,7 +168,7 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
static final class Min extends GlobalOrdinalsWithScoreCollector {
public Min(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
public Min(String field, OrdinalMap ordinalMap, long valueCount, int min, int max) {
super(field, ordinalMap, valueCount, ScoreMode.Min, min, max);
}
@@ -185,7 +185,7 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
static final class Max extends GlobalOrdinalsWithScoreCollector {
public Max(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
public Max(String field, OrdinalMap ordinalMap, long valueCount, int min, int max) {
super(field, ordinalMap, valueCount, ScoreMode.Max, min, max);
}
@@ -202,7 +202,7 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
static final class Sum extends GlobalOrdinalsWithScoreCollector {
public Sum(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
public Sum(String field, OrdinalMap ordinalMap, long valueCount, int min, int max) {
super(field, ordinalMap, valueCount, ScoreMode.Total, min, max);
}
@@ -219,7 +219,7 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
static final class Avg extends GlobalOrdinalsWithScoreCollector {
public Avg(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
public Avg(String field, OrdinalMap ordinalMap, long valueCount, int min, int max) {
super(field, ordinalMap, valueCount, ScoreMode.Avg, min, max);
}
@@ -241,7 +241,7 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector {
static final class NoScore extends GlobalOrdinalsWithScoreCollector {
public NoScore(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) {
public NoScore(String field, OrdinalMap ordinalMap, long valueCount, int min, int max) {
super(field, ordinalMap, valueCount, ScoreMode.None, min, max);
}

View File

@@ -21,7 +21,7 @@ import java.util.Set;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.DocIdSetIterator;
@@ -39,7 +39,7 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
private final GlobalOrdinalsWithScoreCollector collector;
private final String joinField;
private final MultiDocValues.OrdinalMap globalOrds;
private final OrdinalMap globalOrds;
// Is also an approximation of the docs that will match. Can be all docs that have toField or something more specific.
private final Query toQuery;
@@ -52,7 +52,7 @@ final class GlobalOrdinalsWithScoreQuery extends Query {
private final Object indexReaderContextId;
GlobalOrdinalsWithScoreQuery(GlobalOrdinalsWithScoreCollector collector, ScoreMode scoreMode, String joinField,
MultiDocValues.OrdinalMap globalOrds, Query toQuery, Query fromQuery, int min, int max,
OrdinalMap globalOrds, Query toQuery, Query fromQuery, int min, int max,
Object indexReaderContextId) {
this.collector = collector;
this.joinField = joinField;

View File

@@ -34,8 +34,8 @@ import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
@@ -407,7 +407,7 @@ public final class JoinUtil {
/**
* Delegates to {@link #createJoinQuery(String, Query, Query, IndexSearcher, ScoreMode, MultiDocValues.OrdinalMap, int, int)},
* Delegates to {@link #createJoinQuery(String, Query, Query, IndexSearcher, ScoreMode, OrdinalMap, int, int)},
* but disables the min and max filtering.
*
* @param joinField The {@link SortedDocValues} field containing the join values
@@ -425,7 +425,7 @@ public final class JoinUtil {
Query toQuery,
IndexSearcher searcher,
ScoreMode scoreMode,
MultiDocValues.OrdinalMap ordinalMap) throws IOException {
OrdinalMap ordinalMap) throws IOException {
return createJoinQuery(joinField, fromQuery, toQuery, searcher, scoreMode, ordinalMap, 0, Integer.MAX_VALUE);
}
@@ -464,7 +464,7 @@ public final class JoinUtil {
Query toQuery,
IndexSearcher searcher,
ScoreMode scoreMode,
MultiDocValues.OrdinalMap ordinalMap,
OrdinalMap ordinalMap,
int min,
int max) throws IOException {
int numSegments = searcher.getIndexReader().leaves().size();

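A hedged sketch of how the updated entry point is typically invoked, mirroring the TestJoinUtil call sites below. The "join_field" and "type" fields are test conventions here, not part of the JoinUtil API, and the helper class is hypothetical.

import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.join.JoinUtil;
import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.util.packed.PackedInts;

// Hypothetical helper, for illustration only: builds the OrdinalMap once over
// the searcher's leaves and hands it to the global-ordinal join.
class GlobalOrdJoinSketch {
  static Query createJoin(IndexSearcher searcher) throws IOException {
    SortedDocValues[] values = new SortedDocValues[searcher.getIndexReader().leaves().size()];
    for (LeafReaderContext leaf : searcher.getIndexReader().leaves()) {
      values[leaf.ord] = DocValues.getSorted(leaf.reader(), "join_field");
    }
    OrdinalMap ordinalMap = OrdinalMap.build(null, values, PackedInts.DEFAULT);
    return JoinUtil.createJoinQuery("join_field",
        new TermQuery(new Term("type", "from")), // fromQuery
        new TermQuery(new Term("type", "to")),   // toQuery
        searcher, ScoreMode.Max, ordinalMap);
  }
}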
View File

@@ -55,11 +55,10 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedDocValues;
@@ -267,7 +266,7 @@ public class TestJoinUtil extends LuceneTestCase {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
OrdinalMap ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
@@ -372,7 +371,7 @@ public class TestJoinUtil extends LuceneTestCase {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
OrdinalMap ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
@@ -500,7 +499,7 @@ public class TestJoinUtil extends LuceneTestCase {
for (LeafReaderContext leadContext : searcher.getIndexReader().leaves()) {
values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field");
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
OrdinalMap ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
BooleanQuery.Builder fromQuery = new BooleanQuery.Builder();
@@ -621,7 +620,7 @@ public class TestJoinUtil extends LuceneTestCase {
for (LeafReaderContext leadContext : searcher.getIndexReader().leaves()) {
values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field");
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
OrdinalMap ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
Query fromQuery = new TermQuery(new Term("type", "from"));
@@ -1036,7 +1035,7 @@ public class TestJoinUtil extends LuceneTestCase {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
OrdinalMap ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
IndexSearcher indexSearcher = new IndexSearcher(r);
@@ -1067,7 +1066,7 @@ public class TestJoinUtil extends LuceneTestCase {
LeafReader leafReader = r.leaves().get(i).reader();
values[i] = DocValues.getSorted(leafReader, joinField);
}
MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(
OrdinalMap ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
IndexSearcher indexSearcher = new IndexSearcher(r);
@@ -1590,7 +1589,7 @@ public class TestJoinUtil extends LuceneTestCase {
for (LeafReaderContext leadContext : topLevelReader.leaves()) {
values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field");
}
context.ordinalMap = MultiDocValues.OrdinalMap.build(
context.ordinalMap = OrdinalMap.build(
null, values, PackedInts.DEFAULT
);
}
@@ -1712,7 +1711,7 @@ public class TestJoinUtil extends LuceneTestCase {
Map<String, Map<Integer, JoinScore>> fromHitsToJoinScore = new HashMap<>();
Map<String, Map<Integer, JoinScore>> toHitsToJoinScore = new HashMap<>();
MultiDocValues.OrdinalMap ordinalMap;
OrdinalMap ordinalMap;
Directory dir;
IndexSearcher searcher;

View File

@@ -17,10 +17,6 @@
package org.apache.solr.handler;
import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;
import static org.apache.solr.common.util.Utils.makeMap;
import java.io.Closeable;
import java.io.IOException;
import java.io.OutputStream;
@@ -38,6 +34,7 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.index.SortedSetDocValues;
@@ -53,8 +50,8 @@ import org.apache.lucene.util.LongValues;
import org.apache.lucene.util.NumericUtils;
import org.apache.solr.client.solrj.impl.BinaryResponseParser;
import org.apache.solr.common.IteratorWriter;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.MapWriter.EntryWriter;
import org.apache.solr.common.MapWriter;
import org.apache.solr.common.PushWriter;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.SolrParams;
@@ -82,6 +79,10 @@ import org.apache.solr.search.SyntaxError;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;
import static org.apache.solr.common.util.Utils.makeMap;
public class ExportWriter implements SolrCore.RawWriter, Closeable {
private static final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
private OutputStreamWriter respWriter;
@@ -1257,7 +1258,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable {
protected SortedDocValues vals;
protected MultiDocValues.OrdinalMap ordinalMap;
protected OrdinalMap ordinalMap;
protected LongValues globalOrds;
protected SortedDocValues currentVals;

View File

@@ -24,15 +24,6 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import com.carrotsearch.hppc.IntHashSet;
import com.carrotsearch.hppc.IntObjectHashMap;
import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.LongObjectHashMap;
import com.carrotsearch.hppc.LongObjectMap;
import com.carrotsearch.hppc.cursors.IntObjectCursor;
import com.carrotsearch.hppc.cursors.LongCursor;
import com.carrotsearch.hppc.cursors.LongObjectCursor;
import com.carrotsearch.hppc.cursors.ObjectCursor;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
@@ -42,6 +33,7 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
@@ -88,6 +80,16 @@ import org.apache.solr.uninverting.UninvertingReader;
import org.apache.solr.util.plugin.PluginInfoInitialized;
import org.apache.solr.util.plugin.SolrCoreAware;
import com.carrotsearch.hppc.IntHashSet;
import com.carrotsearch.hppc.IntObjectHashMap;
import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.LongObjectHashMap;
import com.carrotsearch.hppc.LongObjectMap;
import com.carrotsearch.hppc.cursors.IntObjectCursor;
import com.carrotsearch.hppc.cursors.LongCursor;
import com.carrotsearch.hppc.cursors.LongObjectCursor;
import com.carrotsearch.hppc.cursors.ObjectCursor;
/**
* The ExpandComponent is designed to work with the CollapsingPostFilter.
* The CollapsingPostFilter collapses a result set on a field.
@@ -274,7 +276,7 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
IntObjectHashMap<BytesRef> ordBytes = null;
if(values != null) {
groupBits = new FixedBitSet(values.getValueCount());
MultiDocValues.OrdinalMap ordinalMap = null;
OrdinalMap ordinalMap = null;
SortedDocValues[] sortedDocValues = null;
LongValues segmentOrdinalMap = null;
SortedDocValues currentValues = null;
@@ -520,7 +522,7 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia
private static class GroupExpandCollector implements Collector, GroupCollector {
private SortedDocValues docValues;
private MultiDocValues.OrdinalMap ordinalMap;
private OrdinalMap ordinalMap;
private SortedDocValues segmentValues;
private LongValues segmentOrdinalMap;
private MultiDocValues.MultiSortedDocValues multiSortedDocValues;

View File

@@ -22,7 +22,7 @@ import java.util.Map;
import org.apache.lucene.index.*;
import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Version;

View File

@@ -22,9 +22,9 @@ import java.util.function.Predicate;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSet;

View File

@@ -22,9 +22,9 @@ import java.util.Map;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSet;

View File

@@ -25,12 +25,6 @@ import java.util.Iterator;
import java.util.List;
import java.util.Map;
import com.carrotsearch.hppc.FloatArrayList;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.IntLongHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import com.carrotsearch.hppc.cursors.IntLongCursor;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.codecs.DocValuesProducer;
import org.apache.lucene.index.DocValues;
@@ -43,6 +37,7 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.FunctionValues;
@@ -69,10 +64,17 @@ import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.StrField;
import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.StrField;
import org.apache.solr.uninverting.UninvertingReader;
import com.carrotsearch.hppc.FloatArrayList;
import com.carrotsearch.hppc.IntArrayList;
import com.carrotsearch.hppc.IntIntHashMap;
import com.carrotsearch.hppc.IntLongHashMap;
import com.carrotsearch.hppc.cursors.IntIntCursor;
import com.carrotsearch.hppc.cursors.IntLongCursor;
import static org.apache.solr.common.params.CommonParams.SORT;
/**
@@ -474,7 +476,7 @@ public class CollapsingQParserPlugin extends QParserPlugin {
private final DocValuesProducer collapseValuesProducer;
private FixedBitSet collapsedSet;
private SortedDocValues collapseValues;
private MultiDocValues.OrdinalMap ordinalMap;
private OrdinalMap ordinalMap;
private SortedDocValues segmentValues;
private LongValues segmentOrdinalMap;
private MultiDocValues.MultiSortedDocValues multiSortedDocValues;
@@ -920,7 +922,7 @@ public class CollapsingQParserPlugin extends QParserPlugin {
private LeafReaderContext[] contexts;
private DocValuesProducer collapseValuesProducer;
private SortedDocValues collapseValues;
protected MultiDocValues.OrdinalMap ordinalMap;
protected OrdinalMap ordinalMap;
protected SortedDocValues segmentValues;
protected LongValues segmentOrdinalMap;
protected MultiDocValues.MultiSortedDocValues multiSortedDocValues;

View File

@@ -23,6 +23,7 @@ import java.util.List;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.DocIdSet;
@@ -43,7 +44,7 @@ class FacetFieldProcessorByArrayDV extends FacetFieldProcessorByArray {
boolean multiValuedField;
SortedSetDocValues si; // only used for term lookups (for both single and multi-valued)
MultiDocValues.OrdinalMap ordinalMap = null; // maps per-segment ords to global ords
OrdinalMap ordinalMap = null; // maps per-segment ords to global ords
FacetFieldProcessorByArrayDV(FacetContext fcontext, FacetField freq, SchemaField sf) {
super(fcontext, freq, sf);

View File

@@ -21,6 +21,7 @@ import java.util.Arrays;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.util.BytesRef;
@@ -176,7 +177,7 @@ public class MinMaxAgg extends SimpleAggValueSource {
class SingleValuedOrdAcc extends OrdAcc {
SortedDocValues topLevel;
SortedDocValues[] subDvs;
MultiDocValues.OrdinalMap ordMap;
OrdinalMap ordMap;
LongValues toGlobal;
SortedDocValues subDv;

View File

@@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
@@ -30,7 +31,7 @@ import org.apache.solr.schema.SchemaField;
class UniqueMultiDvSlotAcc extends UniqueSlotAcc {
SortedSetDocValues topLevel;
SortedSetDocValues[] subDvs;
MultiDocValues.OrdinalMap ordMap;
OrdinalMap ordMap;
LongValues toGlobal;
SortedSetDocValues subDv;

View File

@@ -21,6 +21,7 @@ import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.FixedBitSet;
@@ -31,7 +32,7 @@ import org.apache.solr.search.SolrIndexSearcher;
class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc {
SortedDocValues topLevel;
SortedDocValues[] subDvs;
MultiDocValues.OrdinalMap ordMap;
OrdinalMap ordMap;
LongValues toGlobal;
SortedDocValues subDv;

View File

@@ -21,8 +21,8 @@ import java.util.Arrays;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues.OrdinalMap;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.OrdinalMap;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.ArrayUtil;
@@ -50,7 +50,7 @@ class BlockJoinFieldFacetAccumulator {
// elems are: facet value counter << 32 | last parent doc num
private long[] segmentAccums = new long[0];
// for mapping per-segment ords to global ones
private MultiDocValues.OrdinalMap ordinalMap;
private OrdinalMap ordinalMap;
private SchemaField schemaField;
private SortedDocValues segmentSDV;