diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 8926dd9e7c3..fb68cd59b9f 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -16,6 +16,12 @@ Changes in Runtime Behavior ======================= Lucene 7.1.0 ======================= (No Changes) +Optimizations + +* LUCENE-7905: Optimize how OrdinalMap (used by + SortedSetDocValuesFacetCounts and others) builds its map (Robert + Muir, Adrien Grand, Mike McCandless) + ======================= Lucene 7.0.0 ======================= New Features diff --git a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java index 88e34f64b49..8526be68658 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/DocValuesConsumer.java @@ -30,8 +30,8 @@ import org.apache.lucene.index.EmptyDocValuesProducer; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FilteredTermsEnum; import org.apache.lucene.index.MergeState; -import org.apache.lucene.index.MultiDocValues.OrdinalMap; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SegmentWriteState; // javadocs import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java index 3cd796b1bee..f5f59342c64 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiDocValues.java @@ -18,21 +18,10 @@ package org.apache.lucene.index; import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; import java.util.List; -import org.apache.lucene.index.MultiTermsEnum.TermsEnumIndex; -import 
org.apache.lucene.index.MultiTermsEnum.TermsEnumWithSlice; -import org.apache.lucene.util.Accountable; -import org.apache.lucene.util.Accountables; import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.InPlaceMergeSorter; -import org.apache.lucene.util.LongValues; -import org.apache.lucene.util.RamUsageEstimator; import org.apache.lucene.util.packed.PackedInts; -import org.apache.lucene.util.packed.PackedLongValues; /** * A wrapper for CompositeIndexReader providing access to DocValues. @@ -649,283 +638,6 @@ public class MultiDocValues { } } - /** maps per-segment ordinals to/from global ordinal space */ - // TODO: we could also have a utility method to merge Terms[] and use size() as a weight when we need it - // TODO: use more efficient packed ints structures? - // TODO: pull this out? it's pretty generic (maps between N ord()-enabled TermsEnums) - public static class OrdinalMap implements Accountable { - - private static class SegmentMap implements Accountable { - private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SegmentMap.class); - - /** Build a map from an index into a sorted view of `weights` to an index into `weights`. */ - private static int[] map(final long[] weights) { - final int[] newToOld = new int[weights.length]; - for (int i = 0; i < weights.length; ++i) { - newToOld[i] = i; - } - new InPlaceMergeSorter() { - @Override - protected void swap(int i, int j) { - final int tmp = newToOld[i]; - newToOld[i] = newToOld[j]; - newToOld[j] = tmp; - } - @Override - protected int compare(int i, int j) { - // j first since we actually want higher weights first - return Long.compare(weights[newToOld[j]], weights[newToOld[i]]); - } - }.sort(0, weights.length); - return newToOld; - } - - /** Inverse the map. 
*/ - private static int[] inverse(int[] map) { - final int[] inverse = new int[map.length]; - for (int i = 0; i < map.length; ++i) { - inverse[map[i]] = i; - } - return inverse; - } - - private final int[] newToOld, oldToNew; - - SegmentMap(long[] weights) { - newToOld = map(weights); - oldToNew = inverse(newToOld); - assert Arrays.equals(newToOld, inverse(oldToNew)); - } - - int newToOld(int segment) { - return newToOld[segment]; - } - - int oldToNew(int segment) { - return oldToNew[segment]; - } - - @Override - public long ramBytesUsed() { - return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(newToOld) + RamUsageEstimator.sizeOf(oldToNew); - } - } - - /** - * Create an ordinal map that uses the number of unique values of each - * {@link SortedDocValues} instance as a weight. - * @see #build(IndexReader.CacheKey, TermsEnum[], long[], float) - */ - public static OrdinalMap build(IndexReader.CacheKey owner, SortedDocValues[] values, float acceptableOverheadRatio) throws IOException { - final TermsEnum[] subs = new TermsEnum[values.length]; - final long[] weights = new long[values.length]; - for (int i = 0; i < values.length; ++i) { - subs[i] = values[i].termsEnum(); - weights[i] = values[i].getValueCount(); - } - return build(owner, subs, weights, acceptableOverheadRatio); - } - - /** - * Create an ordinal map that uses the number of unique values of each - * {@link SortedSetDocValues} instance as a weight. 
- * @see #build(IndexReader.CacheKey, TermsEnum[], long[], float) - */ - public static OrdinalMap build(IndexReader.CacheKey owner, SortedSetDocValues[] values, float acceptableOverheadRatio) throws IOException { - final TermsEnum[] subs = new TermsEnum[values.length]; - final long[] weights = new long[values.length]; - for (int i = 0; i < values.length; ++i) { - subs[i] = values[i].termsEnum(); - weights[i] = values[i].getValueCount(); - } - return build(owner, subs, weights, acceptableOverheadRatio); - } - - /** - * Creates an ordinal map that allows mapping ords to/from a merged - * space from subs. - * @param owner a cache key - * @param subs TermsEnums that support {@link TermsEnum#ord()}. They need - * not be dense (e.g. can be FilteredTermsEnums}. - * @param weights a weight for each sub. This is ideally correlated with - * the number of unique terms that each sub introduces compared - * to the other subs - * @throws IOException if an I/O error occurred. - */ - public static OrdinalMap build(IndexReader.CacheKey owner, TermsEnum subs[], long[] weights, float acceptableOverheadRatio) throws IOException { - if (subs.length != weights.length) { - throw new IllegalArgumentException("subs and weights must have the same length"); - } - - // enums are not sorted, so let's sort to save memory - final SegmentMap segmentMap = new SegmentMap(weights); - return new OrdinalMap(owner, subs, segmentMap, acceptableOverheadRatio); - } - - private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OrdinalMap.class); - - /** Cache key of whoever asked for this awful thing */ - public final IndexReader.CacheKey owner; - // globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the the ordinal in the first segment that contains this term - final PackedLongValues globalOrdDeltas; - // globalOrd -> first segment container - final PackedLongValues firstSegments; - // for every segment, segmentOrd -> globalOrd - final LongValues segmentToGlobalOrds[]; 
- // the map from/to segment ids - final SegmentMap segmentMap; - // ram usage - final long ramBytesUsed; - - OrdinalMap(IndexReader.CacheKey owner, TermsEnum subs[], SegmentMap segmentMap, float acceptableOverheadRatio) throws IOException { - // create the ordinal mappings by pulling a termsenum over each sub's - // unique terms, and walking a multitermsenum over those - this.owner = owner; - this.segmentMap = segmentMap; - // even though we accept an overhead ratio, we keep these ones with COMPACT - // since they are only used to resolve values given a global ord, which is - // slow anyway - PackedLongValues.Builder globalOrdDeltas = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); - PackedLongValues.Builder firstSegments = PackedLongValues.packedBuilder(PackedInts.COMPACT); - final PackedLongValues.Builder[] ordDeltas = new PackedLongValues.Builder[subs.length]; - for (int i = 0; i < ordDeltas.length; i++) { - ordDeltas[i] = PackedLongValues.monotonicBuilder(acceptableOverheadRatio); - } - long[] ordDeltaBits = new long[subs.length]; - long segmentOrds[] = new long[subs.length]; - ReaderSlice slices[] = new ReaderSlice[subs.length]; - TermsEnumIndex indexes[] = new TermsEnumIndex[slices.length]; - for (int i = 0; i < slices.length; i++) { - slices[i] = new ReaderSlice(0, 0, i); - indexes[i] = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i); - } - MultiTermsEnum mte = new MultiTermsEnum(slices); - mte.reset(indexes); - long globalOrd = 0; - while (mte.next() != null) { - TermsEnumWithSlice matches[] = mte.getMatchArray(); - int firstSegmentIndex = Integer.MAX_VALUE; - long globalOrdDelta = Long.MAX_VALUE; - for (int i = 0; i < mte.getMatchCount(); i++) { - int segmentIndex = matches[i].index; - long segmentOrd = matches[i].terms.ord(); - long delta = globalOrd - segmentOrd; - // We compute the least segment where the term occurs. 
In case the - // first segment contains most (or better all) values, this will - // help save significant memory - if (segmentIndex < firstSegmentIndex) { - firstSegmentIndex = segmentIndex; - globalOrdDelta = delta; - } - // for each per-segment ord, map it back to the global term. - while (segmentOrds[segmentIndex] <= segmentOrd) { - ordDeltaBits[segmentIndex] |= delta; - ordDeltas[segmentIndex].add(delta); - segmentOrds[segmentIndex]++; - } - } - // for each unique term, just mark the first segment index/delta where it occurs - assert firstSegmentIndex < segmentOrds.length; - firstSegments.add(firstSegmentIndex); - globalOrdDeltas.add(globalOrdDelta); - globalOrd++; - } - this.firstSegments = firstSegments.build(); - this.globalOrdDeltas = globalOrdDeltas.build(); - // ordDeltas is typically the bottleneck, so let's see what we can do to make it faster - segmentToGlobalOrds = new LongValues[subs.length]; - long ramBytesUsed = BASE_RAM_BYTES_USED + this.globalOrdDeltas.ramBytesUsed() - + this.firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds) - + segmentMap.ramBytesUsed(); - for (int i = 0; i < ordDeltas.length; ++i) { - final PackedLongValues deltas = ordDeltas[i].build(); - if (ordDeltaBits[i] == 0L) { - // segment ords perfectly match global ordinals - // likely in case of low cardinalities and large segments - segmentToGlobalOrds[i] = LongValues.IDENTITY; - } else { - final int bitsRequired = ordDeltaBits[i] < 0 ? 
64 : PackedInts.bitsRequired(ordDeltaBits[i]); - final long monotonicBits = deltas.ramBytesUsed() * 8; - final long packedBits = bitsRequired * deltas.size(); - if (deltas.size() <= Integer.MAX_VALUE - && packedBits <= monotonicBits * (1 + acceptableOverheadRatio)) { - // monotonic compression mostly adds overhead, let's keep the mapping in plain packed ints - final int size = (int) deltas.size(); - final PackedInts.Mutable newDeltas = PackedInts.getMutable(size, bitsRequired, acceptableOverheadRatio); - final PackedLongValues.Iterator it = deltas.iterator(); - for (int ord = 0; ord < size; ++ord) { - newDeltas.set(ord, it.next()); - } - assert !it.hasNext(); - segmentToGlobalOrds[i] = new LongValues() { - @Override - public long get(long ord) { - return ord + newDeltas.get((int) ord); - } - }; - ramBytesUsed += newDeltas.ramBytesUsed(); - } else { - segmentToGlobalOrds[i] = new LongValues() { - @Override - public long get(long ord) { - return ord + deltas.get(ord); - } - }; - ramBytesUsed += deltas.ramBytesUsed(); - } - ramBytesUsed += RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds[i]); - } - } - this.ramBytesUsed = ramBytesUsed; - } - - /** - * Given a segment number, return a {@link LongValues} instance that maps - * segment ordinals to global ordinals. - */ - public LongValues getGlobalOrds(int segmentIndex) { - return segmentToGlobalOrds[segmentMap.oldToNew(segmentIndex)]; - } - - /** - * Given global ordinal, returns the ordinal of the first segment which contains - * this ordinal (the corresponding to the segment return {@link #getFirstSegmentNumber}). - */ - public long getFirstSegmentOrd(long globalOrd) { - return globalOrd - globalOrdDeltas.get(globalOrd); - } - - /** - * Given a global ordinal, returns the index of the first - * segment that contains this term. 
- */ - public int getFirstSegmentNumber(long globalOrd) { - return segmentMap.newToOld((int) firstSegments.get(globalOrd)); - } - - /** - * Returns the total number of unique terms in global ord space. - */ - public long getValueCount() { - return globalOrdDeltas.size(); - } - - @Override - public long ramBytesUsed() { - return ramBytesUsed; - } - - @Override - public Collection getChildResources() { - List resources = new ArrayList<>(); - resources.add(Accountables.namedAccountable("global ord deltas", globalOrdDeltas)); - resources.add(Accountables.namedAccountable("first segments", firstSegments)); - resources.add(Accountables.namedAccountable("segment map", segmentMap)); - // TODO: would be nice to return actual child segment deltas too, but the optimizations are confusing - return resources; - } - } - /** * Implements SortedDocValues over n subs, using an OrdinalMap * @lucene.internal diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java index 630b65cb065..b484228bfc4 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiSorter.java @@ -166,7 +166,7 @@ final class MultiSorter { final SortedDocValues sorted = Sorter.getOrWrapSorted(readers.get(i), sortField); values[i] = sorted; } - MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build(null, values, PackedInts.DEFAULT); + OrdinalMap ordinalMap = OrdinalMap.build(null, values, PackedInts.DEFAULT); final int missingOrd; if (sortField.getMissingValue() == SortField.STRING_LAST) { missingOrd = sortField.getReverse() ? 
Integer.MIN_VALUE : Integer.MAX_VALUE; diff --git a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java index ac6887f8c33..51f495817fd 100644 --- a/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java +++ b/lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java @@ -302,6 +302,8 @@ public final class MultiTermsEnum extends TermsEnum { // gather equal top fields if (queue.size() > 0) { + // TODO: we could maybe defer this somewhat costly operation until one of the APIs that + // needs to see the top is invoked (docFreq, postings, etc.) pullTop(); } else { current = null; diff --git a/lucene/core/src/java/org/apache/lucene/index/OrdinalMap.java b/lucene/core/src/java/org/apache/lucene/index/OrdinalMap.java new file mode 100644 index 00000000000..bbb643f4e14 --- /dev/null +++ b/lucene/core/src/java/org/apache/lucene/index/OrdinalMap.java @@ -0,0 +1,368 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.lucene.index; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.List; + +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.Accountables; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.InPlaceMergeSorter; +import org.apache.lucene.util.LongValues; +import org.apache.lucene.util.PriorityQueue; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.packed.PackedInts; +import org.apache.lucene.util.packed.PackedLongValues; + +/** Maps per-segment ordinals to/from global ordinal space, using a compact packed-ints representation. + * + *

<p><b>NOTE</b>: this is a costly operation, as it must merge sort all terms, and may require non-trivial RAM once done. It's better to operate in + * segment-private ordinal space instead when possible. + * + * @lucene.internal */ +public class OrdinalMap implements Accountable { + // TODO: we could also have a utility method to merge Terms[] and use size() as a weight when we need it + // TODO: use more efficient packed ints structures? + + private static class TermsEnumIndex { + public final static TermsEnumIndex[] EMPTY_ARRAY = new TermsEnumIndex[0]; + final int subIndex; + final TermsEnum termsEnum; + BytesRef currentTerm; + + public TermsEnumIndex(TermsEnum termsEnum, int subIndex) { + this.termsEnum = termsEnum; + this.subIndex = subIndex; + } + + public BytesRef next() throws IOException { + currentTerm = termsEnum.next(); + return currentTerm; + } + } + + private static class SegmentMap implements Accountable { + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(SegmentMap.class); + + /** Build a map from an index into a sorted view of `weights` to an index into `weights`. */ + private static int[] map(final long[] weights) { + final int[] newToOld = new int[weights.length]; + for (int i = 0; i < weights.length; ++i) { + newToOld[i] = i; + } + new InPlaceMergeSorter() { + @Override + protected void swap(int i, int j) { + final int tmp = newToOld[i]; + newToOld[i] = newToOld[j]; + newToOld[j] = tmp; + } + @Override + protected int compare(int i, int j) { + // j first since we actually want higher weights first + return Long.compare(weights[newToOld[j]], weights[newToOld[i]]); + } + }.sort(0, weights.length); + return newToOld; + } + + /** Invert the map.
*/ + private static int[] inverse(int[] map) { + final int[] inverse = new int[map.length]; + for (int i = 0; i < map.length; ++i) { + inverse[map[i]] = i; + } + return inverse; + } + + private final int[] newToOld, oldToNew; + + SegmentMap(long[] weights) { + newToOld = map(weights); + oldToNew = inverse(newToOld); + assert Arrays.equals(newToOld, inverse(oldToNew)); + } + + int newToOld(int segment) { + return newToOld[segment]; + } + + int oldToNew(int segment) { + return oldToNew[segment]; + } + + @Override + public long ramBytesUsed() { + return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(newToOld) + RamUsageEstimator.sizeOf(oldToNew); + } + } + + /** + * Create an ordinal map that uses the number of unique values of each + * {@link SortedDocValues} instance as a weight. + * @see #build(IndexReader.CacheKey, TermsEnum[], long[], float) + */ + public static OrdinalMap build(IndexReader.CacheKey owner, SortedDocValues[] values, float acceptableOverheadRatio) throws IOException { + final TermsEnum[] subs = new TermsEnum[values.length]; + final long[] weights = new long[values.length]; + for (int i = 0; i < values.length; ++i) { + subs[i] = values[i].termsEnum(); + weights[i] = values[i].getValueCount(); + } + return build(owner, subs, weights, acceptableOverheadRatio); + } + + /** + * Create an ordinal map that uses the number of unique values of each + * {@link SortedSetDocValues} instance as a weight. 
+ * @see #build(IndexReader.CacheKey, TermsEnum[], long[], float) + */ + public static OrdinalMap build(IndexReader.CacheKey owner, SortedSetDocValues[] values, float acceptableOverheadRatio) throws IOException { + final TermsEnum[] subs = new TermsEnum[values.length]; + final long[] weights = new long[values.length]; + for (int i = 0; i < values.length; ++i) { + subs[i] = values[i].termsEnum(); + weights[i] = values[i].getValueCount(); + } + return build(owner, subs, weights, acceptableOverheadRatio); + } + + /** + * Creates an ordinal map that allows mapping ords to/from a merged + * space from subs. + * @param owner a cache key + * @param subs TermsEnums that support {@link TermsEnum#ord()}. They need + * not be dense (e.g. can be FilteredTermsEnums). + * @param weights a weight for each sub. This is ideally correlated with + * the number of unique terms that each sub introduces compared + * to the other subs + * @throws IOException if an I/O error occurred. + */ + public static OrdinalMap build(IndexReader.CacheKey owner, TermsEnum subs[], long[] weights, float acceptableOverheadRatio) throws IOException { + if (subs.length != weights.length) { + throw new IllegalArgumentException("subs and weights must have the same length"); + } + + // enums are not sorted, so let's sort to save memory + final SegmentMap segmentMap = new SegmentMap(weights); + return new OrdinalMap(owner, subs, segmentMap, acceptableOverheadRatio); + } + + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OrdinalMap.class); + + /** Cache key of whoever asked for this awful thing */ + public final IndexReader.CacheKey owner; + // globalOrd -> (globalOrd - segmentOrd) where segmentOrd is the ordinal in the first segment that contains this term + final PackedLongValues globalOrdDeltas; + // globalOrd -> first segment container + final PackedLongValues firstSegments; + // for every segment, segmentOrd -> globalOrd + final LongValues segmentToGlobalOrds[];
+ // the map from/to segment ids + final SegmentMap segmentMap; + // ram usage + final long ramBytesUsed; + + OrdinalMap(IndexReader.CacheKey owner, TermsEnum subs[], SegmentMap segmentMap, float acceptableOverheadRatio) throws IOException { + // create the ordinal mappings by pulling a termsenum over each sub's + // unique terms, and walking a multitermsenum over those + this.owner = owner; + this.segmentMap = segmentMap; + // even though we accept an overhead ratio, we keep these ones with COMPACT + // since they are only used to resolve values given a global ord, which is + // slow anyway + PackedLongValues.Builder globalOrdDeltas = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); + PackedLongValues.Builder firstSegments = PackedLongValues.packedBuilder(PackedInts.COMPACT); + final PackedLongValues.Builder[] ordDeltas = new PackedLongValues.Builder[subs.length]; + for (int i = 0; i < ordDeltas.length; i++) { + ordDeltas[i] = PackedLongValues.monotonicBuilder(acceptableOverheadRatio); + } + long[] ordDeltaBits = new long[subs.length]; + long[] segmentOrds = new long[subs.length]; + + // Just merge-sorts by term: + PriorityQueue queue = new PriorityQueue(subs.length) { + @Override + protected boolean lessThan(TermsEnumIndex a, TermsEnumIndex b) { + return a.currentTerm.compareTo(b.currentTerm) < 0; + } + }; + + for (int i = 0; i < subs.length; i++) { + TermsEnumIndex sub = new TermsEnumIndex(subs[segmentMap.newToOld(i)], i); + if (sub.next() != null) { + queue.add(sub); + } + } + + BytesRefBuilder scratch = new BytesRefBuilder(); + + long globalOrd = 0; + while (queue.size() != 0) { + TermsEnumIndex top = queue.top(); + scratch.copyBytes(top.currentTerm); + + int firstSegmentIndex = Integer.MAX_VALUE; + long globalOrdDelta = Long.MAX_VALUE; + + // Advance past this term, recording the per-segment ord deltas: + while (true) { + top = queue.top(); + long segmentOrd = top.termsEnum.ord(); + long delta = globalOrd - segmentOrd; + int segmentIndex = top.subIndex; 
+ // We compute the least segment where the term occurs. In case the + // first segment contains most (or better all) values, this will + // help save significant memory + if (segmentIndex < firstSegmentIndex) { + firstSegmentIndex = segmentIndex; + globalOrdDelta = delta; + } + ordDeltaBits[segmentIndex] |= delta; + + // for each per-segment ord, map it back to the global term; the while loop is needed + // in case the incoming TermsEnums don't have compact ordinals (some ordinal values + // are skipped), which can happen e.g. with a FilteredTermsEnum: + assert segmentOrds[segmentIndex] <= segmentOrd; + + // TODO: we could specialize this case (the while loop is not needed when the ords + // are compact) + do { + ordDeltas[segmentIndex].add(delta); + segmentOrds[segmentIndex]++; + } while (segmentOrds[segmentIndex] <= segmentOrd); + + if (top.next() == null) { + queue.pop(); + if (queue.size() == 0) { + break; + } + } else { + queue.updateTop(); + } + if (queue.top().currentTerm.equals(scratch.get()) == false) { + break; + } + } + + // for each unique term, just mark the first segment index/delta where it occurs + firstSegments.add(firstSegmentIndex); + globalOrdDeltas.add(globalOrdDelta); + globalOrd++; + } + + this.firstSegments = firstSegments.build(); + this.globalOrdDeltas = globalOrdDeltas.build(); + // ordDeltas is typically the bottleneck, so let's see what we can do to make it faster + segmentToGlobalOrds = new LongValues[subs.length]; + long ramBytesUsed = BASE_RAM_BYTES_USED + this.globalOrdDeltas.ramBytesUsed() + + this.firstSegments.ramBytesUsed() + RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds) + + segmentMap.ramBytesUsed(); + for (int i = 0; i < ordDeltas.length; ++i) { + final PackedLongValues deltas = ordDeltas[i].build(); + if (ordDeltaBits[i] == 0L) { + // segment ords perfectly match global ordinals + // likely in case of low cardinalities and large segments + segmentToGlobalOrds[i] = LongValues.IDENTITY; + } else { + final int 
bitsRequired = ordDeltaBits[i] < 0 ? 64 : PackedInts.bitsRequired(ordDeltaBits[i]); + final long monotonicBits = deltas.ramBytesUsed() * 8; + final long packedBits = bitsRequired * deltas.size(); + if (deltas.size() <= Integer.MAX_VALUE + && packedBits <= monotonicBits * (1 + acceptableOverheadRatio)) { + // monotonic compression mostly adds overhead, let's keep the mapping in plain packed ints + final int size = (int) deltas.size(); + final PackedInts.Mutable newDeltas = PackedInts.getMutable(size, bitsRequired, acceptableOverheadRatio); + final PackedLongValues.Iterator it = deltas.iterator(); + for (int ord = 0; ord < size; ++ord) { + newDeltas.set(ord, it.next()); + } + assert it.hasNext() == false; + segmentToGlobalOrds[i] = new LongValues() { + @Override + public long get(long ord) { + return ord + newDeltas.get((int) ord); + } + }; + ramBytesUsed += newDeltas.ramBytesUsed(); + } else { + segmentToGlobalOrds[i] = new LongValues() { + @Override + public long get(long ord) { + return ord + deltas.get(ord); + } + }; + ramBytesUsed += deltas.ramBytesUsed(); + } + ramBytesUsed += RamUsageEstimator.shallowSizeOf(segmentToGlobalOrds[i]); + } + } + this.ramBytesUsed = ramBytesUsed; + } + + /** + * Given a segment number, return a {@link LongValues} instance that maps + * segment ordinals to global ordinals. + */ + public LongValues getGlobalOrds(int segmentIndex) { + return segmentToGlobalOrds[segmentMap.oldToNew(segmentIndex)]; + } + + /** + * Given a global ordinal, returns the term's ordinal in the first segment that contains + * it (the segment itself is returned by {@link #getFirstSegmentNumber}). + */ + public long getFirstSegmentOrd(long globalOrd) { + return globalOrd - globalOrdDeltas.get(globalOrd); + } + + /** + * Given a global ordinal, returns the index of the first + * segment that contains this term.
+ */ + public int getFirstSegmentNumber(long globalOrd) { + return segmentMap.newToOld((int) firstSegments.get(globalOrd)); + } + + /** + * Returns the total number of unique terms in global ord space. + */ + public long getValueCount() { + return globalOrdDeltas.size(); + } + + @Override + public long ramBytesUsed() { + return ramBytesUsed; + } + + @Override + public Collection getChildResources() { + List resources = new ArrayList<>(); + resources.add(Accountables.namedAccountable("global ord deltas", globalOrdDeltas)); + resources.add(Accountables.namedAccountable("first segments", firstSegments)); + resources.add(Accountables.namedAccountable("segment map", segmentMap)); + // TODO: would be nice to return actual child segment deltas too, but the optimizations are confusing + return resources; + } +} diff --git a/lucene/core/src/test/org/apache/lucene/index/TestOrdinalMap.java b/lucene/core/src/test/org/apache/lucene/index/TestOrdinalMap.java index 921102d650a..6120985fb43 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestOrdinalMap.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestOrdinalMap.java @@ -25,7 +25,7 @@ import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.SortedDocValuesField; import org.apache.lucene.document.SortedSetDocValuesField; -import org.apache.lucene.index.MultiDocValues.OrdinalMap; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.store.Directory; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.LongValues; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java index bcb6acfb7c3..4e4a01cae34 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java +++ 
b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/ConcurrentSortedSetDocValuesFacetCounts.java @@ -42,6 +42,7 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.search.ConjunctionDISI; @@ -152,10 +153,10 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets { private class CountOneSegment implements Callable { final LeafReader leafReader; final MatchingDocs hits; - final MultiDocValues.OrdinalMap ordinalMap; + final OrdinalMap ordinalMap; final int segOrd; - public CountOneSegment(LeafReader leafReader, MatchingDocs hits, MultiDocValues.OrdinalMap ordinalMap, int segOrd) { + public CountOneSegment(LeafReader leafReader, MatchingDocs hits, OrdinalMap ordinalMap, int segOrd) { this.leafReader = leafReader; this.hits = hits; this.ordinalMap = ordinalMap; @@ -240,7 +241,7 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets { /** Does all the "real work" of tallying up the counts. */ private final void count(List matchingDocs) throws IOException, InterruptedException { - MultiDocValues.OrdinalMap ordinalMap; + OrdinalMap ordinalMap; // TODO: is this right? really, we need a way to // verify that this ordinalMap "matches" the leaves in @@ -281,7 +282,7 @@ public class ConcurrentSortedSetDocValuesFacetCounts extends Facets { private final void countAll() throws IOException, InterruptedException { //System.out.println("ssdv count"); - MultiDocValues.OrdinalMap ordinalMap; + OrdinalMap ordinalMap; // TODO: is this right? 
really, we need a way to // verify that this ordinalMap "matches" the leaves in diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java index 832ff3b2b93..a3098baeb9c 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/DefaultSortedSetDocValuesReaderState.java @@ -31,8 +31,8 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; -import org.apache.lucene.index.MultiDocValues.OrdinalMap; import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.Accountable; import org.apache.lucene.util.Accountables; diff --git a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java index 2198fc0485d..6df43349902 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/sortedset/SortedSetDocValuesFacetCounts.java @@ -37,6 +37,7 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.search.ConjunctionDISI; @@ -155,7 +156,7 @@ public class SortedSetDocValuesFacetCounts extends Facets { return new 
FacetResult(dim, new String[0], dimCount, labelValues, childCount); } - private void countOneSegment(MultiDocValues.OrdinalMap ordinalMap, LeafReader reader, int segOrd, MatchingDocs hits) throws IOException { + private void countOneSegment(OrdinalMap ordinalMap, LeafReader reader, int segOrd, MatchingDocs hits) throws IOException { SortedSetDocValues segValues = reader.getSortedSetDocValues(field); if (segValues == null) { // nothing to count @@ -236,7 +237,7 @@ public class SortedSetDocValuesFacetCounts extends Facets { private final void count(List matchingDocs) throws IOException { //System.out.println("ssdv count"); - MultiDocValues.OrdinalMap ordinalMap; + OrdinalMap ordinalMap; // TODO: is this right? really, we need a way to // verify that this ordinalMap "matches" the leaves in @@ -267,7 +268,7 @@ public class SortedSetDocValuesFacetCounts extends Facets { private final void countAll() throws IOException { //System.out.println("ssdv count"); - MultiDocValues.OrdinalMap ordinalMap; + OrdinalMap ordinalMap; // TODO: is this right? 
really, we need a way to // verify that this ordinalMap "matches" the leaves in diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollector.java index 9c9072d5d3a..15ce023ab51 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollector.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsCollector.java @@ -20,7 +20,7 @@ import java.io.IOException; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.search.Collector; import org.apache.lucene.search.LeafCollector; @@ -37,9 +37,9 @@ final class GlobalOrdinalsCollector implements Collector { final String field; final LongBitSet collectedOrds; - final MultiDocValues.OrdinalMap ordinalMap; + final OrdinalMap ordinalMap; - GlobalOrdinalsCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount) { + GlobalOrdinalsCollector(String field, OrdinalMap ordinalMap, long valueCount) { this.field = field; this.ordinalMap = ordinalMap; this.collectedOrds = new LongBitSet(valueCount); diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java index aacda2d80fa..9ddc5eeda10 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsQuery.java @@ -21,7 +21,7 @@ import java.util.Set; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.Term; 
import org.apache.lucene.search.ConstantScoreWeight; @@ -41,7 +41,7 @@ final class GlobalOrdinalsQuery extends Query { // All the ords of matching docs found with OrdinalsCollector. private final LongBitSet foundOrds; private final String joinField; - private final MultiDocValues.OrdinalMap globalOrds; + private final OrdinalMap globalOrds; // Is also an approximation of the docs that will match. Can be all docs that have toField or something more specific. private final Query toQuery; @@ -50,7 +50,7 @@ final class GlobalOrdinalsQuery extends Query { // id of the context rather than the context itself in order not to hold references to index readers private final Object indexReaderContextId; - GlobalOrdinalsQuery(LongBitSet foundOrds, String joinField, MultiDocValues.OrdinalMap globalOrds, Query toQuery, + GlobalOrdinalsQuery(LongBitSet foundOrds, String joinField, OrdinalMap globalOrds, Query toQuery, Query fromQuery, Object indexReaderContextId) { this.foundOrds = foundOrds; this.joinField = joinField; diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreCollector.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreCollector.java index cc580413156..a5574166ac2 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreCollector.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreCollector.java @@ -21,7 +21,7 @@ import java.util.Arrays; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.search.Collector; import org.apache.lucene.search.LeafCollector; @@ -35,13 +35,13 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector { final boolean doMinMax; final int min; final int max; - final MultiDocValues.OrdinalMap ordinalMap; + 
final OrdinalMap ordinalMap; final LongBitSet collectedOrds; protected final Scores scores; protected final Occurrences occurrences; - GlobalOrdinalsWithScoreCollector(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, ScoreMode scoreMode, int min, int max) { + GlobalOrdinalsWithScoreCollector(String field, OrdinalMap ordinalMap, long valueCount, ScoreMode scoreMode, int min, int max) { if (valueCount > Integer.MAX_VALUE) { // We simply don't support more than throw new IllegalStateException("Can't collect more than [" + Integer.MAX_VALUE + "] ids"); @@ -168,7 +168,7 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector { static final class Min extends GlobalOrdinalsWithScoreCollector { - public Min(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) { + public Min(String field, OrdinalMap ordinalMap, long valueCount, int min, int max) { super(field, ordinalMap, valueCount, ScoreMode.Min, min, max); } @@ -185,7 +185,7 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector { static final class Max extends GlobalOrdinalsWithScoreCollector { - public Max(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) { + public Max(String field, OrdinalMap ordinalMap, long valueCount, int min, int max) { super(field, ordinalMap, valueCount, ScoreMode.Max, min, max); } @@ -202,7 +202,7 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector { static final class Sum extends GlobalOrdinalsWithScoreCollector { - public Sum(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) { + public Sum(String field, OrdinalMap ordinalMap, long valueCount, int min, int max) { super(field, ordinalMap, valueCount, ScoreMode.Total, min, max); } @@ -219,7 +219,7 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector { static final class Avg extends GlobalOrdinalsWithScoreCollector { - public Avg(String field, 
MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) { + public Avg(String field, OrdinalMap ordinalMap, long valueCount, int min, int max) { super(field, ordinalMap, valueCount, ScoreMode.Avg, min, max); } @@ -241,7 +241,7 @@ abstract class GlobalOrdinalsWithScoreCollector implements Collector { static final class NoScore extends GlobalOrdinalsWithScoreCollector { - public NoScore(String field, MultiDocValues.OrdinalMap ordinalMap, long valueCount, int min, int max) { + public NoScore(String field, OrdinalMap ordinalMap, long valueCount, int min, int max) { super(field, ordinalMap, valueCount, ScoreMode.None, min, max); } diff --git a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java index 1c80bf3bb8c..7946559cc59 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/GlobalOrdinalsWithScoreQuery.java @@ -21,7 +21,7 @@ import java.util.Set; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.Term; import org.apache.lucene.search.DocIdSetIterator; @@ -39,7 +39,7 @@ final class GlobalOrdinalsWithScoreQuery extends Query { private final GlobalOrdinalsWithScoreCollector collector; private final String joinField; - private final MultiDocValues.OrdinalMap globalOrds; + private final OrdinalMap globalOrds; // Is also an approximation of the docs that will match. Can be all docs that have toField or something more specific. 
private final Query toQuery; @@ -52,7 +52,7 @@ final class GlobalOrdinalsWithScoreQuery extends Query { private final Object indexReaderContextId; GlobalOrdinalsWithScoreQuery(GlobalOrdinalsWithScoreCollector collector, ScoreMode scoreMode, String joinField, - MultiDocValues.OrdinalMap globalOrds, Query toQuery, Query fromQuery, int min, int max, + OrdinalMap globalOrds, Query toQuery, Query fromQuery, int min, int max, Object indexReaderContextId) { this.collector = collector; this.joinField = joinField; diff --git a/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java b/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java index d7e0ae88438..c0f380dd113 100644 --- a/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java +++ b/lucene/join/src/java/org/apache/lucene/search/join/JoinUtil.java @@ -34,8 +34,8 @@ import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; @@ -407,7 +407,7 @@ public final class JoinUtil { /** - * Delegates to {@link #createJoinQuery(String, Query, Query, IndexSearcher, ScoreMode, MultiDocValues.OrdinalMap, int, int)}, + * Delegates to {@link #createJoinQuery(String, Query, Query, IndexSearcher, ScoreMode, OrdinalMap, int, int)}, * but disables the min and max filtering. 
* * @param joinField The {@link SortedDocValues} field containing the join values @@ -425,7 +425,7 @@ public final class JoinUtil { Query toQuery, IndexSearcher searcher, ScoreMode scoreMode, - MultiDocValues.OrdinalMap ordinalMap) throws IOException { + OrdinalMap ordinalMap) throws IOException { return createJoinQuery(joinField, fromQuery, toQuery, searcher, scoreMode, ordinalMap, 0, Integer.MAX_VALUE); } @@ -464,7 +464,7 @@ public final class JoinUtil { Query toQuery, IndexSearcher searcher, ScoreMode scoreMode, - MultiDocValues.OrdinalMap ordinalMap, + OrdinalMap ordinalMap, int min, int max) throws IOException { int numSegments = searcher.getIndexReader().leaves().size(); diff --git a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java index b1a4a02c689..12fefd50579 100644 --- a/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java +++ b/lucene/join/src/test/org/apache/lucene/search/join/TestJoinUtil.java @@ -55,11 +55,10 @@ import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.MultiDocValues.OrdinalMap; -import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.MultiFields; import org.apache.lucene.index.NoMergePolicy; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.PostingsEnum; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.SortedDocValues; @@ -267,7 +266,7 @@ public class TestJoinUtil extends LuceneTestCase { LeafReader leafReader = r.leaves().get(i).reader(); values[i] = DocValues.getSorted(leafReader, joinField); } - MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build( + OrdinalMap ordinalMap = OrdinalMap.build( null, values, PackedInts.DEFAULT ); @@ 
-372,7 +371,7 @@ public class TestJoinUtil extends LuceneTestCase { LeafReader leafReader = r.leaves().get(i).reader(); values[i] = DocValues.getSorted(leafReader, joinField); } - MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build( + OrdinalMap ordinalMap = OrdinalMap.build( null, values, PackedInts.DEFAULT ); @@ -500,7 +499,7 @@ public class TestJoinUtil extends LuceneTestCase { for (LeafReaderContext leadContext : searcher.getIndexReader().leaves()) { values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field"); } - MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build( + OrdinalMap ordinalMap = OrdinalMap.build( null, values, PackedInts.DEFAULT ); BooleanQuery.Builder fromQuery = new BooleanQuery.Builder(); @@ -621,7 +620,7 @@ public class TestJoinUtil extends LuceneTestCase { for (LeafReaderContext leadContext : searcher.getIndexReader().leaves()) { values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field"); } - MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build( + OrdinalMap ordinalMap = OrdinalMap.build( null, values, PackedInts.DEFAULT ); Query fromQuery = new TermQuery(new Term("type", "from")); @@ -1036,7 +1035,7 @@ public class TestJoinUtil extends LuceneTestCase { LeafReader leafReader = r.leaves().get(i).reader(); values[i] = DocValues.getSorted(leafReader, joinField); } - MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build( + OrdinalMap ordinalMap = OrdinalMap.build( null, values, PackedInts.DEFAULT ); IndexSearcher indexSearcher = new IndexSearcher(r); @@ -1067,7 +1066,7 @@ public class TestJoinUtil extends LuceneTestCase { LeafReader leafReader = r.leaves().get(i).reader(); values[i] = DocValues.getSorted(leafReader, joinField); } - MultiDocValues.OrdinalMap ordinalMap = MultiDocValues.OrdinalMap.build( + OrdinalMap ordinalMap = OrdinalMap.build( null, values, PackedInts.DEFAULT ); IndexSearcher indexSearcher = new IndexSearcher(r); 
@@ -1590,7 +1589,7 @@ public class TestJoinUtil extends LuceneTestCase { for (LeafReaderContext leadContext : topLevelReader.leaves()) { values[leadContext.ord] = DocValues.getSorted(leadContext.reader(), "join_field"); } - context.ordinalMap = MultiDocValues.OrdinalMap.build( + context.ordinalMap = OrdinalMap.build( null, values, PackedInts.DEFAULT ); } @@ -1712,7 +1711,7 @@ public class TestJoinUtil extends LuceneTestCase { Map> fromHitsToJoinScore = new HashMap<>(); Map> toHitsToJoinScore = new HashMap<>(); - MultiDocValues.OrdinalMap ordinalMap; + OrdinalMap ordinalMap; Directory dir; IndexSearcher searcher; diff --git a/solr/core/src/java/org/apache/solr/handler/ExportWriter.java b/solr/core/src/java/org/apache/solr/handler/ExportWriter.java index 61f937ce05e..f618eef6d2b 100644 --- a/solr/core/src/java/org/apache/solr/handler/ExportWriter.java +++ b/solr/core/src/java/org/apache/solr/handler/ExportWriter.java @@ -17,10 +17,6 @@ package org.apache.solr.handler; -import static java.util.Collections.singletonList; -import static java.util.Collections.singletonMap; -import static org.apache.solr.common.util.Utils.makeMap; - import java.io.Closeable; import java.io.IOException; import java.io.OutputStream; @@ -38,6 +34,7 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; @@ -53,8 +50,8 @@ import org.apache.lucene.util.LongValues; import org.apache.lucene.util.NumericUtils; import org.apache.solr.client.solrj.impl.BinaryResponseParser; import org.apache.solr.common.IteratorWriter; -import org.apache.solr.common.MapWriter; import org.apache.solr.common.MapWriter.EntryWriter; +import org.apache.solr.common.MapWriter; import 
org.apache.solr.common.PushWriter; import org.apache.solr.common.SolrException; import org.apache.solr.common.params.SolrParams; @@ -82,6 +79,10 @@ import org.apache.solr.search.SyntaxError; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static java.util.Collections.singletonList; +import static java.util.Collections.singletonMap; +import static org.apache.solr.common.util.Utils.makeMap; + public class ExportWriter implements SolrCore.RawWriter, Closeable { private static final Logger logger = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); private OutputStreamWriter respWriter; @@ -1257,7 +1258,7 @@ public class ExportWriter implements SolrCore.RawWriter, Closeable { protected SortedDocValues vals; - protected MultiDocValues.OrdinalMap ordinalMap; + protected OrdinalMap ordinalMap; protected LongValues globalOrds; protected SortedDocValues currentVals; diff --git a/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java b/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java index 10c5b13fb34..e2bcef80db2 100644 --- a/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java +++ b/solr/core/src/java/org/apache/solr/handler/component/ExpandComponent.java @@ -24,15 +24,6 @@ import java.util.Iterator; import java.util.List; import java.util.Map; -import com.carrotsearch.hppc.IntHashSet; -import com.carrotsearch.hppc.IntObjectHashMap; -import com.carrotsearch.hppc.LongHashSet; -import com.carrotsearch.hppc.LongObjectHashMap; -import com.carrotsearch.hppc.LongObjectMap; -import com.carrotsearch.hppc.cursors.IntObjectCursor; -import com.carrotsearch.hppc.cursors.LongCursor; -import com.carrotsearch.hppc.cursors.LongObjectCursor; -import com.carrotsearch.hppc.cursors.ObjectCursor; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.DocValuesType; import org.apache.lucene.index.FieldInfo; @@ -42,6 +33,7 @@ import org.apache.lucene.index.LeafReader; import 
org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.search.BooleanClause.Occur; import org.apache.lucene.search.BooleanQuery; @@ -88,6 +80,16 @@ import org.apache.solr.uninverting.UninvertingReader; import org.apache.solr.util.plugin.PluginInfoInitialized; import org.apache.solr.util.plugin.SolrCoreAware; +import com.carrotsearch.hppc.IntHashSet; +import com.carrotsearch.hppc.IntObjectHashMap; +import com.carrotsearch.hppc.LongHashSet; +import com.carrotsearch.hppc.LongObjectHashMap; +import com.carrotsearch.hppc.LongObjectMap; +import com.carrotsearch.hppc.cursors.IntObjectCursor; +import com.carrotsearch.hppc.cursors.LongCursor; +import com.carrotsearch.hppc.cursors.LongObjectCursor; +import com.carrotsearch.hppc.cursors.ObjectCursor; + /** * The ExpandComponent is designed to work with the CollapsingPostFilter. * The CollapsingPostFilter collapses a result set on a field. 
@@ -274,7 +276,7 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia IntObjectHashMap ordBytes = null; if(values != null) { groupBits = new FixedBitSet(values.getValueCount()); - MultiDocValues.OrdinalMap ordinalMap = null; + OrdinalMap ordinalMap = null; SortedDocValues[] sortedDocValues = null; LongValues segmentOrdinalMap = null; SortedDocValues currentValues = null; @@ -520,7 +522,7 @@ public class ExpandComponent extends SearchComponent implements PluginInfoInitia private static class GroupExpandCollector implements Collector, GroupCollector { private SortedDocValues docValues; - private MultiDocValues.OrdinalMap ordinalMap; + private OrdinalMap ordinalMap; private SortedDocValues segmentValues; private LongValues segmentOrdinalMap; private MultiDocValues.MultiSortedDocValues multiSortedDocValues; diff --git a/solr/core/src/java/org/apache/solr/index/SlowCompositeReaderWrapper.java b/solr/core/src/java/org/apache/solr/index/SlowCompositeReaderWrapper.java index ad1e81f3d45..2d612e90ebc 100644 --- a/solr/core/src/java/org/apache/solr/index/SlowCompositeReaderWrapper.java +++ b/solr/core/src/java/org/apache/solr/index/SlowCompositeReaderWrapper.java @@ -22,7 +22,7 @@ import java.util.Map; import org.apache.lucene.index.*; import org.apache.lucene.index.MultiDocValues.MultiSortedDocValues; -import org.apache.lucene.index.MultiDocValues.OrdinalMap; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.util.Bits; import org.apache.lucene.util.Version; diff --git a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java index e9498f8a7d9..d77c73de48d 100644 --- a/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java +++ b/solr/core/src/java/org/apache/solr/request/DocValuesFacets.java @@ -22,9 +22,9 @@ import java.util.function.Predicate; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; -import 
org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; -import org.apache.lucene.index.MultiDocValues.OrdinalMap; +import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.search.DocIdSet; diff --git a/solr/core/src/java/org/apache/solr/request/DocValuesStats.java b/solr/core/src/java/org/apache/solr/request/DocValuesStats.java index 4b1fe84c637..dae9943fb7d 100644 --- a/solr/core/src/java/org/apache/solr/request/DocValuesStats.java +++ b/solr/core/src/java/org/apache/solr/request/DocValuesStats.java @@ -22,9 +22,9 @@ import java.util.Map; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.MultiDocValues.MultiSortedSetDocValues; -import org.apache.lucene.index.MultiDocValues.OrdinalMap; +import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.search.DocIdSet; diff --git a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java index 2a60343ff41..8c93b529be6 100644 --- a/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java +++ b/solr/core/src/java/org/apache/solr/search/CollapsingQParserPlugin.java @@ -25,12 +25,6 @@ import java.util.Iterator; import java.util.List; import java.util.Map; -import com.carrotsearch.hppc.FloatArrayList; -import com.carrotsearch.hppc.IntArrayList; -import com.carrotsearch.hppc.IntIntHashMap; -import com.carrotsearch.hppc.IntLongHashMap; -import com.carrotsearch.hppc.cursors.IntIntCursor; -import com.carrotsearch.hppc.cursors.IntLongCursor; import 
org.apache.commons.lang.StringUtils; import org.apache.lucene.codecs.DocValuesProducer; import org.apache.lucene.index.DocValues; @@ -43,6 +37,7 @@ import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues; import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.queries.function.FunctionQuery; import org.apache.lucene.queries.function.FunctionValues; @@ -69,10 +64,17 @@ import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.request.SolrRequestInfo; import org.apache.solr.schema.FieldType; -import org.apache.solr.schema.StrField; import org.apache.solr.schema.NumberType; +import org.apache.solr.schema.StrField; import org.apache.solr.uninverting.UninvertingReader; +import com.carrotsearch.hppc.FloatArrayList; +import com.carrotsearch.hppc.IntArrayList; +import com.carrotsearch.hppc.IntIntHashMap; +import com.carrotsearch.hppc.IntLongHashMap; +import com.carrotsearch.hppc.cursors.IntIntCursor; +import com.carrotsearch.hppc.cursors.IntLongCursor; + import static org.apache.solr.common.params.CommonParams.SORT; /** @@ -474,7 +476,7 @@ public class CollapsingQParserPlugin extends QParserPlugin { private final DocValuesProducer collapseValuesProducer; private FixedBitSet collapsedSet; private SortedDocValues collapseValues; - private MultiDocValues.OrdinalMap ordinalMap; + private OrdinalMap ordinalMap; private SortedDocValues segmentValues; private LongValues segmentOrdinalMap; private MultiDocValues.MultiSortedDocValues multiSortedDocValues; @@ -920,7 +922,7 @@ public class CollapsingQParserPlugin extends QParserPlugin { private LeafReaderContext[] contexts; private DocValuesProducer collapseValuesProducer; private SortedDocValues collapseValues; - protected MultiDocValues.OrdinalMap ordinalMap; + protected 
OrdinalMap ordinalMap; protected SortedDocValues segmentValues; protected LongValues segmentOrdinalMap; protected MultiDocValues.MultiSortedDocValues multiSortedDocValues; diff --git a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java index 1481f187df5..fe7a3f23937 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java +++ b/solr/core/src/java/org/apache/solr/search/facet/FacetFieldProcessorByArrayDV.java @@ -23,6 +23,7 @@ import java.util.List; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.search.DocIdSet; @@ -43,7 +44,7 @@ class FacetFieldProcessorByArrayDV extends FacetFieldProcessorByArray { boolean multiValuedField; SortedSetDocValues si; // only used for term lookups (for both single and multi-valued) - MultiDocValues.OrdinalMap ordinalMap = null; // maps per-segment ords to global ords + OrdinalMap ordinalMap = null; // maps per-segment ords to global ords FacetFieldProcessorByArrayDV(FacetContext fcontext, FacetField freq, SchemaField sf) { super(fcontext, freq, sf); diff --git a/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java b/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java index 2c3786377c8..a6d6b9744b5 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java +++ b/solr/core/src/java/org/apache/solr/search/facet/MinMaxAgg.java @@ -21,6 +21,7 @@ import java.util.Arrays; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedDocValues; import 
org.apache.lucene.queries.function.ValueSource; import org.apache.lucene.util.BytesRef; @@ -176,7 +177,7 @@ public class MinMaxAgg extends SimpleAggValueSource { class SingleValuedOrdAcc extends OrdAcc { SortedDocValues topLevel; SortedDocValues[] subDvs; - MultiDocValues.OrdinalMap ordMap; + OrdinalMap ordMap; LongValues toGlobal; SortedDocValues subDv; diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueMultiDvSlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueMultiDvSlotAcc.java index 65fa81ac6ab..0a6eb226d0a 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/UniqueMultiDvSlotAcc.java +++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueMultiDvSlotAcc.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; @@ -30,7 +31,7 @@ import org.apache.solr.schema.SchemaField; class UniqueMultiDvSlotAcc extends UniqueSlotAcc { SortedSetDocValues topLevel; SortedSetDocValues[] subDvs; - MultiDocValues.OrdinalMap ordMap; + OrdinalMap ordMap; LongValues toGlobal; SortedSetDocValues subDv; diff --git a/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java b/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java index b39f2823453..434e680ca2f 100644 --- a/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java +++ b/solr/core/src/java/org/apache/solr/search/facet/UniqueSinglevaluedSlotAcc.java @@ -21,6 +21,7 @@ import java.io.IOException; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.util.BytesRef; import 
org.apache.lucene.util.FixedBitSet; @@ -31,7 +32,7 @@ import org.apache.solr.search.SolrIndexSearcher; class UniqueSinglevaluedSlotAcc extends UniqueSlotAcc { SortedDocValues topLevel; SortedDocValues[] subDvs; - MultiDocValues.OrdinalMap ordMap; + OrdinalMap ordMap; LongValues toGlobal; SortedDocValues subDv; diff --git a/solr/core/src/java/org/apache/solr/search/join/BlockJoinFieldFacetAccumulator.java b/solr/core/src/java/org/apache/solr/search/join/BlockJoinFieldFacetAccumulator.java index 141b09562c4..703307cc2b4 100644 --- a/solr/core/src/java/org/apache/solr/search/join/BlockJoinFieldFacetAccumulator.java +++ b/solr/core/src/java/org/apache/solr/search/join/BlockJoinFieldFacetAccumulator.java @@ -21,8 +21,8 @@ import java.util.Arrays; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.MultiDocValues.OrdinalMap; import org.apache.lucene.index.MultiDocValues; +import org.apache.lucene.index.OrdinalMap; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.util.ArrayUtil; @@ -50,7 +50,7 @@ class BlockJoinFieldFacetAccumulator { // elems are : facet value counter<<32 | last parent doc num private long[] segmentAccums = new long[0]; // for mapping per-segment ords to global ones - private MultiDocValues.OrdinalMap ordinalMap; + private OrdinalMap ordinalMap; private SchemaField schemaField; private SortedDocValues segmentSDV;