LUCENE-10062: Switch to numeric doc values for encoding taxonomy ordinals

parent 6b99f03cdd
commit 0ba310782f

Changed files:
  lucene/
    CHANGES.txt, MIGRATE.md
    facet/src/
      java/org/apache/lucene/facet/
        FacetUtils.java, FacetsConfig.java
        taxonomy/
      test/org/apache/lucene/facet/
@@ -367,6 +367,9 @@ Improvements
  See release notes. https://github.com/locationtech/spatial4j/releases/tag/spatial4j-0.8
  (David Smiley)

* LUCENE-10062: Switch taxonomy faceting to use numeric doc values for storing ordinals instead of binary doc values
  with its own custom encoding. (Greg Miller)

Bug fixes
---------------------

@@ -450,3 +450,17 @@ structure. Use a standard BoostQuery here instead.

Rather than using `setSort()` to change sort values, you should instead create
a new Sort instance with the new values.

## Taxonomy-based faceting uses more modern encodings (LUCENE-9450, LUCENE-10062, LUCENE-10122)

The side-car taxonomy index now uses doc values for ord-to-path lookup (LUCENE-9450) and parent
lookup (LUCENE-10122) instead of stored fields and positions (respectively). Document ordinals
are now encoded with `SortedNumericDocValues` instead of using a custom (v-int) binary format.
Performance gains have been observed with these encoding changes, but to benefit from them, users
must create a new index using 9.x (it is not sufficient to reindex documents against an existing
8.x index). In order to remain backwards-compatible with 8.x indexes, the older format is retained
until a full rebuild is done.

Additionally, `OrdinalsReader` (and sub-classes) have been marked `@Deprecated` as custom binary
encodings will not be supported for Document ordinals in 9.x onwards (`SortedNumericDocValues` are
used out-of-the-box instead).
@@ -18,8 +18,15 @@
package org.apache.lucene.facet;

import java.io.IOException;
import java.util.function.BiConsumer;
import org.apache.lucene.facet.taxonomy.BackCompatSortedNumericDocValues;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/**
 * Utility class with a single method for getting a DocIdSetIterator that skips deleted docs
@@ -81,4 +88,47 @@ public final class FacetUtils {
      }
    };
  }

  /**
   * Loads ordinal values as {@link SortedNumericDocValues}. If the index still uses the older
   * binary format, it will wrap that with the SNDV API. Newer format indexes will just load the
   * SNDV directly.
   *
   * <p>This is really only needed/useful to maintain back-compat with the binary format. Once
   * back-compat is no longer needed, the SNDV field should just be loaded directly.
   *
   * @deprecated Please do not rely on this method. It is added as a temporary measure for providing
   *     index backwards-compatibility with Lucene 8 and earlier indexes, and will be removed in
   *     Lucene 10.
   */
  @Deprecated
  public static SortedNumericDocValues loadOrdinalValues(LeafReader reader, String fieldName)
      throws IOException {
    return loadOrdinalValues(reader, fieldName, null);
  }

  /**
   * Loads ordinal values as {@link SortedNumericDocValues}. If the index still uses the older
   * binary format, it will wrap that with the SNDV API. Newer format indexes will just load the
   * SNDV directly. The provided {@code binaryValueDecoder} allows custom decoding logic for older
   * binary format fields to be provided.
   *
   * <p>This is really only needed/useful to maintain back-compat with the binary format. Once
   * back-compat is no longer needed, the SNDV field should just be loaded directly.
   *
   * @deprecated Please do not rely on this method. It is added as a temporary measure for providing
   *     index backwards-compatibility with Lucene 8 and earlier indexes, and will be removed in
   *     Lucene 10.
   */
  @Deprecated
  public static SortedNumericDocValues loadOrdinalValues(
      LeafReader reader, String fieldName, BiConsumer<BytesRef, IntsRef> binaryValueDecoder)
      throws IOException {
    if (reader.getMetaData().getCreatedVersionMajor() <= 8) {
      BinaryDocValues oldStyleDocValues = reader.getBinaryDocValues(fieldName);
      return BackCompatSortedNumericDocValues.wrap(oldStyleDocValues, binaryValueDecoder);
    } else {
      return reader.getSortedNumericDocValues(fieldName);
    }
  }
}

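Usage sketch (hypothetical, not part of this commit): the helper above can be called per segment so that both 8.x (binary) and 9.x (numeric) segments are handled transparently. The field name, the aggregation, and the assumed Lucene imports (IndexReader, LeafReaderContext, SortedNumericDocValues, DocIdSetIterator, FacetUtils) are illustration choices.

  static long totalOrdinalCount(IndexReader reader, String indexFieldName) throws IOException {
    long total = 0;
    for (LeafReaderContext ctx : reader.leaves()) {
      SortedNumericDocValues ordinals = FacetUtils.loadOrdinalValues(ctx.reader(), indexFieldName);
      if (ordinals == null) {
        continue; // no ordinals indexed in this segment
      }
      while (ordinals.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        total += ordinals.docValueCount(); // count the ordinals stored for this doc
      }
    }
    return total;
  }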
@@ -28,6 +28,7 @@ import java.util.concurrent.ConcurrentHashMap;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
@@ -409,9 +410,26 @@ public class FacetsConfig {
        indexDrillDownTerms(doc, indexFieldName, dimConfig, facetLabel);
      }

      // Facet counts:
      // DocValues are considered stored fields:
      doc.add(new BinaryDocValuesField(indexFieldName, dedupAndEncode(ordinals.get())));
      // Store the taxonomy ordinals associated with each doc. Prefer to use SortedNumericDocValues
      // but "fall back" to a custom binary format to maintain backwards compatibility with Lucene 8
      // indexes.
      IntsRef ords = ordinals.get();
      if (taxoWriter.useNumericDocValuesForOrdinals()) {
        // Dedupe and encode the ordinals. It's not important that we sort here
        // (SortedNumericDocValuesField will handle this internally), but we
        // sort to identify dups (since SNDVF doesn't dedupe):
        Arrays.sort(ords.ints, ords.offset, ords.offset + ords.length);
        int prev = -1;
        for (int i = 0; i < ords.length; i++) {
          int ord = ords.ints[ords.offset + i];
          if (ord > prev) {
            doc.add(new SortedNumericDocValuesField(indexFieldName, ord));
            prev = ord;
          }
        }
      } else {
        doc.add(new BinaryDocValuesField(indexFieldName, dedupAndEncode(ords)));
      }
    }
  }

@@ -507,7 +525,13 @@ public class FacetsConfig {
    }
  }

  /** Encodes ordinals into a BytesRef; expert: subclass can override this to change encoding. */
  /**
   * Encodes ordinals into a BytesRef; expert: subclass can override this to change encoding.
   *
   * @deprecated Starting in Lucene 9, we moved to a more straight-forward numeric doc values
   *     encoding and no longer support custom binary encodings.
   */
  @Deprecated
  protected BytesRef dedupAndEncode(IntsRef ordinals) {
    Arrays.sort(ordinals.ints, ordinals.offset, ordinals.length);
    byte[] bytes = new byte[5 * ordinals.length];

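For context, the deprecated binary path above writes the "custom (v-int) binary format" that the decode loops later in this patch reverse. A rough sketch of that layout (illustrative only, not the verbatim Lucene implementation; assumes org.apache.lucene.util.BytesRef is imported): sorted, deduped ordinals are stored as deltas, each delta packed big-endian into 7-bit groups, where every byte except the last of a group has its high bit set.

  static BytesRef encodeSortedDeduped(int[] ords, int length) {
    byte[] bytes = new byte[5 * length]; // worst case: 5 bytes per value
    int upto = 0;
    int prev = 0;
    for (int i = 0; i < length; i++) {
      long delta = (ords[i] - prev) & 0xFFFFFFFFL; // non-negative since input is sorted
      prev = ords[i];
      int groups = 1; // how many 7-bit groups this delta needs
      while ((delta >>> (7 * groups)) != 0) {
        groups++;
      }
      // all groups except the last carry the continuation (high) bit
      for (int g = groups - 1; g >= 1; g--) {
        bytes[upto++] = (byte) (0x80 | ((delta >>> (7 * g)) & 0x7F));
      }
      bytes[upto++] = (byte) (delta & 0x7F);
    }
    return new BytesRef(bytes, 0, upto);
  }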
lucene/facet/src/java/org/apache/lucene/facet/taxonomy/BackCompatSortedNumericDocValues.java (new file, 148 lines)
@@ -0,0 +1,148 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.facet.taxonomy;

import java.io.IOException;
import java.util.function.BiConsumer;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/**
 * Wraps a {@link BinaryDocValues} instance, providing a {@link SortedNumericDocValues} interface
 * for the purpose of being backwards-compatible. (see: LUCENE-10062)
 *
 * @deprecated Only here for back-compat support. Should be removed with Lucene 10.
 */
@Deprecated
public class BackCompatSortedNumericDocValues extends SortedNumericDocValues {
  private final BinaryDocValues binaryDocValues;
  private final BiConsumer<BytesRef, IntsRef> binaryValueDecoder;
  private final IntsRef scratch = new IntsRef();
  private int curr;

  /**
   * Wrap the provided binary encoded doc values. Decodes the binary values with the provided
   * {@code binaryValueDecoder}, allowing the default decoding behavior to be overridden. If a null
   * doc values instance is provided, the returned instance will also be null. If a null value
   * decoder is specified, the default encoding will be assumed.
   */
  public static SortedNumericDocValues wrap(
      BinaryDocValues binaryDocValues, BiConsumer<BytesRef, IntsRef> binaryValueDecoder) {
    if (binaryDocValues == null) {
      return null;
    }

    return new BackCompatSortedNumericDocValues(binaryDocValues, binaryValueDecoder);
  }

  /** see the static {@code wrap} methods */
  private BackCompatSortedNumericDocValues(
      BinaryDocValues binaryDocValues, BiConsumer<BytesRef, IntsRef> binaryValueDecoder) {
    assert binaryDocValues != null;
    this.binaryDocValues = binaryDocValues;

    if (binaryValueDecoder != null) {
      this.binaryValueDecoder = binaryValueDecoder;
    } else {
      this.binaryValueDecoder = BackCompatSortedNumericDocValues::loadValues;
    }
  }

  @Override
  public boolean advanceExact(int target) throws IOException {
    boolean result = binaryDocValues.advanceExact(target);
    if (result) {
      reloadValues();
    }
    return result;
  }

  @Override
  public long nextValue() throws IOException {
    curr++;
    assert curr < scratch.length;
    return scratch.ints[scratch.offset + curr];
  }

  @Override
  public int docValueCount() {
    return scratch.length;
  }

  @Override
  public int docID() {
    return binaryDocValues.docID();
  }

  @Override
  public int nextDoc() throws IOException {
    return advance(binaryDocValues.docID() + 1);
  }

  @Override
  public int advance(int target) throws IOException {
    int doc = binaryDocValues.advance(target);
    if (doc != NO_MORE_DOCS) {
      reloadValues();
    }
    return doc;
  }

  @Override
  public long cost() {
    return binaryDocValues.cost();
  }

  private void reloadValues() throws IOException {
    curr = -1;
    binaryValueDecoder.accept(binaryDocValues.binaryValue(), scratch);
  }

  /** Load ordinals for the currently-positioned doc, assuming the default binary encoding. */
  static void loadValues(BytesRef buf, IntsRef ordinals) {
    // grow the buffer up front, even if by a large number of values (buf.length)
    // that saves the need to check inside the loop for every decoded value if
    // the buffer needs to grow.
    if (ordinals.ints.length < buf.length) {
      ordinals.ints = ArrayUtil.grow(ordinals.ints, buf.length);
    }

    ordinals.offset = 0;
    ordinals.length = 0;

    // it is better if the decoding is inlined like so, and not e.g.
    // in a utility method
    int upto = buf.offset + buf.length;
    int value = 0;
    int offset = buf.offset;
    int prev = 0;
    while (offset < upto) {
      byte b = buf.bytes[offset++];
      if (b >= 0) {
        ordinals.ints[ordinals.length] = ((value << 7) | b) + prev;
        value = 0;
        prev = ordinals.ints[ordinals.length];
        ordinals.length++;
      } else {
        value = (value << 7) | (b & 0x7F);
      }
    }
  }
}

@@ -45,7 +45,11 @@ import org.apache.lucene.util.RamUsageEstimator;
 *
 * <p><b>NOTE:</b> create one instance of this and re-use it for all facet implementations (the
 * cache is per-instance, not static).
 *
 * @deprecated Custom binary encodings for taxonomy ordinals are no longer supported starting with
 *     Lucene 9
 */
@Deprecated
public class CachedOrdinalsReader extends OrdinalsReader implements Accountable {

  private final OrdinalsReader source;

@@ -17,15 +17,22 @@
package org.apache.lucene.facet.taxonomy;

import java.io.IOException;
import org.apache.lucene.facet.FacetUtils;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

/** Decodes ordinals previously indexed into a BinaryDocValues field */
/**
 * Decodes ordinals previously indexed into a BinaryDocValues field
 *
 * @deprecated Custom binary encodings for taxonomy ordinals are no longer supported starting with
 *     Lucene 9
 */
@Deprecated
public class DocValuesOrdinalsReader extends OrdinalsReader {
  private final String field;

@@ -41,12 +48,12 @@ public class DocValuesOrdinalsReader extends OrdinalsReader {

  @Override
  public OrdinalsSegmentReader getReader(LeafReaderContext context) throws IOException {
    BinaryDocValues values0 = context.reader().getBinaryDocValues(field);
    if (values0 == null) {
      values0 = DocValues.emptyBinary();
    SortedNumericDocValues dv0 =
        FacetUtils.loadOrdinalValues(context.reader(), field, this::decode);
    if (dv0 == null) {
      dv0 = DocValues.emptySortedNumeric();
    }

    final BinaryDocValues values = values0;
    final SortedNumericDocValues dv = dv0;

    return new OrdinalsSegmentReader() {

@@ -59,16 +66,21 @@ public class DocValuesOrdinalsReader extends OrdinalsReader {
              "docs out of order: lastDocID=" + lastDocID + " vs docID=" + docID);
        }
        lastDocID = docID;
        if (docID > values.docID()) {
          values.advance(docID);

        ordinals.offset = 0;
        ordinals.length = 0;

        if (dv.advanceExact(docID)) {
          int count = dv.docValueCount();
          if (ordinals.ints.length < count) {
            ordinals.ints = ArrayUtil.grow(ordinals.ints, count);
          }

          for (int i = 0; i < count; i++) {
            ordinals.ints[ordinals.length] = (int) dv.nextValue();
            ordinals.length++;
          }
        }
        final BytesRef bytes;
        if (values.docID() == docID) {
          bytes = values.binaryValue();
        } else {
          bytes = new BytesRef(BytesRef.EMPTY_BYTES);
        }
        decode(bytes, ordinals);
      }
    };
  }
@@ -91,33 +103,6 @@ public class DocValuesOrdinalsReader extends OrdinalsReader {
   * @param ordinals buffer for decoded ordinals
   */
  public void decode(BytesRef buf, IntsRef ordinals) {

    // grow the buffer up front, even if by a large number of values (buf.length)
    // that saves the need to check inside the loop for every decoded value if
    // the buffer needs to grow.
    if (ordinals.ints.length < buf.length) {
      ordinals.ints = ArrayUtil.grow(ordinals.ints, buf.length);
    }

    ordinals.offset = 0;
    ordinals.length = 0;

    // it is better if the decoding is inlined like so, and not e.g.
    // in a utility method
    int upto = buf.offset + buf.length;
    int value = 0;
    int offset = buf.offset;
    int prev = 0;
    while (offset < upto) {
      byte b = buf.bytes[offset++];
      if (b >= 0) {
        ordinals.ints[ordinals.length] = ((value << 7) | b) + prev;
        value = 0;
        prev = ordinals.ints[ordinals.length];
        ordinals.length++;
      } else {
        value = (value << 7) | (b & 0x7F);
      }
    }
    BackCompatSortedNumericDocValues.loadValues(buf, ordinals);
  }
}

@@ -19,17 +19,17 @@ package org.apache.lucene.facet.taxonomy;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.facet.FacetUtils;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ConjunctionUtils;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

/**
 * Computes facets counts, assuming the default encoding into DocValues was used.
@@ -70,8 +70,9 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {

  private final void count(List<MatchingDocs> matchingDocs) throws IOException {
    for (MatchingDocs hits : matchingDocs) {
      BinaryDocValues dv = hits.context.reader().getBinaryDocValues(indexFieldName);
      if (dv == null) { // this reader does not have DocValues for the requested category list
      SortedNumericDocValues dv =
          FacetUtils.loadOrdinalValues(hits.context.reader(), indexFieldName);
      if (dv == null) {
        continue;
      }

@@ -79,21 +80,8 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
          ConjunctionUtils.intersectIterators(Arrays.asList(hits.bits.iterator(), dv));

      for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
        final BytesRef bytesRef = dv.binaryValue();
        byte[] bytes = bytesRef.bytes;
        int end = bytesRef.offset + bytesRef.length;
        int ord = 0;
        int offset = bytesRef.offset;
        int prev = 0;
        while (offset < end) {
          byte b = bytes[offset++];
          if (b >= 0) {
            prev = ord = ((ord << 7) | b) + prev;
            increment(ord);
            ord = 0;
          } else {
            ord = (ord << 7) | (b & 0x7F);
          }
        for (int i = 0; i < dv.docValueCount(); i++) {
          increment((int) dv.nextValue());
        }
      }
    }
@@ -103,8 +91,8 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {

  private final void countAll(IndexReader reader) throws IOException {
    for (LeafReaderContext context : reader.leaves()) {
      BinaryDocValues dv = context.reader().getBinaryDocValues(indexFieldName);
      if (dv == null) { // this reader does not have DocValues for the requested category list
      SortedNumericDocValues dv = FacetUtils.loadOrdinalValues(context.reader(), indexFieldName);
      if (dv == null) {
        continue;
      }

@@ -114,21 +102,9 @@ public class FastTaxonomyFacetCounts extends IntTaxonomyFacets {
        if (liveDocs != null && liveDocs.get(doc) == false) {
          continue;
        }
        final BytesRef bytesRef = dv.binaryValue();
        byte[] bytes = bytesRef.bytes;
        int end = bytesRef.offset + bytesRef.length;
        int ord = 0;
        int offset = bytesRef.offset;
        int prev = 0;
        while (offset < end) {
          byte b = bytes[offset++];
          if (b >= 0) {
            prev = ord = ((ord << 7) | b) + prev;
            increment(ord);
            ord = 0;
          } else {
            ord = (ord << 7) | (b & 0x7F);
          }

        for (int i = 0; i < dv.docValueCount(); i++) {
          increment((int) dv.nextValue());
        }
      }
    }

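For orientation (illustrative only, not part of this change), a typical counting flow over the default encoding looks roughly like the fragment below; the searcher, taxonomy reader, config, query, and the "Author" dimension are all assumptions, and the same pattern also appears in the back-compat test further down.

  static FacetResult countTopAuthors(
      IndexSearcher searcher, TaxonomyReader taxoReader, FacetsConfig config) throws IOException {
    FacetsCollector fc = new FacetsCollector();
    // run a query and collect matching docs for faceting
    FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc);
    // count taxonomy ordinals using the default (numeric doc values) encoding
    Facets counts = new FastTaxonomyFacetCounts(taxoReader, config, fc);
    return counts.getTopChildren(10, "Author");
  }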
@@ -16,7 +16,9 @@
 */
package org.apache.lucene.facet.taxonomy;

import com.carrotsearch.hppc.IntArrayList;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.facet.FacetsConfig;
@@ -26,7 +28,10 @@ import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.Ordina
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.FilterBinaryDocValues;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.FilterSortedNumericDocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;

@@ -107,6 +112,66 @@ public class OrdinalMappingLeafReader extends FilterLeafReader {
    }
  }

  private class OrdinalMappingSortedNumericDocValues extends FilterSortedNumericDocValues {
    private final IntArrayList currentValues;
    private int currIndex;

    OrdinalMappingSortedNumericDocValues(SortedNumericDocValues in) {
      super(in);
      currentValues = new IntArrayList(32);
    }

    @Override
    public boolean advanceExact(int target) throws IOException {
      boolean result = in.advanceExact(target);
      if (result) {
        reloadValues();
      }
      return result;
    }

    @Override
    public int advance(int target) throws IOException {
      int result = in.advance(target);
      if (result != DocIdSetIterator.NO_MORE_DOCS) {
        reloadValues();
      }
      return result;
    }

    @Override
    public int nextDoc() throws IOException {
      int result = in.nextDoc();
      if (result != DocIdSetIterator.NO_MORE_DOCS) {
        reloadValues();
      }
      return result;
    }

    @Override
    public int docValueCount() {
      return currentValues.elementsCount;
    }

    private void reloadValues() throws IOException {
      currIndex = 0;
      currentValues.clear();
      for (int i = 0; i < in.docValueCount(); i++) {
        int originalOrd = Math.toIntExact(in.nextValue());
        currentValues.add(ordinalMap[originalOrd]);
      }
      Arrays.sort(currentValues.buffer, 0, currentValues.elementsCount);
    }

    @Override
    public long nextValue() {
      assert currIndex < currentValues.size();
      int actual = currentValues.get(currIndex);
      currIndex++;
      return actual;
    }
  }

  private final int[] ordinalMap;
  private final InnerFacetsConfig facetsConfig;
  private final Set<String> facetFields;
@@ -125,31 +190,59 @@ public class OrdinalMappingLeafReader extends FilterLeafReader {
    }
    // always add the default indexFieldName. This is because FacetsConfig does
    // not explicitly record dimensions that were indexed under the default
    // DimConfig, unless they have a custome DimConfig.
    // DimConfig, unless they have a custom DimConfig.
    facetFields.add(FacetsConfig.DEFAULT_DIM_CONFIG.indexFieldName);
  }

  /**
   * Expert: encodes category ordinals into a BytesRef. Override in case you use custom encoding,
   * other than the default done by FacetsConfig.
   *
   * @deprecated Custom binary formats are no longer directly supported for taxonomy faceting
   *     starting in Lucene 9
   */
  @Deprecated
  protected BytesRef encode(IntsRef ordinals) {
    return facetsConfig.dedupAndEncode(ordinals);
  }

  /** Expert: override in case you used custom encoding for the categories under this field. */
  /**
   * Expert: override in case you used custom encoding for the categories under this field.
   *
   * @deprecated Custom binary formats are no longer directly supported for taxonomy faceting
   *     starting in Lucene 9
   */
  @Deprecated
  protected OrdinalsReader getOrdinalsReader(String field) {
    return new DocValuesOrdinalsReader(field);
  }

  @Override
  public BinaryDocValues getBinaryDocValues(String field) throws IOException {
    if (facetFields.contains(field)) {
    BinaryDocValues original = in.getBinaryDocValues(field);
    if (original != null && facetFields.contains(field)) {
      // The requested field is a facet ordinals field _and_ it's non-null, so move forward with
      // mapping:
      final OrdinalsReader ordsReader = getOrdinalsReader(field);
      return new OrdinalMappingBinaryDocValues(
          ordsReader.getReader(in.getContext()), in.getBinaryDocValues(field));
      return new OrdinalMappingBinaryDocValues(ordsReader.getReader(in.getContext()), original);
    } else {
      return in.getBinaryDocValues(field);
      // The requested field either isn't present (null) or isn't a facet ordinals field. Either
      // way, just return the original:
      return original;
    }
  }

  @Override
  public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException {
    SortedNumericDocValues original = in.getSortedNumericDocValues(field);
    if (original != null && facetFields.contains(field)) {
      // The requested field is a facet ordinals field _and_ it's non-null, so move forward with
      // mapping:
      return new OrdinalMappingSortedNumericDocValues(original);
    } else {
      // The requested field either isn't present (null) or isn't a facet ordinals field. Either
      // way, just return the original:
      return original;
    }
  }

@@ -20,7 +20,13 @@ import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.IntsRef;

/** Provides per-document ordinals. */
/**
 * Provides per-document ordinals.
 *
 * @deprecated Custom binary encodings for taxonomy ordinals are no longer supported starting with
 *     Lucene 9
 */
@Deprecated
public abstract class OrdinalsReader {

  /** Returns ordinals for documents in one segment. */

@@ -29,8 +29,11 @@ import org.apache.lucene.util.IntsRef;
 * Reads from any {@link OrdinalsReader}; use {@link FastTaxonomyFacetCounts} if you are using the
 * default encoding from {@link BinaryDocValues}.
 *
 * @lucene.experimental
 * @deprecated Custom binary encodings for taxonomy ordinals are no longer supported starting with
 *     Lucene 9. Please switch to {@link FastTaxonomyFacetCounts} or implement your own {@link
 *     org.apache.lucene.facet.Facets} implementation if you have custom needs.
 */
@Deprecated
public class TaxonomyFacetCounts extends IntTaxonomyFacets {
  private final OrdinalsReader ordinalsReader;

@@ -20,7 +20,10 @@ import static org.apache.lucene.facet.taxonomy.TaxonomyReader.INVALID_ORDINAL;
import static org.apache.lucene.facet.taxonomy.TaxonomyReader.ROOT_ORDINAL;

import java.io.IOException;
import org.apache.lucene.facet.FacetUtils;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.util.IntsRef;

/**
@@ -34,19 +37,16 @@ public class TaxonomyFacetLabels {
  /** {@code TaxonomyReader} provided to the constructor */
  private final TaxonomyReader taxoReader;

  /**
   * {@code OrdinalsReader} to decode ordinals previously indexed into the {@code BinaryDocValues}
   * facet field
   */
  private final OrdinalsReader ordsReader;
  /** field storing the taxonomy ordinals */
  private final String indexFieldName;

  /**
   * Sole constructor. Do not close the provided {@link TaxonomyReader} while still using this
   * instance!
   */
  public TaxonomyFacetLabels(TaxonomyReader taxoReader, String indexFieldName) throws IOException {
  public TaxonomyFacetLabels(TaxonomyReader taxoReader, String indexFieldName) {
    this.taxoReader = taxoReader;
    this.ordsReader = new DocValuesOrdinalsReader(indexFieldName);
    this.indexFieldName = indexFieldName;
  }

  /**
@@ -62,7 +62,13 @@ public class TaxonomyFacetLabels {
   * @throws IOException when a low-level IO issue occurs
   */
  public FacetLabelReader getFacetLabelReader(LeafReaderContext readerContext) throws IOException {
    return new FacetLabelReader(ordsReader, readerContext);
    SortedNumericDocValues ordinalValues =
        FacetUtils.loadOrdinalValues(readerContext.reader(), indexFieldName);
    if (ordinalValues == null) {
      ordinalValues = DocValues.emptySortedNumeric();
    }

    return new FacetLabelReader(ordinalValues);
  }

  /**
@@ -71,18 +77,50 @@ public class TaxonomyFacetLabels {
   * @lucene.experimental
   */
  public class FacetLabelReader {
    /** By default, we store taxonomy ordinals in SortedNumericDocValues field */
    private final SortedNumericDocValues ordinalValues;

    /**
     * Users can provide their own custom OrdinalsReader for cases where the default encoding isn't
     * used. This capability is deprecated and will be removed in Lucene 10.
     */
    private final OrdinalsReader.OrdinalsSegmentReader ordinalsSegmentReader;
    private final IntsRef decodedOrds = new IntsRef();

    private final IntsRef decodedOrds;

    private int currentDocId = -1;
    private int currentPos = -1;
    private boolean currentDocHasValues;
    private int currentPos;
    private int currentDocOrdinalCount;

    // Lazily set when nextFacetLabel(int docId, String facetDimension) is first called
    private int[] parents;

    /** Sole constructor. */
    /**
     * Construct from a specified {@link SortedNumericDocValues} field; useful for reading the
     * default encoding.
     */
    public FacetLabelReader(SortedNumericDocValues ordinalValues) {
      this.ordinalValues = ordinalValues;
      ordinalsSegmentReader = null;
      decodedOrds = null;
    }

    /**
     * Construct using a custom {@link OrdinalsReader}; useful if using a custom binary format.
     *
     * <p>Note: If using the default encoding, you can use {@link
     * #FacetLabelReader(SortedNumericDocValues)} directly
     *
     * @deprecated Custom binary encodings for taxonomy ordinals are no longer supported starting
     *     with Lucene 9
     */
    @Deprecated
    public FacetLabelReader(OrdinalsReader ordsReader, LeafReaderContext readerContext)
        throws IOException {
      ordinalsSegmentReader = ordsReader.getReader(readerContext);
      decodedOrds = new IntsRef();
      ordinalValues = null;
    }

    /**
@@ -108,20 +146,45 @@ public class TaxonomyFacetLabels {
        throw new IllegalArgumentException(
            "docs out of order: previous docId=" + currentDocId + " current docId=" + docId);
      }
      ordinalsSegmentReader.get(docId, decodedOrds);

      currentDocId = docId;
      currentPos = decodedOrds.offset;

      if (ordinalsSegmentReader != null) {
        ordinalsSegmentReader.get(docId, decodedOrds);
        currentPos = decodedOrds.offset;
      } else {
        currentDocHasValues = ordinalValues.advanceExact(docId);
        if (currentDocHasValues) {
          currentDocOrdinalCount = ordinalValues.docValueCount();
          currentPos = 0;
        }
      }
    }

    int endPos = decodedOrds.offset + decodedOrds.length;
    assert currentPos <= endPos;
    int ord;
    if (ordinalsSegmentReader != null) {
      int endPos = decodedOrds.offset + decodedOrds.length;
      assert currentPos <= endPos;

    if (currentPos == endPos) {
      // no more FacetLabels
      return null;
      if (currentPos == endPos) {
        return null;
      }

      ord = decodedOrds.ints[currentPos++];
    } else {
      if (currentDocHasValues == false) {
        return null;
      }

      assert currentPos <= currentDocOrdinalCount;
      if (currentPos == currentDocOrdinalCount) {
        return null;
      }

      ord = (int) ordinalValues.nextValue();
      currentPos++;
    }

    int ord = decodedOrds.ints[currentPos++];
    return taxoReader.getPath(ord);
    }

@@ -168,24 +231,61 @@ public class TaxonomyFacetLabels {
        throw new IllegalArgumentException(
            "docs out of order: previous docId=" + currentDocId + " current docId=" + docId);
      }
      ordinalsSegmentReader.get(docId, decodedOrds);
      currentPos = decodedOrds.offset;
      currentDocId = docId;
    }

    if (parents == null) {
      parents = taxoReader.getParallelTaxonomyArrays().parents();
    }

    int endPos = decodedOrds.offset + decodedOrds.length;
    assert currentPos <= endPos;

    for (; currentPos < endPos; ) {
      int ord = decodedOrds.ints[currentPos++];
      if (isDescendant(ord, parentOrd) == true) {
        return taxoReader.getPath(ord);
      if (ordinalsSegmentReader != null) {
        ordinalsSegmentReader.get(docId, decodedOrds);
        currentPos = decodedOrds.offset;
      } else {
        currentDocHasValues = ordinalValues.advanceExact(docId);
        if (currentDocHasValues) {
          currentDocOrdinalCount = ordinalValues.docValueCount();
          currentPos = 0;
        }
      }
    }

    if (ordinalsSegmentReader != null) {
      int endPos = decodedOrds.offset + decodedOrds.length;
      assert currentPos <= endPos;

      if (currentPos == endPos) {
        return null;
      }

      if (parents == null) {
        parents = taxoReader.getParallelTaxonomyArrays().parents();
      }

      do {
        int ord = decodedOrds.ints[currentPos++];
        if (isDescendant(ord, parentOrd) == true) {
          return taxoReader.getPath(ord);
        }
      } while (currentPos < endPos);
    } else {
      if (currentDocHasValues == false) {
        return null;
      }

      assert currentPos <= currentDocOrdinalCount;
      if (currentPos == currentDocOrdinalCount) {
        return null;
      }

      if (parents == null) {
        parents = taxoReader.getParallelTaxonomyArrays().parents();
      }

      do {
        int ord = (int) ordinalValues.nextValue();
        currentPos++;
        if (isDescendant(ord, parentOrd) == true) {
          return taxoReader.getPath(ord);
        }
      } while (currentPos < currentDocOrdinalCount);
    }

    return null;
  }
}

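Usage sketch (hypothetical, not part of this commit): with the new SortedNumericDocValues-backed FacetLabelReader, per-document labels can be pulled segment by segment roughly as below; the reader setup, default field name, and assumed imports (IndexReader, LeafReaderContext, FacetLabel, FacetsConfig) are illustration choices.

  static void printAllFacetLabels(IndexReader reader, TaxonomyReader taxoReader) throws IOException {
    TaxonomyFacetLabels facetLabels =
        new TaxonomyFacetLabels(taxoReader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
    for (LeafReaderContext ctx : reader.leaves()) {
      TaxonomyFacetLabels.FacetLabelReader labelReader = facetLabels.getFacetLabelReader(ctx);
      // docs must be visited in non-decreasing order for nextFacetLabel
      for (int doc = 0; doc < ctx.reader().maxDoc(); doc++) {
        for (FacetLabel label = labelReader.nextFacetLabel(doc);
            label != null;
            label = labelReader.nextFacetLabel(doc)) {
          System.out.println("doc=" + (ctx.docBase + doc) + " label=" + label);
        }
      }
    }
  }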
@@ -18,9 +18,12 @@ package org.apache.lucene.facet.taxonomy;

import java.io.IOException;
import java.util.List;
import org.apache.lucene.facet.FacetUtils;
import org.apache.lucene.facet.FacetsCollector;
import org.apache.lucene.facet.FacetsCollector.MatchingDocs;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.ConjunctionUtils;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.DoubleValues;
import org.apache.lucene.search.DoubleValuesSource;
@@ -36,8 +39,7 @@ public class TaxonomyFacetSumValueSource extends FloatTaxonomyFacets {

  /**
   * Aggreggates double facet values from the provided {@link DoubleValuesSource}, pulling ordinals
   * using {@link DocValuesOrdinalsReader} against the default indexed facet field {@link
   * FacetsConfig#DEFAULT_INDEX_FIELD_NAME}.
   * from the default indexed facet field {@link FacetsConfig#DEFAULT_INDEX_FIELD_NAME}.
   */
  public TaxonomyFacetSumValueSource(
      TaxonomyReader taxoReader,
@@ -45,18 +47,33 @@ public class TaxonomyFacetSumValueSource extends FloatTaxonomyFacets {
      FacetsCollector fc,
      DoubleValuesSource valueSource)
      throws IOException {
    this(
        new DocValuesOrdinalsReader(FacetsConfig.DEFAULT_INDEX_FIELD_NAME),
        taxoReader,
        config,
        fc,
        valueSource);
    this(FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc, valueSource);
  }

  /**
   * Aggreggates double facet values from the provided {@link DoubleValuesSource}, pulling ordinals
   * from the specified indexed facet field.
   */
  public TaxonomyFacetSumValueSource(
      String indexField,
      TaxonomyReader taxoReader,
      FacetsConfig config,
      FacetsCollector fc,
      DoubleValuesSource valueSource)
      throws IOException {
    super(indexField, taxoReader, config);
    ordinalsReader = null;
    sumValues(fc.getMatchingDocs(), fc.getKeepScores(), valueSource);
  }

  /**
   * Aggreggates float facet values from the provided {@link DoubleValuesSource}, and pulls ordinals
   * from the provided {@link OrdinalsReader}.
   *
   * @deprecated Custom binary encodings for taxonomy ordinals are no longer supported starting with
   *     Lucene 9
   */
  @Deprecated
  public TaxonomyFacetSumValueSource(
      OrdinalsReader ordinalsReader,
      TaxonomyReader taxoReader,
@@ -91,20 +108,47 @@ public class TaxonomyFacetSumValueSource extends FloatTaxonomyFacets {
      List<MatchingDocs> matchingDocs, boolean keepScores, DoubleValuesSource valueSource)
      throws IOException {

    IntsRef scratch = new IntsRef();
    for (MatchingDocs hits : matchingDocs) {
      OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.getReader(hits.context);
      DoubleValues scores = keepScores ? scores(hits) : null;
      DoubleValues functionValues = valueSource.getValues(hits.context, scores);
      DocIdSetIterator docs = hits.bits.iterator();
    if (ordinalsReader != null) {
      // If the user provided a custom ordinals reader, use it to retrieve the document ordinals:
      IntsRef scratch = new IntsRef();
      for (MatchingDocs hits : matchingDocs) {
        OrdinalsReader.OrdinalsSegmentReader ords = ordinalsReader.getReader(hits.context);
        DoubleValues scores = keepScores ? scores(hits) : null;
        DoubleValues functionValues = valueSource.getValues(hits.context, scores);
        DocIdSetIterator docs = hits.bits.iterator();

      int doc;
      while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        ords.get(doc, scratch);
        if (functionValues.advanceExact(doc)) {
          float value = (float) functionValues.doubleValue();
          for (int i = 0; i < scratch.length; i++) {
            values[scratch.ints[i]] += value;
        int doc;
        while ((doc = docs.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
          ords.get(doc, scratch);
          if (functionValues.advanceExact(doc)) {
            float value = (float) functionValues.doubleValue();
            for (int i = 0; i < scratch.length; i++) {
              values[scratch.ints[i]] += value;
            }
          }
        }
      }
    } else {
      // If no custom ordinals reader is provided, expect the default encoding:
      for (MatchingDocs hits : matchingDocs) {
        SortedNumericDocValues ordinalValues =
            FacetUtils.loadOrdinalValues(hits.context.reader(), indexFieldName);
        if (ordinalValues == null) {
          continue;
        }

        DoubleValues scores = keepScores ? scores(hits) : null;
        DoubleValues functionValues = valueSource.getValues(hits.context, scores);
        DocIdSetIterator it =
            ConjunctionUtils.intersectIterators(List.of(hits.bits.iterator(), ordinalValues));

        for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
          if (functionValues.advanceExact(doc)) {
            float value = (float) functionValues.doubleValue();
            int ordinalCount = ordinalValues.docValueCount();
            for (int i = 0; i < ordinalCount; i++) {
              values[(int) ordinalValues.nextValue()] += value;
            }
          }
        }
      }

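Illustrative only (not part of this commit): with the new constructor above that takes the facet index field name directly, an aggregation might be set up as below; the collector, taxonomy reader, config, value source, and the "Author" dimension are assumptions.

  Facets sums =
      new TaxonomyFacetSumValueSource(
          FacetsConfig.DEFAULT_INDEX_FIELD_NAME, taxoReader, config, fc, DoubleValuesSource.SCORES);
  FacetResult result = sums.getTopChildren(10, "Author");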
@@ -97,4 +97,18 @@ public interface TaxonomyWriter extends Closeable, TwoPhaseCommit {

  /** Returns the commit user data iterable that was set on {@link #setLiveCommitData(Iterable)}. */
  public Iterable<Map.Entry<String, String>> getLiveCommitData();

  /**
   * Determine whether-or-not to store taxonomy ordinals for each document using the older binary
   * format or the newer SortedNumericDocValues format (based on the version used to create the
   * index).
   *
   * @deprecated Please don't rely on this method as it will be removed in Lucene 10. It's being
   *     introduced to support backwards-compatibility with Lucene 8 and earlier index formats
   *     temporarily.
   */
  @Deprecated
  default boolean useNumericDocValuesForOrdinals() {
    return false;
  }
}

@@ -162,7 +162,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
    openMode = config.getOpenMode();
    if (DirectoryReader.indexExists(directory) == false) {
      indexEpoch = 1;
      // no commit exists so we can safely use the new BinaryDocValues field
      // no commit exists so we can safely use the newer formats:
      useOlderFormat = false;
    } else {
      String epochStr = null;
@@ -1005,4 +1005,9 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
  public final long getTaxonomyEpoch() {
    return indexEpoch;
  }

  @Override
  public boolean useNumericDocValuesForOrdinals() {
    return useOlderFormat == false;
  }
}

@@ -190,7 +190,7 @@ public class TestMultipleIndexFields extends FacetTestCase {
  private void assertOrdinalsExist(String field, IndexReader ir) throws IOException {
    for (LeafReaderContext context : ir.leaves()) {
      LeafReader r = context.reader();
      if (r.getBinaryDocValues(field) != null) {
      if (r.getSortedNumericDocValues(field) != null) {
        return; // not all segments must have this DocValues
      }
    }

lucene/facet/src/test/org/apache/lucene/facet/taxonomy/TestBackCompatSortedNumericDocValues.java (new file, 140 lines)
@@ -0,0 +1,140 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.facet.taxonomy;

import com.carrotsearch.randomizedtesting.generators.RandomNumbers;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.SortedNumericDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.LuceneTestCase;

public class TestBackCompatSortedNumericDocValues extends LuceneTestCase {

  private static class FacetsConfigWrapper extends FacetsConfig {
    public BytesRef encodeValues(IntsRef values) {
      return dedupAndEncode(values);
    }
  }

  public void testRandom() throws IOException {
    Directory dir = newDirectory();
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir);

    // sorta big scratch so we don't have to think about reallocating:
    IntsRef scratch = new IntsRef(100);

    // used to access default binary encoding easily:
    FacetsConfigWrapper facetsConfig = new FacetsConfigWrapper();

    // keep track of the values we expect to see for each doc:
    Map<String, List<Integer>> expectedValues = new HashMap<>();

    int numDocs = atLeast(100);
    for (int i = 0; i < numDocs; i++) {
      int numValues = RandomNumbers.randomIntBetween(random(), 1, 50);
      scratch.length = 0;
      scratch.offset = 0;
      Set<Integer> values = new HashSet<>();
      for (int j = 0; j < numValues; j++) {
        int value = random().nextInt(Integer.MAX_VALUE);
        values.add(value);
        // we might have dups in here, which is fine (encoding takes care of deduping and sorting):
        scratch.ints[j] = value;
        scratch.length++;
      }
      // we expect to get sorted and deduped values back out:
      expectedValues.put(String.valueOf(i), values.stream().sorted().collect(Collectors.toList()));

      Document doc = new Document();
      doc.add(new StoredField("id", String.valueOf(i)));
      doc.add(new BinaryDocValuesField("bdv", facetsConfig.encodeValues(scratch)));
      writer.addDocument(doc);
    }

    writer.forceMerge(1);
    writer.commit();

    IndexReader reader = writer.getReader();
    IndexSearcher searcher = newSearcher(reader);
    writer.close();

    assert reader.leaves().size() == 1;
    BinaryDocValues binaryDocValues = reader.leaves().get(0).reader().getBinaryDocValues("bdv");
    assertNotNull(binaryDocValues);
    SortedNumericDocValues docValues = BackCompatSortedNumericDocValues.wrap(binaryDocValues, null);

    TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), numDocs, Sort.INDEXORDER);

    for (ScoreDoc scoreDoc : docs.scoreDocs) {
      String id = reader.document(scoreDoc.doc).get("id");
      int docId = scoreDoc.doc;

      int doc;
      if (random().nextBoolean()) {
        doc = docValues.nextDoc();
      } else {
        if (random().nextBoolean()) {
          doc = docValues.advance(docId);
        } else {
          assertTrue(docValues.advanceExact(docId));
          doc = docId;
        }
      }
      assertEquals(docId, doc);
      assertEquals(docId, docValues.docID());

      List<Integer> expected = expectedValues.get(id);
      assertEquals(expected.size(), docValues.docValueCount());
      checkValues(expected, docValues);
    }

    // Run off the end and make sure that case is handled gracefully:
    assertEquals(DocIdSetIterator.NO_MORE_DOCS, docValues.nextDoc());

    IOUtils.close(reader, dir);
  }

  private void checkValues(List<Integer> expected, SortedNumericDocValues values)
      throws IOException {
    for (Integer e : expected) {
      assertEquals((long) e, values.nextValue());
    }
  }
}

@@ -410,9 +410,15 @@ public class TestTaxonomyFacetSumValueSource extends FacetTestCase {
    FacetsCollector.search(newSearcher(r), new MatchAllDocsQuery(), 10, fc);

    Facets facets1 = getTaxonomyFacetCounts(taxoReader, config, fc);
    Facets facets2 =
        new TaxonomyFacetSumValueSource(
            new DocValuesOrdinalsReader("$b"), taxoReader, config, fc, DoubleValuesSource.SCORES);
    Facets facets2;
    if (random().nextBoolean()) {
      facets2 =
          new TaxonomyFacetSumValueSource(
              new DocValuesOrdinalsReader("$b"), taxoReader, config, fc, DoubleValuesSource.SCORES);
    } else {
      facets2 =
          new TaxonomyFacetSumValueSource("$b", taxoReader, config, fc, DoubleValuesSource.SCORES);
    }

    assertEquals(r.maxDoc(), facets1.getTopChildren(10, "a").value.intValue());
    assertEquals(r.maxDoc(), facets2.getTopChildren(10, "b").value.doubleValue(), 1E-10);

lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestBackwardsCompatibility.java (264 lines changed)
@ -20,10 +20,32 @@ import java.io.IOException;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.facet.DrillDownQuery;
|
||||
import org.apache.lucene.facet.FacetField;
|
||||
import org.apache.lucene.facet.FacetResult;
|
||||
import org.apache.lucene.facet.Facets;
|
||||
import org.apache.lucene.facet.FacetsCollector;
|
||||
import org.apache.lucene.facet.FacetsConfig;
|
||||
import org.apache.lucene.facet.taxonomy.DocValuesOrdinalsReader;
|
||||
import org.apache.lucene.facet.taxonomy.FacetLabel;
|
||||
import org.apache.lucene.facet.taxonomy.FastTaxonomyFacetCounts;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyFacetCounts;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyFacetLabels;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyFacetSumValueSource;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.search.DoubleValuesSource;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.junit.Ignore;
|
||||
@ -49,50 +71,196 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
|
||||
//
|
||||
// Then move the zip file to your trunk checkout and use it in your test cases
|
||||
|
||||
public static final String oldTaxonomyIndexName = "taxonomy.8.10.0-cfs";
|
||||
private static final String OLD_TAXONOMY_INDEX_NAME = "taxonomy.8.11.0-cfs";
|
||||
private static final String OLD_INDEX_NAME = "index.8.11.0-cfs";
|
||||
|
||||
public void testCreateNewTaxonomy() throws IOException {
|
||||
createNewTaxonomyIndex(oldTaxonomyIndexName);
|
||||
createNewTaxonomyIndex(OLD_TAXONOMY_INDEX_NAME, OLD_INDEX_NAME);
|
||||
}
|
||||
|
||||
// Opens up a pre-existing old taxonomy index and adds new BinaryDocValues based fields
|
||||
private void createNewTaxonomyIndex(String dirName) throws IOException {
|
||||
Path indexDir = createTempDir(oldTaxonomyIndexName);
|
||||
TestUtil.unzip(getDataInputStream(dirName + ".zip"), indexDir);
|
||||
Directory dir = newFSDirectory(indexDir);
|
||||
/**
|
||||
* This test exercises a bunch of different faceting operations and directly taxonomy index
|
||||
* reading to make sure more modern faceting formats introduced in 9.0 are backwards-compatible
|
||||
* with 8.x indexes. It requires an "older" 8.x index to be in place with assumed docs/categories
|
||||
* already present. It makes sure it can still run a number of different "read" operations against
|
||||
* the old index, then it writes new content, forces a merge and does a bunch more "read"
|
||||
* operations. It may seem a bit chaotic, but it's trying to test a number of different
|
||||
* faceting-related implementations that require specific back-compat support.
|
||||
*/
|
||||
private void createNewTaxonomyIndex(String taxoDirName, String indexDirName) throws IOException {
|
||||
Path taxoPath = createTempDir(taxoDirName);
|
||||
Path indexPath = createTempDir(indexDirName);
|
||||
|
||||
DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
|
||||
TestUtil.unzip(getDataInputStream(taxoDirName + ".zip"), taxoPath);
|
||||
TestUtil.unzip(getDataInputStream(indexDirName + ".zip"), indexPath);
|
||||
|
||||
Directory taxoDir = newFSDirectory(taxoPath);
|
||||
Directory indexDir = newFSDirectory(indexPath);
|
||||
|
||||
// Open the existing indexes (explicitly open in APPEND mode and fail if they don't exist since
|
||||
// we're trying to test
|
||||
// back-compat with existing indexes):
|
||||
DirectoryTaxonomyWriter taxoWriter =
|
||||
new DirectoryTaxonomyWriter(taxoDir, IndexWriterConfig.OpenMode.APPEND);
|
||||
IndexWriterConfig indexWriterConfig = newIndexWriterConfig();
|
||||
indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
|
||||
RandomIndexWriter indexWriter =
|
||||
new RandomIndexWriter(random(), indexDir, indexWriterConfig, random().nextBoolean());
|
||||
|
||||
// Use a default FacetsConfig. This assumes that we didn't need to register anything interesting
|
||||
// when creating
|
||||
// the older format index. If that changes, we need a way to ensure we re-use the same facet
|
||||
// configuration used
|
||||
// in created the old format taxonomy index:
|
||||
FacetsConfig facetsConfig = new FacetsConfig();
|
||||
|
||||
// At this point we should have a taxonomy index and "regular" index containing some taxonomy
// categories and documents with facet ordinals indexed. Confirm that we can facet and search
// against it as-is before adding anything new. Of course these tests are highly dependent on the
// index we're starting with, so they will need to be updated accordingly if the "old" test index
// changes:
TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
IndexSearcher searcher = newSearcher(indexWriter.getReader());
FacetsCollector facetsCollector = new FacetsCollector();
searcher.search(new MatchAllDocsQuery(), facetsCollector);
// Test a few different facet implementations that we know have back-compat implications:
Facets facets = new FastTaxonomyFacetCounts(taxoReader, facetsConfig, facetsCollector);
FacetResult facetResult = facets.getTopChildren(10, "f1");
assertEquals(2, facetResult.value);
facets =
new TaxonomyFacetCounts(
new DocValuesOrdinalsReader(), taxoReader, facetsConfig, facetsCollector);
facetResult = facets.getTopChildren(10, "f1");
assertEquals(2, facetResult.value);
facets =
new TaxonomyFacetSumValueSource(
taxoReader, facetsConfig, facetsCollector, DoubleValuesSource.constant(1d));
facetResult = facets.getTopChildren(10, "f1");
assertEquals(2.0f, facetResult.value);
// Test that we can drill-down as expected (and read facet labels from matching docs):
TaxonomyFacetLabels facetLabels =
new TaxonomyFacetLabels(taxoReader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
assert (searcher.getIndexReader().leaves().size() == 1);
TaxonomyFacetLabels.FacetLabelReader labelReader =
facetLabels.getFacetLabelReader(searcher.getIndexReader().leaves().get(0));
DrillDownQuery query = new DrillDownQuery(facetsConfig, new MatchAllDocsQuery());
query.add("f1", "foo");
TopDocs docResults = searcher.search(query, 10);
assertEquals(1, docResults.totalHits.value);
int docId = docResults.scoreDocs[0].doc;
Set<FacetLabel> labels = new HashSet<>();
for (FacetLabel label = labelReader.nextFacetLabel(docId);
label != null;
label = labelReader.nextFacetLabel(docId)) {
labels.add(label);
}
assertEquals(2, labels.size());
assertTrue(
labels.containsAll(List.of(new FacetLabel("f1", "foo"), new FacetLabel("f2", "foo"))));
assertEquals(0, docResults.scoreDocs[0].doc);
// And make sure we can read directly from the taxonomy like we'd expect:
int ord = taxoReader.getOrdinal(new FacetLabel("f1", "foo"));
assertNotEquals(TaxonomyReader.INVALID_ORDINAL, ord);
assertNotNull(taxoReader.getPath(ord));

// Now we'll add some new docs and taxonomy categories, force merge (to make sure that goes well)
// and then do some more searches, etc.:
Document doc = new Document();
doc.add(new FacetField("f1", "zed"));
indexWriter.addDocument(facetsConfig.build(taxoWriter, doc));

FacetLabel cp_c = new FacetLabel("c");
writer.addCategory(cp_c);
writer.getInternalIndexWriter().forceMerge(1);
writer.commit();
taxoWriter.addCategory(cp_c);

TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
indexWriter.forceMerge(1);
taxoWriter.getInternalIndexWriter().forceMerge(1);
indexWriter.commit();
taxoWriter.commit();

int ord1 = reader.getOrdinal(new FacetLabel("a"));
assert ord1 != TaxonomyReader.INVALID_ORDINAL;
// Just asserting ord1 != TaxonomyReader.INVALID_ORDINAL is not enough to check compatibility
assertNotNull(reader.getPath(ord1));
IOUtils.close(taxoReader, searcher.getIndexReader());
taxoReader = new DirectoryTaxonomyReader(taxoWriter);
searcher = newSearcher(indexWriter.getReader());
IOUtils.close(indexWriter, taxoWriter);

int ord2 = reader.getOrdinal(new FacetLabel("b"));
assert ord2 != TaxonomyReader.INVALID_ORDINAL;
// Re-test a number of different use-cases, which should now "see" the newly added content:
facetsCollector = new FacetsCollector();
searcher.search(new MatchAllDocsQuery(), facetsCollector);
facets = new FastTaxonomyFacetCounts(taxoReader, facetsConfig, facetsCollector);
facetResult = facets.getTopChildren(10, "f1");
assertEquals(3, facetResult.value);
facets =
new TaxonomyFacetCounts(
new DocValuesOrdinalsReader(), taxoReader, facetsConfig, facetsCollector);
facetResult = facets.getTopChildren(10, "f1");
assertEquals(3, facetResult.value);
facets =
new TaxonomyFacetSumValueSource(
taxoReader, facetsConfig, facetsCollector, DoubleValuesSource.constant(1d));
facetResult = facets.getTopChildren(10, "f1");
assertEquals(3.0f, facetResult.value);
// Test that we can drill-down as expected, and access facet labels:
facetLabels = new TaxonomyFacetLabels(taxoReader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
assert (searcher.getIndexReader().leaves().size() == 1);
labelReader = facetLabels.getFacetLabelReader(searcher.getIndexReader().leaves().get(0));
query = new DrillDownQuery(facetsConfig, new MatchAllDocsQuery());
query.add("f1", "foo");
docResults = searcher.search(query, 10);
assertEquals(1, docResults.totalHits.value);
docId = docResults.scoreDocs[0].doc;
labels = new HashSet<>();
for (FacetLabel label = labelReader.nextFacetLabel(docId);
label != null;
label = labelReader.nextFacetLabel(docId)) {
labels.add(label);
}
assertEquals(2, labels.size());
assertTrue(
labels.containsAll(List.of(new FacetLabel("f1", "foo"), new FacetLabel("f2", "foo"))));
labelReader = facetLabels.getFacetLabelReader(searcher.getIndexReader().leaves().get(0));
query = new DrillDownQuery(facetsConfig, new MatchAllDocsQuery());
query.add("f1", "zed");
docResults = searcher.search(query, 10);
assertEquals(1, docResults.totalHits.value);
docId = docResults.scoreDocs[0].doc;
labels = new HashSet<>();
for (FacetLabel label = labelReader.nextFacetLabel(docId);
label != null;
label = labelReader.nextFacetLabel(docId)) {
labels.add(label);
}
assertEquals(1, labels.size());
assertTrue(labels.contains(new FacetLabel("f1", "zed")));
// And make sure we can read directly from the taxonomy like we'd expect:
ord = taxoReader.getOrdinal(new FacetLabel("f1", "foo"));
assertNotEquals(TaxonomyReader.INVALID_ORDINAL, ord);
assertNotNull(taxoReader.getPath(ord));
ord = taxoReader.getOrdinal(new FacetLabel("f1", "zed"));
assertNotEquals(TaxonomyReader.INVALID_ORDINAL, ord);
assertNotNull(taxoReader.getPath(ord));
// And check a few more direct reads from the taxonomy:
ord = taxoReader.getOrdinal(new FacetLabel("a"));
assertNotEquals(TaxonomyReader.INVALID_ORDINAL, ord);
assertNotNull(taxoReader.getPath(ord));

ord = taxoReader.getOrdinal(new FacetLabel("b"));
assertNotEquals(TaxonomyReader.INVALID_ORDINAL, ord);
// Just asserting ord2 != TaxonomyReader.INVALID_ORDINAL is not enough to check compatibility
assertNotNull(reader.getPath(ord2));
assertNotNull(taxoReader.getPath(ord));

int ord3 = reader.getOrdinal(cp_c);
assert ord3 != TaxonomyReader.INVALID_ORDINAL;
assertNotNull(reader.getPath(ord3));
ord = taxoReader.getOrdinal(cp_c);
assertNotEquals(TaxonomyReader.INVALID_ORDINAL, ord);
assertNotNull(taxoReader.getPath(ord));

reader.close();
writer.close();
dir.close();
IOUtils.close(taxoReader, searcher.getIndexReader(), taxoDir, indexDir);
}
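// ---------------------------------------------------------------------------------------------
// Editor's note: the helper below is NOT part of this commit. It is a minimal, hypothetical
// sketch of how per-document taxonomy ordinals could be read straight from the numeric doc
// values encoding that the test above exercises indirectly, assuming the ordinals were indexed
// under FacetsConfig.DEFAULT_INDEX_FIELD_NAME (and assuming the usual java.util and
// org.apache.lucene.index imports). Segments carried over from an 8.x index store this data
// differently, which is one reason the test goes through the facet implementations instead.
// ---------------------------------------------------------------------------------------------
private static List<Long> readOrdinals(LeafReader leafReader, int docId) throws IOException {
List<Long> ordinals = new ArrayList<>();
// SortedNumericDocValues holds zero or more ordinal values per document:
SortedNumericDocValues ords =
DocValues.getSortedNumeric(leafReader, FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
if (ords.advanceExact(docId)) {
for (int i = 0; i < ords.docValueCount(); i++) {
ordinals.add(ords.nextValue()); // each value is an ordinal into the taxonomy
}
}
return ordinals;
}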

// Opens up a pre-existing index and tries to run getBulkPath on it
public void testGetBulkPathOnOlderCodec() throws Exception {
Path indexDir = createTempDir(oldTaxonomyIndexName);
TestUtil.unzip(getDataInputStream(oldTaxonomyIndexName + ".zip"), indexDir);
Path indexDir = createTempDir(OLD_TAXONOMY_INDEX_NAME);
TestUtil.unzip(getDataInputStream(OLD_TAXONOMY_INDEX_NAME + ".zip"), indexDir);
Directory dir = newFSDirectory(indexDir);

DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
@ -114,21 +282,41 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
// Used to create a fresh taxonomy index with StoredFields
@Ignore
public void testCreateOldTaxonomy() throws IOException {
createOldTaxonomyIndex(oldTaxonomyIndexName);
createOldTaxonomyIndex(OLD_TAXONOMY_INDEX_NAME, OLD_INDEX_NAME);
}

private void createOldTaxonomyIndex(String dirName) throws IOException {
Path indexDir = getIndexDir().resolve(dirName);
Files.deleteIfExists(indexDir);
Directory dir = newFSDirectory(indexDir);
private void createOldTaxonomyIndex(String taxoDirName, String indexDirName) throws IOException {
Path taxoPath = getIndexDir().resolve(taxoDirName);
Path indexPath = getIndexDir().resolve(indexDirName);
Files.deleteIfExists(taxoPath);
Files.deleteIfExists(indexPath);

TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
Directory taxoDir = newFSDirectory(taxoPath);
Directory indexDir = newFSDirectory(indexPath);

writer.addCategory(new FacetLabel("a"));
writer.addCategory(new FacetLabel("b"));
writer.commit();
writer.close();
dir.close();
TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
FacetsConfig facetsConfig = new FacetsConfig();
RandomIndexWriter indexWriter = new RandomIndexWriter(random(), indexDir);

Document doc = new Document();
doc.add(new FacetField("f1", "foo"));
doc.add(new FacetField("f2", "foo"));
indexWriter.addDocument(facetsConfig.build(taxoWriter, doc));

doc = new Document();
doc.add(new FacetField("f1", "bar"));
indexWriter.addDocument(facetsConfig.build(taxoWriter, doc));

doc = new Document();
doc.add(new FacetField("f2", "bar"));
indexWriter.addDocument(facetsConfig.build(taxoWriter, doc));

taxoWriter.addCategory(new FacetLabel("a"));
taxoWriter.addCategory(new FacetLabel("b"));

indexWriter.commit();
taxoWriter.commit();
IOUtils.close(indexWriter, taxoWriter, indexDir, taxoDir);
}

private Path getIndexDir() {
BIN lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/index.8.11.0-cfs.zip (new file, binary file not shown)
BIN lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/taxonomy.8.11.0-cfs.zip (new file, binary file not shown)
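The two zip files above are the pre-built 8.x fixture indexes (the regular index and its side-car
taxonomy) that the back-compat tests unzip and then open in APPEND mode. As a rough, purely
illustrative sketch (not part of this commit), calling code can tell whether a given segment still
carries the older binary ordinal encoding or the newer numeric one by checking the doc values type
recorded for the facet field; the field name below assumes the FacetsConfig default, and
`leafReader` is any open LeafReader over the segment:

FieldInfo info = leafReader.getFieldInfos().fieldInfo(FacetsConfig.DEFAULT_INDEX_FIELD_NAME);
if (info != null && info.getDocValuesType() == DocValuesType.BINARY) {
// segment written against an 8.x-style index: ordinals use the older binary encoding
} else if (info != null && info.getDocValuesType() == DocValuesType.SORTED_NUMERIC) {
// segment written with the 9.x format: ordinals are plain numeric doc values
}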