Use BDV or a StoredField based on the Lucene version that has created

the last index commit

If the Lucene version was < 9, then use a StringField; otherwise,
if the index is fresh or if the index was built using a
version >= 9, then use a BDV field.
This commit is contained in:
Gautam Worah 2021-07-26 11:50:49 -07:00 committed by Mike McCandless
parent 7cb696041c
commit 162131ecf8
6 changed files with 25 additions and 11 deletions

View File

@ -21,7 +21,6 @@ import org.apache.lucene.util.BytesRef;
/** @lucene.experimental */
abstract class Consts {
static final String FULL = "$full_path$";
static final String FULL_BINARY = "$full_path_binary$";
static final String FIELD_PAYLOADS = "$payloads$";
static final String PAYLOAD_PARENT = "p";
static final BytesRef PAYLOAD_PARENT_BYTES_REF = new BytesRef(PAYLOAD_PARENT);

View File

@ -335,7 +335,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
int readerIndex = ReaderUtil.subIndex(ordinal, indexReader.leaves());
LeafReader leafReader = indexReader.leaves().get(readerIndex).reader();
// TODO: Use LUCENE-9476 to get the bulk lookup API for extracting BinaryDocValues
BinaryDocValues values = leafReader.getBinaryDocValues(Consts.FULL_BINARY);
BinaryDocValues values = leafReader.getBinaryDocValues(Consts.FULL);
FacetLabel ret;

View File

@ -62,6 +62,7 @@ import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
/**
* {@link TaxonomyWriter} which uses a {@link Directory} to store the taxonomy information on disk,
@ -475,8 +476,20 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
String fieldPath = FacetsConfig.pathToString(categoryPath.components, categoryPath.length);
fullPathField.setStringValue(fieldPath);
boolean commitExists = indexWriter.getLiveCommitData().iterator().hasNext();
/* no commits so this is a fresh index, or the old index was built using a Lucene 9 or greater version */
if ((commitExists == false)
|| (SegmentInfos.readLatestCommit(dir)
.getMinSegmentLuceneVersion()
.onOrAfter(Version.LUCENE_9_0_0))) {
/* Lucene 9 introduces BinaryDocValuesField for storing taxonomy categories */
d.add(new BinaryDocValuesField(Consts.FULL, new BytesRef(fieldPath)));
} else {
fullPathField = new StringField(Consts.FULL, fieldPath, Field.Store.YES);
}
d.add(fullPathField);
d.add(new BinaryDocValuesField(Consts.FULL_BINARY, new BytesRef(fieldPath)));
// Note that we do not pass an Analyzer here because the fields that are
// added to the Document are untokenized or contain their own TokenStream.

View File

@ -49,12 +49,8 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
//
// Then move the zip file to your trunk checkout and use it in your test cases
public static final String oldTaxonomyIndexName = "taxonomy.8.6.3-cfs";
public static final String oldTaxonomyIndexName = "taxonomy.8.10.0-cfs";
// LUCENE-9334 requires consistency of field data structures between documents.
// The old taxonomy index had the $full_path$ field indexed only with postings.
// It is not allowed to add the same field $full_path$ indexed with BinaryDocValues
// for a new segment, which is what this test is trying to do.
public void testCreateNewTaxonomy() throws IOException {
createNewTaxonomyIndex(oldTaxonomyIndexName);
}
@ -67,8 +63,8 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
FacetLabel cp_b = new FacetLabel("b");
writer.addCategory(cp_b);
FacetLabel cp_c = new FacetLabel("c");
writer.addCategory(cp_c);
writer.getInternalIndexWriter().forceMerge(1);
writer.commit();
@ -79,10 +75,15 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
// Just asserting ord1 != TaxonomyReader.INVALID_ORDINAL is not enough to check compatibility
assertNotNull(reader.getPath(ord1));
int ord2 = reader.getOrdinal(cp_b);
int ord2 = reader.getOrdinal(new FacetLabel("b"));
assert ord2 != TaxonomyReader.INVALID_ORDINAL;
// Just asserting ord2 != TaxonomyReader.INVALID_ORDINAL is not enough to check compatibility
assertNotNull(reader.getPath(ord2));
int ord3 = reader.getOrdinal(cp_c);
assert ord3 != TaxonomyReader.INVALID_ORDINAL;
assertNotNull(reader.getPath(ord3));
reader.close();
writer.close();
dir.close();
@ -102,6 +103,7 @@ public class TestBackwardsCompatibility extends LuceneTestCase {
TaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
writer.addCategory(new FacetLabel("a"));
writer.addCategory(new FacetLabel("b"));
writer.commit();
writer.close();
dir.close();