Minor refactoring and cleanup to taxonomy index code (#11775)

This commit is contained in:
Shai Erera 2022-09-21 13:08:33 +03:00 committed by GitHub
parent add309bb40
commit bcc116057d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 90 additions and 108 deletions

View File

@ -32,7 +32,7 @@ public class FacetLabel implements Comparable<FacetLabel> {
/* /*
* copied from DocumentWriterPerThread -- if a FacetLabel is resolved to a * copied from DocumentWriterPerThread -- if a FacetLabel is resolved to a
* drill-down term which is encoded to a larger term than that length, it is * drill-down term which is encoded to a larger term than that length, it is
* silently dropped! Therefore we limit the number of characters to MAX/4 to * silently dropped! Therefore, we limit the number of characters to MAX/4 to
* be on the safe side. * be on the safe side.
*/ */
/** The maximum number of characters a {@link FacetLabel} can have. */ /** The maximum number of characters a {@link FacetLabel} can have. */
@ -48,10 +48,10 @@ public class FacetLabel implements Comparable<FacetLabel> {
/** The number of components of this {@link FacetLabel}. */ /** The number of components of this {@link FacetLabel}. */
public final int length; public final int length;
// Used by subpath // Used by sub-path
private FacetLabel(final FacetLabel copyFrom, final int prefixLen) { private FacetLabel(final FacetLabel copyFrom, final int prefixLen) {
// while the code which calls this method is safe, at some point a test // While the code which calls this method is safe, at some point a test
// tripped on AIOOBE in toString, but we failed to reproduce. adding the // tripped on AIOOBE in toString, but we failed to reproduce. Adding
// assert as a safety check. // assert as a safety check.
assert prefixLen >= 0 && prefixLen <= copyFrom.components.length assert prefixLen >= 0 && prefixLen <= copyFrom.components.length
: "prefixLen cannot be negative nor larger than the given components' length: prefixLen=" : "prefixLen cannot be negative nor larger than the given components' length: prefixLen="
@ -103,7 +103,7 @@ public class FacetLabel implements Comparable<FacetLabel> {
/** Compares this path with another {@link FacetLabel} for lexicographic order. */ /** Compares this path with another {@link FacetLabel} for lexicographic order. */
@Override @Override
public int compareTo(FacetLabel other) { public int compareTo(FacetLabel other) {
final int len = length < other.length ? length : other.length; final int len = Math.min(length, other.length);
for (int i = 0, j = 0; i < len; i++, j++) { for (int i = 0, j = 0; i < len; i++, j++) {
int cmp = components[i].compareTo(other.components[j]); int cmp = components[i].compareTo(other.components[j]);
if (cmp < 0) { if (cmp < 0) {
@ -120,7 +120,7 @@ public class FacetLabel implements Comparable<FacetLabel> {
@Override @Override
public boolean equals(Object obj) { public boolean equals(Object obj) {
if (!(obj instanceof FacetLabel)) { if (obj instanceof FacetLabel == false) {
return false; return false;
} }

View File

@ -62,10 +62,10 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
private static final int DEFAULT_CACHE_VALUE = 4000; private static final int DEFAULT_CACHE_VALUE = 4000;
// NOTE: very coarse estimate! // NOTE: very coarse estimate!
private static final int BYTES_PER_CACHE_ENTRY = private static final long BYTES_PER_CACHE_ENTRY =
4 * RamUsageEstimator.NUM_BYTES_OBJECT_REF 4L * RamUsageEstimator.NUM_BYTES_OBJECT_REF
+ 4 * RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 4L * RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
+ 8 * Character.BYTES; + 8L * Character.BYTES;
private final DirectoryTaxonomyWriter taxoWriter; private final DirectoryTaxonomyWriter taxoWriter;
private final long taxoEpoch; // used in doOpenIfChanged private final long taxoEpoch; // used in doOpenIfChanged
@ -91,7 +91,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
* @param categoryCache an ordinal to FacetLabel mapping if it already exists * @param categoryCache an ordinal to FacetLabel mapping if it already exists
* @param taxoArrays taxonomy arrays that store the parent, siblings, children information * @param taxoArrays taxonomy arrays that store the parent, siblings, children information
*/ */
protected DirectoryTaxonomyReader( DirectoryTaxonomyReader(
DirectoryReader indexReader, DirectoryReader indexReader,
DirectoryTaxonomyWriter taxoWriter, DirectoryTaxonomyWriter taxoWriter,
LRUHashMap<FacetLabel, Integer> ordinalCache, LRUHashMap<FacetLabel, Integer> ordinalCache,
@ -103,14 +103,9 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
this.taxoEpoch = taxoWriter == null ? -1 : taxoWriter.getTaxonomyEpoch(); this.taxoEpoch = taxoWriter == null ? -1 : taxoWriter.getTaxonomyEpoch();
// use the same instance of the cache, note the protective code in getOrdinal and getPath // use the same instance of the cache, note the protective code in getOrdinal and getPath
this.ordinalCache = this.ordinalCache = ordinalCache == null ? new LRUHashMap<>(DEFAULT_CACHE_VALUE) : ordinalCache;
ordinalCache == null
? new LRUHashMap<FacetLabel, Integer>(DEFAULT_CACHE_VALUE)
: ordinalCache;
this.categoryCache = this.categoryCache =
categoryCache == null categoryCache == null ? new LRUHashMap<>(DEFAULT_CACHE_VALUE) : categoryCache;
? new LRUHashMap<Integer, FacetLabel>(DEFAULT_CACHE_VALUE)
: categoryCache;
this.taxoArrays = taxoArrays != null ? new TaxonomyIndexArrays(indexReader, taxoArrays) : null; this.taxoArrays = taxoArrays != null ? new TaxonomyIndexArrays(indexReader, taxoArrays) : null;
} }
@ -151,16 +146,6 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
categoryCache = new LRUHashMap<>(DEFAULT_CACHE_VALUE); categoryCache = new LRUHashMap<>(DEFAULT_CACHE_VALUE);
} }
private synchronized void initTaxoArrays() throws IOException {
if (taxoArrays == null) {
// according to Java Concurrency in Practice, this might perform better on
// some JVMs, because the array initialization doesn't happen on the
// volatile member.
TaxonomyIndexArrays tmpArrays = new TaxonomyIndexArrays(indexReader);
taxoArrays = tmpArrays;
}
}
@Override @Override
protected void doClose() throws IOException { protected void doClose() throws IOException {
indexReader.close(); indexReader.close();
@ -215,18 +200,18 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
} }
} }
final DirectoryTaxonomyReader newtr; final DirectoryTaxonomyReader newTaxoReader;
if (recreated) { if (recreated) {
// if recreated, do not reuse anything from this instance. the information // if recreated, do not reuse anything from this instance. the information
// will be lazily computed by the new instance when needed. // will be lazily computed by the new instance when needed.
newtr = new DirectoryTaxonomyReader(r2, taxoWriter, null, null, null); newTaxoReader = new DirectoryTaxonomyReader(r2, taxoWriter, null, null, null);
} else { } else {
newtr = newTaxoReader =
new DirectoryTaxonomyReader(r2, taxoWriter, ordinalCache, categoryCache, taxoArrays); new DirectoryTaxonomyReader(r2, taxoWriter, ordinalCache, categoryCache, taxoArrays);
} }
success = true; success = true;
return newtr; return newTaxoReader;
} finally { } finally {
if (!success) { if (!success) {
IOUtils.closeWhileHandlingException(r2); IOUtils.closeWhileHandlingException(r2);
@ -256,10 +241,18 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
@Override @Override
public ParallelTaxonomyArrays getParallelTaxonomyArrays() throws IOException { public ParallelTaxonomyArrays getParallelTaxonomyArrays() throws IOException {
ensureOpen(); ensureOpen();
if (taxoArrays == null) { // By copying to a local variable we only perform a volatile read once (if it's not null)
initTaxoArrays(); TaxonomyIndexArrays arrays = taxoArrays;
if (arrays == null) {
synchronized (this) {
arrays = taxoArrays;
if (arrays == null) {
arrays = new TaxonomyIndexArrays(indexReader);
taxoArrays = arrays;
}
}
} }
return taxoArrays; return arrays;
} }
@Override @Override
@ -279,14 +272,14 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
synchronized (ordinalCache) { synchronized (ordinalCache) {
Integer res = ordinalCache.get(cp); Integer res = ordinalCache.get(cp);
if (res != null) { if (res != null) {
if (res.intValue() < indexReader.maxDoc()) { if (res < indexReader.maxDoc()) {
// Since the cache is shared with DTR instances allocated from // Since the cache is shared with DTR instances allocated from
// doOpenIfChanged, we need to ensure that the ordinal is one that // doOpenIfChanged, we need to ensure that the ordinal is one that
// this DTR instance recognizes. // this DTR instance recognizes.
return res.intValue(); return res;
} else { } else {
// if we get here, it means that the category was found in the cache, // if we get here, it means that the category was found in the cache,
// but is not recognized by this TR instance. Therefore there's no // but is not recognized by this TR instance. Therefore, there's no
// need to continue search for the path on disk, because we won't find // need to continue search for the path on disk, because we won't find
// it there too. // it there too.
return TaxonomyReader.INVALID_ORDINAL; return TaxonomyReader.INVALID_ORDINAL;
@ -306,13 +299,13 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
ret = docs.docID(); ret = docs.docID();
// we only store the fact that a category exists, not its inexistence. // We only store the fact that a category exists, not otherwise.
// This is required because the caches are shared with new DTR instances // This is required because the caches are shared with new DTR instances
// that are allocated from doOpenIfChanged. Therefore, if we only store // that are allocated from doOpenIfChanged. Therefore, if we only store
// information about found categories, we cannot accidently tell a new // information about found categories, we cannot accidentally tell a new
// generation of DTR that a category does not exist. // generation of DTR that a category does not exist.
synchronized (ordinalCache) { synchronized (ordinalCache) {
ordinalCache.put(cp, Integer.valueOf(ret)); ordinalCache.put(cp, ret);
} }
} }
@ -325,7 +318,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
// Since the cache is shared with DTR instances allocated from // Since the cache is shared with DTR instances allocated from
// doOpenIfChanged, we need to ensure that the ordinal is one that this DTR // doOpenIfChanged, we need to ensure that the ordinal is one that this DTR
// instance recognizes. Therefore we do this check up front, before we hit // instance recognizes. Therefore, we do this check up front, before we hit
// the cache. // the cache.
checkOrdinalBounds(ordinal); checkOrdinalBounds(ordinal);
@ -398,8 +391,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
* was created using StoredFields (with no performance gains) and uses DocValues based iteration * was created using StoredFields (with no performance gains) and uses DocValues based iteration
* when the index is based on BinaryDocValues. Lucene switched to BinaryDocValues in version 9.0 * when the index is based on BinaryDocValues. Lucene switched to BinaryDocValues in version 9.0
* *
* @param ordinals Array of ordinals that are assigned to categories inserted into the taxonomy * @param ordinals Array of category ordinals that were added to the taxonomy index
* index
*/ */
@Override @Override
public FacetLabel[] getBulkPath(int... ordinals) throws IOException { public FacetLabel[] getBulkPath(int... ordinals) throws IOException {
@ -464,7 +456,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
return super.getBulkPath(ordinals); return super.getBulkPath(ordinals);
} }
} }
// values is leaf specific so you only advance till you reach the target within the leaf // values is leaf specific, so you only advance till you reach the target within the leaf
boolean success = values.advanceExact(ordinals[i] - leafReaderDocBase); boolean success = values.advanceExact(ordinals[i] - leafReaderDocBase);
assert success; assert success;
bulkPath[originalPosition[i]] = bulkPath[originalPosition[i]] =
@ -558,8 +550,8 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
public String toString(int max) { public String toString(int max) {
ensureOpen(); ensureOpen();
StringBuilder sb = new StringBuilder(); StringBuilder sb = new StringBuilder();
int upperl = Math.min(max, indexReader.maxDoc()); int limit = Math.min(max, indexReader.maxDoc());
for (int i = 0; i < upperl; i++) { for (int i = 0; i < limit; i++) {
try { try {
FacetLabel category = this.getPath(i); FacetLabel category = this.getPath(i);
if (category == null) { if (category == null) {
@ -570,7 +562,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
sb.append(i).append(": EMPTY STRING!! \n"); sb.append(i).append(": EMPTY STRING!! \n");
continue; continue;
} }
sb.append(i).append(": ").append(category.toString()).append("\n"); sb.append(i).append(": ").append(category).append("\n");
} catch ( } catch (
@SuppressWarnings("unused") @SuppressWarnings("unused")
IOException e) { IOException e) {

View File

@ -89,11 +89,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
private final IndexWriter indexWriter; private final IndexWriter indexWriter;
private final TaxonomyWriterCache cache; private final TaxonomyWriterCache cache;
private final AtomicInteger cacheMisses = new AtomicInteger(0); private final AtomicInteger cacheMisses = new AtomicInteger(0);
private final AtomicInteger nextID = new AtomicInteger(0);
private final Field fullPathField;
// Records the taxonomy index epoch, updated on replaceTaxonomy as well. // Records the taxonomy index epoch, updated on replaceTaxonomy as well.
private long indexEpoch; private long indexEpoch;
private Field fullPathField;
private int cacheMissesUntilFill = 11; private int cacheMissesUntilFill = 11;
private boolean shouldFillCache = true; private boolean shouldFillCache = true;
@ -108,14 +109,13 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
* When the cache is <B>not</B> complete, and we can't find a category in the cache, we still need * When the cache is <B>not</B> complete, and we can't find a category in the cache, we still need
* to look for it in the on-disk index; Therefore when the cache is not complete, we need to open * to look for it in the on-disk index; Therefore when the cache is not complete, we need to open
* a "reader" to the taxonomy index. The cache becomes incomplete if it was never filled with the * a "reader" to the taxonomy index. The cache becomes incomplete if it was never filled with the
* existing categories, or if a put() to the cache ever returned true (meaning that some of the * existing categories, or if a put() to the cache ever returned true (meaning that some cached
* cached data was cleared). * data was cleared).
*/ */
private volatile boolean cacheIsComplete; private volatile boolean cacheIsComplete;
private volatile boolean isClosed = false; private volatile boolean isClosed = false;
private volatile TaxonomyIndexArrays taxoArrays; private volatile TaxonomyIndexArrays taxoArrays;
private volatile int nextID;
/** /**
* Construct a Taxonomy writer. * Construct a Taxonomy writer.
@ -169,14 +169,14 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
fullPathField = new StringField(Consts.FULL, "", Field.Store.NO); fullPathField = new StringField(Consts.FULL, "", Field.Store.NO);
nextID = indexWriter.getDocStats().maxDoc; nextID.set(indexWriter.getDocStats().maxDoc);
if (cache == null) { if (cache == null) {
cache = defaultTaxonomyWriterCache(); cache = defaultTaxonomyWriterCache();
} }
this.cache = cache; this.cache = cache;
if (nextID == 0) { if (nextID.get() == 0) {
cacheIsComplete = true; cacheIsComplete = true;
// Make sure that the taxonomy always contain the root category // Make sure that the taxonomy always contain the root category
// with category id 0. // with category id 0.
@ -424,9 +424,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
} else { } else {
parent = TaxonomyReader.INVALID_ORDINAL; parent = TaxonomyReader.INVALID_ORDINAL;
} }
int id = addCategoryDocument(cp, parent); return addCategoryDocument(cp, parent);
return id;
} }
/** /**
@ -439,7 +437,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
} }
/** /**
* Note that the methods calling addCategoryDocument() are synchornized, so this method is * Note that the methods calling addCategoryDocument() are synchronized, so this method is
* effectively synchronized as well. * effectively synchronized as well.
*/ */
private int addCategoryDocument(FacetLabel categoryPath, int parent) throws IOException { private int addCategoryDocument(FacetLabel categoryPath, int parent) throws IOException {
@ -455,11 +453,8 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
d.add(fullPathField); d.add(fullPathField);
// Note that we do no pass an Analyzer here because the fields that are
// added to the Document are untokenized or contains their own TokenStream.
// Therefore the IndexWriter's Analyzer has no effect.
indexWriter.addDocument(d); indexWriter.addDocument(d);
int id = nextID++; int id = nextID.getAndIncrement();
// added a category document, mark that ReaderManager is not up-to-date // added a category document, mark that ReaderManager is not up-to-date
shouldRefreshReaderManager = true; shouldRefreshReaderManager = true;
@ -491,7 +486,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
// addCategoryDocument -- when this method returns, we must know that the // addCategoryDocument -- when this method returns, we must know that the
// reader manager's state is current. also, it sets shouldRefresh to false, // reader manager's state is current. also, it sets shouldRefresh to false,
// and this cannot overlap with addCatDoc too. // and this cannot overlap with addCatDoc too.
// NOTE: since this method is sync'ed, it can call maybeRefresh, instead of // NOTE: since this method is synced, it can call maybeRefresh, instead of
// maybeRefreshBlocking. If ever this is changed, make sure to change the // maybeRefreshBlocking. If ever this is changed, make sure to change the
// call too. // call too.
if (shouldRefreshReaderManager && initializedReaderManager) { if (shouldRefreshReaderManager && initializedReaderManager) {
@ -567,7 +562,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
@Override @Override
public int getSize() { public int getSize() {
ensureOpen(); ensureOpen();
return nextID; return nextID.get();
} }
/** /**
@ -656,33 +651,33 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
} }
private TaxonomyIndexArrays getTaxoArrays() throws IOException { private TaxonomyIndexArrays getTaxoArrays() throws IOException {
if (taxoArrays == null) { // By copying to a local variable we only perform a volatile read once (if it's not null)
TaxonomyIndexArrays arrays = taxoArrays;
if (arrays == null) {
synchronized (this) { synchronized (this) {
if (taxoArrays == null) { arrays = taxoArrays;
if (arrays == null) {
initReaderManager(); initReaderManager();
DirectoryReader reader = readerManager.acquire(); DirectoryReader reader = readerManager.acquire();
try { try {
// according to Java Concurrency, this might perform better on some arrays = new TaxonomyIndexArrays(reader);
// JVMs, since the object initialization doesn't happen on the
// volatile member.
TaxonomyIndexArrays tmpArrays = new TaxonomyIndexArrays(reader);
taxoArrays = tmpArrays;
} finally { } finally {
readerManager.release(reader); readerManager.release(reader);
} }
taxoArrays = arrays;
} }
} }
} }
return taxoArrays; return arrays;
} }
@Override @Override
public int getParent(int ordinal) throws IOException { public int getParent(int ordinal) throws IOException {
ensureOpen(); ensureOpen();
// Note: the following if() just enforces that a user can never ask // Note: the following if() just enforces that a user can never ask
// for the parent of a nonexistant category - even if the parent array // for the parent of a nonexistent category - even if the parent array
// was allocated bigger than it really needs to be. // was allocated bigger than it really needs to be.
Objects.checkIndex(ordinal, nextID); Objects.checkIndex(ordinal, nextID.get());
int[] parents = getTaxoArrays().parents(); int[] parents = getTaxoArrays().parents();
assert ordinal < parents.length assert ordinal < parents.length
@ -697,11 +692,9 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
*/ */
public void addTaxonomy(Directory taxoDir, OrdinalMap map) throws IOException { public void addTaxonomy(Directory taxoDir, OrdinalMap map) throws IOException {
ensureOpen(); ensureOpen();
DirectoryReader r = DirectoryReader.open(taxoDir); try (DirectoryReader r = DirectoryReader.open(taxoDir)) {
try {
final int size = r.numDocs(); final int size = r.numDocs();
final OrdinalMap ordinalMap = map; map.setSize(size);
ordinalMap.setSize(size);
int base = 0; int base = 0;
PostingsEnum docs = null; PostingsEnum docs = null;
for (final LeafReaderContext ctx : r.leaves()) { for (final LeafReaderContext ctx : r.leaves()) {
@ -713,13 +706,11 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(te.term().utf8ToString())); FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(te.term().utf8ToString()));
final int ordinal = addCategory(cp); final int ordinal = addCategory(cp);
docs = te.postings(docs, PostingsEnum.NONE); docs = te.postings(docs, PostingsEnum.NONE);
ordinalMap.addMapping(docs.nextDoc() + base, ordinal); map.addMapping(docs.nextDoc() + base, ordinal);
} }
base += ar.maxDoc(); // no deletions, so we're ok base += ar.maxDoc(); // no deletions, so we're ok
} }
ordinalMap.addDone(); map.addDone();
} finally {
r.close();
} }
} }
@ -736,22 +727,22 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
* the same time, it is recommended to put the first taxonomy's map in memory, and all the rest on * the same time, it is recommended to put the first taxonomy's map in memory, and all the rest on
* disk (later to be automatically read into memory one by one, when needed). * disk (later to be automatically read into memory one by one, when needed).
*/ */
public static interface OrdinalMap { public interface OrdinalMap {
/** /**
* Set the size of the map. This MUST be called before addMapping(). It is assumed (but not * Set the size of the map. This MUST be called before addMapping(). It is assumed (but not
* verified) that addMapping() will then be called exactly 'size' times, with different * verified) that addMapping() will then be called exactly 'size' times, with different
* origOrdinals between 0 and size-1. * origOrdinals between 0 and size-1.
*/ */
public void setSize(int size) throws IOException; void setSize(int size) throws IOException;
/** Record a mapping. */ /** Record a mapping. */
public void addMapping(int origOrdinal, int newOrdinal) throws IOException; void addMapping(int origOrdinal, int newOrdinal) throws IOException;
/** /**
* Call addDone() to say that all addMapping() have been done. In some implementations this * Call addDone() to say that all addMapping() have been done. In some implementations this
* might free some resources. * might free some resources.
*/ */
public void addDone() throws IOException; void addDone() throws IOException;
/** /**
* Return the map from the taxonomy's original (consecutive) ordinals to the new taxonomy's * Return the map from the taxonomy's original (consecutive) ordinals to the new taxonomy's
@ -760,7 +751,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
* needed. Calling it will also free all resources that the map might be holding (such as * needed. Calling it will also free all resources that the map might be holding (such as
* temporary disk space), other than the returned int[]. * temporary disk space), other than the returned int[].
*/ */
public int[] getMap() throws IOException; int[] getMap() throws IOException;
} }
/** {@link OrdinalMap} maintained in memory */ /** {@link OrdinalMap} maintained in memory */
@ -793,13 +784,15 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
/** {@link OrdinalMap} maintained on file system */ /** {@link OrdinalMap} maintained on file system */
public static final class DiskOrdinalMap implements OrdinalMap { public static final class DiskOrdinalMap implements OrdinalMap {
Path tmpfile; private final Path mapFile;
DataOutputStream out;
private int[] map = null;
private DataOutputStream out;
/** Sole constructor. */ /** Sole constructor. */
public DiskOrdinalMap(Path tmpfile) throws IOException { public DiskOrdinalMap(Path mapFile) throws IOException {
this.tmpfile = tmpfile; this.mapFile = mapFile;
out = new DataOutputStream(new BufferedOutputStream(Files.newOutputStream(tmpfile))); out = new DataOutputStream(new BufferedOutputStream(Files.newOutputStream(mapFile)));
} }
@Override @Override
@ -821,8 +814,6 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
} }
} }
int[] map = null;
@Override @Override
public int[] getMap() throws IOException { public int[] getMap() throws IOException {
if (map != null) { if (map != null) {
@ -830,20 +821,20 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
} }
addDone(); // in case this wasn't previously called addDone(); // in case this wasn't previously called
try (DataInputStream in = try (DataInputStream in =
new DataInputStream(new BufferedInputStream(Files.newInputStream(tmpfile)))) { new DataInputStream(new BufferedInputStream(Files.newInputStream(mapFile)))) {
map = new int[in.readInt()]; map = new int[in.readInt()];
// NOTE: The current code assumes here that the map is complete, // NOTE: The current code assumes that the map is complete,
// i.e., every ordinal gets one and exactly one value. Otherwise, // i.e. that every ordinal gets exactly one value. Otherwise,
// we may run into an EOF here, or vice versa, not read everything. // we may run into an EOF here, or not read everything.
for (int i = 0; i < map.length; i++) { for (int i = 0; i < map.length; i++) {
int origordinal = in.readInt(); int origOrdinal = in.readInt();
int newordinal = in.readInt(); int newOrdinal = in.readInt();
map[origordinal] = newordinal; map[origOrdinal] = newOrdinal;
} }
} }
// Delete the temporary file, which is no longer needed. // Delete the temporary file, which is no longer needed.
Files.delete(tmpfile); Files.delete(mapFile);
return map; return map;
} }
@ -863,8 +854,8 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
/** /**
* Replaces the current taxonomy with the given one. This method should generally be called in * Replaces the current taxonomy with the given one. This method should generally be called in
* conjunction with {@link IndexWriter#addIndexes(Directory...)} to replace both the taxonomy as * conjunction with {@link IndexWriter#addIndexes(Directory...)} to replace both the taxonomy and
* well as the search index content. * the search index content.
*/ */
public synchronized void replaceTaxonomy(Directory taxoDir) throws IOException { public synchronized void replaceTaxonomy(Directory taxoDir) throws IOException {
// replace the taxonomy by doing IW optimized operations // replace the taxonomy by doing IW optimized operations
@ -873,7 +864,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
shouldRefreshReaderManager = true; shouldRefreshReaderManager = true;
initReaderManager(); // ensure that it's initialized initReaderManager(); // ensure that it's initialized
refreshReaderManager(); refreshReaderManager();
nextID = indexWriter.getDocStats().maxDoc; nextID.set(indexWriter.getDocStats().maxDoc);
taxoArrays = null; // must nullify so that it's re-computed next time it's needed taxoArrays = null; // must nullify so that it's re-computed next time it's needed
// need to clear the cache, so that addCategory won't accidentally return // need to clear the cache, so that addCategory won't accidentally return

View File

@ -78,7 +78,7 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
} }
} }
private final synchronized void initChildrenSiblings(TaxonomyIndexArrays copyFrom) { private synchronized void initChildrenSiblings(TaxonomyIndexArrays copyFrom) {
if (!initializedChildren) { // must do this check ! if (!initializedChildren) { // must do this check !
children = new int[parents.length]; children = new int[parents.length];
siblings = new int[parents.length]; siblings = new int[parents.length];
@ -141,7 +141,6 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
throw new CorruptIndexException( throw new CorruptIndexException(
"Missing parent data for category " + (doc + leafContext.docBase), reader.toString()); "Missing parent data for category " + (doc + leafContext.docBase), reader.toString());
} }
// we're putting an int and converting it back so it should be safe
parents[doc + leafContext.docBase] = Math.toIntExact(parentValues.longValue()); parents[doc + leafContext.docBase] = Math.toIntExact(parentValues.longValue());
} }
} }
@ -204,7 +203,7 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
@Override @Override
public synchronized long ramBytesUsed() { public synchronized long ramBytesUsed() {
long ramBytesUsed = long ramBytesUsed =
RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 3 * RamUsageEstimator.NUM_BYTES_OBJECT_REF + 1; RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 3L * RamUsageEstimator.NUM_BYTES_OBJECT_REF + 1;
ramBytesUsed += RamUsageEstimator.shallowSizeOf(parents); ramBytesUsed += RamUsageEstimator.shallowSizeOf(parents);
if (children != null) { if (children != null) {
ramBytesUsed += RamUsageEstimator.shallowSizeOf(children); ramBytesUsed += RamUsageEstimator.shallowSizeOf(children);