mirror of https://github.com/apache/lucene.git
Minor refactoring and cleanup to taxonomy index code (#11775)
This commit is contained in:
parent
add309bb40
commit
bcc116057d
|
@ -32,7 +32,7 @@ public class FacetLabel implements Comparable<FacetLabel> {
|
|||
/*
|
||||
* copied from DocumentWriterPerThread -- if a FacetLabel is resolved to a
|
||||
* drill-down term which is encoded to a larger term than that length, it is
|
||||
* silently dropped! Therefore we limit the number of characters to MAX/4 to
|
||||
* silently dropped! Therefore, we limit the number of characters to MAX/4 to
|
||||
* be on the safe side.
|
||||
*/
|
||||
/** The maximum number of characters a {@link FacetLabel} can have. */
|
||||
|
@ -48,10 +48,10 @@ public class FacetLabel implements Comparable<FacetLabel> {
|
|||
/** The number of components of this {@link FacetLabel}. */
|
||||
public final int length;
|
||||
|
||||
// Used by subpath
|
||||
// Used by sub-path
|
||||
private FacetLabel(final FacetLabel copyFrom, final int prefixLen) {
|
||||
// while the code which calls this method is safe, at some point a test
|
||||
// tripped on AIOOBE in toString, but we failed to reproduce. adding the
|
||||
// While the code which calls this method is safe, at some point a test
|
||||
// tripped on AIOOBE in toString, but we failed to reproduce. Adding
|
||||
// assert as a safety check.
|
||||
assert prefixLen >= 0 && prefixLen <= copyFrom.components.length
|
||||
: "prefixLen cannot be negative nor larger than the given components' length: prefixLen="
|
||||
|
@ -103,7 +103,7 @@ public class FacetLabel implements Comparable<FacetLabel> {
|
|||
/** Compares this path with another {@link FacetLabel} for lexicographic order. */
|
||||
@Override
|
||||
public int compareTo(FacetLabel other) {
|
||||
final int len = length < other.length ? length : other.length;
|
||||
final int len = Math.min(length, other.length);
|
||||
for (int i = 0, j = 0; i < len; i++, j++) {
|
||||
int cmp = components[i].compareTo(other.components[j]);
|
||||
if (cmp < 0) {
|
||||
|
@ -120,7 +120,7 @@ public class FacetLabel implements Comparable<FacetLabel> {
|
|||
|
||||
@Override
|
||||
public boolean equals(Object obj) {
|
||||
if (!(obj instanceof FacetLabel)) {
|
||||
if (obj instanceof FacetLabel == false) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
|
|
@ -62,10 +62,10 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
|
|||
private static final int DEFAULT_CACHE_VALUE = 4000;
|
||||
|
||||
// NOTE: very coarse estimate!
|
||||
private static final int BYTES_PER_CACHE_ENTRY =
|
||||
4 * RamUsageEstimator.NUM_BYTES_OBJECT_REF
|
||||
+ 4 * RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
|
||||
+ 8 * Character.BYTES;
|
||||
private static final long BYTES_PER_CACHE_ENTRY =
|
||||
4L * RamUsageEstimator.NUM_BYTES_OBJECT_REF
|
||||
+ 4L * RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
|
||||
+ 8L * Character.BYTES;
|
||||
|
||||
private final DirectoryTaxonomyWriter taxoWriter;
|
||||
private final long taxoEpoch; // used in doOpenIfChanged
|
||||
|
@ -91,7 +91,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
|
|||
* @param categoryCache an ordinal to FacetLabel mapping if it already exists
|
||||
* @param taxoArrays taxonomy arrays that store the parent, siblings, children information
|
||||
*/
|
||||
protected DirectoryTaxonomyReader(
|
||||
DirectoryTaxonomyReader(
|
||||
DirectoryReader indexReader,
|
||||
DirectoryTaxonomyWriter taxoWriter,
|
||||
LRUHashMap<FacetLabel, Integer> ordinalCache,
|
||||
|
@ -103,14 +103,9 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
|
|||
this.taxoEpoch = taxoWriter == null ? -1 : taxoWriter.getTaxonomyEpoch();
|
||||
|
||||
// use the same instance of the cache, note the protective code in getOrdinal and getPath
|
||||
this.ordinalCache =
|
||||
ordinalCache == null
|
||||
? new LRUHashMap<FacetLabel, Integer>(DEFAULT_CACHE_VALUE)
|
||||
: ordinalCache;
|
||||
this.ordinalCache = ordinalCache == null ? new LRUHashMap<>(DEFAULT_CACHE_VALUE) : ordinalCache;
|
||||
this.categoryCache =
|
||||
categoryCache == null
|
||||
? new LRUHashMap<Integer, FacetLabel>(DEFAULT_CACHE_VALUE)
|
||||
: categoryCache;
|
||||
categoryCache == null ? new LRUHashMap<>(DEFAULT_CACHE_VALUE) : categoryCache;
|
||||
|
||||
this.taxoArrays = taxoArrays != null ? new TaxonomyIndexArrays(indexReader, taxoArrays) : null;
|
||||
}
|
||||
|
@ -151,16 +146,6 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
|
|||
categoryCache = new LRUHashMap<>(DEFAULT_CACHE_VALUE);
|
||||
}
|
||||
|
||||
private synchronized void initTaxoArrays() throws IOException {
|
||||
if (taxoArrays == null) {
|
||||
// according to Java Concurrency in Practice, this might perform better on
|
||||
// some JVMs, because the array initialization doesn't happen on the
|
||||
// volatile member.
|
||||
TaxonomyIndexArrays tmpArrays = new TaxonomyIndexArrays(indexReader);
|
||||
taxoArrays = tmpArrays;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doClose() throws IOException {
|
||||
indexReader.close();
|
||||
|
@ -215,18 +200,18 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
|
|||
}
|
||||
}
|
||||
|
||||
final DirectoryTaxonomyReader newtr;
|
||||
final DirectoryTaxonomyReader newTaxoReader;
|
||||
if (recreated) {
|
||||
// if recreated, do not reuse anything from this instance. the information
|
||||
// will be lazily computed by the new instance when needed.
|
||||
newtr = new DirectoryTaxonomyReader(r2, taxoWriter, null, null, null);
|
||||
newTaxoReader = new DirectoryTaxonomyReader(r2, taxoWriter, null, null, null);
|
||||
} else {
|
||||
newtr =
|
||||
newTaxoReader =
|
||||
new DirectoryTaxonomyReader(r2, taxoWriter, ordinalCache, categoryCache, taxoArrays);
|
||||
}
|
||||
|
||||
success = true;
|
||||
return newtr;
|
||||
return newTaxoReader;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(r2);
|
||||
|
@ -256,10 +241,18 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
|
|||
@Override
|
||||
public ParallelTaxonomyArrays getParallelTaxonomyArrays() throws IOException {
|
||||
ensureOpen();
|
||||
if (taxoArrays == null) {
|
||||
initTaxoArrays();
|
||||
// By copying to a local variable we only perform a volatile read once (if it's not null)
|
||||
TaxonomyIndexArrays arrays = taxoArrays;
|
||||
if (arrays == null) {
|
||||
synchronized (this) {
|
||||
arrays = taxoArrays;
|
||||
if (arrays == null) {
|
||||
arrays = new TaxonomyIndexArrays(indexReader);
|
||||
taxoArrays = arrays;
|
||||
}
|
||||
}
|
||||
}
|
||||
return taxoArrays;
|
||||
return arrays;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -279,14 +272,14 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
|
|||
synchronized (ordinalCache) {
|
||||
Integer res = ordinalCache.get(cp);
|
||||
if (res != null) {
|
||||
if (res.intValue() < indexReader.maxDoc()) {
|
||||
if (res < indexReader.maxDoc()) {
|
||||
// Since the cache is shared with DTR instances allocated from
|
||||
// doOpenIfChanged, we need to ensure that the ordinal is one that
|
||||
// this DTR instance recognizes.
|
||||
return res.intValue();
|
||||
return res;
|
||||
} else {
|
||||
// if we get here, it means that the category was found in the cache,
|
||||
// but is not recognized by this TR instance. Therefore there's no
|
||||
// but is not recognized by this TR instance. Therefore, there's no
|
||||
// need to continue search for the path on disk, because we won't find
|
||||
// it there too.
|
||||
return TaxonomyReader.INVALID_ORDINAL;
|
||||
|
@ -306,13 +299,13 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
|
|||
if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
|
||||
ret = docs.docID();
|
||||
|
||||
// we only store the fact that a category exists, not its inexistence.
|
||||
// We only store the fact that a category exists, not otherwise.
|
||||
// This is required because the caches are shared with new DTR instances
|
||||
// that are allocated from doOpenIfChanged. Therefore, if we only store
|
||||
// information about found categories, we cannot accidently tell a new
|
||||
// information about found categories, we cannot accidentally tell a new
|
||||
// generation of DTR that a category does not exist.
|
||||
synchronized (ordinalCache) {
|
||||
ordinalCache.put(cp, Integer.valueOf(ret));
|
||||
ordinalCache.put(cp, ret);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -325,7 +318,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
|
|||
|
||||
// Since the cache is shared with DTR instances allocated from
|
||||
// doOpenIfChanged, we need to ensure that the ordinal is one that this DTR
|
||||
// instance recognizes. Therefore we do this check up front, before we hit
|
||||
// instance recognizes. Therefore, we do this check up front, before we hit
|
||||
// the cache.
|
||||
checkOrdinalBounds(ordinal);
|
||||
|
||||
|
@ -398,8 +391,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
|
|||
* was created using StoredFields (with no performance gains) and uses DocValues based iteration
|
||||
* when the index is based on BinaryDocValues. Lucene switched to BinaryDocValues in version 9.0
|
||||
*
|
||||
* @param ordinals Array of ordinals that are assigned to categories inserted into the taxonomy
|
||||
* index
|
||||
* @param ordinals Array of category ordinals that were added to the taxonomy index
|
||||
*/
|
||||
@Override
|
||||
public FacetLabel[] getBulkPath(int... ordinals) throws IOException {
|
||||
|
@ -464,7 +456,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
|
|||
return super.getBulkPath(ordinals);
|
||||
}
|
||||
}
|
||||
// values is leaf specific so you only advance till you reach the target within the leaf
|
||||
// values is leaf specific, so you only advance till you reach the target within the leaf
|
||||
boolean success = values.advanceExact(ordinals[i] - leafReaderDocBase);
|
||||
assert success;
|
||||
bulkPath[originalPosition[i]] =
|
||||
|
@ -558,8 +550,8 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
|
|||
public String toString(int max) {
|
||||
ensureOpen();
|
||||
StringBuilder sb = new StringBuilder();
|
||||
int upperl = Math.min(max, indexReader.maxDoc());
|
||||
for (int i = 0; i < upperl; i++) {
|
||||
int limit = Math.min(max, indexReader.maxDoc());
|
||||
for (int i = 0; i < limit; i++) {
|
||||
try {
|
||||
FacetLabel category = this.getPath(i);
|
||||
if (category == null) {
|
||||
|
@ -570,7 +562,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
|
|||
sb.append(i).append(": EMPTY STRING!! \n");
|
||||
continue;
|
||||
}
|
||||
sb.append(i).append(": ").append(category.toString()).append("\n");
|
||||
sb.append(i).append(": ").append(category).append("\n");
|
||||
} catch (
|
||||
@SuppressWarnings("unused")
|
||||
IOException e) {
|
||||
|
|
|
@ -89,11 +89,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
private final IndexWriter indexWriter;
|
||||
private final TaxonomyWriterCache cache;
|
||||
private final AtomicInteger cacheMisses = new AtomicInteger(0);
|
||||
private final AtomicInteger nextID = new AtomicInteger(0);
|
||||
private final Field fullPathField;
|
||||
|
||||
// Records the taxonomy index epoch, updated on replaceTaxonomy as well.
|
||||
private long indexEpoch;
|
||||
|
||||
private Field fullPathField;
|
||||
private int cacheMissesUntilFill = 11;
|
||||
private boolean shouldFillCache = true;
|
||||
|
||||
|
@ -108,14 +109,13 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
* When the cache is <B>not</B> complete, and we can't find a category in the cache, we still need
|
||||
* to look for it in the on-disk index; Therefore when the cache is not complete, we need to open
|
||||
* a "reader" to the taxonomy index. The cache becomes incomplete if it was never filled with the
|
||||
* existing categories, or if a put() to the cache ever returned true (meaning that some of the
|
||||
* cached data was cleared).
|
||||
* existing categories, or if a put() to the cache ever returned true (meaning that some cached
|
||||
* data was cleared).
|
||||
*/
|
||||
private volatile boolean cacheIsComplete;
|
||||
|
||||
private volatile boolean isClosed = false;
|
||||
private volatile TaxonomyIndexArrays taxoArrays;
|
||||
private volatile int nextID;
|
||||
|
||||
/**
|
||||
* Construct a Taxonomy writer.
|
||||
|
@ -169,14 +169,14 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
|
||||
fullPathField = new StringField(Consts.FULL, "", Field.Store.NO);
|
||||
|
||||
nextID = indexWriter.getDocStats().maxDoc;
|
||||
nextID.set(indexWriter.getDocStats().maxDoc);
|
||||
|
||||
if (cache == null) {
|
||||
cache = defaultTaxonomyWriterCache();
|
||||
}
|
||||
this.cache = cache;
|
||||
|
||||
if (nextID == 0) {
|
||||
if (nextID.get() == 0) {
|
||||
cacheIsComplete = true;
|
||||
// Make sure that the taxonomy always contain the root category
|
||||
// with category id 0.
|
||||
|
@ -424,9 +424,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
} else {
|
||||
parent = TaxonomyReader.INVALID_ORDINAL;
|
||||
}
|
||||
int id = addCategoryDocument(cp, parent);
|
||||
|
||||
return id;
|
||||
return addCategoryDocument(cp, parent);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -439,7 +437,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
}
|
||||
|
||||
/**
|
||||
* Note that the methods calling addCategoryDocument() are synchornized, so this method is
|
||||
* Note that the methods calling addCategoryDocument() are synchronized, so this method is
|
||||
* effectively synchronized as well.
|
||||
*/
|
||||
private int addCategoryDocument(FacetLabel categoryPath, int parent) throws IOException {
|
||||
|
@ -455,11 +453,8 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
|
||||
d.add(fullPathField);
|
||||
|
||||
// Note that we do no pass an Analyzer here because the fields that are
|
||||
// added to the Document are untokenized or contains their own TokenStream.
|
||||
// Therefore the IndexWriter's Analyzer has no effect.
|
||||
indexWriter.addDocument(d);
|
||||
int id = nextID++;
|
||||
int id = nextID.getAndIncrement();
|
||||
|
||||
// added a category document, mark that ReaderManager is not up-to-date
|
||||
shouldRefreshReaderManager = true;
|
||||
|
@ -491,7 +486,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
// addCategoryDocument -- when this method returns, we must know that the
|
||||
// reader manager's state is current. also, it sets shouldRefresh to false,
|
||||
// and this cannot overlap with addCatDoc too.
|
||||
// NOTE: since this method is sync'ed, it can call maybeRefresh, instead of
|
||||
// NOTE: since this method is synced, it can call maybeRefresh, instead of
|
||||
// maybeRefreshBlocking. If ever this is changed, make sure to change the
|
||||
// call too.
|
||||
if (shouldRefreshReaderManager && initializedReaderManager) {
|
||||
|
@ -567,7 +562,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
@Override
|
||||
public int getSize() {
|
||||
ensureOpen();
|
||||
return nextID;
|
||||
return nextID.get();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -656,33 +651,33 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
}
|
||||
|
||||
private TaxonomyIndexArrays getTaxoArrays() throws IOException {
|
||||
if (taxoArrays == null) {
|
||||
// By copying to a local variable we only perform a volatile read once (if it's not null)
|
||||
TaxonomyIndexArrays arrays = taxoArrays;
|
||||
if (arrays == null) {
|
||||
synchronized (this) {
|
||||
if (taxoArrays == null) {
|
||||
arrays = taxoArrays;
|
||||
if (arrays == null) {
|
||||
initReaderManager();
|
||||
DirectoryReader reader = readerManager.acquire();
|
||||
try {
|
||||
// according to Java Concurrency, this might perform better on some
|
||||
// JVMs, since the object initialization doesn't happen on the
|
||||
// volatile member.
|
||||
TaxonomyIndexArrays tmpArrays = new TaxonomyIndexArrays(reader);
|
||||
taxoArrays = tmpArrays;
|
||||
arrays = new TaxonomyIndexArrays(reader);
|
||||
} finally {
|
||||
readerManager.release(reader);
|
||||
}
|
||||
taxoArrays = arrays;
|
||||
}
|
||||
}
|
||||
}
|
||||
return taxoArrays;
|
||||
return arrays;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getParent(int ordinal) throws IOException {
|
||||
ensureOpen();
|
||||
// Note: the following if() just enforces that a user can never ask
|
||||
// for the parent of a nonexistant category - even if the parent array
|
||||
// for the parent of a nonexistent category - even if the parent array
|
||||
// was allocated bigger than it really needs to be.
|
||||
Objects.checkIndex(ordinal, nextID);
|
||||
Objects.checkIndex(ordinal, nextID.get());
|
||||
|
||||
int[] parents = getTaxoArrays().parents();
|
||||
assert ordinal < parents.length
|
||||
|
@ -697,11 +692,9 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
*/
|
||||
public void addTaxonomy(Directory taxoDir, OrdinalMap map) throws IOException {
|
||||
ensureOpen();
|
||||
DirectoryReader r = DirectoryReader.open(taxoDir);
|
||||
try {
|
||||
try (DirectoryReader r = DirectoryReader.open(taxoDir)) {
|
||||
final int size = r.numDocs();
|
||||
final OrdinalMap ordinalMap = map;
|
||||
ordinalMap.setSize(size);
|
||||
map.setSize(size);
|
||||
int base = 0;
|
||||
PostingsEnum docs = null;
|
||||
for (final LeafReaderContext ctx : r.leaves()) {
|
||||
|
@ -713,13 +706,11 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(te.term().utf8ToString()));
|
||||
final int ordinal = addCategory(cp);
|
||||
docs = te.postings(docs, PostingsEnum.NONE);
|
||||
ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
|
||||
map.addMapping(docs.nextDoc() + base, ordinal);
|
||||
}
|
||||
base += ar.maxDoc(); // no deletions, so we're ok
|
||||
}
|
||||
ordinalMap.addDone();
|
||||
} finally {
|
||||
r.close();
|
||||
map.addDone();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -736,22 +727,22 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
* the same time, it is recommended to put the first taxonomy's map in memory, and all the rest on
|
||||
* disk (later to be automatically read into memory one by one, when needed).
|
||||
*/
|
||||
public static interface OrdinalMap {
|
||||
public interface OrdinalMap {
|
||||
/**
|
||||
* Set the size of the map. This MUST be called before addMapping(). It is assumed (but not
|
||||
* verified) that addMapping() will then be called exactly 'size' times, with different
|
||||
* origOrdinals between 0 and size-1.
|
||||
*/
|
||||
public void setSize(int size) throws IOException;
|
||||
void setSize(int size) throws IOException;
|
||||
|
||||
/** Record a mapping. */
|
||||
public void addMapping(int origOrdinal, int newOrdinal) throws IOException;
|
||||
void addMapping(int origOrdinal, int newOrdinal) throws IOException;
|
||||
|
||||
/**
|
||||
* Call addDone() to say that all addMapping() have been done. In some implementations this
|
||||
* might free some resources.
|
||||
*/
|
||||
public void addDone() throws IOException;
|
||||
void addDone() throws IOException;
|
||||
|
||||
/**
|
||||
* Return the map from the taxonomy's original (consecutive) ordinals to the new taxonomy's
|
||||
|
@ -760,7 +751,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
* needed. Calling it will also free all resources that the map might be holding (such as
|
||||
* temporary disk space), other than the returned int[].
|
||||
*/
|
||||
public int[] getMap() throws IOException;
|
||||
int[] getMap() throws IOException;
|
||||
}
|
||||
|
||||
/** {@link OrdinalMap} maintained in memory */
|
||||
|
@ -793,13 +784,15 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
|
||||
/** {@link OrdinalMap} maintained on file system */
|
||||
public static final class DiskOrdinalMap implements OrdinalMap {
|
||||
Path tmpfile;
|
||||
DataOutputStream out;
|
||||
private final Path mapFile;
|
||||
|
||||
private int[] map = null;
|
||||
private DataOutputStream out;
|
||||
|
||||
/** Sole constructor. */
|
||||
public DiskOrdinalMap(Path tmpfile) throws IOException {
|
||||
this.tmpfile = tmpfile;
|
||||
out = new DataOutputStream(new BufferedOutputStream(Files.newOutputStream(tmpfile)));
|
||||
public DiskOrdinalMap(Path mapFile) throws IOException {
|
||||
this.mapFile = mapFile;
|
||||
out = new DataOutputStream(new BufferedOutputStream(Files.newOutputStream(mapFile)));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -821,8 +814,6 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
}
|
||||
}
|
||||
|
||||
int[] map = null;
|
||||
|
||||
@Override
|
||||
public int[] getMap() throws IOException {
|
||||
if (map != null) {
|
||||
|
@ -830,20 +821,20 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
}
|
||||
addDone(); // in case this wasn't previously called
|
||||
try (DataInputStream in =
|
||||
new DataInputStream(new BufferedInputStream(Files.newInputStream(tmpfile)))) {
|
||||
new DataInputStream(new BufferedInputStream(Files.newInputStream(mapFile)))) {
|
||||
map = new int[in.readInt()];
|
||||
// NOTE: The current code assumes here that the map is complete,
|
||||
// i.e., every ordinal gets one and exactly one value. Otherwise,
|
||||
// we may run into an EOF here, or vice versa, not read everything.
|
||||
// NOTE: The current code assumes that the map is complete,
|
||||
// i.e. that every ordinal gets exactly one value. Otherwise,
|
||||
// we may run into an EOF here, or not read everything.
|
||||
for (int i = 0; i < map.length; i++) {
|
||||
int origordinal = in.readInt();
|
||||
int newordinal = in.readInt();
|
||||
map[origordinal] = newordinal;
|
||||
int origOrdinal = in.readInt();
|
||||
int newOrdinal = in.readInt();
|
||||
map[origOrdinal] = newOrdinal;
|
||||
}
|
||||
}
|
||||
|
||||
// Delete the temporary file, which is no longer needed.
|
||||
Files.delete(tmpfile);
|
||||
Files.delete(mapFile);
|
||||
|
||||
return map;
|
||||
}
|
||||
|
@ -863,8 +854,8 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
|
||||
/**
|
||||
* Replaces the current taxonomy with the given one. This method should generally be called in
|
||||
* conjunction with {@link IndexWriter#addIndexes(Directory...)} to replace both the taxonomy as
|
||||
* well as the search index content.
|
||||
* conjunction with {@link IndexWriter#addIndexes(Directory...)} to replace both the taxonomy and
|
||||
* the search index content.
|
||||
*/
|
||||
public synchronized void replaceTaxonomy(Directory taxoDir) throws IOException {
|
||||
// replace the taxonomy by doing IW optimized operations
|
||||
|
@ -873,7 +864,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
shouldRefreshReaderManager = true;
|
||||
initReaderManager(); // ensure that it's initialized
|
||||
refreshReaderManager();
|
||||
nextID = indexWriter.getDocStats().maxDoc;
|
||||
nextID.set(indexWriter.getDocStats().maxDoc);
|
||||
taxoArrays = null; // must nullify so that it's re-computed next time it's needed
|
||||
|
||||
// need to clear the cache, so that addCategory won't accidentally return
|
||||
|
|
|
@ -78,7 +78,7 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
|
|||
}
|
||||
}
|
||||
|
||||
private final synchronized void initChildrenSiblings(TaxonomyIndexArrays copyFrom) {
|
||||
private synchronized void initChildrenSiblings(TaxonomyIndexArrays copyFrom) {
|
||||
if (!initializedChildren) { // must do this check !
|
||||
children = new int[parents.length];
|
||||
siblings = new int[parents.length];
|
||||
|
@ -141,7 +141,6 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
|
|||
throw new CorruptIndexException(
|
||||
"Missing parent data for category " + (doc + leafContext.docBase), reader.toString());
|
||||
}
|
||||
// we're putting an int and converting it back so it should be safe
|
||||
parents[doc + leafContext.docBase] = Math.toIntExact(parentValues.longValue());
|
||||
}
|
||||
}
|
||||
|
@ -204,7 +203,7 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
|
|||
@Override
|
||||
public synchronized long ramBytesUsed() {
|
||||
long ramBytesUsed =
|
||||
RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 3 * RamUsageEstimator.NUM_BYTES_OBJECT_REF + 1;
|
||||
RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 3L * RamUsageEstimator.NUM_BYTES_OBJECT_REF + 1;
|
||||
ramBytesUsed += RamUsageEstimator.shallowSizeOf(parents);
|
||||
if (children != null) {
|
||||
ramBytesUsed += RamUsageEstimator.shallowSizeOf(children);
|
||||
|
|
Loading…
Reference in New Issue