mirror of https://github.com/apache/lucene.git
Minor refactoring and cleanup to taxonomy index code (#11775)
parent add309bb40
commit bcc116057d
@@ -32,7 +32,7 @@ public class FacetLabel implements Comparable<FacetLabel> {
   /*
    * copied from DocumentWriterPerThread -- if a FacetLabel is resolved to a
    * drill-down term which is encoded to a larger term than that length, it is
-   * silently dropped! Therefore we limit the number of characters to MAX/4 to
+   * silently dropped! Therefore, we limit the number of characters to MAX/4 to
    * be on the safe side.
    */
   /** The maximum number of characters a {@link FacetLabel} can have. */
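
The MAX/4 cap described above is easiest to see with the numbers written out. A minimal sketch of the arithmetic, assuming the limit in question is IndexWriter's maximum term length (the class and variable names below are ours):

```java
import org.apache.lucene.index.IndexWriter;

public class MaxLabelLengthSketch {
  public static void main(String[] args) {
    // Terms longer than this many UTF-8 bytes cannot be indexed.
    int maxTermBytes = IndexWriter.MAX_TERM_LENGTH; // 32766
    // A Java char encodes to at most 3 UTF-8 bytes, so dividing the byte
    // budget by 4 is a conservative character cap -- "the safe side".
    int maxLabelChars = maxTermBytes / 4;
    System.out.println(maxLabelChars); // 8191
  }
}
```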
@@ -48,10 +48,10 @@ public class FacetLabel implements Comparable<FacetLabel> {
   /** The number of components of this {@link FacetLabel}. */
   public final int length;

-  // Used by subpath
+  // Used by sub-path
   private FacetLabel(final FacetLabel copyFrom, final int prefixLen) {
-    // while the code which calls this method is safe, at some point a test
-    // tripped on AIOOBE in toString, but we failed to reproduce. adding the
+    // While the code which calls this method is safe, at some point a test
+    // tripped on AIOOBE in toString, but we failed to reproduce. Adding
     // assert as a safety check.
     assert prefixLen >= 0 && prefixLen <= copyFrom.components.length
         : "prefixLen cannot be negative nor larger than the given components' length: prefixLen="
@@ -103,7 +103,7 @@ public class FacetLabel implements Comparable<FacetLabel> {
   /** Compares this path with another {@link FacetLabel} for lexicographic order. */
   @Override
   public int compareTo(FacetLabel other) {
-    final int len = length < other.length ? length : other.length;
+    final int len = Math.min(length, other.length);
     for (int i = 0, j = 0; i < len; i++, j++) {
       int cmp = components[i].compareTo(other.components[j]);
       if (cmp < 0) {
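
The ternary-to-Math.min change above is behavior-preserving: compareTo still orders labels component by component, with a prefix ordering before any longer label. A quick usage sketch:

```java
import org.apache.lucene.facet.taxonomy.FacetLabel;

public class CompareDemo {
  public static void main(String[] args) {
    FacetLabel shorter = new FacetLabel("date", "2010");
    FacetLabel longer = new FacetLabel("date", "2010", "March");
    // Components are compared pairwise; when every shared component is
    // equal, the label with fewer components orders first.
    System.out.println(shorter.compareTo(longer) < 0); // true
  }
}
```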
@@ -120,7 +120,7 @@ public class FacetLabel implements Comparable<FacetLabel> {

   @Override
   public boolean equals(Object obj) {
-    if (!(obj instanceof FacetLabel)) {
+    if (obj instanceof FacetLabel == false) {
       return false;
     }

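
The rewrite from `!(obj instanceof FacetLabel)` to `obj instanceof FacetLabel == false` in the hunk above is purely stylistic; the `== false` spelling is a convention common in the Lucene codebase because the negation is harder to overlook when reading. Both forms compile to the same check:

```java
public class InstanceOfStyle {
  static boolean isString(Object obj) {
    // instanceof binds tighter than ==, so this parses as
    // (obj instanceof String) == false, identical to !(obj instanceof String).
    if (obj instanceof String == false) {
      return false;
    }
    return true;
  }
}
```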
@@ -62,10 +62,10 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
   private static final int DEFAULT_CACHE_VALUE = 4000;

   // NOTE: very coarse estimate!
-  private static final int BYTES_PER_CACHE_ENTRY =
-      4 * RamUsageEstimator.NUM_BYTES_OBJECT_REF
-          + 4 * RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
-          + 8 * Character.BYTES;
+  private static final long BYTES_PER_CACHE_ENTRY =
+      4L * RamUsageEstimator.NUM_BYTES_OBJECT_REF
+          + 4L * RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
+          + 8L * Character.BYTES;

   private final DirectoryTaxonomyWriter taxoWriter;
   private final long taxoEpoch; // used in doOpenIfChanged
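
Widening BYTES_PER_CACHE_ENTRY to long (and its factors to long literals) keeps any later arithmetic on it, such as multiplying by an entry count, in 64-bit math. A small demo of the failure mode being guarded against, with made-up numbers:

```java
public class OverflowDemo {
  public static void main(String[] args) {
    int bytesPerEntry = 100;   // hypothetical per-entry estimate
    int entries = 30_000_000;  // hypothetical number of cache entries
    // int arithmetic silently wraps past 2^31 - 1:
    System.out.println(bytesPerEntry * entries);        // -1294967296
    // a single long operand promotes the whole expression:
    System.out.println((long) bytesPerEntry * entries); // 3000000000
  }
}
```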
@@ -91,7 +91,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
    * @param categoryCache an ordinal to FacetLabel mapping if it already exists
    * @param taxoArrays taxonomy arrays that store the parent, siblings, children information
    */
-  protected DirectoryTaxonomyReader(
+  DirectoryTaxonomyReader(
       DirectoryReader indexReader,
       DirectoryTaxonomyWriter taxoWriter,
       LRUHashMap<FacetLabel, Integer> ordinalCache,
@@ -103,14 +103,9 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
     this.taxoEpoch = taxoWriter == null ? -1 : taxoWriter.getTaxonomyEpoch();

     // use the same instance of the cache, note the protective code in getOrdinal and getPath
-    this.ordinalCache =
-        ordinalCache == null
-            ? new LRUHashMap<FacetLabel, Integer>(DEFAULT_CACHE_VALUE)
-            : ordinalCache;
+    this.ordinalCache = ordinalCache == null ? new LRUHashMap<>(DEFAULT_CACHE_VALUE) : ordinalCache;
     this.categoryCache =
-        categoryCache == null
-            ? new LRUHashMap<Integer, FacetLabel>(DEFAULT_CACHE_VALUE)
-            : categoryCache;
+        categoryCache == null ? new LRUHashMap<>(DEFAULT_CACHE_VALUE) : categoryCache;

     this.taxoArrays = taxoArrays != null ? new TaxonomyIndexArrays(indexReader, taxoArrays) : null;
   }
@@ -151,16 +146,6 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
       categoryCache = new LRUHashMap<>(DEFAULT_CACHE_VALUE);
     }

-  private synchronized void initTaxoArrays() throws IOException {
-    if (taxoArrays == null) {
-      // according to Java Concurrency in Practice, this might perform better on
-      // some JVMs, because the array initialization doesn't happen on the
-      // volatile member.
-      TaxonomyIndexArrays tmpArrays = new TaxonomyIndexArrays(indexReader);
-      taxoArrays = tmpArrays;
-    }
-  }
-
   @Override
   protected void doClose() throws IOException {
     indexReader.close();
@@ -215,18 +200,18 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
         }
       }

-      final DirectoryTaxonomyReader newtr;
+      final DirectoryTaxonomyReader newTaxoReader;
       if (recreated) {
         // if recreated, do not reuse anything from this instance. the information
         // will be lazily computed by the new instance when needed.
-        newtr = new DirectoryTaxonomyReader(r2, taxoWriter, null, null, null);
+        newTaxoReader = new DirectoryTaxonomyReader(r2, taxoWriter, null, null, null);
       } else {
-        newtr =
+        newTaxoReader =
             new DirectoryTaxonomyReader(r2, taxoWriter, ordinalCache, categoryCache, taxoArrays);
       }

       success = true;
-      return newtr;
+      return newTaxoReader;
     } finally {
       if (!success) {
         IOUtils.closeWhileHandlingException(r2);
@@ -256,10 +241,18 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
   @Override
   public ParallelTaxonomyArrays getParallelTaxonomyArrays() throws IOException {
     ensureOpen();
-    if (taxoArrays == null) {
-      initTaxoArrays();
+    // By copying to a local variable we only perform a volatile read once (if it's not null)
+    TaxonomyIndexArrays arrays = taxoArrays;
+    if (arrays == null) {
+      synchronized (this) {
+        arrays = taxoArrays;
+        if (arrays == null) {
+          arrays = new TaxonomyIndexArrays(indexReader);
+          taxoArrays = arrays;
+        }
+      }
     }
-    return taxoArrays;
+    return arrays;
   }

   @Override
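
The hunk above replaces the synchronized initTaxoArrays() helper with the standard double-checked locking idiom: one volatile read on the fast path, with the lock taken only for first-time initialization. A generic sketch of the same shape (names ours):

```java
import java.util.function.Supplier;

class Lazy<T> {
  // Must be volatile: the unlocked read below is only safe because the
  // volatile write/read pair guarantees the object is fully constructed
  // by the time another thread sees it.
  private volatile T value;
  private final Supplier<T> factory;

  Lazy(Supplier<T> factory) {
    this.factory = factory;
  }

  T get() {
    T local = value; // single volatile read on the common path
    if (local == null) {
      synchronized (this) {
        local = value; // re-check: another thread may have won the race
        if (local == null) {
          local = factory.get();
          value = local; // volatile write publishes the instance
        }
      }
    }
    return local;
  }
}
```

Copying the field into a local is what lets the method perform exactly one volatile read when the value already exists, which is the point the new comment calls out.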
@@ -279,14 +272,14 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
     synchronized (ordinalCache) {
       Integer res = ordinalCache.get(cp);
       if (res != null) {
-        if (res.intValue() < indexReader.maxDoc()) {
+        if (res < indexReader.maxDoc()) {
           // Since the cache is shared with DTR instances allocated from
           // doOpenIfChanged, we need to ensure that the ordinal is one that
           // this DTR instance recognizes.
-          return res.intValue();
+          return res;
         } else {
           // if we get here, it means that the category was found in the cache,
-          // but is not recognized by this TR instance. Therefore there's no
+          // but is not recognized by this TR instance. Therefore, there's no
           // need to continue search for the path on disk, because we won't find
           // it there too.
           return TaxonomyReader.INVALID_ORDINAL;
@@ -306,13 +299,13 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
     if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
       ret = docs.docID();

-      // we only store the fact that a category exists, not its inexistence.
+      // We only store the fact that a category exists, not otherwise.
       // This is required because the caches are shared with new DTR instances
       // that are allocated from doOpenIfChanged. Therefore, if we only store
-      // information about found categories, we cannot accidently tell a new
+      // information about found categories, we cannot accidentally tell a new
       // generation of DTR that a category does not exist.
       synchronized (ordinalCache) {
-        ordinalCache.put(cp, Integer.valueOf(ret));
+        ordinalCache.put(cp, ret);
       }
     }

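
A note on the boxing changes in the last two hunks: autoboxing an int compiles to `Integer.valueOf(ret)`, and unboxing compiles to `intValue()`, so dropping the explicit calls in `ordinalCache.put(cp, ret)` and `return res;` changes nothing at runtime.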
@@ -325,7 +318,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab

     // Since the cache is shared with DTR instances allocated from
     // doOpenIfChanged, we need to ensure that the ordinal is one that this DTR
-    // instance recognizes. Therefore we do this check up front, before we hit
+    // instance recognizes. Therefore, we do this check up front, before we hit
     // the cache.
     checkOrdinalBounds(ordinal);

@@ -398,8 +391,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
    * was created using StoredFields (with no performance gains) and uses DocValues based iteration
    * when the index is based on BinaryDocValues. Lucene switched to BinaryDocValues in version 9.0
    *
-   * @param ordinals Array of ordinals that are assigned to categories inserted into the taxonomy
-   *     index
+   * @param ordinals Array of category ordinals that were added to the taxonomy index
    */
   @Override
   public FacetLabel[] getBulkPath(int... ordinals) throws IOException {
@@ -464,7 +456,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
         return super.getBulkPath(ordinals);
       }
     }
-    // values is leaf specific so you only advance till you reach the target within the leaf
+    // values is leaf specific, so you only advance till you reach the target within the leaf
     boolean success = values.advanceExact(ordinals[i] - leafReaderDocBase);
     assert success;
     bulkPath[originalPosition[i]] =
@@ -558,8 +550,8 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
   public String toString(int max) {
     ensureOpen();
     StringBuilder sb = new StringBuilder();
-    int upperl = Math.min(max, indexReader.maxDoc());
-    for (int i = 0; i < upperl; i++) {
+    int limit = Math.min(max, indexReader.maxDoc());
+    for (int i = 0; i < limit; i++) {
       try {
         FacetLabel category = this.getPath(i);
         if (category == null) {
@@ -570,7 +562,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader implements Accountab
           sb.append(i).append(": EMPTY STRING!! \n");
           continue;
         }
-        sb.append(i).append(": ").append(category.toString()).append("\n");
+        sb.append(i).append(": ").append(category).append("\n");
       } catch (
           @SuppressWarnings("unused")
           IOException e) {
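
Similarly, `StringBuilder.append(Object)` is specified to append `String.valueOf(obj)`, so dropping the explicit `category.toString()` above is behavior-preserving (and category has already been null-checked at this point anyway).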
@@ -89,11 +89,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
   private final IndexWriter indexWriter;
   private final TaxonomyWriterCache cache;
   private final AtomicInteger cacheMisses = new AtomicInteger(0);
+  private final AtomicInteger nextID = new AtomicInteger(0);
+  private final Field fullPathField;

   // Records the taxonomy index epoch, updated on replaceTaxonomy as well.
   private long indexEpoch;

-  private Field fullPathField;
   private int cacheMissesUntilFill = 11;
   private boolean shouldFillCache = true;

@@ -108,14 +109,13 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
    * When the cache is <B>not</B> complete, and we can't find a category in the cache, we still need
    * to look for it in the on-disk index; Therefore when the cache is not complete, we need to open
    * a "reader" to the taxonomy index. The cache becomes incomplete if it was never filled with the
-   * existing categories, or if a put() to the cache ever returned true (meaning that some of the
-   * cached data was cleared).
+   * existing categories, or if a put() to the cache ever returned true (meaning that some cached
+   * data was cleared).
    */
   private volatile boolean cacheIsComplete;

   private volatile boolean isClosed = false;
   private volatile TaxonomyIndexArrays taxoArrays;
-  private volatile int nextID;

   /**
    * Construct a Taxonomy writer.
@@ -169,14 +169,14 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {

     fullPathField = new StringField(Consts.FULL, "", Field.Store.NO);

-    nextID = indexWriter.getDocStats().maxDoc;
+    nextID.set(indexWriter.getDocStats().maxDoc);

     if (cache == null) {
       cache = defaultTaxonomyWriterCache();
     }
     this.cache = cache;

-    if (nextID == 0) {
+    if (nextID.get() == 0) {
       cacheIsComplete = true;
       // Make sure that the taxonomy always contain the root category
       // with category id 0.
|
@ -424,9 +424,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
||||||
} else {
|
} else {
|
||||||
parent = TaxonomyReader.INVALID_ORDINAL;
|
parent = TaxonomyReader.INVALID_ORDINAL;
|
||||||
}
|
}
|
||||||
int id = addCategoryDocument(cp, parent);
|
return addCategoryDocument(cp, parent);
|
||||||
|
|
||||||
return id;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -439,7 +437,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
   }

   /**
-   * Note that the methods calling addCategoryDocument() are synchornized, so this method is
+   * Note that the methods calling addCategoryDocument() are synchronized, so this method is
    * effectively synchronized as well.
    */
   private int addCategoryDocument(FacetLabel categoryPath, int parent) throws IOException {
@@ -455,11 +453,8 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {

     d.add(fullPathField);

-    // Note that we do no pass an Analyzer here because the fields that are
-    // added to the Document are untokenized or contains their own TokenStream.
-    // Therefore the IndexWriter's Analyzer has no effect.
     indexWriter.addDocument(d);
-    int id = nextID++;
+    int id = nextID.getAndIncrement();

     // added a category document, mark that ReaderManager is not up-to-date
     shouldRefreshReaderManager = true;
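
The notable change in this hunk is `nextID++` becoming `nextID.getAndIncrement()`, following the field's move from `volatile int` to a final AtomicInteger. On a volatile field, `++` is still three separate steps (read, add, write) and therefore not atomic; `getAndIncrement()` is. Per the javadoc above, the callers here are synchronized anyway, so the refactor mainly buys a final field and self-documenting intent. A minimal sketch of the hazard:

```java
import java.util.concurrent.atomic.AtomicInteger;

class IdAllocator {
  // With `private volatile int nextID`, two unsynchronized threads running
  // `nextID++` can both read the same value and hand out a duplicate id.
  // AtomicInteger makes the read-modify-write a single atomic step.
  private final AtomicInteger nextID = new AtomicInteger(0);

  int allocate() {
    return nextID.getAndIncrement(); // returns the previous value
  }
}
```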
@@ -491,7 +486,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
     // addCategoryDocument -- when this method returns, we must know that the
     // reader manager's state is current. also, it sets shouldRefresh to false,
     // and this cannot overlap with addCatDoc too.
-    // NOTE: since this method is sync'ed, it can call maybeRefresh, instead of
+    // NOTE: since this method is synced, it can call maybeRefresh, instead of
     // maybeRefreshBlocking. If ever this is changed, make sure to change the
     // call too.
     if (shouldRefreshReaderManager && initializedReaderManager) {
@@ -567,7 +562,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
   @Override
   public int getSize() {
     ensureOpen();
-    return nextID;
+    return nextID.get();
   }

   /**
@@ -656,33 +651,33 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
   }

   private TaxonomyIndexArrays getTaxoArrays() throws IOException {
-    if (taxoArrays == null) {
+    // By copying to a local variable we only perform a volatile read once (if it's not null)
+    TaxonomyIndexArrays arrays = taxoArrays;
+    if (arrays == null) {
       synchronized (this) {
-        if (taxoArrays == null) {
+        arrays = taxoArrays;
+        if (arrays == null) {
           initReaderManager();
           DirectoryReader reader = readerManager.acquire();
           try {
-            // according to Java Concurrency, this might perform better on some
-            // JVMs, since the object initialization doesn't happen on the
-            // volatile member.
-            TaxonomyIndexArrays tmpArrays = new TaxonomyIndexArrays(reader);
-            taxoArrays = tmpArrays;
+            arrays = new TaxonomyIndexArrays(reader);
           } finally {
             readerManager.release(reader);
           }
+          taxoArrays = arrays;
         }
       }
     }
-    return taxoArrays;
+    return arrays;
   }

   @Override
   public int getParent(int ordinal) throws IOException {
     ensureOpen();
     // Note: the following if() just enforces that a user can never ask
-    // for the parent of a nonexistant category - even if the parent array
+    // for the parent of a nonexistent category - even if the parent array
     // was allocated bigger than it really needs to be.
-    Objects.checkIndex(ordinal, nextID);
+    Objects.checkIndex(ordinal, nextID.get());

     int[] parents = getTaxoArrays().parents();
     assert ordinal < parents.length
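
This is the same double-checked locking shape as DirectoryTaxonomyReader.getParallelTaxonomyArrays() earlier in the commit (see the Lazy sketch there). One detail worth noting: the volatile publish `taxoArrays = arrays` happens only after the reader has been released, and if reading the index throws, the field stays null so a later call simply retries.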
@@ -697,11 +692,9 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
    */
   public void addTaxonomy(Directory taxoDir, OrdinalMap map) throws IOException {
     ensureOpen();
-    DirectoryReader r = DirectoryReader.open(taxoDir);
-    try {
+    try (DirectoryReader r = DirectoryReader.open(taxoDir)) {
       final int size = r.numDocs();
-      final OrdinalMap ordinalMap = map;
-      ordinalMap.setSize(size);
+      map.setSize(size);
       int base = 0;
       PostingsEnum docs = null;
       for (final LeafReaderContext ctx : r.leaves()) {
@@ -713,13 +706,11 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
           FacetLabel cp = new FacetLabel(FacetsConfig.stringToPath(te.term().utf8ToString()));
           final int ordinal = addCategory(cp);
           docs = te.postings(docs, PostingsEnum.NONE);
-          ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
+          map.addMapping(docs.nextDoc() + base, ordinal);
         }
         base += ar.maxDoc(); // no deletions, so we're ok
       }
-      ordinalMap.addDone();
-    } finally {
-      r.close();
+      map.addDone();
     }
   }

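
The addTaxonomy change in the previous two hunks swaps a manual try/finally close for try-with-resources (and drops the redundant `ordinalMap` alias of `map`). The two forms are equivalent on the success path; the improvement is that an exception thrown by close() no longer masks an exception thrown inside the block, it is attached as suppressed instead. The pattern in isolation:

```java
import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.Directory;

class ReadOnce {
  static int countDocs(Directory dir) throws IOException {
    // r.close() runs on every exit path; if close() itself fails while an
    // exception is already in flight, the close() failure is recorded as
    // suppressed rather than replacing the primary exception.
    try (DirectoryReader r = DirectoryReader.open(dir)) {
      return r.numDocs();
    }
  }
}
```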
@@ -736,22 +727,22 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
    * the same time, it is recommended to put the first taxonomy's map in memory, and all the rest on
    * disk (later to be automatically read into memory one by one, when needed).
    */
-  public static interface OrdinalMap {
+  public interface OrdinalMap {
     /**
      * Set the size of the map. This MUST be called before addMapping(). It is assumed (but not
      * verified) that addMapping() will then be called exactly 'size' times, with different
      * origOrdinals between 0 and size-1.
      */
-    public void setSize(int size) throws IOException;
+    void setSize(int size) throws IOException;

     /** Record a mapping. */
-    public void addMapping(int origOrdinal, int newOrdinal) throws IOException;
+    void addMapping(int origOrdinal, int newOrdinal) throws IOException;

     /**
      * Call addDone() to say that all addMapping() have been done. In some implementations this
      * might free some resources.
      */
-    public void addDone() throws IOException;
+    void addDone() throws IOException;

     /**
      * Return the map from the taxonomy's original (consecutive) ordinals to the new taxonomy's
@@ -760,7 +751,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
      * needed. Calling it will also free all resources that the map might be holding (such as
      * temporary disk space), other than the returned int[].
      */
-    public int[] getMap() throws IOException;
+    int[] getMap() throws IOException;
   }

   /** {@link OrdinalMap} maintained in memory */
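
The modifier trimming throughout OrdinalMap is redundancy removal, not a visibility change: every method of an interface is implicitly public, and a nested interface is implicitly static, per the Java language specification. These two declarations are identical:

```java
interface Example {
  public void explicit() throws Exception; // redundant modifier
  void implicit() throws Exception;        // implicitly public and abstract
}
```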
@@ -793,13 +784,15 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {

   /** {@link OrdinalMap} maintained on file system */
   public static final class DiskOrdinalMap implements OrdinalMap {
-    Path tmpfile;
-    DataOutputStream out;
+    private final Path mapFile;
+    private int[] map = null;
+    private DataOutputStream out;

     /** Sole constructor. */
-    public DiskOrdinalMap(Path tmpfile) throws IOException {
-      this.tmpfile = tmpfile;
-      out = new DataOutputStream(new BufferedOutputStream(Files.newOutputStream(tmpfile)));
+    public DiskOrdinalMap(Path mapFile) throws IOException {
+      this.mapFile = mapFile;
+      out = new DataOutputStream(new BufferedOutputStream(Files.newOutputStream(mapFile)));
     }

     @Override
@@ -821,8 +814,6 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
       }
     }

-    int[] map = null;
-
     @Override
     public int[] getMap() throws IOException {
       if (map != null) {
@@ -830,20 +821,20 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
       }
       addDone(); // in case this wasn't previously called
       try (DataInputStream in =
-          new DataInputStream(new BufferedInputStream(Files.newInputStream(tmpfile)))) {
+          new DataInputStream(new BufferedInputStream(Files.newInputStream(mapFile)))) {
         map = new int[in.readInt()];
-        // NOTE: The current code assumes here that the map is complete,
-        // i.e., every ordinal gets one and exactly one value. Otherwise,
-        // we may run into an EOF here, or vice versa, not read everything.
+        // NOTE: The current code assumes that the map is complete,
+        // i.e. that every ordinal gets exactly one value. Otherwise,
+        // we may run into an EOF here, or not read everything.
         for (int i = 0; i < map.length; i++) {
-          int origordinal = in.readInt();
-          int newordinal = in.readInt();
-          map[origordinal] = newordinal;
+          int origOrdinal = in.readInt();
+          int newOrdinal = in.readInt();
+          map[origOrdinal] = newOrdinal;
         }
       }

       // Delete the temporary file, which is no longer needed.
-      Files.delete(tmpfile);
+      Files.delete(mapFile);

       return map;
     }
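
The read loop above implies DiskOrdinalMap's on-disk format: a leading pair count, then that many (origOrdinal, newOrdinal) int pairs. A writer for that layout could look like the sketch below; this is an illustration of the format, not the class's actual addMapping implementation:

```java
import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

class IntPairFile {
  // Writes a count, then one (origOrdinal, newOrdinal) pair per mapping.
  static void write(Path file, int[] newOrdinalByOrig) throws IOException {
    try (DataOutputStream out =
        new DataOutputStream(new BufferedOutputStream(Files.newOutputStream(file)))) {
      out.writeInt(newOrdinalByOrig.length);
      for (int orig = 0; orig < newOrdinalByOrig.length; orig++) {
        out.writeInt(orig);
        out.writeInt(newOrdinalByOrig[orig]);
      }
    }
  }
}
```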
@@ -863,8 +854,8 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {

   /**
    * Replaces the current taxonomy with the given one. This method should generally be called in
-   * conjunction with {@link IndexWriter#addIndexes(Directory...)} to replace both the taxonomy as
-   * well as the search index content.
+   * conjunction with {@link IndexWriter#addIndexes(Directory...)} to replace both the taxonomy and
+   * the search index content.
    */
   public synchronized void replaceTaxonomy(Directory taxoDir) throws IOException {
     // replace the taxonomy by doing IW optimized operations
@@ -873,7 +864,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
     shouldRefreshReaderManager = true;
     initReaderManager(); // ensure that it's initialized
     refreshReaderManager();
-    nextID = indexWriter.getDocStats().maxDoc;
+    nextID.set(indexWriter.getDocStats().maxDoc);
     taxoArrays = null; // must nullify so that it's re-computed next time it's needed

     // need to clear the cache, so that addCategory won't accidentally return
@@ -78,7 +78,7 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
     }
   }

-  private final synchronized void initChildrenSiblings(TaxonomyIndexArrays copyFrom) {
+  private synchronized void initChildrenSiblings(TaxonomyIndexArrays copyFrom) {
     if (!initializedChildren) { // must do this check !
       children = new int[parents.length];
       siblings = new int[parents.length];
@@ -141,7 +141,6 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
           throw new CorruptIndexException(
               "Missing parent data for category " + (doc + leafContext.docBase), reader.toString());
         }
-        // we're putting an int and converting it back so it should be safe
         parents[doc + leafContext.docBase] = Math.toIntExact(parentValues.longValue());
       }
     }
@@ -204,7 +203,7 @@ class TaxonomyIndexArrays extends ParallelTaxonomyArrays implements Accountable
   @Override
   public synchronized long ramBytesUsed() {
     long ramBytesUsed =
-        RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 3 * RamUsageEstimator.NUM_BYTES_OBJECT_REF + 1;
+        RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 3L * RamUsageEstimator.NUM_BYTES_OBJECT_REF + 1;
     ramBytesUsed += RamUsageEstimator.shallowSizeOf(parents);
     if (children != null) {
       ramBytesUsed += RamUsageEstimator.shallowSizeOf(children);
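
The `3` to `3L` change here is the same overflow-proofing applied to BYTES_PER_CACHE_ENTRY in DirectoryTaxonomyReader earlier in the commit: a long literal in the multiplication forces the whole estimate to be computed in 64-bit arithmetic from the start.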