mirror of https://github.com/apache/lucene.git
simplify DirTaxoWriter.addCategory synchronization in an attempt to make concurrency more safer
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1364618 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
aa28aff77e
commit
7022593713
|
@ -375,7 +375,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
* returning the category's ordinal, or a negative number in case the
|
||||
* category does not yet exist in the taxonomy.
|
||||
*/
|
||||
protected int findCategory(CategoryPath categoryPath) throws IOException {
|
||||
protected synchronized int findCategory(CategoryPath categoryPath) throws IOException {
|
||||
// If we can find the category in the cache, or we know the cache is
|
||||
// complete, we can return the response directly from it
|
||||
int res = cache.get(categoryPath);
|
||||
|
@ -474,12 +474,11 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
@Override
|
||||
public int addCategory(CategoryPath categoryPath) throws IOException {
|
||||
ensureOpen();
|
||||
// If the category is already in the cache and/or the taxonomy, we
|
||||
// should return its existing ordinal
|
||||
int res = findCategory(categoryPath);
|
||||
// check the cache outside the synchronized block. this results in better
|
||||
// concurrency when categories are there.
|
||||
int res = cache.get(categoryPath);
|
||||
if (res < 0) {
|
||||
// the category is neither in the cache nor in the index - following code
|
||||
// cannot be executed in parallel.
|
||||
// the category is not in the cache - following code cannot be executed in parallel.
|
||||
synchronized (this) {
|
||||
res = findCategory(categoryPath);
|
||||
if (res < 0) {
|
||||
|
@ -494,7 +493,6 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
}
|
||||
}
|
||||
return res;
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -22,22 +22,25 @@ import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
|
|||
|
||||
/**
|
||||
* TaxonomyWriterCache is a relatively simple interface for a cache of
|
||||
* category->ordinal mappings, used in TaxonomyWriter implementations
|
||||
* (such as {@link DirectoryTaxonomyWriter}).
|
||||
* <P>
|
||||
* It basically has put() methods for adding a mapping, and get() for looking
|
||||
* a mapping up the cache. The cache does <B>not</B> guarantee to hold
|
||||
* everything that has been put into it, and might in fact selectively
|
||||
* delete some of the mappings (e.g., the ones least recently used).
|
||||
* This means that if get() returns a negative response, it does not
|
||||
* necessarily mean that the category doesn't exist - just that it is not
|
||||
* in the cache. The caller can only infer that the category doesn't exist
|
||||
* if it knows the cache to be complete (because all the categories were
|
||||
* loaded into the cache, and since then no put() returned true).
|
||||
* <P> However,
|
||||
* if it does so, it should clear out large parts of the cache at once, because
|
||||
* the user will typically need to work hard to recover from every cache
|
||||
* category->ordinal mappings, used in TaxonomyWriter implementations (such as
|
||||
* {@link DirectoryTaxonomyWriter}).
|
||||
* <p>
|
||||
* It basically has put() methods for adding a mapping, and get() for looking a
|
||||
* mapping up the cache. The cache does <B>not</B> guarantee to hold everything
|
||||
* that has been put into it, and might in fact selectively delete some of the
|
||||
* mappings (e.g., the ones least recently used). This means that if get()
|
||||
* returns a negative response, it does not necessarily mean that the category
|
||||
* doesn't exist - just that it is not in the cache. The caller can only infer
|
||||
* that the category doesn't exist if it knows the cache to be complete (because
|
||||
* all the categories were loaded into the cache, and since then no put()
|
||||
* returned true).
|
||||
* <p>
|
||||
* However, if it does so, it should clear out large parts of the cache at once,
|
||||
* because the user will typically need to work hard to recover from every cache
|
||||
* cleanup (see {@link #put(CategoryPath, int)}'s return value).
|
||||
* <p>
|
||||
* <b>NOTE:</b> the cache may be accessed concurrently by multiple threads,
|
||||
* therefore cache implementations should take this into consideration.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
|
|
Loading…
Reference in New Issue