mirror of https://github.com/apache/lucene.git
LUCENE-3441: facets NRT support
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1412149 13f79535-47bb-0310-9956-ffa450edef68
parent 258baa7069
commit 8b5e57faee
@@ -34,6 +34,26 @@ Changes in backwards compatibility policy
   Override lengthNorm and/or encode/decodeNormValue to change the specifics,
   like Lucene 3.x. (Robert Muir)
 
+* LUCENE-3441: The facet module now supports NRT. As a result, the following
+  changes were made:
+  - DirectoryTaxonomyReader has a new constructor which takes a
+    DirectoryTaxonomyWriter. You should use that constructor in order to get
+    the NRT support (or the old one for non-NRT).
+  - TaxonomyReader.refresh() removed in exchange for the TaxonomyReader.openIfChanged
+    static method. Similar to DirectoryReader, the method either returns null
+    if no changes were made to the taxonomy, or a new TR instance otherwise.
+    Instead of calling refresh(), you should write similar code to how you reopen
+    a regular DirectoryReader.
+  - TaxonomyReader.openIfChanged (previously refresh()) no longer throws
+    InconsistentTaxonomyException, and supports recreate. InconsistentTaxonomyException
+    was removed.
+  - ChildrenArrays was pulled out of TaxonomyReader into a top-level class.
+  - TaxonomyReader was made an abstract class (instead of an interface), with
+    methods such as close() and reference counting management pulled from
+    DirectoryTaxonomyReader, and made final. The rest of the methods remained
+    abstract.
+  (Shai Erera, Gilad Barkai)
+
 New Features
 
 * LUCENE-4226: New experimental StoredFieldsFormat that compresses chunks of
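For reference, the reopen pattern the entry above describes looks like the following minimal sketch (the helper method and variable names are illustrative, not part of this commit):

  import java.io.IOException;
  import org.apache.lucene.facet.taxonomy.TaxonomyReader;

  // Reopens taxoReader if the taxonomy changed; returns the reader to use.
  static TaxonomyReader reopenTaxonomyReader(TaxonomyReader taxoReader) throws IOException {
    TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(taxoReader);
    if (newTaxoReader == null) {
      return taxoReader; // no changes since the old reader was opened
    }
    taxoReader.close(); // or decRef(), if other threads still hold a reference
    return newTaxoReader;
  }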
@@ -8,8 +8,8 @@ import org.apache.lucene.facet.search.results.FacetResult;
 import org.apache.lucene.facet.search.results.FacetResultNode;
 import org.apache.lucene.facet.search.results.MutableFacetResultNode;
 import org.apache.lucene.facet.search.results.IntermediateFacetResult;
+import org.apache.lucene.facet.taxonomy.ChildrenArrays;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenArrays;
 import org.apache.lucene.facet.util.ResultSortUtils;
 
 /*

@@ -120,7 +120,7 @@ public class TopKFacetResultsHandler extends FacetResultsHandler
   * @return total number of descendants considered here by pq, excluding ordinal itself.
   */
  private int heapDescendants(int ordinal, Heap<FacetResultNode> pq,
-      MutableFacetResultNode parentResultNode, FacetArrays facetArrays, int offset) {
+      MutableFacetResultNode parentResultNode, FacetArrays facetArrays, int offset) throws IOException {
    int partitionSize = facetArrays.getArraysLength();
    int endOffset = offset + partitionSize;
    ChildrenArrays childrenArray = taxonomyReader.getChildrenArrays();
@@ -12,8 +12,8 @@ import org.apache.lucene.facet.search.results.FacetResult;
 import org.apache.lucene.facet.search.results.FacetResultNode;
 import org.apache.lucene.facet.search.results.MutableFacetResultNode;
 import org.apache.lucene.facet.search.results.IntermediateFacetResult;
+import org.apache.lucene.facet.taxonomy.ChildrenArrays;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenArrays;
 import org.apache.lucene.util.collections.IntIterator;
 import org.apache.lucene.util.collections.IntToObjectMap;
@@ -0,0 +1,87 @@
+package org.apache.lucene.facet.taxonomy;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Equivalent representations of the taxonomy's parent info,
+ * used internally for efficient computation of facet results:
+ * "youngest child" and "oldest sibling"
+ */
+public class ChildrenArrays {
+  
+  private final int[] youngestChild, olderSibling;
+  
+  public ChildrenArrays(int[] parents) {
+    this(parents, null);
+  }
+  
+  public ChildrenArrays(int[] parents, ChildrenArrays copyFrom) {
+    youngestChild = new int[parents.length];
+    olderSibling = new int[parents.length];
+    int first = 0;
+    if (copyFrom != null) {
+      System.arraycopy(copyFrom.getYoungestChildArray(), 0, youngestChild, 0, copyFrom.getYoungestChildArray().length);
+      System.arraycopy(copyFrom.getOlderSiblingArray(), 0, olderSibling, 0, copyFrom.getOlderSiblingArray().length);
+      first = copyFrom.getOlderSiblingArray().length;
+    }
+    computeArrays(parents, first);
+  }
+  
+  private void computeArrays(int[] parents, int first) {
+    // reset the youngest child of all ordinals. while this should be done only
+    // for the leaves, we don't know up front which are the leaves, so we reset
+    // all of them.
+    for (int i = first; i < parents.length; i++) {
+      youngestChild[i] = TaxonomyReader.INVALID_ORDINAL;
+    }
+    
+    // the root category has no parent, and therefore no siblings
+    if (first == 0) {
+      first = 1;
+      olderSibling[0] = TaxonomyReader.INVALID_ORDINAL;
+    }
+    
+    for (int i = first; i < parents.length; i++) {
+      // note that parents[i] is always < i, so the right-hand-side of
+      // the following line is already set when we get here
+      olderSibling[i] = youngestChild[parents[i]];
+      youngestChild[parents[i]] = i;
+    }
+  }
+  
+  /**
+   * Returns an {@code int[]} the size of the taxonomy listing for each category
+   * the ordinal of its immediate older sibling (the sibling in the taxonomy
+   * tree with the highest ordinal below that of the given ordinal). The value
+   * for a category with no older sibling is {@link TaxonomyReader#INVALID_ORDINAL}.
+   */
+  public int[] getOlderSiblingArray() {
+    return olderSibling;
+  }
+  
+  /**
+   * Returns an {@code int[]} the size of the taxonomy listing the ordinal of
+   * the youngest (highest numbered) child category of each category in the
+   * taxonomy. The value for a leaf category (a category without children) is
+   * {@link TaxonomyReader#INVALID_ORDINAL}.
+   */
+  public int[] getYoungestChildArray() {
+    return youngestChild;
+  }
+  
+}
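The two arrays above encode, for every ordinal, a linked list of its children: youngestChild[p] is the head of p's list, and olderSibling[c] is the "next" pointer. A minimal sketch of enumerating the children of a category (variable names are illustrative):

  ChildrenArrays ca = taxoReader.getChildrenArrays();
  int[] youngestChild = ca.getYoungestChildArray();
  int[] olderSibling = ca.getOlderSiblingArray();
  // children of parentOrdinal, from youngest (highest ordinal) to oldest;
  // INVALID_ORDINAL marks the end of the sibling chain.
  for (int child = youngestChild[parentOrdinal];
       child != TaxonomyReader.INVALID_ORDINAL;
       child = olderSibling[child]) {
    // process child
  }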
@@ -1,40 +0,0 @@
-package org.apache.lucene.facet.taxonomy;
-
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Exception indicating that a certain operation could not be performed
- * on a taxonomy related object because of an inconsistency.
- * <p>
- * For example, trying to refresh a taxonomy reader might fail in case
- * the underlying taxonomy was meanwhile modified in a manner which
- * does not allow to perform such a refresh. (See {@link TaxonomyReader#refresh()}.)
- *
- * @lucene.experimental
- */
-public class InconsistentTaxonomyException extends Exception {
-  
-  public InconsistentTaxonomyException(String message) {
-    super(message);
-  }
-  
-  public InconsistentTaxonomyException() {
-    super();
-  }
-  
-}
@@ -3,6 +3,9 @@ package org.apache.lucene.facet.taxonomy;
 import java.io.Closeable;
 import java.io.IOException;
 import java.util.Map;
+import java.util.concurrent.atomic.AtomicInteger;
+
+import org.apache.lucene.store.AlreadyClosedException;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -60,13 +63,13 @@ import java.util.Map;
  *
  * @lucene.experimental
  */
-public interface TaxonomyReader extends Closeable {
+public abstract class TaxonomyReader implements Closeable {
  
  /**
-   * The root category (the category with the empty path) always has the
-   * ordinal 0, to which we give a name ROOT_ORDINAL.
-   * getOrdinal() of an empty path will always return ROOT_ORDINAL, and
-   * getCategory(ROOT_ORDINAL) will return the empty path.
+   * The root category (the category with the empty path) always has the ordinal
+   * 0, to which we give a name ROOT_ORDINAL. {@link #getOrdinal(CategoryPath)}
+   * of an empty path will always return {@code ROOT_ORDINAL}, and
+   * {@link #getPath(int)} with {@code ROOT_ORDINAL} will return the empty path.
   */
  public final static int ROOT_ORDINAL = 0;
@@ -77,207 +80,189 @@ public interface TaxonomyReader extends Closeable {
  public final static int INVALID_ORDINAL = -1;
  
  /**
-   * getOrdinal() returns the ordinal of the category given as a path.
-   * The ordinal is the category's serial number, an integer which starts
-   * with 0 and grows as more categories are added (note that once a category
-   * is added, it can never be deleted).
-   * <P>
-   * If the given category wasn't found in the taxonomy, INVALID_ORDINAL is
-   * returned.
+   * If the taxonomy has changed since the provided reader was opened, open and
+   * return a new {@link TaxonomyReader}; else, return {@code null}. The new
+   * reader, if not {@code null}, will be the same type of reader as the one
+   * given to this method.
+   *
+   * <p>
+   * This method is typically far less costly than opening a fully new
+   * {@link TaxonomyReader} as it shares resources with the provided
+   * {@link TaxonomyReader}, when possible.
   */
-  public int getOrdinal(CategoryPath categoryPath) throws IOException;
-
-  /**
-   * getPath() returns the path name of the category with the given
-   * ordinal. The path is returned as a new CategoryPath object - to
-   * reuse an existing object, use {@link #getPath(int, CategoryPath)}.
-   * <P>
-   * A null is returned if a category with the given ordinal does not exist.
-   */
-  public CategoryPath getPath(int ordinal) throws IOException;
-
-  /**
-   * getPath() returns the path name of the category with the given
-   * ordinal. The path is written to the given CategoryPath object (which
-   * is cleared first).
-   * <P>
-   * If a category with the given ordinal does not exist, the given
-   * CategoryPath object is not modified, and the method returns
-   * <code>false</code>. Otherwise, the method returns <code>true</code>.
-   */
-  public boolean getPath(int ordinal, CategoryPath result) throws IOException;
-
-  /**
-   * refresh() re-reads the taxonomy information if there were any changes to
-   * the taxonomy since this instance was opened or last refreshed. Calling
-   * refresh() is more efficient than close()ing the old instance and opening a
-   * new one.
-   * <P>
-   * If there were no changes since this instance was opened or last refreshed,
-   * then this call does nothing. Note, however, that this is still a relatively
-   * slow method (as it needs to verify whether there have been any changes on
-   * disk to the taxonomy), so it should not be called too often needlessly. In
-   * faceted search, the taxonomy reader's refresh() should be called only after
-   * a reopen() of the main index.
-   * <P>
-   * Refreshing the taxonomy might fail in some cases, for example
-   * if the taxonomy was recreated since this instance was opened or last refreshed.
-   * In this case an {@link InconsistentTaxonomyException} is thrown,
-   * suggesting that in order to obtain up-to-date taxonomy data a new
-   * {@link TaxonomyReader} should be opened. Note: This {@link TaxonomyReader}
-   * instance remains unchanged and usable in this case, and the application can
-   * continue to use it, and should still {@link #close()} when no longer needed.
-   * <P>
-   * It should be noted that refresh() is similar in purpose to
-   * IndexReader.reopen(), but the two methods behave differently. refresh()
-   * refreshes the existing TaxonomyReader object, rather than opening a new one
-   * in addition to the old one as reopen() does. The reason is that in a
-   * taxonomy, one can only add new categories and cannot modify or delete
-   * existing categories; Therefore, there is no reason to keep an old snapshot
-   * of the taxonomy open - refreshing the taxonomy to the newest data and using
-   * this new snapshot in all threads (whether new or old) is fine. This saves
-   * us needing to keep multiple copies of the taxonomy open in memory.
-   * @return true if anything has changed, false otherwise.
-   */
-  public boolean refresh() throws IOException, InconsistentTaxonomyException;
-
+  public static <T extends TaxonomyReader> T openIfChanged(T oldTaxoReader) throws IOException {
+    @SuppressWarnings("unchecked")
+    final T newTaxoReader = (T) oldTaxoReader.doOpenIfChanged();
+    assert newTaxoReader != oldTaxoReader;
+    return newTaxoReader;
+  }
+
+  private volatile boolean closed = false;
+
+  // set refCount to 1 at start
+  private final AtomicInteger refCount = new AtomicInteger(1);
+  
  /**
-   * getParent() returns the ordinal of the parent category of the category
-   * with the given ordinal.
-   * <P>
-   * When a category is specified as a path name, finding the path of its
-   * parent is as trivial as dropping the last component of the path.
-   * getParent() is functionally equivalent to calling getPath() on the
-   * given ordinal, dropping the last component of the path, and then calling
-   * getOrdinal() to get an ordinal back. However, implementations are
-   * expected to provide a much more efficient implementation:
-   * <P>
-   * getParent() should be a very quick method, as it is used during the
-   * facet aggregation process in faceted search. Implementations will most
-   * likely want to serve replies to this method from a pre-filled cache.
-   * <P>
-   * If the given ordinal is the ROOT_ORDINAL, an INVALID_ORDINAL is returned.
-   * If the given ordinal is a top-level category, the ROOT_ORDINAL is returned.
-   * If an invalid ordinal is given (negative or beyond the last available
-   * ordinal), an ArrayIndexOutOfBoundsException is thrown. However, it is
-   * expected that getParent will only be called for ordinals which are
-   * already known to be in the taxonomy.
+   * performs the actual task of closing the resources that are used by the
+   * taxonomy reader.
   */
-  public int getParent(int ordinal) throws IOException;
+  protected abstract void doClose() throws IOException;

  /**
-   * getParentArray() returns an int array of size getSize() listing the
-   * ordinal of the parent category of each category in the taxonomy.
-   * <P>
-   * The caller can hold on to the array it got indefinitely - it is
-   * guaranteed that no-one else will modify it. The other side of the
-   * same coin is that the caller must treat the array it got as read-only
-   * and <B>not modify it</B>, because other callers might have gotten the
-   * same array too (and getParent() calls might be answered from the
-   * same array).
-   * <P>
-   * If you use getParentArray() instead of getParent(), remember that
-   * the array you got is (naturally) not modified after a refresh(),
-   * so you should always call getParentArray() again after a refresh().
-   * <P>
-   * This method's function is similar to allocating an array of size
-   * getSize() and filling it with getParent() calls, but implementations
-   * are encouraged to implement it much more efficiently, with O(1)
-   * complexity. This can be done, for example, by the implementation
-   * already keeping the parents in an array, and just returning this
-   * array (without any allocation or copying) when requested.
+   * Implements the actual opening of a new {@link TaxonomyReader} instance if
+   * the taxonomy has changed.
+   *
+   * @see #openIfChanged(TaxonomyReader)
   */
-  public int[] getParentArray() throws IOException;
+  protected abstract TaxonomyReader doOpenIfChanged() throws IOException;

  /**
-   * Equivalent representations of the taxonomy's parent info,
-   * used internally for efficient computation of facet results:
-   * "youngest child" and "oldest sibling"
+   * @throws AlreadyClosedException if this IndexReader is closed
   */
-  public static interface ChildrenArrays {
-    /**
-     * getYoungestChildArray() returns an int array of size getSize()
-     * listing the ordinal of the youngest (highest numbered) child
-     * category of each category in the taxonomy. The value for a leaf
-     * category (a category without children) is
-     * <code>INVALID_ORDINAL</code>.
-     */
-    public int[] getYoungestChildArray();
-    /**
-     * getOlderSiblingArray() returns an int array of size getSize()
-     * listing for each category the ordinal of its immediate older
-     * sibling (the sibling in the taxonomy tree with the highest ordinal
-     * below that of the given ordinal). The value for a category with no
-     * older sibling is <code>INVALID_ORDINAL</code>.
-     */
-    public int[] getOlderSiblingArray();
-  }
+  protected final void ensureOpen() throws AlreadyClosedException {
+    if (getRefCount() <= 0) {
+      throw new AlreadyClosedException("this TaxonomyReader is closed");
+    }
+  }
+
+  @Override
+  public final void close() throws IOException {
+    if (!closed) {
+      synchronized (this) {
+        if (!closed) {
+          decRef();
+          closed = true;
+        }
+      }
+    }
+  }

  /**
-   * getChildrenArrays() returns a {@link ChildrenArrays} object which can
-   * be used together to efficiently enumerate the children of any category.
-   * <P>
-   * The caller can hold on to the object it got indefinitely - it is
-   * guaranteed that no-one else will modify it. The other side of the
-   * same coin is that the caller must treat the object which it got (and
-   * the arrays it contains) as read-only and <B>not modify it</B>, because
-   * other callers might have gotten the same object too.
-   * <P>
-   * Implementations should have O(getSize()) time for the first call or
-   * after a refresh(), but O(1) time for further calls. In neither case
-   * there should be a need to read new data from disk. These guarantees
-   * are most likely achieved by calculating this object (based on the
-   * getParentArray()) when first needed, and later (if the taxonomy was not
-   * refreshed) returning the same object (without any allocation or copying)
-   * when requested.
-   * <P>
-   * The reason we have one method returning one object, rather than two
-   * methods returning two arrays, is to avoid race conditions in a multi-
-   * threaded application: We want to avoid the possibility of returning one
-   * new array and one old array, as those could not be used together.
+   * Expert: decreases the refCount of this TaxonomyReader instance. If the
+   * refCount drops to 0 this taxonomy reader is closed.
   */
-  public ChildrenArrays getChildrenArrays();
+  public final void decRef() throws IOException {
+    ensureOpen();
+    final int rc = refCount.decrementAndGet();
+    if (rc == 0) {
+      boolean success = false;
+      try {
+        doClose();
+        closed = true;
+        success = true;
+      } finally {
+        if (!success) {
+          // Put reference back on failure
+          refCount.incrementAndGet();
+        }
+      }
+    } else if (rc < 0) {
+      throw new IllegalStateException("too many decRef calls: refCount is " + rc + " after decrement");
+    }
+  }
+
+  /**
+   * Returns a {@link ChildrenArrays} object which can be used together to
+   * efficiently enumerate the children of any category.
+   * <p>
+   * The caller can hold on to the object it got indefinitely - it is guaranteed
+   * that no-one else will modify it. The other side of the same coin is that
+   * the caller must treat the object which it got (and the arrays it contains)
+   * as read-only and <b>not modify it</b>, because other callers might have
+   * gotten the same object too.
+   */
+  public abstract ChildrenArrays getChildrenArrays() throws IOException;

  /**
   * Retrieve user committed data.
+   *
   * @see TaxonomyWriter#commit(Map)
   */
-  public Map<String, String> getCommitUserData() throws IOException;
-
-  /**
-   * Expert: increments the refCount of this TaxonomyReader instance.
-   * RefCounts can be used to determine when a taxonomy reader can be closed
-   * safely, i.e. as soon as there are no more references.
-   * Be sure to always call a corresponding decRef(), in a finally clause;
-   * otherwise the reader may never be closed.
-   */
-  public void incRef();
-
-  /**
-   * Expert: decreases the refCount of this TaxonomyReader instance.
-   * If the refCount drops to 0, then pending changes (if any) can be
-   * committed to the taxonomy index and this reader can be closed.
-   * @throws IOException If there is a low-level I/O error.
-   */
-  public void decRef() throws IOException;
+  public abstract Map<String, String> getCommitUserData() throws IOException;

  /**
-   * Expert: returns the current refCount for this taxonomy reader
+   * Returns the ordinal of the category given as a path. The ordinal is the
+   * category's serial number, an integer which starts with 0 and grows as more
+   * categories are added (note that once a category is added, it can never be
+   * deleted).
+   *
+   * @return the category's ordinal or {@link #INVALID_ORDINAL} if the category
+   *         wasn't found.
   */
-  public int getRefCount();
+  public abstract int getOrdinal(CategoryPath categoryPath) throws IOException;

  /**
-   * getSize() returns the number of categories in the taxonomy.
-   * <P>
-   * Because categories are numbered consecutively starting with 0, it
-   * means the taxonomy contains ordinals 0 through getSize()-1.
-   * <P>
-   * Note that the number returned by getSize() is often slightly higher
-   * than the number of categories inserted into the taxonomy; This is
-   * because when a category is added to the taxonomy, its ancestors
-   * are also added automatically (including the root, which always gets
-   * ordinal 0).
+   * Returns the ordinal of the parent category of the category with the given
+   * ordinal, according to the following rules:
+   *
+   * <ul>
+   * <li>If the given ordinal is the {@link #ROOT_ORDINAL}, an
+   * {@link #INVALID_ORDINAL} is returned.
+   * <li>If the given ordinal is a top-level category, the {@link #ROOT_ORDINAL}
+   * is returned.
+   * <li>If the given ordinal is an existing category, returns the ordinal of
+   * its parent
+   * </ul>
+   *
+   * @throws ArrayIndexOutOfBoundsException
+   *           if an invalid ordinal is given (negative or beyond the last
+   *           available ordinal)
   */
-  public int getSize();
+  public abstract int getParent(int ordinal) throws IOException;
+
+  /**
+   * Returns an {@code int[]} the size of the taxonomy listing the ordinal of
+   * the parent category of each category in the taxonomy.
+   * <p>
+   * The caller can hold on to the array it got indefinitely - it is guaranteed
+   * that no-one else will modify it. The other side of the same coin is that
+   * the caller must treat the array it got as read-only and <b>not modify
+   * it</b>, because other callers might have gotten the same array too (and
+   * getParent() calls might be answered from the same array).
+   */
+  public abstract int[] getParentArray() throws IOException;
+
+  /**
+   * Returns the path name of the category with the given ordinal. The path is
+   * returned as a new CategoryPath object - to reuse an existing object, use
+   * {@link #getPath(int, CategoryPath)}.
+   *
+   * @return a {@link CategoryPath} with the required path, or {@code null} if
+   *         the given ordinal is unknown to the taxonomy.
+   */
+  public abstract CategoryPath getPath(int ordinal) throws IOException;
+
+  /**
+   * Same as {@link #getPath(int)}, only reuses the given {@link CategoryPath}
+   * instances.
+   */
+  public abstract boolean getPath(int ordinal, CategoryPath result) throws IOException;
+
+  /** Returns the current refCount for this taxonomy reader. */
+  public final int getRefCount() {
+    return refCount.get();
+  }
+
+  /**
+   * Returns the number of categories in the taxonomy. Note that the number of
+   * categories returned is often slightly higher than the number of categories
+   * inserted into the taxonomy; This is because when a category is added to the
+   * taxonomy, its ancestors are also added automatically (including the root,
+   * which always gets ordinal 0).
+   */
+  public abstract int getSize();
+
+  /**
+   * Expert: increments the refCount of this TaxonomyReader instance. RefCounts
+   * can be used to determine when a taxonomy reader can be closed safely, i.e.
+   * as soon as there are no more references. Be sure to always call a
+   * corresponding decRef(), in a finally clause; otherwise the reader may never
+   * be closed.
+   */
+  public final void incRef() {
+    ensureOpen();
+    refCount.incrementAndGet();
+  }
+
 }
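The reference-counting contract spelled out in the incRef() javadoc above amounts to the following usage pattern (a minimal sketch, not part of this commit):

  taxoReader.incRef();
  try {
    // use the reader, e.g. taxoReader.getOrdinal(...) or taxoReader.getParentArray()
  } finally {
    taxoReader.decRef(); // closes the reader when the count drops to 0
  }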
@@ -2,6 +2,7 @@ package org.apache.lucene.facet.taxonomy.directory;
 
 import org.apache.lucene.index.FieldInfo;
 import org.apache.lucene.index.StoredFieldVisitor;
+import org.apache.lucene.util.BytesRef;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more

@@ -28,6 +29,7 @@ abstract class Consts {
  static final String FULL = "$full_path$";
  static final String FIELD_PAYLOADS = "$payloads$";
  static final String PAYLOAD_PARENT = "p";
+  static final BytesRef PAYLOAD_PARENT_BYTES_REF = new BytesRef(PAYLOAD_PARENT);
  static final char[] PAYLOAD_PARENT_CHARS = PAYLOAD_PARENT.toCharArray();
 
 /**
@@ -1,29 +1,23 @@
 package org.apache.lucene.facet.taxonomy.directory;
 
 import java.io.IOException;
-import java.util.Iterator;
 import java.util.Map;
-import java.util.Map.Entry;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.locks.ReadWriteLock;
-import java.util.concurrent.locks.ReentrantReadWriteLock;
 import java.util.logging.Level;
 import java.util.logging.Logger;
 
 import org.apache.lucene.facet.taxonomy.CategoryPath;
-import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
+import org.apache.lucene.facet.taxonomy.ChildrenArrays;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.facet.taxonomy.directory.Consts.LoadFullPathOnly;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.MultiFields;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.collections.LRUHashMap;
 
 /*
@@ -55,89 +49,341 @@ import org.apache.lucene.util.collections.LRUHashMap;
  *
  * @lucene.experimental
  */
-public class DirectoryTaxonomyReader implements TaxonomyReader {
+public class DirectoryTaxonomyReader extends TaxonomyReader {
  
  private static final Logger logger = Logger.getLogger(DirectoryTaxonomyReader.class.getName());
  
-  private DirectoryReader indexReader;
-
-  // The following lock is used to allow multiple threads to read from the
-  // index concurrently, while having them block during the very short
-  // critical moment of refresh() (see comments below). Note, however, that
-  // we only read from the index when we don't have the entry in our cache,
-  // and the caches are locked separately.
-  private ReadWriteLock indexReaderLock = new ReentrantReadWriteLock();
-
-  // The following are the limited-size LRU caches used to cache the latest
-  // results from getOrdinal() and getLabel().
-  // Because LRUHashMap is not thread-safe, we need to synchronize on this
-  // object when using it. Unfortunately, this is not optimal under heavy
-  // contention because it means that while one thread is using the cache
-  // (reading or modifying) others are blocked from using it - or even
-  // starting to do benign things like calculating the hash function. A more
-  // efficient approach would be to use a non-locking (as much as possible)
-  // concurrent solution, along the lines of java.util.concurrent.ConcurrentHashMap
-  // but with LRU semantics.
-  // However, even in the current sub-optimal implementation we do not make
-  // the mistake of locking out readers while waiting for disk in a cache
-  // miss - below, we do not hold cache lock while reading missing data from
-  // disk.
-  private final LRUHashMap<String, Integer> ordinalCache;
-  private final LRUHashMap<Integer, String> categoryCache;
-
-  // getParent() needs to be extremely efficient, to the point that we need
-  // to fetch all the data in advance into memory, and answer these calls
-  // from memory. Currently we use a large integer array, which is
-  // initialized when the taxonomy is opened, and potentially enlarged
-  // when it is refresh()ed.
-  // These arrays are not synchronized. Rather, the reference to the array
-  // is volatile, and the only writing operation (refreshPrefetchArrays)
-  // simply creates a new array and replaces the reference. The volatility
-  // of the reference ensures the correct atomic replacement and its
-  // visibility properties (the content of the array is visible when the
-  // new reference is visible).
-  private ParentArray parentArray;
+  private static final int DEFAULT_CACHE_VALUE = 4000;
+
+  private final DirectoryTaxonomyWriter taxoWriter;
+  private final long taxoEpoch; // used in doOpenIfChanged
+  private final DirectoryReader indexReader;
+
+  // TODO: test DoubleBarrelLRUCache and consider using it instead
+  private LRUHashMap<String, Integer> ordinalCache;
+  private LRUHashMap<Integer, String> categoryCache;
+
+  // TODO: consolidate these objects into one ParentInfo or something?
+  private volatile ParentArray parentArray;
+  private volatile ChildrenArrays childrenArrays;
  
  private char delimiter = Consts.DEFAULT_DELIMITER;
  
-  private volatile boolean closed = false;
-
-  // set refCount to 1 at start
-  private final AtomicInteger refCount = new AtomicInteger(1);
+  /**
+   * Called only from {@link #doOpenIfChanged()}. If the taxonomy has been
+   * recreated, you should pass {@code null} as the caches and parent/children
+   * arrays.
+   */
+  DirectoryTaxonomyReader(DirectoryReader indexReader, DirectoryTaxonomyWriter taxoWriter,
+      LRUHashMap<String,Integer> ordinalCache,
+      LRUHashMap<Integer,String> categoryCache, ParentArray parentArray,
+      ChildrenArrays childrenArrays) throws IOException {
+    this.indexReader = indexReader;
+    this.taxoWriter = taxoWriter;
+    this.taxoEpoch = taxoWriter == null ? -1 : taxoWriter.getTaxonomyEpoch();
+
+    // use the same instance of the cache, note the protective code in getOrdinal and getPath
+    this.ordinalCache = ordinalCache == null ? new LRUHashMap<String,Integer>(DEFAULT_CACHE_VALUE) : ordinalCache;
+    this.categoryCache = categoryCache == null ? new LRUHashMap<Integer,String>(DEFAULT_CACHE_VALUE) : categoryCache;
+
+    this.parentArray = null;
+    this.childrenArrays = null;
+    if (parentArray != null) {
+      this.parentArray = new ParentArray(indexReader, parentArray);
+      if (childrenArrays != null) {
+        this.childrenArrays = new ChildrenArrays(this.parentArray.getArray(), childrenArrays);
+      }
+    }
+  }

  /**
   * Open for reading a taxonomy stored in a given {@link Directory}.
+   *
   * @param directory
-   *          The {@link Directory} in which the taxonomy lives. Note that
-   *          the taxonomy is read directly from that directory (not from a
-   *          subdirectory of it).
-   * @throws CorruptIndexException if the Taxonomy is corrupted.
-   * @throws IOException if another error occurred.
+   *          The {@link Directory} in which the taxonomy resides.
+   * @throws CorruptIndexException
+   *           if the Taxonomy is corrupt.
+   * @throws IOException
+   *           if another error occurred.
   */
  public DirectoryTaxonomyReader(Directory directory) throws IOException {
-    this.indexReader = openIndexReader(directory);
+    indexReader = openIndexReader(directory);
+    taxoWriter = null;
+    taxoEpoch = -1;
 
    // These are the default cache sizes; they can be configured after
    // construction with the cache's setMaxSize() method
-    ordinalCache = new LRUHashMap<String, Integer>(4000);
-    categoryCache = new LRUHashMap<Integer, String>(4000);
-
-    // TODO (Facet): consider lazily create parent array when asked, not in the constructor
-    parentArray = new ParentArray();
-    parentArray.refresh(indexReader);
+    ordinalCache = new LRUHashMap<String, Integer>(DEFAULT_CACHE_VALUE);
+    categoryCache = new LRUHashMap<Integer, String>(DEFAULT_CACHE_VALUE);
+  }
+
+  /**
+   * Opens a {@link DirectoryTaxonomyReader} over the given
+   * {@link DirectoryTaxonomyWriter} (for NRT).
+   *
+   * @param taxoWriter
+   *          The {@link DirectoryTaxonomyWriter} from which to obtain newly
+   *          added categories, in real-time.
+   */
+  public DirectoryTaxonomyReader(DirectoryTaxonomyWriter taxoWriter) throws IOException {
+    this.taxoWriter = taxoWriter;
+    taxoEpoch = taxoWriter.getTaxonomyEpoch();
+    indexReader = openIndexReader(taxoWriter.getInternalIndexWriter());
+
+    // These are the default cache sizes; they can be configured after
+    // construction with the cache's setMaxSize() method
+    ordinalCache = new LRUHashMap<String, Integer>(DEFAULT_CACHE_VALUE);
+    categoryCache = new LRUHashMap<Integer, String>(DEFAULT_CACHE_VALUE);
+  }
+
+  private String getLabel(int catID) throws IOException {
+    ensureOpen();
+
+    // Since the cache is shared with DTR instances allocated from
+    // doOpenIfChanged, we need to ensure that the ordinal is one that this DTR
+    // instance recognizes. Therefore we do this check up front, before we hit
+    // the cache.
+    if (catID < 0 || catID >= indexReader.maxDoc()) {
+      return null;
+    }
+
+    // TODO: can we use an int-based hash impl, such as IntToObjectMap,
+    // wrapped as LRU?
+    Integer catIDInteger = Integer.valueOf(catID);
+    synchronized (categoryCache) {
+      String res = categoryCache.get(catIDInteger);
+      if (res != null) {
+        return res;
+      }
+    }
+
+    final LoadFullPathOnly loader = new LoadFullPathOnly();
+    indexReader.document(catID, loader);
+    String ret = loader.getFullPath();
+    synchronized (categoryCache) {
+      categoryCache.put(catIDInteger, ret);
+    }
+
+    return ret;
+  }
+
+  @Override
+  protected void doClose() throws IOException {
+    indexReader.close();
+    parentArray = null;
+    childrenArrays = null;
+    // do not clear() the caches, as they may be used by other DTR instances.
+    ordinalCache = null;
+    categoryCache = null;
+  }
+
+  /**
+   * Implements the opening of a new {@link DirectoryTaxonomyReader} instance if
+   * the taxonomy has changed.
+   *
+   * <p>
+   * <b>NOTE:</b> the returned {@link DirectoryTaxonomyReader} shares the
+   * ordinal and category caches with this reader. This is not expected to cause
+   * any issues, unless the two instances continue to live. The reader
+   * guarantees that the two instances cannot affect each other in terms of
+   * correctness of the caches, however if the size of the cache is changed
+   * through {@link #setCacheSize(int)}, it will affect both reader instances.
+   */
+  @Override
+  protected DirectoryTaxonomyReader doOpenIfChanged() throws IOException {
+    ensureOpen();
+
+    final DirectoryReader r2;
+    if (taxoWriter == null) {
+      // not NRT
+      r2 = DirectoryReader.openIfChanged(indexReader);
+    } else {
+      // NRT
+      r2 = DirectoryReader.openIfChanged(indexReader, taxoWriter.getInternalIndexWriter(), false);
+    }
+    if (r2 == null) {
+      return null; // no changes, nothing to do
+    }
+
+    // check if the taxonomy was recreated
+    boolean success = false;
+    try {
+      boolean recreated = false;
+      if (taxoWriter == null) {
+        // not NRT, check epoch from commit data
+        String t1 = indexReader.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH);
+        String t2 = r2.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH);
+        if (t1 == null) {
+          if (t2 != null) {
+            recreated = true;
+          }
+        } else if (!t1.equals(t2)) {
+          // t1 != null and t2 cannot be null b/c DirTaxoWriter always puts the commit data.
+          // it's ok to use String.equals because we require the two epoch values to be the same.
+          recreated = true;
+        }
+      } else {
+        // NRT, compare current taxoWriter.epoch() vs the one that was given at construction
+        if (taxoEpoch != taxoWriter.getTaxonomyEpoch()) {
+          recreated = true;
+        }
+      }
+
+      final DirectoryTaxonomyReader newtr;
+      if (recreated) {
+        // if recreated, do not reuse anything from this instance. the information
+        // will be lazily computed by the new instance when needed.
+        newtr = new DirectoryTaxonomyReader(r2, taxoWriter, null, null, null, null);
+      } else {
+        newtr = new DirectoryTaxonomyReader(r2, taxoWriter, ordinalCache, categoryCache, parentArray, childrenArrays);
+      }
+
+      success = true;
+      return newtr;
+    } finally {
+      if (!success) {
+        IOUtils.closeWhileHandlingException(r2);
+      }
+    }
  }

  protected DirectoryReader openIndexReader(Directory directory) throws IOException {
    return DirectoryReader.open(directory);
  }

+  protected DirectoryReader openIndexReader(IndexWriter writer) throws IOException {
+    return DirectoryReader.open(writer, false);
+  }
+
  /**
-   * @throws AlreadyClosedException if this IndexReader is closed
+   * Expert: returns the underlying {@link DirectoryReader} instance that is
+   * used by this {@link TaxonomyReader}.
   */
-  protected final void ensureOpen() throws AlreadyClosedException {
-    if (getRefCount() <= 0) {
-      throw new AlreadyClosedException("this TaxonomyReader is closed");
-    }
-  }
+  DirectoryReader getInternalIndexReader() {
+    ensureOpen();
+    return indexReader;
+  }
+
+  @Override
+  public ChildrenArrays getChildrenArrays() throws IOException {
+    ensureOpen();
+    if (childrenArrays == null) {
+      synchronized (this) {
+        if (childrenArrays == null) {
+          childrenArrays = new ChildrenArrays(getParentArray());
+        }
+      }
+    }
+    return childrenArrays;
+  }
+
+  @Override
+  public Map<String, String> getCommitUserData() throws IOException {
+    ensureOpen();
+    return indexReader.getIndexCommit().getUserData();
+  }
+
+  @Override
+  public int getOrdinal(CategoryPath categoryPath) throws IOException {
+    ensureOpen();
+    if (categoryPath.length() == 0) {
+      return ROOT_ORDINAL;
+    }
+    String path = categoryPath.toString(delimiter);
+
+    // First try to find the answer in the LRU cache:
+    synchronized (ordinalCache) {
+      Integer res = ordinalCache.get(path);
+      if (res != null) {
+        if (res.intValue() < indexReader.maxDoc()) {
+          // Since the cache is shared with DTR instances allocated from
+          // doOpenIfChanged, we need to ensure that the ordinal is one that
+          // this DTR instance recognizes.
+          return res.intValue();
+        } else {
+          // if we get here, it means that the category was found in the cache,
+          // but is not recognized by this TR instance. Therefore there's no
+          // need to continue search for the path on disk, because we won't find
+          // it there too.
+          return TaxonomyReader.INVALID_ORDINAL;
+        }
+      }
+    }
+
+    // If we're still here, we have a cache miss. We need to fetch the
+    // value from disk, and then also put it in the cache:
+    int ret = TaxonomyReader.INVALID_ORDINAL;
+    DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, null, Consts.FULL, new BytesRef(path), 0);
+    if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
+      ret = docs.docID();
+
+      // we only store the fact that a category exists, not its inexistence.
+      // This is required because the caches are shared with new DTR instances
+      // that are allocated from doOpenIfChanged. Therefore, if we only store
+      // information about found categories, we cannot accidentally tell a new
+      // generation of DTR that a category does not exist.
+      synchronized (ordinalCache) {
+        ordinalCache.put(path, Integer.valueOf(ret));
+      }
+    }
+
+    return ret;
+  }
+
+  // TODO: move to a ParentInfo class? (see TODO for parentArray)
+  @Override
+  public int getParent(int ordinal) throws IOException {
+    ensureOpen();
+    return getParentArray()[ordinal];
+  }
+
+  @Override
+  public int[] getParentArray() throws IOException {
+    ensureOpen();
+    if (parentArray == null) {
+      synchronized (this) {
+        if (parentArray == null) {
+          parentArray = new ParentArray(indexReader);
+        }
+      }
+    }
+    return parentArray.getArray();
+  }
+
+  @Override
+  public CategoryPath getPath(int ordinal) throws IOException {
+    ensureOpen();
+    // TODO (Facet): Currently, the LRU cache we use (getCategoryCache) holds
+    // strings with delimiters, not CategoryPath objects, so even if
+    // we have a cache hit, we need to process the string and build a new
+    // CategoryPath object every time. What is preventing us from putting
+    // the actual CategoryPath object in the cache is the fact that these
+    // objects are mutable. So we should create an immutable (read-only)
+    // interface that CategoryPath implements, and this method should
+    // return this interface, not the writable CategoryPath.
+    String label = getLabel(ordinal);
+    if (label == null) {
+      return null;
+    }
+    return new CategoryPath(label, delimiter);
+  }
+
+  @Override
+  public boolean getPath(int ordinal, CategoryPath result) throws IOException {
+    ensureOpen();
+    String label = getLabel(ordinal);
+    if (label == null) {
+      return false;
+    }
+    result.clear();
+    result.add(label, delimiter);
+    return true;
+  }
+
+  @Override
+  public int getSize() {
+    ensureOpen();
+    return indexReader.numDocs();
+  }
 
  /**
@@ -151,10 +397,10 @@ public class DirectoryTaxonomyReader implements TaxonomyReader {
   */
  public void setCacheSize(int size) {
    ensureOpen();
-    synchronized(categoryCache) {
+    synchronized (categoryCache) {
      categoryCache.setMaxSize(size);
    }
-    synchronized(ordinalCache) {
+    synchronized (ordinalCache) {
      ordinalCache.setMaxSize(size);
    }
  }
@@ -173,361 +419,11 @@ public class DirectoryTaxonomyReader implements TaxonomyReader {
    ensureOpen();
    this.delimiter = delimiter;
  }
 
-  @Override
-  public int getOrdinal(CategoryPath categoryPath) throws IOException {
-    ensureOpen();
-    if (categoryPath.length()==0) {
-      return ROOT_ORDINAL;
-    }
-    String path = categoryPath.toString(delimiter);
-
-    // First try to find the answer in the LRU cache:
-    synchronized(ordinalCache) {
-      Integer res = ordinalCache.get(path);
-      if (res!=null) {
-        return res.intValue();
-      }
-    }
-
-    // If we're still here, we have a cache miss. We need to fetch the
-    // value from disk, and then also put it in the cache:
-    int ret = TaxonomyReader.INVALID_ORDINAL;
-    try {
-      indexReaderLock.readLock().lock();
-      // TODO (Facet): avoid Multi*?
-      Bits liveDocs = MultiFields.getLiveDocs(indexReader);
-      DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, liveDocs, Consts.FULL, new BytesRef(path), 0);
-      if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
-        ret = docs.docID();
-      }
-    } finally {
-      indexReaderLock.readLock().unlock();
-    }
-
-    // Put the new value in the cache. Note that it is possible that while
-    // we were doing the above fetching (without the cache locked), some
-    // other thread already added the same category to the cache. We do
-    // not care about this possibility, as LRUCache replaces previous values
-    // of the same keys (it doesn't store duplicates).
-    synchronized(ordinalCache) {
-      // GB: new Integer(int); creates a new object each and every time.
-      // Integer.valueOf(int) might not (See JavaDoc).
-      ordinalCache.put(path, Integer.valueOf(ret));
-    }
-
-    return ret;
-  }
-
-  @Override
-  public CategoryPath getPath(int ordinal) throws IOException {
-    ensureOpen();
-    // TODO (Facet): Currently, the LRU cache we use (getCategoryCache) holds
-    // strings with delimiters, not CategoryPath objects, so even if
-    // we have a cache hit, we need to process the string and build a new
-    // CategoryPath object every time. What is preventing us from putting
-    // the actual CategoryPath object in the cache is the fact that these
-    // objects are mutable. So we should create an immutable (read-only)
-    // interface that CategoryPath implements, and this method should
-    // return this interface, not the writable CategoryPath.
-    String label = getLabel(ordinal);
-    if (label==null) {
-      return null;
-    }
-    return new CategoryPath(label, delimiter);
-  }
-
-  @Override
-  public boolean getPath(int ordinal, CategoryPath result) throws IOException {
-    ensureOpen();
-    String label = getLabel(ordinal);
-    if (label==null) {
-      return false;
-    }
-    result.clear();
-    result.add(label, delimiter);
-    return true;
-  }
-
-  private String getLabel(int catID) throws IOException {
-    ensureOpen();
-    // First try to find the answer in the LRU cache. It is very
-    // unfortunate that we need to allocate an Integer object here -
-    // it would have been better if we used a hash table specifically
-    // designed for int keys...
-    // GB: new Integer(int); creates a new object each and every time.
-    // Integer.valueOf(int) might not (See JavaDoc).
-    Integer catIDInteger = Integer.valueOf(catID);
-
-    synchronized(categoryCache) {
-      String res = categoryCache.get(catIDInteger);
-      if (res!=null) {
-        return res;
-      }
-    }
-
-    // If we're still here, we have a cache miss. We need to fetch the
-    // value from disk, and then also put it in the cache:
-    String ret;
-    try {
-      indexReaderLock.readLock().lock();
-      // The taxonomy API dictates that if we get an invalid category
-      // ID, we should return null. If we don't check this here, we
-      // can get some sort of an exception from the document() call below.
-      // NOTE: Currently, we *do not* cache this return value; There
-      // isn't much point to do so, because checking the validity of
-      // the docid doesn't require disk access - just comparing with
-      // the number indexReader.maxDoc().
-      if (catID<0 || catID>=indexReader.maxDoc()) {
-        return null;
-      }
-      final LoadFullPathOnly loader = new LoadFullPathOnly();
-      indexReader.document(catID, loader);
-      ret = loader.getFullPath();
-    } finally {
-      indexReaderLock.readLock().unlock();
-    }
-    // Put the new value in the cache. Note that it is possible that while
-    // we were doing the above fetching (without the cache locked), some
-    // other thread already added the same category to the cache. We do
-    // not care about this possibility, as LRUCache replaces previous
-    // values of the same keys (it doesn't store duplicates).
-    synchronized (categoryCache) {
-      categoryCache.put(catIDInteger, ret);
-    }
-
-    return ret;
-  }
-
-  @Override
-  public int getParent(int ordinal) {
-    ensureOpen();
-    // Note how we don't need to hold the read lock to do the following,
-    // because the array reference is volatile, ensuring the correct
-    // visibility and ordering: if we get the new reference, the new
-    // data is also visible to this thread.
-    return getParentArray()[ordinal];
-  }
-
-  /**
-   * getParentArray() returns an int array of size getSize() listing the
-   * ordinal of the parent category of each category in the taxonomy.
-   * <P>
-   * The caller can hold on to the array it got indefinitely - it is
-   * guaranteed that no-one else will modify it. The other side of the
-   * same coin is that the caller must treat the array it got as read-only
-   * and <B>not modify it</B>, because other callers might have gotten the
|
||||||
* same array too, and getParent() calls are also answered from the
|
|
||||||
* same array.
|
|
||||||
* <P>
|
|
||||||
* The getParentArray() call is extremely efficient, merely returning
|
|
||||||
* a reference to an array that already exists. For a caller that plans
|
|
||||||
* to call getParent() for many categories, using getParentArray() and
|
|
||||||
* the array it returns is a somewhat faster approach because it avoids
|
|
||||||
* the overhead of method calls and volatile dereferencing.
|
|
||||||
* <P>
|
|
||||||
* If you use getParentArray() instead of getParent(), remember that
|
|
||||||
* the array you got is (naturally) not modified after a refresh(),
|
|
||||||
* so you should always call getParentArray() again after a refresh().
|
|
||||||
*/
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int[] getParentArray() {
|
|
||||||
ensureOpen();
|
|
||||||
// Note how we don't need to hold the read lock to do the following,
|
|
||||||
// because the array reference is volatile, ensuring the correct
|
|
||||||
// visibility and ordering: if we get the new reference, the new
|
|
||||||
// data is also visible to this thread.
|
|
||||||
return parentArray.getArray();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Note that refresh() is synchronized (it is the only synchronized
|
|
||||||
// method in this class) to ensure that it never gets called concurrently
|
|
||||||
// with itself.
|
|
||||||
@Override
|
|
||||||
public synchronized boolean refresh() throws IOException, InconsistentTaxonomyException {
|
|
||||||
ensureOpen();
|
|
||||||
/*
|
|
||||||
* Since refresh() can be a lengthy operation, it is very important that we
|
|
||||||
* avoid locking out all readers for its duration. This is why we don't hold
|
|
||||||
* the indexReaderLock write lock for the entire duration of this method. In
|
|
||||||
* fact, it is enough to hold it only during a single assignment! Other
|
|
||||||
* comments in this method will explain this.
|
|
||||||
*/
|
|
||||||
|
|
||||||
// note that the lengthy operation indexReader.reopen() does not
|
|
||||||
// modify the reader, so we can do it without holding a lock. We can
|
|
||||||
// safely read indexReader without holding the write lock, because
|
|
||||||
// no other thread can be writing at this time (this method is the
|
|
||||||
// only possible writer, and it is "synchronized" to avoid this case).
|
|
||||||
DirectoryReader r2 = DirectoryReader.openIfChanged(indexReader);
|
|
||||||
if (r2 == null) {
|
|
||||||
return false; // no changes, nothing to do
|
|
||||||
}
|
|
||||||
|
|
||||||
// validate that a refresh is valid at this point, i.e. that the taxonomy
|
|
||||||
// was not recreated since this reader was last opened or refresshed.
|
|
||||||
String t1 = indexReader.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH);
|
|
||||||
String t2 = r2.getIndexCommit().getUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH);
|
|
||||||
if (t1 == null) {
|
|
||||||
if (t2 != null) {
|
|
||||||
r2.close();
|
|
||||||
throw new InconsistentTaxonomyException("Taxonomy was recreated, epoch= " + t2);
|
|
||||||
}
|
|
||||||
} else if (!t1.equals(t2)) {
|
|
||||||
// t1 != null and t2 cannot be null b/c DirTaxoWriter always puts the commit data.
|
|
||||||
// it's ok to use String.equals because we require the two epoch values to be the same.
|
|
||||||
r2.close();
|
|
||||||
throw new InconsistentTaxonomyException("Taxonomy was recreated epoch = " + t2 + " != " + t1);
|
|
||||||
}
|
|
||||||
|
|
||||||
IndexReader oldreader = indexReader;
|
|
||||||
// we can close the old searcher, but need to synchronize this
|
|
||||||
// so that we don't close it in the middle that another routine
|
|
||||||
// is reading from it.
|
|
||||||
indexReaderLock.writeLock().lock();
|
|
||||||
indexReader = r2;
|
|
||||||
indexReaderLock.writeLock().unlock();
|
|
||||||
// We can close the old reader, but need to be certain that we
|
|
||||||
// don't close it while another method is reading from it.
|
|
||||||
// Luckily, we can be certain of that even without putting the
|
|
||||||
// oldreader.close() in the locked section. The reason is that
|
|
||||||
// after lock() succeeded above, we know that all existing readers
|
|
||||||
// had finished (this is what a read-write lock ensures). New
|
|
||||||
// readers, starting after the unlock() we just did, already got
|
|
||||||
// the new indexReader we set above. So nobody can be possibly
|
|
||||||
// using the old indexReader, and we can close it:
|
|
||||||
oldreader.close();
|
|
||||||
|
|
||||||
// We prefetch some of the arrays to make requests much faster.
|
|
||||||
// Let's refresh these prefetched arrays; This refresh is much
|
|
||||||
// is made more efficient by assuming that it is enough to read
|
|
||||||
// the values for new categories (old categories could not have been
|
|
||||||
// changed or deleted)
|
|
||||||
// Note that this this done without the write lock being held,
|
|
||||||
// which means that it is possible that during a refresh(), a
|
|
||||||
// reader will have some methods (like getOrdinal and getCategory)
|
|
||||||
// return fresh information, while getParent()
|
|
||||||
// (only to be prefetched now) still return older information.
|
|
||||||
// We consider this to be acceptable. The important thing,
|
|
||||||
// however, is that refreshPrefetchArrays() itself writes to
|
|
||||||
// the arrays in a correct manner (see discussion there)
|
|
||||||
parentArray.refresh(indexReader);
|
|
||||||
|
|
||||||
// Remove any INVALID_ORDINAL values from the ordinal cache,
|
|
||||||
// because it is possible those are now answered by the new data!
|
|
||||||
Iterator<Entry<String, Integer>> i = ordinalCache.entrySet().iterator();
|
|
||||||
while (i.hasNext()) {
|
|
||||||
Entry<String, Integer> e = i.next();
|
|
||||||
if (e.getValue().intValue() == INVALID_ORDINAL) {
|
|
||||||
i.remove();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void close() throws IOException {
|
|
||||||
if (!closed) {
|
|
||||||
synchronized (this) {
|
|
||||||
if (!closed) {
|
|
||||||
decRef();
|
|
||||||
closed = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/** Do the actual closing, free up resources */
|
|
||||||
private void doClose() throws IOException {
|
|
||||||
indexReader.close();
|
|
||||||
closed = true;
|
|
||||||
|
|
||||||
parentArray = null;
|
|
||||||
childrenArrays = null;
|
|
||||||
categoryCache.clear();
|
|
||||||
ordinalCache.clear();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int getSize() {
|
|
||||||
ensureOpen();
|
|
||||||
indexReaderLock.readLock().lock();
|
|
||||||
try {
|
|
||||||
return indexReader.numDocs();
|
|
||||||
} finally {
|
|
||||||
indexReaderLock.readLock().unlock();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public Map<String, String> getCommitUserData() throws IOException {
|
|
||||||
ensureOpen();
|
|
||||||
return indexReader.getIndexCommit().getUserData();
|
|
||||||
}
|
|
||||||
|
|
||||||
private ChildrenArrays childrenArrays;
|
|
||||||
Object childrenArraysRebuild = new Object();
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public ChildrenArrays getChildrenArrays() {
|
|
||||||
ensureOpen();
|
|
||||||
// Check if the taxonomy grew since we built the array, and if it
|
|
||||||
// did, create new (and larger) arrays and fill them as required.
|
|
||||||
// We do all this under a lock, two prevent to concurrent calls to
|
|
||||||
// needlessly do the same array building at the same time.
|
|
||||||
synchronized(childrenArraysRebuild) {
|
|
||||||
int num = getSize();
|
|
||||||
int first;
|
|
||||||
if (childrenArrays==null) {
|
|
||||||
first = 0;
|
|
||||||
} else {
|
|
||||||
first = childrenArrays.getYoungestChildArray().length;
|
|
||||||
}
|
|
||||||
// If the taxonomy hasn't grown, we can return the existing object
|
|
||||||
// immediately
|
|
||||||
if (first == num) {
|
|
||||||
return childrenArrays;
|
|
||||||
}
|
|
||||||
// Otherwise, build new arrays for a new ChildrenArray object.
|
|
||||||
// These arrays start with an enlarged copy of the previous arrays,
|
|
||||||
// and then are modified to take into account the new categories:
|
|
||||||
int[] newYoungestChildArray = new int[num];
|
|
||||||
int[] newOlderSiblingArray = new int[num];
|
|
||||||
// In Java 6, we could just do Arrays.copyOf()...
|
|
||||||
if (childrenArrays!=null) {
|
|
||||||
System.arraycopy(childrenArrays.getYoungestChildArray(), 0,
|
|
||||||
newYoungestChildArray, 0, childrenArrays.getYoungestChildArray().length);
|
|
||||||
System.arraycopy(childrenArrays.getOlderSiblingArray(), 0,
|
|
||||||
newOlderSiblingArray, 0, childrenArrays.getOlderSiblingArray().length);
|
|
||||||
}
|
|
||||||
int[] parents = getParentArray();
|
|
||||||
for (int i=first; i<num; i++) {
|
|
||||||
newYoungestChildArray[i] = INVALID_ORDINAL;
|
|
||||||
}
|
|
||||||
// In the loop below we can ignore the root category (0) because
|
|
||||||
// it has no parent
|
|
||||||
if (first==0) {
|
|
||||||
first = 1;
|
|
||||||
newOlderSiblingArray[0] = INVALID_ORDINAL;
|
|
||||||
}
|
|
||||||
for (int i=first; i<num; i++) {
|
|
||||||
// Note that parents[i] is always < i, so the right-hand-side of
|
|
||||||
// the following line is already set when we get here.
|
|
||||||
newOlderSiblingArray[i] = newYoungestChildArray[parents[i]];
|
|
||||||
newYoungestChildArray[parents[i]] = i;
|
|
||||||
}
|
|
||||||
// Finally switch to the new arrays
|
|
||||||
childrenArrays = new ChildrenArraysImpl(newYoungestChildArray,
|
|
||||||
newOlderSiblingArray);
|
|
||||||
return childrenArrays;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
   public String toString(int max) {
     ensureOpen();
     StringBuilder sb = new StringBuilder();
-    int upperl = Math.min(max, this.indexReader.maxDoc());
+    int upperl = Math.min(max, indexReader.maxDoc());
     for (int i = 0; i < upperl; i++) {
       try {
         CategoryPath category = this.getPath(i);
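
[Editor's note] The refresh() method removed in the hunk above is replaced by
the static TaxonomyReader.openIfChanged(), which the tests later in this patch
use throughout. A minimal sketch of the new reopen pattern, assuming a
taxonomy writer has committed new categories (variable names are illustrative):

    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
    // ... a DirectoryTaxonomyWriter adds categories and commits ...
    TaxonomyReader newReader = TaxonomyReader.openIfChanged(taxoReader);
    if (newReader != null) {  // null means the taxonomy has not changed
      taxoReader.close();     // release the old, reference-counted instance
      taxoReader = newReader;
    }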
@@ -548,75 +444,5 @@ public class DirectoryTaxonomyReader implements TaxonomyReader {
       }
     }
     return sb.toString();
   }
-
-  private static final class ChildrenArraysImpl implements ChildrenArrays {
-    private int[] youngestChildArray, olderSiblingArray;
-    public ChildrenArraysImpl(int[] youngestChildArray, int[] olderSiblingArray) {
-      this.youngestChildArray = youngestChildArray;
-      this.olderSiblingArray = olderSiblingArray;
-    }
-    @Override
-    public int[] getOlderSiblingArray() {
-      return olderSiblingArray;
-    }
-    @Override
-    public int[] getYoungestChildArray() {
-      return youngestChildArray;
-    }
-  }
-
-  /**
-   * Expert: This method is only for expert use.
-   * Note also that any call to refresh() will invalidate the returned reader,
-   * so the caller needs to take care of appropriate locking.
-   *
-   * @return lucene indexReader
-   */
-  DirectoryReader getInternalIndexReader() {
-    ensureOpen();
-    return this.indexReader;
-  }
-
-  /**
-   * Expert: decreases the refCount of this TaxonomyReader instance. If the
-   * refCount drops to 0, then this reader is closed.
-   */
-  @Override
-  public void decRef() throws IOException {
-    ensureOpen();
-    final int rc = refCount.decrementAndGet();
-    if (rc == 0) {
-      boolean success = false;
-      try {
-        doClose();
-        success = true;
-      } finally {
-        if (!success) {
-          // Put reference back on failure
-          refCount.incrementAndGet();
-        }
-      }
-    } else if (rc < 0) {
-      throw new IllegalStateException("too many decRef calls: refCount is " + rc + " after decrement");
-    }
-  }
-
-  /** Expert: returns the current refCount for this taxonomy reader */
-  @Override
-  public int getRefCount() {
-    return refCount.get();
-  }
-
-  /**
-   * Expert: increments the refCount of this TaxonomyReader instance.
-   * RefCounts are used to determine when a taxonomy reader can be closed
-   * safely, i.e. as soon as there are no more references.
-   * Be sure to always call a corresponding decRef(), in a finally clause;
-   * otherwise the reader may never be closed.
-   */
-  @Override
-  public void incRef() {
-    ensureOpen();
-    refCount.incrementAndGet();
-  }
 }
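
[Editor's note] The incRef()/decRef() pair deleted above moves into the
TaxonomyReader base class (which is now abstract, with reference counting
made final). The usage contract stated in the deleted javadoc ("always call
a corresponding decRef(), in a finally clause") looks like this in caller
code — a sketch, not part of the patch itself:

    taxoReader.incRef();
    try {
      // ... use the reader: getOrdinal(), getPath(), getParentArray() ...
    } finally {
      taxoReader.decRef(); // closes the reader once the count drops to zero
    }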
@@ -294,6 +294,9 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
    * @param openMode see {@link OpenMode}
    */
   protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
+    // TODO: should we use a more optimized Codec, e.g. Pulsing (or write custom)?
+    // The taxonomy has a unique structure, where each term is associated with one document
+
     // Make sure we use a MergePolicy which always merges adjacent segments and thus
     // keeps the doc IDs ordered as well (this is crucial for the taxonomy index).
     return new IndexWriterConfig(Version.LUCENE_50,
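
[Editor's note] createIndexWriterConfig(OpenMode) is a protected hook, so a
subclass may supply its own configuration. A hedged sketch (the RAM-buffer
tweak is purely illustrative, and the merge policy chosen by super() should
be kept because, as the comment above says, ordered doc IDs are crucial for
the taxonomy index):

    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir) {
      @Override
      protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
        IndexWriterConfig config = super.createIndexWriterConfig(openMode);
        config.setRAMBufferSizeMB(64); // illustrative tweak only
        return config;
      }
    };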
@@ -583,7 +586,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
     addToCache(categoryPath, length, id);
 
     // also add to the parent array
-    getParentArray().add(id, parent);
+    parentArray = getParentArray().add(id, parent);
 
     return id;
   }
@@ -811,10 +814,9 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
       synchronized (this) {
         if (parentArray == null) {
           initReaderManager();
-          parentArray = new ParentArray();
           DirectoryReader reader = readerManager.acquire();
           try {
-            parentArray.refresh(reader);
+            parentArray = new ParentArray(reader);
           } finally {
             readerManager.release(reader);
           }
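
[Editor's note] The hunk above relies on the acquire/release discipline of
the ReaderManager: a reader handed out must be returned in a finally block
so its reference count stays balanced even if initialization throws. The
bare pattern, as a sketch:

    DirectoryReader reader = readerManager.acquire();
    try {
      // ... read from the taxonomy index, e.g. build the ParentArray ...
    } finally {
      readerManager.release(reader);
    }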
@@ -1035,5 +1037,21 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
   public Directory getDirectory() {
     return dir;
   }
 
+  /**
+   * Used by {@link DirectoryTaxonomyReader} to support NRT.
+   * <p>
+   * <b>NOTE:</b> you should not use the obtained {@link IndexWriter} in any
+   * way, other than opening an IndexReader on it, or otherwise, the taxonomy
+   * index may become corrupt!
+   */
+  final IndexWriter getInternalIndexWriter() {
+    return indexWriter;
+  }
+
+  /** Used by {@link DirectoryTaxonomyReader} to support NRT. */
+  final long getTaxonomyEpoch() {
+    return indexEpoch;
+  }
+
 }
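
[Editor's note] getInternalIndexWriter() and getTaxonomyEpoch() are the hooks
that let a DirectoryTaxonomyReader be opened directly on a writer (see the new
testNRT later in this patch). A minimal pairing sketch, using only APIs that
appear in the patch:

    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);
    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter); // NRT reader
    taxoWriter.addCategory(new CategoryPath("a"));
    // no commit() is needed before reopening an NRT taxonomy reader:
    TaxonomyReader newReader = TaxonomyReader.openIfChanged(taxoReader);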
@@ -2,15 +2,14 @@ package org.apache.lucene.facet.taxonomy.directory;
 
 import java.io.IOException;
 
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.CorruptIndexException;
 import org.apache.lucene.index.DocsAndPositionsEnum;
 import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.util.Bits;
-import org.apache.lucene.util.BytesRef;
-
-import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.util.ArrayUtil;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -29,55 +28,23 @@ import org.apache.lucene.facet.taxonomy.TaxonomyReader;
  * limitations under the License.
  */
 
-// getParent() needs to be extremely efficient, to the point that we need
-// to fetch all the data in advance into memory, and answer these calls
-// from memory. Currently we use a large integer array, which is
-// initialized when the taxonomy is opened, and potentially enlarged
-// when it is refresh()ed.
 /**
  * @lucene.experimental
  */
 class ParentArray {
 
-  // These arrays are not syncrhonized. Rather, the reference to the array
-  // is volatile, and the only writing operation (refreshPrefetchArrays)
-  // simply creates a new array and replaces the reference. The volatility
-  // of the reference ensures the correct atomic replacement and its
-  // visibility properties (the content of the array is visible when the
-  // new reference is visible).
-  private volatile int prefetchParentOrdinal[] = null;
-
-  public int[] getArray() {
-    return prefetchParentOrdinal;
+  // TODO: maybe use PackedInts?
+  private final int[] parentOrdinals;
+
+  /** Used by {@link #add(int, int)} when the array needs to grow. */
+  ParentArray(int[] parentOrdinals) {
+    this.parentOrdinals = parentOrdinals;
   }
 
-  /**
-   * refreshPrefetch() refreshes the parent array. Initially, it fills the
-   * array from the positions of an appropriate posting list. If called during
-   * a refresh(), when the arrays already exist, only values for new documents
-   * (those beyond the last one in the array) are read from the positions and
-   * added to the arrays (that are appropriately enlarged). We assume (and
-   * this is indeed a correct assumption in our case) that existing categories
-   * are never modified or deleted.
-   */
-  void refresh(IndexReader indexReader) throws IOException {
-    // Note that it is not necessary for us to obtain the read lock.
-    // The reason is that we are only called from refresh() (precluding
-    // another concurrent writer) or from the constructor (when no method
-    // could be running).
-    // The write lock is also not held during the following code, meaning
-    // that reads *can* happen while this code is running. The "volatile"
-    // property of the prefetchParentOrdinal and prefetchDepth array
-    // references ensure the correct visibility property of the assignment
-    // but other than that, we do *not* guarantee that a reader will not
-    // use an old version of one of these arrays (or both) while a refresh
-    // is going on. But we find this acceptable - until a refresh has
-    // finished, the reader should not expect to see new information
-    // (and the old information is the same in the old and new versions).
-    int first;
-    int num = indexReader.maxDoc();
-    if (prefetchParentOrdinal==null) {
-      prefetchParentOrdinal = new int[num];
+  public ParentArray(IndexReader reader) throws IOException {
+    parentOrdinals = new int[reader.maxDoc()];
+    if (parentOrdinals.length > 0) {
+      initFromReader(reader, 0);
       // Starting Lucene 2.9, following the change LUCENE-1542, we can
       // no longer reliably read the parent "-1" (see comment in
       // LuceneTaxonomyWriter.SinglePositionTokenStream). We have no way
@@ -85,78 +52,88 @@ class ParentArray {
       // with existing indexes, so what we'll do instead is just
       // hard-code the parent of ordinal 0 to be -1, and assume (as is
       // indeed the case) that no other parent can be -1.
-    if (num>0) {
-      prefetchParentOrdinal[0] = TaxonomyReader.INVALID_ORDINAL;
-      }
-      first = 1;
-    } else {
-      first = prefetchParentOrdinal.length;
-      if (first==num) {
-        return; // nothing to do - no category was added
-      }
-      // In Java 6, we could just do Arrays.copyOf()...
-      int[] newarray = new int[num];
-      System.arraycopy(prefetchParentOrdinal, 0, newarray, 0,
-          prefetchParentOrdinal.length);
-      prefetchParentOrdinal = newarray;
-    }
-
-    // Read the new part of the parents array from the positions:
-    // TODO (Facet): avoid Multi*?
-    Bits liveDocs = MultiFields.getLiveDocs(indexReader);
-    DocsAndPositionsEnum positions = MultiFields.getTermPositionsEnum(indexReader, liveDocs,
-        Consts.FIELD_PAYLOADS, new BytesRef(Consts.PAYLOAD_PARENT),
-        DocsAndPositionsEnum.FLAG_PAYLOADS);
-    if ((positions == null || positions.advance(first) == DocIdSetIterator.NO_MORE_DOCS) && first < num) {
-      throw new CorruptIndexException("Missing parent data for category " + first);
-    }
-    for (int i=first; i<num; i++) {
-      // Note that we know positions.doc() >= i (this is an
-      // invariant kept throughout this loop)
-      if (positions.docID()==i) {
-        if (positions.freq() == 0) { // shouldn't happen
-          throw new CorruptIndexException(
-              "Missing parent data for category "+i);
-        }
-
-        // TODO (Facet): keep a local (non-volatile) copy of the prefetchParentOrdinal
-        // reference, because access to volatile reference is slower (?).
-        // Note: The positions we get here are one less than the position
-        // increment we added originally, so we get here the right numbers:
-        prefetchParentOrdinal[i] = positions.nextPosition();
-
-        if (positions.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
-          if ( i+1 < num ) {
-            throw new CorruptIndexException(
-                "Missing parent data for category "+(i+1));
-          }
-          break;
-        }
-      } else { // this shouldn't happen
-        throw new CorruptIndexException(
-            "Missing parent data for category "+i);
-      }
+      parentOrdinals[0] = TaxonomyReader.INVALID_ORDINAL;
     }
   }
 
+  public ParentArray(IndexReader reader, ParentArray copyFrom) throws IOException {
+    assert copyFrom != null;
+    int[] copyParents = copyFrom.getArray();
+    assert copyParents.length < reader.maxDoc() : "do not init a new ParentArray if the index hasn't changed";
+
+    this.parentOrdinals = new int[reader.maxDoc()];
+    System.arraycopy(copyParents, 0, parentOrdinals, 0, copyParents.length);
+    initFromReader(reader, copyParents.length);
+  }
+
-  /**
-   * add() is used in LuceneTaxonomyWriter, not in LuceneTaxonomyReader.
-   * It is only called from a synchronized method, so it is not reentrant,
-   * and also doesn't need to worry about reads happening at the same time.
-   *
-   * NOTE: add() and refresh() CANNOT be used together. If you call add(),
-   * this changes the arrays and refresh() can no longer be used.
-   */
-  void add(int ordinal, int parentOrdinal) {
-    if (ordinal >= prefetchParentOrdinal.length) {
-      // grow the array, if necessary.
-      // In Java 6, we could just do Arrays.copyOf()...
-      int[] newarray = new int[ordinal*2+1];
-      System.arraycopy(prefetchParentOrdinal, 0, newarray, 0,
-          prefetchParentOrdinal.length);
-      prefetchParentOrdinal = newarray;
-    }
-    prefetchParentOrdinal[ordinal] = parentOrdinal;
+  // Read the parents of the new categories
+  private void initFromReader(IndexReader reader, int first) throws IOException {
+    if (reader.maxDoc() == first) {
+      return;
+    }
+
+    TermsEnum termsEnum = null;
+    DocsAndPositionsEnum positions = null;
+    int idx = 0;
+    for (AtomicReaderContext context : reader.leaves()) {
+      if (context.docBase < first) {
+        continue;
+      }
+
+      // in general we could call readerCtx.reader().termPositionsEnum(), but that
+      // passes the liveDocs. Since we know there are no deletions, the code
+      // below may save some CPU cycles.
+      termsEnum = context.reader().fields().terms(Consts.FIELD_PAYLOADS).iterator(termsEnum);
+      if (!termsEnum.seekExact(Consts.PAYLOAD_PARENT_BYTES_REF, true)) {
+        throw new CorruptIndexException("Missing parent stream data for segment " + context.reader());
+      }
+      positions = termsEnum.docsAndPositions(null /* no deletes in taxonomy */, positions);
+      if (positions == null) {
+        throw new CorruptIndexException("Missing parent stream data for segment " + context.reader());
+      }
+
+      idx = context.docBase;
+      int doc;
+      while ((doc = positions.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
+        doc += context.docBase;
+        if (doc == idx) {
+          if (positions.freq() == 0) { // shouldn't happen
+            throw new CorruptIndexException("Missing parent data for category " + idx);
+          }
+
+          parentOrdinals[idx++] = positions.nextPosition();
+        } else { // this shouldn't happen
+          throw new CorruptIndexException("Missing parent data for category " + idx);
+        }
+      }
+      if (idx + 1 < context.reader().maxDoc()) {
+        throw new CorruptIndexException("Missing parent data for category " + (idx + 1));
+      }
+    }
+
+    if (idx != reader.maxDoc()) {
+      throw new CorruptIndexException("Missing parent data for category " + idx);
+    }
+  }
+
+  public int[] getArray() {
+    return parentOrdinals;
+  }
+
+  /**
+   * Adds the given ordinal/parent info and returns either a new instance if the
+   * underlying array had to grow, or this instance otherwise.
+   * <p>
+   * <b>NOTE:</b> you should call this method from thread-safe code.
+   */
+  ParentArray add(int ordinal, int parentOrdinal) {
+    if (ordinal >= parentOrdinals.length) {
+      int[] newarray = ArrayUtil.grow(parentOrdinals);
+      newarray[ordinal] = parentOrdinal;
+      return new ParentArray(newarray);
+    }
+    parentOrdinals[ordinal] = parentOrdinal;
+    return this;
   }
 
 }
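
[Editor's note] ParentArray.add() now returns a possibly new instance instead
of mutating a volatile array in place, which is why the caller in
DirectoryTaxonomyWriter above became "parentArray = getParentArray().add(id,
parent)". The copy-on-grow idiom in caller terms — a sketch:

    // Growth allocates a fresh, larger array (ArrayUtil.grow over-allocates
    // to amortize copies), so readers holding the old array never see it
    // resized or mutated out from under them; callers must keep the result:
    parentArray = parentArray.add(ordinal, parentOrdinal);
    int[] parents = parentArray.getArray();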
@@ -102,4 +102,10 @@ public class LRUHashMap<K,V> extends LinkedHashMap<K,V> {
     return size() > maxSize;
   }
 
+  @SuppressWarnings("unchecked")
+  @Override
+  public LRUHashMap<K,V> clone() {
+    return (LRUHashMap<K,V>) super.clone();
+  }
+
 }
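
[Editor's note] The clone() override above only narrows the return type; the
LRU behavior itself comes from LinkedHashMap's removeEldestEntry() hook (the
"return size() > maxSize" line in the context). A self-contained sketch of
that idiom, independent of the Lucene class:

    import java.util.LinkedHashMap;
    import java.util.Map;

    public class SimpleLRU<K, V> extends LinkedHashMap<K, V> {
      private final int maxSize;

      public SimpleLRU(int maxSize) {
        super(16, 0.75f, true); // true = access order, i.e. LRU
        this.maxSize = maxSize;
      }

      @Override
      protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
        return size() > maxSize; // evict once the cap is exceeded
      }
    }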
@@ -131,7 +131,7 @@ public class FacetTestUtils {
 
   public static class IndexTaxonomyReaderPair {
     public DirectoryReader indexReader;
-    public TaxonomyReader taxReader;
+    public DirectoryTaxonomyReader taxReader;
     public IndexSearcher indexSearcher;
 
     public void close() throws IOException {
@@ -78,11 +78,9 @@ public class TestTotalFacetCounts extends LuceneTestCase {
     TestTotalFacetCountsCache.addFacets(iParams, writers[0].indexWriter, writers[0].taxWriter, "b", "c");
 
     // Commit Changes
-    writers[0].commit();
     writers[0].close();
 
-    IndexTaxonomyReaderPair[] readers =
-      FacetTestUtils.createIndexTaxonomyReaderPair(dirs);
+    IndexTaxonomyReaderPair[] readers = FacetTestUtils.createIndexTaxonomyReaderPair(dirs);
 
     int[] intArray = new int[iParams.getPartitionSize()];
 
@@ -93,8 +91,7 @@ public class TestTotalFacetCounts extends LuceneTestCase {
     tfcc.load(tmpFile, readers[0].indexReader, readers[0].taxReader, iParams);
 
     // now retrieve the one just loaded
-    TotalFacetCounts totalCounts =
-      tfcc.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null);
+    TotalFacetCounts totalCounts = tfcc.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null);
 
     int partition = 0;
     for (int i=0; i<expectedCounts.length; i+=partitionSize) {
@@ -297,23 +297,17 @@ public class TestTotalFacetCountsCache extends LuceneTestCase {
     writers[0].indexWriter.close();
     writers[0].taxWriter.close();
 
-    readers[0].taxReader.refresh();
+    DirectoryTaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(readers[0].taxReader);
+    assertNotNull(newTaxoReader);
+    assertTrue("should have received more categories in updated taxonomy", newTaxoReader.getSize() > readers[0].taxReader.getSize());
+    readers[0].taxReader.close();
+    readers[0].taxReader = newTaxoReader;
 
     DirectoryReader r2 = DirectoryReader.openIfChanged(readers[0].indexReader);
     assertNotNull(r2);
-    // Hold on to the 'original' reader so we can do some checks with it
-    IndexReader origReader = null;
-
-    assertTrue("Reader must be updated!", readers[0].indexReader != r2);
-
-    // Set the 'original' reader
-    origReader = readers[0].indexReader;
-    // Set the new master index Reader
+    readers[0].indexReader.close();
     readers[0].indexReader = r2;
 
-    // Try to get total-counts the originalReader AGAIN, just for sanity. Should pull from the cache - not recomputed.
-    assertTrue("Should be obtained from cache at 6th attempt",totalCounts ==
-      TFC.getTotalCounts(origReader, readers[0].taxReader, iParams, null));
-
     // now use the new reader - should recompute
     totalCounts = TFC.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null);
     prevGen = assertRecomputed(totalCounts, prevGen, "after updating the index - 7th attempt!");
@@ -322,9 +316,7 @@ public class TestTotalFacetCountsCache extends LuceneTestCase {
     assertTrue("Should be obtained from cache at 8th attempt",totalCounts ==
       TFC.getTotalCounts(readers[0].indexReader, readers[0].taxReader, iParams, null));
 
-    origReader.close();
     readers[0].close();
-    r2.close();
     outputFile.delete();
     IOUtils.close(dirs[0]);
   }
@@ -380,7 +372,10 @@ public class TestTotalFacetCountsCache extends LuceneTestCase {
       writers[0].taxWriter.addCategory(new CategoryPath("foo", Integer.toString(i)));
     }
     writers[0].taxWriter.commit();
-    readers[0].taxReader.refresh();
+    DirectoryTaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(readers[0].taxReader);
+    assertNotNull(newTaxoReader);
+    readers[0].taxReader.close();
+    readers[0].taxReader = newTaxoReader;
 
     initCache();
 
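
[Editor's note] The test changes above repeat one reopen idiom: openIfChanged,
assert non-null, close the old reader, swap in the new one. A hypothetical
helper capturing it (the generic signature matches how the tests assign the
result to both TaxonomyReader and DirectoryTaxonomyReader variables):

    static <T extends TaxonomyReader> T mustReopen(T oldReader) throws IOException {
      T newReader = TaxonomyReader.openIfChanged(oldReader);
      assertNotNull(newReader); // the caller expects a change was committed
      oldReader.close();        // release the old, reference-counted instance
      return newReader;
    }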
@@ -5,18 +5,17 @@ import java.io.PrintWriter;
 import java.io.StringWriter;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.LockObtainFailedException;
 import org.apache.lucene.store.RAMDirectory;
-import org.junit.Ignore;
-import org.junit.Test;
-
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenArrays;
-import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
-import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
 import org.apache.lucene.util.SlowRAMDirectory;
+import org.junit.Test;
 
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -35,6 +34,8 @@ import org.apache.lucene.util.SlowRAMDirectory;
  * limitations under the License.
  */
 
+// TODO: remove this suppress after we fix the TaxoWriter Codec to a non-default (see todo in DirTW)
+@SuppressCodecs("SimpleText")
 public class TestTaxonomyCombined extends LuceneTestCase {
 
   /** The following categories will be added to the taxonomy by
@@ -725,7 +726,10 @@ public class TestTaxonomyCombined extends LuceneTestCase {
     assertEquals(3, ca.getOlderSiblingArray().length);
     assertEquals(3, ca.getYoungestChildArray().length);
     // After the refresh, things change:
-    tr.refresh();
+    TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
+    assertNotNull(newtr);
+    tr.close();
+    tr = newtr;
     ca = tr.getChildrenArrays();
     assertEquals(5, tr.getSize());
     assertEquals(5, ca.getOlderSiblingArray().length);
@@ -737,14 +741,11 @@ public class TestTaxonomyCombined extends LuceneTestCase {
     indexDir.close();
   }
 
-  /**
-   * Test that getParentArrays is valid when retrieved during refresh
-   */
+  // Test that getParentArrays is valid when retrieved during refresh
   @Test
-  @Ignore
   public void testTaxonomyReaderRefreshRaces() throws Exception {
     // compute base child arrays - after first chunk, and after the other
     Directory indexDirBase = newDirectory();
     TaxonomyWriter twBase = new DirectoryTaxonomyWriter(indexDirBase);
     twBase.addCategory(new CategoryPath("a", "0"));
     final CategoryPath abPath = new CategoryPath("a", "b");
@@ -757,56 +758,64 @@ public class TestTaxonomyCombined extends LuceneTestCase {
     final int abOrd = trBase.getOrdinal(abPath);
     final int abYoungChildBase1 = ca1.getYoungestChildArray()[abOrd];
 
-    for (int i=0; i < 1<<10; i++) { //1024 facets
+    final int numCategories = atLeast(800);
+    for (int i = 0; i < numCategories; i++) {
       twBase.addCategory(new CategoryPath("a", "b", Integer.toString(i)));
     }
-    twBase.commit();
+    twBase.close();
 
-    trBase.refresh();
+    TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(trBase);
+    assertNotNull(newTaxoReader);
+    trBase.close();
+    trBase = newTaxoReader;
+
     final ChildrenArrays ca2 = trBase.getChildrenArrays();
     final int abYoungChildBase2 = ca2.getYoungestChildArray()[abOrd];
 
-    for (int retry=0; retry<100; retry++) {
-      assertConsistentYoungestChild(abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry);
+    int numRetries = atLeast(50);
+    for (int retry = 0; retry < numRetries; retry++) {
+      assertConsistentYoungestChild(abPath, abOrd, abYoungChildBase1, abYoungChildBase2, retry, numCategories);
     }
 
+    trBase.close();
     indexDirBase.close();
   }
 
   private void assertConsistentYoungestChild(final CategoryPath abPath,
-      final int abOrd, final int abYoungChildBase1, final int abYoungChildBase2, final int retry)
+      final int abOrd, final int abYoungChildBase1, final int abYoungChildBase2, final int retry, int numCategories)
       throws Exception {
-    SlowRAMDirectory indexDir = new SlowRAMDirectory(-1,null); // no slowness for intialization
+    SlowRAMDirectory indexDir = new SlowRAMDirectory(-1, null); // no slowness for initialization
     TaxonomyWriter tw = new DirectoryTaxonomyWriter(indexDir);
     tw.addCategory(new CategoryPath("a", "0"));
     tw.addCategory(abPath);
     tw.commit();
 
-    final TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
-    for (int i=0; i < 1<<10; i++) { //1024 facets
+    final DirectoryTaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
+    for (int i = 0; i < numCategories; i++) {
       final CategoryPath cp = new CategoryPath("a", "b", Integer.toString(i));
       tw.addCategory(cp);
       assertEquals("Ordinal of "+cp+" must be invalid until Taxonomy Reader was refreshed", TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(cp));
     }
-    tw.commit();
+    tw.close();
 
-    final boolean[] stop = new boolean[] { false };
+    final AtomicBoolean stop = new AtomicBoolean(false);
     final Throwable[] error = new Throwable[] { null };
     final int retrieval[] = { 0 };
 
     Thread thread = new Thread("Child Arrays Verifier") {
       @Override
       public void run() {
-        setPriority(1+getPriority());
+        setPriority(1 + getPriority());
         try {
-          while (!stop[0]) {
-            int lastOrd = tr.getParentArray().length-1;
-            assertNotNull("path of last-ord "+lastOrd+" is not found!",tr.getPath(lastOrd));
-            assertChildrenArrays(tr.getChildrenArrays(),retry,retrieval[0]++);
+          while (!stop.get()) {
+            int lastOrd = tr.getParentArray().length - 1;
+            assertNotNull("path of last-ord " + lastOrd + " is not found!", tr.getPath(lastOrd));
+            assertChildrenArrays(tr.getChildrenArrays(), retry, retrieval[0]++);
+            sleep(10); // don't starve refresh()'s CPU, which sleeps every 50 bytes for 1 ms
           }
         } catch (Throwable e) {
          error[0] = e;
-          stop[0] = true;
+          stop.set(true);
        }
      }
 
|
||||||
thread.start();
|
thread.start();
|
||||||
|
|
||||||
indexDir.setSleepMillis(1); // some delay for refresh
|
indexDir.setSleepMillis(1); // some delay for refresh
|
||||||
tr.refresh();
|
TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr);
|
||||||
|
if (newTaxoReader != null) {
|
||||||
|
newTaxoReader.close();
|
||||||
|
}
|
||||||
|
|
||||||
stop[0] = true;
|
stop.set(true);
|
||||||
thread.join();
|
thread.join();
|
||||||
assertNull("Unexpcted exception at retry "+retry+" retrieval "+retrieval[0]+": \n"+stackTraceStr(error[0]), error[0]);
|
assertNull("Unexpcted exception at retry "+retry+" retrieval "+retrieval[0]+": \n"+stackTraceStr(error[0]), error[0]);
|
||||||
|
|
||||||
tw.close();
|
|
||||||
tr.close();
|
tr.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@@ -885,7 +896,7 @@ public class TestTaxonomyCombined extends LuceneTestCase {
       // ok
     }
     assertEquals(1, tr.getSize()); // still root only...
-    tr.refresh(); // this is not enough, because tw.commit() hasn't been done yet
+    assertNull(TaxonomyReader.openIfChanged(tr)); // this is not enough, because tw.commit() hasn't been done yet
     try {
       tr.getParent(author);
       fail("Before commit() and refresh(), getParent for "+author+" should still throw exception");
@@ -901,7 +912,11 @@ public class TestTaxonomyCombined extends LuceneTestCase {
       // ok
     }
     assertEquals(1, tr.getSize()); // still root only...
-    tr.refresh();
+    TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr);
+    assertNotNull(newTaxoReader);
+    tr.close();
+    tr = newTaxoReader;
+
     try {
       assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParent(author));
       // ok
@@ -917,7 +932,10 @@ public class TestTaxonomyCombined extends LuceneTestCase {
     tw.addCategory(new CategoryPath("Author", "Richard Dawkins"));
     int dawkins = 2;
     tw.commit();
-    tr.refresh();
+    newTaxoReader = TaxonomyReader.openIfChanged(tr);
+    assertNotNull(newTaxoReader);
+    tr.close();
+    tr = newTaxoReader;
     assertEquals(author, tr.getParent(dawkins));
     assertEquals(TaxonomyReader.ROOT_ORDINAL, tr.getParent(author));
     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(TaxonomyReader.ROOT_ORDINAL));
@@ -943,16 +961,19 @@ public class TestTaxonomyCombined extends LuceneTestCase {
     // before commit and refresh, no change:
     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
     assertEquals(1, tr.getSize()); // still root only...
-    tr.refresh(); // this is not enough, because tw.commit() hasn't been done yet
+    assertNull(TaxonomyReader.openIfChanged(tr)); // this is not enough, because tw.commit() hasn't been done yet
     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
     assertEquals(1, tr.getSize()); // still root only...
     tw.commit();
     // still not enough before refresh:
     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getOrdinal(author));
     assertEquals(1, tr.getSize()); // still root only...
-    tr.refresh(); // finally
+    TaxonomyReader newTaxoReader = TaxonomyReader.openIfChanged(tr);
+    assertNotNull(newTaxoReader);
+    tr.close();
+    tr = newTaxoReader;
     assertEquals(1, tr.getOrdinal(author));
-    assertEquals(2, tr.getSize()); // still root only...
+    assertEquals(2, tr.getSize());
     tw.close();
     tr.close();
     indexDir.close();
@@ -977,7 +998,7 @@ public class TestTaxonomyCombined extends LuceneTestCase {
     // Try to open a second writer, with the first one locking the directory.
     // We expect to get a LockObtainFailedException.
     try {
-      new DirectoryTaxonomyWriter(indexDir);
+      assertNull(new DirectoryTaxonomyWriter(indexDir));
       fail("should have failed to write in locked directory");
     } catch (LockObtainFailedException e) {
       // this is what we expect to happen.
@@ -989,7 +1010,10 @@ public class TestTaxonomyCombined extends LuceneTestCase {
     tw2.addCategory(new CategoryPath("hey"));
     tw2.close();
     // See that the writer indeed wrote:
-    tr.refresh();
+    TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
+    assertNotNull(newtr);
+    tr.close();
+    tr = newtr;
     assertEquals(3, tr.getOrdinal(new CategoryPath("hey")));
     tr.close();
     tw.close();
@@ -1086,6 +1110,27 @@ public class TestTaxonomyCombined extends LuceneTestCase {
     indexDir.close();
   }
 
+  @Test
+  public void testNRT() throws Exception {
+    Directory dir = newDirectory();
+    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
+    TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
+
+    CategoryPath cp = new CategoryPath("a");
+    writer.addCategory(cp);
+    TaxonomyReader newReader = TaxonomyReader.openIfChanged(reader);
+    assertNotNull("expected a new instance", newReader);
+    assertEquals(2, newReader.getSize());
+    assertNotSame(TaxonomyReader.INVALID_ORDINAL, newReader.getOrdinal(cp));
+    reader.close();
+    reader = newReader;
+
+    writer.close();
+    reader.close();
+
+    dir.close();
+  }
+
   // TODO (Facet): test multiple readers, one writer. Have the multiple readers
   // using the same object (simulating threads) or different objects
   // (simulating processes).
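
[Editor's note] testNRT above is the NRT contract in miniature. As an
application-side loop, using only APIs that appear in this patch, each
addCategory() becomes visible to a freshly reopened reader without any
commit():

    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
    TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
    for (String name : new String[] { "a", "b", "c" }) {
      writer.addCategory(new CategoryPath(name));
      TaxonomyReader newReader = TaxonomyReader.openIfChanged(reader);
      if (newReader != null) { // null would mean nothing changed
        reader.close();
        reader = newReader;
      }
    }
    writer.close();
    reader.close();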
@ -3,12 +3,11 @@ package org.apache.lucene.facet.taxonomy.directory;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||||
import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
|
|
||||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
|
|
||||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||||
|
import org.apache.lucene.index.LogMergePolicy;
|
||||||
import org.apache.lucene.store.AlreadyClosedException;
|
import org.apache.lucene.store.AlreadyClosedException;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.RAMDirectory;
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
@ -67,11 +66,8 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
|
||||||
dir.close();
|
dir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Test the boolean returned by TR.refresh
|
|
||||||
*/
|
|
||||||
@Test
|
@Test
|
||||||
public void testReaderRefreshResult() throws Exception {
|
public void testOpenIfChangedResult() throws Exception {
|
||||||
Directory dir = null;
|
Directory dir = null;
|
||||||
DirectoryTaxonomyWriter ltw = null;
|
DirectoryTaxonomyWriter ltw = null;
|
||||||
DirectoryTaxonomyReader ltr = null;
|
DirectoryTaxonomyReader ltr = null;
|
||||||
|
@@ -84,13 +80,15 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
       ltw.commit();
 
       ltr = new DirectoryTaxonomyReader(dir);
-      assertFalse("Nothing has changed",ltr.refresh());
+      assertNull("Nothing has changed", TaxonomyReader.openIfChanged(ltr));
 
       ltw.addCategory(new CategoryPath("b"));
       ltw.commit();
 
-      assertTrue("changes were committed",ltr.refresh());
-      assertFalse("Nothing has changed",ltr.refresh());
+      DirectoryTaxonomyReader newtr = TaxonomyReader.openIfChanged(ltr);
+      assertNotNull("changes were committed", newtr);
+      assertNull("Nothing has changed", TaxonomyReader.openIfChanged(newtr));
+      newtr.close();
     } finally {
       IOUtils.close(ltw, ltr, dir);
     }
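The assertions above spell out the new contract: refresh() returned a boolean and mutated the reader in place, while openIfChanged() is a static method that returns either null (nothing changed) or a distinct new reader that replaces the old one. A before/after sketch, with the removed call shown only in a comment and 'taxoReader' an assumed open reader:

    // Before this change (removed API, for comparison only):
    //   boolean changed = taxoReader.refresh();
    // After this change:
    TaxonomyReader newtr = TaxonomyReader.openIfChanged(taxoReader);
    boolean changed = (newtr != null); // null means "nothing changed"
    if (changed) {
      taxoReader.close();
      taxoReader = newtr;
    }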
@@ -119,18 +117,15 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
    */
   @Test
   public void testFreshReadRecreatedTaxonomy() throws Exception {
-    doTestReadRecreatedTaxono(random(), true);
+    doTestReadRecreatedTaxonomy(random(), true);
   }
 
-  /**
-   * recreating a taxonomy should work well with a refreshed taxonomy reader
-   */
   @Test
-  public void testRefreshReadRecreatedTaxonomy() throws Exception {
-    doTestReadRecreatedTaxono(random(), false);
+  public void testOpenIfChangedReadRecreatedTaxonomy() throws Exception {
+    doTestReadRecreatedTaxonomy(random(), false);
   }
 
-  private void doTestReadRecreatedTaxono(Random random, boolean closeReader) throws Exception {
+  private void doTestReadRecreatedTaxonomy(Random random, boolean closeReader) throws Exception {
     Directory dir = null;
     TaxonomyWriter tw = null;
     TaxonomyReader tr = null;
@@ -163,13 +158,10 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
         tr.close();
         tr = new DirectoryTaxonomyReader(dir);
       } else {
-        try {
-          tr.refresh();
-          fail("Expected InconsistentTaxonomyException");
-        } catch (InconsistentTaxonomyException e) {
-          tr.close();
-          tr = new DirectoryTaxonomyReader(dir);
-        }
+        TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
+        assertNotNull(newtr);
+        tr.close();
+        tr = newtr;
       }
       assertEquals("Wrong #categories in taxonomy (i="+i+", k="+k+")", baseNumCategories + 1 + k, tr.getSize());
     }
@@ -179,14 +171,14 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
   }
 
   @Test
-  public void testRefreshAndRefCount() throws Exception {
+  public void testOpenIfChangedAndRefCount() throws Exception {
     Directory dir = new RAMDirectory(); // no need for random directories here
 
     DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
     taxoWriter.addCategory(new CategoryPath("a"));
     taxoWriter.commit();
 
-    DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
     assertEquals("wrong refCount", 1, taxoReader.getRefCount());
 
     taxoReader.incRef();
@@ -194,12 +186,189 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
 
     taxoWriter.addCategory(new CategoryPath("a", "b"));
     taxoWriter.commit();
-    taxoReader.refresh();
-    assertEquals("wrong refCount", 2, taxoReader.getRefCount());
+    TaxonomyReader newtr = TaxonomyReader.openIfChanged(taxoReader);
+    assertNotNull(newtr);
+    taxoReader.close();
+    taxoReader = newtr;
+    assertEquals("wrong refCount", 1, taxoReader.getRefCount());
 
     taxoWriter.close();
     taxoReader.close();
     dir.close();
   }
 
+  @Test
+  public void testOpenIfChangedManySegments() throws Exception {
+    // test openIfChanged() when the taxonomy contains many segments
+    Directory dir = newDirectory();
+
+    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir) {
+      @Override
+      protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
+        IndexWriterConfig conf = super.createIndexWriterConfig(openMode);
+        LogMergePolicy lmp = (LogMergePolicy) conf.getMergePolicy();
+        lmp.setMergeFactor(2);
+        return conf;
+      }
+    };
+    TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
+
+    int numRounds = random().nextInt(10) + 10;
+    int numCategories = 1; // one for root
+    for (int i = 0; i < numRounds; i++) {
+      int numCats = random().nextInt(4) + 1;
+      for (int j = 0; j < numCats; j++) {
+        writer.addCategory(new CategoryPath(Integer.toString(i), Integer.toString(j)));
+      }
+      numCategories += numCats + 1 /* one for round-parent */;
+      TaxonomyReader newtr = TaxonomyReader.openIfChanged(reader);
+      assertNotNull(newtr);
+      reader.close();
+      reader = newtr;
+
+      // assert categories
+      assertEquals(numCategories, reader.getSize());
+      int roundOrdinal = reader.getOrdinal(new CategoryPath(Integer.toString(i)));
+      int[] parents = reader.getParentArray();
+      assertEquals(0, parents[roundOrdinal]); // round's parent is root
+      for (int j = 0; j < numCats; j++) {
+        int ord = reader.getOrdinal(new CategoryPath(Integer.toString(i), Integer.toString(j)));
+        assertEquals(roundOrdinal, parents[ord]); // category's parent is its round
+      }
+    }
+
+    reader.close();
+    writer.close();
+    dir.close();
+  }
+
+  @Test
+  public void testOpenIfChangedReuseAfterRecreate() throws Exception {
+    // tests that if the taxonomy is recreated, no data is reused from the previous taxonomy
+    Directory dir = newDirectory();
+    DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
+    CategoryPath cp_a = new CategoryPath("a");
+    writer.addCategory(cp_a);
+    writer.close();
+
+    DirectoryTaxonomyReader r1 = new DirectoryTaxonomyReader(dir);
+    // fill r1's caches
+    assertEquals(1, r1.getOrdinal(cp_a));
+    assertEquals(cp_a, r1.getPath(1));
+
+    // now recreate, add a different category
+    writer = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
+    CategoryPath cp_b = new CategoryPath("b");
+    writer.addCategory(cp_b);
+    writer.close();
+
+    DirectoryTaxonomyReader r2 = TaxonomyReader.openIfChanged(r1);
+    assertNotNull(r2);
+
+    // fill r2's caches
+    assertEquals(1, r2.getOrdinal(cp_b));
+    assertEquals(cp_b, r2.getPath(1));
+
+    // check that r1 doesn't see cp_b
+    assertEquals(TaxonomyReader.INVALID_ORDINAL, r1.getOrdinal(cp_b));
+    assertEquals(cp_a, r1.getPath(1));
+
+    // check that r2 doesn't see cp_a
+    assertEquals(TaxonomyReader.INVALID_ORDINAL, r2.getOrdinal(cp_a));
+    assertEquals(cp_b, r2.getPath(1));
+
+    r2.close();
+    r1.close();
+    dir.close();
+  }
+
+  @Test
+  public void testOpenIfChangedReuse() throws Exception {
+    // test the reuse of data from the old DTR instance
+    for (boolean nrt : new boolean[] {false, true}) {
+      Directory dir = newDirectory();
+      DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
+
+      CategoryPath cp_a = new CategoryPath("a");
+      writer.addCategory(cp_a);
+      if (!nrt) writer.commit();
+
+      DirectoryTaxonomyReader r1 = nrt ? new DirectoryTaxonomyReader(writer) : new DirectoryTaxonomyReader(dir);
+      // fill r1's caches
+      assertEquals(1, r1.getOrdinal(cp_a));
+      assertEquals(cp_a, r1.getPath(1));
+
+      CategoryPath cp_b = new CategoryPath("b");
+      writer.addCategory(cp_b);
+      if (!nrt) writer.commit();
+
+      DirectoryTaxonomyReader r2 = TaxonomyReader.openIfChanged(r1);
+      assertNotNull(r2);
+
+      // add r2's categories to the caches
+      assertEquals(2, r2.getOrdinal(cp_b));
+      assertEquals(cp_b, r2.getPath(2));
+
+      // check that r1 doesn't see cp_b
+      assertEquals(TaxonomyReader.INVALID_ORDINAL, r1.getOrdinal(cp_b));
+      assertNull(r1.getPath(2));
+
+      r1.close();
+      r2.close();
+      writer.close();
+      dir.close();
+    }
+  }
+
+  @Test
+  public void testOpenIfChangedReplaceTaxonomy() throws Exception {
+    // test openIfChanged when replaceTaxonomy is called, which is equivalent
+    // to a recreate, except that it works with NRT as well
+    Directory src = newDirectory();
+    DirectoryTaxonomyWriter w = new DirectoryTaxonomyWriter(src);
+    CategoryPath cp_b = new CategoryPath("b");
+    w.addCategory(cp_b);
+    w.close();
+
+    for (boolean nrt : new boolean[] {false, true}) {
+      Directory dir = newDirectory();
+      DirectoryTaxonomyWriter writer = new DirectoryTaxonomyWriter(dir);
+
+      CategoryPath cp_a = new CategoryPath("a");
+      writer.addCategory(cp_a);
+      if (!nrt) writer.commit();
+
+      DirectoryTaxonomyReader r1 = nrt ? new DirectoryTaxonomyReader(writer) : new DirectoryTaxonomyReader(dir);
+      // fill r1's caches
+      assertEquals(1, r1.getOrdinal(cp_a));
+      assertEquals(cp_a, r1.getPath(1));
+
+      // now replace taxonomy
+      writer.replaceTaxonomy(src);
+      if (!nrt) writer.commit();
+
+      DirectoryTaxonomyReader r2 = TaxonomyReader.openIfChanged(r1);
+      assertNotNull(r2);
+
+      // fill r2's caches
+      assertEquals(1, r2.getOrdinal(cp_b));
+      assertEquals(cp_b, r2.getPath(1));
+
+      // check that r1 doesn't see cp_b
+      assertEquals(TaxonomyReader.INVALID_ORDINAL, r1.getOrdinal(cp_b));
+      assertEquals(cp_a, r1.getPath(1));
+
+      // check that r2 doesn't see cp_a
+      assertEquals(TaxonomyReader.INVALID_ORDINAL, r2.getOrdinal(cp_a));
+      assertEquals(cp_b, r2.getPath(1));
+
+      r2.close();
+      r1.close();
+      writer.close();
+      dir.close();
+    }
+
+    src.close();
+  }
 }
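The tests above navigate the taxonomy upward via getParentArray(). For completeness, a hedged sketch of the downward walk through ChildrenArrays; the getChildrenArrays(), getYoungestChildArray() and getOlderSiblingArray() accessors are assumed from the 4.x facet module, and 'taxoReader' and 'ordinal' are assumed variables:

    // Enumerate the direct children of 'ordinal' using ChildrenArrays.
    ChildrenArrays ca = taxoReader.getChildrenArrays();
    int[] youngestChild = ca.getYoungestChildArray();
    int[] olderSibling = ca.getOlderSiblingArray();
    for (int child = youngestChild[ordinal];
         child != TaxonomyReader.INVALID_ORDINAL;
         child = olderSibling[child]) {
      // 'child' iterates the children, youngest first
    }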
@@ -8,7 +8,7 @@ import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.lucene.facet.taxonomy.CategoryPath;
-import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
 import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
 import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache;
 import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
@@ -178,12 +178,14 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
     DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, NO_OP_CACHE);
     touchTaxo(taxoWriter, new CategoryPath("a"));
 
-    DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
+    TaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
 
     touchTaxo(taxoWriter, new CategoryPath("b"));
 
-    // this should not fail
-    taxoReader.refresh();
+    TaxonomyReader newtr = TaxonomyReader.openIfChanged(taxoReader);
+    taxoReader.close();
+    taxoReader = newtr;
+    assertEquals(1, Integer.parseInt(taxoReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH)));
 
     // now recreate the taxonomy, and check that the epoch is preserved after opening DirTW again.
     taxoWriter.close();
@@ -195,14 +197,11 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
     touchTaxo(taxoWriter, new CategoryPath("d"));
     taxoWriter.close();
 
-    // this should fail
-    try {
-      taxoReader.refresh();
-      fail("IconsistentTaxonomyException should have been thrown");
-    } catch (InconsistentTaxonomyException e) {
-      // ok, expected
-    }
+    newtr = TaxonomyReader.openIfChanged(taxoReader);
+    taxoReader.close();
+    taxoReader = newtr;
+    assertEquals(2, Integer.parseInt(taxoReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH)));
 
     taxoReader.close();
     dir.close();
   }
@@ -221,7 +220,7 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
 
     DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
     assertEquals(1, Integer.parseInt(taxoReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH)));
-    taxoReader.refresh();
+    assertNull(TaxonomyReader.openIfChanged(taxoReader));
     taxoReader.close();
 
     dir.close();
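The epoch assertions in these hunks double as a recipe: DirectoryTaxonomyWriter records an INDEX_EPOCH value in the commit user-data and bumps it on every recreate, so a caller can detect that the taxonomy it reopened was recreated rather than merely grown. A small sketch built from the calls that appear in this diff; 'taxoReader' is an assumed open reader:

    // Read the taxonomy's recreate epoch from the commit user-data.
    String epochStr = taxoReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_EPOCH);
    int epoch = Integer.parseInt(epochStr); // observed here: 1, then 2 after a recreate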