mirror of https://github.com/apache/lucene.git
LUCENE-4565: Consolidate ParentArray and ChildrenArrays into ParallelTaxonomyArrays
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1417889 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
72d71de227
commit
01fd342513
|
@ -64,6 +64,11 @@ Changes in backwards compatibility policy
|
|||
even if the commitData is the only thing that changes.
|
||||
(Shai Erera, Michael McCandless)
|
||||
|
||||
* LUCENE-4565: TaxonomyReader.getParentArray and .getChildrenArrays consolidated
|
||||
into one getParallelTaxonomyArrays(). You can obtain the 3 arrays that the
|
||||
previous two methods returned by calling parents(), children() or siblings()
|
||||
on the returned ParallelTaxonomyArrays. (Shai Erera)
|
||||
|
||||
New Features
|
||||
|
||||
* LUCENE-4226: New experimental StoredFieldsFormat that compresses chunks of
|
||||
|
|
|
@ -6,10 +6,10 @@ import java.util.ArrayList;
|
|||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.search.results.FacetResultNode;
|
||||
import org.apache.lucene.facet.search.results.MutableFacetResultNode;
|
||||
import org.apache.lucene.facet.search.results.IntermediateFacetResult;
|
||||
import org.apache.lucene.facet.taxonomy.ChildrenArrays;
|
||||
import org.apache.lucene.facet.search.results.MutableFacetResultNode;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
|
||||
import org.apache.lucene.facet.util.ResultSortUtils;
|
||||
|
||||
/*
|
||||
|
@ -123,9 +123,9 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
|
|||
MutableFacetResultNode parentResultNode, FacetArrays facetArrays, int offset) throws IOException {
|
||||
int partitionSize = facetArrays.getArraysLength();
|
||||
int endOffset = offset + partitionSize;
|
||||
ChildrenArrays childrenArray = taxonomyReader.getChildrenArrays();
|
||||
int[] youngestChild = childrenArray.getYoungestChildArray();
|
||||
int[] olderSibling = childrenArray.getOlderSiblingArray();
|
||||
ParallelTaxonomyArrays childrenArray = taxonomyReader.getParallelTaxonomyArrays();
|
||||
int[] children = childrenArray.children();
|
||||
int[] siblings = childrenArray.siblings();
|
||||
FacetResultNode reusable = null;
|
||||
int localDepth = 0;
|
||||
int depth = facetRequest.getDepth();
|
||||
|
@ -134,9 +134,9 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
|
|||
|
||||
int tosOrdinal; // top of stack element
|
||||
|
||||
int yc = youngestChild[ordinal];
|
||||
int yc = children[ordinal];
|
||||
while (yc >= endOffset) {
|
||||
yc = olderSibling[yc];
|
||||
yc = siblings[yc];
|
||||
}
|
||||
// make use of the fact that TaxonomyReader.INVALID_ORDINAL == -1, < endOffset
|
||||
// and it, too, can stop the loop.
|
||||
|
@ -161,7 +161,7 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
|
|||
// need to proceed to its sibling
|
||||
localDepth--;
|
||||
// change element now on top of stack to its sibling.
|
||||
ordinalStack[localDepth] = olderSibling[ordinalStack[localDepth]];
|
||||
ordinalStack[localDepth] = siblings[ordinalStack[localDepth]];
|
||||
continue;
|
||||
}
|
||||
// top of stack is not invalid, this is the first time we see it on top of stack.
|
||||
|
@ -187,9 +187,9 @@ public class TopKFacetResultsHandler extends FacetResultsHandler {
|
|||
}
|
||||
if (localDepth < depth) {
|
||||
// push kid of current tos
|
||||
yc = youngestChild[tosOrdinal];
|
||||
yc = children[tosOrdinal];
|
||||
while (yc >= endOffset) {
|
||||
yc = olderSibling[yc];
|
||||
yc = siblings[yc];
|
||||
}
|
||||
ordinalStack[++localDepth] = yc;
|
||||
} else { // localDepth == depth; current tos exhausted its possible children, mark this by pushing INVALID_ORDINAL
|
||||
|
|
|
@ -4,16 +4,15 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
|
||||
import org.apache.lucene.facet.search.params.FacetRequest;
|
||||
import org.apache.lucene.facet.search.params.FacetRequest.SortOrder;
|
||||
import org.apache.lucene.facet.search.results.FacetResult;
|
||||
import org.apache.lucene.facet.search.results.FacetResultNode;
|
||||
import org.apache.lucene.facet.search.results.MutableFacetResultNode;
|
||||
import org.apache.lucene.facet.search.results.IntermediateFacetResult;
|
||||
import org.apache.lucene.facet.taxonomy.ChildrenArrays;
|
||||
import org.apache.lucene.facet.search.results.MutableFacetResultNode;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
|
||||
import org.apache.lucene.util.PriorityQueue;
|
||||
import org.apache.lucene.util.collections.IntIterator;
|
||||
import org.apache.lucene.util.collections.IntToObjectMap;
|
||||
|
||||
|
@ -141,9 +140,9 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
|
|||
}
|
||||
|
||||
int endOffset = offset + partitionSize; // one past the largest ordinal in the partition
|
||||
ChildrenArrays childrenArray = taxonomyReader.getChildrenArrays();
|
||||
int[] youngestChild = childrenArray.getYoungestChildArray();
|
||||
int[] olderSibling = childrenArray.getOlderSiblingArray();
|
||||
ParallelTaxonomyArrays childrenArray = taxonomyReader.getParallelTaxonomyArrays();
|
||||
int[] children = childrenArray.children();
|
||||
int[] siblings = childrenArray.siblings();
|
||||
int totalNumOfDescendantsConsidered = 0; // total number of facets with value != 0,
|
||||
// in the tree. These include those selected as top K in each node, and all the others that
|
||||
// were not. Not including rootNode
|
||||
|
@ -217,7 +216,7 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
|
|||
* we can continue to the older sibling of rootNode once the localDepth goes down, before we verify that
|
||||
* it went that down)
|
||||
*/
|
||||
ordinalStack[++localDepth] = youngestChild[rootNode];
|
||||
ordinalStack[++localDepth] = children[rootNode];
|
||||
siblingExplored[localDepth] = Integer.MAX_VALUE; // we have not verified position wrt current partition
|
||||
siblingExplored[0] = -1; // as if rootNode resides to the left of current position
|
||||
|
||||
|
@ -238,7 +237,7 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
|
|||
// its child, now just removed, would not have been pushed on it.
|
||||
// so the father is either inside the partition, or smaller ordinal
|
||||
if (siblingExplored[localDepth] < 0 ) {
|
||||
ordinalStack[localDepth] = olderSibling[ordinalStack[localDepth]];
|
||||
ordinalStack[localDepth] = siblings[ordinalStack[localDepth]];
|
||||
continue;
|
||||
}
|
||||
// in this point, siblingExplored[localDepth] between 0 and number of bestSiblings
|
||||
|
@ -264,7 +263,7 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
|
|||
//tosOrdinal was not examined yet for its position relative to current partition
|
||||
// and the best K of current partition, among its siblings, have not been determined yet
|
||||
while (tosOrdinal >= endOffset) {
|
||||
tosOrdinal = olderSibling[tosOrdinal];
|
||||
tosOrdinal = siblings[tosOrdinal];
|
||||
}
|
||||
// now it is inside. Run it and all its siblings inside the partition through a heap
|
||||
// and in doing so, count them, find best K, and sum into residue
|
||||
|
@ -297,12 +296,12 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
|
|||
// update totalNumOfDescendants by the now excluded node and all its descendants
|
||||
totalNumOfDescendantsConsidered--; // reduce the 1 earned when the excluded node entered the heap
|
||||
// and now return it and all its descendants. These will never make it to FacetResult
|
||||
totalNumOfDescendantsConsidered += countOnly (ac.ordinal, youngestChild,
|
||||
olderSibling, arrays, partitionSize, offset, endOffset, localDepth, depth);
|
||||
totalNumOfDescendantsConsidered += countOnly (ac.ordinal, children,
|
||||
siblings, arrays, partitionSize, offset, endOffset, localDepth, depth);
|
||||
reusables[++tosReuslables] = ac;
|
||||
}
|
||||
}
|
||||
tosOrdinal = olderSibling[tosOrdinal];
|
||||
tosOrdinal = siblings[tosOrdinal];
|
||||
}
|
||||
// now pq has best K children of ordinals that belong to the given partition.
|
||||
// Populate a new AACO with them.
|
||||
|
@ -343,7 +342,7 @@ public class TopKInEachNodeHandler extends FacetResultsHandler {
|
|||
ordinalStack[++localDepth] = TaxonomyReader.INVALID_ORDINAL;
|
||||
continue;
|
||||
}
|
||||
ordinalStack[++localDepth] = youngestChild[tosOrdinal];
|
||||
ordinalStack[++localDepth] = children[tosOrdinal];
|
||||
siblingExplored[localDepth] = Integer.MAX_VALUE;
|
||||
} // endof loop while stack is not empty
|
||||
|
||||
|
|
|
@ -1,87 +0,0 @@
|
|||
package org.apache.lucene.facet.taxonomy;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Equivalent representations of the taxonomy's parent info,
|
||||
* used internally for efficient computation of facet results:
|
||||
* "youngest child" and "older sibling"
|
||||
*/
|
||||
public class ChildrenArrays {
|
||||
|
||||
private final int[] youngestChild, olderSibling;
|
||||
|
||||
public ChildrenArrays(int[] parents) {
|
||||
this(parents, null);
|
||||
}
|
||||
|
||||
public ChildrenArrays(int[] parents, ChildrenArrays copyFrom) {
|
||||
youngestChild = new int[parents.length];
|
||||
olderSibling = new int[parents.length];
|
||||
int first = 0;
|
||||
if (copyFrom != null) {
|
||||
System.arraycopy(copyFrom.getYoungestChildArray(), 0, youngestChild, 0, copyFrom.getYoungestChildArray().length);
|
||||
System.arraycopy(copyFrom.getOlderSiblingArray(), 0, olderSibling, 0, copyFrom.getOlderSiblingArray().length);
|
||||
first = copyFrom.getOlderSiblingArray().length;
|
||||
}
|
||||
computeArrays(parents, first);
|
||||
}
|
||||
|
||||
private void computeArrays(int[] parents, int first) {
|
||||
// reset the youngest child of all ordinals. while this should be done only
|
||||
// for the leaves, we don't know up front which are the leaves, so we reset
|
||||
// all of them.
|
||||
for (int i = first; i < parents.length; i++) {
|
||||
youngestChild[i] = TaxonomyReader.INVALID_ORDINAL;
|
||||
}
|
||||
|
||||
// the root category has no parent, and therefore no siblings
|
||||
if (first == 0) {
|
||||
first = 1;
|
||||
olderSibling[0] = TaxonomyReader.INVALID_ORDINAL;
|
||||
}
|
||||
|
||||
for (int i = first; i < parents.length; i++) {
|
||||
// note that parents[i] is always < i, so the right-hand-side of
|
||||
// the following line is already set when we get here
|
||||
olderSibling[i] = youngestChild[parents[i]];
|
||||
youngestChild[parents[i]] = i;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an {@code int[]} the size of the taxonomy listing for each category
|
||||
* the ordinal of its immediate older sibling (the sibling in the taxonomy
|
||||
* tree with the highest ordinal below that of the given ordinal). The value
|
||||
* for a category with no older sibling is {@link TaxonomyReader#INVALID_ORDINAL}.
|
||||
*/
|
||||
public int[] getOlderSiblingArray() {
|
||||
return olderSibling;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns an {@code int[]} the size of the taxonomy listing the ordinal of
|
||||
* the youngest (highest numbered) child category of each category in the
|
||||
* taxonomy. The value for a leaf category (a category without children) is
|
||||
* {@link TaxonomyReader#INVALID_ORDINAL}.
|
||||
*/
|
||||
public int[] getYoungestChildArray() {
|
||||
return youngestChild;
|
||||
}
|
||||
|
||||
}
|
|
@ -5,6 +5,7 @@ import java.io.IOException;
|
|||
import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
|
||||
/*
|
||||
|
@ -162,16 +163,10 @@ public abstract class TaxonomyReader implements Closeable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns a {@link ChildrenArrays} object which can be used together to
|
||||
* efficiently enumerate the children of any category.
|
||||
* <p>
|
||||
* The caller can hold on to the object it got indefinitely - it is guaranteed
|
||||
* that no-one else will modify it. The other side of the same coin is that
|
||||
* the caller must treat the object which it got (and the arrays it contains)
|
||||
* as read-only and <b>not modify it</b>, because other callers might have
|
||||
* gotten the same object too.
|
||||
* Returns a {@link ParallelTaxonomyArrays} object which can be used to
|
||||
* efficiently traverse the taxonomy tree.
|
||||
*/
|
||||
public abstract ChildrenArrays getChildrenArrays() throws IOException;
|
||||
public abstract ParallelTaxonomyArrays getParallelTaxonomyArrays() throws IOException;
|
||||
|
||||
/**
|
||||
* Retrieve user committed data.
|
||||
|
@ -195,7 +190,6 @@ public abstract class TaxonomyReader implements Closeable {
|
|||
* Returns the ordinal of the parent category of the category with the given
|
||||
* ordinal, according to the following rules:
|
||||
*
|
||||
*
|
||||
* <ul>
|
||||
* <li>If the given ordinal is the {@link #ROOT_ORDINAL}, an
|
||||
* {@link #INVALID_ORDINAL} is returned.
|
||||
|
@ -210,19 +204,7 @@ public abstract class TaxonomyReader implements Closeable {
|
|||
* available ordinal)
|
||||
*/
|
||||
public abstract int getParent(int ordinal) throws IOException;
|
||||
|
||||
/**
|
||||
* Returns an {@code int[]} the size of the taxonomy listing the ordinal of
|
||||
* the parent category of each category in the taxonomy.
|
||||
* <p>
|
||||
* The caller can hold on to the array it got indefinitely - it is guaranteed
|
||||
* that no-one else will modify it. The other side of the same coin is that
|
||||
* the caller must treat the array it got as read-only and <b>not modify
|
||||
* it</b>, because other callers might have gotten the same array too (and
|
||||
* getParent() calls might be answered from the same array).
|
||||
*/
|
||||
public abstract int[] getParentArray() throws IOException;
|
||||
|
||||
|
||||
/**
|
||||
* Returns the path name of the category with the given ordinal. The path is
|
||||
* returned as a new CategoryPath object - to reuse an existing object, use
|
||||
|
|
|
@ -6,7 +6,6 @@ import java.util.logging.Level;
|
|||
import java.util.logging.Logger;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.ChildrenArrays;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.Consts.LoadFullPathOnly;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
|
@ -63,9 +62,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
|
|||
private LRUHashMap<String, Integer> ordinalCache;
|
||||
private LRUHashMap<Integer, String> categoryCache;
|
||||
|
||||
// TODO: consolidate these objects into one ParentInfo or something?
|
||||
private volatile ParentArray parentArray;
|
||||
private volatile ChildrenArrays childrenArrays;
|
||||
private volatile ParallelTaxonomyArrays taxoArrays;
|
||||
|
||||
private char delimiter = Consts.DEFAULT_DELIMITER;
|
||||
|
||||
|
@ -75,9 +72,8 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
|
|||
* arrays.
|
||||
*/
|
||||
DirectoryTaxonomyReader(DirectoryReader indexReader, DirectoryTaxonomyWriter taxoWriter,
|
||||
LRUHashMap<String,Integer> ordinalCache,
|
||||
LRUHashMap<Integer,String> categoryCache, ParentArray parentArray,
|
||||
ChildrenArrays childrenArrays) throws IOException {
|
||||
LRUHashMap<String,Integer> ordinalCache, LRUHashMap<Integer,String> categoryCache,
|
||||
ParallelTaxonomyArrays taxoArrays) throws IOException {
|
||||
this.indexReader = indexReader;
|
||||
this.taxoWriter = taxoWriter;
|
||||
this.taxoEpoch = taxoWriter == null ? -1 : taxoWriter.getTaxonomyEpoch();
|
||||
|
@ -86,14 +82,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
|
|||
this.ordinalCache = ordinalCache == null ? new LRUHashMap<String,Integer>(DEFAULT_CACHE_VALUE) : ordinalCache;
|
||||
this.categoryCache = categoryCache == null ? new LRUHashMap<Integer,String>(DEFAULT_CACHE_VALUE) : categoryCache;
|
||||
|
||||
this.parentArray = null;
|
||||
this.childrenArrays = null;
|
||||
if (parentArray != null) {
|
||||
this.parentArray = new ParentArray(indexReader, parentArray);
|
||||
if (childrenArrays != null) {
|
||||
this.childrenArrays = new ChildrenArrays(this.parentArray.getArray(), childrenArrays);
|
||||
}
|
||||
}
|
||||
this.taxoArrays = taxoArrays != null ? new ParallelTaxonomyArrays(indexReader, taxoArrays) : null;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -167,11 +156,20 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
|
|||
return ret;
|
||||
}
|
||||
|
||||
private synchronized void initTaxoArrays() throws IOException {
|
||||
if (taxoArrays == null) {
|
||||
// according to Java Concurrency in Practice, this might perform better on
|
||||
// some JVMs, because the array initialization doesn't happen on the
|
||||
// volatile member.
|
||||
ParallelTaxonomyArrays tmpArrays = new ParallelTaxonomyArrays(indexReader);
|
||||
taxoArrays = tmpArrays;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doClose() throws IOException {
|
||||
indexReader.close();
|
||||
parentArray = null;
|
||||
childrenArrays = null;
|
||||
taxoArrays = null;
|
||||
// do not clear() the caches, as they may be used by other DTR instances.
|
||||
ordinalCache = null;
|
||||
categoryCache = null;
|
||||
|
@ -233,9 +231,9 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
|
|||
if (recreated) {
|
||||
// if recreated, do not reuse anything from this instance. the information
|
||||
// will be lazily computed by the new instance when needed.
|
||||
newtr = new DirectoryTaxonomyReader(r2, taxoWriter, null, null, null, null);
|
||||
newtr = new DirectoryTaxonomyReader(r2, taxoWriter, null, null, null);
|
||||
} else {
|
||||
newtr = new DirectoryTaxonomyReader(r2, taxoWriter, ordinalCache, categoryCache, parentArray, childrenArrays);
|
||||
newtr = new DirectoryTaxonomyReader(r2, taxoWriter, ordinalCache, categoryCache, taxoArrays);
|
||||
}
|
||||
|
||||
success = true;
|
||||
|
@ -265,16 +263,12 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public ChildrenArrays getChildrenArrays() throws IOException {
|
||||
public ParallelTaxonomyArrays getParallelTaxonomyArrays() throws IOException {
|
||||
ensureOpen();
|
||||
if (childrenArrays == null) {
|
||||
synchronized (this) {
|
||||
if (childrenArrays == null) {
|
||||
childrenArrays = new ChildrenArrays(getParentArray());
|
||||
}
|
||||
}
|
||||
if (taxoArrays == null) {
|
||||
initTaxoArrays();
|
||||
}
|
||||
return childrenArrays;
|
||||
return taxoArrays;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -330,26 +324,12 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
|
|||
return ret;
|
||||
}
|
||||
|
||||
// TODO: move to a ParentInfo class? (see TODO for parentArray)
|
||||
@Override
|
||||
public int getParent(int ordinal) throws IOException {
|
||||
ensureOpen();
|
||||
return getParentArray()[ordinal];
|
||||
return getParallelTaxonomyArrays().parents()[ordinal];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int[] getParentArray() throws IOException {
|
||||
ensureOpen();
|
||||
if (parentArray == null) {
|
||||
synchronized (this) {
|
||||
if (parentArray == null) {
|
||||
parentArray = new ParentArray(indexReader);
|
||||
}
|
||||
}
|
||||
}
|
||||
return parentArray.getArray();
|
||||
}
|
||||
|
||||
@Override
|
||||
public CategoryPath getPath(int ordinal) throws IOException {
|
||||
ensureOpen();
|
||||
|
|
|
@ -112,6 +112,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
private int cacheMissesUntilFill = 11;
|
||||
private boolean shouldFillCache = true;
|
||||
|
||||
// even though lazily initialized, not volatile so that access to it is
|
||||
// faster. we keep a volatile boolean init instead.
|
||||
private ReaderManager readerManager;
|
||||
private volatile boolean initializedReaderManager = false;
|
||||
private volatile boolean shouldRefreshReaderManager;
|
||||
|
||||
/**
|
||||
* We call the cache "complete" if we know that every category in our
|
||||
* taxonomy is in the cache. When the cache is <B>not</B> complete, and
|
||||
|
@ -123,14 +129,10 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
* that some of the cached data was cleared).
|
||||
*/
|
||||
private volatile boolean cacheIsComplete;
|
||||
private volatile ReaderManager readerManager;
|
||||
private volatile boolean shouldRefreshReaderManager;
|
||||
private volatile boolean isClosed = false;
|
||||
private volatile ParentArray parentArray;
|
||||
private volatile ParallelTaxonomyArrays taxoArrays;
|
||||
private volatile int nextID;
|
||||
|
||||
// private Map<String,String> commitData;
|
||||
|
||||
/** Reads the commit data from a Directory. */
|
||||
private static Map<String, String> readCommitData(Directory dir) throws IOException {
|
||||
SegmentInfos infos = new SegmentInfos();
|
||||
|
@ -308,13 +310,14 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
|
||||
/** Opens a {@link ReaderManager} from the internal {@link IndexWriter}. */
|
||||
private void initReaderManager() throws IOException {
|
||||
if (readerManager == null) {
|
||||
if (!initializedReaderManager) {
|
||||
synchronized (this) {
|
||||
// verify that the taxo-writer hasn't been closed on us.
|
||||
ensureOpen();
|
||||
if (readerManager == null) {
|
||||
if (!initializedReaderManager) {
|
||||
readerManager = new ReaderManager(indexWriter, false);
|
||||
shouldRefreshReaderManager = false;
|
||||
initializedReaderManager = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -341,8 +344,6 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
return new Cl2oTaxonomyWriterCache(1024, 0.15f, 3);
|
||||
}
|
||||
|
||||
// convenience constructors:
|
||||
|
||||
public DirectoryTaxonomyWriter(Directory d) throws IOException {
|
||||
this(d, OpenMode.CREATE_OR_APPEND);
|
||||
}
|
||||
|
@ -375,9 +376,10 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
* <code>super.closeResources()</code> call in your implementation.
|
||||
*/
|
||||
protected synchronized void closeResources() throws IOException {
|
||||
if (readerManager != null) {
|
||||
if (initializedReaderManager) {
|
||||
readerManager.close();
|
||||
readerManager = null;
|
||||
initializedReaderManager = false;
|
||||
}
|
||||
if (cache != null) {
|
||||
cache.close();
|
||||
|
@ -467,15 +469,19 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
int doc = -1;
|
||||
DirectoryReader reader = readerManager.acquire();
|
||||
try {
|
||||
TermsEnum termsEnum = null; // reuse
|
||||
DocsEnum docs = null; // reuse
|
||||
final BytesRef catTerm = new BytesRef(categoryPath.toString(delimiter, prefixLen));
|
||||
for (AtomicReaderContext ctx : reader.leaves()) {
|
||||
Terms terms = ctx.reader().terms(Consts.FULL);
|
||||
if (terms != null) {
|
||||
TermsEnum termsEnum = terms.iterator(null);
|
||||
termsEnum = terms.iterator(termsEnum);
|
||||
if (termsEnum.seekExact(catTerm, true)) {
|
||||
// TODO: is it really ok that null is passed here as liveDocs?
|
||||
DocsEnum docs = termsEnum.docs(null, null, 0);
|
||||
// liveDocs=null because the taxonomy has no deletes
|
||||
docs = termsEnum.docs(null, docs, 0 /* freqs not required */);
|
||||
// if the term was found, we know it has exactly one document.
|
||||
doc = docs.nextDoc() + ctx.docBase;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -589,7 +595,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
addToCache(categoryPath, length, id);
|
||||
|
||||
// also add to the parent array
|
||||
parentArray = getParentArray().add(id, parent);
|
||||
taxoArrays = getTaxoArrays().add(id, parent);
|
||||
|
||||
return id;
|
||||
}
|
||||
|
@ -657,7 +663,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
// NOTE: since this method is sync'ed, it can call maybeRefresh, instead of
|
||||
// maybeRefreshBlocking. If ever this is changed, make sure to change the
|
||||
// call too.
|
||||
if (shouldRefreshReaderManager && readerManager != null) {
|
||||
if (shouldRefreshReaderManager && initializedReaderManager) {
|
||||
readerManager.maybeRefresh();
|
||||
shouldRefreshReaderManager = false;
|
||||
}
|
||||
|
@ -791,25 +797,30 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
// initReaderManager called in parallel.
|
||||
readerManager.close();
|
||||
readerManager = null;
|
||||
initializedReaderManager = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private ParentArray getParentArray() throws IOException {
|
||||
if (parentArray == null) {
|
||||
private ParallelTaxonomyArrays getTaxoArrays() throws IOException {
|
||||
if (taxoArrays == null) {
|
||||
synchronized (this) {
|
||||
if (parentArray == null) {
|
||||
if (taxoArrays == null) {
|
||||
initReaderManager();
|
||||
DirectoryReader reader = readerManager.acquire();
|
||||
try {
|
||||
parentArray = new ParentArray(reader);
|
||||
// according to Java Concurrency, this might perform better on some
|
||||
// JVMs, since the object initialization doesn't happen on the
|
||||
// volatile member.
|
||||
ParallelTaxonomyArrays tmpArrays = new ParallelTaxonomyArrays(reader);
|
||||
taxoArrays = tmpArrays;
|
||||
} finally {
|
||||
readerManager.release(reader);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return parentArray;
|
||||
return taxoArrays;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -821,7 +832,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
if (ordinal >= nextID) {
|
||||
throw new ArrayIndexOutOfBoundsException("requested ordinal is bigger than the largest ordinal in the taxonomy");
|
||||
}
|
||||
return getParentArray().getArray()[ordinal];
|
||||
return getTaxoArrays().parents()[ordinal];
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -0,0 +1,230 @@
|
|||
package org.apache.lucene.facet.taxonomy.directory;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Returns 3 arrays for traversing the taxonomy:
|
||||
* <ul>
|
||||
* <li>{@code parents}: {@code parents[i]} denotes the parent of category
|
||||
* ordinal {@code i}.</li>
|
||||
* <li>{@code children}: {@code children[i]} denotes the youngest child of
|
||||
* category ordinal {@code i}. The youngest child is defined as the category
|
||||
* that was added last to the taxonomy as an immediate child of {@code i}.</li>
|
||||
* <li>{@code siblings}: {@code siblings[i]} denotes the sibling of category
|
||||
* ordinal {@code i}. The sibling is defined as the previous youngest child of
|
||||
* {@code parents[i]}.</li>
|
||||
* </ul>
|
||||
*
|
||||
* To traverse the taxonomy tree, you typically start with {@code children[0]}
|
||||
* (ordinal 0 is reserved for ROOT), and then, depending on whether you want to do DFS or
|
||||
* BFS, you call {@code children[children[0]]} or {@code siblings[children[0]]}
|
||||
* and so forth, respectively.
|
||||
*
|
||||
* <p>
|
||||
* <b>NOTE:</b> you are not expected to modify the values of the arrays, since
|
||||
* the arrays are shared with other threads.
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class ParallelTaxonomyArrays {
|
||||
|
||||
private final int[] parents;
|
||||
|
||||
// the following two arrays are lazily initialized. note that we only keep a
|
||||
// single boolean member as volatile, instead of declaring the arrays
|
||||
// volatile. the code guarantees that only after the boolean is set to true,
|
||||
// the arrays are returned.
|
||||
private volatile boolean initializedChildren = false;
|
||||
private int[] children, siblings;
|
||||
|
||||
/** Used by {@link #add(int, int)} after the array grew. */
|
||||
private ParallelTaxonomyArrays(int[] parents) {
|
||||
this.parents = parents;
|
||||
}
|
||||
|
||||
public ParallelTaxonomyArrays(IndexReader reader) throws IOException {
|
||||
parents = new int[reader.maxDoc()];
|
||||
if (parents.length > 0) {
|
||||
initParents(reader, 0);
|
||||
// Starting Lucene 2.9, following the change LUCENE-1542, we can
|
||||
// no longer reliably read the parent "-1" (see comment in
|
||||
// LuceneTaxonomyWriter.SinglePositionTokenStream). We have no way
|
||||
// to fix this in indexing without breaking backward-compatibility
|
||||
// with existing indexes, so what we'll do instead is just
|
||||
// hard-code the parent of ordinal 0 to be -1, and assume (as is
|
||||
// indeed the case) that no other parent can be -1.
|
||||
parents[0] = TaxonomyReader.INVALID_ORDINAL;
|
||||
}
|
||||
}
|
||||
|
||||
public ParallelTaxonomyArrays(IndexReader reader, ParallelTaxonomyArrays copyFrom) throws IOException {
|
||||
assert copyFrom != null;
|
||||
|
||||
// note that copyParents.length may be equal to reader.maxDoc(). this is not a bug
|
||||
// it may be caused if e.g. the taxonomy segments were merged, and so an updated
|
||||
// NRT reader was obtained, even though nothing was changed. this is not very likely
|
||||
// to happen.
|
||||
int[] copyParents = copyFrom.parents();
|
||||
this.parents = new int[reader.maxDoc()];
|
||||
System.arraycopy(copyParents, 0, parents, 0, copyParents.length);
|
||||
initParents(reader, copyParents.length);
|
||||
|
||||
if (copyFrom.initializedChildren) {
|
||||
initChildrenSiblings(copyFrom);
|
||||
}
|
||||
}
|
||||
|
||||
private final synchronized void initChildrenSiblings(ParallelTaxonomyArrays copyFrom) {
|
||||
if (!initializedChildren) { // must do this check !
|
||||
children = new int[parents.length];
|
||||
siblings = new int[parents.length];
|
||||
if (copyFrom != null) {
|
||||
// called from the ctor, after we know copyFrom has initialized children/siblings
|
||||
System.arraycopy(copyFrom.children(), 0, children, 0, copyFrom.children().length);
|
||||
System.arraycopy(copyFrom.siblings(), 0, siblings, 0, copyFrom.siblings().length);
|
||||
}
|
||||
computeChildrenSiblings(parents, 0);
|
||||
initializedChildren = true;
|
||||
}
|
||||
}
|
||||
|
||||
private void computeChildrenSiblings(int[] parents, int first) {
|
||||
// reset the youngest child of all ordinals. while this should be done only
|
||||
// for the leaves, we don't know up front which are the leaves, so we reset
|
||||
// all of them.
|
||||
for (int i = first; i < parents.length; i++) {
|
||||
children[i] = TaxonomyReader.INVALID_ORDINAL;
|
||||
}
|
||||
|
||||
// the root category has no parent, and therefore no siblings
|
||||
if (first == 0) {
|
||||
first = 1;
|
||||
siblings[0] = TaxonomyReader.INVALID_ORDINAL;
|
||||
}
|
||||
|
||||
for (int i = first; i < parents.length; i++) {
|
||||
// note that parents[i] is always < i, so the right-hand-side of
|
||||
// the following line is already set when we get here
|
||||
siblings[i] = children[parents[i]];
|
||||
children[parents[i]] = i;
|
||||
}
|
||||
}
|
||||
|
||||
// Read the parents of the new categories
|
||||
private void initParents(IndexReader reader, int first) throws IOException {
|
||||
if (reader.maxDoc() == first) {
|
||||
return;
|
||||
}
|
||||
|
||||
// it's ok to use MultiFields because we only iterate on one posting list.
|
||||
// breaking it to loop over the leaves() only complicates code for no
|
||||
// apparent gain.
|
||||
DocsAndPositionsEnum positions = MultiFields.getTermPositionsEnum(reader, null,
|
||||
Consts.FIELD_PAYLOADS, Consts.PAYLOAD_PARENT_BYTES_REF,
|
||||
DocsAndPositionsEnum.FLAG_PAYLOADS);
|
||||
|
||||
// shouldn't really happen, if it does, something's wrong
|
||||
if (positions == null || positions.advance(first) == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
throw new CorruptIndexException("Missing parent data for category " + first);
|
||||
}
|
||||
|
||||
int num = reader.maxDoc();
|
||||
for (int i = first; i < num; i++) {
|
||||
if (positions.docID() == i) {
|
||||
if (positions.freq() == 0) { // shouldn't happen
|
||||
throw new CorruptIndexException("Missing parent data for category " + i);
|
||||
}
|
||||
|
||||
parents[i] = positions.nextPosition();
|
||||
|
||||
if (positions.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
if (i + 1 < num) {
|
||||
throw new CorruptIndexException("Missing parent data for category "+ (i + 1));
|
||||
}
|
||||
break;
|
||||
}
|
||||
} else { // this shouldn't happen
|
||||
throw new CorruptIndexException("Missing parent data for category " + i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the given ordinal/parent info and returns either a new instance if the
|
||||
* underlying array had to grow, or this instance otherwise.
|
||||
* <p>
|
||||
* <b>NOTE:</b> you should call this method from a thread-safe code.
|
||||
*/
|
||||
ParallelTaxonomyArrays add(int ordinal, int parentOrdinal) {
|
||||
if (ordinal >= parents.length) {
|
||||
int[] newarray = ArrayUtil.grow(parents);
|
||||
newarray[ordinal] = parentOrdinal;
|
||||
return new ParallelTaxonomyArrays(newarray);
|
||||
}
|
||||
parents[ordinal] = parentOrdinal;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the parents array, where {@code parents[i]} denotes the parent of
|
||||
* category ordinal {@code i}.
|
||||
*/
|
||||
public int[] parents() {
|
||||
return parents;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the children array, where {@code children[i]} denotes the youngest
|
||||
* child of category ordinal {@code i}. The youngest child is defined as the
|
||||
* category that was added last to the taxonomy as an immediate child of
|
||||
* {@code i}.
|
||||
*/
|
||||
public int[] children() {
|
||||
if (!initializedChildren) {
|
||||
initChildrenSiblings(null);
|
||||
}
|
||||
|
||||
// the array is guaranteed to be populated
|
||||
return children;
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the siblings array, where {@code siblings[i]} denotes the sibling
|
||||
* of category ordinal {@code i}. The sibling is defined as the previous
|
||||
* youngest child of {@code parents[i]}.
|
||||
*/
|
||||
public int[] siblings() {
|
||||
if (!initializedChildren) {
|
||||
initChildrenSiblings(null);
|
||||
}
|
||||
|
||||
// the array is guaranteed to be populated
|
||||
return siblings;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,130 +0,0 @@
|
|||
package org.apache.lucene.facet.taxonomy.directory;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DocsAndPositionsEnum;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* @lucene.experimental
|
||||
*/
|
||||
class ParentArray {
|
||||
|
||||
// TODO: maybe use PackedInts?
|
||||
private final int[] parentOrdinals;
|
||||
|
||||
/** Used by {@link #add(int, int)} when the array needs to grow. */
|
||||
ParentArray(int[] parentOrdinals) {
|
||||
this.parentOrdinals = parentOrdinals;
|
||||
}
|
||||
|
||||
public ParentArray(IndexReader reader) throws IOException {
|
||||
parentOrdinals = new int[reader.maxDoc()];
|
||||
if (parentOrdinals.length > 0) {
|
||||
initFromReader(reader, 0);
|
||||
// Starting Lucene 2.9, following the change LUCENE-1542, we can
|
||||
// no longer reliably read the parent "-1" (see comment in
|
||||
// LuceneTaxonomyWriter.SinglePositionTokenStream). We have no way
|
||||
// to fix this in indexing without breaking backward-compatibility
|
||||
// with existing indexes, so what we'll do instead is just
|
||||
// hard-code the parent of ordinal 0 to be -1, and assume (as is
|
||||
// indeed the case) that no other parent can be -1.
|
||||
parentOrdinals[0] = TaxonomyReader.INVALID_ORDINAL;
|
||||
}
|
||||
}
|
||||
|
||||
public ParentArray(IndexReader reader, ParentArray copyFrom) throws IOException {
|
||||
assert copyFrom != null;
|
||||
|
||||
// note that copyParents.length may be equal to reader.maxDoc(). this is not a bug
|
||||
// it may be caused if e.g. the taxonomy segments were merged, and so an updated
|
||||
// NRT reader was obtained, even though nothing was changed. this is not very likely
|
||||
// to happen.
|
||||
int[] copyParents = copyFrom.getArray();
|
||||
this.parentOrdinals = new int[reader.maxDoc()];
|
||||
System.arraycopy(copyParents, 0, parentOrdinals, 0, copyParents.length);
|
||||
initFromReader(reader, copyParents.length);
|
||||
}
|
||||
|
||||
// Read the parents of the new categories
|
||||
private void initFromReader(IndexReader reader, int first) throws IOException {
|
||||
if (reader.maxDoc() == first) {
|
||||
return;
|
||||
}
|
||||
|
||||
// it's ok to use MultiFields because we only iterate on one posting list.
|
||||
// breaking it to loop over the leaves() only complicates code for no
|
||||
// apparent gain.
|
||||
DocsAndPositionsEnum positions = MultiFields.getTermPositionsEnum(reader, null,
|
||||
Consts.FIELD_PAYLOADS, Consts.PAYLOAD_PARENT_BYTES_REF,
|
||||
DocsAndPositionsEnum.FLAG_PAYLOADS);
|
||||
|
||||
// shouldn't really happen, if it does, something's wrong
|
||||
if (positions == null || positions.advance(first) == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
throw new CorruptIndexException("Missing parent data for category " + first);
|
||||
}
|
||||
|
||||
int num = reader.maxDoc();
|
||||
for (int i = first; i < num; i++) {
|
||||
if (positions.docID() == i) {
|
||||
if (positions.freq() == 0) { // shouldn't happen
|
||||
throw new CorruptIndexException("Missing parent data for category " + i);
|
||||
}
|
||||
|
||||
parentOrdinals[i] = positions.nextPosition();
|
||||
|
||||
if (positions.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
if (i + 1 < num) {
|
||||
throw new CorruptIndexException("Missing parent data for category "+ (i + 1));
|
||||
}
|
||||
break;
|
||||
}
|
||||
} else { // this shouldn't happen
|
||||
throw new CorruptIndexException("Missing parent data for category " + i);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public int[] getArray() {
|
||||
return parentOrdinals;
|
||||
}
|
||||
|
||||
/**
|
||||
* Adds the given ordinal/parent info and returns either a new instance if the
|
||||
* underlying array had to grow, or this instance otherwise.
|
||||
* <p>
|
||||
* <b>NOTE:</b> you should call this method from a thread-safe code.
|
||||
*/
|
||||
ParentArray add(int ordinal, int parentOrdinal) {
|
||||
if (ordinal >= parentOrdinals.length) {
|
||||
int[] newarray = ArrayUtil.grow(parentOrdinals);
|
||||
newarray[ordinal] = parentOrdinal;
|
||||
return new ParentArray(newarray);
|
||||
}
|
||||
parentOrdinals[ordinal] = parentOrdinal;
|
||||
return this;
|
||||
}
|
||||
|
||||
}
|
|
@ -232,13 +232,15 @@ public class MultiIteratorsPerCLParamsTest extends LuceneTestCase {
|
|||
CategoryPath cp = new CategoryPath(requestedPath.getComponent(0));
|
||||
parentOrdinal = taxo.getOrdinal(cp);
|
||||
}
|
||||
parentArray = taxo.getParentArray();
|
||||
parentArray = taxo.getParallelTaxonomyArrays().parents();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean init() throws IOException {
|
||||
return superCLI.init();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long nextCategory() throws IOException {
|
||||
long next;
|
||||
while ((next = superCLI.nextCategory()) <= Integer.MAX_VALUE
|
||||
|
@ -259,6 +261,7 @@ public class MultiIteratorsPerCLParamsTest extends LuceneTestCase {
|
|||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean skipTo(int docId) throws IOException {
|
||||
return superCLI.skipTo(docId);
|
||||
}
|
||||
|
|
|
@ -9,6 +9,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
|
|||
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
|
||||
import org.apache.lucene.facet.taxonomy.directory.ParallelTaxonomyArrays;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.LockObtainFailedException;
|
||||
import org.apache.lucene.store.RAMDirectory;
|
||||
|
@ -545,7 +546,7 @@ public class TestTaxonomyCombined extends LuceneTestCase {
|
|||
fillTaxonomy(tw);
|
||||
tw.close();
|
||||
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
||||
int[] parents = tr.getParentArray();
|
||||
int[] parents = tr.getParallelTaxonomyArrays().parents();
|
||||
assertEquals(tr.getSize(), parents.length);
|
||||
for (int i=0; i<tr.getSize(); i++) {
|
||||
assertEquals(tr.getParent(i), parents[i]);
|
||||
|
@ -566,10 +567,10 @@ public class TestTaxonomyCombined extends LuceneTestCase {
|
|||
fillTaxonomy(tw);
|
||||
tw.close();
|
||||
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
||||
ChildrenArrays ca = tr.getChildrenArrays();
|
||||
int[] youngestChildArray = ca.getYoungestChildArray();
|
||||
ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
|
||||
int[] youngestChildArray = ca.children();
|
||||
assertEquals(tr.getSize(), youngestChildArray.length);
|
||||
int[] olderSiblingArray = ca.getOlderSiblingArray();
|
||||
int[] olderSiblingArray = ca.siblings();
|
||||
assertEquals(tr.getSize(), olderSiblingArray.length);
|
||||
for (int i=0; i<expectedCategories.length; i++) {
|
||||
// find expected children by looking at all expectedCategories
|
||||
|
@ -630,15 +631,15 @@ public class TestTaxonomyCombined extends LuceneTestCase {
|
|||
fillTaxonomy(tw);
|
||||
tw.close();
|
||||
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
||||
ChildrenArrays ca = tr.getChildrenArrays();
|
||||
int[] youngestChildArray = ca.getYoungestChildArray();
|
||||
assertEquals(tr.getSize(), youngestChildArray.length);
|
||||
int[] olderSiblingArray = ca.getOlderSiblingArray();
|
||||
ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
|
||||
int[] children = ca.children();
|
||||
assertEquals(tr.getSize(), children.length);
|
||||
int[] olderSiblingArray = ca.siblings();
|
||||
assertEquals(tr.getSize(), olderSiblingArray.length);
|
||||
|
||||
// test that the "youngest child" of every category is indeed a child:
|
||||
for (int i=0; i<tr.getSize(); i++) {
|
||||
int youngestChild = youngestChildArray[i];
|
||||
int youngestChild = children[i];
|
||||
if (youngestChild != TaxonomyReader.INVALID_ORDINAL) {
|
||||
assertEquals(i, tr.getParent(youngestChild));
|
||||
}
|
||||
|
@ -676,7 +677,7 @@ public class TestTaxonomyCombined extends LuceneTestCase {
|
|||
if (j==i) { // no child found
|
||||
j=TaxonomyReader.INVALID_ORDINAL;
|
||||
}
|
||||
assertEquals(j, youngestChildArray[i]);
|
||||
assertEquals(j, children[i]);
|
||||
}
|
||||
|
||||
// test that the "older sibling" is indeed the least oldest one - and
|
||||
|
@ -710,32 +711,32 @@ public class TestTaxonomyCombined extends LuceneTestCase {
|
|||
tw.addCategory(new CategoryPath("hi", "there"));
|
||||
tw.commit();
|
||||
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
|
||||
ChildrenArrays ca = tr.getChildrenArrays();
|
||||
ParallelTaxonomyArrays ca = tr.getParallelTaxonomyArrays();
|
||||
assertEquals(3, tr.getSize());
|
||||
assertEquals(3, ca.getOlderSiblingArray().length);
|
||||
assertEquals(3, ca.getYoungestChildArray().length);
|
||||
assertTrue(Arrays.equals(new int[] { 1, 2, -1 }, ca.getYoungestChildArray()));
|
||||
assertTrue(Arrays.equals(new int[] { -1, -1, -1 }, ca.getOlderSiblingArray()));
|
||||
assertEquals(3, ca.siblings().length);
|
||||
assertEquals(3, ca.children().length);
|
||||
assertTrue(Arrays.equals(new int[] { 1, 2, -1 }, ca.children()));
|
||||
assertTrue(Arrays.equals(new int[] { -1, -1, -1 }, ca.siblings()));
|
||||
tw.addCategory(new CategoryPath("hi", "ho"));
|
||||
tw.addCategory(new CategoryPath("hello"));
|
||||
tw.commit();
|
||||
// Before refresh, nothing changed..
|
||||
ChildrenArrays newca = tr.getChildrenArrays();
|
||||
ParallelTaxonomyArrays newca = tr.getParallelTaxonomyArrays();
|
||||
assertSame(newca, ca); // we got exactly the same object
|
||||
assertEquals(3, tr.getSize());
|
||||
assertEquals(3, ca.getOlderSiblingArray().length);
|
||||
assertEquals(3, ca.getYoungestChildArray().length);
|
||||
assertEquals(3, ca.siblings().length);
|
||||
assertEquals(3, ca.children().length);
|
||||
// After the refresh, things change:
|
||||
TaxonomyReader newtr = TaxonomyReader.openIfChanged(tr);
|
||||
assertNotNull(newtr);
|
||||
tr.close();
|
||||
tr = newtr;
|
||||
ca = tr.getChildrenArrays();
|
||||
ca = tr.getParallelTaxonomyArrays();
|
||||
assertEquals(5, tr.getSize());
|
||||
assertEquals(5, ca.getOlderSiblingArray().length);
|
||||
assertEquals(5, ca.getYoungestChildArray().length);
|
||||
assertTrue(Arrays.equals(new int[] { 4, 3, -1, -1, -1 }, ca.getYoungestChildArray()));
|
||||
assertTrue(Arrays.equals(new int[] { -1, -1, -1, 2, 1 }, ca.getOlderSiblingArray()));
|
||||
assertEquals(5, ca.siblings().length);
|
||||
assertEquals(5, ca.children().length);
|
||||
assertTrue(Arrays.equals(new int[] { 4, 3, -1, -1, -1 }, ca.children()));
|
||||
assertTrue(Arrays.equals(new int[] { -1, -1, -1, 2, 1 }, ca.siblings()));
|
||||
tw.close();
|
||||
tr.close();
|
||||
indexDir.close();
|
||||
|
@ -753,10 +754,10 @@ public class TestTaxonomyCombined extends LuceneTestCase {
|
|||
twBase.commit();
|
||||
TaxonomyReader trBase = new DirectoryTaxonomyReader(indexDirBase);
|
||||
|
||||
final ChildrenArrays ca1 = trBase.getChildrenArrays();
|
||||
final ParallelTaxonomyArrays ca1 = trBase.getParallelTaxonomyArrays();
|
||||
|
||||
final int abOrd = trBase.getOrdinal(abPath);
|
||||
final int abYoungChildBase1 = ca1.getYoungestChildArray()[abOrd];
|
||||
final int abYoungChildBase1 = ca1.children()[abOrd];
|
||||
|
||||
final int numCategories = atLeast(800);
|
||||
for (int i = 0; i < numCategories; i++) {
|
||||
|
@ -769,8 +770,8 @@ public class TestTaxonomyCombined extends LuceneTestCase {
|
|||
trBase.close();
|
||||
trBase = newTaxoReader;
|
||||
|
||||
final ChildrenArrays ca2 = trBase.getChildrenArrays();
|
||||
final int abYoungChildBase2 = ca2.getYoungestChildArray()[abOrd];
|
||||
final ParallelTaxonomyArrays ca2 = trBase.getParallelTaxonomyArrays();
|
||||
final int abYoungChildBase2 = ca2.children()[abOrd];
|
||||
|
||||
int numRetries = atLeast(50);
|
||||
for (int retry = 0; retry < numRetries; retry++) {
|
||||
|
@ -808,9 +809,9 @@ public class TestTaxonomyCombined extends LuceneTestCase {
|
|||
setPriority(1 + getPriority());
|
||||
try {
|
||||
while (!stop.get()) {
|
||||
int lastOrd = tr.getParentArray().length - 1;
|
||||
int lastOrd = tr.getParallelTaxonomyArrays().parents().length - 1;
|
||||
assertNotNull("path of last-ord " + lastOrd + " is not found!", tr.getPath(lastOrd));
|
||||
assertChildrenArrays(tr.getChildrenArrays(), retry, retrieval[0]++);
|
||||
assertChildrenArrays(tr.getParallelTaxonomyArrays(), retry, retrieval[0]++);
|
||||
sleep(10); // don't starve refresh()'s CPU, which sleeps every 50 bytes for 1 ms
|
||||
}
|
||||
} catch (Throwable e) {
|
||||
|
@ -819,13 +820,13 @@ public class TestTaxonomyCombined extends LuceneTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
private void assertChildrenArrays(ChildrenArrays ca, int retry, int retrieval) {
|
||||
final int abYoungChild = ca.getYoungestChildArray()[abOrd];
|
||||
private void assertChildrenArrays(ParallelTaxonomyArrays ca, int retry, int retrieval) {
|
||||
final int abYoungChild = ca.children()[abOrd];
|
||||
assertTrue(
|
||||
"Retry "+retry+": retrieval: "+retrieval+": wrong youngest child for category "+abPath+" (ord="+abOrd+
|
||||
") - must be either "+abYoungChildBase1+" or "+abYoungChildBase2+" but was: "+abYoungChild,
|
||||
abYoungChildBase1==abYoungChild ||
|
||||
abYoungChildBase2==ca.getYoungestChildArray()[abOrd]);
|
||||
abYoungChildBase2==ca.children()[abOrd]);
|
||||
}
|
||||
};
|
||||
thread.start();
|
||||
|
|
|
@ -233,7 +233,7 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
|
|||
// assert categories
|
||||
assertEquals(numCategories, reader.getSize());
|
||||
int roundOrdinal = reader.getOrdinal(new CategoryPath(Integer.toString(i)));
|
||||
int[] parents = reader.getParentArray();
|
||||
int[] parents = reader.getParallelTaxonomyArrays().parents();
|
||||
assertEquals(0, parents[roundOrdinal]); // round's parent is root
|
||||
for (int j = 0; j < numCats; j++) {
|
||||
int ord = reader.getOrdinal(new CategoryPath(Integer.toString(i), Integer.toString(j)));
|
||||
|
@ -268,7 +268,7 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
|
|||
|
||||
TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
|
||||
assertEquals(1, reader.getSize());
|
||||
assertEquals(1, reader.getParentArray().length);
|
||||
assertEquals(1, reader.getParallelTaxonomyArrays().parents().length);
|
||||
|
||||
// add category and call forceMerge -- this should flush IW and merge segments down to 1
|
||||
// in ParentArray.initFromReader, this used to fail assuming there are no parents.
|
||||
|
@ -281,7 +281,7 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
|
|||
reader.close();
|
||||
reader = newtr;
|
||||
assertEquals(2, reader.getSize());
|
||||
assertEquals(2, reader.getParentArray().length);
|
||||
assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);
|
||||
|
||||
reader.close();
|
||||
writer.close();
|
||||
|
@ -315,7 +315,7 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
|
|||
|
||||
TaxonomyReader reader = new DirectoryTaxonomyReader(writer);
|
||||
assertEquals(2, reader.getSize());
|
||||
assertEquals(2, reader.getParentArray().length);
|
||||
assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);
|
||||
|
||||
// merge all the segments so that NRT reader thinks there's a change
|
||||
iw.forceMerge(1);
|
||||
|
@ -326,7 +326,7 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
|
|||
reader.close();
|
||||
reader = newtr;
|
||||
assertEquals(2, reader.getSize());
|
||||
assertEquals(2, reader.getParentArray().length);
|
||||
assertEquals(2, reader.getParallelTaxonomyArrays().parents().length);
|
||||
|
||||
reader.close();
|
||||
writer.close();
|
||||
|
|
Loading…
Reference in New Issue