mirror of https://github.com/apache/lucene.git
LUCENE-4659: Cleanup CategoryPath
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1429570 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent cf8a0ca50b
commit b9a3d9ca70
@@ -103,6 +103,9 @@ Changes in backwards compatibility policy
   implementations.
+
+  NOTE: indexes that contain category enhancements/associations are not supported
+  by the new code and should be recreated. (Shai Erera)
 
 * LUCENE-4659: Massive cleanup to CategoryPath API. Additionally, CategoryPath is
   now immutable, so you don't need to clone() it. (Shai Erera)
 
 New Features
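Since CategoryPath is immutable after this change, defensive copies are no longer needed anywhere a path is shared. A minimal before/after sketch (hypothetical caller code, not part of this commit):

    // Before: CategoryPath was mutable, so callers cloned defensively.
    CategoryPath mutable = new CategoryPath();
    mutable.add("Author");                    // components appended in place
    CategoryPath safeCopy = mutable.clone();  // protect against a later trim()/clear()

    // After: the path is fixed at construction and can be shared freely.
    CategoryPath cp = new CategoryPath("Author", "Shai Erera");
    CategoryPath parent = cp.subpath(1);      // new object; cp itself is untouched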
@@ -53,13 +53,14 @@ public class RandomFacetSource extends FacetSource {
         facets.clear();
       }
       int numFacets = 1 + random.nextInt(maxDocFacets-1); // at least one facet to each doc
-      for (int i=0; i<numFacets; i++) {
-        CategoryPath cp = new CategoryPath();
-        int depth = 1 + random.nextInt(maxFacetDepth-1); // depth 0 is not useful
-        for (int k=0; k<depth; k++) {
-          cp.add(Integer.toString(random.nextInt(maxValue)));
+      for (int i = 0; i < numFacets; i++) {
+        int depth = 1 + random.nextInt(maxFacetDepth - 1); // depth 0 is not useful
+        String[] components = new String[depth];
+        for (int k = 0; k < depth; k++) {
+          components[k] = Integer.toString(random.nextInt(maxValue));
           addItem();
         }
+        CategoryPath cp = new CategoryPath(components);
         facets.setAssociation(cp, null);
         addBytes(cp.toString().length()); // very rough approximation
       }
@@ -89,11 +89,6 @@ public class AssociationsFacetFields extends FacetFields {
         clpContainer = new CategoryAssociationsContainer();
         categoryLists.put(clp, clpContainer);
       }
-      // DrillDownStream modifies the CategoryPath by calling trim(). That means
-      // that the source category, as the app ses it, is modified. While for
-      // most apps this is not a problem, we need to protect against it. If
-      // CategoryPath will be made immutable, we can stop cloning.
-      cp = cp.clone();
       clpContainer.setAssociation(cp, categoryAssociations.getAssociation(cp));
     }
     return categoryLists;
@@ -55,12 +55,12 @@ public class DrillDownStream extends TokenStream {
 
   @Override
   public final boolean incrementToken() throws IOException {
-    if (current.length() == 0) {
+    if (current.length == 0) {
       if (!categories.hasNext()) {
         return false; // no more categories
       }
       current = categories.next();
-      termAttribute.resizeBuffer(current.charsNeededForFullPath());
+      termAttribute.resizeBuffer(current.fullPathLength());
       isParent = false;
     }
 
@@ -73,8 +73,8 @@ public class DrillDownStream extends TokenStream {
     // prepare current for next call by trimming the last component (parents)
     do {
       // skip all parent categories which are not accepted by PathPolicy
-      current.trim(1);
-    } while (!pathPolicy.shouldAdd(current) && current.length() > 0);
+      current = current.subpath(current.length - 1);
+    } while (!pathPolicy.shouldAdd(current) && current.length > 0);
     isParent = true;
     return true;
   }
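The trim(1) to subpath(length - 1) change above is the core of the API shift: trim() shortened the object in place, while subpath() returns a fresh CategoryPath and leaves the original intact. A hedged sketch of the difference (values illustrative):

    CategoryPath cp = new CategoryPath("a", "b", "c");
    CategoryPath parent = cp.subpath(cp.length - 1);  // parent = a/b, cp still a/b/c
    // Old mutable style for comparison: cp.trim(1) would have turned cp itself
    // into a/b, which is why FacetFields had to clone() paths before handing
    // them to DrillDownStream.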
@@ -82,7 +82,7 @@ public class DrillDownStream extends TokenStream {
   @Override
   public void reset() throws IOException {
     current = categories.next();
-    termAttribute.resizeBuffer(current.charsNeededForFullPath());
+    termAttribute.resizeBuffer(current.fullPathLength());
     isParent = false;
   }
 
@@ -142,11 +142,7 @@ public class FacetFields {
         list = new ArrayList<CategoryPath>();
         categoryLists.put(clp, list);
       }
-      // DrillDownStream modifies the CategoryPath by calling trim(). That means
-      // that the source category, as the app ses it, is modified. While for
-      // most apps this is not a problem, we need to protect against it. If
-      // CategoryPath will be made immutable, we can stop cloning.
-      list.add(cp.clone());
+      list.add(cp);
     }
     return categoryLists;
   }
@@ -39,6 +39,6 @@ public class NonTopLevelPathPolicy implements PathPolicy {
    */
   @Override
   public boolean shouldAdd(CategoryPath categoryPath) {
-    return categoryPath.length() >= DEFAULT_MINIMAL_SUBPATH_LENGTH;
+    return categoryPath.length >= DEFAULT_MINIMAL_SUBPATH_LENGTH;
   }
 }
@@ -33,12 +33,12 @@ public interface PathPolicy extends Serializable {
 
   /**
    * A {@link PathPolicy} which adds all {@link CategoryPath} that have at least
-   * one component (i.e. {@link CategoryPath#length()} > 0) to the categories
+   * one component (i.e. {@link CategoryPath#length} > 0) to the categories
    * stream.
    */
   public static final PathPolicy ALL_CATEGORIES = new PathPolicy() {
     @Override
-    public boolean shouldAdd(CategoryPath categoryPath) { return categoryPath.length() > 0; }
+    public boolean shouldAdd(CategoryPath categoryPath) { return categoryPath.length > 0; }
   };
 
   /**
@@ -54,11 +54,11 @@ public class FacetIndexingParams {
   public static final FacetIndexingParams ALL_PARENTS = new FacetIndexingParams();
 
   /**
-   * The default delimiter with which {@link CategoryPath#getComponent(int)
-   * components} are concatenated when written to the index, e.g. as drill-down
-   * terms. If you choose to override it by overiding
-   * {@link #getFacetDelimChar()}, you should make sure that you return a
-   * character that's not found in any path component.
+   * The default delimiter with which {@link CategoryPath#components} are
+   * concatenated when written to the index, e.g. as drill-down terms. If you
+   * choose to override it by overiding {@link #getFacetDelimChar()}, you should
+   * make sure that you return a character that's not found in any path
+   * component.
    */
   public static final char DEFAULT_FACET_DELIM_CHAR = '\uF749';
 
@@ -108,10 +108,10 @@ public class FacetIndexingParams {
    * that were written.
    * <p>
    * <b>NOTE:</b> You should make sure that the {@code char[]} is large enough,
-   * by e.g. calling {@link CategoryPath#charsNeededForFullPath()}.
+   * by e.g. calling {@link CategoryPath#fullPathLength()}.
    */
   public int drillDownTermText(CategoryPath path, char[] buffer) {
-    return path.copyToCharArray(buffer, 0, -1, getFacetDelimChar());
+    return path.copyFullPath(buffer, 0, getFacetDelimChar());
   }
 
   /**
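The renamed pair works together: fullPathLength() reports how many chars the flattened path needs, and copyFullPath() writes the components separated by the facet delimiter, returning the number of chars written. A hedged usage sketch (path value illustrative):

    FacetIndexingParams params = FacetIndexingParams.ALL_PARENTS;
    CategoryPath path = new CategoryPath("Author", "Shai Erera");
    char[] buffer = new char[path.fullPathLength()];    // exactly large enough
    int written = params.drillDownTermText(path, buffer);
    String termText = new String(buffer, 0, written);   // "Author" + delim + "Shai Erera"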
@@ -43,7 +43,7 @@ public class PerDimensionIndexingParams extends FacetIndexingParams {
   /**
    * Initializes a new instance with the given dimension-to-params mapping. The
    * dimension is considered as what's returned by
-   * {@link CategoryPath#getComponent(int) cp.getComponent(0)}.
+   * {@link CategoryPath#components cp.components[0]}.
    *
    * <p>
    * <b>NOTE:</b> for any dimension whose {@link CategoryListParams} is not
@@ -65,7 +65,7 @@ public class PerDimensionIndexingParams extends FacetIndexingParams {
     super(categoryListParams);
     clParamsMap = new HashMap<String,CategoryListParams>();
     for (Entry<CategoryPath, CategoryListParams> e : paramsMap.entrySet()) {
-      clParamsMap.put(e.getKey().getComponent(0), e.getValue());
+      clParamsMap.put(e.getKey().components[0], e.getValue());
     }
   }
 
@@ -83,7 +83,7 @@ public class PerDimensionIndexingParams extends FacetIndexingParams {
   @Override
   public CategoryListParams getCategoryListParams(CategoryPath category) {
     if (category != null) {
-      CategoryListParams clParams = clParamsMap.get(category.getComponent(0));
+      CategoryListParams clParams = clParamsMap.get(category.components[0]);
       if (clParams != null) {
         return clParams;
       }
@@ -53,7 +53,7 @@ public final class DrillDown {
   /** Return a drill-down {@link Term} for a category. */
   public static final Term term(FacetIndexingParams iParams, CategoryPath path) {
     CategoryListParams clp = iParams.getCategoryListParams(path);
-    char[] buffer = new char[path.charsNeededForFullPath()];
+    char[] buffer = new char[path.fullPathLength()];
     iParams.drillDownTermText(path, buffer);
     return new Term(clp.getTerm().field(), String.valueOf(buffer));
   }
@@ -153,7 +153,7 @@ public class TotalFacetCounts {
 
   // needed because FacetSearchParams do not allow empty FacetRequests
   private static final List<FacetRequest> DUMMY_REQ = Arrays.asList(
-      new FacetRequest[] { new CountFacetRequest(new CategoryPath(), 1) });
+      new FacetRequest[] { new CountFacetRequest(CategoryPath.EMPTY, 1) });
 
   static TotalFacetCounts compute(final IndexReader indexReader,
       final TaxonomyReader taxonomy, final FacetIndexingParams facetIndexingParams,
(File diff suppressed because it is too large.)
@@ -205,22 +205,9 @@ public abstract class TaxonomyReader implements Closeable {
    */
   public abstract int getParent(int ordinal) throws IOException;
 
-  /**
-   * Returns the path name of the category with the given ordinal. The path is
-   * returned as a new CategoryPath object - to reuse an existing object, use
-   * {@link #getPath(int, CategoryPath)}.
-   *
-   * @return a {@link CategoryPath} with the required path, or {@code null} if
-   *         the given ordinal is unknown to the taxonomy.
-   */
+  /** Returns the path name of the category with the given ordinal. */
   public abstract CategoryPath getPath(int ordinal) throws IOException;
 
-  /**
-   * Same as {@link #getPath(int)}, only reuses the given {@link CategoryPath}
-   * instances.
-   */
-  public abstract boolean getPath(int ordinal, CategoryPath result) throws IOException;
-
   /** Returns the current refCount for this taxonomy reader. */
   public final int getRefCount() {
     return refCount.get();
@@ -59,8 +59,8 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
   private final DirectoryReader indexReader;
 
   // TODO: test DoubleBarrelLRUCache and consider using it instead
-  private LRUHashMap<String, Integer> ordinalCache;
-  private LRUHashMap<Integer, String> categoryCache;
+  private LRUHashMap<CategoryPath, Integer> ordinalCache;
+  private LRUHashMap<Integer, CategoryPath> categoryCache;
 
   private volatile ParallelTaxonomyArrays taxoArrays;
 
@@ -72,15 +72,15 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
    * arrays.
    */
   DirectoryTaxonomyReader(DirectoryReader indexReader, DirectoryTaxonomyWriter taxoWriter,
-      LRUHashMap<String,Integer> ordinalCache, LRUHashMap<Integer,String> categoryCache,
+      LRUHashMap<CategoryPath,Integer> ordinalCache, LRUHashMap<Integer,CategoryPath> categoryCache,
       ParallelTaxonomyArrays taxoArrays) throws IOException {
     this.indexReader = indexReader;
     this.taxoWriter = taxoWriter;
     this.taxoEpoch = taxoWriter == null ? -1 : taxoWriter.getTaxonomyEpoch();
 
     // use the same instance of the cache, note the protective code in getOrdinal and getPath
-    this.ordinalCache = ordinalCache == null ? new LRUHashMap<String,Integer>(DEFAULT_CACHE_VALUE) : ordinalCache;
-    this.categoryCache = categoryCache == null ? new LRUHashMap<Integer,String>(DEFAULT_CACHE_VALUE) : categoryCache;
+    this.ordinalCache = ordinalCache == null ? new LRUHashMap<CategoryPath,Integer>(DEFAULT_CACHE_VALUE) : ordinalCache;
+    this.categoryCache = categoryCache == null ? new LRUHashMap<Integer,CategoryPath>(DEFAULT_CACHE_VALUE) : categoryCache;
 
     this.taxoArrays = taxoArrays != null ? new ParallelTaxonomyArrays(indexReader, taxoArrays) : null;
   }
@@ -102,8 +102,8 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
 
     // These are the default cache sizes; they can be configured after
     // construction with the cache's setMaxSize() method
-    ordinalCache = new LRUHashMap<String, Integer>(DEFAULT_CACHE_VALUE);
-    categoryCache = new LRUHashMap<Integer, String>(DEFAULT_CACHE_VALUE);
+    ordinalCache = new LRUHashMap<CategoryPath, Integer>(DEFAULT_CACHE_VALUE);
+    categoryCache = new LRUHashMap<Integer, CategoryPath>(DEFAULT_CACHE_VALUE);
   }
 
   /**
@@ -121,39 +121,8 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
 
     // These are the default cache sizes; they can be configured after
     // construction with the cache's setMaxSize() method
-    ordinalCache = new LRUHashMap<String, Integer>(DEFAULT_CACHE_VALUE);
-    categoryCache = new LRUHashMap<Integer, String>(DEFAULT_CACHE_VALUE);
-  }
-
-  private String getLabel(int catID) throws IOException {
-    ensureOpen();
-
-    // Since the cache is shared with DTR instances allocated from
-    // doOpenIfChanged, we need to ensure that the ordinal is one that this DTR
-    // instance recognizes. Therefore we do this check up front, before we hit
-    // the cache.
-    if (catID < 0 || catID >= indexReader.maxDoc()) {
-      return null;
-    }
-
-    // TODO: can we use an int-based hash impl, such as IntToObjectMap,
-    // wrapped as LRU?
-    Integer catIDInteger = Integer.valueOf(catID);
-    synchronized (categoryCache) {
-      String res = categoryCache.get(catIDInteger);
-      if (res != null) {
-        return res;
-      }
-    }
-
-    final LoadFullPathOnly loader = new LoadFullPathOnly();
-    indexReader.document(catID, loader);
-    String ret = loader.getFullPath();
-    synchronized (categoryCache) {
-      categoryCache.put(catIDInteger, ret);
-    }
-
-    return ret;
+    ordinalCache = new LRUHashMap<CategoryPath, Integer>(DEFAULT_CACHE_VALUE);
+    categoryCache = new LRUHashMap<Integer, CategoryPath>(DEFAULT_CACHE_VALUE);
   }
 
   private synchronized void initTaxoArrays() throws IOException {
@@ -278,16 +247,15 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
   }
 
   @Override
-  public int getOrdinal(CategoryPath categoryPath) throws IOException {
+  public int getOrdinal(CategoryPath cp) throws IOException {
     ensureOpen();
-    if (categoryPath.length() == 0) {
+    if (cp.length == 0) {
       return ROOT_ORDINAL;
     }
-    String path = categoryPath.toString(delimiter);
 
     // First try to find the answer in the LRU cache:
     synchronized (ordinalCache) {
-      Integer res = ordinalCache.get(path);
+      Integer res = ordinalCache.get(cp);
       if (res != null) {
         if (res.intValue() < indexReader.maxDoc()) {
           // Since the cache is shared with DTR instances allocated from
@@ -307,7 +275,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
     // If we're still here, we have a cache miss. We need to fetch the
     // value from disk, and then also put it in the cache:
     int ret = TaxonomyReader.INVALID_ORDINAL;
-    DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, null, Consts.FULL, new BytesRef(path), 0);
+    DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, null, Consts.FULL, new BytesRef(cp.toString(delimiter)), 0);
     if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
       ret = docs.docID();
 
@@ -317,7 +285,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
       // information about found categories, we cannot accidently tell a new
       // generation of DTR that a category does not exist.
       synchronized (ordinalCache) {
-        ordinalCache.put(path, Integer.valueOf(ret));
+        ordinalCache.put(cp, Integer.valueOf(ret));
       }
     }
 
@@ -333,31 +301,33 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
   @Override
   public CategoryPath getPath(int ordinal) throws IOException {
     ensureOpen();
-    // TODO (Facet): Currently, the LRU cache we use (getCategoryCache) holds
-    // strings with delimiters, not CategoryPath objects, so even if
-    // we have a cache hit, we need to process the string and build a new
-    // CategoryPath object every time. What is preventing us from putting
-    // the actual CategoryPath object in the cache is the fact that these
-    // objects are mutable. So we should create an immutable (read-only)
-    // interface that CategoryPath implements, and this method should
-    // return this interface, not the writable CategoryPath.
-    String label = getLabel(ordinal);
-    if (label == null) {
+
+    // Since the cache is shared with DTR instances allocated from
+    // doOpenIfChanged, we need to ensure that the ordinal is one that this DTR
+    // instance recognizes. Therefore we do this check up front, before we hit
+    // the cache.
+    if (ordinal < 0 || ordinal >= indexReader.maxDoc()) {
       return null;
     }
-    return new CategoryPath(label, delimiter);
-  }
 
-  @Override
-  public boolean getPath(int ordinal, CategoryPath result) throws IOException {
-    ensureOpen();
-    String label = getLabel(ordinal);
-    if (label == null) {
-      return false;
+    // TODO: can we use an int-based hash impl, such as IntToObjectMap,
+    // wrapped as LRU?
+    Integer catIDInteger = Integer.valueOf(ordinal);
+    synchronized (categoryCache) {
+      CategoryPath res = categoryCache.get(catIDInteger);
+      if (res != null) {
+        return res;
+      }
     }
-    result.clear();
-    result.add(label, delimiter);
-    return true;
+
+    final LoadFullPathOnly loader = new LoadFullPathOnly();
+    indexReader.document(ordinal, loader);
+    CategoryPath ret = new CategoryPath(loader.getFullPath(), delimiter);
+    synchronized (categoryCache) {
+      categoryCache.put(catIDInteger, ret);
+    }
+
+    return ret;
   }
 
   @Override
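Caching CategoryPath objects directly (instead of delimited strings) is only safe because the class is now immutable: a cached instance can be handed to every caller without risk of one of them mutating it. A minimal sketch of the consequence for callers (hypothetical code):

    CategoryPath p1 = taxoReader.getPath(42);  // may come straight from the LRU cache
    CategoryPath p2 = taxoReader.getPath(42);  // may be the very same instance
    // Neither caller can trim()/clear() it anymore; derived paths are new objects,
    // e.g. p1.subpath(1) leaves p1 (and therefore the cache entry) intact.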
@@ -411,7 +381,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
         sb.append(i + ": NULL!! \n");
         continue;
       }
-      if (category.length() == 0) {
+      if (category.length == 0) {
         sb.append(i + ": EMPTY STRING!! \n");
         continue;
       }
@@ -249,7 +249,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
       cacheIsComplete = true;
       // Make sure that the taxonomy always contain the root category
       // with category id 0.
-      addCategory(new CategoryPath());
+      addCategory(CategoryPath.EMPTY);
     } else {
       // There are some categories on the disk, which we have not yet
       // read into the cache, and therefore the cache is incomplete.
@@ -449,56 +449,6 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
     return doc;
   }
 
-  /**
-   * Look up the given prefix of the given category in the cache and/or the
-   * on-disk storage, returning that prefix's ordinal, or a negative number in
-   * case the category does not yet exist in the taxonomy.
-   */
-  private int findCategory(CategoryPath categoryPath, int prefixLen)
-      throws IOException {
-    int res = cache.get(categoryPath, prefixLen);
-    if (res >= 0 || cacheIsComplete) {
-      return res;
-    }
-
-    cacheMisses.incrementAndGet();
-    perhapsFillCache();
-    res = cache.get(categoryPath, prefixLen);
-    if (res >= 0 || cacheIsComplete) {
-      return res;
-    }
-
-    initReaderManager();
-
-    int doc = -1;
-    DirectoryReader reader = readerManager.acquire();
-    try {
-      TermsEnum termsEnum = null; // reuse
-      DocsEnum docs = null; // reuse
-      final BytesRef catTerm = new BytesRef(categoryPath.toString(delimiter, prefixLen));
-      for (AtomicReaderContext ctx : reader.leaves()) {
-        Terms terms = ctx.reader().terms(Consts.FULL);
-        if (terms != null) {
-          termsEnum = terms.iterator(termsEnum);
-          if (termsEnum.seekExact(catTerm, true)) {
-            // liveDocs=null because the taxonomy has no deletes
-            docs = termsEnum.docs(null, docs, 0 /* freqs not required */);
-            // if the term was found, we know it has exactly one document.
-            doc = docs.nextDoc() + ctx.docBase;
-            break;
-          }
-        }
-      }
-    } finally {
-      readerManager.release(reader);
-    }
-
-    if (doc > 0) {
-      addToCache(categoryPath, prefixLen, doc);
-    }
-    return doc;
-  }
-
   @Override
   public int addCategory(CategoryPath categoryPath) throws IOException {
     ensureOpen();
@@ -516,7 +466,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
         // (while keeping the invariant that a parent is always added to
         // the taxonomy before its child). internalAddCategory() does all
         // this recursively
-        res = internalAddCategory(categoryPath, categoryPath.length());
+        res = internalAddCategory(categoryPath);
       }
     }
   }
@@ -532,25 +482,24 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
    * parent is always added to the taxonomy before its child). We do this by
    * recursion.
    */
-  private int internalAddCategory(CategoryPath categoryPath, int length)
-      throws IOException {
-
+  private int internalAddCategory(CategoryPath cp) throws IOException {
     // Find our parent's ordinal (recursively adding the parent category
     // to the taxonomy if it's not already there). Then add the parent
     // ordinal as payloads (rather than a stored field; payloads can be
     // more efficiently read into memory in bulk by LuceneTaxonomyReader)
     int parent;
-    if (length > 1) {
-      parent = findCategory(categoryPath, length - 1);
+    if (cp.length > 1) {
+      CategoryPath parentPath = cp.subpath(cp.length - 1);
+      parent = findCategory(parentPath);
       if (parent < 0) {
-        parent = internalAddCategory(categoryPath, length - 1);
+        parent = internalAddCategory(parentPath);
       }
-    } else if (length == 1) {
+    } else if (cp.length == 1) {
       parent = TaxonomyReader.ROOT_ORDINAL;
     } else {
       parent = TaxonomyReader.INVALID_ORDINAL;
    }
-    int id = addCategoryDocument(categoryPath, length, parent);
+    int id = addCategoryDocument(cp, parent);
 
     return id;
   }
@@ -569,8 +518,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
    * Note that the methods calling addCategoryDocument() are synchornized, so
    * this method is effectively synchronized as well.
    */
-  private int addCategoryDocument(CategoryPath categoryPath, int length,
-      int parent) throws IOException {
+  private int addCategoryDocument(CategoryPath categoryPath, int parent) throws IOException {
     // Before Lucene 2.9, position increments >=0 were supported, so we
     // added 1 to parent to allow the parent -1 (the parent of the root).
     // Unfortunately, starting with Lucene 2.9, after LUCENE-1542, this is
@@ -580,11 +528,11 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
     // we write here (e.g., to write parent+2), and need to do a workaround
     // in the reader (which knows that anyway only category 0 has a parent
     // -1).
-    parentStream.set(Math.max(parent+1, 1));
+    parentStream.set(Math.max(parent + 1, 1));
     Document d = new Document();
     d.add(parentStreamField);
 
-    fullPathField.setStringValue(categoryPath.toString(delimiter, length));
+    fullPathField.setStringValue(categoryPath.toString(delimiter));
     d.add(fullPathField);
 
     // Note that we do no pass an Analyzer here because the fields that are
@@ -601,7 +549,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
 
     // NOTE: this line must be executed last, or else the cache gets updated
     // before the parents array (LUCENE-4596)
-    addToCache(categoryPath, length, id);
+    addToCache(categoryPath, id);
 
     return id;
   }
@@ -653,14 +601,6 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
     }
   }
 
-  private void addToCache(CategoryPath categoryPath, int prefixLen, int id)
-      throws IOException {
-    if (cache.put(categoryPath, prefixLen, id)) {
-      refreshReaderManager();
-      cacheIsComplete = false;
-    }
-  }
-
   private synchronized void refreshReaderManager() throws IOException {
     // this method is synchronized since it cannot happen concurrently with
     // addCategoryDocument -- when this method returns, we must know that the
@@ -760,7 +700,6 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
     boolean aborted = false;
     DirectoryReader reader = readerManager.acquire();
     try {
-      CategoryPath cp = new CategoryPath();
       TermsEnum termsEnum = null;
       DocsEnum docsEnum = null;
       for (AtomicReaderContext ctx : reader.leaves()) {
@@ -775,8 +714,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
           // hence documents), there are no deletions in the index. Therefore, it
           // is sufficient to call next(), and then doc(), exactly once with no
           // 'validation' checks.
-          cp.clear();
-          cp.add(t.utf8ToString(), delimiter);
+          CategoryPath cp = new CategoryPath(t.utf8ToString(), delimiter);
           docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
           boolean res = cache.put(cp, docsEnum.nextDoc() + ctx.docBase);
           assert !res : "entries should not have been evicted from the cache";
@@ -857,7 +795,6 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
       final int size = r.numDocs();
       final OrdinalMap ordinalMap = map;
       ordinalMap.setSize(size);
-      CategoryPath cp = new CategoryPath();
      int base = 0;
       TermsEnum te = null;
       DocsEnum docs = null;
@@ -867,8 +804,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
         te = terms.iterator(te);
         while (te.next() != null) {
           String value = te.term().utf8ToString();
-          cp.clear();
-          cp.add(value, Consts.DEFAULT_DELIMITER);
+          CategoryPath cp = new CategoryPath(value, Consts.DEFAULT_DELIMITER);
           final int ordinal = addCategory(cp);
           docs = te.docs(null, docs, DocsEnum.FLAG_NONE);
           ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
@@ -64,15 +64,6 @@ public interface TaxonomyWriterCache {
    */
   public int get(CategoryPath categoryPath);
 
-  /**
-   * Like {@link #get(CategoryPath)}, but for a given prefix of the
-   * category path.
-   * <P>
-   * If the given length is negative or bigger than the path's actual
-   * length, the full path is taken.
-   */
-  public int get(CategoryPath categoryPath, int length);
-
   /**
    * Add a category to the cache, with the given ordinal as the value.
    * <P>
@@ -93,15 +84,6 @@ public interface TaxonomyWriterCache {
    */
   public boolean put(CategoryPath categoryPath, int ordinal);
 
-  /**
-   * Like {@link #put(CategoryPath, int)}, but for a given prefix of the
-   * category path.
-   * <P>
-   * If the given length is negative or bigger than the path's actual
-   * length, the full path is taken.
-   */
-  public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal);
-
   /**
    * Returns true if the cache is full, such that the next {@link #put} will
    * evict entries from it, false otherwise.
@@ -0,0 +1,82 @@
+package org.apache.lucene.facet.taxonomy.writercache.cl2o;
+
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/** Utilities for use of {@link CategoryPath} by {@link CompactLabelToOrdinal}. */
+public class CategoryPathUtils {
+
+  /** Serializes the given {@link CategoryPath} to the {@link CharBlockArray}. */
+  public static void serialize(CategoryPath cp, CharBlockArray charBlockArray) {
+    charBlockArray.append((char) cp.length);
+    if (cp.length == 0) {
+      return;
+    }
+    for (int i = 0; i < cp.length; i++) {
+      charBlockArray.append((char) cp.components[i].length());
+      charBlockArray.append(cp.components[i]);
+    }
+  }
+
+  /**
+   * Calculates a hash function of a path that serialized with
+   * {@link #serialize(CategoryPath, CharBlockArray)}.
+   */
+  public static int hashCodeOfSerialized(CharBlockArray charBlockArray, int offset) {
+    int length = (short) charBlockArray.charAt(offset++);
+    if (length == 0) {
+      return 0;
+    }
+
+    int hash = length;
+    for (int i = 0; i < length; i++) {
+      int len = (short) charBlockArray.charAt(offset++);
+      hash = hash * 31 + charBlockArray.subSequence(offset, offset + len).hashCode();
+      offset += len;
+    }
+    return hash;
+  }
+
+  /**
+   * Check whether the {@link CategoryPath} is equal to the one serialized in
+   * {@link CharBlockArray}.
+   */
+  public static boolean equalsToSerialized(CategoryPath cp, CharBlockArray charBlockArray, int offset) {
+    int n = charBlockArray.charAt(offset++);
+    if (cp.length != n) {
+      return false;
+    }
+    if (cp.length == 0) {
+      return true;
+    }
+
+    for (int i = 0; i < cp.length; i++) {
+      int len = (short) charBlockArray.charAt(offset++);
+      if (len != cp.components[i].length()) {
+        return false;
+      }
+      if (!cp.components[i].equals(charBlockArray.subSequence(offset, offset + len))) {
+        return false;
+      }
+      offset += len;
+    }
+    return true;
+  }
+
+}
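CategoryPathUtils replaces the serialization methods that previously lived on CategoryPath itself. The layout is length-prefixed: one char for the component count, then for each component one char for its length followed by its characters. A hedged round-trip sketch (values illustrative; CharBlockArray is package-private, so this only compiles inside the cl2o package):

    CharBlockArray repo = new CharBlockArray();
    CategoryPath cp = new CategoryPath("a", "bc");

    int offset = repo.length();              // remember where this label starts
    CategoryPathUtils.serialize(cp, repo);   // appends: 2, 1, 'a', 2, 'b', 'c'

    boolean same = CategoryPathUtils.equalsToSerialized(cp, repo, offset);  // true
    int hash = CategoryPathUtils.hashCodeOfSerialized(repo, offset);
    // raw hash, before CompactLabelToOrdinal.stringHashCode() applies its bit-spreading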
@@ -41,7 +41,7 @@ class CharBlockArray implements Appendable, Serializable, CharSequence {
   final static class Block implements Serializable, Cloneable {
     private static final long serialVersionUID = 1L;
 
-    char[] chars;
+    final char[] chars;
     int length;
 
     Block(int size) {
@@ -149,7 +149,7 @@ class CharBlockArray implements Appendable, Serializable, CharSequence {
 
   @Override
   public char charAt(int index) {
-    Block b = this.blocks.get(blockIndex(index));
+    Block b = blocks.get(blockIndex(index));
     return b.chars[indexInBlock(index)];
   }
 
@@ -160,16 +160,27 @@ class CharBlockArray implements Appendable, Serializable, CharSequence {
 
   @Override
   public CharSequence subSequence(int start, int end) {
-    throw new UnsupportedOperationException("subsequence not implemented yet");
+    int remaining = end - start;
+    StringBuilder sb = new StringBuilder(remaining);
+    int blockIdx = blockIndex(start);
+    int indexInBlock = indexInBlock(start);
+    while (remaining > 0) {
+      Block b = blocks.get(blockIdx++);
+      int numToAppend = Math.min(remaining, b.length - indexInBlock);
+      sb.append(b.chars, indexInBlock, numToAppend);
+      remaining -= numToAppend;
+      indexInBlock = 0; // 2nd+ iterations read from start of the block
+    }
+    return sb.toString();
   }
 
   @Override
   public String toString() {
-    StringBuilder b = new StringBuilder(blockSize * this.blocks.size());
-    for (int i = 0; i < this.blocks.size(); i++) {
-      b.append(this.blocks.get(i).chars);
+    StringBuilder sb = new StringBuilder();
+    for (Block b : blocks) {
+      sb.append(b.chars, 0, b.length);
     }
-    return b.toString();
+    return sb.toString();
   }
 
   void flush(OutputStream out) throws IOException {
@@ -77,19 +77,6 @@ public class Cl2oTaxonomyWriterCache implements TaxonomyWriterCache {
     }
   }
 
-  @Override
-  public int get(CategoryPath categoryPath, int length) {
-    if (length < 0 || length > categoryPath.length()) {
-      length = categoryPath.length();
-    }
-    lock.readLock().lock();
-    try {
-      return cache.getOrdinal(categoryPath, length);
-    } finally {
-      lock.readLock().unlock();
-    }
-  }
-
   @Override
   public boolean put(CategoryPath categoryPath, int ordinal) {
     lock.writeLock().lock();
@@ -103,23 +90,7 @@ public class Cl2oTaxonomyWriterCache implements TaxonomyWriterCache {
     }
   }
 
-  @Override
-  public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) {
-    lock.writeLock().lock();
-    try {
-      cache.addLabel(categoryPath, prefixLen, ordinal);
-      // Tell the caller we didn't clear part of the cache, so it doesn't
-      // have to flush its on-disk index now
-      return false;
-    } finally {
-      lock.writeLock().unlock();
-    }
-  }
-
-  /**
-   * Returns the number of bytes in memory used by this object.
-   * @return Number of bytes in memory used by this object.
-   */
+  /** Returns the number of bytes in memory used by this object. */
   public int getMemoryUsage() {
     return cache == null ? 0 : cache.getMemoryUsage();
   }
@@ -1,6 +1,5 @@
 package org.apache.lucene.facet.taxonomy.writercache.cl2o;
 
-import java.io.IOException;
 import java.util.Iterator;
 import java.util.NoSuchElementException;
 
@@ -108,25 +107,11 @@ public class CollisionMap {
     int bucketIndex = indexFor(hash, this.capacity);
     Entry e = this.entries[bucketIndex];
 
-    while (e != null && !(hash == e.hash && label.equalsToSerialized(this.labelRepository, e.offset))) {
+    while (e != null && !(hash == e.hash && CategoryPathUtils.equalsToSerialized(label, labelRepository, e.offset))) {
       e = e.next;
     }
     if (e == null) {
-      return LabelToOrdinal.InvalidOrdinal;
-    }
-
-    return e.cid;
-  }
-
-  public int get(CategoryPath label, int prefixLen, int hash) {
-    int bucketIndex = indexFor(hash, this.capacity);
-    Entry e = this.entries[bucketIndex];
-
-    while (e != null && !(hash == e.hash && label.equalsToSerialized(prefixLen, this.labelRepository, e.offset))) {
-      e = e.next;
-    }
-    if (e == null) {
-      return LabelToOrdinal.InvalidOrdinal;
+      return LabelToOrdinal.INVALID_ORDINAL;
     }
 
     return e.cid;
@@ -135,47 +120,22 @@ public class CollisionMap {
   public int addLabel(CategoryPath label, int hash, int cid) {
     int bucketIndex = indexFor(hash, this.capacity);
     for (Entry e = this.entries[bucketIndex]; e != null; e = e.next) {
-      if (e.hash == hash && label.equalsToSerialized(this.labelRepository, e.offset)) {
+      if (e.hash == hash && CategoryPathUtils.equalsToSerialized(label, labelRepository, e.offset)) {
         return e.cid;
       }
     }
 
     // new string; add to label repository
-    int offset = this.labelRepository.length();
-    try {
-      label.serializeAppendTo(labelRepository);
-    } catch (IOException e) {
-      // can't happen, because labelRepository.append() doesn't throw an exception
-    }
-
-    addEntry(offset, cid, hash, bucketIndex);
-    return cid;
-  }
-
-  public int addLabel(CategoryPath label, int prefixLen, int hash, int cid) {
-    int bucketIndex = indexFor(hash, this.capacity);
-    for (Entry e = this.entries[bucketIndex]; e != null; e = e.next) {
-      if (e.hash == hash && label.equalsToSerialized(prefixLen, this.labelRepository, e.offset)) {
-        return e.cid;
-      }
-    }
-
-    // new string; add to label repository
-    int offset = this.labelRepository.length();
-    try {
-      label.serializeAppendTo(prefixLen, labelRepository);
-    } catch (IOException e) {
-      // can't happen, because labelRepository.append() doesn't throw an exception
-    }
-
+    int offset = labelRepository.length();
+    CategoryPathUtils.serialize(label, labelRepository);
     addEntry(offset, cid, hash, bucketIndex);
     return cid;
   }
 
   /**
-   * This method does not check if the same value is already
-   * in the map because we pass in an char-array offset, so
-   * so we now that we're in resize-mode here.
+   * This method does not check if the same value is already in the map because
+   * we pass in an char-array offset, so so we now that we're in resize-mode
+   * here.
    */
   public void addLabelOffset(int hash, int offset, int cid) {
     int bucketIndex = indexFor(hash, this.capacity);
@@ -29,8 +29,6 @@ import java.util.Iterator;
 
 import org.apache.lucene.facet.taxonomy.CategoryPath;
 
-// TODO: maybe this could use an FST instead...
-
 /**
  * This is a very efficient LabelToOrdinal implementation that uses a
  * CharBlockArray to store all labels and a configurable number of HashArrays to
@@ -59,8 +57,8 @@ public class CompactLabelToOrdinal extends LabelToOrdinal {
 
   public static final float DefaultLoadFactor = 0.15f;
 
-  static final char TerminatorChar = 0xffff;
-  private static final int Collision = -5;
+  static final char TERMINATOR_CHAR = 0xffff;
+  private static final int COLLISION = -5;
 
   private HashArray[] hashArrays;
   private CollisionMap collisionMap;
@@ -103,9 +101,7 @@ public class CompactLabelToOrdinal extends LabelToOrdinal {
 
   private void init() {
     labelRepository = new CharBlockArray();
-    try {
-      new CategoryPath().serializeAppendTo(labelRepository);
-    } catch (IOException e) { } //can't happen
+    CategoryPathUtils.serialize(CategoryPath.EMPTY, labelRepository);
 
     int c = this.capacity;
     for (int i = 0; i < this.hashArrays.length; i++) {
@@ -116,7 +112,7 @@ public class CompactLabelToOrdinal extends LabelToOrdinal {
 
   @Override
   public void addLabel(CategoryPath label, int ordinal) {
-    if (this.collisionMap.size() > this.threshold) {
+    if (collisionMap.size() > threshold) {
       grow();
     }
 
@@ -127,43 +123,22 @@ public class CompactLabelToOrdinal extends LabelToOrdinal {
       }
     }
 
-    int prevVal = this.collisionMap.addLabel(label, hash, ordinal);
+    int prevVal = collisionMap.addLabel(label, hash, ordinal);
     if (prevVal != ordinal) {
-      throw new IllegalArgumentException("Label already exists: " +
-          label.toString('/') + " prev ordinal " + prevVal);
-    }
-  }
-
-  @Override
-  public void addLabel(CategoryPath label, int prefixLen, int ordinal) {
-    if (this.collisionMap.size() > this.threshold) {
-      grow();
-    }
-
-    int hash = CompactLabelToOrdinal.stringHashCode(label, prefixLen);
-    for (int i = 0; i < this.hashArrays.length; i++) {
-      if (addLabel(this.hashArrays[i], label, prefixLen, hash, ordinal)) {
-        return;
-      }
-    }
-
-    int prevVal = this.collisionMap.addLabel(label, prefixLen, hash, ordinal);
-    if (prevVal != ordinal) {
-      throw new IllegalArgumentException("Label already exists: " +
-          label.toString('/', prefixLen) + " prev ordinal " + prevVal);
+      throw new IllegalArgumentException("Label already exists: " + label.toString('/') + " prev ordinal " + prevVal);
     }
   }
 
   @Override
   public int getOrdinal(CategoryPath label) {
     if (label == null) {
-      return LabelToOrdinal.InvalidOrdinal;
+      return LabelToOrdinal.INVALID_ORDINAL;
     }
 
     int hash = CompactLabelToOrdinal.stringHashCode(label);
     for (int i = 0; i < this.hashArrays.length; i++) {
       int ord = getOrdinal(this.hashArrays[i], label, hash);
-      if (ord != Collision) {
+      if (ord != COLLISION) {
         return ord;
       }
     }
@@ -171,23 +146,6 @@ public class CompactLabelToOrdinal extends LabelToOrdinal {
     return this.collisionMap.get(label, hash);
   }
 
-  @Override
-  public int getOrdinal(CategoryPath label, int prefixLen) {
-    if (label == null) {
-      return LabelToOrdinal.InvalidOrdinal;
-    }
-
-    int hash = CompactLabelToOrdinal.stringHashCode(label, prefixLen);
-    for (int i = 0; i < this.hashArrays.length; i++) {
-      int ord = getOrdinal(this.hashArrays[i], label, prefixLen, hash);
-      if (ord != Collision) {
-        return ord;
-      }
-    }
-
-    return this.collisionMap.get(label, prefixLen, hash);
-  }
-
   private void grow() {
     HashArray temp = this.hashArrays[this.hashArrays.length - 1];
 
@@ -241,39 +199,13 @@ public class CompactLabelToOrdinal extends LabelToOrdinal {
     }
   }
 
-  private boolean addLabel(HashArray a, CategoryPath label, int hash,
-      int ordinal) {
+  private boolean addLabel(HashArray a, CategoryPath label, int hash, int ordinal) {
     int index = CompactLabelToOrdinal.indexFor(hash, a.offsets.length);
     int offset = a.offsets[index];
 
     if (offset == 0) {
       a.offsets[index] = this.labelRepository.length();
-      try {
-        label.serializeAppendTo(this.labelRepository);
-      } catch (IOException e) {
-        // can't happen - LabelRepository.append() never throws an
-        // exception
-      }
-      a.cids[index] = ordinal;
-      return true;
-    }
-
-    return false;
-  }
-
-  private boolean addLabel(HashArray a, CategoryPath label, int prefixLen,
-      int hash, int ordinal) {
-    int index = CompactLabelToOrdinal.indexFor(hash, a.offsets.length);
-    int offset = a.offsets[index];
-
-    if (offset == 0) {
-      a.offsets[index] = this.labelRepository.length();
-      try {
-        label.serializeAppendTo(prefixLen, this.labelRepository);
-      } catch (IOException e) {
-        // can't happen - LabelRepository.append() never throws an
-        // exception
-      }
+      CategoryPathUtils.serialize(label, labelRepository);
       a.cids[index] = ordinal;
       return true;
     }
@@ -313,43 +245,23 @@ public class CompactLabelToOrdinal extends LabelToOrdinal {
 
   private int getOrdinal(HashArray a, CategoryPath label, int hash) {
     if (label == null) {
-      return LabelToOrdinal.InvalidOrdinal;
+      return LabelToOrdinal.INVALID_ORDINAL;
     }
 
-    int index = CompactLabelToOrdinal.indexFor(hash, a.offsets.length);
+    int index = indexFor(hash, a.offsets.length);
     int offset = a.offsets[index];
     if (offset == 0) {
-      return LabelToOrdinal.InvalidOrdinal;
+      return LabelToOrdinal.INVALID_ORDINAL;
    }
 
-    if (label.equalsToSerialized(labelRepository, offset)) {
+    if (CategoryPathUtils.equalsToSerialized(label, labelRepository, offset)) {
       return a.cids[index];
     }
 
-    return Collision;
+    return COLLISION;
   }
 
-  private int getOrdinal(HashArray a, CategoryPath label, int prefixLen, int hash) {
-    if (label == null) {
-      return LabelToOrdinal.InvalidOrdinal;
-    }
-
-    int index = CompactLabelToOrdinal.indexFor(hash, a.offsets.length);
-    int offset = a.offsets[index];
-    if (offset == 0) {
-      return LabelToOrdinal.InvalidOrdinal;
-    }
-
-    if (label.equalsToSerialized(prefixLen, labelRepository, offset)) {
-      return a.cids[index];
-    }
-
-    return Collision;
-  }
-
-  /**
-   * Returns index for hash code h.
-   */
+  /** Returns index for hash code h. */
   static int indexFor(int h, int length) {
     return h & (length - 1);
   }
@@ -378,22 +290,10 @@ public class CompactLabelToOrdinal extends LabelToOrdinal {
 
   }
 
-  static int stringHashCode(CategoryPath label, int prefixLen) {
-    int hash = label.hashCode(prefixLen);
-
-    hash = hash ^ ((hash >>> 20) ^ (hash >>> 12));
-    hash = hash ^ (hash >>> 7) ^ (hash >>> 4);
-
-    return hash;
-
-  }
-
   static int stringHashCode(CharBlockArray labelRepository, int offset) {
-    int hash = CategoryPath.hashCodeOfSerialized(labelRepository, offset);
-
+    int hash = CategoryPathUtils.hashCodeOfSerialized(labelRepository, offset);
     hash = hash ^ ((hash >>> 20) ^ (hash >>> 12));
     hash = hash ^ (hash >>> 7) ^ (hash >>> 4);
 
     return hash;
   }
 
@@ -495,25 +395,16 @@ public class CompactLabelToOrdinal extends LabelToOrdinal {
     // that array offsets will work). Since the initial file is machine
     // generated, I think this should be OK.
     while (offset < l2o.labelRepository.length()) {
-      // First component is numcomponents, so we initialize the hash
-      // to this
-      int ncomponents = l2o.labelRepository.charAt(offset++);
-      int hash = ncomponents;
-      // If ncomponents is 0, then we are done?
-      if (ncomponents != 0) {
-
-        // usedchars is always the last member of the 'ends' array
-        // in serialization. Rather than rebuild the entire array,
-        // assign usedchars to the last value we read in. This will
-        // be slightly more memory efficient.
-        int usedchars = 0;
-        for (int i = 0; i < ncomponents; i++) {
-          usedchars = l2o.labelRepository.charAt(offset++);
-          hash = hash * 31 + usedchars;
-        }
-        // Hash the usedchars for this label
-        for (int i = 0; i < usedchars; i++) {
-          hash = hash * 31 + l2o.labelRepository.charAt(offset++);
+      // identical code to CategoryPath.hashFromSerialized. since we need to
+      // advance offset, we cannot call the method directly. perhaps if we
+      // could pass a mutable Integer or something...
+      int length = (short) l2o.labelRepository.charAt(offset++);
+      int hash = length;
+      if (length != 0) {
+        for (int i = 0; i < length; i++) {
+          int len = (short) l2o.labelRepository.charAt(offset++);
+          hash = hash * 31 + l2o.labelRepository.subSequence(offset, offset + len).hashCode();
+          offset += len;
         }
       }
       // Now that we've hashed the components of the label, do the
@@ -27,7 +27,7 @@ import org.apache.lucene.facet.taxonomy.CategoryPath;
 public abstract class LabelToOrdinal {
 
   protected int counter;
-  public static final int InvalidOrdinal = -2;
+  public static final int INVALID_ORDINAL = -2;
 
   /**
    * return the maximal Ordinal assigned so far
@@ -51,23 +51,10 @@ public abstract class LabelToOrdinal {
    */
   public abstract void addLabel(CategoryPath label, int ordinal);
 
-  /**
-   * Adds a new label if its not yet in the table.
-   * Throws an {@link IllegalArgumentException} if the same label with
-   * a different ordinal was previoulsy added to this table.
-   */
-  public abstract void addLabel(CategoryPath label, int prefixLen, int ordinal);
-
   /**
    * @return the ordinal assigned to the given label,
-   * or {@link #InvalidOrdinal} if the label cannot be found in this table.
+   * or {@link #INVALID_ORDINAL} if the label cannot be found in this table.
    */
   public abstract int getOrdinal(CategoryPath label);
 
-  /**
-   * @return the ordinal assigned to the given label,
-   * or {@link #InvalidOrdinal} if the label cannot be found in this table.
-   */
-  public abstract int getOrdinal(CategoryPath label, int prefixLen);
-
 }
@@ -86,23 +86,6 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
     return res.intValue();
   }
 
-  @Override
-  public synchronized int get(CategoryPath categoryPath, int length) {
-    if (length<0 || length>categoryPath.length()) {
-      length = categoryPath.length();
-    }
-    // TODO (Facet): unfortunately, we make a copy here! we can avoid part of
-    // the copy by creating a wrapper object (but this still creates a new
-    // object). A better implementation of the cache would not use Java's
-    // hash table, but rather some other hash table we can control, and
-    // pass the length parameter into it...
-    Integer res = cache.get(new CategoryPath(categoryPath, length));
-    if (res==null) {
-      return -1;
-    }
-    return res.intValue();
-  }
-
   @Override
   public synchronized boolean put(CategoryPath categoryPath, int ordinal) {
     boolean ret = cache.put(categoryPath, new Integer(ordinal));
@@ -119,20 +102,4 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
     return ret;
   }
 
-  @Override
-  public synchronized boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) {
-    boolean ret = cache.put(categoryPath, prefixLen, new Integer(ordinal));
-    // If the cache is full, we need to clear one or more old entries
-    // from the cache. However, if we delete from the cache a recent
-    // addition that isn't yet in our reader, for this entry to be
-    // visible to us we need to make sure that the changes have been
-    // committed and we reopen the reader. Because this is a slow
-    // operation, we don't delete entries one-by-one but rather in bulk
-    // (put() removes the 2/3rd oldest entries).
-    if (ret) {
-      cache.makeRoomLRU();
-    }
-    return ret;
-  }
-
 }
@@ -41,6 +41,7 @@ public class NameHashIntCacheLRU extends NameIntCacheLRU {
 
   @Override
   Object key(CategoryPath name, int prefixLen) {
-    return new Long(name.longHashCode(prefixLen));
+    return new Long(name.subpath(prefixLen).longHashCode());
   }
 
 }
@@ -68,23 +68,13 @@ class NameIntCacheLRU {
     return res;
   }
 
-  /**
-   * Subclasses can override this to provide caching by e.g. hash of the string.
-   */
+  /** Subclasses can override this to provide caching by e.g. hash of the string. */
   Object key(CategoryPath name) {
-    // Note that a copy constructor (cloning) here is necessary, because a
-    // CategoryPath object is mutable, so we cannot save a reference to an
-    // existing CategoryPath. Subclasses which override this method can
-    // avoid this cloning by, e.g., hashing the name.
-    return new CategoryPath(name);
+    return name;
   }
 
   Object key(CategoryPath name, int prefixLen) {
-    // Note that a copy constructor (cloning) here is necessary, because a
-    // CategoryPath object is mutable, so we cannot save a reference to an
-    // existing CategoryPath. Subclasses which override this method can
-    // avoid this cloning by, e.g., hashing the name.
-    return new CategoryPath(name, prefixLen);
+    return name.subpath(prefixLen);
   }
 
   /**
@@ -77,7 +77,7 @@ public class OrdinalMappingReaderTest extends LuceneTestCase {
       FacetResultNode node = result.getFacetResultNode();
       for (FacetResultNode facet : node.getSubResults()) {
         int weight = (int) facet.getValue();
-        int label = Integer.parseInt(facet.getLabel().getComponent(1));
+        int label = Integer.parseInt(facet.getLabel().components[1]);
         //System.out.println(label + ": " + weight);
         if (VERBOSE) {
           System.out.println(label + ": " + weight);
@@ -31,12 +31,10 @@ public class OrdinalPolicyTest extends LuceneTestCase {
   public void testDefaultOrdinalPolicy() {
     // check ordinal policy
     OrdinalPolicy ordinalPolicy = OrdinalPolicy.ALL_PARENTS;
-    assertFalse("default ordinal policy should not match root", ordinalPolicy
-        .shouldAdd(TaxonomyReader.ROOT_ORDINAL));
+    assertFalse("default ordinal policy should not match root", ordinalPolicy.shouldAdd(TaxonomyReader.ROOT_ORDINAL));
     for (int i = 0; i < 300; i++) {
       int ordinal = 1 + random().nextInt(Integer.MAX_VALUE - 1);
-      assertTrue("default ordinal policy should match " + ordinal,
-          ordinalPolicy.shouldAdd(ordinal));
+      assertTrue("default ordinal policy should match " + ordinal, ordinalPolicy.shouldAdd(ordinal));
     }
   }
 
@@ -50,8 +48,7 @@ public class OrdinalPolicyTest extends LuceneTestCase {
     String[] topLevelStrings = new String[10];
     for (int i = 0; i < 10; i++) {
       topLevelStrings[i] = Integer.valueOf(random().nextInt(30)).toString();
-      topLevelOrdinals[i] = taxonomy.addCategory(new CategoryPath(
-          topLevelStrings[i]));
+      topLevelOrdinals[i] = taxonomy.addCategory(new CategoryPath(topLevelStrings[i]));
     }
     int[] nonTopLevelOrdinals = new int[300];
     for (int i = 0; i < 300; i++) {
@@ -61,22 +58,18 @@ public class OrdinalPolicyTest extends LuceneTestCase {
       for (int j = 1; j < components.length; j++) {
         components[j] = (Integer.valueOf(random().nextInt(30))).toString();
       }
-      nonTopLevelOrdinals[i] = taxonomy.addCategory(new CategoryPath(
-          components));
+      nonTopLevelOrdinals[i] = taxonomy.addCategory(new CategoryPath(components));
     }
     // check ordinal policy
     OrdinalPolicy ordinalPolicy = new NonTopLevelOrdinalPolicy();
     ordinalPolicy.init(taxonomy);
-    assertFalse("top level ordinal policy should not match root", ordinalPolicy
-        .shouldAdd(TaxonomyReader.ROOT_ORDINAL));
+    assertFalse("top level ordinal policy should not match root", ordinalPolicy.shouldAdd(TaxonomyReader.ROOT_ORDINAL));
     for (int i = 0; i < 10; i++) {
-      assertFalse("top level ordinal policy should not match "
-          + topLevelOrdinals[i],
+      assertFalse("top level ordinal policy should not match " + topLevelOrdinals[i],
           ordinalPolicy.shouldAdd(topLevelOrdinals[i]));
     }
     for (int i = 0; i < 300; i++) {
-      assertTrue("top level ordinal policy should match "
-          + nonTopLevelOrdinals[i],
+      assertTrue("top level ordinal policy should match " + nonTopLevelOrdinals[i],
          ordinalPolicy.shouldAdd(nonTopLevelOrdinals[i]));
     }
 
@@ -29,10 +29,9 @@ public class PathPolicyTest extends LuceneTestCase {
   @Test
   public void testDefaultPathPolicy() {
     // check path policy
-    CategoryPath cp = new CategoryPath();
+    CategoryPath cp = CategoryPath.EMPTY;
     PathPolicy pathPolicy = PathPolicy.ALL_CATEGORIES;
-    assertFalse("default path policy should not accept root",
-        pathPolicy.shouldAdd(cp));
+    assertFalse("default path policy should not accept root", pathPolicy.shouldAdd(cp));
     for (int i = 0; i < 300; i++) {
       int nComponents = 1 + random().nextInt(10);
       String[] components = new String[nComponents];
@@ -40,9 +39,7 @@ public class PathPolicyTest extends LuceneTestCase {
         components[j] = (Integer.valueOf(random().nextInt(30))).toString();
       }
       cp = new CategoryPath(components);
-      assertTrue("default path policy should accept "
-          + cp.toString('/'),
-          pathPolicy.shouldAdd(cp));
+      assertTrue("default path policy should accept " + cp.toString('/'), pathPolicy.shouldAdd(cp));
     }
   }
 
@ -74,7 +71,7 @@ public class PathPolicyTest extends LuceneTestCase {
|
|||
// check ordinal policy
|
||||
PathPolicy pathPolicy = new NonTopLevelPathPolicy();
|
||||
assertFalse("top level path policy should not match root",
|
||||
pathPolicy.shouldAdd(new CategoryPath()));
|
||||
pathPolicy.shouldAdd(CategoryPath.EMPTY));
|
||||
for (int i = 0; i < 10; i++) {
|
||||
assertFalse("top level path policy should not match "
|
||||
+ topLevelPaths[i],
|
||||
|
|
|
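The replacement pattern in the hunks above recurs throughout this commit: mutable, builder-style paths (`new CategoryPath()` followed by `add()`) give way to paths that are fully specified at construction time. A minimal sketch of the two styles, using only the constructors and accessors that appear in this diff (variable names are illustrative, not from the commit):

```java
import org.apache.lucene.facet.taxonomy.CategoryPath;

public class CategoryPathStyles {
  public static void main(String[] args) {
    // New, immutable style: all components supplied up front.
    CategoryPath ab = new CategoryPath("a", "b");       // varargs constructor
    CategoryPath parsed = new CategoryPath("a/b", '/'); // string + delimiter
    CategoryPath root = CategoryPath.EMPTY;             // shared empty path

    // length and components are public members, not methods, after the cleanup.
    System.out.println(ab.length);         // 2
    System.out.println(ab.components[0]);  // "a"
    System.out.println(parsed.equals(ab)); // true

    // Old style, removed by this commit:
    //   CategoryPath p = new CategoryPath();
    //   p.add("a"); p.add("b"); p.trim(1); p.clear();
    System.out.println(root.length);       // 0
  }
}
```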
@@ -74,7 +74,7 @@ public class FacetIndexingParamsTest extends LuceneTestCase {
   public void testCategoryPolicies() {
     FacetIndexingParams dfip = FacetIndexingParams.ALL_PARENTS;
     // check path policy
-    CategoryPath cp = new CategoryPath();
+    CategoryPath cp = CategoryPath.EMPTY;
     PathPolicy pathPolicy = PathPolicy.ALL_CATEGORIES;
     assertEquals("path policy does not match default for root", pathPolicy.shouldAdd(cp), dfip.getPathPolicy().shouldAdd(cp));
     for (int i = 0; i < 30; i++) {
@@ -129,7 +129,7 @@ public class TestDemoFacets extends LuceneTestCase {
   }

   private void toSimpleString(int depth, StringBuilder sb, FacetResultNode node, String indent) {
-    sb.append(indent + node.getLabel().getComponent(depth) + " (" + (int) node.getValue() + ")\n");
+    sb.append(indent + node.getLabel().components[depth] + " (" + (int) node.getValue() + ")\n");
     for(FacetResultNode childNode : node.getSubResults()) {
       toSimpleString(depth+1, sb, childNode, indent + "  ");
     }
@@ -178,7 +178,7 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
      }

      FacetResult fr = facetResults.get(0); // a, depth=3, K=2
-     boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
+     boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]);
      assertEquals(9, fr.getNumValidDescendants());
      FacetResultNode parentRes = fr.getFacetResultNode();
      assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE);
@@ -219,7 +219,7 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
      }

      fr = facetResults.get(1); // a, depth=2, K=2. same result as before
-     hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
+     hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]);
      assertEquals(9, fr.getNumValidDescendants());
      parentRes = fr.getFacetResultNode();
      assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE);
@@ -239,7 +239,7 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
      }

      fr = facetResults.get(2); // a, depth=1, K=2
-     hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
+     hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]);
      assertEquals(4, fr.getNumValidDescendants(), 4);
      parentRes = fr.getFacetResultNode();
      assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE);
@@ -257,7 +257,7 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
      }

      fr = facetResults.get(3); // a/b, depth=3, K=2
-     hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
+     hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]);
      assertEquals(4, fr.getNumValidDescendants());
      parentRes = fr.getFacetResultNode();
      assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
@@ -272,7 +272,7 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
      }

      fr = facetResults.get(4); // a/b, depth=2, K=2
-     hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
+     hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]);
      assertEquals(4, fr.getNumValidDescendants());
      parentRes = fr.getFacetResultNode();
      assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
@@ -286,7 +286,7 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
      }

      fr = facetResults.get(5); // a/b, depth=1, K=2
-     hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
+     hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]);
      assertEquals(4, fr.getNumValidDescendants());
      parentRes = fr.getFacetResultNode();
      assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
@@ -300,13 +300,13 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
      }

      fr = facetResults.get(6); // a/b, depth=0, K=2
-     hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
+     hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]);
      assertEquals(0, fr.getNumValidDescendants()); // 0 descendants but rootnode
      parentRes = fr.getFacetResultNode();
      assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
      assertEquals(0.0, parentRes.getResidue(), Double.MIN_VALUE);
      assertEquals(0, parentRes.getNumSubResults());
-     hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
+     hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]);

      // doctor, depth=1, K=2
      assertFalse("Shouldn't have found anything for a FacetRequest " +
@@ -231,7 +231,7 @@ public class MultiIteratorsPerCLParamsTest extends LuceneTestCase {
     if (requestedPath == null) {
       parentOrdinal = 0;
     } else {
-      CategoryPath cp = new CategoryPath(requestedPath.getComponent(0));
+      CategoryPath cp = new CategoryPath(requestedPath.components[0]);
       parentOrdinal = taxo.getOrdinal(cp);
     }
     parentArray = taxo.getParallelTaxonomyArrays().parents();
@@ -1,15 +1,7 @@
 package org.apache.lucene.facet.taxonomy;

-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.OutputStreamWriter;
-
-import org.junit.Test;
-
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.junit.Test;

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
@@ -32,843 +24,145 @@ public class TestCategoryPath extends LuceneTestCase {

   @Test
   public void testBasic() {
-    CategoryPath p = new CategoryPath(0,0);
-    assertEquals(0, p.length());
-    for (int i=0; i<1000; i++) {
-      p.add("hello");
-      assertEquals(i+1, p.length());
-    }
-  }
-
-  @Test
-  public void testConstructorCapacity() {
-    CategoryPath p = new CategoryPath(0,0);
-    assertEquals(0, p.capacityChars());
-    assertEquals(0, p.capacityComponents());
-    assertEquals(0, p.length());
-    p = new CategoryPath(5,18);
-    assertEquals(5, p.capacityChars());
-    assertEquals(18, p.capacityComponents());
-    assertEquals(0, p.length());
-    p = new CategoryPath(27,13);
-    assertEquals(27, p.capacityChars());
-    assertEquals(13, p.capacityComponents());
-    assertEquals(0, p.length());
-  }
-
-  @Test
-  public void testClear() {
-    CategoryPath p = new CategoryPath(0,0);
-    p.add("hi");
-    p.add("there");
-    assertEquals(2, p.length());
-    p.clear();
-    assertEquals(0, p.length());
-    p.add("yo!");
-    assertEquals(1, p.length());
-  }
-
-  @Test
-  public void testTrim() {
-    CategoryPath p = new CategoryPath(0,0);
-    p.add("this");
-    p.add("message");
-    p.add("will");
-    p.add("self");
-    p.add("destruct");
-    p.add("in");
-    p.add("five");
-    p.add("seconds");
-    assertEquals(8, p.length());
-    p.trim(3);
-    assertEquals(5, p.length());
-    p.trim(0); // no-op
-    assertEquals(5, p.length());
-    p.trim(-3); // no-op
-    assertEquals(5, p.length());
-    p.trim(1);
-    assertEquals(4, p.length());
-    p.trim(8); // clear
-    assertEquals(0, p.length());
-    p.add("yo!");
-    assertEquals(1, p.length());
-    p.trim(1); // clear
-    assertEquals(0, p.length());
-  }
-
-  @Test
-  public void testComponentsLimit() {
-    // Test that we can add up to 2^15-1 components
-    CategoryPath p = new CategoryPath(0,0);
-    for (int i=0; i<32767; i++) {
-      p.add("");
-      assertEquals(i+1, p.length());
-    }
-    // Also see that in the current implementation, this is actually
-    // the limit: if we add one more component, things break (because
-    // we used a short to hold ncomponents). See that it breaks in the
-    // way we expect it to:
-    p.add(""); // this still works, but...
-    assertEquals(-32768, p.length()); // now the length is wrong and negative
-  }
-
-  @Test
-  public void testCharsLimit() {
-    // Test that we can add up to 2^15-1 characters
-    CategoryPath p = new CategoryPath(0,0);
-    for (int i=0; i<8192; i++) {
-      p.add("aaaa");
-    }
-    // Also see that in the current implementation, this is actually the
-    // limit: If we add one more character, things break (because ends[]
-    // is an array of shorts), and we actually get an exception.
-    try {
-      p.add("a");
-      fail("Should have thrown an exception");
-    } catch (ArrayIndexOutOfBoundsException e) {
-      // good.
-    }
+    assertEquals(0, CategoryPath.EMPTY.length);
+    assertEquals(1, new CategoryPath("hello").length);
+    assertEquals(2, new CategoryPath("hello", "world").length);
   }

   @Test
   public void testToString() {
-    CategoryPath p = new CategoryPath(0,0);
-    // When the category is empty, we expect an empty string
-    assertEquals("", p.toString('/'));
-    // This is (deliberately, in our implementation) indistinguishable
-    // from the case of a single empty component:
-    p.add("");
-    assertEquals("", p.toString('/'));
-    // Check just one category (so no delimiter needed):
-    p.clear();
-    p.add("hello");
-    assertEquals("hello", p.toString('/'));
-    // Now for two categories:
-    p.clear();
-    p.add("hello");
-    p.add("world");
-    assertEquals("hello/world", p.toString('/'));
-    // And for a thousand...
-    p.clear();
-    p.add("0");
-    StringBuilder expected = new StringBuilder("0");
-    for (int i=1; i<1000; i++) {
-      String num = Integer.toString(i);
-      p.add(num);
-      expected.append('/');
-      expected.append(num);
-    }
-    assertEquals(expected.toString(), p.toString('/'));
-    // Check that toString() without a parameter just defaults to '/':
-    assertEquals(expected.toString(), p.toString());
+    assertEquals("", CategoryPath.EMPTY.toString('/'));
+    // one category (so no delimiter needed)
+    assertEquals("hello", new CategoryPath("hello").toString('/'));
+    // more than one category (so no delimiter needed)
+    assertEquals("hello/world", new CategoryPath("hello", "world").toString('/'));
   }

-  // testing toString() and its variants already test most of the appendTo()
-  // code, but not all of it (the "eclemma" code-coverage tool discovered
-  // this for us). Here we complete the coverage of the appendTo() methods:
-  @Test
-  public void testAppendTo() throws IOException {
-    CategoryPath p = new CategoryPath(0,0);
-    StringBuilder sb = new StringBuilder();
-    p.appendTo(sb, '/');
-    assertEquals(0, sb.length());
-    p.appendTo(sb, '/', -1);
-    assertEquals(0, sb.length());
-    p.appendTo(sb, '/', 1);
-    assertEquals(0, sb.length());
-    p.appendTo(sb, '/', -1, 1);
-    assertEquals(0, sb.length());
-  }
-
-  @Test
-  public void testLastComponent() {
-    CategoryPath p = new CategoryPath(1000,1000);
-    // When the category is empty, we expect a null
-    assertNull(p.lastComponent());
-    for (int i=0; i<=100; i++) {
-      String num = Integer.toString(i);
-      p.add(num);
-      assertEquals(num, p.lastComponent());
-    }
-  }
-
   @Test
   public void testGetComponent() {
-    CategoryPath p = new CategoryPath(1000,1000);
-    // When the category is empty, we expect a null
-    assertNull(p.getComponent(0));
-    assertNull(p.getComponent(1));
-    assertNull(p.getComponent(-1));
-    for (int i=0; i<=100; i++) {
-      p.add(Integer.toString(i));
-      for (int j=0; j<=i; j++) {
-        assertEquals(j, Integer.parseInt(p.getComponent(j)));
-      }
-      assertNull(p.getComponent(-1));
-      assertNull(p.getComponent(i+1));
+    String[] components = new String[atLeast(10)];
+    for (int i = 0; i < components.length; i++) {
+      components[i] = Integer.toString(i);
     }
+    CategoryPath cp = new CategoryPath(components);
+    for (int i = 0; i < components.length; i++) {
+      assertEquals(i, Integer.parseInt(cp.components[i]));
+    }
   }

-  @Test
-  public void testToStringPrefix() {
-    CategoryPath p = new CategoryPath(0,0);
-    p.add("hi");
-    p.add("there");
-    p.add("man");
-    assertEquals("hi/there/man", p.toString('/'));
-    assertEquals("", p.toString('/', 0));
-    assertEquals("hi", p.toString('/', 1));
-    assertEquals("hi/there", p.toString('/', 2));
-    assertEquals("hi/there/man", p.toString('/', 3));
-    assertEquals("hi/there/man", p.toString('/', 4));
-    assertEquals("hi/there/man", p.toString('/', -1));
-  }
-
-  @Test
-  public void testToStringSubpath() {
-    CategoryPath p = new CategoryPath(0,0);
-    assertEquals("", p.toString('/', 0, 0));
-    p.add("hi");
-    p.add("there");
-    p.add("man");
-    assertEquals("", p.toString('/', 0, 0));
-    assertEquals("hi", p.toString('/', 0, 1));
-    assertEquals("hi/there", p.toString('/', 0, 2));
-    assertEquals("hi/there/man", p.toString('/', 0, 3));
-    assertEquals("hi/there/man", p.toString('/', 0, 4));
-    assertEquals("hi/there/man", p.toString('/', 0, -1));
-    assertEquals("hi/there/man", p.toString('/', -1, -1));
-    assertEquals("there/man", p.toString('/', 1, -1));
-    assertEquals("man", p.toString('/', 2, -1));
-    assertEquals("", p.toString('/', 3, -1));
-    assertEquals("there/man", p.toString('/', 1, 3));
-    assertEquals("there", p.toString('/', 1, 2));
-    assertEquals("", p.toString('/', 1, 1));
-  }
-
-  @Test
+  @Test
   public void testDelimiterConstructor() {
-    // Test that the constructor that takes a string and a delimiter
-    // works correctly. Also check that it allocates exactly the
-    // needed size for the array - not more.
     CategoryPath p = new CategoryPath("", '/');
-    assertEquals(p.length(), 0);
-    assertEquals(p.capacityChars(), 0);
-    assertEquals(p.capacityComponents(), 0);
+    assertEquals(0, p.length);
     p = new CategoryPath("hello", '/');
-    assertEquals(p.length(), 1);
-    assertEquals(p.capacityChars(), 5);
-    assertEquals(p.capacityComponents(), 1);
+    assertEquals(p.length, 1);
     assertEquals(p.toString('@'), "hello");
     p = new CategoryPath("hi/there", '/');
-    assertEquals(p.length(), 2);
-    assertEquals(p.capacityChars(), 7);
-    assertEquals(p.capacityComponents(), 2);
+    assertEquals(p.length, 2);
     assertEquals(p.toString('@'), "hi@there");
     p = new CategoryPath("how/are/you/doing?", '/');
-    assertEquals(p.length(), 4);
-    assertEquals(p.capacityChars(), 15);
-    assertEquals(p.capacityComponents(), 4);
+    assertEquals(p.length, 4);
     assertEquals(p.toString('@'), "how@are@you@doing?");
   }

-  @Test
+  @Test
   public void testDefaultConstructor() {
-    // test that the default constructor (no parameters) currently
-    // defaults to creating an object with a 0 initial capacity.
-    // If we change this default later, we also need to change this
-    // test.
-    CategoryPath p = new CategoryPath();
-    assertEquals(0, p.capacityChars());
-    assertEquals(0, p.capacityComponents());
-    assertEquals(0, p.length());
+    CategoryPath p = CategoryPath.EMPTY;
+    assertEquals(0, p.length);
     assertEquals("", p.toString('/'));
   }

-  @Test
-  public void testAddEmpty() {
-    // In the current implementation, p.add("") should add en empty
-    // component (which is, admitingly, not a useful case. On the other
-    // hand, p.add("", delimiter) should add no components at all.
-    // Verify this:
-    CategoryPath p = new CategoryPath(0, 0);
-    p.add("");
-    assertEquals(1, p.length());
-    p.add("");
-    assertEquals(2, p.length());
-    p.add("", '/');
-    assertEquals(2, p.length());
-    p.clear();
-    p.add("", '/');
-    assertEquals(0, p.length());
-  }
-
-  @Test
-  public void testDelimiterAdd() {
-    // Test that the add() that takes a string and a delimiter
-    // works correctly. Note that unlike the constructor test above,
-    // we can't expect the capacity to grow to exactly the length of
-    // the given category, so we do not test this.
-    CategoryPath p = new CategoryPath(0, 0);
-    p.add("", '/');
-    assertEquals(0, p.length());
-    assertEquals("", p.toString('@'), "");
-    p.clear();
-    p.add("hello", '/');
-    assertEquals(p.length(), 1);
-    assertEquals(p.toString('@'), "hello");
-    p.clear();
-    p.add("hi/there", '/');
-    assertEquals(p.length(), 2);
-    assertEquals(p.toString('@'), "hi@there");
-    p.clear();
-    p.add("how/are/you/doing?", '/');
-    assertEquals(p.length(), 4);
-    assertEquals(p.toString('@'), "how@are@you@doing?");
-    // See that this is really an add, not replace:
-    p.clear();
-    p.add("hi/there", '/');
-    assertEquals(p.length(), 2);
-    assertEquals(p.toString('@'), "hi@there");
-    p.add("how/are/you/doing", '/');
-    assertEquals(p.length(), 6);
-    assertEquals(p.toString('@'), "hi@there@how@are@you@doing");
-  }
-
-  @Test
-  public void testCopyConstructor() {
-    CategoryPath p = new CategoryPath(0,0);
-    int expectedchars=0;
-    for (int i=0; i<1000; i++) {
-      CategoryPath clone = new CategoryPath(p);
-      assertEquals(p.length(), clone.length());
-      assertEquals(p.toString('/'), clone.toString('/'));
-      // verify that the newly created clone has exactly the right
-      // capacity, with no spare (while the original path p probably
-      // does have spare)
-      assertEquals(i, clone.capacityComponents());
-      assertEquals(expectedchars, clone.capacityChars());
-      // Finally, add another component to the path, for the next
-      // round of this loop
-      String num = Integer.toString(i);
-      p.add(num);
-      expectedchars+=num.length();
-    }
-  }
-
   @Test
-  public void testPrefixCopyConstructor() {
-    CategoryPath p = new CategoryPath(0,0);
-    p.add("hi");
-    p.add("there");
-    p.add("man");
-    assertEquals(p.length(), 3);
+  public void testSubPath() {
+    final CategoryPath p = new CategoryPath("hi", "there", "man");
+    assertEquals(p.length, 3);

-    CategoryPath p1 = new CategoryPath(p,2);
-    assertEquals(2, p1.length());
+    CategoryPath p1 = p.subpath(2);
+    assertEquals(2, p1.length);
     assertEquals("hi/there", p1.toString('/'));
-    // the new prefix object should only take the space it needs:
-    assertEquals(2, p1.capacityComponents());
-    assertEquals(7, p1.capacityChars());

-    p1 = new CategoryPath(p,1);
-    assertEquals(1, p1.length());
+    p1 = p.subpath(1);
+    assertEquals(1, p1.length);
     assertEquals("hi", p1.toString('/'));
-    assertEquals(1, p1.capacityComponents());
-    assertEquals(2, p1.capacityChars());

-    p1 = new CategoryPath(p,0);
-    assertEquals(0, p1.length());
+    p1 = p.subpath(0);
+    assertEquals(0, p1.length);
     assertEquals("", p1.toString('/'));
-    assertEquals(0, p1.capacityComponents());
-    assertEquals(0, p1.capacityChars());

-    // with all the following lengths, the prefix should be the whole path:
+    // with all the following lengths, the prefix should be the whole path
     int[] lengths = { 3, -1, 4 };
-    for (int i=0; i<lengths.length; i++) {
-      p1 = new CategoryPath(p, lengths[i]);
-      assertEquals(3, p1.length());
+    for (int i = 0; i < lengths.length; i++) {
+      p1 = p.subpath(lengths[i]);
+      assertEquals(3, p1.length);
       assertEquals("hi/there/man", p1.toString('/'));
+      assertEquals(p, p1);
-      assertEquals(3, p1.capacityComponents());
-      assertEquals(10, p1.capacityChars());
     }
   }

   @Test
   public void testEquals() {
-    // check that two empty paths are equal, even if they have different
-    // capacities:
-    CategoryPath p1 = new CategoryPath(0,0);
-    CategoryPath p2 = new CategoryPath(1000,300);
-    assertEquals(true, p1.equals(p2));
-    // If we make p2 different, it is no longer equals:
-    p2.add("hi");
-    assertEquals(false, p1.equals(p2));
-    // A categoryPath is definitely not equals to an object of some other
-    // type:
-    assertEquals(false, p1.equals(Integer.valueOf(3)));
-    // Build two paths separately, and compare them
-    p1.clear();
-    p1.add("hello");
-    p1.add("world");
-    p2.clear();
-    p2.add("hello");
-    p2.add("world");
-    assertEquals(true, p1.equals(p2));
-    // Check that comparison really don't look at old data which might
-    // be stored in the array
-    p1.clear();
-    p1.add("averylongcategoryname");
-    p1.clear();
-    p1.add("hi");
-    p2.clear();
-    p2.add("hi");
-    assertEquals(true, p1.equals(p2));
-    // Being of the same length is obviously not enough to be equal
-    p1.clear();
-    p1.add("hi");
-    p2.clear();
-    p2.add("hello");
-    assertEquals(false, p1.equals(p2));
-    p1.clear();
-    p1.add("hi");
-    p2.clear();
-    p2.add("ho");
-    assertEquals(false, p1.equals(p2));
+    assertEquals(CategoryPath.EMPTY, CategoryPath.EMPTY);
+    assertFalse(CategoryPath.EMPTY.equals(new CategoryPath("hi")));
+    assertFalse(CategoryPath.EMPTY.equals(Integer.valueOf(3)));
+    assertEquals(new CategoryPath("hello", "world"), new CategoryPath("hello", "world"));
   }

   @Test
   public void testHashCode() {
-    // Note: in this test, we assume that if two paths are not equal,
-    // their hash codes should come out differently. This is *not*
-    // always the case, but in the examples we use below, it comes out
-    // fine, and unless we have some really bad luck in changing our
-    // hash function, this should also remain true in the future.
-
-    // check that two empty paths are equal, even if they have different
-    // capacities:
-    CategoryPath p1 = new CategoryPath(0,0);
-    CategoryPath p2 = new CategoryPath(1000,300);
-    assertEquals(p1.hashCode(), p2.hashCode());
-    // If we make p2 different, it is no longer equals:
-    p2.add("hi");
-    assertEquals(false, p1.hashCode()==p2.hashCode());
-    // Build two paths separately, and compare them
-    p1.clear();
-    p1.add("hello");
-    p1.add("world");
-    p2.clear();
-    p2.add("hello");
-    p2.add("world");
-    assertEquals(p1.hashCode(), p2.hashCode());
-    // Check that comparison really don't look at old data which might
-    // be stored in the array
-    p1.clear();
-    p1.add("averylongcategoryname");
-    p1.clear();
-    p1.add("hi");
-    p2.clear();
-    p2.add("hi");
-    assertEquals(p1.hashCode(), p2.hashCode());
-    // Being of the same length is obviously not enough to be equal
-    p1.clear();
-    p1.add("hi");
-    p2.clear();
-    p2.add("hello");
-    assertEquals(false, p1.hashCode()==p2.hashCode());
-    p1.clear();
-    p1.add("hi");
-    p2.clear();
-    p2.add("ho");
-    assertEquals(false, p1.hashCode()==p2.hashCode());
+    assertEquals(CategoryPath.EMPTY.hashCode(), CategoryPath.EMPTY.hashCode());
+    assertFalse(CategoryPath.EMPTY.hashCode() == new CategoryPath("hi").hashCode());
+    assertEquals(new CategoryPath("hello", "world").hashCode(), new CategoryPath("hello", "world").hashCode());
   }

-  @Test
-  public void testHashCodePrefix() {
-    // First, repeat the tests of testHashCode() using hashCode(-1)
-    // just to make sure nothing was broken in this variant:
-    CategoryPath p1 = new CategoryPath(0,0);
-    CategoryPath p2 = new CategoryPath(1000,300);
-    assertEquals(p1.hashCode(-1), p2.hashCode(-1));
-    p2.add("hi");
-    assertEquals(false, p1.hashCode(-1)==p2.hashCode(-1));
-    p1.clear();
-    p1.add("hello");
-    p1.add("world");
-    p2.clear();
-    p2.add("hello");
-    p2.add("world");
-    assertEquals(p1.hashCode(-1), p2.hashCode(-1));
-    p1.clear();
-    p1.add("averylongcategoryname");
-    p1.clear();
-    p1.add("hi");
-    p2.clear();
-    p2.add("hi");
-    assertEquals(p1.hashCode(-1), p2.hashCode(-1));
-    p1.clear();
-    p1.add("hi");
-    p2.clear();
-    p2.add("hello");
-    assertEquals(false, p1.hashCode(-1)==p2.hashCode(-1));
-    p1.clear();
-    p1.add("hi");
-    p2.clear();
-    p2.add("ho");
-    assertEquals(false, p1.hashCode(-1)==p2.hashCode(-1));
-
-    // Now move to testing prefixes:
-    CategoryPath p = new CategoryPath();
-    p.add("this");
-    p.add("is");
-    p.add("a");
-    p.add("test");
-    assertEquals(p.hashCode(), p.hashCode(4));
-    assertEquals(new CategoryPath().hashCode(), p.hashCode(0));
-    assertEquals(new CategoryPath(p, 1).hashCode(), p.hashCode(1));
-    assertEquals(new CategoryPath(p, 2).hashCode(), p.hashCode(2));
-    assertEquals(new CategoryPath(p, 3).hashCode(), p.hashCode(3));
-  }
-
   @Test
   public void testLongHashCode() {
-    // Note: in this test, we assume that if two paths are not equal,
-    // their hash codes should come out differently. This is *not*
-    // always the case, but in the examples we use below, it comes out
-    // fine, and unless we have some really bad luck in changing our
-    // hash function, this should also remain true in the future.
-
-    // check that two empty paths are equal, even if they have different
-    // capacities:
-    CategoryPath p1 = new CategoryPath(0,0);
-    CategoryPath p2 = new CategoryPath(1000,300);
-    assertEquals(p1.longHashCode(), p2.longHashCode());
-    // If we make p2 different, it is no longer equals:
-    p2.add("hi");
-    assertEquals(false, p1.longHashCode()==p2.longHashCode());
-    // Build two paths separately, and compare them
-    p1.clear();
-    p1.add("hello");
-    p1.add("world");
-    p2.clear();
-    p2.add("hello");
-    p2.add("world");
-    assertEquals(p1.longHashCode(), p2.longHashCode());
-    // Check that comparison really don't look at old data which might
-    // be stored in the array
-    p1.clear();
-    p1.add("averylongcategoryname");
-    p1.clear();
-    p1.add("hi");
-    p2.clear();
-    p2.add("hi");
-    assertEquals(p1.longHashCode(), p2.longHashCode());
-    // Being of the same length is obviously not enough to be equal
-    p1.clear();
-    p1.add("hi");
-    p2.clear();
-    p2.add("hello");
-    assertEquals(false, p1.longHashCode()==p2.longHashCode());
-    p1.clear();
-    p1.add("hi");
-    p2.clear();
-    p2.add("ho");
-    assertEquals(false, p1.longHashCode()==p2.longHashCode());
-  }
-
-  @Test
-  public void testLongHashCodePrefix() {
-    // First, repeat the tests of testLongHashCode() using longHashCode(-1)
-    // just to make sure nothing was broken in this variant:
-
-    // check that two empty paths are equal, even if they have different
-    // capacities:
-    CategoryPath p1 = new CategoryPath(0,0);
-    CategoryPath p2 = new CategoryPath(1000,300);
-    assertEquals(p1.longHashCode(-1), p2.longHashCode(-1));
-    // If we make p2 different, it is no longer equals:
-    p2.add("hi");
-    assertEquals(false, p1.longHashCode(-1)==p2.longHashCode(-1));
-    // Build two paths separately, and compare them
-    p1.clear();
-    p1.add("hello");
-    p1.add("world");
-    p2.clear();
-    p2.add("hello");
-    p2.add("world");
-    assertEquals(p1.longHashCode(-1), p2.longHashCode(-1));
-    // Check that comparison really don't look at old data which might
-    // be stored in the array
-    p1.clear();
-    p1.add("averylongcategoryname");
-    p1.clear();
-    p1.add("hi");
-    p2.clear();
-    p2.add("hi");
-    assertEquals(p1.longHashCode(-1), p2.longHashCode(-1));
-    // Being of the same length is obviously not enough to be equal
-    p1.clear();
-    p1.add("hi");
-    p2.clear();
-    p2.add("hello");
-    assertEquals(false, p1.longHashCode(-1)==p2.longHashCode(-1));
-    p1.clear();
-    p1.add("hi");
-    p2.clear();
-    p2.add("ho");
-    assertEquals(false, p1.longHashCode(-1)==p2.longHashCode(-1));
-
-    // Now move to testing prefixes:
-    CategoryPath p = new CategoryPath();
-    p.add("this");
-    p.add("is");
-    p.add("a");
-    p.add("test");
-    assertEquals(p.longHashCode(), p.longHashCode(4));
-    assertEquals(new CategoryPath().longHashCode(), p.longHashCode(0));
-    assertEquals(new CategoryPath(p, 1).longHashCode(), p.longHashCode(1));
-    assertEquals(new CategoryPath(p, 2).longHashCode(), p.longHashCode(2));
-    assertEquals(new CategoryPath(p, 3).longHashCode(), p.longHashCode(3));
+    assertEquals(CategoryPath.EMPTY.longHashCode(), CategoryPath.EMPTY.longHashCode());
+    assertFalse(CategoryPath.EMPTY.longHashCode() == new CategoryPath("hi").longHashCode());
+    assertEquals(new CategoryPath("hello", "world").longHashCode(), new CategoryPath("hello", "world").longHashCode());
   }

   @Test
   public void testArrayConstructor() {
     CategoryPath p = new CategoryPath("hello", "world", "yo");
-    assertEquals(3, p.length());
-    assertEquals(12, p.capacityChars());
-    assertEquals(3, p.capacityComponents());
+    assertEquals(3, p.length);
     assertEquals("hello/world/yo", p.toString('/'));

     p = new CategoryPath(new String[0]);
-    assertEquals(0, p.length());
-    assertEquals(0, p.capacityChars());
-    assertEquals(0, p.capacityComponents());
+    assertEquals(0, p.length);
   }

   @Test
   public void testCharsNeededForFullPath() {
+    assertEquals(0, CategoryPath.EMPTY.fullPathLength());
     String[] components = { "hello", "world", "yo" };
-    CategoryPath p = new CategoryPath();
-    assertEquals(0, p.charsNeededForFullPath());
+    CategoryPath cp = new CategoryPath(components);
     int expectedCharsNeeded = 0;
-    for (int i=0; i<components.length; i++) {
-      p.add(components[i]);
-      expectedCharsNeeded += components[i].length();
-      if (i>0) {
-        expectedCharsNeeded++;
-      }
-      assertEquals(expectedCharsNeeded, p.charsNeededForFullPath());
+    for (String comp : components) {
+      expectedCharsNeeded += comp.length();
     }
+    expectedCharsNeeded += cp.length - 1; // delimiter chars
+    assertEquals(expectedCharsNeeded, cp.fullPathLength());
   }

   @Test
   public void testCopyToCharArray() {
-    String[] components = { "hello", "world", "yo" };
-    CategoryPath p = new CategoryPath(components);
-    char[] charArray = new char[p.charsNeededForFullPath()];
-    int numCharsCopied = 0;
-
-    numCharsCopied = p.copyToCharArray(charArray, 0, 0, '.');
-    assertEquals(0, numCharsCopied);
-    assertEquals("", new String(charArray, 0, numCharsCopied));
-
-    numCharsCopied = p.copyToCharArray(charArray, 0, 1, '.');
-    assertEquals(5, numCharsCopied);
-    assertEquals("hello", new String(charArray, 0, numCharsCopied));
-
-    numCharsCopied = p.copyToCharArray(charArray, 0, 3, '.');
-    assertEquals(14, numCharsCopied);
-    assertEquals("hello.world.yo", new String(charArray, 0, numCharsCopied));
-
-    numCharsCopied = p.copyToCharArray(charArray, 0, -1, '.');
-    assertEquals(14, numCharsCopied);
-    assertEquals("hello.world.yo", new String(charArray, 0, numCharsCopied));
-    numCharsCopied = p.copyToCharArray(charArray, 0, 4, '.');
-    assertEquals(14, numCharsCopied);
+    CategoryPath p = new CategoryPath("hello", "world", "yo");
+    char[] charArray = new char[p.fullPathLength()];
+    int numCharsCopied = p.copyFullPath(charArray, 0, '.');
+    assertEquals(p.fullPathLength(), numCharsCopied);
+    assertEquals("hello.world.yo", new String(charArray, 0, numCharsCopied));
   }

-  @Test
-  public void testCharSerialization() throws Exception {
-    CategoryPath[] testCategories = {
-        new CategoryPath("hi", "there", "man"),
-        new CategoryPath("hello"),
-        new CategoryPath("what's", "up"),
-        // See that an empty category, which generates a (char)0,
-        // doesn't cause any problems in the middle of the serialization:
-        new CategoryPath(),
-        new CategoryPath("another", "example"),
-        new CategoryPath(),
-        new CategoryPath()
-    };
-    StringBuilder sb = new StringBuilder();
-    for (int i=0; i<testCategories.length; i++) {
-      testCategories[i].serializeAppendTo(sb);
-    }
-
-    CategoryPath tmp = new CategoryPath();
-    int offset=0;
-    for (int i=0; i<testCategories.length; i++) {
-      // check equalsToSerialized, in a equal and non-equal case:
-      assertTrue(testCategories[i].equalsToSerialized(sb, offset));
-      assertFalse(new CategoryPath("Hello", "world").equalsToSerialized(sb, offset));
-      assertFalse(new CategoryPath("world").equalsToSerialized(sb, offset));
-      // and check hashCodeFromSerialized:
-      assertEquals(testCategories[i].hashCode(), CategoryPath.hashCodeOfSerialized(sb, offset));
-      // and check setFromSerialized:
-      offset = tmp.setFromSerialized(sb, offset);
-      assertEquals(testCategories[i], tmp);
-    }
-    assertEquals(offset, sb.length());
-    // A similar test, for a much longer path (though not larger than the
-    // 2^15-1 character limit that CategoryPath allows:
-    sb = new StringBuilder();
-    CategoryPath p = new CategoryPath();
-    for (int i=0; i<1000; i++) {
-      p.add(Integer.toString(i));
-    }
-    p.serializeAppendTo(sb);
-    p.serializeAppendTo(sb);
-    p.serializeAppendTo(sb);
-    offset=0;
-    assertTrue(p.equalsToSerialized(sb, offset));
-    assertEquals(p.hashCode(), CategoryPath.hashCodeOfSerialized(sb, offset));
-    offset = tmp.setFromSerialized(sb, offset);
-    assertEquals(p, tmp);
-    assertTrue(p.equalsToSerialized(sb, offset));
-    assertEquals(p.hashCode(), CategoryPath.hashCodeOfSerialized(sb, offset));
-    offset = tmp.setFromSerialized(sb, offset);
-    assertEquals(p, tmp);
-    assertTrue(p.equalsToSerialized(sb, offset));
-    assertEquals(p.hashCode(), CategoryPath.hashCodeOfSerialized(sb, offset));
-    offset = tmp.setFromSerialized(sb, offset);
-    assertEquals(p, tmp);
-    assertEquals(offset, sb.length());
-
-    // Test the serializeAppendTo variant with a prefixLen
-    p = new CategoryPath();
-    for (int i=0; i<783; i++) {
-      p.add(Integer.toString(i));
-    }
-    int[] prefixLengths = { 0, 574, 782, 783, 784, -1 };
-    for (int prefixLen : prefixLengths) {
-      sb = new StringBuilder();
-      p.serializeAppendTo(prefixLen, sb);
-      assertTrue(new CategoryPath(p, prefixLen).equalsToSerialized(sb, 0));
-    }
-
-    // Test the equalsToSerialized variant with a prefixLen
-    // We use p and prefixLengths set above.
-    for (int prefixLen : prefixLengths) {
-      sb = new StringBuilder();
-      new CategoryPath(p, prefixLen).serializeAppendTo(sb);
-      assertTrue(p.equalsToSerialized(prefixLen, sb, 0));
-    }
-
-    // Check also the false case of equalsToSerialized with prefixLen:
-    sb = new StringBuilder();
-    new CategoryPath().serializeAppendTo(sb);
-    assertTrue(new CategoryPath().equalsToSerialized(0, sb, 0));
-    assertTrue(new CategoryPath("a", "b").equalsToSerialized(0, sb, 0));
-    assertFalse(new CategoryPath("a", "b").equalsToSerialized(1, sb, 0));
-    sb = new StringBuilder();
-    new CategoryPath("a", "b").serializeAppendTo(sb);
-    assertFalse(new CategoryPath().equalsToSerialized(0, sb, 0));
-    assertFalse(new CategoryPath("a").equalsToSerialized(0, sb, 0));
-    assertFalse(new CategoryPath("a").equalsToSerialized(1, sb, 0));
-    assertFalse(new CategoryPath("a", "b").equalsToSerialized(0, sb, 0));
-    assertFalse(new CategoryPath("a", "b").equalsToSerialized(1, sb, 0));
-    assertTrue(new CategoryPath("a", "b").equalsToSerialized(2, sb, 0));
-    assertTrue(new CategoryPath("a", "b", "c").equalsToSerialized(2, sb, 0));
-    assertFalse(new CategoryPath("z", "b", "c").equalsToSerialized(2, sb, 0));
-    assertFalse(new CategoryPath("aa", "b", "c").equalsToSerialized(2, sb, 0));
-  }
-
-  @Test
-  public void testStreamWriterSerialization() throws Exception {
-    CategoryPath[] testPaths = {
-        new CategoryPath("hi", "there", "man"),
-        new CategoryPath("hello"),
-        new CategoryPath("date", "2009", "May", "13", "14", "59", "00"),
-        // See that an empty category, which generates a (char)0,
-        // doesn't cause any problems in the middle of the serialization:
-        new CategoryPath(),
-        new CategoryPath("another", "example")
-    };
-    ByteArrayOutputStream baos = new ByteArrayOutputStream();
-    OutputStreamWriter osw = new OutputStreamWriter(baos, "UTF-8"); // UTF-8 is always supported.
-    for (CategoryPath cp : testPaths) {
-      cp.serializeToStreamWriter(osw);
-    }
-    osw.flush();
-    ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
-    InputStreamReader isr = new InputStreamReader(bais, "UTF-8");
-    CategoryPath[] checkPaths = {
-        new CategoryPath(), new CategoryPath(), new CategoryPath(), new CategoryPath(), new CategoryPath()
-    };
-    for (int j = 0; j < checkPaths.length; j++) {
-      checkPaths[j].deserializeFromStreamReader(isr);
-      assertEquals("Paths not equal", testPaths[j], checkPaths[j]);
-    }
-  }
-
-  @Test
-  public void testCharSequenceCtor() throws Exception {
-    CategoryPath[] testPaths = {
-        new CategoryPath(new CS("hi"), new CS("there"), new CS("man")),
-        new CategoryPath(new CS("hello")),
-        new CategoryPath(new CS("date"), new CS("2009"), new CS("May"), new CS("13"),
-            new CS("14"), new CS("59"), new CS("00")),
-        new CategoryPath(),
-        new CategoryPath(new CS("another"), new CS("example"))
-    };
-    assertEquals("Wrong capacity", 10, testPaths[0].capacityChars());
-    assertEquals("Wrong capacity", 5, testPaths[1].capacityChars());
-    assertEquals("Wrong capacity", 19, testPaths[2].capacityChars());
-    assertEquals("Wrong capacity", 0, testPaths[3].capacityChars());
-    assertEquals("Wrong capacity", 14, testPaths[4].capacityChars());
-
-    assertEquals("Wrong component", "hi", testPaths[0].getComponent(0));
-    assertEquals("Wrong component", "there", testPaths[0].getComponent(1));
-    assertEquals("Wrong component", "man", testPaths[0].getComponent(2));
-    assertEquals("Wrong component", "hello", testPaths[1].getComponent(0));
-    assertEquals("Wrong component", "date", testPaths[2].getComponent(0));
-    assertEquals("Wrong component", "2009", testPaths[2].getComponent(1));
-    assertEquals("Wrong component", "May", testPaths[2].getComponent(2));
-    assertEquals("Wrong component", "13", testPaths[2].getComponent(3));
-    assertEquals("Wrong component", "14", testPaths[2].getComponent(4));
-    assertEquals("Wrong component", "59", testPaths[2].getComponent(5));
-    assertEquals("Wrong component", "00", testPaths[2].getComponent(6));
-    assertNull("Not null component", testPaths[3].getComponent(0));
-    assertEquals("Wrong component", "another", testPaths[4].getComponent(0));
-    assertEquals("Wrong component", "example", testPaths[4].getComponent(1));
-  }
-
-  @Test
-  public void testIsDescendantOf() throws Exception {
-    CategoryPath[] testPaths = {
-        new CategoryPath(new CS("hi"), new CS("there")),
-        new CategoryPath(new CS("hi"), new CS("there"), new CS("man")),
-        new CategoryPath(new CS("hithere"), new CS("man")),
-        new CategoryPath(new CS("hi"), new CS("there"), new CS("mano")),
-        new CategoryPath(),
-    };
-    assertTrue(testPaths[0].isDescendantOf(testPaths[0]));
-    assertTrue(testPaths[0].isDescendantOf(testPaths[4]));
-    assertFalse(testPaths[4].isDescendantOf(testPaths[0]));
-    assertTrue(testPaths[1].isDescendantOf(testPaths[0]));
-    assertTrue(testPaths[1].isDescendantOf(testPaths[1]));
-    assertTrue(testPaths[3].isDescendantOf(testPaths[0]));
-    assertFalse(testPaths[2].isDescendantOf(testPaths[0]));
-    assertFalse(testPaths[2].isDescendantOf(testPaths[1]));
-    assertFalse(testPaths[3].isDescendantOf(testPaths[1]));
-  }
-
   @Test
   public void testCompareTo() {
     CategoryPath p = new CategoryPath("a/b/c/d", '/');
     CategoryPath pother = new CategoryPath("a/b/c/d", '/');
-    assertTrue(pother.compareTo(p) == 0);
+    assertEquals(0, pother.compareTo(p));
     pother = new CategoryPath("", '/');
     assertTrue(pother.compareTo(p) < 0);
     pother = new CategoryPath("a/b_/c/d", '/');
@@ -880,25 +174,5 @@ public class TestCategoryPath extends LuceneTestCase {
     pother = new CategoryPath("a/b/c//e", '/');
     assertTrue(pother.compareTo(p) < 0);
   }

-  private static class CS implements CharSequence {
-    public CS(String s) {
-      this.ca = new char[s.length()];
-      s.getChars(0, s.length(), this.ca, 0);
-    }
-    @Override
-    public char charAt(int index) {
-      return this.ca[index];
-    }
-    @Override
-    public int length() {
-      return this.ca.length;
-    }
-    @Override
-    public CharSequence subSequence(int start, int end) {
-      return null; // not used.
-    }
-    private char[] ca;
-  }
-
 }
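Taken together, the rewritten tests above outline the immutable API that this commit leaves in place of the old mutable one. A short sketch exercising it, under the assumption that it behaves exactly as the new assertions state:

```java
import org.apache.lucene.facet.taxonomy.CategoryPath;

public class CategoryPathTour {
  public static void main(String[] args) {
    CategoryPath p = new CategoryPath("hi", "there", "man");

    // subpath(k) returns a new CategoryPath holding the first k components;
    // the original is left untouched (there is no trim() any more).
    CategoryPath prefix = p.subpath(2);
    System.out.println(prefix.toString('/'));       // hi/there

    // fullPathLength() counts the characters of all components plus the
    // delimiters between them (it replaces charsNeededForFullPath()).
    char[] buf = new char[p.fullPathLength()];
    int copied = p.copyFullPath(buf, 0, '.');
    System.out.println(new String(buf, 0, copied)); // hi.there.man
  }
}
```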
@@ -146,7 +146,7 @@ public class TestTaxonomyCombined extends LuceneTestCase {
     if (path==null) {
       return "<null>";
     }
-    if (path.length()==0) {
+    if (path.length==0) {
       return "<empty>";
     }
     return "<"+path.toString('/')+">";
@@ -304,9 +304,9 @@ public class TestTaxonomyCombined extends LuceneTestCase {
     tw.close();
     TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
     assertEquals(1, tr.getSize());
-    assertEquals(0, tr.getPath(0).length());
+    assertEquals(0, tr.getPath(0).length);
     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0));
-    assertEquals(0, tr.getOrdinal(new CategoryPath()));
+    assertEquals(0, tr.getOrdinal(CategoryPath.EMPTY));
     tr.close();
     indexDir.close();
   }
@@ -323,9 +323,9 @@ public class TestTaxonomyCombined extends LuceneTestCase {
     tw.commit();
     TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
     assertEquals(1, tr.getSize());
-    assertEquals(0, tr.getPath(0).length());
+    assertEquals(0, tr.getPath(0).length);
     assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0));
-    assertEquals(0, tr.getOrdinal(new CategoryPath()));
+    assertEquals(0, tr.getOrdinal(CategoryPath.EMPTY));
     tw.close();
     tr.close();
     indexDir.close();
@@ -416,7 +416,7 @@ public class TestTaxonomyCombined extends LuceneTestCase {
           ", but this is not a valid category.");
     }
     // verify that the parent is indeed my parent, according to the strings
-    if (!new CategoryPath(me, me.length()-1).equals(parent)) {
+    if (!me.subpath(me.length-1).equals(parent)) {
       fail("Got parent "+parentOrdinal+" for ordinal "+ordinal+
           " but categories are "+showcat(parent)+" and "+showcat(me)+
           " respectively.");
@@ -506,7 +506,7 @@ public class TestTaxonomyCombined extends LuceneTestCase {
       }
       // verify that the parent is indeed my parent, according to the
       // strings
-      if (!new CategoryPath(me, me.length() - 1).equals(parent)) {
+      if (!me.subpath(me.length - 1).equals(parent)) {
        fail("Got parent " + parentOrdinal + " for ordinal " + ordinal
            + " but categories are " + showcat(parent) + " and "
            + showcat(me) + " respectively.");
@@ -81,7 +81,6 @@ public class TestAddTaxonomy extends LuceneTestCase {
  }

  private void validate(Directory dest, Directory src, OrdinalMap ordMap) throws Exception {
-    CategoryPath cp = new CategoryPath();
    DirectoryTaxonomyReader destTR = new DirectoryTaxonomyReader(dest);
    try {
      final int destSize = destTR.getSize();
@@ -98,7 +97,7 @@ public class TestAddTaxonomy extends LuceneTestCase {
      // validate that all source categories exist in destination, and their
      // ordinals are as expected.
      for (int j = 1; j < srcSize; j++) {
-        srcTR.getPath(j, cp);
+        CategoryPath cp = srcTR.getPath(j);
        int destOrdinal = destTR.getOrdinal(cp);
        assertTrue(cp + " not found in destination", destOrdinal > 0);
        assertEquals(destOrdinal, map[j]);
@@ -48,12 +48,8 @@ public class TestConcurrentFacetedIndexing extends LuceneTestCase {
    @Override
    public int get(CategoryPath categoryPath) { return -1; }
-    @Override
-    public int get(CategoryPath categoryPath, int length) { return -1; }
    @Override
    public boolean put(CategoryPath categoryPath, int ordinal) { return true; }
-    @Override
-    public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) { return true; }
    @Override
    public boolean isFull() { return true; }
    @Override
    public void clear() {}
@@ -108,9 +104,9 @@ public class TestConcurrentFacetedIndexing extends LuceneTestCase {
          CategoryPath cp = newCategory();
          cats.add(cp);
          // add all prefixes to values
-          int level = cp.length();
+          int level = cp.length;
          while (level > 0) {
-            String s = cp.toString('/', level);
+            String s = cp.subpath(level).toString('/');
            values.put(s, s);
            --level;
          }
@@ -134,11 +130,11 @@ public class TestConcurrentFacetedIndexing extends LuceneTestCase {
    for (String cat : values.keySet()) {
      CategoryPath cp = new CategoryPath(cat, '/');
      assertTrue("category not found " + cp, tr.getOrdinal(cp) > 0);
-      int level = cp.length();
+      int level = cp.length;
      int parentOrd = 0; // for root, parent is always virtual ROOT (ord=0)
-      CategoryPath path = new CategoryPath();
+      CategoryPath path = CategoryPath.EMPTY;
      for (int i = 0; i < level; i++) {
-        path.add(cp.getComponent(i));
+        path = cp.subpath(i + 1);
        int ord = tr.getOrdinal(path);
        assertEquals("invalid parent for cp=" + path, parentOrd, parents[ord]);
        parentOrd = ord; // next level should have this parent
@@ -154,8 +154,8 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
    for (int i=0; i<n; i++) {
      int k = random.nextInt(n);
      tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
-      for (int j=0; j<=k; j++) {
-        tw.addCategory(new CategoryPath(cp[j]));
+      for (int j = 0; j <= k; j++) {
+        tw.addCategory(cp[j]);
      }
      tw.close();
      if (closeReader) {
@@ -52,12 +52,8 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
    @Override
    public int get(CategoryPath categoryPath) { return -1; }
-    @Override
-    public int get(CategoryPath categoryPath, int length) { return -1; }
    @Override
    public boolean put(CategoryPath categoryPath, int ordinal) { return true; }
-    @Override
-    public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) { return true; }
    @Override
    public boolean isFull() { return true; }
    @Override
    public void clear() {}
@@ -266,10 +262,10 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
            Integer.toString(value / 100000), Integer.toString(value));
        int ord = tw.addCategory(cp);
        assertTrue("invalid parent for ordinal " + ord + ", category " + cp, tw.getParent(ord) != -1);
-        String l1 = cp.toString('/', 1);
-        String l2 = cp.toString('/', 2);
-        String l3 = cp.toString('/', 3);
-        String l4 = cp.toString('/', 4);
+        String l1 = cp.subpath(1).toString('/');
+        String l2 = cp.subpath(2).toString('/');
+        String l3 = cp.subpath(3).toString('/');
+        String l4 = cp.subpath(4).toString('/');
        values.put(l1, l1);
        values.put(l2, l2);
        values.put(l3, l3);
@@ -292,11 +288,11 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
    for (String cat : values.keySet()) {
      CategoryPath cp = new CategoryPath(cat, '/');
      assertTrue("category not found " + cp, dtr.getOrdinal(cp) > 0);
-      int level = cp.length();
+      int level = cp.length;
      int parentOrd = 0; // for root, parent is always virtual ROOT (ord=0)
-      CategoryPath path = new CategoryPath();
+      CategoryPath path = CategoryPath.EMPTY;
      for (int i = 0; i < level; i++) {
-        path.add(cp.getComponent(i));
+        path = cp.subpath(i + 1);
        int ord = dtr.getOrdinal(path);
        assertEquals("invalid parent for cp=" + path, parentOrd, parents[ord]);
        parentOrd = ord; // next level should have this parent
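The loop rewritten in the last hunk shows the idiom for visiting every ancestor of a category once paths are immutable: instead of growing one path with add(), each prefix is obtained with subpath(i + 1). A sketch of the traversal on its own, with a hypothetical path value and a comment standing in for the taxonomy-reader calls made by the tests:

```java
import org.apache.lucene.facet.taxonomy.CategoryPath;

public class PrefixWalk {
  public static void main(String[] args) {
    CategoryPath cp = new CategoryPath("2013/01/05", '/'); // illustrative value
    for (int i = 1; i <= cp.length; i++) {
      // subpath(i) is the prefix made of the first i components:
      // "2013", then "2013/01", then "2013/01/05".
      CategoryPath prefix = cp.subpath(i);
      System.out.println(prefix.toString('/'));
      // In the tests above, each prefix is resolved with
      // taxonomyReader.getOrdinal(prefix) and checked against its parent's ordinal.
    }
  }
}
```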
@@ -6,11 +6,13 @@ import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CodingErrorAction;
 import java.util.HashMap;
 import java.util.Map;
+import java.util.Random;

 import org.junit.Test;

 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
 import org.apache.lucene.facet.taxonomy.CategoryPath;
 import org.apache.lucene.facet.taxonomy.writercache.cl2o.CompactLabelToOrdinal;
 import org.apache.lucene.facet.taxonomy.writercache.cl2o.LabelToOrdinal;
@@ -46,9 +48,10 @@ public class TestCompactLabelToOrdinal extends LuceneTestCase {
    String[] uniqueValues = new String[numUniqueValues];
    byte[] buffer = new byte[50];

+    Random random = random();
    for (int i = 0; i < numUniqueValues;) {
-      random().nextBytes(buffer);
-      int size = 1 + random().nextInt(50);
+      random.nextBytes(buffer);
+      int size = 1 + random.nextInt(buffer.length);

      // This test is turning random bytes into a string,
      // this is asking for trouble.
@@ -56,16 +59,16 @@ public class TestCompactLabelToOrdinal extends LuceneTestCase {
          .onUnmappableCharacter(CodingErrorAction.REPLACE)
          .onMalformedInput(CodingErrorAction.REPLACE);
      uniqueValues[i] = decoder.decode(ByteBuffer.wrap(buffer, 0, size)).toString();
-      if (uniqueValues[i].indexOf(CompactLabelToOrdinal.TerminatorChar) == -1) {
+      if (uniqueValues[i].indexOf(CompactLabelToOrdinal.TERMINATOR_CHAR) == -1) {
        i++;
      }
    }

-    TEMP_DIR.mkdirs();
-    File f = new File(TEMP_DIR, "CompactLabelToOrdinalTest.tmp");
+    File tmpDir = _TestUtil.getTempDir("testLableToOrdinal");
+    File f = new File(tmpDir, "CompactLabelToOrdinalTest.tmp");
    int flushInterval = 10;

-    for (int i = 0; i < n * 10; i++) {
+    for (int i = 0; i < n; i++) {
      if (i > 0 && i % flushInterval == 0) {
        compact.flush(f);
        compact = CompactLabelToOrdinal.open(f, 0.15f, 3);
@@ -75,19 +78,16 @@ public class TestCompactLabelToOrdinal extends LuceneTestCase {
        }
      }

-      int index = random().nextInt(numUniqueValues);
+      int index = random.nextInt(numUniqueValues);
      CategoryPath label = new CategoryPath(uniqueValues[index], '/');

      int ord1 = map.getOrdinal(label);
      int ord2 = compact.getOrdinal(label);

-      //System.err.println(ord1+" "+ord2);
-
      assertEquals(ord1, ord2);

-      if (ord1 == LabelToOrdinal.InvalidOrdinal) {
+      if (ord1 == LabelToOrdinal.INVALID_ORDINAL) {
        ord1 = compact.getNextOrdinal();
-
        map.addLabel(label, ord1);
        compact.addLabel(label, ord1);
      }
@@ -108,25 +108,15 @@ public class TestCompactLabelToOrdinal extends LuceneTestCase {

    @Override
    public void addLabel(CategoryPath label, int ordinal) {
-      map.put(new CategoryPath(label), ordinal);
-    }
-
-    @Override
-    public void addLabel(CategoryPath label, int prefixLen, int ordinal) {
-      map.put(new CategoryPath(label, prefixLen), ordinal);
+      map.put(label, ordinal);
    }

    @Override
    public int getOrdinal(CategoryPath label) {
      Integer value = map.get(label);
-      return (value != null) ? value.intValue() : LabelToOrdinal.InvalidOrdinal;
-    }
-
-    @Override
-    public int getOrdinal(CategoryPath label, int prefixLen) {
-      Integer value = map.get(new CategoryPath(label, prefixLen));
-      return (value != null) ? value.intValue() : LabelToOrdinal.InvalidOrdinal;
+      return (value != null) ? value.intValue() : LabelToOrdinal.INVALID_ORDINAL;
    }

  }

}