LUCENE-4659: Cleanup CategoryPath

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1429570 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2013-01-06 19:07:44 +00:00
parent cf8a0ca50b
commit b9a3d9ca70
39 changed files with 503 additions and 2364 deletions

View File

@ -103,6 +103,9 @@ Changes in backwards compatibility policy
implementations.
NOTE: indexes that contain category enhancements/associations are not supported
by the new code and should be recreated. (Shai Erera)
* LUCENE-4659: Massive cleanup to CategoryPath API. Additionally, CategoryPath is
now immutable, so you don't need to clone() it. (Shai Erera)
New Features

View File

@ -53,13 +53,14 @@ public class RandomFacetSource extends FacetSource {
facets.clear();
}
int numFacets = 1 + random.nextInt(maxDocFacets-1); // at least one facet to each doc
for (int i=0; i<numFacets; i++) {
CategoryPath cp = new CategoryPath();
int depth = 1 + random.nextInt(maxFacetDepth-1); // depth 0 is not useful
for (int k=0; k<depth; k++) {
cp.add(Integer.toString(random.nextInt(maxValue)));
for (int i = 0; i < numFacets; i++) {
int depth = 1 + random.nextInt(maxFacetDepth - 1); // depth 0 is not useful
String[] components = new String[depth];
for (int k = 0; k < depth; k++) {
components[k] = Integer.toString(random.nextInt(maxValue));
addItem();
}
CategoryPath cp = new CategoryPath(components);
facets.setAssociation(cp, null);
addBytes(cp.toString().length()); // very rough approximation
}

View File

@ -89,11 +89,6 @@ public class AssociationsFacetFields extends FacetFields {
clpContainer = new CategoryAssociationsContainer();
categoryLists.put(clp, clpContainer);
}
// DrillDownStream modifies the CategoryPath by calling trim(). That means
// that the source category, as the app ses it, is modified. While for
// most apps this is not a problem, we need to protect against it. If
// CategoryPath will be made immutable, we can stop cloning.
cp = cp.clone();
clpContainer.setAssociation(cp, categoryAssociations.getAssociation(cp));
}
return categoryLists;

View File

@ -55,12 +55,12 @@ public class DrillDownStream extends TokenStream {
@Override
public final boolean incrementToken() throws IOException {
if (current.length() == 0) {
if (current.length == 0) {
if (!categories.hasNext()) {
return false; // no more categories
}
current = categories.next();
termAttribute.resizeBuffer(current.charsNeededForFullPath());
termAttribute.resizeBuffer(current.fullPathLength());
isParent = false;
}
@ -73,8 +73,8 @@ public class DrillDownStream extends TokenStream {
// prepare current for next call by trimming the last component (parents)
do {
// skip all parent categories which are not accepted by PathPolicy
current.trim(1);
} while (!pathPolicy.shouldAdd(current) && current.length() > 0);
current = current.subpath(current.length - 1);
} while (!pathPolicy.shouldAdd(current) && current.length > 0);
isParent = true;
return true;
}
@ -82,7 +82,7 @@ public class DrillDownStream extends TokenStream {
@Override
public void reset() throws IOException {
current = categories.next();
termAttribute.resizeBuffer(current.charsNeededForFullPath());
termAttribute.resizeBuffer(current.fullPathLength());
isParent = false;
}

View File

@ -142,11 +142,7 @@ public class FacetFields {
list = new ArrayList<CategoryPath>();
categoryLists.put(clp, list);
}
// DrillDownStream modifies the CategoryPath by calling trim(). That means
// that the source category, as the app ses it, is modified. While for
// most apps this is not a problem, we need to protect against it. If
// CategoryPath will be made immutable, we can stop cloning.
list.add(cp.clone());
list.add(cp);
}
return categoryLists;
}

View File

@ -39,6 +39,6 @@ public class NonTopLevelPathPolicy implements PathPolicy {
*/
@Override
public boolean shouldAdd(CategoryPath categoryPath) {
return categoryPath.length() >= DEFAULT_MINIMAL_SUBPATH_LENGTH;
return categoryPath.length >= DEFAULT_MINIMAL_SUBPATH_LENGTH;
}
}

View File

@ -33,12 +33,12 @@ public interface PathPolicy extends Serializable {
/**
* A {@link PathPolicy} which adds all {@link CategoryPath} that have at least
* one component (i.e. {@link CategoryPath#length()} &gt; 0) to the categories
* one component (i.e. {@link CategoryPath#length} &gt; 0) to the categories
* stream.
*/
public static final PathPolicy ALL_CATEGORIES = new PathPolicy() {
@Override
public boolean shouldAdd(CategoryPath categoryPath) { return categoryPath.length() > 0; }
public boolean shouldAdd(CategoryPath categoryPath) { return categoryPath.length > 0; }
};
/**

View File

@ -54,11 +54,11 @@ public class FacetIndexingParams {
public static final FacetIndexingParams ALL_PARENTS = new FacetIndexingParams();
/**
* The default delimiter with which {@link CategoryPath#getComponent(int)
* components} are concatenated when written to the index, e.g. as drill-down
* terms. If you choose to override it by overiding
* {@link #getFacetDelimChar()}, you should make sure that you return a
* character that's not found in any path component.
* The default delimiter with which {@link CategoryPath#components} are
* concatenated when written to the index, e.g. as drill-down terms. If you
choose to override it by overriding {@link #getFacetDelimChar()}, you should
* make sure that you return a character that's not found in any path
* component.
*/
public static final char DEFAULT_FACET_DELIM_CHAR = '\uF749';
@ -108,10 +108,10 @@ public class FacetIndexingParams {
* that were written.
* <p>
* <b>NOTE:</b> You should make sure that the {@code char[]} is large enough,
* by e.g. calling {@link CategoryPath#charsNeededForFullPath()}.
* by e.g. calling {@link CategoryPath#fullPathLength()}.
*/
public int drillDownTermText(CategoryPath path, char[] buffer) {
return path.copyToCharArray(buffer, 0, -1, getFacetDelimChar());
return path.copyFullPath(buffer, 0, getFacetDelimChar());
}
/**

View File

@ -43,7 +43,7 @@ public class PerDimensionIndexingParams extends FacetIndexingParams {
/**
* Initializes a new instance with the given dimension-to-params mapping. The
* dimension is considered as what's returned by
* {@link CategoryPath#getComponent(int) cp.getComponent(0)}.
* {@link CategoryPath#components cp.components[0]}.
*
* <p>
* <b>NOTE:</b> for any dimension whose {@link CategoryListParams} is not
@ -65,7 +65,7 @@ public class PerDimensionIndexingParams extends FacetIndexingParams {
super(categoryListParams);
clParamsMap = new HashMap<String,CategoryListParams>();
for (Entry<CategoryPath, CategoryListParams> e : paramsMap.entrySet()) {
clParamsMap.put(e.getKey().getComponent(0), e.getValue());
clParamsMap.put(e.getKey().components[0], e.getValue());
}
}
@ -83,7 +83,7 @@ public class PerDimensionIndexingParams extends FacetIndexingParams {
@Override
public CategoryListParams getCategoryListParams(CategoryPath category) {
if (category != null) {
CategoryListParams clParams = clParamsMap.get(category.getComponent(0));
CategoryListParams clParams = clParamsMap.get(category.components[0]);
if (clParams != null) {
return clParams;
}

View File

@ -53,7 +53,7 @@ public final class DrillDown {
/** Return a drill-down {@link Term} for a category. */
public static final Term term(FacetIndexingParams iParams, CategoryPath path) {
CategoryListParams clp = iParams.getCategoryListParams(path);
char[] buffer = new char[path.charsNeededForFullPath()];
char[] buffer = new char[path.fullPathLength()];
iParams.drillDownTermText(path, buffer);
return new Term(clp.getTerm().field(), String.valueOf(buffer));
}

View File

@ -153,7 +153,7 @@ public class TotalFacetCounts {
// needed because FacetSearchParams do not allow empty FacetRequests
private static final List<FacetRequest> DUMMY_REQ = Arrays.asList(
new FacetRequest[] { new CountFacetRequest(new CategoryPath(), 1) });
new FacetRequest[] { new CountFacetRequest(CategoryPath.EMPTY, 1) });
static TotalFacetCounts compute(final IndexReader indexReader,
final TaxonomyReader taxonomy, final FacetIndexingParams facetIndexingParams,

View File

@ -205,22 +205,9 @@ public abstract class TaxonomyReader implements Closeable {
*/
public abstract int getParent(int ordinal) throws IOException;
/**
* Returns the path name of the category with the given ordinal. The path is
* returned as a new CategoryPath object - to reuse an existing object, use
* {@link #getPath(int, CategoryPath)}.
*
* @return a {@link CategoryPath} with the required path, or {@code null} if
* the given ordinal is unknown to the taxonomy.
*/
/** Returns the path name of the category with the given ordinal. */
public abstract CategoryPath getPath(int ordinal) throws IOException;
/**
* Same as {@link #getPath(int)}, only reuses the given {@link CategoryPath}
* instances.
*/
public abstract boolean getPath(int ordinal, CategoryPath result) throws IOException;
/** Returns the current refCount for this taxonomy reader. */
public final int getRefCount() {
return refCount.get();

View File

@ -59,8 +59,8 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
private final DirectoryReader indexReader;
// TODO: test DoubleBarrelLRUCache and consider using it instead
private LRUHashMap<String, Integer> ordinalCache;
private LRUHashMap<Integer, String> categoryCache;
private LRUHashMap<CategoryPath, Integer> ordinalCache;
private LRUHashMap<Integer, CategoryPath> categoryCache;
private volatile ParallelTaxonomyArrays taxoArrays;
@ -72,15 +72,15 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
* arrays.
*/
DirectoryTaxonomyReader(DirectoryReader indexReader, DirectoryTaxonomyWriter taxoWriter,
LRUHashMap<String,Integer> ordinalCache, LRUHashMap<Integer,String> categoryCache,
LRUHashMap<CategoryPath,Integer> ordinalCache, LRUHashMap<Integer,CategoryPath> categoryCache,
ParallelTaxonomyArrays taxoArrays) throws IOException {
this.indexReader = indexReader;
this.taxoWriter = taxoWriter;
this.taxoEpoch = taxoWriter == null ? -1 : taxoWriter.getTaxonomyEpoch();
// use the same instance of the cache, note the protective code in getOrdinal and getPath
this.ordinalCache = ordinalCache == null ? new LRUHashMap<String,Integer>(DEFAULT_CACHE_VALUE) : ordinalCache;
this.categoryCache = categoryCache == null ? new LRUHashMap<Integer,String>(DEFAULT_CACHE_VALUE) : categoryCache;
this.ordinalCache = ordinalCache == null ? new LRUHashMap<CategoryPath,Integer>(DEFAULT_CACHE_VALUE) : ordinalCache;
this.categoryCache = categoryCache == null ? new LRUHashMap<Integer,CategoryPath>(DEFAULT_CACHE_VALUE) : categoryCache;
this.taxoArrays = taxoArrays != null ? new ParallelTaxonomyArrays(indexReader, taxoArrays) : null;
}
@ -102,8 +102,8 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
// These are the default cache sizes; they can be configured after
// construction with the cache's setMaxSize() method
ordinalCache = new LRUHashMap<String, Integer>(DEFAULT_CACHE_VALUE);
categoryCache = new LRUHashMap<Integer, String>(DEFAULT_CACHE_VALUE);
ordinalCache = new LRUHashMap<CategoryPath, Integer>(DEFAULT_CACHE_VALUE);
categoryCache = new LRUHashMap<Integer, CategoryPath>(DEFAULT_CACHE_VALUE);
}
/**
@ -121,39 +121,8 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
// These are the default cache sizes; they can be configured after
// construction with the cache's setMaxSize() method
ordinalCache = new LRUHashMap<String, Integer>(DEFAULT_CACHE_VALUE);
categoryCache = new LRUHashMap<Integer, String>(DEFAULT_CACHE_VALUE);
}
private String getLabel(int catID) throws IOException {
ensureOpen();
// Since the cache is shared with DTR instances allocated from
// doOpenIfChanged, we need to ensure that the ordinal is one that this DTR
// instance recognizes. Therefore we do this check up front, before we hit
// the cache.
if (catID < 0 || catID >= indexReader.maxDoc()) {
return null;
}
// TODO: can we use an int-based hash impl, such as IntToObjectMap,
// wrapped as LRU?
Integer catIDInteger = Integer.valueOf(catID);
synchronized (categoryCache) {
String res = categoryCache.get(catIDInteger);
if (res != null) {
return res;
}
}
final LoadFullPathOnly loader = new LoadFullPathOnly();
indexReader.document(catID, loader);
String ret = loader.getFullPath();
synchronized (categoryCache) {
categoryCache.put(catIDInteger, ret);
}
return ret;
ordinalCache = new LRUHashMap<CategoryPath, Integer>(DEFAULT_CACHE_VALUE);
categoryCache = new LRUHashMap<Integer, CategoryPath>(DEFAULT_CACHE_VALUE);
}
private synchronized void initTaxoArrays() throws IOException {
@ -278,16 +247,15 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
}
@Override
public int getOrdinal(CategoryPath categoryPath) throws IOException {
public int getOrdinal(CategoryPath cp) throws IOException {
ensureOpen();
if (categoryPath.length() == 0) {
if (cp.length == 0) {
return ROOT_ORDINAL;
}
String path = categoryPath.toString(delimiter);
// First try to find the answer in the LRU cache:
synchronized (ordinalCache) {
Integer res = ordinalCache.get(path);
Integer res = ordinalCache.get(cp);
if (res != null) {
if (res.intValue() < indexReader.maxDoc()) {
// Since the cache is shared with DTR instances allocated from
@ -307,7 +275,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
// If we're still here, we have a cache miss. We need to fetch the
// value from disk, and then also put it in the cache:
int ret = TaxonomyReader.INVALID_ORDINAL;
DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, null, Consts.FULL, new BytesRef(path), 0);
DocsEnum docs = MultiFields.getTermDocsEnum(indexReader, null, Consts.FULL, new BytesRef(cp.toString(delimiter)), 0);
if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
ret = docs.docID();
@ -317,7 +285,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
// information about found categories, we cannot accidentally tell a new
// generation of DTR that a category does not exist.
synchronized (ordinalCache) {
ordinalCache.put(path, Integer.valueOf(ret));
ordinalCache.put(cp, Integer.valueOf(ret));
}
}
@ -333,31 +301,33 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
@Override
public CategoryPath getPath(int ordinal) throws IOException {
ensureOpen();
// TODO (Facet): Currently, the LRU cache we use (getCategoryCache) holds
// strings with delimiters, not CategoryPath objects, so even if
// we have a cache hit, we need to process the string and build a new
// CategoryPath object every time. What is preventing us from putting
// the actual CategoryPath object in the cache is the fact that these
// objects are mutable. So we should create an immutable (read-only)
// interface that CategoryPath implements, and this method should
// return this interface, not the writable CategoryPath.
String label = getLabel(ordinal);
if (label == null) {
// Since the cache is shared with DTR instances allocated from
// doOpenIfChanged, we need to ensure that the ordinal is one that this DTR
// instance recognizes. Therefore we do this check up front, before we hit
// the cache.
if (ordinal < 0 || ordinal >= indexReader.maxDoc()) {
return null;
}
return new CategoryPath(label, delimiter);
}
@Override
public boolean getPath(int ordinal, CategoryPath result) throws IOException {
ensureOpen();
String label = getLabel(ordinal);
if (label == null) {
return false;
// TODO: can we use an int-based hash impl, such as IntToObjectMap,
// wrapped as LRU?
Integer catIDInteger = Integer.valueOf(ordinal);
synchronized (categoryCache) {
CategoryPath res = categoryCache.get(catIDInteger);
if (res != null) {
return res;
}
}
result.clear();
result.add(label, delimiter);
return true;
final LoadFullPathOnly loader = new LoadFullPathOnly();
indexReader.document(ordinal, loader);
CategoryPath ret = new CategoryPath(loader.getFullPath(), delimiter);
synchronized (categoryCache) {
categoryCache.put(catIDInteger, ret);
}
return ret;
}
@Override
@ -411,7 +381,7 @@ public class DirectoryTaxonomyReader extends TaxonomyReader {
sb.append(i + ": NULL!! \n");
continue;
}
if (category.length() == 0) {
if (category.length == 0) {
sb.append(i + ": EMPTY STRING!! \n");
continue;
}

View File

@ -249,7 +249,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
cacheIsComplete = true;
// Make sure that the taxonomy always contain the root category
// with category id 0.
addCategory(new CategoryPath());
addCategory(CategoryPath.EMPTY);
} else {
// There are some categories on the disk, which we have not yet
// read into the cache, and therefore the cache is incomplete.
@ -449,56 +449,6 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
return doc;
}
/**
* Look up the given prefix of the given category in the cache and/or the
* on-disk storage, returning that prefix's ordinal, or a negative number in
* case the category does not yet exist in the taxonomy.
*/
private int findCategory(CategoryPath categoryPath, int prefixLen)
throws IOException {
int res = cache.get(categoryPath, prefixLen);
if (res >= 0 || cacheIsComplete) {
return res;
}
cacheMisses.incrementAndGet();
perhapsFillCache();
res = cache.get(categoryPath, prefixLen);
if (res >= 0 || cacheIsComplete) {
return res;
}
initReaderManager();
int doc = -1;
DirectoryReader reader = readerManager.acquire();
try {
TermsEnum termsEnum = null; // reuse
DocsEnum docs = null; // reuse
final BytesRef catTerm = new BytesRef(categoryPath.toString(delimiter, prefixLen));
for (AtomicReaderContext ctx : reader.leaves()) {
Terms terms = ctx.reader().terms(Consts.FULL);
if (terms != null) {
termsEnum = terms.iterator(termsEnum);
if (termsEnum.seekExact(catTerm, true)) {
// liveDocs=null because the taxonomy has no deletes
docs = termsEnum.docs(null, docs, 0 /* freqs not required */);
// if the term was found, we know it has exactly one document.
doc = docs.nextDoc() + ctx.docBase;
break;
}
}
}
} finally {
readerManager.release(reader);
}
if (doc > 0) {
addToCache(categoryPath, prefixLen, doc);
}
return doc;
}
@Override
public int addCategory(CategoryPath categoryPath) throws IOException {
ensureOpen();
@ -516,7 +466,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
// (while keeping the invariant that a parent is always added to
// the taxonomy before its child). internalAddCategory() does all
// this recursively
res = internalAddCategory(categoryPath, categoryPath.length());
res = internalAddCategory(categoryPath);
}
}
}
@ -532,25 +482,24 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
* parent is always added to the taxonomy before its child). We do this by
* recursion.
*/
private int internalAddCategory(CategoryPath categoryPath, int length)
throws IOException {
private int internalAddCategory(CategoryPath cp) throws IOException {
// Find our parent's ordinal (recursively adding the parent category
// to the taxonomy if it's not already there). Then add the parent
// ordinal as payloads (rather than a stored field; payloads can be
// more efficiently read into memory in bulk by LuceneTaxonomyReader)
int parent;
if (length > 1) {
parent = findCategory(categoryPath, length - 1);
if (cp.length > 1) {
CategoryPath parentPath = cp.subpath(cp.length - 1);
parent = findCategory(parentPath);
if (parent < 0) {
parent = internalAddCategory(categoryPath, length - 1);
parent = internalAddCategory(parentPath);
}
} else if (length == 1) {
} else if (cp.length == 1) {
parent = TaxonomyReader.ROOT_ORDINAL;
} else {
parent = TaxonomyReader.INVALID_ORDINAL;
}
int id = addCategoryDocument(categoryPath, length, parent);
int id = addCategoryDocument(cp, parent);
return id;
}
@ -569,8 +518,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
* Note that the methods calling addCategoryDocument() are synchronized, so
* this method is effectively synchronized as well.
*/
private int addCategoryDocument(CategoryPath categoryPath, int length,
int parent) throws IOException {
private int addCategoryDocument(CategoryPath categoryPath, int parent) throws IOException {
// Before Lucene 2.9, position increments >=0 were supported, so we
// added 1 to parent to allow the parent -1 (the parent of the root).
// Unfortunately, starting with Lucene 2.9, after LUCENE-1542, this is
@ -580,11 +528,11 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
// we write here (e.g., to write parent+2), and need to do a workaround
// in the reader (which knows that anyway only category 0 has a parent
// -1).
parentStream.set(Math.max(parent+1, 1));
parentStream.set(Math.max(parent + 1, 1));
Document d = new Document();
d.add(parentStreamField);
fullPathField.setStringValue(categoryPath.toString(delimiter, length));
fullPathField.setStringValue(categoryPath.toString(delimiter));
d.add(fullPathField);
// Note that we do no pass an Analyzer here because the fields that are
@ -601,7 +549,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
// NOTE: this line must be executed last, or else the cache gets updated
// before the parents array (LUCENE-4596)
addToCache(categoryPath, length, id);
addToCache(categoryPath, id);
return id;
}
@ -653,14 +601,6 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
}
}
private void addToCache(CategoryPath categoryPath, int prefixLen, int id)
throws IOException {
if (cache.put(categoryPath, prefixLen, id)) {
refreshReaderManager();
cacheIsComplete = false;
}
}
private synchronized void refreshReaderManager() throws IOException {
// this method is synchronized since it cannot happen concurrently with
// addCategoryDocument -- when this method returns, we must know that the
@ -760,7 +700,6 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
boolean aborted = false;
DirectoryReader reader = readerManager.acquire();
try {
CategoryPath cp = new CategoryPath();
TermsEnum termsEnum = null;
DocsEnum docsEnum = null;
for (AtomicReaderContext ctx : reader.leaves()) {
@ -775,8 +714,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
// hence documents), there are no deletions in the index. Therefore, it
// is sufficient to call next(), and then doc(), exactly once with no
// 'validation' checks.
cp.clear();
cp.add(t.utf8ToString(), delimiter);
CategoryPath cp = new CategoryPath(t.utf8ToString(), delimiter);
docsEnum = termsEnum.docs(null, docsEnum, DocsEnum.FLAG_NONE);
boolean res = cache.put(cp, docsEnum.nextDoc() + ctx.docBase);
assert !res : "entries should not have been evicted from the cache";
@ -857,7 +795,6 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
final int size = r.numDocs();
final OrdinalMap ordinalMap = map;
ordinalMap.setSize(size);
CategoryPath cp = new CategoryPath();
int base = 0;
TermsEnum te = null;
DocsEnum docs = null;
@ -867,8 +804,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
te = terms.iterator(te);
while (te.next() != null) {
String value = te.term().utf8ToString();
cp.clear();
cp.add(value, Consts.DEFAULT_DELIMITER);
CategoryPath cp = new CategoryPath(value, Consts.DEFAULT_DELIMITER);
final int ordinal = addCategory(cp);
docs = te.docs(null, docs, DocsEnum.FLAG_NONE);
ordinalMap.addMapping(docs.nextDoc() + base, ordinal);

View File

@ -64,15 +64,6 @@ public interface TaxonomyWriterCache {
*/
public int get(CategoryPath categoryPath);
/**
* Like {@link #get(CategoryPath)}, but for a given prefix of the
* category path.
* <P>
* If the given length is negative or bigger than the path's actual
* length, the full path is taken.
*/
public int get(CategoryPath categoryPath, int length);
/**
* Add a category to the cache, with the given ordinal as the value.
* <P>
@ -93,15 +84,6 @@ public interface TaxonomyWriterCache {
*/
public boolean put(CategoryPath categoryPath, int ordinal);
/**
* Like {@link #put(CategoryPath, int)}, but for a given prefix of the
* category path.
* <P>
* If the given length is negative or bigger than the path's actual
* length, the full path is taken.
*/
public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal);
/**
* Returns true if the cache is full, such that the next {@link #put} will
* evict entries from it, false otherwise.

View File

@ -0,0 +1,82 @@
package org.apache.lucene.facet.taxonomy.writercache.cl2o;

import org.apache.lucene.facet.taxonomy.CategoryPath;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/** Utilities for use of {@link CategoryPath} by {@link CompactLabelToOrdinal}. */
public class CategoryPathUtils {

  /**
   * Serializes the given {@link CategoryPath} to the {@link CharBlockArray}.
   * The layout is: the number of components (one char), then for every
   * component its length (one char) followed by the component's characters.
   */
  public static void serialize(CategoryPath cp, CharBlockArray charBlockArray) {
    final int numComponents = cp.length;
    charBlockArray.append((char) numComponents); // component count goes first
    for (int i = 0; i < numComponents; i++) {
      final String component = cp.components[i];
      charBlockArray.append((char) component.length()); // per-component length
      charBlockArray.append(component);                 // component characters
    }
  }

  /**
   * Calculates a hash function of a path that serialized with
   * {@link #serialize(CategoryPath, CharBlockArray)}.
   */
  public static int hashCodeOfSerialized(CharBlockArray charBlockArray, int offset) {
    // First char holds the component count (cast mirrors the serialized form).
    final int numComponents = (short) charBlockArray.charAt(offset++);
    if (numComponents == 0) {
      return 0; // the empty path hashes to 0
    }
    int hash = numComponents;
    for (int i = 0; i < numComponents; i++) {
      final int componentLength = (short) charBlockArray.charAt(offset++);
      // Fold in the hash of each component's character run.
      hash = hash * 31 + charBlockArray.subSequence(offset, offset + componentLength).hashCode();
      offset += componentLength;
    }
    return hash;
  }

  /**
   * Check whether the {@link CategoryPath} is equal to the one serialized in
   * {@link CharBlockArray}.
   */
  public static boolean equalsToSerialized(CategoryPath cp, CharBlockArray charBlockArray, int offset) {
    // Component counts must match before comparing any characters.
    if (cp.length != charBlockArray.charAt(offset++)) {
      return false;
    }
    for (int i = 0; i < cp.length; i++) {
      final int componentLength = (short) charBlockArray.charAt(offset++);
      final String component = cp.components[i];
      // Cheap length check first, then compare the serialized character run.
      if (component.length() != componentLength
          || !component.equals(charBlockArray.subSequence(offset, offset + componentLength))) {
        return false;
      }
      offset += componentLength;
    }
    return true;
  }
}

View File

@ -41,7 +41,7 @@ class CharBlockArray implements Appendable, Serializable, CharSequence {
final static class Block implements Serializable, Cloneable {
private static final long serialVersionUID = 1L;
char[] chars;
final char[] chars;
int length;
Block(int size) {
@ -149,7 +149,7 @@ class CharBlockArray implements Appendable, Serializable, CharSequence {
@Override
public char charAt(int index) {
Block b = this.blocks.get(blockIndex(index));
Block b = blocks.get(blockIndex(index));
return b.chars[indexInBlock(index)];
}
@ -160,16 +160,27 @@ class CharBlockArray implements Appendable, Serializable, CharSequence {
@Override
public CharSequence subSequence(int start, int end) {
throw new UnsupportedOperationException("subsequence not implemented yet");
int remaining = end - start;
StringBuilder sb = new StringBuilder(remaining);
int blockIdx = blockIndex(start);
int indexInBlock = indexInBlock(start);
while (remaining > 0) {
Block b = blocks.get(blockIdx++);
int numToAppend = Math.min(remaining, b.length - indexInBlock);
sb.append(b.chars, indexInBlock, numToAppend);
remaining -= numToAppend;
indexInBlock = 0; // 2nd+ iterations read from start of the block
}
return sb.toString();
}
@Override
public String toString() {
StringBuilder b = new StringBuilder(blockSize * this.blocks.size());
for (int i = 0; i < this.blocks.size(); i++) {
b.append(this.blocks.get(i).chars);
StringBuilder sb = new StringBuilder();
for (Block b : blocks) {
sb.append(b.chars, 0, b.length);
}
return b.toString();
return sb.toString();
}
void flush(OutputStream out) throws IOException {

View File

@ -77,19 +77,6 @@ public class Cl2oTaxonomyWriterCache implements TaxonomyWriterCache {
}
}
@Override
public int get(CategoryPath categoryPath, int length) {
if (length < 0 || length > categoryPath.length()) {
length = categoryPath.length();
}
lock.readLock().lock();
try {
return cache.getOrdinal(categoryPath, length);
} finally {
lock.readLock().unlock();
}
}
@Override
public boolean put(CategoryPath categoryPath, int ordinal) {
lock.writeLock().lock();
@ -103,23 +90,7 @@ public class Cl2oTaxonomyWriterCache implements TaxonomyWriterCache {
}
}
@Override
public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) {
lock.writeLock().lock();
try {
cache.addLabel(categoryPath, prefixLen, ordinal);
// Tell the caller we didn't clear part of the cache, so it doesn't
// have to flush its on-disk index now
return false;
} finally {
lock.writeLock().unlock();
}
}
/**
* Returns the number of bytes in memory used by this object.
* @return Number of bytes in memory used by this object.
*/
/** Returns the number of bytes in memory used by this object. */
public int getMemoryUsage() {
return cache == null ? 0 : cache.getMemoryUsage();
}

View File

@ -1,6 +1,5 @@
package org.apache.lucene.facet.taxonomy.writercache.cl2o;
import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
@ -108,25 +107,11 @@ public class CollisionMap {
int bucketIndex = indexFor(hash, this.capacity);
Entry e = this.entries[bucketIndex];
while (e != null && !(hash == e.hash && label.equalsToSerialized(this.labelRepository, e.offset))) {
while (e != null && !(hash == e.hash && CategoryPathUtils.equalsToSerialized(label, labelRepository, e.offset))) {
e = e.next;
}
if (e == null) {
return LabelToOrdinal.InvalidOrdinal;
}
return e.cid;
}
public int get(CategoryPath label, int prefixLen, int hash) {
int bucketIndex = indexFor(hash, this.capacity);
Entry e = this.entries[bucketIndex];
while (e != null && !(hash == e.hash && label.equalsToSerialized(prefixLen, this.labelRepository, e.offset))) {
e = e.next;
}
if (e == null) {
return LabelToOrdinal.InvalidOrdinal;
return LabelToOrdinal.INVALID_ORDINAL;
}
return e.cid;
@ -135,47 +120,22 @@ public class CollisionMap {
public int addLabel(CategoryPath label, int hash, int cid) {
int bucketIndex = indexFor(hash, this.capacity);
for (Entry e = this.entries[bucketIndex]; e != null; e = e.next) {
if (e.hash == hash && label.equalsToSerialized(this.labelRepository, e.offset)) {
if (e.hash == hash && CategoryPathUtils.equalsToSerialized(label, labelRepository, e.offset)) {
return e.cid;
}
}
// new string; add to label repository
int offset = this.labelRepository.length();
try {
label.serializeAppendTo(labelRepository);
} catch (IOException e) {
// can't happen, because labelRepository.append() doesn't throw an exception
}
addEntry(offset, cid, hash, bucketIndex);
return cid;
}
public int addLabel(CategoryPath label, int prefixLen, int hash, int cid) {
int bucketIndex = indexFor(hash, this.capacity);
for (Entry e = this.entries[bucketIndex]; e != null; e = e.next) {
if (e.hash == hash && label.equalsToSerialized(prefixLen, this.labelRepository, e.offset)) {
return e.cid;
}
}
// new string; add to label repository
int offset = this.labelRepository.length();
try {
label.serializeAppendTo(prefixLen, labelRepository);
} catch (IOException e) {
// can't happen, because labelRepository.append() doesn't throw an exception
}
int offset = labelRepository.length();
CategoryPathUtils.serialize(label, labelRepository);
addEntry(offset, cid, hash, bucketIndex);
return cid;
}
/**
* This method does not check if the same value is already
* in the map because we pass in an char-array offset, so
* so we now that we're in resize-mode here.
This method does not check if the same value is already in the map because
we pass in a char-array offset, so we know that we're in resize-mode
here.
*/
public void addLabelOffset(int hash, int offset, int cid) {
int bucketIndex = indexFor(hash, this.capacity);

View File

@ -29,8 +29,6 @@ import java.util.Iterator;
import org.apache.lucene.facet.taxonomy.CategoryPath;
// TODO: maybe this could use an FST instead...
/**
* This is a very efficient LabelToOrdinal implementation that uses a
* CharBlockArray to store all labels and a configurable number of HashArrays to
@ -59,8 +57,8 @@ public class CompactLabelToOrdinal extends LabelToOrdinal {
public static final float DefaultLoadFactor = 0.15f;
static final char TerminatorChar = 0xffff;
private static final int Collision = -5;
static final char TERMINATOR_CHAR = 0xffff;
private static final int COLLISION = -5;
private HashArray[] hashArrays;
private CollisionMap collisionMap;
@ -103,9 +101,7 @@ public class CompactLabelToOrdinal extends LabelToOrdinal {
private void init() {
labelRepository = new CharBlockArray();
try {
new CategoryPath().serializeAppendTo(labelRepository);
} catch (IOException e) { } //can't happen
CategoryPathUtils.serialize(CategoryPath.EMPTY, labelRepository);
int c = this.capacity;
for (int i = 0; i < this.hashArrays.length; i++) {
@ -116,7 +112,7 @@ public class CompactLabelToOrdinal extends LabelToOrdinal {
@Override
public void addLabel(CategoryPath label, int ordinal) {
if (this.collisionMap.size() > this.threshold) {
if (collisionMap.size() > threshold) {
grow();
}
@ -127,43 +123,22 @@ public class CompactLabelToOrdinal extends LabelToOrdinal {
}
}
int prevVal = this.collisionMap.addLabel(label, hash, ordinal);
int prevVal = collisionMap.addLabel(label, hash, ordinal);
if (prevVal != ordinal) {
throw new IllegalArgumentException("Label already exists: " +
label.toString('/') + " prev ordinal " + prevVal);
}
}
@Override
public void addLabel(CategoryPath label, int prefixLen, int ordinal) {
if (this.collisionMap.size() > this.threshold) {
grow();
}
int hash = CompactLabelToOrdinal.stringHashCode(label, prefixLen);
for (int i = 0; i < this.hashArrays.length; i++) {
if (addLabel(this.hashArrays[i], label, prefixLen, hash, ordinal)) {
return;
}
}
int prevVal = this.collisionMap.addLabel(label, prefixLen, hash, ordinal);
if (prevVal != ordinal) {
throw new IllegalArgumentException("Label already exists: " +
label.toString('/', prefixLen) + " prev ordinal " + prevVal);
throw new IllegalArgumentException("Label already exists: " + label.toString('/') + " prev ordinal " + prevVal);
}
}
@Override
public int getOrdinal(CategoryPath label) {
if (label == null) {
return LabelToOrdinal.InvalidOrdinal;
return LabelToOrdinal.INVALID_ORDINAL;
}
int hash = CompactLabelToOrdinal.stringHashCode(label);
for (int i = 0; i < this.hashArrays.length; i++) {
int ord = getOrdinal(this.hashArrays[i], label, hash);
if (ord != Collision) {
if (ord != COLLISION) {
return ord;
}
}
@ -171,23 +146,6 @@ public class CompactLabelToOrdinal extends LabelToOrdinal {
return this.collisionMap.get(label, hash);
}
@Override
public int getOrdinal(CategoryPath label, int prefixLen) {
if (label == null) {
return LabelToOrdinal.InvalidOrdinal;
}
int hash = CompactLabelToOrdinal.stringHashCode(label, prefixLen);
for (int i = 0; i < this.hashArrays.length; i++) {
int ord = getOrdinal(this.hashArrays[i], label, prefixLen, hash);
if (ord != Collision) {
return ord;
}
}
return this.collisionMap.get(label, prefixLen, hash);
}
private void grow() {
HashArray temp = this.hashArrays[this.hashArrays.length - 1];
@ -241,39 +199,13 @@ public class CompactLabelToOrdinal extends LabelToOrdinal {
}
}
private boolean addLabel(HashArray a, CategoryPath label, int hash,
int ordinal) {
private boolean addLabel(HashArray a, CategoryPath label, int hash, int ordinal) {
int index = CompactLabelToOrdinal.indexFor(hash, a.offsets.length);
int offset = a.offsets[index];
if (offset == 0) {
a.offsets[index] = this.labelRepository.length();
try {
label.serializeAppendTo(this.labelRepository);
} catch (IOException e) {
// can't happen - LabelRepository.append() never throws an
// exception
}
a.cids[index] = ordinal;
return true;
}
return false;
}
private boolean addLabel(HashArray a, CategoryPath label, int prefixLen,
int hash, int ordinal) {
int index = CompactLabelToOrdinal.indexFor(hash, a.offsets.length);
int offset = a.offsets[index];
if (offset == 0) {
a.offsets[index] = this.labelRepository.length();
try {
label.serializeAppendTo(prefixLen, this.labelRepository);
} catch (IOException e) {
// can't happen - LabelRepository.append() never throws an
// exception
}
CategoryPathUtils.serialize(label, labelRepository);
a.cids[index] = ordinal;
return true;
}
@ -313,43 +245,23 @@ public class CompactLabelToOrdinal extends LabelToOrdinal {
private int getOrdinal(HashArray a, CategoryPath label, int hash) {
if (label == null) {
return LabelToOrdinal.InvalidOrdinal;
return LabelToOrdinal.INVALID_ORDINAL;
}
int index = CompactLabelToOrdinal.indexFor(hash, a.offsets.length);
int index = indexFor(hash, a.offsets.length);
int offset = a.offsets[index];
if (offset == 0) {
return LabelToOrdinal.InvalidOrdinal;
return LabelToOrdinal.INVALID_ORDINAL;
}
if (label.equalsToSerialized(labelRepository, offset)) {
if (CategoryPathUtils.equalsToSerialized(label, labelRepository, offset)) {
return a.cids[index];
}
return Collision;
return COLLISION;
}
private int getOrdinal(HashArray a, CategoryPath label, int prefixLen, int hash) {
if (label == null) {
return LabelToOrdinal.InvalidOrdinal;
}
int index = CompactLabelToOrdinal.indexFor(hash, a.offsets.length);
int offset = a.offsets[index];
if (offset == 0) {
return LabelToOrdinal.InvalidOrdinal;
}
if (label.equalsToSerialized(prefixLen, labelRepository, offset)) {
return a.cids[index];
}
return Collision;
}
/**
* Returns index for hash code h.
*/
/** Returns index for hash code h. */
static int indexFor(int h, int length) {
return h & (length - 1);
}
@ -378,22 +290,10 @@ public class CompactLabelToOrdinal extends LabelToOrdinal {
}
static int stringHashCode(CategoryPath label, int prefixLen) {
int hash = label.hashCode(prefixLen);
hash = hash ^ ((hash >>> 20) ^ (hash >>> 12));
hash = hash ^ (hash >>> 7) ^ (hash >>> 4);
return hash;
}
static int stringHashCode(CharBlockArray labelRepository, int offset) {
int hash = CategoryPath.hashCodeOfSerialized(labelRepository, offset);
int hash = CategoryPathUtils.hashCodeOfSerialized(labelRepository, offset);
hash = hash ^ ((hash >>> 20) ^ (hash >>> 12));
hash = hash ^ (hash >>> 7) ^ (hash >>> 4);
return hash;
}
@ -495,25 +395,16 @@ public class CompactLabelToOrdinal extends LabelToOrdinal {
// that array offsets will work). Since the initial file is machine
// generated, I think this should be OK.
while (offset < l2o.labelRepository.length()) {
// First component is numcomponents, so we initialize the hash
// to this
int ncomponents = l2o.labelRepository.charAt(offset++);
int hash = ncomponents;
// If ncomponents is 0, then we are done?
if (ncomponents != 0) {
// usedchars is always the last member of the 'ends' array
// in serialization. Rather than rebuild the entire array,
// assign usedchars to the last value we read in. This will
// be slightly more memory efficient.
int usedchars = 0;
for (int i = 0; i < ncomponents; i++) {
usedchars = l2o.labelRepository.charAt(offset++);
hash = hash * 31 + usedchars;
}
// Hash the usedchars for this label
for (int i = 0; i < usedchars; i++) {
hash = hash * 31 + l2o.labelRepository.charAt(offset++);
// identical code to CategoryPath.hashFromSerialized. since we need to
// advance offset, we cannot call the method directly. perhaps if we
// could pass a mutable Integer or something...
int length = (short) l2o.labelRepository.charAt(offset++);
int hash = length;
if (length != 0) {
for (int i = 0; i < length; i++) {
int len = (short) l2o.labelRepository.charAt(offset++);
hash = hash * 31 + l2o.labelRepository.subSequence(offset, offset + len).hashCode();
offset += len;
}
}
// Now that we've hashed the components of the label, do the

View File

@ -27,7 +27,7 @@ import org.apache.lucene.facet.taxonomy.CategoryPath;
public abstract class LabelToOrdinal {
protected int counter;
public static final int InvalidOrdinal = -2;
public static final int INVALID_ORDINAL = -2;
/**
* return the maximal Ordinal assigned so far
@ -51,23 +51,10 @@ public abstract class LabelToOrdinal {
*/
public abstract void addLabel(CategoryPath label, int ordinal);
/**
* Adds a new label if its not yet in the table.
* Throws an {@link IllegalArgumentException} if the same label with
* a different ordinal was previoulsy added to this table.
*/
public abstract void addLabel(CategoryPath label, int prefixLen, int ordinal);
/**
* @return the ordinal assigned to the given label,
* or {@link #InvalidOrdinal} if the label cannot be found in this table.
* or {@link #INVALID_ORDINAL} if the label cannot be found in this table.
*/
public abstract int getOrdinal(CategoryPath label);
/**
* @return the ordinal assigned to the given label,
* or {@link #InvalidOrdinal} if the label cannot be found in this table.
*/
public abstract int getOrdinal(CategoryPath label, int prefixLen);
}

View File

@ -86,23 +86,6 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
return res.intValue();
}
@Override
public synchronized int get(CategoryPath categoryPath, int length) {
if (length<0 || length>categoryPath.length()) {
length = categoryPath.length();
}
// TODO (Facet): unfortunately, we make a copy here! we can avoid part of
// the copy by creating a wrapper object (but this still creates a new
// object). A better implementation of the cache would not use Java's
// hash table, but rather some other hash table we can control, and
// pass the length parameter into it...
Integer res = cache.get(new CategoryPath(categoryPath, length));
if (res==null) {
return -1;
}
return res.intValue();
}
@Override
public synchronized boolean put(CategoryPath categoryPath, int ordinal) {
boolean ret = cache.put(categoryPath, new Integer(ordinal));
@ -119,20 +102,4 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
return ret;
}
@Override
public synchronized boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) {
boolean ret = cache.put(categoryPath, prefixLen, new Integer(ordinal));
// If the cache is full, we need to clear one or more old entries
// from the cache. However, if we delete from the cache a recent
// addition that isn't yet in our reader, for this entry to be
// visible to us we need to make sure that the changes have been
// committed and we reopen the reader. Because this is a slow
// operation, we don't delete entries one-by-one but rather in bulk
// (put() removes the 2/3rd oldest entries).
if (ret) {
cache.makeRoomLRU();
}
return ret;
}
}

View File

@ -41,6 +41,7 @@ public class NameHashIntCacheLRU extends NameIntCacheLRU {
@Override
Object key(CategoryPath name, int prefixLen) {
return new Long(name.longHashCode(prefixLen));
return new Long(name.subpath(prefixLen).longHashCode());
}
}

View File

@ -68,23 +68,13 @@ class NameIntCacheLRU {
return res;
}
/**
* Subclasses can override this to provide caching by e.g. hash of the string.
*/
/** Subclasses can override this to provide caching by e.g. hash of the string. */
Object key(CategoryPath name) {
// Note that a copy constructor (cloning) here is necessary, because a
// CategoryPath object is mutable, so we cannot save a reference to an
// existing CategoryPath. Subclasses which override this method can
// avoid this cloning by, e.g., hashing the name.
return new CategoryPath(name);
return name;
}
Object key(CategoryPath name, int prefixLen) {
// Note that a copy constructor (cloning) here is necessary, because a
// CategoryPath object is mutable, so we cannot save a reference to an
// existing CategoryPath. Subclasses which override this method can
// avoid this cloning by, e.g., hashing the name.
return new CategoryPath(name, prefixLen);
return name.subpath(prefixLen);
}
/**

View File

@ -77,7 +77,7 @@ public class OrdinalMappingReaderTest extends LuceneTestCase {
FacetResultNode node = result.getFacetResultNode();
for (FacetResultNode facet: node.getSubResults()) {
int weight = (int)facet.getValue();
int label = Integer.parseInt(facet.getLabel().getComponent(1));
int label = Integer.parseInt(facet.getLabel().components[1]);
//System.out.println(label + ": " + weight);
if (VERBOSE) {
System.out.println(label + ": " + weight);

View File

@ -31,12 +31,10 @@ public class OrdinalPolicyTest extends LuceneTestCase {
public void testDefaultOrdinalPolicy() {
// check ordinal policy
OrdinalPolicy ordinalPolicy = OrdinalPolicy.ALL_PARENTS;
assertFalse("default ordinal policy should not match root", ordinalPolicy
.shouldAdd(TaxonomyReader.ROOT_ORDINAL));
assertFalse("default ordinal policy should not match root", ordinalPolicy.shouldAdd(TaxonomyReader.ROOT_ORDINAL));
for (int i = 0; i < 300; i++) {
int ordinal = 1 + random().nextInt(Integer.MAX_VALUE - 1);
assertTrue("default ordinal policy should match " + ordinal,
ordinalPolicy.shouldAdd(ordinal));
assertTrue("default ordinal policy should match " + ordinal, ordinalPolicy.shouldAdd(ordinal));
}
}
@ -50,8 +48,7 @@ public class OrdinalPolicyTest extends LuceneTestCase {
String[] topLevelStrings = new String[10];
for (int i = 0; i < 10; i++) {
topLevelStrings[i] = Integer.valueOf(random().nextInt(30)).toString();
topLevelOrdinals[i] = taxonomy.addCategory(new CategoryPath(
topLevelStrings[i]));
topLevelOrdinals[i] = taxonomy.addCategory(new CategoryPath(topLevelStrings[i]));
}
int[] nonTopLevelOrdinals = new int[300];
for (int i = 0; i < 300; i++) {
@ -61,22 +58,18 @@ public class OrdinalPolicyTest extends LuceneTestCase {
for (int j = 1; j < components.length; j++) {
components[j] = (Integer.valueOf(random().nextInt(30))).toString();
}
nonTopLevelOrdinals[i] = taxonomy.addCategory(new CategoryPath(
components));
nonTopLevelOrdinals[i] = taxonomy.addCategory(new CategoryPath(components));
}
// check ordinal policy
OrdinalPolicy ordinalPolicy = new NonTopLevelOrdinalPolicy();
ordinalPolicy.init(taxonomy);
assertFalse("top level ordinal policy should not match root", ordinalPolicy
.shouldAdd(TaxonomyReader.ROOT_ORDINAL));
assertFalse("top level ordinal policy should not match root", ordinalPolicy.shouldAdd(TaxonomyReader.ROOT_ORDINAL));
for (int i = 0; i < 10; i++) {
assertFalse("top level ordinal policy should not match "
+ topLevelOrdinals[i],
assertFalse("top level ordinal policy should not match " + topLevelOrdinals[i],
ordinalPolicy.shouldAdd(topLevelOrdinals[i]));
}
for (int i = 0; i < 300; i++) {
assertTrue("top level ordinal policy should match "
+ nonTopLevelOrdinals[i],
assertTrue("top level ordinal policy should match " + nonTopLevelOrdinals[i],
ordinalPolicy.shouldAdd(nonTopLevelOrdinals[i]));
}

View File

@ -29,10 +29,9 @@ public class PathPolicyTest extends LuceneTestCase {
@Test
public void testDefaultPathPolicy() {
// check path policy
CategoryPath cp = new CategoryPath();
CategoryPath cp = CategoryPath.EMPTY;
PathPolicy pathPolicy = PathPolicy.ALL_CATEGORIES;
assertFalse("default path policy should not accept root",
pathPolicy.shouldAdd(cp));
assertFalse("default path policy should not accept root", pathPolicy.shouldAdd(cp));
for (int i = 0; i < 300; i++) {
int nComponents = 1 + random().nextInt(10);
String[] components = new String[nComponents];
@ -40,9 +39,7 @@ public class PathPolicyTest extends LuceneTestCase {
components[j] = (Integer.valueOf(random().nextInt(30))).toString();
}
cp = new CategoryPath(components);
assertTrue("default path policy should accept "
+ cp.toString('/'),
pathPolicy.shouldAdd(cp));
assertTrue("default path policy should accept " + cp.toString('/'), pathPolicy.shouldAdd(cp));
}
}
@ -74,7 +71,7 @@ public class PathPolicyTest extends LuceneTestCase {
// check ordinal policy
PathPolicy pathPolicy = new NonTopLevelPathPolicy();
assertFalse("top level path policy should not match root",
pathPolicy.shouldAdd(new CategoryPath()));
pathPolicy.shouldAdd(CategoryPath.EMPTY));
for (int i = 0; i < 10; i++) {
assertFalse("top level path policy should not match "
+ topLevelPaths[i],

View File

@ -74,7 +74,7 @@ public class FacetIndexingParamsTest extends LuceneTestCase {
public void testCategoryPolicies() {
FacetIndexingParams dfip = FacetIndexingParams.ALL_PARENTS;
// check path policy
CategoryPath cp = new CategoryPath();
CategoryPath cp = CategoryPath.EMPTY;
PathPolicy pathPolicy = PathPolicy.ALL_CATEGORIES;
assertEquals("path policy does not match default for root", pathPolicy.shouldAdd(cp), dfip.getPathPolicy().shouldAdd(cp));
for (int i = 0; i < 30; i++) {

View File

@ -129,7 +129,7 @@ public class TestDemoFacets extends LuceneTestCase {
}
private void toSimpleString(int depth, StringBuilder sb, FacetResultNode node, String indent) {
sb.append(indent + node.getLabel().getComponent(depth) + " (" + (int) node.getValue() + ")\n");
sb.append(indent + node.getLabel().components[depth] + " (" + (int) node.getValue() + ")\n");
for(FacetResultNode childNode : node.getSubResults()) {
toSimpleString(depth+1, sb, childNode, indent + " ");
}

View File

@ -178,7 +178,7 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
}
FacetResult fr = facetResults.get(0); // a, depth=3, K=2
boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
boolean hasDoctor = "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]);
assertEquals(9, fr.getNumValidDescendants());
FacetResultNode parentRes = fr.getFacetResultNode();
assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE);
@ -219,7 +219,7 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
}
fr = facetResults.get(1); // a, depth=2, K=2. same result as before
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]);
assertEquals(9, fr.getNumValidDescendants());
parentRes = fr.getFacetResultNode();
assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE);
@ -239,7 +239,7 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
}
fr = facetResults.get(2); // a, depth=1, K=2
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]);
assertEquals(4, fr.getNumValidDescendants(), 4);
parentRes = fr.getFacetResultNode();
assertEquals(16.0, parentRes.getValue(), Double.MIN_VALUE);
@ -257,7 +257,7 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
}
fr = facetResults.get(3); // a/b, depth=3, K=2
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]);
assertEquals(4, fr.getNumValidDescendants());
parentRes = fr.getFacetResultNode();
assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
@ -272,7 +272,7 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
}
fr = facetResults.get(4); // a/b, depth=2, K=2
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]);
assertEquals(4, fr.getNumValidDescendants());
parentRes = fr.getFacetResultNode();
assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
@ -286,7 +286,7 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
}
fr = facetResults.get(5); // a/b, depth=1, K=2
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]);
assertEquals(4, fr.getNumValidDescendants());
parentRes = fr.getFacetResultNode();
assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
@ -300,13 +300,13 @@ public class TestTopKInEachNodeResultHandler extends LuceneTestCase {
}
fr = facetResults.get(6); // a/b, depth=0, K=2
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]);
assertEquals(0, fr.getNumValidDescendants()); // 0 descendants but rootnode
parentRes = fr.getFacetResultNode();
assertEquals(8.0, parentRes.getValue(), Double.MIN_VALUE);
assertEquals(0.0, parentRes.getResidue(), Double.MIN_VALUE);
assertEquals(0, parentRes.getNumSubResults());
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().getComponent(0));
hasDoctor |= "Doctor".equals(fr.getFacetRequest().getCategoryPath().components[0]);
// doctor, depth=1, K=2
assertFalse("Shouldn't have found anything for a FacetRequest " +

View File

@ -231,7 +231,7 @@ public class MultiIteratorsPerCLParamsTest extends LuceneTestCase {
if (requestedPath == null) {
parentOrdinal = 0;
} else {
CategoryPath cp = new CategoryPath(requestedPath.getComponent(0));
CategoryPath cp = new CategoryPath(requestedPath.components[0]);
parentOrdinal = taxo.getOrdinal(cp);
}
parentArray = taxo.getParallelTaxonomyArrays().parents();

View File

@ -1,15 +1,7 @@
package org.apache.lucene.facet.taxonomy;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import org.junit.Test;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.junit.Test;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -32,843 +24,145 @@ public class TestCategoryPath extends LuceneTestCase {
@Test
public void testBasic() {
CategoryPath p = new CategoryPath(0,0);
assertEquals(0, p.length());
for (int i=0; i<1000; i++) {
p.add("hello");
assertEquals(i+1, p.length());
}
}
@Test
public void testConstructorCapacity() {
CategoryPath p = new CategoryPath(0,0);
assertEquals(0, p.capacityChars());
assertEquals(0, p.capacityComponents());
assertEquals(0, p.length());
p = new CategoryPath(5,18);
assertEquals(5, p.capacityChars());
assertEquals(18, p.capacityComponents());
assertEquals(0, p.length());
p = new CategoryPath(27,13);
assertEquals(27, p.capacityChars());
assertEquals(13, p.capacityComponents());
assertEquals(0, p.length());
}
@Test
public void testClear() {
CategoryPath p = new CategoryPath(0,0);
p.add("hi");
p.add("there");
assertEquals(2, p.length());
p.clear();
assertEquals(0, p.length());
p.add("yo!");
assertEquals(1, p.length());
}
@Test
public void testTrim() {
CategoryPath p = new CategoryPath(0,0);
p.add("this");
p.add("message");
p.add("will");
p.add("self");
p.add("destruct");
p.add("in");
p.add("five");
p.add("seconds");
assertEquals(8, p.length());
p.trim(3);
assertEquals(5, p.length());
p.trim(0); // no-op
assertEquals(5, p.length());
p.trim(-3); // no-op
assertEquals(5, p.length());
p.trim(1);
assertEquals(4, p.length());
p.trim(8); // clear
assertEquals(0, p.length());
p.add("yo!");
assertEquals(1, p.length());
p.trim(1); // clear
assertEquals(0, p.length());
}
@Test
public void testComponentsLimit() {
// Test that we can add up to 2^15-1 components
CategoryPath p = new CategoryPath(0,0);
for (int i=0; i<32767; i++) {
p.add("");
assertEquals(i+1, p.length());
}
// Also see that in the current implementation, this is actually
// the limit: if we add one more component, things break (because
// we used a short to hold ncomponents). See that it breaks in the
// way we expect it to:
p.add(""); // this still works, but...
assertEquals(-32768, p.length()); // now the length is wrong and negative
}
@Test
public void testCharsLimit() {
// Test that we can add up to 2^15-1 characters
CategoryPath p = new CategoryPath(0,0);
for (int i=0; i<8192; i++) {
p.add("aaaa");
}
// Also see that in the current implementation, this is actually the
// limit: If we add one more character, things break (because ends[]
// is an array of shorts), and we actually get an exception.
try {
p.add("a");
fail("Should have thrown an exception");
} catch (ArrayIndexOutOfBoundsException e) {
// good.
}
assertEquals(0, CategoryPath.EMPTY.length);
assertEquals(1, new CategoryPath("hello").length);
assertEquals(2, new CategoryPath("hello", "world").length);
}
@Test
public void testToString() {
CategoryPath p = new CategoryPath(0,0);
// When the category is empty, we expect an empty string
assertEquals("", p.toString('/'));
// This is (deliberately, in our implementation) indistinguishable
// from the case of a single empty component:
p.add("");
assertEquals("", p.toString('/'));
// Check just one category (so no delimiter needed):
p.clear();
p.add("hello");
assertEquals("hello", p.toString('/'));
// Now for two categories:
p.clear();
p.add("hello");
p.add("world");
assertEquals("hello/world", p.toString('/'));
// And for a thousand...
p.clear();
p.add("0");
StringBuilder expected = new StringBuilder("0");
for (int i=1; i<1000; i++) {
String num = Integer.toString(i);
p.add(num);
expected.append('/');
expected.append(num);
}
assertEquals(expected.toString(), p.toString('/'));
// Check that toString() without a parameter just defaults to '/':
assertEquals(expected.toString(), p.toString());
assertEquals("", CategoryPath.EMPTY.toString('/'));
// one category (so no delimiter needed)
assertEquals("hello", new CategoryPath("hello").toString('/'));
// more than one category (so no delimiter needed)
assertEquals("hello/world", new CategoryPath("hello", "world").toString('/'));
}
// testing toString() and its variants already test most of the appendTo()
// code, but not all of it (the "eclemma" code-coverage tool discovered
// this for us). Here we complete the coverage of the appendTo() methods:
@Test
public void testAppendTo() throws IOException {
CategoryPath p = new CategoryPath(0,0);
StringBuilder sb = new StringBuilder();
p.appendTo(sb, '/');
assertEquals(0, sb.length());
p.appendTo(sb, '/', -1);
assertEquals(0, sb.length());
p.appendTo(sb, '/', 1);
assertEquals(0, sb.length());
p.appendTo(sb, '/', -1, 1);
assertEquals(0, sb.length());
}
@Test
public void testLastComponent() {
CategoryPath p = new CategoryPath(1000,1000);
// When the category is empty, we expect a null
assertNull(p.lastComponent());
for (int i=0; i<=100; i++) {
String num = Integer.toString(i);
p.add(num);
assertEquals(num, p.lastComponent());
}
}
@Test
public void testGetComponent() {
CategoryPath p = new CategoryPath(1000,1000);
// When the category is empty, we expect a null
assertNull(p.getComponent(0));
assertNull(p.getComponent(1));
assertNull(p.getComponent(-1));
for (int i=0; i<=100; i++) {
p.add(Integer.toString(i));
for (int j=0; j<=i; j++) {
assertEquals(j, Integer.parseInt(p.getComponent(j)));
}
assertNull(p.getComponent(-1));
assertNull(p.getComponent(i+1));
String[] components = new String[atLeast(10)];
for (int i = 0; i < components.length; i++) {
components[i] = Integer.toString(i);
}
CategoryPath cp = new CategoryPath(components);
for (int i = 0; i < components.length; i++) {
assertEquals(i, Integer.parseInt(cp.components[i]));
}
}
@Test
public void testToStringPrefix() {
CategoryPath p = new CategoryPath(0,0);
p.add("hi");
p.add("there");
p.add("man");
assertEquals("hi/there/man", p.toString('/'));
assertEquals("", p.toString('/', 0));
assertEquals("hi", p.toString('/', 1));
assertEquals("hi/there", p.toString('/', 2));
assertEquals("hi/there/man", p.toString('/', 3));
assertEquals("hi/there/man", p.toString('/', 4));
assertEquals("hi/there/man", p.toString('/', -1));
}
@Test
public void testToStringSubpath() {
CategoryPath p = new CategoryPath(0,0);
assertEquals("", p.toString('/', 0, 0));
p.add("hi");
p.add("there");
p.add("man");
assertEquals("", p.toString('/', 0, 0));
assertEquals("hi", p.toString('/', 0, 1));
assertEquals("hi/there", p.toString('/', 0, 2));
assertEquals("hi/there/man", p.toString('/', 0, 3));
assertEquals("hi/there/man", p.toString('/', 0, 4));
assertEquals("hi/there/man", p.toString('/', 0, -1));
assertEquals("hi/there/man", p.toString('/', -1, -1));
assertEquals("there/man", p.toString('/', 1, -1));
assertEquals("man", p.toString('/', 2, -1));
assertEquals("", p.toString('/', 3, -1));
assertEquals("there/man", p.toString('/', 1, 3));
assertEquals("there", p.toString('/', 1, 2));
assertEquals("", p.toString('/', 1, 1));
}
@Test
@Test
public void testDelimiterConstructor() {
// Test that the constructor that takes a string and a delimiter
// works correctly. Also check that it allocates exactly the needed
// needed size for the array - not more.
CategoryPath p = new CategoryPath("", '/');
assertEquals(p.length(), 0);
assertEquals(p.capacityChars(), 0);
assertEquals(p.capacityComponents(), 0);
assertEquals(0, p.length);
p = new CategoryPath("hello", '/');
assertEquals(p.length(), 1);
assertEquals(p.capacityChars(), 5);
assertEquals(p.capacityComponents(), 1);
assertEquals(p.length, 1);
assertEquals(p.toString('@'), "hello");
p = new CategoryPath("hi/there", '/');
assertEquals(p.length(), 2);
assertEquals(p.capacityChars(), 7);
assertEquals(p.capacityComponents(), 2);
assertEquals(p.length, 2);
assertEquals(p.toString('@'), "hi@there");
p = new CategoryPath("how/are/you/doing?", '/');
assertEquals(p.length(), 4);
assertEquals(p.capacityChars(), 15);
assertEquals(p.capacityComponents(), 4);
assertEquals(p.length, 4);
assertEquals(p.toString('@'), "how@are@you@doing?");
}
@Test
@Test
public void testDefaultConstructor() {
// test that the default constructor (no parameters) currently
// defaults to creating an object with a 0 initial capacity.
// If we change this default later, we also need to change this
// test.
CategoryPath p = new CategoryPath();
assertEquals(0, p.capacityChars());
assertEquals(0, p.capacityComponents());
assertEquals(0, p.length());
CategoryPath p = CategoryPath.EMPTY;
assertEquals(0, p.length);
assertEquals("", p.toString('/'));
}
@Test
public void testAddEmpty() {
// In the current implementation, p.add("") should add en empty
// component (which is, admitingly, not a useful case. On the other
// hand, p.add("", delimiter) should add no components at all.
// Verify this:
CategoryPath p = new CategoryPath(0, 0);
p.add("");
assertEquals(1, p.length());
p.add("");
assertEquals(2, p.length());
p.add("", '/');
assertEquals(2, p.length());
p.clear();
p.add("", '/');
assertEquals(0, p.length());
}
@Test
public void testDelimiterAdd() {
// Test that the add() that takes a string and a delimiter
// works correctly. Note that unlike the constructor test above,
// we can't expect the capacity to grow to exactly the length of
// the given category, so we do not test this.
CategoryPath p = new CategoryPath(0, 0);
p.add("", '/');
assertEquals(0, p.length());
assertEquals("", p.toString('@'), "");
p.clear();
p.add("hello", '/');
assertEquals(p.length(), 1);
assertEquals(p.toString('@'), "hello");
p.clear();
p.add("hi/there", '/');
assertEquals(p.length(), 2);
assertEquals(p.toString('@'), "hi@there");
p.clear();
p.add("how/are/you/doing?", '/');
assertEquals(p.length(), 4);
assertEquals(p.toString('@'), "how@are@you@doing?");
// See that this is really an add, not replace:
p.clear();
p.add("hi/there", '/');
assertEquals(p.length(), 2);
assertEquals(p.toString('@'), "hi@there");
p.add("how/are/you/doing", '/');
assertEquals(p.length(), 6);
assertEquals(p.toString('@'), "hi@there@how@are@you@doing");
}
@Test
public void testCopyConstructor() {
CategoryPath p = new CategoryPath(0,0);
int expectedchars=0;
for (int i=0; i<1000; i++) {
CategoryPath clone = new CategoryPath(p);
assertEquals(p.length(), clone.length());
assertEquals(p.toString('/'), clone.toString('/'));
// verify that the newly created clone has exactly the right
// capacity, with no spare (while the original path p probably
// does have spare)
assertEquals(i, clone.capacityComponents());
assertEquals(expectedchars, clone.capacityChars());
// Finally, add another component to the path, for the next
// round of this loop
String num = Integer.toString(i);
p.add(num);
expectedchars+=num.length();
}
}
@Test
public void testPrefixCopyConstructor() {
CategoryPath p = new CategoryPath(0,0);
p.add("hi");
p.add("there");
p.add("man");
assertEquals(p.length(), 3);
public void testSubPath() {
final CategoryPath p = new CategoryPath("hi", "there", "man");
assertEquals(p.length, 3);
CategoryPath p1 = new CategoryPath(p,2);
assertEquals(2, p1.length());
CategoryPath p1 = p.subpath(2);
assertEquals(2, p1.length);
assertEquals("hi/there", p1.toString('/'));
// the new prefix object should only take the space it needs:
assertEquals(2, p1.capacityComponents());
assertEquals(7, p1.capacityChars());
p1 = new CategoryPath(p,1);
assertEquals(1, p1.length());
p1 = p.subpath(1);
assertEquals(1, p1.length);
assertEquals("hi", p1.toString('/'));
assertEquals(1, p1.capacityComponents());
assertEquals(2, p1.capacityChars());
p1 = new CategoryPath(p,0);
assertEquals(0, p1.length());
p1 = p.subpath(0);
assertEquals(0, p1.length);
assertEquals("", p1.toString('/'));
assertEquals(0, p1.capacityComponents());
assertEquals(0, p1.capacityChars());
// with all the following lengths, the prefix should be the whole path:
// with all the following lengths, the prefix should be the whole path
int[] lengths = { 3, -1, 4 };
for (int i=0; i<lengths.length; i++) {
p1 = new CategoryPath(p, lengths[i]);
assertEquals(3, p1.length());
for (int i = 0; i < lengths.length; i++) {
p1 = p.subpath(lengths[i]);
assertEquals(3, p1.length);
assertEquals("hi/there/man", p1.toString('/'));
assertEquals(p, p1);
assertEquals(3, p1.capacityComponents());
assertEquals(10, p1.capacityChars());
}
}
@Test
public void testEquals() {
// check that two empty paths are equal, even if they have different
// capacities:
CategoryPath p1 = new CategoryPath(0,0);
CategoryPath p2 = new CategoryPath(1000,300);
assertEquals(true, p1.equals(p2));
// If we make p2 different, it is no longer equals:
p2.add("hi");
assertEquals(false, p1.equals(p2));
// A categoryPath is definitely not equals to an object of some other
// type:
assertEquals(false, p1.equals(Integer.valueOf(3)));
// Build two paths separately, and compare them
p1.clear();
p1.add("hello");
p1.add("world");
p2.clear();
p2.add("hello");
p2.add("world");
assertEquals(true, p1.equals(p2));
// Check that comparison really don't look at old data which might
// be stored in the array
p1.clear();
p1.add("averylongcategoryname");
p1.clear();
p1.add("hi");
p2.clear();
p2.add("hi");
assertEquals(true, p1.equals(p2));
// Being of the same length is obviously not enough to be equal
p1.clear();
p1.add("hi");
p2.clear();
p2.add("hello");
assertEquals(false, p1.equals(p2));
p1.clear();
p1.add("hi");
p2.clear();
p2.add("ho");
assertEquals(false, p1.equals(p2));
assertEquals(CategoryPath.EMPTY, CategoryPath.EMPTY);
assertFalse(CategoryPath.EMPTY.equals(new CategoryPath("hi")));
assertFalse(CategoryPath.EMPTY.equals(Integer.valueOf(3)));
assertEquals(new CategoryPath("hello", "world"), new CategoryPath("hello", "world"));
}
@Test
public void testHashCode() {
// Note: in this test, we assume that if two paths are not equal,
// their hash codes should come out differently. This is *not*
// always the case, but in the examples we use below, it comes out
// fine, and unless we have some really bad luck in changing our
// hash function, this should also remain true in the future.
// check that two empty paths are equal, even if they have different
// capacities:
CategoryPath p1 = new CategoryPath(0,0);
CategoryPath p2 = new CategoryPath(1000,300);
assertEquals(p1.hashCode(), p2.hashCode());
// If we make p2 different, it is no longer equals:
p2.add("hi");
assertEquals(false, p1.hashCode()==p2.hashCode());
// Build two paths separately, and compare them
p1.clear();
p1.add("hello");
p1.add("world");
p2.clear();
p2.add("hello");
p2.add("world");
assertEquals(p1.hashCode(), p2.hashCode());
// Check that comparison really don't look at old data which might
// be stored in the array
p1.clear();
p1.add("averylongcategoryname");
p1.clear();
p1.add("hi");
p2.clear();
p2.add("hi");
assertEquals(p1.hashCode(), p2.hashCode());
// Being of the same length is obviously not enough to be equal
p1.clear();
p1.add("hi");
p2.clear();
p2.add("hello");
assertEquals(false, p1.hashCode()==p2.hashCode());
p1.clear();
p1.add("hi");
p2.clear();
p2.add("ho");
assertEquals(false, p1.hashCode()==p2.hashCode());
assertEquals(CategoryPath.EMPTY.hashCode(), CategoryPath.EMPTY.hashCode());
assertFalse(CategoryPath.EMPTY.hashCode() == new CategoryPath("hi").hashCode());
assertEquals(new CategoryPath("hello", "world").hashCode(), new CategoryPath("hello", "world").hashCode());
}
@Test
public void testHashCodePrefix() {
// First, repeat the tests of testHashCode() using hashCode(-1)
// just to make sure nothing was broken in this variant:
CategoryPath p1 = new CategoryPath(0,0);
CategoryPath p2 = new CategoryPath(1000,300);
assertEquals(p1.hashCode(-1), p2.hashCode(-1));
p2.add("hi");
assertEquals(false, p1.hashCode(-1)==p2.hashCode(-1));
p1.clear();
p1.add("hello");
p1.add("world");
p2.clear();
p2.add("hello");
p2.add("world");
assertEquals(p1.hashCode(-1), p2.hashCode(-1));
p1.clear();
p1.add("averylongcategoryname");
p1.clear();
p1.add("hi");
p2.clear();
p2.add("hi");
assertEquals(p1.hashCode(-1), p2.hashCode(-1));
p1.clear();
p1.add("hi");
p2.clear();
p2.add("hello");
assertEquals(false, p1.hashCode(-1)==p2.hashCode(-1));
p1.clear();
p1.add("hi");
p2.clear();
p2.add("ho");
assertEquals(false, p1.hashCode(-1)==p2.hashCode(-1));
// Now move to testing prefixes:
CategoryPath p = new CategoryPath();
p.add("this");
p.add("is");
p.add("a");
p.add("test");
assertEquals(p.hashCode(), p.hashCode(4));
assertEquals(new CategoryPath().hashCode(), p.hashCode(0));
assertEquals(new CategoryPath(p, 1).hashCode(), p.hashCode(1));
assertEquals(new CategoryPath(p, 2).hashCode(), p.hashCode(2));
assertEquals(new CategoryPath(p, 3).hashCode(), p.hashCode(3));
}
@Test
public void testLongHashCode() {
// Note: in this test, we assume that if two paths are not equal,
// their hash codes should come out differently. This is *not*
// always the case, but in the examples we use below, it comes out
// fine, and unless we have some really bad luck in changing our
// hash function, this should also remain true in the future.
// check that two empty paths are equal, even if they have different
// capacities:
CategoryPath p1 = new CategoryPath(0,0);
CategoryPath p2 = new CategoryPath(1000,300);
assertEquals(p1.longHashCode(), p2.longHashCode());
// If we make p2 different, it is no longer equals:
p2.add("hi");
assertEquals(false, p1.longHashCode()==p2.longHashCode());
// Build two paths separately, and compare them
p1.clear();
p1.add("hello");
p1.add("world");
p2.clear();
p2.add("hello");
p2.add("world");
assertEquals(p1.longHashCode(), p2.longHashCode());
// Check that comparison really don't look at old data which might
// be stored in the array
p1.clear();
p1.add("averylongcategoryname");
p1.clear();
p1.add("hi");
p2.clear();
p2.add("hi");
assertEquals(p1.longHashCode(), p2.longHashCode());
// Being of the same length is obviously not enough to be equal
p1.clear();
p1.add("hi");
p2.clear();
p2.add("hello");
assertEquals(false, p1.longHashCode()==p2.longHashCode());
p1.clear();
p1.add("hi");
p2.clear();
p2.add("ho");
assertEquals(false, p1.longHashCode()==p2.longHashCode());
}
@Test
public void testLongHashCodePrefix() {
// First, repeat the tests of testLongHashCode() using longHashCode(-1)
// just to make sure nothing was broken in this variant:
// check that two empty paths are equal, even if they have different
// capacities:
CategoryPath p1 = new CategoryPath(0,0);
CategoryPath p2 = new CategoryPath(1000,300);
assertEquals(p1.longHashCode(-1), p2.longHashCode(-1));
// If we make p2 different, it is no longer equals:
p2.add("hi");
assertEquals(false, p1.longHashCode(-1)==p2.longHashCode(-1));
// Build two paths separately, and compare them
p1.clear();
p1.add("hello");
p1.add("world");
p2.clear();
p2.add("hello");
p2.add("world");
assertEquals(p1.longHashCode(-1), p2.longHashCode(-1));
// Check that comparison really don't look at old data which might
// be stored in the array
p1.clear();
p1.add("averylongcategoryname");
p1.clear();
p1.add("hi");
p2.clear();
p2.add("hi");
assertEquals(p1.longHashCode(-1), p2.longHashCode(-1));
// Being of the same length is obviously not enough to be equal
p1.clear();
p1.add("hi");
p2.clear();
p2.add("hello");
assertEquals(false, p1.longHashCode(-1)==p2.longHashCode(-1));
p1.clear();
p1.add("hi");
p2.clear();
p2.add("ho");
assertEquals(false, p1.longHashCode(-1)==p2.longHashCode(-1));
// Now move to testing prefixes:
CategoryPath p = new CategoryPath();
p.add("this");
p.add("is");
p.add("a");
p.add("test");
assertEquals(p.longHashCode(), p.longHashCode(4));
assertEquals(new CategoryPath().longHashCode(), p.longHashCode(0));
assertEquals(new CategoryPath(p, 1).longHashCode(), p.longHashCode(1));
assertEquals(new CategoryPath(p, 2).longHashCode(), p.longHashCode(2));
assertEquals(new CategoryPath(p, 3).longHashCode(), p.longHashCode(3));
assertEquals(CategoryPath.EMPTY.longHashCode(), CategoryPath.EMPTY.longHashCode());
assertFalse(CategoryPath.EMPTY.longHashCode() == new CategoryPath("hi").longHashCode());
assertEquals(new CategoryPath("hello", "world").longHashCode(), new CategoryPath("hello", "world").longHashCode());
}
@Test
public void testArrayConstructor() {
CategoryPath p = new CategoryPath("hello", "world", "yo");
assertEquals(3, p.length());
assertEquals(12, p.capacityChars());
assertEquals(3, p.capacityComponents());
assertEquals(3, p.length);
assertEquals("hello/world/yo", p.toString('/'));
p = new CategoryPath(new String[0]);
assertEquals(0, p.length());
assertEquals(0, p.capacityChars());
assertEquals(0, p.capacityComponents());
assertEquals(0, p.length);
}
@Test
public void testCharsNeededForFullPath() {
assertEquals(0, CategoryPath.EMPTY.fullPathLength());
String[] components = { "hello", "world", "yo" };
CategoryPath p = new CategoryPath();
assertEquals(0, p.charsNeededForFullPath());
CategoryPath cp = new CategoryPath(components);
int expectedCharsNeeded = 0;
for (int i=0; i<components.length; i++) {
p.add(components[i]);
expectedCharsNeeded += components[i].length();
if (i>0) {
expectedCharsNeeded++;
}
assertEquals(expectedCharsNeeded, p.charsNeededForFullPath());
for (String comp : components) {
expectedCharsNeeded += comp.length();
}
expectedCharsNeeded += cp.length - 1; // delimiter chars
assertEquals(expectedCharsNeeded, cp.fullPathLength());
}
@Test
public void testCopyToCharArray() {
String[] components = { "hello", "world", "yo" };
CategoryPath p = new CategoryPath(components);
char[] charArray = new char[p.charsNeededForFullPath()];
int numCharsCopied = 0;
numCharsCopied = p.copyToCharArray(charArray, 0, 0, '.');
assertEquals(0, numCharsCopied);
assertEquals("", new String(charArray, 0, numCharsCopied));
numCharsCopied = p.copyToCharArray(charArray, 0, 1, '.');
assertEquals(5, numCharsCopied);
assertEquals("hello", new String(charArray, 0, numCharsCopied));
numCharsCopied = p.copyToCharArray(charArray, 0, 3, '.');
assertEquals(14, numCharsCopied);
assertEquals("hello.world.yo", new String(charArray, 0, numCharsCopied));
numCharsCopied = p.copyToCharArray(charArray, 0, -1, '.');
assertEquals(14, numCharsCopied);
assertEquals("hello.world.yo", new String(charArray, 0, numCharsCopied));
numCharsCopied = p.copyToCharArray(charArray, 0, 4, '.');
assertEquals(14, numCharsCopied);
CategoryPath p = new CategoryPath("hello", "world", "yo");
char[] charArray = new char[p.fullPathLength()];
int numCharsCopied = p.copyFullPath(charArray, 0, '.');
assertEquals(p.fullPathLength(), numCharsCopied);
assertEquals("hello.world.yo", new String(charArray, 0, numCharsCopied));
}
@Test
public void testCharSerialization() throws Exception {
CategoryPath[] testCategories = {
new CategoryPath("hi", "there", "man"),
new CategoryPath("hello"),
new CategoryPath("what's", "up"),
// See that an empty category, which generates a (char)0,
// doesn't cause any problems in the middle of the serialization:
new CategoryPath(),
new CategoryPath("another", "example"),
new CategoryPath(),
new CategoryPath()
};
StringBuilder sb = new StringBuilder();
for (int i=0; i<testCategories.length; i++) {
testCategories[i].serializeAppendTo(sb);
}
CategoryPath tmp = new CategoryPath();
int offset=0;
for (int i=0; i<testCategories.length; i++) {
// check equalsToSerialized, in a equal and non-equal case:
assertTrue(testCategories[i].equalsToSerialized(sb, offset));
assertFalse(new CategoryPath("Hello", "world").equalsToSerialized(sb, offset));
assertFalse(new CategoryPath("world").equalsToSerialized(sb, offset));
// and check hashCodeFromSerialized:
assertEquals(testCategories[i].hashCode(), CategoryPath.hashCodeOfSerialized(sb, offset));
// and check setFromSerialized:
offset = tmp.setFromSerialized(sb, offset);
assertEquals(testCategories[i], tmp);
}
assertEquals(offset, sb.length());
// A similar test, for a much longer path (though not larger than the
// 2^15-1 character limit that CategoryPath allows:
sb = new StringBuilder();
CategoryPath p = new CategoryPath();
for (int i=0; i<1000; i++) {
p.add(Integer.toString(i));
}
p.serializeAppendTo(sb);
p.serializeAppendTo(sb);
p.serializeAppendTo(sb);
offset=0;
assertTrue(p.equalsToSerialized(sb, offset));
assertEquals(p.hashCode(), CategoryPath.hashCodeOfSerialized(sb, offset));
offset = tmp.setFromSerialized(sb, offset);
assertEquals(p, tmp);
assertTrue(p.equalsToSerialized(sb, offset));
assertEquals(p.hashCode(), CategoryPath.hashCodeOfSerialized(sb, offset));
offset = tmp.setFromSerialized(sb, offset);
assertEquals(p, tmp);
assertTrue(p.equalsToSerialized(sb, offset));
assertEquals(p.hashCode(), CategoryPath.hashCodeOfSerialized(sb, offset));
offset = tmp.setFromSerialized(sb, offset);
assertEquals(p, tmp);
assertEquals(offset, sb.length());
// Test the serializeAppendTo variant with a prefixLen
p = new CategoryPath();
for (int i=0; i<783; i++) {
p.add(Integer.toString(i));
}
int[] prefixLengths = { 0, 574, 782, 783, 784, -1 };
for (int prefixLen : prefixLengths) {
sb = new StringBuilder();
p.serializeAppendTo(prefixLen, sb);
assertTrue(new CategoryPath(p, prefixLen).equalsToSerialized(sb, 0));
}
// Test the equalsToSerialized variant with a prefixLen
// We use p and prefixLengths set above.
for (int prefixLen : prefixLengths) {
sb = new StringBuilder();
new CategoryPath(p, prefixLen).serializeAppendTo(sb);
assertTrue(p.equalsToSerialized(prefixLen, sb, 0));
}
// Check also the false case of equalsToSerialized with prefixLen:
sb = new StringBuilder();
new CategoryPath().serializeAppendTo(sb);
assertTrue(new CategoryPath().equalsToSerialized(0, sb, 0));
assertTrue(new CategoryPath("a", "b").equalsToSerialized(0, sb, 0));
assertFalse(new CategoryPath("a", "b").equalsToSerialized(1, sb, 0));
sb = new StringBuilder();
new CategoryPath("a", "b").serializeAppendTo(sb);
assertFalse(new CategoryPath().equalsToSerialized(0, sb, 0));
assertFalse(new CategoryPath("a").equalsToSerialized(0, sb, 0));
assertFalse(new CategoryPath("a").equalsToSerialized(1, sb, 0));
assertFalse(new CategoryPath("a", "b").equalsToSerialized(0, sb, 0));
assertFalse(new CategoryPath("a", "b").equalsToSerialized(1, sb, 0));
assertTrue(new CategoryPath("a", "b").equalsToSerialized(2, sb, 0));
assertTrue(new CategoryPath("a", "b", "c").equalsToSerialized(2, sb, 0));
assertFalse(new CategoryPath("z", "b", "c").equalsToSerialized(2, sb, 0));
assertFalse(new CategoryPath("aa", "b", "c").equalsToSerialized(2, sb, 0));
}
@Test
public void testStreamWriterSerialization() throws Exception {
CategoryPath[] testPaths = {
new CategoryPath("hi", "there", "man"),
new CategoryPath("hello"),
new CategoryPath("date", "2009", "May", "13", "14", "59", "00"),
// See that an empty category, which generates a (char)0,
// doesn't cause any problems in the middle of the serialization:
new CategoryPath(),
new CategoryPath("another", "example")
};
ByteArrayOutputStream baos = new ByteArrayOutputStream();
OutputStreamWriter osw = new OutputStreamWriter(baos, "UTF-8"); // UTF-8 is always supported.
for (CategoryPath cp : testPaths) {
cp.serializeToStreamWriter(osw);
}
osw.flush();
ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
InputStreamReader isr = new InputStreamReader(bais, "UTF-8");
CategoryPath[] checkPaths = {
new CategoryPath(), new CategoryPath(), new CategoryPath(), new CategoryPath(), new CategoryPath()
};
for (int j = 0; j < checkPaths.length; j++) {
checkPaths[j].deserializeFromStreamReader(isr);
assertEquals("Paths not equal", testPaths[j], checkPaths[j]);
}
}
@Test
public void testCharSequenceCtor() throws Exception {
CategoryPath[] testPaths = {
new CategoryPath(new CS("hi"), new CS("there"), new CS("man")),
new CategoryPath(new CS("hello")),
new CategoryPath(new CS("date"), new CS("2009"), new CS("May"), new CS("13"),
new CS("14"), new CS("59"), new CS("00")),
new CategoryPath(),
new CategoryPath(new CS("another"), new CS("example"))
};
assertEquals("Wrong capacity", 10, testPaths[0].capacityChars());
assertEquals("Wrong capacity", 5, testPaths[1].capacityChars());
assertEquals("Wrong capacity", 19, testPaths[2].capacityChars());
assertEquals("Wrong capacity", 0, testPaths[3].capacityChars());
assertEquals("Wrong capacity", 14, testPaths[4].capacityChars());
assertEquals("Wrong component", "hi", testPaths[0].getComponent(0));
assertEquals("Wrong component", "there", testPaths[0].getComponent(1));
assertEquals("Wrong component", "man", testPaths[0].getComponent(2));
assertEquals("Wrong component", "hello", testPaths[1].getComponent(0));
assertEquals("Wrong component", "date", testPaths[2].getComponent(0));
assertEquals("Wrong component", "2009", testPaths[2].getComponent(1));
assertEquals("Wrong component", "May", testPaths[2].getComponent(2));
assertEquals("Wrong component", "13", testPaths[2].getComponent(3));
assertEquals("Wrong component", "14", testPaths[2].getComponent(4));
assertEquals("Wrong component", "59", testPaths[2].getComponent(5));
assertEquals("Wrong component", "00", testPaths[2].getComponent(6));
assertNull("Not null component", testPaths[3].getComponent(0));
assertEquals("Wrong component", "another", testPaths[4].getComponent(0));
assertEquals("Wrong component", "example", testPaths[4].getComponent(1));
}
@Test
public void testIsDescendantOf() throws Exception {
CategoryPath[] testPaths = {
new CategoryPath(new CS("hi"), new CS("there")),
new CategoryPath(new CS("hi"), new CS("there"), new CS("man")),
new CategoryPath(new CS("hithere"), new CS("man")),
new CategoryPath(new CS("hi"), new CS("there"), new CS("mano")),
new CategoryPath(),
};
assertTrue(testPaths[0].isDescendantOf(testPaths[0]));
assertTrue(testPaths[0].isDescendantOf(testPaths[4]));
assertFalse(testPaths[4].isDescendantOf(testPaths[0]));
assertTrue(testPaths[1].isDescendantOf(testPaths[0]));
assertTrue(testPaths[1].isDescendantOf(testPaths[1]));
assertTrue(testPaths[3].isDescendantOf(testPaths[0]));
assertFalse(testPaths[2].isDescendantOf(testPaths[0]));
assertFalse(testPaths[2].isDescendantOf(testPaths[1]));
assertFalse(testPaths[3].isDescendantOf(testPaths[1]));
}
@Test
public void testCompareTo() {
CategoryPath p = new CategoryPath("a/b/c/d", '/');
CategoryPath pother = new CategoryPath("a/b/c/d", '/');
assertTrue(pother.compareTo(p) == 0);
assertEquals(0, pother.compareTo(p));
pother = new CategoryPath("", '/');
assertTrue(pother.compareTo(p) < 0);
pother = new CategoryPath("a/b_/c/d", '/');
@ -880,25 +174,5 @@ public class TestCategoryPath extends LuceneTestCase {
pother = new CategoryPath("a/b/c//e", '/');
assertTrue(pother.compareTo(p) < 0);
}
private static class CS implements CharSequence {
public CS(String s) {
this.ca = new char[s.length()];
s.getChars(0, s.length(), this.ca, 0);
}
@Override
public char charAt(int index) {
return this.ca[index];
}
@Override
public int length() {
return this.ca.length;
}
@Override
public CharSequence subSequence(int start, int end) {
return null; // not used.
}
private char[] ca;
}
}

View File

@ -146,7 +146,7 @@ public class TestTaxonomyCombined extends LuceneTestCase {
if (path==null) {
return "<null>";
}
if (path.length()==0) {
if (path.length==0) {
return "<empty>";
}
return "<"+path.toString('/')+">";
@ -304,9 +304,9 @@ public class TestTaxonomyCombined extends LuceneTestCase {
tw.close();
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
assertEquals(1, tr.getSize());
assertEquals(0, tr.getPath(0).length());
assertEquals(0, tr.getPath(0).length);
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0));
assertEquals(0, tr.getOrdinal(new CategoryPath()));
assertEquals(0, tr.getOrdinal(CategoryPath.EMPTY));
tr.close();
indexDir.close();
}
@ -323,9 +323,9 @@ public class TestTaxonomyCombined extends LuceneTestCase {
tw.commit();
TaxonomyReader tr = new DirectoryTaxonomyReader(indexDir);
assertEquals(1, tr.getSize());
assertEquals(0, tr.getPath(0).length());
assertEquals(0, tr.getPath(0).length);
assertEquals(TaxonomyReader.INVALID_ORDINAL, tr.getParent(0));
assertEquals(0, tr.getOrdinal(new CategoryPath()));
assertEquals(0, tr.getOrdinal(CategoryPath.EMPTY));
tw.close();
tr.close();
indexDir.close();
@ -416,7 +416,7 @@ public class TestTaxonomyCombined extends LuceneTestCase {
", but this is not a valid category.");
}
// verify that the parent is indeed my parent, according to the strings
if (!new CategoryPath(me, me.length()-1).equals(parent)) {
if (!me.subpath(me.length-1).equals(parent)) {
fail("Got parent "+parentOrdinal+" for ordinal "+ordinal+
" but categories are "+showcat(parent)+" and "+showcat(me)+
" respectively.");
@ -506,7 +506,7 @@ public class TestTaxonomyCombined extends LuceneTestCase {
}
// verify that the parent is indeed my parent, according to the
// strings
if (!new CategoryPath(me, me.length() - 1).equals(parent)) {
if (!me.subpath(me.length - 1).equals(parent)) {
fail("Got parent " + parentOrdinal + " for ordinal " + ordinal
+ " but categories are " + showcat(parent) + " and "
+ showcat(me) + " respectively.");

View File

@ -81,7 +81,6 @@ public class TestAddTaxonomy extends LuceneTestCase {
}
private void validate(Directory dest, Directory src, OrdinalMap ordMap) throws Exception {
CategoryPath cp = new CategoryPath();
DirectoryTaxonomyReader destTR = new DirectoryTaxonomyReader(dest);
try {
final int destSize = destTR.getSize();
@ -98,7 +97,7 @@ public class TestAddTaxonomy extends LuceneTestCase {
// validate that all source categories exist in destination, and their
// ordinals are as expected.
for (int j = 1; j < srcSize; j++) {
srcTR.getPath(j, cp);
CategoryPath cp = srcTR.getPath(j);
int destOrdinal = destTR.getOrdinal(cp);
assertTrue(cp + " not found in destination", destOrdinal > 0);
assertEquals(destOrdinal, map[j]);

View File

@ -48,12 +48,8 @@ public class TestConcurrentFacetedIndexing extends LuceneTestCase {
@Override
public int get(CategoryPath categoryPath) { return -1; }
@Override
public int get(CategoryPath categoryPath, int length) { return -1; }
@Override
public boolean put(CategoryPath categoryPath, int ordinal) { return true; }
@Override
public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) { return true; }
@Override
public boolean isFull() { return true; }
@Override
public void clear() {}
@ -108,9 +104,9 @@ public class TestConcurrentFacetedIndexing extends LuceneTestCase {
CategoryPath cp = newCategory();
cats.add(cp);
// add all prefixes to values
int level = cp.length();
int level = cp.length;
while (level > 0) {
String s = cp.toString('/', level);
String s = cp.subpath(level).toString('/');
values.put(s, s);
--level;
}
@ -134,11 +130,11 @@ public class TestConcurrentFacetedIndexing extends LuceneTestCase {
for (String cat : values.keySet()) {
CategoryPath cp = new CategoryPath(cat, '/');
assertTrue("category not found " + cp, tr.getOrdinal(cp) > 0);
int level = cp.length();
int level = cp.length;
int parentOrd = 0; // for root, parent is always virtual ROOT (ord=0)
CategoryPath path = new CategoryPath();
CategoryPath path = CategoryPath.EMPTY;
for (int i = 0; i < level; i++) {
path.add(cp.getComponent(i));
path = cp.subpath(i + 1);
int ord = tr.getOrdinal(path);
assertEquals("invalid parent for cp=" + path, parentOrd, parents[ord]);
parentOrd = ord; // next level should have this parent

View File

@ -154,8 +154,8 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
for (int i=0; i<n; i++) {
int k = random.nextInt(n);
tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
for (int j=0; j<=k; j++) {
tw.addCategory(new CategoryPath(cp[j]));
for (int j = 0; j <= k; j++) {
tw.addCategory(cp[j]);
}
tw.close();
if (closeReader) {

View File

@ -52,12 +52,8 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
@Override
public int get(CategoryPath categoryPath) { return -1; }
@Override
public int get(CategoryPath categoryPath, int length) { return -1; }
@Override
public boolean put(CategoryPath categoryPath, int ordinal) { return true; }
@Override
public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) { return true; }
@Override
public boolean isFull() { return true; }
@Override
public void clear() {}
@ -266,10 +262,10 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
Integer.toString(value / 100000), Integer.toString(value));
int ord = tw.addCategory(cp);
assertTrue("invalid parent for ordinal " + ord + ", category " + cp, tw.getParent(ord) != -1);
String l1 = cp.toString('/', 1);
String l2 = cp.toString('/', 2);
String l3 = cp.toString('/', 3);
String l4 = cp.toString('/', 4);
String l1 = cp.subpath(1).toString('/');
String l2 = cp.subpath(2).toString('/');
String l3 = cp.subpath(3).toString('/');
String l4 = cp.subpath(4).toString('/');
values.put(l1, l1);
values.put(l2, l2);
values.put(l3, l3);
@ -292,11 +288,11 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
for (String cat : values.keySet()) {
CategoryPath cp = new CategoryPath(cat, '/');
assertTrue("category not found " + cp, dtr.getOrdinal(cp) > 0);
int level = cp.length();
int level = cp.length;
int parentOrd = 0; // for root, parent is always virtual ROOT (ord=0)
CategoryPath path = new CategoryPath();
CategoryPath path = CategoryPath.EMPTY;
for (int i = 0; i < level; i++) {
path.add(cp.getComponent(i));
path = cp.subpath(i + 1);
int ord = dtr.getOrdinal(path);
assertEquals("invalid parent for cp=" + path, parentOrd, parents[ord]);
parentOrd = ord; // next level should have this parent

View File

@ -6,11 +6,13 @@ import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import org.junit.Test;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.writercache.cl2o.CompactLabelToOrdinal;
import org.apache.lucene.facet.taxonomy.writercache.cl2o.LabelToOrdinal;
@ -46,9 +48,10 @@ public class TestCompactLabelToOrdinal extends LuceneTestCase {
String[] uniqueValues = new String[numUniqueValues];
byte[] buffer = new byte[50];
Random random = random();
for (int i = 0; i < numUniqueValues;) {
random().nextBytes(buffer);
int size = 1 + random().nextInt(50);
random.nextBytes(buffer);
int size = 1 + random.nextInt(buffer.length);
// This test is turning random bytes into a string,
// this is asking for trouble.
@ -56,16 +59,16 @@ public class TestCompactLabelToOrdinal extends LuceneTestCase {
.onUnmappableCharacter(CodingErrorAction.REPLACE)
.onMalformedInput(CodingErrorAction.REPLACE);
uniqueValues[i] = decoder.decode(ByteBuffer.wrap(buffer, 0, size)).toString();
if (uniqueValues[i].indexOf(CompactLabelToOrdinal.TerminatorChar) == -1) {
if (uniqueValues[i].indexOf(CompactLabelToOrdinal.TERMINATOR_CHAR) == -1) {
i++;
}
}
TEMP_DIR.mkdirs();
File f = new File(TEMP_DIR, "CompactLabelToOrdinalTest.tmp");
File tmpDir = _TestUtil.getTempDir("testLableToOrdinal");
File f = new File(tmpDir, "CompactLabelToOrdinalTest.tmp");
int flushInterval = 10;
for (int i = 0; i < n * 10; i++) {
for (int i = 0; i < n; i++) {
if (i > 0 && i % flushInterval == 0) {
compact.flush(f);
compact = CompactLabelToOrdinal.open(f, 0.15f, 3);
@ -75,19 +78,16 @@ public class TestCompactLabelToOrdinal extends LuceneTestCase {
}
}
int index = random().nextInt(numUniqueValues);
int index = random.nextInt(numUniqueValues);
CategoryPath label = new CategoryPath(uniqueValues[index], '/');
int ord1 = map.getOrdinal(label);
int ord2 = compact.getOrdinal(label);
//System.err.println(ord1+" "+ord2);
assertEquals(ord1, ord2);
if (ord1 == LabelToOrdinal.InvalidOrdinal) {
if (ord1 == LabelToOrdinal.INVALID_ORDINAL) {
ord1 = compact.getNextOrdinal();
map.addLabel(label, ord1);
compact.addLabel(label, ord1);
}
@ -108,25 +108,15 @@ public class TestCompactLabelToOrdinal extends LuceneTestCase {
@Override
public void addLabel(CategoryPath label, int ordinal) {
map.put(new CategoryPath(label), ordinal);
}
@Override
public void addLabel(CategoryPath label, int prefixLen, int ordinal) {
map.put(new CategoryPath(label, prefixLen), ordinal);
map.put(label, ordinal);
}
@Override
public int getOrdinal(CategoryPath label) {
Integer value = map.get(label);
return (value != null) ? value.intValue() : LabelToOrdinal.InvalidOrdinal;
}
@Override
public int getOrdinal(CategoryPath label, int prefixLen) {
Integer value = map.get(new CategoryPath(label, prefixLen));
return (value != null) ? value.intValue() : LabelToOrdinal.InvalidOrdinal;
return (value != null) ? value.intValue() : LabelToOrdinal.INVALID_ORDINAL;
}
}
}