mirror of https://github.com/apache/lucene.git
LUCENE-4897: add a sugar API for traversing categories
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1464730 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
9588a84dec
commit
b6a89d97cb
|
@ -172,6 +172,9 @@ New Features
|
||||||
* LUCENE-4905: Made the maxPassages parameter per-field in PostingsHighlighter.
|
* LUCENE-4905: Made the maxPassages parameter per-field in PostingsHighlighter.
|
||||||
(Robert Muir)
|
(Robert Muir)
|
||||||
|
|
||||||
|
* LUCENE-4897: Added TaxonomyReader.getChildren for traversing a category's
|
||||||
|
children. (Shai Erera)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
|
|
||||||
* LUCENE-4839: SorterTemplate.merge can now be overridden in order to replace
|
* LUCENE-4839: SorterTemplate.merge can now be overridden in order to replace
|
||||||
|
|
|
@ -65,6 +65,31 @@ import org.apache.lucene.store.AlreadyClosedException;
|
||||||
*/
|
*/
|
||||||
public abstract class TaxonomyReader implements Closeable {
|
public abstract class TaxonomyReader implements Closeable {
|
||||||
|
|
||||||
|
/** An iterator over a category's children. */
|
||||||
|
public static class ChildrenIterator {
|
||||||
|
|
||||||
|
private final int[] siblings;
|
||||||
|
private int child;
|
||||||
|
|
||||||
|
ChildrenIterator(int child, int[] siblings) {
|
||||||
|
this.siblings = siblings;
|
||||||
|
this.child = child;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Return the next child ordinal, or {@link TaxonomyReader#INVALID_ORDINAL}
|
||||||
|
* if no more children.
|
||||||
|
*/
|
||||||
|
public int next() {
|
||||||
|
int res = child;
|
||||||
|
if (child != TaxonomyReader.INVALID_ORDINAL) {
|
||||||
|
child = siblings[child];
|
||||||
|
}
|
||||||
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The root category (the category with the empty path) always has the ordinal
|
* The root category (the category with the empty path) always has the ordinal
|
||||||
* 0, to which we give a name ROOT_ORDINAL. {@link #getOrdinal(CategoryPath)}
|
* 0, to which we give a name ROOT_ORDINAL. {@link #getOrdinal(CategoryPath)}
|
||||||
|
@ -167,6 +192,13 @@ public abstract class TaxonomyReader implements Closeable {
|
||||||
*/
|
*/
|
||||||
public abstract ParallelTaxonomyArrays getParallelTaxonomyArrays() throws IOException;
|
public abstract ParallelTaxonomyArrays getParallelTaxonomyArrays() throws IOException;
|
||||||
|
|
||||||
|
/** Returns an iterator over the children of the given ordinal. */
|
||||||
|
public ChildrenIterator getChildren(final int ordinal) throws IOException {
|
||||||
|
ParallelTaxonomyArrays arrays = getParallelTaxonomyArrays();
|
||||||
|
int child = ordinal >= 0 ? arrays.children()[ordinal] : INVALID_ORDINAL;
|
||||||
|
return new ChildrenIterator(child, arrays.siblings());
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieve user committed data.
|
* Retrieve user committed data.
|
||||||
*
|
*
|
||||||
|
|
|
@ -22,8 +22,8 @@ import java.io.IOException;
|
||||||
import java.io.PrintStream;
|
import java.io.PrintStream;
|
||||||
|
|
||||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||||
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
|
|
||||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||||
|
import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenIterator;
|
||||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
|
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
|
||||||
import org.apache.lucene.store.Directory;
|
import org.apache.lucene.store.Directory;
|
||||||
import org.apache.lucene.store.FSDirectory;
|
import org.apache.lucene.store.FSDirectory;
|
||||||
|
@ -55,45 +55,40 @@ public class PrintTaxonomyStats {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void printStats(TaxonomyReader r, PrintStream out, boolean printTree) throws IOException {
|
public static void printStats(TaxonomyReader r, PrintStream out, boolean printTree) throws IOException {
|
||||||
ParallelTaxonomyArrays arrays = r.getParallelTaxonomyArrays();
|
|
||||||
//int[] parents = arrays.parents();
|
|
||||||
int[] children = arrays.children();
|
|
||||||
int[] siblings = arrays.siblings();
|
|
||||||
out.println(r.getSize() + " total categories.");
|
out.println(r.getSize() + " total categories.");
|
||||||
|
|
||||||
int childOrd = children[TaxonomyReader.ROOT_ORDINAL];
|
ChildrenIterator it = r.getChildren(TaxonomyReader.ROOT_ORDINAL);
|
||||||
while(childOrd != -1) {
|
int child;
|
||||||
CategoryPath cp = r.getPath(childOrd);
|
while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
|
||||||
int childOrd2 = children[childOrd];
|
ChildrenIterator chilrenIt = r.getChildren(child);
|
||||||
int numImmediateChildren = 0;
|
int numImmediateChildren = 0;
|
||||||
while(childOrd2 != -1) {
|
while (chilrenIt.next() != TaxonomyReader.INVALID_ORDINAL) {
|
||||||
numImmediateChildren++;
|
numImmediateChildren++;
|
||||||
childOrd2 = siblings[childOrd2];
|
|
||||||
}
|
}
|
||||||
out.println("/" + cp + ": " + numImmediateChildren + " immediate children; " + (1+countAllChildren(r, childOrd, children, siblings)) + " total categories");
|
CategoryPath cp = r.getPath(child);
|
||||||
|
out.println("/" + cp + ": " + numImmediateChildren + " immediate children; " + (1+countAllChildren(r, child)) + " total categories");
|
||||||
if (printTree) {
|
if (printTree) {
|
||||||
printAllChildren(out, r, childOrd, children, siblings, " ", 1);
|
printAllChildren(out, r, child, " ", 1);
|
||||||
}
|
}
|
||||||
childOrd = siblings[childOrd];
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int countAllChildren(TaxonomyReader r, int ord, int[] children, int[] siblings) throws IOException {
|
private static int countAllChildren(TaxonomyReader r, int ord) throws IOException {
|
||||||
int childOrd = children[ord];
|
|
||||||
int count = 0;
|
int count = 0;
|
||||||
while(childOrd != -1) {
|
ChildrenIterator it = r.getChildren(ord);
|
||||||
count += 1+countAllChildren(r, childOrd, children, siblings);
|
int child;
|
||||||
childOrd = siblings[childOrd];
|
while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
|
||||||
|
count += 1 + countAllChildren(r, child);
|
||||||
}
|
}
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void printAllChildren(PrintStream out, TaxonomyReader r, int ord, int[] children, int[] siblings, String indent, int depth) throws IOException {
|
private static void printAllChildren(PrintStream out, TaxonomyReader r, int ord, String indent, int depth) throws IOException {
|
||||||
int childOrd = children[ord];
|
ChildrenIterator it = r.getChildren(ord);
|
||||||
while(childOrd != -1) {
|
int child;
|
||||||
out.println(indent + "/" + r.getPath(childOrd).components[depth]);
|
while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
|
||||||
printAllChildren(out, r, childOrd, children, siblings, indent + " ", depth+1);
|
out.println(indent + "/" + r.getPath(child).components[depth]);
|
||||||
childOrd = siblings[childOrd];
|
printAllChildren(out, r, child, indent + " ", depth+1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,12 +1,16 @@
|
||||||
package org.apache.lucene.facet.taxonomy.directory;
|
package org.apache.lucene.facet.taxonomy.directory;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashSet;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
import java.util.Set;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.MockAnalyzer;
|
import org.apache.lucene.analysis.MockAnalyzer;
|
||||||
import org.apache.lucene.facet.FacetTestCase;
|
import org.apache.lucene.facet.FacetTestCase;
|
||||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||||
|
import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenIterator;
|
||||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||||
import org.apache.lucene.index.IndexWriter;
|
import org.apache.lucene.index.IndexWriter;
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
import org.apache.lucene.index.IndexWriterConfig;
|
||||||
|
@ -461,5 +465,69 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
|
||||||
|
|
||||||
src.close();
|
src.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void testGetChildren() throws Exception {
|
||||||
|
Directory dir = newDirectory();
|
||||||
|
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
|
||||||
|
int numCategories = atLeast(10);
|
||||||
|
int numA = 0, numB = 0;
|
||||||
|
Random random = random();
|
||||||
|
for (int i = 0; i < numCategories; i++) {
|
||||||
|
if (random.nextBoolean()) {
|
||||||
|
taxoWriter.addCategory(new CategoryPath("a", Integer.toString(i)));
|
||||||
|
++numA;
|
||||||
|
} else {
|
||||||
|
taxoWriter.addCategory(new CategoryPath("b", Integer.toString(i)));
|
||||||
|
++numB;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// add category with no children
|
||||||
|
taxoWriter.addCategory(new CategoryPath("c"));
|
||||||
|
taxoWriter.close();
|
||||||
|
|
||||||
|
DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
|
||||||
|
|
||||||
|
// non existing category
|
||||||
|
ChildrenIterator it = taxoReader.getChildren(taxoReader.getOrdinal(new CategoryPath("invalid")));
|
||||||
|
assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());
|
||||||
|
|
||||||
|
// a category with no children
|
||||||
|
it = taxoReader.getChildren(taxoReader.getOrdinal(new CategoryPath("c")));
|
||||||
|
assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());
|
||||||
|
|
||||||
|
// arbitrary negative ordinal
|
||||||
|
it = taxoReader.getChildren(-2);
|
||||||
|
assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());
|
||||||
|
|
||||||
|
// root's children
|
||||||
|
Set<String> roots = new HashSet<String>(Arrays.asList("a", "b", "c"));
|
||||||
|
it = taxoReader.getChildren(TaxonomyReader.ROOT_ORDINAL);
|
||||||
|
while (!roots.isEmpty()) {
|
||||||
|
CategoryPath root = taxoReader.getPath(it.next());
|
||||||
|
assertEquals(1, root.length);
|
||||||
|
assertTrue(roots.remove(root.components[0]));
|
||||||
|
}
|
||||||
|
assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());
|
||||||
|
|
||||||
|
for (int i = 0; i < 2; i++) {
|
||||||
|
CategoryPath cp = i == 0 ? new CategoryPath("a") : new CategoryPath("b");
|
||||||
|
int ordinal = taxoReader.getOrdinal(cp);
|
||||||
|
it = taxoReader.getChildren(ordinal);
|
||||||
|
int numChildren = 0;
|
||||||
|
int child;
|
||||||
|
while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
|
||||||
|
CategoryPath path = taxoReader.getPath(child);
|
||||||
|
assertEquals(2, path.length);
|
||||||
|
assertEquals(path.components[0], i == 0 ? "a" : "b");
|
||||||
|
++numChildren;
|
||||||
|
}
|
||||||
|
int expected = i == 0 ? numA : numB;
|
||||||
|
assertEquals("invalid num children", expected, numChildren);
|
||||||
|
}
|
||||||
|
taxoReader.close();
|
||||||
|
|
||||||
|
dir.close();
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue