LUCENE-4897: add a sugar API for traversing categories

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1464730 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2013-04-04 20:30:50 +00:00
parent 9588a84dec
commit b6a89d97cb
4 changed files with 123 additions and 25 deletions

View File

@ -172,6 +172,9 @@ New Features
* LUCENE-4905: Made the maxPassages parameter per-field in PostingsHighlighter. * LUCENE-4905: Made the maxPassages parameter per-field in PostingsHighlighter.
(Robert Muir) (Robert Muir)
* LUCENE-4897: Added TaxonomyReader.getChildren for traversing a category's
children. (Shai Erera)
Optimizations Optimizations
* LUCENE-4839: SorterTemplate.merge can now be overridden in order to replace * LUCENE-4839: SorterTemplate.merge can now be overridden in order to replace

View File

@ -65,6 +65,31 @@ import org.apache.lucene.store.AlreadyClosedException;
*/ */
public abstract class TaxonomyReader implements Closeable { public abstract class TaxonomyReader implements Closeable {
/**
 * Iterates over the ordinals of one category's children.
 * <p>
 * The iterator starts at the first child it was given and advances by
 * looking up the current ordinal in the siblings array, until
 * {@link TaxonomyReader#INVALID_ORDINAL} is reached.
 */
public static class ChildrenIterator {

  /** For an ordinal {@code x}, {@code siblingOf[x]} is the ordinal visited after {@code x}. */
  private final int[] siblingOf;

  /** The ordinal the next call to {@link #next()} will return. */
  private int upcoming;

  ChildrenIterator(int child, int[] siblings) {
    this.upcoming = child;
    this.siblingOf = siblings;
  }

  /**
   * Return the next child ordinal, or {@link TaxonomyReader#INVALID_ORDINAL}
   * if no more children.
   */
  public int next() {
    final int current = upcoming;
    if (current == TaxonomyReader.INVALID_ORDINAL) {
      // exhausted: keep answering INVALID_ORDINAL on every further call
      return current;
    }
    upcoming = siblingOf[current];
    return current;
  }

}
/** /**
* The root category (the category with the empty path) always has the ordinal * The root category (the category with the empty path) always has the ordinal
* 0, to which we give a name ROOT_ORDINAL. {@link #getOrdinal(CategoryPath)} * 0, to which we give a name ROOT_ORDINAL. {@link #getOrdinal(CategoryPath)}
@ -167,6 +192,13 @@ public abstract class TaxonomyReader implements Closeable {
*/ */
public abstract ParallelTaxonomyArrays getParallelTaxonomyArrays() throws IOException; public abstract ParallelTaxonomyArrays getParallelTaxonomyArrays() throws IOException;
/**
 * Returns an iterator over the children of the given ordinal.
 * <p>
 * For a negative ordinal the returned iterator is already exhausted, i.e.
 * its first {@code next()} call returns {@link #INVALID_ORDINAL}.
 *
 * @param ordinal the category whose children should be traversed
 * @return an iterator positioned before the first child of {@code ordinal}
 * @throws IOException if obtaining the parallel taxonomy arrays fails
 */
public ChildrenIterator getChildren(final int ordinal) throws IOException {
  final ParallelTaxonomyArrays taxoArrays = getParallelTaxonomyArrays();
  int firstChild = INVALID_ORDINAL;
  if (ordinal >= 0) {
    firstChild = taxoArrays.children()[ordinal];
  }
  return new ChildrenIterator(firstChild, taxoArrays.siblings());
}
/** /**
* Retrieve user committed data. * Retrieve user committed data.
* *

View File

@ -22,8 +22,8 @@ import java.io.IOException;
import java.io.PrintStream; import java.io.PrintStream;
import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenIterator;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory; import org.apache.lucene.store.FSDirectory;
@ -55,45 +55,40 @@ public class PrintTaxonomyStats {
} }
public static void printStats(TaxonomyReader r, PrintStream out, boolean printTree) throws IOException { public static void printStats(TaxonomyReader r, PrintStream out, boolean printTree) throws IOException {
ParallelTaxonomyArrays arrays = r.getParallelTaxonomyArrays();
//int[] parents = arrays.parents();
int[] children = arrays.children();
int[] siblings = arrays.siblings();
out.println(r.getSize() + " total categories."); out.println(r.getSize() + " total categories.");
int childOrd = children[TaxonomyReader.ROOT_ORDINAL]; ChildrenIterator it = r.getChildren(TaxonomyReader.ROOT_ORDINAL);
while(childOrd != -1) { int child;
CategoryPath cp = r.getPath(childOrd); while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
int childOrd2 = children[childOrd]; ChildrenIterator chilrenIt = r.getChildren(child);
int numImmediateChildren = 0; int numImmediateChildren = 0;
while(childOrd2 != -1) { while (chilrenIt.next() != TaxonomyReader.INVALID_ORDINAL) {
numImmediateChildren++; numImmediateChildren++;
childOrd2 = siblings[childOrd2];
} }
out.println("/" + cp + ": " + numImmediateChildren + " immediate children; " + (1+countAllChildren(r, childOrd, children, siblings)) + " total categories"); CategoryPath cp = r.getPath(child);
out.println("/" + cp + ": " + numImmediateChildren + " immediate children; " + (1+countAllChildren(r, child)) + " total categories");
if (printTree) { if (printTree) {
printAllChildren(out, r, childOrd, children, siblings, " ", 1); printAllChildren(out, r, child, " ", 1);
} }
childOrd = siblings[childOrd];
} }
} }
private static int countAllChildren(TaxonomyReader r, int ord, int[] children, int[] siblings) throws IOException { private static int countAllChildren(TaxonomyReader r, int ord) throws IOException {
int childOrd = children[ord];
int count = 0; int count = 0;
while(childOrd != -1) { ChildrenIterator it = r.getChildren(ord);
count += 1+countAllChildren(r, childOrd, children, siblings); int child;
childOrd = siblings[childOrd]; while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
count += 1 + countAllChildren(r, child);
} }
return count; return count;
} }
private static void printAllChildren(PrintStream out, TaxonomyReader r, int ord, int[] children, int[] siblings, String indent, int depth) throws IOException { private static void printAllChildren(PrintStream out, TaxonomyReader r, int ord, String indent, int depth) throws IOException {
int childOrd = children[ord]; ChildrenIterator it = r.getChildren(ord);
while(childOrd != -1) { int child;
out.println(indent + "/" + r.getPath(childOrd).components[depth]); while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
printAllChildren(out, r, childOrd, children, siblings, indent + " ", depth+1); out.println(indent + "/" + r.getPath(child).components[depth]);
childOrd = siblings[childOrd]; printAllChildren(out, r, child, indent + " ", depth+1);
} }
} }
} }

View File

@ -1,12 +1,16 @@
package org.apache.lucene.facet.taxonomy.directory; package org.apache.lucene.facet.taxonomy.directory;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Random; import java.util.Random;
import java.util.Set;
import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.facet.FacetTestCase; import org.apache.lucene.facet.FacetTestCase;
import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenIterator;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.IndexWriterConfig;
@ -461,5 +465,69 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase {
src.close(); src.close();
} }
/**
 * Verifies {@link TaxonomyReader#getChildren(int)} for a non-existing
 * category, a category without children, an arbitrary negative ordinal, the
 * root, and two categories ("a" and "b") with a random number of children.
 */
@Test
public void testGetChildren() throws Exception {
  Directory dir = newDirectory();
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
  int numCategories = atLeast(10);
  int numA = 0, numB = 0;
  Random random = random();
  // Add both parents up front. Children are assigned by coin flip, so without
  // this one of "a"/"b" might never be created, and the root check below
  // (which expects exactly "a", "b" and "c") would run past the last child.
  taxoWriter.addCategory(new CategoryPath("a"));
  taxoWriter.addCategory(new CategoryPath("b"));
  for (int i = 0; i < numCategories; i++) {
    if (random.nextBoolean()) {
      taxoWriter.addCategory(new CategoryPath("a", Integer.toString(i)));
      ++numA;
    } else {
      taxoWriter.addCategory(new CategoryPath("b", Integer.toString(i)));
      ++numB;
    }
  }
  // add category with no children
  taxoWriter.addCategory(new CategoryPath("c"));
  taxoWriter.close();

  DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);

  // non existing category
  ChildrenIterator it = taxoReader.getChildren(taxoReader.getOrdinal(new CategoryPath("invalid")));
  assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());

  // a category with no children
  it = taxoReader.getChildren(taxoReader.getOrdinal(new CategoryPath("c")));
  assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());

  // arbitrary negative ordinal
  it = taxoReader.getChildren(-2);
  assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());

  // root's children: each of "a", "b", "c" must be seen exactly once, and nothing else
  Set<String> roots = new HashSet<String>(Arrays.asList("a", "b", "c"));
  it = taxoReader.getChildren(TaxonomyReader.ROOT_ORDINAL);
  while (!roots.isEmpty()) {
    CategoryPath root = taxoReader.getPath(it.next());
    assertEquals(1, root.length);
    assertTrue(roots.remove(root.components[0]));
  }
  assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());

  // children of "a" (i == 0) and "b" (i == 1)
  for (int i = 0; i < 2; i++) {
    String parent = i == 0 ? "a" : "b";
    int ordinal = taxoReader.getOrdinal(new CategoryPath(parent));
    it = taxoReader.getChildren(ordinal);
    int numChildren = 0;
    int child;
    while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
      CategoryPath path = taxoReader.getPath(child);
      assertEquals(2, path.length);
      // expected value first, so a failure message reads correctly
      assertEquals(parent, path.components[0]);
      ++numChildren;
    }
    int expected = i == 0 ? numA : numB;
    assertEquals("invalid num children", expected, numChildren);
  }
  taxoReader.close();

  dir.close();
}
} }