From b6a89d97cb34fcff518392d4623b6f01434575bb Mon Sep 17 00:00:00 2001 From: Shai Erera Date: Thu, 4 Apr 2013 20:30:50 +0000 Subject: [PATCH] LUCENE-4897: add a sugar API for traversing categories git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1464730 13f79535-47bb-0310-9956-ffa450edef68 --- lucene/CHANGES.txt | 3 + .../lucene/facet/taxonomy/TaxonomyReader.java | 32 +++++++++ .../lucene/facet/util/PrintTaxonomyStats.java | 45 ++++++------ .../TestDirectoryTaxonomyReader.java | 68 +++++++++++++++++++ 4 files changed, 123 insertions(+), 25 deletions(-) diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 3af41f989a2..742e1cdbe3b 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -172,6 +172,9 @@ New Features * LUCENE-4905: Made the maxPassages parameter per-field in PostingsHighlighter. (Robert Muir) +* LUCENE-4897: Added TaxonomyReader.getChildren for traversing a category's + children. (Shai Erera) + Optimizations * LUCENE-4839: SorterTemplate.merge can now be overridden in order to replace diff --git a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java index ed6a8d61ab9..e1d8a61e1a4 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java @@ -65,6 +65,31 @@ import org.apache.lucene.store.AlreadyClosedException; */ public abstract class TaxonomyReader implements Closeable { + /** An iterator over a category's children. */ + public static class ChildrenIterator { + + private final int[] siblings; + private int child; + + ChildrenIterator(int child, int[] siblings) { + this.siblings = siblings; + this.child = child; + } + + /** + * Return the next child ordinal, or {@link TaxonomyReader#INVALID_ORDINAL} + * if no more children. + */ + public int next() { + int res = child; + if (child != TaxonomyReader.INVALID_ORDINAL) { + child = siblings[child]; + } + return res; + } + + } + /** * The root category (the category with the empty path) always has the ordinal * 0, to which we give a name ROOT_ORDINAL. {@link #getOrdinal(CategoryPath)} @@ -167,6 +192,13 @@ public abstract class TaxonomyReader implements Closeable { */ public abstract ParallelTaxonomyArrays getParallelTaxonomyArrays() throws IOException; + /** Returns an iterator over the children of the given ordinal. */ + public ChildrenIterator getChildren(final int ordinal) throws IOException { + ParallelTaxonomyArrays arrays = getParallelTaxonomyArrays(); + int child = ordinal >= 0 ? arrays.children()[ordinal] : INVALID_ORDINAL; + return new ChildrenIterator(child, arrays.siblings()); + } + /** * Retrieve user committed data. * diff --git a/lucene/facet/src/java/org/apache/lucene/facet/util/PrintTaxonomyStats.java b/lucene/facet/src/java/org/apache/lucene/facet/util/PrintTaxonomyStats.java index fd2252ae79f..10a70f7eaf4 100644 --- a/lucene/facet/src/java/org/apache/lucene/facet/util/PrintTaxonomyStats.java +++ b/lucene/facet/src/java/org/apache/lucene/facet/util/PrintTaxonomyStats.java @@ -22,8 +22,8 @@ import java.io.IOException; import java.io.PrintStream; import org.apache.lucene.facet.taxonomy.CategoryPath; -import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays; import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenIterator; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; @@ -55,45 +55,40 @@ public class PrintTaxonomyStats { } public static void printStats(TaxonomyReader r, PrintStream out, boolean printTree) throws IOException { - ParallelTaxonomyArrays arrays = r.getParallelTaxonomyArrays(); - //int[] parents = arrays.parents(); - int[] children = arrays.children(); - int[] siblings = arrays.siblings(); out.println(r.getSize() + " total categories."); - int childOrd = children[TaxonomyReader.ROOT_ORDINAL]; - while(childOrd != -1) { - CategoryPath cp = r.getPath(childOrd); - int childOrd2 = children[childOrd]; + ChildrenIterator it = r.getChildren(TaxonomyReader.ROOT_ORDINAL); + int child; + while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) { + ChildrenIterator chilrenIt = r.getChildren(child); int numImmediateChildren = 0; - while(childOrd2 != -1) { + while (chilrenIt.next() != TaxonomyReader.INVALID_ORDINAL) { numImmediateChildren++; - childOrd2 = siblings[childOrd2]; } - out.println("/" + cp + ": " + numImmediateChildren + " immediate children; " + (1+countAllChildren(r, childOrd, children, siblings)) + " total categories"); + CategoryPath cp = r.getPath(child); + out.println("/" + cp + ": " + numImmediateChildren + " immediate children; " + (1+countAllChildren(r, child)) + " total categories"); if (printTree) { - printAllChildren(out, r, childOrd, children, siblings, " ", 1); + printAllChildren(out, r, child, " ", 1); } - childOrd = siblings[childOrd]; } } - private static int countAllChildren(TaxonomyReader r, int ord, int[] children, int[] siblings) throws IOException { - int childOrd = children[ord]; + private static int countAllChildren(TaxonomyReader r, int ord) throws IOException { int count = 0; - while(childOrd != -1) { - count += 1+countAllChildren(r, childOrd, children, siblings); - childOrd = siblings[childOrd]; + ChildrenIterator it = r.getChildren(ord); + int child; + while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) { + count += 1 + countAllChildren(r, child); } return count; } - private static void printAllChildren(PrintStream out, TaxonomyReader r, int ord, int[] children, int[] siblings, String indent, int depth) throws IOException { - int childOrd = children[ord]; - while(childOrd != -1) { - out.println(indent + "/" + r.getPath(childOrd).components[depth]); - printAllChildren(out, r, childOrd, children, siblings, indent + " ", depth+1); - childOrd = siblings[childOrd]; + private static void printAllChildren(PrintStream out, TaxonomyReader r, int ord, String indent, int depth) throws IOException { + ChildrenIterator it = r.getChildren(ord); + int child; + while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) { + out.println(indent + "/" + r.getPath(child).components[depth]); + printAllChildren(out, r, child, indent + " ", depth+1); } } } diff --git a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java index 3533e64118d..370e8892c89 100644 --- a/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java +++ b/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java @@ -1,12 +1,16 @@ package org.apache.lucene.facet.taxonomy.directory; import java.io.IOException; +import java.util.Arrays; +import java.util.HashSet; import java.util.Random; +import java.util.Set; import org.apache.lucene.analysis.MockAnalyzer; import org.apache.lucene.facet.FacetTestCase; import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenIterator; import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -461,5 +465,69 @@ public class TestDirectoryTaxonomyReader extends FacetTestCase { src.close(); } + + @Test + public void testGetChildren() throws Exception { + Directory dir = newDirectory(); + DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir); + int numCategories = atLeast(10); + int numA = 0, numB = 0; + Random random = random(); + for (int i = 0; i < numCategories; i++) { + if (random.nextBoolean()) { + taxoWriter.addCategory(new CategoryPath("a", Integer.toString(i))); + ++numA; + } else { + taxoWriter.addCategory(new CategoryPath("b", Integer.toString(i))); + ++numB; + } + } + // add category with no children + taxoWriter.addCategory(new CategoryPath("c")); + taxoWriter.close(); + + DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir); + + // non existing category + ChildrenIterator it = taxoReader.getChildren(taxoReader.getOrdinal(new CategoryPath("invalid"))); + assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next()); + + // a category with no children + it = taxoReader.getChildren(taxoReader.getOrdinal(new CategoryPath("c"))); + assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next()); + + // arbitrary negative ordinal + it = taxoReader.getChildren(-2); + assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next()); + + // root's children + Set roots = new HashSet(Arrays.asList("a", "b", "c")); + it = taxoReader.getChildren(TaxonomyReader.ROOT_ORDINAL); + while (!roots.isEmpty()) { + CategoryPath root = taxoReader.getPath(it.next()); + assertEquals(1, root.length); + assertTrue(roots.remove(root.components[0])); + } + assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next()); + + for (int i = 0; i < 2; i++) { + CategoryPath cp = i == 0 ? new CategoryPath("a") : new CategoryPath("b"); + int ordinal = taxoReader.getOrdinal(cp); + it = taxoReader.getChildren(ordinal); + int numChildren = 0; + int child; + while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) { + CategoryPath path = taxoReader.getPath(child); + assertEquals(2, path.length); + assertEquals(path.components[0], i == 0 ? "a" : "b"); + ++numChildren; + } + int expected = i == 0 ? numA : numB; + assertEquals("invalid num children", expected, numChildren); + } + taxoReader.close(); + + dir.close(); + } }