LUCENE-4060: port to trunk

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1339047 13f79535-47bb-0310-9956-ffa450edef68
Shai Erera 2012-05-16 08:01:40 +00:00
parent e9f32e7a72
commit d8e0288109
6 changed files with 280 additions and 420 deletions


@@ -953,6 +953,11 @@ Bug fixes
offset calculation in PathHierarchyTokenizer.
(Mike McCandless, Uwe Schindler, Robert Muir)
* LUCENE-4060: Fix a synchronization bug in
DirectoryTaxonomyWriter.addTaxonomies(). Also, the method has been renamed to
addTaxonomy and now takes only one Directory and one OrdinalMap.
(Shai Erera, Gilad Barkai)
Documentation
* LUCENE-3958: Javadocs corrections for IndexWriter.
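The LUCENE-4060 entry above is a one-for-one API migration for callers. A minimal sketch, with hypothetical names (destTW, srcDir, map; none of these appear in this commit):

// Before this commit: arrays of source directories and ordinal maps.
destTW.addTaxonomies(new Directory[] { srcDir }, new OrdinalMap[] { map });

// After this commit: one source directory and one map per call.
destTW.addTaxonomy(srcDir, map);

Callers that merged several taxonomies in a single call now invoke addTaxonomy once per source directory, each call with its own OrdinalMap.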


@@ -81,7 +81,7 @@ public class TaxonomyMergeUtils {
OrdinalMap map, IndexWriter destIndexWriter,
DirectoryTaxonomyWriter destTaxWriter) throws IOException {
// merge the taxonomies
destTaxWriter.addTaxonomies(new Directory[] { srcTaxDir }, new OrdinalMap[] { map });
destTaxWriter.addTaxonomy(srcTaxDir, map);
PayloadProcessorProvider payloadProcessor = new FacetsPayloadProcessorProvider(
srcIndexDir, map.getMap(), new DefaultFacetIndexingParams());


@@ -4,8 +4,6 @@ import java.io.IOException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.StoredFieldVisitor.Status;
import org.apache.lucene.store.IndexInput;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@@ -42,6 +40,7 @@ abstract class Consts {
public static final class LoadFullPathOnly extends StoredFieldVisitor {
private String fullPath;
@Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
fullPath = value;
}
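For orientation: LoadFullPathOnly is a StoredFieldVisitor, so instead of materializing a whole Document it is handed to IndexReader.document(docID, visitor), which calls stringField(...) for each stored field the visitor accepts. A minimal usage sketch, assuming a reader and docID are in scope and that getFullPath() is the class's accessor for the captured value:

Consts.LoadFullPathOnly visitor = new Consts.LoadFullPathOnly();
reader.document(docID, visitor);         // invokes visitor.stringField(...) as fields are read
String fullPath = visitor.getFullPath(); // the single stored value the visitor retained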


@@ -12,15 +12,21 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
@@ -30,9 +36,9 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.AlreadyClosedException;
@@ -44,13 +50,6 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -812,6 +811,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
}
return parentArray;
}
@Override
public int getParent(int ordinal) throws IOException {
ensureOpen();
@@ -823,158 +823,47 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
}
return getParentArray().getArray()[ordinal];
}
/**
* Take all the categories of one or more given taxonomies, and add them to
* the main taxonomy (this), if they are not already there.
* <P>
* Additionally, fill a <I>mapping</I> for each of the added taxonomies,
* mapping its ordinals to the ordinals in the enlarged main taxonomy.
* These mappings are saved into an array of OrdinalMap objects given by the
* user, one for each of the given taxonomies (not including "this", the main
* taxonomy). Often the first of these will be a MemoryOrdinalMap and the
* others will be a DiskOrdinalMap - see discussion in {@link OrdinalMap}.
* <P>
* Note that the taxonomies to be added are given as Directory objects,
* not opened TaxonomyReader/TaxonomyWriter objects, so if any of them are
* currently managed by an open TaxonomyWriter, make sure to commit() (or
* close()) it first. The main taxonomy (this) is an open TaxonomyWriter,
* and does not need to be commit()ed before this call.
* Takes the categories from the given taxonomy directory, and adds the
* missing ones to this taxonomy. Additionally, it fills the given
* {@link OrdinalMap} with a mapping from the original ordinal to the new
* ordinal.
*/
public void addTaxonomies(Directory[] taxonomies, OrdinalMap[] ordinalMaps) throws IOException {
public void addTaxonomy(Directory taxoDir, OrdinalMap map) throws IOException {
ensureOpen();
// To prevent us stepping on the rest of this class's decisions on when
// to open a reader, and when not, we'll be opening a new reader instead
// of using the existing "reader" object:
IndexReader mainreader = openReader();
// TODO (Facet): can this then go segment-by-segment and avoid MultiDocsEnum etc?
Terms terms = MultiFields.getTerms(mainreader, Consts.FULL);
assert terms != null; // TODO (Facet): explicit check / throw exception?
TermsEnum mainte = terms.iterator(null);
DocsEnum mainde = null;
IndexReader[] otherreaders = new IndexReader[taxonomies.length];
TermsEnum[] othertes = new TermsEnum[taxonomies.length];
DocsEnum[] otherdocsEnum = new DocsEnum[taxonomies.length]; // just for reuse
for (int i=0; i<taxonomies.length; i++) {
otherreaders[i] = DirectoryReader.open(taxonomies[i]);
terms = MultiFields.getTerms(otherreaders[i], Consts.FULL);
assert terms != null; // TODO (Facet): explicit check / throw exception?
othertes[i] = terms.iterator(null);
// Also tell the ordinal maps their expected sizes:
ordinalMaps[i].setSize(otherreaders[i].numDocs());
}
CategoryPath cp = new CategoryPath();
// We keep a "current" cursor over the alphabetically-ordered list of
// categories in each taxonomy. We start the cursor on the first
// (alphabetically) category of each taxonomy:
String currentMain;
String[] currentOthers = new String[taxonomies.length];
currentMain = nextTE(mainte);
int otherTaxonomiesLeft = 0;
for (int i=0; i<taxonomies.length; i++) {
currentOthers[i] = nextTE(othertes[i]);
if (currentOthers[i]!=null) {
otherTaxonomiesLeft++;
}
}
// And then, at each step look at the first (alphabetically) of the
// current taxonomies.
// NOTE: The most efficient way we could have done this is using a
// PriorityQueue. But for simplicity, and assuming that usually we'll
// have a very small number of other taxonomies (often just 1), we use
// a more naive algorithm (o(ntaxonomies) instead of o(ln ntaxonomies)
// per step)
while (otherTaxonomiesLeft>0) {
// TODO: use a pq here
String first=null;
for (int i=0; i<taxonomies.length; i++) {
if (currentOthers[i]==null) continue;
if (first==null || first.compareTo(currentOthers[i])>0) {
first = currentOthers[i];
}
}
int comp = 0;
if (currentMain==null || (comp = currentMain.compareTo(first))>0) {
// If 'first' is before currentMain, or currentMain is null,
// then 'first' is a new category and we need to add it to the
// main taxonomy. Then for all taxonomies with this 'first'
// category, we need to add the new category number to their
// map, and move to the next category in all of them.
IndexReader r = DirectoryReader.open(taxoDir);
try {
final int size = r.numDocs();
final OrdinalMap ordinalMap = map;
ordinalMap.setSize(size);
CategoryPath cp = new CategoryPath();
Terms terms = MultiFields.getTerms(r, Consts.FULL);
TermsEnum te = terms.iterator(null);
Bits liveDocs = MultiFields.getLiveDocs(r);
DocsEnum docs = null;
// we call next() first, to skip the root category which always exists.
while (te.next() != null) {
String value = te.term().utf8ToString();
cp.clear();
cp.add(first, delimiter);
// We can call internalAddCategory() instead of addCategory()
// because we know the category hasn't been seen yet.
int newordinal = internalAddCategory(cp, cp.length());
// TODO (Facet): we already had this term in our hands before, in nextTE...
// // TODO (Facet): no need to make this term?
for (int i=0; i<taxonomies.length; i++) {
if (first.equals(currentOthers[i])) {
// remember the remapping of this ordinal. Note how
// this requires reading a posting list from the index -
// but since we do this in lexical order of terms, just
// like Lucene's merge works, we hope there are few seeks.
// TODO (Facet): is there a quicker way? E.g., not specifying the
// next term by name every time?
otherdocsEnum[i] = othertes[i].docs(MultiFields.getLiveDocs(otherreaders[i]), otherdocsEnum[i], false);
otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
int origordinal = otherdocsEnum[i].docID();
ordinalMaps[i].addMapping(origordinal, newordinal);
// and move to the next category in the i'th taxonomy
currentOthers[i] = nextTE(othertes[i]);
if (currentOthers[i]==null) {
otherTaxonomiesLeft--;
}
}
cp.add(value, Consts.DEFAULT_DELIMITER);
int ordinal = findCategory(cp);
if (ordinal < 0) {
// NOTE: call addCategory so that it works well in a multi-threaded
// environment, in case e.g. a thread just added the category, after
// the findCategory() call above failed to find it.
ordinal = addCategory(cp);
}
} else if (comp==0) {
// 'first' and currentMain are the same, so both the main and some
// other taxonomies need to be moved, but a category doesn't need
// to be added because it already existed in the main taxonomy.
// TODO (Facet): Again, is there a quicker way?
mainde = mainte.docs(MultiFields.getLiveDocs(mainreader), mainde, false);
mainde.nextDoc(); // TODO (Facet): check?
int newordinal = mainde.docID();
currentMain = nextTE(mainte);
for (int i=0; i<taxonomies.length; i++) {
if (first.equals(currentOthers[i])) {
// TODO (Facet): again, is there a quicker way?
otherdocsEnum[i] = othertes[i].docs(MultiFields.getLiveDocs(otherreaders[i]), otherdocsEnum[i], false);
otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
int origordinal = otherdocsEnum[i].docID();
ordinalMaps[i].addMapping(origordinal, newordinal);
// and move to the next category
currentOthers[i] = nextTE(othertes[i]);
if (currentOthers[i]==null) {
otherTaxonomiesLeft--;
}
}
}
} else /* comp > 0 */ {
// The currentMain doesn't appear in any of the other taxonomies -
// we don't need to do anything, just continue to the next one
currentMain = nextTE(mainte);
docs = te.docs(liveDocs, docs, false);
ordinalMap.addMapping(docs.nextDoc(), ordinal);
}
}
// Close all the readers we've opened, and also tell the ordinal maps
// we're done adding to them
mainreader.close();
for (int i=0; i<taxonomies.length; i++) {
otherreaders[i].close();
// We never actually added a mapping for the root ordinal - let's do
// it now, just so that the map is complete (every ordinal between 0
// and size-1 is remapped)
ordinalMaps[i].addMapping(0, 0);
ordinalMaps[i].addDone();
// we must add the root ordinal map, so that the map will be complete
// (otherwise e.g. DiskOrdinalMap may fail because it expects more
// categories to exist in the file).
ordinalMap.addMapping(0, 0);
ordinalMap.addDone();
} finally {
r.close();
}
}
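Taken together, the rewritten method is driven as in the following sketch, which mirrors the new test added below (destDir and srcDir are hypothetical directories):

DirectoryTaxonomyWriter destTW = new DirectoryTaxonomyWriter(destDir);
OrdinalMap map = new DirectoryTaxonomyWriter.MemoryOrdinalMap(); // or a DiskOrdinalMap for very large taxonomies
destTW.addTaxonomy(srcDir, map);   // adds missing categories; fills map with source -> dest ordinals
int[] remap = map.getMap();        // remap[srcOrdinal] == ordinal in the merged taxonomy
destTW.close();

The int[] returned by getMap() is what FacetsPayloadProcessorProvider consumes (see the TaxonomyMergeUtils hunk above) when the matching search indexes are merged.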
@@ -1113,13 +1002,6 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
}
}
private static final String nextTE(TermsEnum te) throws IOException {
if (te.next() != null) {
return te.term().utf8ToString(); // TODO (Facet): avoid String creation/use Bytes?
}
return null;
}
/**
* Rollback changes to the taxonomy writer and closes the instance. Following
* this method the instance becomes unusable (calling any of its API methods


@@ -1,254 +0,0 @@
package org.apache.lucene.facet.taxonomy.directory;
import java.io.File;
import org.apache.lucene.store.Directory;
import org.junit.Test;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.DiskOrdinalMap;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class TestAddTaxonomies extends LuceneTestCase {
@Test
public void test1() throws Exception {
Directory dir1 = newDirectory();
DirectoryTaxonomyWriter tw1 = new DirectoryTaxonomyWriter(dir1);
tw1.addCategory(new CategoryPath("Author", "Mark Twain"));
tw1.addCategory(new CategoryPath("Animals", "Dog"));
Directory dir2 = newDirectory();
DirectoryTaxonomyWriter tw2 = new DirectoryTaxonomyWriter(dir2);
tw2.addCategory(new CategoryPath("Author", "Rob Pike"));
tw2.addCategory(new CategoryPath("Aardvarks", "Bob"));
tw2.close();
Directory dir3 = newDirectory();
DirectoryTaxonomyWriter tw3 = new DirectoryTaxonomyWriter(dir3);
tw3.addCategory(new CategoryPath("Author", "Zebra Smith"));
tw3.addCategory(new CategoryPath("Aardvarks", "Bob"));
tw3.addCategory(new CategoryPath("Aardvarks", "Aaron"));
tw3.close();
MemoryOrdinalMap[] maps = new MemoryOrdinalMap[2];
maps[0] = new MemoryOrdinalMap();
maps[1] = new MemoryOrdinalMap();
tw1.addTaxonomies(new Directory[] { dir2, dir3 }, maps);
tw1.close();
TaxonomyReader tr = new DirectoryTaxonomyReader(dir1);
// Test that the merged taxonomy now contains what we expect:
// First all the categories of the original taxonomy, in their original order:
assertEquals(tr.getPath(0).toString(), "");
assertEquals(tr.getPath(1).toString(), "Author");
assertEquals(tr.getPath(2).toString(), "Author/Mark Twain");
assertEquals(tr.getPath(3).toString(), "Animals");
assertEquals(tr.getPath(4).toString(), "Animals/Dog");
// Then the categories new in the new taxonomy, in alphabetical order:
assertEquals(tr.getPath(5).toString(), "Aardvarks");
assertEquals(tr.getPath(6).toString(), "Aardvarks/Aaron");
assertEquals(tr.getPath(7).toString(), "Aardvarks/Bob");
assertEquals(tr.getPath(8).toString(), "Author/Rob Pike");
assertEquals(tr.getPath(9).toString(), "Author/Zebra Smith");
assertEquals(tr.getSize(), 10);
// Test that the maps contain what we expect
int[] map0 = maps[0].getMap();
assertEquals(5, map0.length);
assertEquals(0, map0[0]);
assertEquals(1, map0[1]);
assertEquals(8, map0[2]);
assertEquals(5, map0[3]);
assertEquals(7, map0[4]);
int[] map1 = maps[1].getMap();
assertEquals(6, map1.length);
assertEquals(0, map1[0]);
assertEquals(1, map1[1]);
assertEquals(9, map1[2]);
assertEquals(5, map1[3]);
assertEquals(7, map1[4]);
assertEquals(6, map1[5]);
tr.close();
dir1.close();
dir2.close();
dir3.close();
}
// a reasonable random test
public void testmedium() throws Exception {
int numTests = atLeast(3);
for (int i = 0; i < numTests; i++) {
dotest(_TestUtil.nextInt(random(), 1, 10),
_TestUtil.nextInt(random(), 1, 100),
_TestUtil.nextInt(random(), 100, 1000),
random().nextBoolean());
}
}
// A more comprehensive and big random test.
@Test @Nightly
public void testbig() throws Exception {
dotest(2, 1000, 5000, false);
dotest(10, 10000, 100, false);
dotest(50, 20, 100, false);
dotest(10, 1000, 10000, false);
dotest(50, 20, 10000, false);
dotest(1, 20, 10000, false);
dotest(10, 1, 10000, false);
dotest(10, 1000, 20000, true);
}
private void dotest(int ntaxonomies, int ncats, int range, boolean disk) throws Exception {
Directory dirs[] = new Directory[ntaxonomies];
Directory copydirs[] = new Directory[ntaxonomies];
for (int i=0; i<ntaxonomies; i++) {
dirs[i] = newDirectory();
copydirs[i] = newDirectory();
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[i]);
DirectoryTaxonomyWriter copytw = new DirectoryTaxonomyWriter(copydirs[i]);
for (int j=0; j<ncats; j++) {
String cat = Integer.toString(random().nextInt(range));
tw.addCategory(new CategoryPath("a",cat));
copytw.addCategory(new CategoryPath("a",cat));
}
// System.err.println("Taxonomy "+i+": "+tw.getSize());
tw.close();
copytw.close();
}
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0]);
Directory otherdirs[] = new Directory[ntaxonomies-1];
System.arraycopy(dirs, 1, otherdirs, 0, ntaxonomies-1);
OrdinalMap[] maps = new OrdinalMap[ntaxonomies-1];
if (ntaxonomies>1) {
for (int i=0; i<ntaxonomies-1; i++) {
if (disk) {
// TODO: use a LTC tempfile
maps[i] = new DiskOrdinalMap(new File(System.getProperty("java.io.tmpdir"),
"tmpmap"+i));
} else {
maps[i] = new MemoryOrdinalMap();
}
}
}
tw.addTaxonomies(otherdirs, maps);
// System.err.println("Merged taxonomy: "+tw.getSize());
tw.close();
// Check that all original categories in the main taxonomy remain
// unchanged, and the rest of the taxonomies are completely unchanged.
for (int i=0; i<ntaxonomies; i++) {
TaxonomyReader tr = new DirectoryTaxonomyReader(dirs[i]);
TaxonomyReader copytr = new DirectoryTaxonomyReader(copydirs[i]);
if (i==0) {
assertTrue(tr.getSize() >= copytr.getSize());
} else {
assertEquals(copytr.getSize(), tr.getSize());
}
for (int j=0; j<copytr.getSize(); j++) {
String expected = copytr.getPath(j).toString();
String got = tr.getPath(j).toString();
assertTrue("Comparing category "+j+" of taxonomy "+i+": expected "+expected+", got "+got,
expected.equals(got));
}
tr.close();
copytr.close();
}
// Check that all the new categories in the main taxonomy are in
// lexicographic order. This isn't a requirement of our API, but happens
// this way in our current implementation.
TaxonomyReader tr = new DirectoryTaxonomyReader(dirs[0]);
TaxonomyReader copytr = new DirectoryTaxonomyReader(copydirs[0]);
if (tr.getSize() > copytr.getSize()) {
String prev = tr.getPath(copytr.getSize()).toString();
for (int j=copytr.getSize()+1; j<tr.getSize(); j++) {
String n = tr.getPath(j).toString();
assertTrue(prev.compareTo(n)<0);
prev=n;
}
}
int oldsize = copytr.getSize(); // remember for later
tr.close();
copytr.close();
// Check that all the categories from other taxonomies exist in the new
// taxonomy.
TaxonomyReader main = new DirectoryTaxonomyReader(dirs[0]);
for (int i=1; i<ntaxonomies; i++) {
TaxonomyReader other = new DirectoryTaxonomyReader(dirs[i]);
for (int j=0; j<other.getSize(); j++) {
int otherord = main.getOrdinal(other.getPath(j));
assertTrue(otherord != TaxonomyReader.INVALID_ORDINAL);
}
other.close();
}
// Check that all the new categories in the merged taxonomy exist in
// one of the added taxonomies.
TaxonomyReader[] others = new TaxonomyReader[ntaxonomies-1];
for (int i=1; i<ntaxonomies; i++) {
others[i-1] = new DirectoryTaxonomyReader(dirs[i]);
}
for (int j=oldsize; j<main.getSize(); j++) {
boolean found=false;
CategoryPath path = main.getPath(j);
for (int i=1; i<ntaxonomies; i++) {
if (others[i-1].getOrdinal(path) != TaxonomyReader.INVALID_ORDINAL) {
found=true;
break;
}
}
if (!found) {
fail("Found category "+j+" ("+path+") in merged taxonomy not in any of the separate ones");
}
}
// Check that all the maps are correct
for (int i=0; i<ntaxonomies-1; i++) {
int[] map = maps[i].getMap();
for (int j=0; j<map.length; j++) {
assertEquals(map[j], main.getOrdinal(others[i].getPath(j)));
}
}
for (int i=1; i<ntaxonomies; i++) {
others[i-1].close();
}
main.close();
IOUtils.close(dirs);
IOUtils.close(copydirs);
}
}


@@ -0,0 +1,228 @@
package org.apache.lucene.facet.taxonomy.directory;
import java.io.IOException;
import java.util.HashSet;
import java.util.Random;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.DiskOrdinalMap;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class TestAddTaxonomy extends LuceneTestCase {
private void dotest(int ncats, int range) throws Exception {
Directory dirs[] = new Directory[2];
Random random = random();
for (int i = 0; i < dirs.length; i++) {
dirs[i] = newDirectory();
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[i]);
for (int j = 0; j < ncats; j++) {
String cat = Integer.toString(random.nextInt(range));
tw.addCategory(new CategoryPath("a", cat));
}
tw.close();
}
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0]);
OrdinalMap map = randomOrdinalMap();
tw.addTaxonomy(dirs[1], map);
tw.close();
validate(dirs[0], dirs[1], map);
IOUtils.close(dirs);
}
private OrdinalMap randomOrdinalMap() throws IOException {
if (random().nextBoolean()) {
return new DiskOrdinalMap(_TestUtil.createTempFile("taxoMap", "", TEMP_DIR));
} else {
return new MemoryOrdinalMap();
}
}
private void validate(Directory dest, Directory src, OrdinalMap ordMap) throws Exception {
CategoryPath cp = new CategoryPath();
DirectoryTaxonomyReader destTR = new DirectoryTaxonomyReader(dest);
try {
final int destSize = destTR.getSize();
DirectoryTaxonomyReader srcTR = new DirectoryTaxonomyReader(src);
try {
int[] map = ordMap.getMap();
// validate taxo sizes
int srcSize = srcTR.getSize();
assertTrue("destination taxonomy expected to be larger than source; dest="
+ destSize + " src=" + srcSize,
destSize >= srcSize);
// validate that all source categories exist in destination, and their
// ordinals are as expected.
for (int j = 1; j < srcSize; j++) {
srcTR.getPath(j, cp);
int destOrdinal = destTR.getOrdinal(cp);
assertTrue(cp + " not found in destination", destOrdinal > 0);
assertEquals(destOrdinal, map[j]);
}
} finally {
srcTR.close();
}
} finally {
destTR.close();
}
}
public void testAddEmpty() throws Exception {
Directory dest = newDirectory();
DirectoryTaxonomyWriter destTW = new DirectoryTaxonomyWriter(dest);
destTW.addCategory(new CategoryPath("Author", "Rob Pike"));
destTW.addCategory(new CategoryPath("Aardvarks", "Bob"));
destTW.commit();
Directory src = newDirectory();
new DirectoryTaxonomyWriter(src).close(); // create an empty taxonomy
OrdinalMap map = randomOrdinalMap();
destTW.addTaxonomy(src, map);
destTW.close();
validate(dest, src, map);
IOUtils.close(dest, src);
}
public void testAddToEmpty() throws Exception {
Directory dest = newDirectory();
Directory src = newDirectory();
DirectoryTaxonomyWriter srcTW = new DirectoryTaxonomyWriter(src);
srcTW.addCategory(new CategoryPath("Author", "Rob Pike"));
srcTW.addCategory(new CategoryPath("Aardvarks", "Bob"));
srcTW.close();
DirectoryTaxonomyWriter destTW = new DirectoryTaxonomyWriter(dest);
OrdinalMap map = randomOrdinalMap();
destTW.addTaxonomy(src, map);
destTW.close();
validate(dest, src, map);
IOUtils.close(dest, src);
}
// A more comprehensive and big random test.
@Nightly
public void testBig() throws Exception {
dotest(200, 10000);
dotest(1000, 20000);
// really big
dotest(400000, 1000000);
}
// a reasonable random test
public void testMedium() throws Exception {
Random random = random();
int numTests = atLeast(3);
for (int i = 0; i < numTests; i++) {
dotest(_TestUtil.nextInt(random, 2, 100),
_TestUtil.nextInt(random, 100, 1000));
}
}
public void testSimple() throws Exception {
Directory dest = newDirectory();
DirectoryTaxonomyWriter tw1 = new DirectoryTaxonomyWriter(dest);
tw1.addCategory(new CategoryPath("Author", "Mark Twain"));
tw1.addCategory(new CategoryPath("Animals", "Dog"));
tw1.addCategory(new CategoryPath("Author", "Rob Pike"));
Directory src = newDirectory();
DirectoryTaxonomyWriter tw2 = new DirectoryTaxonomyWriter(src);
tw2.addCategory(new CategoryPath("Author", "Rob Pike"));
tw2.addCategory(new CategoryPath("Aardvarks", "Bob"));
tw2.close();
OrdinalMap map = randomOrdinalMap();
tw1.addTaxonomy(src, map);
tw1.close();
validate(dest, src, map);
IOUtils.close(dest, src);
}
public void testConcurrency() throws Exception {
// tests that addTaxonomy and addCategory work in parallel
final int numCategories = atLeast(5000);
// build an input taxonomy index
Directory src = newDirectory();
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(src);
for (int i = 0; i < numCategories; i++) {
tw.addCategory(new CategoryPath("a", Integer.toString(i)));
}
tw.close();
// now add the taxonomy to an empty taxonomy, while adding the categories
// again, in parallel -- in the end, no duplicate categories should exist.
Directory dest = newDirectory();
final DirectoryTaxonomyWriter destTW = new DirectoryTaxonomyWriter(dest);
Thread t = new Thread() {
@Override
public void run() {
for (int i = 0; i < numCategories; i++) {
try {
destTW.addCategory(new CategoryPath("a", Integer.toString(i)));
} catch (IOException e) {
// shouldn't happen - if it does, let the test fail on uncaught exception.
throw new RuntimeException(e);
}
}
}
};
t.start();
OrdinalMap map = new MemoryOrdinalMap();
destTW.addTaxonomy(src, map);
t.join();
destTW.close();
// now validate
DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dest);
// +2 to account for the root category + "a"
assertEquals(numCategories + 2, dtr.getSize());
HashSet<CategoryPath> categories = new HashSet<CategoryPath>();
for (int i = 1; i < dtr.getSize(); i++) {
CategoryPath cat = dtr.getPath(i);
assertTrue("category " + cat + " already existed", categories.add(cat));
}
dtr.close();
IOUtils.close(src, dest);
}
}