LUCENE-3786: add SearcherTaxonomyManager

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1466548 13f79535-47bb-0310-9956-ffa450edef68
2013-04-10 16:25:22 +00:00 · 2013-04-10 16:25:22 +00:00 · 03694e7693
parent b2348c8494
commit 03694e7693
10 changed files with 345 additions and 11 deletions
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@ -179,6 +179,10 @@ New Features

 * LUCENE-4904: Added descending sort order to NumericDocValuesSorter. (Shai Erera)

+* LUCENE-3786: Added SearcherTaxonomyManager, to manage access to both
+  IndexSearcher and DirectoryTaxonomyReader for near-real-time
+  faceting.  (Shai Erera, Mike McCandless)
+
 Optimizations

 * LUCENE-4839: SorterTemplate.merge can now be overridden in order to replace
--- a/lucene/core/src/java/org/apache/lucene/search/ReferenceManager.java
+++ b/lucene/core/src/java/org/apache/lucene/search/ReferenceManager.java
@ -81,7 +81,7 @@ public abstract class ReferenceManager<G> implements Closeable {
   * the operation was successful.
   * @throws AlreadyClosedException if the reference manager has been {@link #close() closed}. 
   */
-  protected abstract boolean tryIncRef(G reference);
+  protected abstract boolean tryIncRef(G reference) throws IOException;

  /**
   * Obtain the current reference. You must match every call to acquire with one
@ -90,7 +90,7 @@ public abstract class ReferenceManager<G> implements Closeable {
   * released.
   * @throws AlreadyClosedException if the reference manager has been {@link #close() closed}. 
   */
-  public final G acquire() {
+  public final G acquire() throws IOException {
    G ref;
    do {
      if ((ref = current) == null) {
--- a/lucene/core/src/java/org/apache/lucene/search/SearcherManager.java
+++ b/lucene/core/src/java/org/apache/lucene/search/SearcherManager.java
@ -144,8 +144,11 @@ public final class SearcherManager extends ReferenceManager<IndexSearcher> {
    }
  }

-  // NOTE: decRefs incoming reader on throwing an exception
-  static IndexSearcher getSearcher(SearcherFactory searcherFactory, IndexReader reader) throws IOException {
+  /** Expert: creates a searcher from the provided {@link
+   *  IndexReader} using the provided {@link
+   *  SearcherFactory}.  NOTE: this decRefs incoming reader
+   * on throwing an exception. */
+  public static IndexSearcher getSearcher(SearcherFactory searcherFactory, IndexReader reader) throws IOException {
    boolean success = false;
    final IndexSearcher searcher;
    try {
--- a/lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/search/FastCountingFacetsAggregator.java
@ -83,6 +83,7 @@ public final class FastCountingFacetsAggregator extends IntRollupFacetsAggregato
          byte b = buf.bytes[offset++];
          if (b >= 0) {
            prev = ord = ((ord << 7) | b) + prev;
+            assert ord < counts.length: "ord=" + ord + " vs maxOrd=" + counts.length;
            ++counts[ord];
            ord = 0;
          } else {
--- a/lucene/facet/src/java/org/apache/lucene/facet/search/SearcherTaxonomyManager.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/search/SearcherTaxonomyManager.java
@ -0,0 +1,122 @@
+package org.apache.lucene.facet.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.ReferenceManager;
+import org.apache.lucene.search.SearcherFactory;
+import org.apache.lucene.search.SearcherManager;
+import org.apache.lucene.util.IOUtils;
+
+/**
+ * Manages near-real-time reopen of both an IndexSearcher
+ * and a TaxonomyReader.
+ *
+ * <p><b>NOTE</b>: If you call {@link
+ * DirectoryTaxonomyWriter#replaceTaxonomy} then you must
+ * open a new {@code SearcherTaxonomyManager} afterwards.
+ */
+public class SearcherTaxonomyManager extends ReferenceManager<SearcherTaxonomyManager.SearcherAndTaxonomy> {
+
+  /** Holds a matched pair of {@link IndexSearcher} and
+   *  {@link TaxonomyReader} */
+  public static class SearcherAndTaxonomy {
+    public final IndexSearcher searcher;
+    public final DirectoryTaxonomyReader taxonomyReader;
+
+    SearcherAndTaxonomy(IndexSearcher searcher, DirectoryTaxonomyReader taxonomyReader) {
+      this.searcher = searcher;
+      this.taxonomyReader = taxonomyReader;
+    }
+  }
+
+  private final SearcherFactory searcherFactory;
+  private final long taxoEpoch;
+  private final DirectoryTaxonomyWriter taxoWriter;
+
+  /** Creates near-real-time searcher and taxonomy reader
+   *  from the corresponding writers. */
+  public SearcherTaxonomyManager(IndexWriter writer, boolean applyAllDeletes, SearcherFactory searcherFactory, DirectoryTaxonomyWriter taxoWriter) throws IOException {
+    if (searcherFactory == null) {
+      searcherFactory = new SearcherFactory();
+    }
+    this.searcherFactory = searcherFactory;
+    this.taxoWriter = taxoWriter;
+    DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
+    current = new SearcherAndTaxonomy(SearcherManager.getSearcher(searcherFactory, DirectoryReader.open(writer, applyAllDeletes)),
+                                      taxoReader);
+    taxoEpoch = taxoWriter.getTaxonomyEpoch();
+  }
+
+  @Override
+  protected void decRef(SearcherAndTaxonomy ref) throws IOException {
+    ref.searcher.getIndexReader().decRef();
+
+    // This decRef can fail, and then in theory we should
+    // tryIncRef the searcher to put back the ref count
+    // ... but 1) the below decRef should only fail because
+    // it decRef'd to 0 and closed and hit some IOException
+    // during close, in which case 2) very likely the
+    // searcher was also just closed by the above decRef and
+    // a tryIncRef would fail:
+    ref.taxonomyReader.decRef();
+  }
+
+  @Override
+  protected boolean tryIncRef(SearcherAndTaxonomy ref) throws IOException {
+    if (ref.searcher.getIndexReader().tryIncRef()) {
+      if (ref.taxonomyReader.tryIncRef()) {
+        return true;
+      } else {
+        ref.searcher.getIndexReader().decRef();
+      }
+    }
+    return false;
+  }
+
+  @Override
+  protected SearcherAndTaxonomy refreshIfNeeded(SearcherAndTaxonomy ref) throws IOException {
+    // Must re-open searcher first, otherwise we may get a
+    // new reader that references ords not yet known to the
+    // taxonomy reader:
+    final IndexReader r = ref.searcher.getIndexReader();
+    final IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) r);
+    if (newReader == null) {
+      return null;
+    } else {
+      DirectoryTaxonomyReader tr = TaxonomyReader.openIfChanged(ref.taxonomyReader);
+      if (tr == null) {
+        ref.taxonomyReader.incRef();
+        tr = ref.taxonomyReader;
+      } else if (taxoWriter.getTaxonomyEpoch() != taxoEpoch) {
+        IOUtils.close(newReader, tr);
+        throw new IllegalStateException("DirectoryTaxonomyWriter.replaceTaxonomy was called, which is not allowed when using SearcherTaxonomyManager");
+      }
+
+      return new SearcherAndTaxonomy(SearcherManager.getSearcher(searcherFactory, newReader), tr);
+    }
+  }
+}
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java
@ -246,4 +246,16 @@ public abstract class TaxonomyReader implements Closeable {
    refCount.incrementAndGet();
  }

+  /** Expert: increments the refCount of this TaxonomyReader
+   *  instance only if it has not been closed yet.  Returns
+   *  true on success. */
+  public final boolean tryIncRef() {
+    int count;
+    while ((count = refCount.get()) > 0) {
+      if (refCount.compareAndSet(count, count+1)) {
+        return true;
+      }
+    }
+    return false;
+  }
 }
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java
@ -9,7 +9,7 @@ import org.apache.lucene.facet.collections.LRUHashMap;
 import org.apache.lucene.facet.taxonomy.CategoryPath;
 import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
-import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.CorruptIndexException; // javadocs
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.IndexWriter;
--- a/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
+++ b/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java
@ -29,7 +29,7 @@ import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache
 import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.CorruptIndexException;
+import org.apache.lucene.index.CorruptIndexException; // javadocs
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.DocsEnum;
 import org.apache.lucene.index.IndexReader;
@ -44,7 +44,7 @@ import org.apache.lucene.index.TermsEnum;
 import org.apache.lucene.index.TieredMergePolicy;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.LockObtainFailedException;
+import org.apache.lucene.store.LockObtainFailedException; // javadocs
 import org.apache.lucene.store.NativeFSLockFactory;
 import org.apache.lucene.store.SimpleFSLockFactory;
 import org.apache.lucene.util.BytesRef;
@ -991,9 +991,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
    return indexWriter;
  }
  
-  /** Used by {@link DirectoryTaxonomyReader} to support NRT. */
-  final long getTaxonomyEpoch() {
+  /** Expert: returns current index epoch, if this is a
+   * near-real-time reader.  Used by {@link
+   * DirectoryTaxonomyReader} to support NRT. 
+   *
+   * @lucene.internal */
+  public final long getTaxonomyEpoch() {
    return indexEpoch;
  }
-  
 }
--- a/lucene/facet/src/test/org/apache/lucene/facet/search/TestSearcherTaxonomyManager.java
+++ b/lucene/facet/src/test/org/apache/lucene/facet/search/TestSearcherTaxonomyManager.java
@ -0,0 +1,189 @@
+package org.apache.lucene.facet.search;
+
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import org.apache.lucene.analysis.MockAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.facet.index.FacetFields;
+import org.apache.lucene.facet.params.FacetIndexingParams;
+import org.apache.lucene.facet.params.FacetSearchParams;
+import org.apache.lucene.facet.search.SearcherTaxonomyManager.SearcherAndTaxonomy;
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+public class TestSearcherTaxonomyManager extends LuceneTestCase {
+  public void test() throws Exception {
+    Directory dir = newDirectory();
+    Directory taxoDir = newDirectory();
+    final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir);
+    final FacetFields facetFields = new FacetFields(tw);
+    final AtomicBoolean stop = new AtomicBoolean();
+
+    Thread indexer = new Thread() {
+        @Override
+        public void run() {
+          Set<String> seen = new HashSet<String>();
+          List<String> paths = new ArrayList<String>();
+          while (!stop.get()) {
+            Document doc = new Document();
+            List<CategoryPath> docPaths = new ArrayList<CategoryPath>();
+            int numPaths = _TestUtil.nextInt(random(), 1, 5);
+            for(int i=0;i<numPaths;i++) {
+              String path;
+              if (!paths.isEmpty() && random().nextInt(5) != 4) {
+                // Use previous path
+                path = paths.get(random().nextInt(paths.size()));
+              } else {
+                // Create new path
+                path = null;
+                while (true) {
+                  path = _TestUtil.randomRealisticUnicodeString(random());
+                  if (path.length() != 0 && !seen.contains(path) && path.indexOf(FacetIndexingParams.DEFAULT_FACET_DELIM_CHAR) == -1) {
+                    seen.add(path);
+                    paths.add(path);
+                    break;
+                  }
+                }
+              }
+              docPaths.add(new CategoryPath("field", path));
+            }
+            try {
+              facetFields.addFields(doc, docPaths);
+              w.addDocument(doc);
+            } catch (IOException ioe) {
+              throw new RuntimeException(ioe);
+            }
+          }
+        }
+      };
+
+    final SearcherTaxonomyManager mgr = new SearcherTaxonomyManager(w, true, null, tw);
+
+    Thread reopener = new Thread() {
+        @Override
+        public void run() {
+          while(!stop.get()) {
+            try {
+              // Sleep for up to 20 msec:
+              Thread.sleep(random().nextInt(20));
+
+              if (VERBOSE) {
+                System.out.println("TEST: reopen");
+              }
+
+              mgr.maybeRefresh();
+
+              if (VERBOSE) {
+                System.out.println("TEST: reopen done");
+              }
+            } catch (Exception ioe) {
+              throw new RuntimeException(ioe);
+            }
+          }
+        }
+      };
+    reopener.start();
+
+    float runTimeSec = TEST_NIGHTLY ? 10.0f : 2.0f;
+
+    long stopTime = System.currentTimeMillis() + (int) (runTimeSec*1000);
+
+    indexer.start();
+
+    try {
+      while (System.currentTimeMillis() < stopTime) {
+        SearcherAndTaxonomy pair = mgr.acquire();
+        try {
+          //System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
+          int topN;
+          if (random().nextBoolean()) {
+            topN = _TestUtil.nextInt(random(), 1, 20);
+          } else {
+            topN = Integer.MAX_VALUE;
+          }
+          FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("field"), topN));
+          FacetsCollector fc = FacetsCollector.create(fsp, pair.searcher.getIndexReader(), pair.taxonomyReader);
+          pair.searcher.search(new MatchAllDocsQuery(), fc);
+          List<FacetResult> results = fc.getFacetResults();
+          FacetResult fr = results.get(0);
+          FacetResultNode root = results.get(0).getFacetResultNode();
+          assertTrue(root.ordinal != 0);
+
+          if (pair.searcher.getIndexReader().numDocs() > 0) { 
+            assertTrue(fr.getNumValidDescendants() > 0);
+            assertFalse(root.subResults.isEmpty());
+          }
+          //if (VERBOSE) {
+          //System.out.println("TEST: facets=" + FacetTestUtils.toSimpleString(results.get(0)));
+          //}
+        } finally {
+          mgr.release(pair);
+        }
+      }
+    } finally {
+      stop.set(true);
+      indexer.join();
+      reopener.join();
+    }
+
+    if (VERBOSE) {
+      System.out.println("TEST: now stop");
+    }
+
+    IOUtils.close(mgr, tw, w, taxoDir, dir);
+  }
+
+  public void testReplaceTaxonomy() throws Exception {
+    Directory dir = newDirectory();
+    Directory taxoDir = newDirectory();
+    IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
+    DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir);
+
+    Directory taxoDir2 = newDirectory();
+    DirectoryTaxonomyWriter tw2 = new DirectoryTaxonomyWriter(taxoDir2);
+    tw2.close();
+
+    SearcherTaxonomyManager mgr = new SearcherTaxonomyManager(w, true, null, tw);
+    w.addDocument(new Document());
+    tw.replaceTaxonomy(taxoDir2);
+    taxoDir2.close();
+
+    try {
+      mgr.maybeRefresh();
+      fail("should have hit exception");
+    } catch (IllegalStateException ise) {
+      // expected
+    }
+
+    IOUtils.close(mgr, tw, w, taxoDir, dir);
+  }
+}
--- a/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/search/ShardSearchingTestBase.java
@ -448,7 +448,7 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
      currentNodeVersions = new long[numNodes];
    }

-    public void initSearcher(long[] nodeVersions) {
+    public void initSearcher(long[] nodeVersions) throws IOException {
      assert currentShardSearcher == null;
      System.arraycopy(nodeVersions, 0, currentNodeVersions, 0, currentNodeVersions.length);
      currentShardSearcher = new ShardIndexSearcher(currentNodeVersions.clone(),