LUCENE-3786: add SearcherTaxonomyManager

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1466548 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2013-04-10 16:25:22 +00:00
parent b2348c8494
commit 03694e7693
10 changed files with 345 additions and 11 deletions

View File

@ -179,6 +179,10 @@ New Features
* LUCENE-4904: Added descending sort order to NumericDocValuesSorter. (Shai Erera)
* LUCENE-3786: Added SearcherTaxonomyManager, to manage access to both
IndexSearcher and DirectoryTaxonomyReader for near-real-time
faceting. (Shai Erera, Mike McCandless)
Optimizations
* LUCENE-4839: SorterTemplate.merge can now be overridden in order to replace

View File

@ -81,7 +81,7 @@ public abstract class ReferenceManager<G> implements Closeable {
* the operation was successful.
* @throws AlreadyClosedException if the reference manager has been {@link #close() closed}.
*/
protected abstract boolean tryIncRef(G reference);
protected abstract boolean tryIncRef(G reference) throws IOException;
/**
* Obtain the current reference. You must match every call to acquire with one
@ -90,7 +90,7 @@ public abstract class ReferenceManager<G> implements Closeable {
* released.
* @throws AlreadyClosedException if the reference manager has been {@link #close() closed}.
*/
public final G acquire() {
public final G acquire() throws IOException {
G ref;
do {
if ((ref = current) == null) {

View File

@ -144,8 +144,11 @@ public final class SearcherManager extends ReferenceManager<IndexSearcher> {
}
}
// NOTE: decRefs incoming reader on throwing an exception
static IndexSearcher getSearcher(SearcherFactory searcherFactory, IndexReader reader) throws IOException {
/** Expert: creates a searcher from the provided {@link
* IndexReader} using the provided {@link
* SearcherFactory}. NOTE: this decRefs incoming reader
* on throwing an exception. */
public static IndexSearcher getSearcher(SearcherFactory searcherFactory, IndexReader reader) throws IOException {
boolean success = false;
final IndexSearcher searcher;
try {

View File

@ -83,6 +83,7 @@ public final class FastCountingFacetsAggregator extends IntRollupFacetsAggregato
byte b = buf.bytes[offset++];
if (b >= 0) {
prev = ord = ((ord << 7) | b) + prev;
assert ord < counts.length: "ord=" + ord + " vs maxOrd=" + counts.length;
++counts[ord];
ord = 0;
} else {

View File

@ -0,0 +1,122 @@
package org.apache.lucene.facet.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ReferenceManager;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.util.IOUtils;
/**
* Manages near-real-time reopen of both an IndexSearcher
* and a TaxonomyReader.
*
* <p><b>NOTE</b>: If you call {@link
* DirectoryTaxonomyWriter#replaceTaxonomy} then you must
* open a new {@code SearcherTaxonomyManager} afterwards.
*/
public class SearcherTaxonomyManager extends ReferenceManager<SearcherTaxonomyManager.SearcherAndTaxonomy> {
/** Holds a matched pair of {@link IndexSearcher} and
* {@link TaxonomyReader} */
public static class SearcherAndTaxonomy {
public final IndexSearcher searcher;
public final DirectoryTaxonomyReader taxonomyReader;
SearcherAndTaxonomy(IndexSearcher searcher, DirectoryTaxonomyReader taxonomyReader) {
this.searcher = searcher;
this.taxonomyReader = taxonomyReader;
}
}
private final SearcherFactory searcherFactory;
private final long taxoEpoch;
private final DirectoryTaxonomyWriter taxoWriter;
/** Creates near-real-time searcher and taxonomy reader
* from the corresponding writers. */
public SearcherTaxonomyManager(IndexWriter writer, boolean applyAllDeletes, SearcherFactory searcherFactory, DirectoryTaxonomyWriter taxoWriter) throws IOException {
if (searcherFactory == null) {
searcherFactory = new SearcherFactory();
}
this.searcherFactory = searcherFactory;
this.taxoWriter = taxoWriter;
DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoWriter);
current = new SearcherAndTaxonomy(SearcherManager.getSearcher(searcherFactory, DirectoryReader.open(writer, applyAllDeletes)),
taxoReader);
taxoEpoch = taxoWriter.getTaxonomyEpoch();
}
@Override
protected void decRef(SearcherAndTaxonomy ref) throws IOException {
ref.searcher.getIndexReader().decRef();
// This decRef can fail, and then in theory we should
// tryIncRef the searcher to put back the ref count
// ... but 1) the below decRef should only fail because
// it decRef'd to 0 and closed and hit some IOException
// during close, in which case 2) very likely the
// searcher was also just closed by the above decRef and
// a tryIncRef would fail:
ref.taxonomyReader.decRef();
}
@Override
protected boolean tryIncRef(SearcherAndTaxonomy ref) throws IOException {
if (ref.searcher.getIndexReader().tryIncRef()) {
if (ref.taxonomyReader.tryIncRef()) {
return true;
} else {
ref.searcher.getIndexReader().decRef();
}
}
return false;
}
@Override
protected SearcherAndTaxonomy refreshIfNeeded(SearcherAndTaxonomy ref) throws IOException {
// Must re-open searcher first, otherwise we may get a
// new reader that references ords not yet known to the
// taxonomy reader:
final IndexReader r = ref.searcher.getIndexReader();
final IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) r);
if (newReader == null) {
return null;
} else {
DirectoryTaxonomyReader tr = TaxonomyReader.openIfChanged(ref.taxonomyReader);
if (tr == null) {
ref.taxonomyReader.incRef();
tr = ref.taxonomyReader;
} else if (taxoWriter.getTaxonomyEpoch() != taxoEpoch) {
IOUtils.close(newReader, tr);
throw new IllegalStateException("DirectoryTaxonomyWriter.replaceTaxonomy was called, which is not allowed when using SearcherTaxonomyManager");
}
return new SearcherAndTaxonomy(SearcherManager.getSearcher(searcherFactory, newReader), tr);
}
}
}

View File

@ -246,4 +246,16 @@ public abstract class TaxonomyReader implements Closeable {
refCount.incrementAndGet();
}
/** Expert: increments the refCount of this TaxonomyReader
* instance only if it has not been closed yet. Returns
* true on success. */
public final boolean tryIncRef() {
int count;
while ((count = refCount.get()) > 0) {
if (refCount.compareAndSet(count, count+1)) {
return true;
}
}
return false;
}
}

View File

@ -9,7 +9,7 @@ import org.apache.lucene.facet.collections.LRUHashMap;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.CorruptIndexException; // javadocs
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexWriter;

View File

@ -29,7 +29,7 @@ import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache
import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.CorruptIndexException; // javadocs
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
@ -44,7 +44,7 @@ import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.LockObtainFailedException; // javadocs
import org.apache.lucene.store.NativeFSLockFactory;
import org.apache.lucene.store.SimpleFSLockFactory;
import org.apache.lucene.util.BytesRef;
@ -991,9 +991,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
return indexWriter;
}
/** Used by {@link DirectoryTaxonomyReader} to support NRT. */
final long getTaxonomyEpoch() {
/** Expert: returns current index epoch, if this is a
* near-real-time reader. Used by {@link
* DirectoryTaxonomyReader} to support NRT.
*
* @lucene.internal */
public final long getTaxonomyEpoch() {
return indexEpoch;
}
}

View File

@ -0,0 +1,189 @@
package org.apache.lucene.facet.search;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.index.FacetFields;
import org.apache.lucene.facet.params.FacetIndexingParams;
import org.apache.lucene.facet.params.FacetSearchParams;
import org.apache.lucene.facet.search.SearcherTaxonomyManager.SearcherAndTaxonomy;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
public class TestSearcherTaxonomyManager extends LuceneTestCase {
public void test() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
final IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir);
final FacetFields facetFields = new FacetFields(tw);
final AtomicBoolean stop = new AtomicBoolean();
Thread indexer = new Thread() {
@Override
public void run() {
Set<String> seen = new HashSet<String>();
List<String> paths = new ArrayList<String>();
while (!stop.get()) {
Document doc = new Document();
List<CategoryPath> docPaths = new ArrayList<CategoryPath>();
int numPaths = _TestUtil.nextInt(random(), 1, 5);
for(int i=0;i<numPaths;i++) {
String path;
if (!paths.isEmpty() && random().nextInt(5) != 4) {
// Use previous path
path = paths.get(random().nextInt(paths.size()));
} else {
// Create new path
path = null;
while (true) {
path = _TestUtil.randomRealisticUnicodeString(random());
if (path.length() != 0 && !seen.contains(path) && path.indexOf(FacetIndexingParams.DEFAULT_FACET_DELIM_CHAR) == -1) {
seen.add(path);
paths.add(path);
break;
}
}
}
docPaths.add(new CategoryPath("field", path));
}
try {
facetFields.addFields(doc, docPaths);
w.addDocument(doc);
} catch (IOException ioe) {
throw new RuntimeException(ioe);
}
}
}
};
final SearcherTaxonomyManager mgr = new SearcherTaxonomyManager(w, true, null, tw);
Thread reopener = new Thread() {
@Override
public void run() {
while(!stop.get()) {
try {
// Sleep for up to 20 msec:
Thread.sleep(random().nextInt(20));
if (VERBOSE) {
System.out.println("TEST: reopen");
}
mgr.maybeRefresh();
if (VERBOSE) {
System.out.println("TEST: reopen done");
}
} catch (Exception ioe) {
throw new RuntimeException(ioe);
}
}
}
};
reopener.start();
float runTimeSec = TEST_NIGHTLY ? 10.0f : 2.0f;
long stopTime = System.currentTimeMillis() + (int) (runTimeSec*1000);
indexer.start();
try {
while (System.currentTimeMillis() < stopTime) {
SearcherAndTaxonomy pair = mgr.acquire();
try {
//System.out.println("search maxOrd=" + pair.taxonomyReader.getSize());
int topN;
if (random().nextBoolean()) {
topN = _TestUtil.nextInt(random(), 1, 20);
} else {
topN = Integer.MAX_VALUE;
}
FacetSearchParams fsp = new FacetSearchParams(new CountFacetRequest(new CategoryPath("field"), topN));
FacetsCollector fc = FacetsCollector.create(fsp, pair.searcher.getIndexReader(), pair.taxonomyReader);
pair.searcher.search(new MatchAllDocsQuery(), fc);
List<FacetResult> results = fc.getFacetResults();
FacetResult fr = results.get(0);
FacetResultNode root = results.get(0).getFacetResultNode();
assertTrue(root.ordinal != 0);
if (pair.searcher.getIndexReader().numDocs() > 0) {
assertTrue(fr.getNumValidDescendants() > 0);
assertFalse(root.subResults.isEmpty());
}
//if (VERBOSE) {
//System.out.println("TEST: facets=" + FacetTestUtils.toSimpleString(results.get(0)));
//}
} finally {
mgr.release(pair);
}
}
} finally {
stop.set(true);
indexer.join();
reopener.join();
}
if (VERBOSE) {
System.out.println("TEST: now stop");
}
IOUtils.close(mgr, tw, w, taxoDir, dir);
}
public void testReplaceTaxonomy() throws Exception {
Directory dir = newDirectory();
Directory taxoDir = newDirectory();
IndexWriter w = new IndexWriter(dir, newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(taxoDir);
Directory taxoDir2 = newDirectory();
DirectoryTaxonomyWriter tw2 = new DirectoryTaxonomyWriter(taxoDir2);
tw2.close();
SearcherTaxonomyManager mgr = new SearcherTaxonomyManager(w, true, null, tw);
w.addDocument(new Document());
tw.replaceTaxonomy(taxoDir2);
taxoDir2.close();
try {
mgr.maybeRefresh();
fail("should have hit exception");
} catch (IllegalStateException ise) {
// expected
}
IOUtils.close(mgr, tw, w, taxoDir, dir);
}
}

View File

@ -448,7 +448,7 @@ public abstract class ShardSearchingTestBase extends LuceneTestCase {
currentNodeVersions = new long[numNodes];
}
public void initSearcher(long[] nodeVersions) {
public void initSearcher(long[] nodeVersions) throws IOException {
assert currentShardSearcher == null;
System.arraycopy(nodeVersions, 0, currentNodeVersions, 0, currentNodeVersions.length);
currentShardSearcher = new ShardIndexSearcher(currentNodeVersions.clone(),