LUCENE-3573: TaxonomyReader.refresh() was broken if taxonomy index recreated since taxo reader last opened or refreshed.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1202754 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doron Cohen 2011-11-16 15:59:43 +00:00
parent 0605da2de1
commit 0780806efe
7 changed files with 250 additions and 14 deletions

View File

@ -18,7 +18,6 @@ package org.apache.lucene.index;
*/
import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@ -441,6 +440,11 @@ public class FilterIndexReader extends IndexReader {
return in.getTopReaderContext();
}
@Override
public Map<String, String> getCommitUserData() {
return in.getCommitUserData();
}
@Override
public Fields fields() throws IOException {
ensureOpen();

View File

@ -779,6 +779,12 @@ example) so a thread which is in the middle of a search needs to continue using
<code>TaxonomyReader</code>, however, we are guaranteed that existing categories are never deleted or modified -
the only thing that can happen is that new categories are added. Since search threads do not care if new categories
are added in the middle of a search, there is no reason to keep around the old object, and the new one suffices.
<br><b>However</b>, if the taxonomy index was recreated since the <code>TaxonomyReader</code> was opened or
refreshed, this assumption (that categories are forevr) no longer holds, and <code>refresh()</code> will
throw an <code>InconsistentTaxonomyException</code>, guiding the application to open
a new <code>TaxonomyReader</code> for up-to-date taxonomy data. (Old one can
be closed as soon as it is no more used.)
</body>
</html>

View File

@ -0,0 +1,40 @@
package org.apache.lucene.facet.taxonomy;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Exception indicating that a certain operation could not be performed
* on a taxonomy related object because of an inconsistency.
* <p>
* For example, trying to refresh a taxonomy reader might fail in case
* the underlying taxonomy was meanwhile modified in a manner which
* does not allow to perform such a refresh. (See {@link TaxonomyReader#refresh()}.)
*
* @lucene.experimental
*/
public class InconsistentTaxonomyException extends Exception {
public InconsistentTaxonomyException(String message) {
super(message);
}
public InconsistentTaxonomyException() {
super();
}
}

View File

@ -120,6 +120,14 @@ public interface TaxonomyReader extends Closeable {
* faceted search, the taxonomy reader's refresh() should be called only after
* a reopen() of the main index.
* <P>
* Refreshing the taxonomy might fail in some cases, for example
* if the taxonomy was recreated since this instance was opened or last refreshed.
* In this case an {@link InconsistentTaxonomyException} is thrown,
* suggesting that in order to obtain up-to-date taxonomy data a new
* {@link TaxonomyReader} should be opened. Note: This {@link TaxonomyReader}
* instance remains unchanged and usable in this case, and the application can
* continue to use it, and should still {@link #close()} when no longer needed.
* <P>
* It should be noted that refresh() is similar in purpose to
* IndexReader.reopen(), but the two methods behave differently. refresh()
* refreshes the existing TaxonomyReader object, rather than opening a new one
@ -129,8 +137,9 @@ public interface TaxonomyReader extends Closeable {
* of the taxonomy open - refreshing the taxonomy to the newest data and using
* this new snapshots in all threads (whether new or old) is fine. This saves
* us needing to keep multiple copies of the taxonomy open in memory.
* @return true if anything has changed, false otherwise.
*/
public void refresh() throws IOException;
public boolean refresh() throws IOException, InconsistentTaxonomyException;
/**
* getParent() returns the ordinal of the parent category of the category

View File

@ -10,6 +10,7 @@ import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.Consts.LoadFullPathOnly;
import org.apache.lucene.index.CorruptIndexException;
@ -333,7 +334,7 @@ public class DirectoryTaxonomyReader implements TaxonomyReader {
// Note that refresh() is synchronized (it is the only synchronized
// method in this class) to ensure that it never gets called concurrently
// with itself.
public synchronized void refresh() throws IOException {
public synchronized boolean refresh() throws IOException, InconsistentTaxonomyException {
ensureOpen();
/*
* Since refresh() can be a lengthy operation, it is very important that we
@ -349,7 +350,24 @@ public class DirectoryTaxonomyReader implements TaxonomyReader {
// no other thread can be writing at this time (this method is the
// only possible writer, and it is "synchronized" to avoid this case).
IndexReader r2 = IndexReader.openIfChanged(indexReader);
if (r2 != null) {
if (r2 == null) {
return false; // no changes, nothing to do
}
// validate that a refresh is valid at this point, i.e. that the taxonomy
// was not recreated since this reader was last opened or refresshed.
String t1 = indexReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
String t2 = r2.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
if (t1==null) {
if (t2!=null) {
r2.close();
throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2);
}
} else if (!t1.equals(t2)) {
r2.close();
throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2+" != "+t1);
}
IndexReader oldreader = indexReader;
// we can close the old searcher, but need to synchronize this
// so that we don't close it in the middle that another routine
@ -392,8 +410,8 @@ public class DirectoryTaxonomyReader implements TaxonomyReader {
i.remove();
}
}
return true;
}
}
public void close() throws IOException {
if (!closed) {

View File

@ -9,6 +9,7 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
@ -83,6 +84,14 @@ import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
*/
public class DirectoryTaxonomyWriter implements TaxonomyWriter {
/**
* Property name of user commit data that contains the creation time of a taxonomy index.
* <p>
* Applications making use of {@link TaxonomyWriter#commit(Map)} should not use this
* particular property name.
*/
public static final String INDEX_CREATE_TIME = "index.create.time";
private IndexWriter indexWriter;
private int nextID;
private char delimiter = Consts.DEFAULT_DELIMITER;
@ -105,6 +114,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
private IndexReader reader;
private int cacheMisses;
/**
* When a taxonomy is created, we mark that its create time should be committed in the
* next commit.
*/
private String taxoIndexCreateTime = null;
/**
* setDelimiter changes the character that the taxonomy uses in its internal
* storage as a delimiter between category components. Do not use this
@ -172,6 +187,10 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
throws CorruptIndexException, LockObtainFailedException,
IOException {
if (!IndexReader.indexExists(directory) || openMode==OpenMode.CREATE) {
taxoIndexCreateTime = Long.toString(System.nanoTime());
}
indexWriter = openIndexWriter(directory, openMode);
reader = null;
@ -280,10 +299,17 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
@Override
public synchronized void close() throws CorruptIndexException, IOException {
if (indexWriter != null) {
indexWriter.close();
indexWriter = null;
if (taxoIndexCreateTime != null) {
indexWriter.commit(combinedCommitData(null));
taxoIndexCreateTime = null;
}
doClose();
}
}
private void doClose() throws CorruptIndexException, IOException {
indexWriter.close();
indexWriter = null;
closeResources();
}
@ -584,10 +610,27 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
@Override
public synchronized void commit() throws CorruptIndexException, IOException {
ensureOpen();
indexWriter.commit();
if (taxoIndexCreateTime != null) {
indexWriter.commit(combinedCommitData(null));
taxoIndexCreateTime = null;
} else {
indexWriter.commit();
}
refreshReader();
}
/**
* Combine original user data with that of the taxonomy creation time
*/
private Map<String,String> combinedCommitData(Map<String,String> userData) {
Map<String,String> m = new HashMap<String, String>();
if (userData != null) {
m.putAll(userData);
}
m.put(INDEX_CREATE_TIME, taxoIndexCreateTime);
return m;
}
/**
* Like commit(), but also store properties with the index. These properties
* are retrievable by {@link DirectoryTaxonomyReader#getCommitUserData}.
@ -596,7 +639,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
@Override
public synchronized void commit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
ensureOpen();
indexWriter.commit(commitUserData);
if (taxoIndexCreateTime != null) {
indexWriter.commit(combinedCommitData(commitUserData));
taxoIndexCreateTime = null;
} else {
indexWriter.commit(commitUserData);
}
refreshReader();
}
@ -607,7 +655,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
@Override
public synchronized void prepareCommit() throws CorruptIndexException, IOException {
ensureOpen();
indexWriter.prepareCommit();
if (taxoIndexCreateTime != null) {
indexWriter.prepareCommit(combinedCommitData(null));
taxoIndexCreateTime = null;
} else {
indexWriter.prepareCommit();
}
}
/**
@ -617,7 +670,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
@Override
public synchronized void prepareCommit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
ensureOpen();
indexWriter.prepareCommit(commitUserData);
if (taxoIndexCreateTime != null) {
indexWriter.prepareCommit(combinedCommitData(commitUserData));
taxoIndexCreateTime = null;
} else {
indexWriter.prepareCommit(commitUserData);
}
}
/**
@ -1037,10 +1095,10 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
* will yield an {@link AlreadyClosedException}).
*/
@Override
public void rollback() throws IOException {
public synchronized void rollback() throws IOException {
ensureOpen();
indexWriter.rollback();
close();
doClose();
}
}

View File

@ -1,10 +1,17 @@
package org.apache.lucene.facet.taxonomy.directory;
import java.util.Random;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test;
@ -59,6 +66,36 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
dir.close();
}
/**
* Test the boolean returned by TR.refresh
* @throws Exception
*/
@Test
public void testReaderRefreshResult() throws Exception {
Directory dir = null;
DirectoryTaxonomyWriter ltw = null;
DirectoryTaxonomyReader ltr = null;
try {
dir = newDirectory();
ltw = new DirectoryTaxonomyWriter(dir);
ltw.addCategory(new CategoryPath("a"));
ltw.commit();
ltr = new DirectoryTaxonomyReader(dir);
assertFalse("Nothing has changed",ltr.refresh());
ltw.addCategory(new CategoryPath("b"));
ltw.commit();
assertTrue("changes were committed",ltr.refresh());
assertFalse("Nothing has changed",ltr.refresh());
} finally {
IOUtils.close(ltw, ltr, dir);
}
}
@Test
public void testAlreadyClosed() throws Exception {
Directory dir = newDirectory();
@ -77,4 +114,68 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
dir.close();
}
/**
* recreating a taxonomy should work well with a freshly opened taxonomy reader
*/
@Test
public void testFreshReadRecreatedTaxonomy() throws Exception {
doTestReadRecreatedTaxono(random, true);
}
/**
* recreating a taxonomy should work well with a refreshed taxonomy reader
*/
@Test
public void testRefreshReadRecreatedTaxonomy() throws Exception {
doTestReadRecreatedTaxono(random, false);
}
private void doTestReadRecreatedTaxono(Random random, boolean closeReader) throws Exception {
Directory dir = null;
TaxonomyWriter tw = null;
TaxonomyReader tr = null;
// prepare a few categories
int n = 10;
CategoryPath[] cp = new CategoryPath[n];
for (int i=0; i<n; i++) {
cp[i] = new CategoryPath("a", Integer.toString(i));
}
try {
dir = newDirectory();
tw = new DirectoryTaxonomyWriter(dir);
tw.addCategory(new CategoryPath("a"));
tw.close();
tr = new DirectoryTaxonomyReader(dir);
int baseNumcategories = tr.getSize();
for (int i=0; i<n; i++) {
int k = random.nextInt(n);
tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
for (int j=0; j<=k; j++) {
tw.addCategory(new CategoryPath(cp[j]));
}
tw.close();
if (closeReader) {
tr.close();
tr = new DirectoryTaxonomyReader(dir);
} else {
try {
tr.refresh();
fail("Expected InconsistentTaxonomyException");
} catch (InconsistentTaxonomyException e) {
tr.close();
tr = new DirectoryTaxonomyReader(dir);
}
}
assertEquals("Wrong #categories in taxonomy (i="+i+", k="+k+")", baseNumcategories + 1 + k, tr.getSize());
}
} finally {
IOUtils.close(tr, tw, dir);
}
}
}