LUCENE-3573: TaxonomyReader.refresh() was broken if taxonomy index recreated since taxo reader last opened or refreshed.

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1202754 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Doron Cohen 2011-11-16 15:59:43 +00:00
parent 0605da2de1
commit 0780806efe
7 changed files with 250 additions and 14 deletions

View File

@ -18,7 +18,6 @@ package org.apache.lucene.index;
*/ */
import org.apache.lucene.index.codecs.PerDocValues; import org.apache.lucene.index.codecs.PerDocValues;
import org.apache.lucene.index.IndexReader.ReaderContext;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits; import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.BytesRef;
@ -441,6 +440,11 @@ public class FilterIndexReader extends IndexReader {
return in.getTopReaderContext(); return in.getTopReaderContext();
} }
@Override
public Map<String, String> getCommitUserData() {
return in.getCommitUserData();
}
@Override @Override
public Fields fields() throws IOException { public Fields fields() throws IOException {
ensureOpen(); ensureOpen();

View File

@ -779,6 +779,12 @@ example) so a thread which is in the middle of a search needs to continue using
<code>TaxonomyReader</code>, however, we are guaranteed that existing categories are never deleted or modified - <code>TaxonomyReader</code>, however, we are guaranteed that existing categories are never deleted or modified -
the only thing that can happen is that new categories are added. Since search threads do not care if new categories the only thing that can happen is that new categories are added. Since search threads do not care if new categories
are added in the middle of a search, there is no reason to keep around the old object, and the new one suffices. are added in the middle of a search, there is no reason to keep around the old object, and the new one suffices.
<br><b>However</b>, if the taxonomy index was recreated since the <code>TaxonomyReader</code> was opened or
refreshed, this assumption (that categories are forevr) no longer holds, and <code>refresh()</code> will
throw an <code>InconsistentTaxonomyException</code>, guiding the application to open
a new <code>TaxonomyReader</code> for up-to-date taxonomy data. (Old one can
be closed as soon as it is no more used.)
</body> </body>
</html> </html>

View File

@ -0,0 +1,40 @@
package org.apache.lucene.facet.taxonomy;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Exception indicating that a certain operation could not be performed
* on a taxonomy related object because of an inconsistency.
* <p>
* For example, trying to refresh a taxonomy reader might fail in case
* the underlying taxonomy was meanwhile modified in a manner which
* does not allow to perform such a refresh. (See {@link TaxonomyReader#refresh()}.)
*
* @lucene.experimental
*/
public class InconsistentTaxonomyException extends Exception {
public InconsistentTaxonomyException(String message) {
super(message);
}
public InconsistentTaxonomyException() {
super();
}
}

View File

@ -120,6 +120,14 @@ public interface TaxonomyReader extends Closeable {
* faceted search, the taxonomy reader's refresh() should be called only after * faceted search, the taxonomy reader's refresh() should be called only after
* a reopen() of the main index. * a reopen() of the main index.
* <P> * <P>
* Refreshing the taxonomy might fail in some cases, for example
* if the taxonomy was recreated since this instance was opened or last refreshed.
* In this case an {@link InconsistentTaxonomyException} is thrown,
* suggesting that in order to obtain up-to-date taxonomy data a new
* {@link TaxonomyReader} should be opened. Note: This {@link TaxonomyReader}
* instance remains unchanged and usable in this case, and the application can
* continue to use it, and should still {@link #close()} when no longer needed.
* <P>
* It should be noted that refresh() is similar in purpose to * It should be noted that refresh() is similar in purpose to
* IndexReader.reopen(), but the two methods behave differently. refresh() * IndexReader.reopen(), but the two methods behave differently. refresh()
* refreshes the existing TaxonomyReader object, rather than opening a new one * refreshes the existing TaxonomyReader object, rather than opening a new one
@ -129,8 +137,9 @@ public interface TaxonomyReader extends Closeable {
* of the taxonomy open - refreshing the taxonomy to the newest data and using * of the taxonomy open - refreshing the taxonomy to the newest data and using
* this new snapshots in all threads (whether new or old) is fine. This saves * this new snapshots in all threads (whether new or old) is fine. This saves
* us needing to keep multiple copies of the taxonomy open in memory. * us needing to keep multiple copies of the taxonomy open in memory.
* @return true if anything has changed, false otherwise.
*/ */
public void refresh() throws IOException; public boolean refresh() throws IOException, InconsistentTaxonomyException;
/** /**
* getParent() returns the ordinal of the parent category of the category * getParent() returns the ordinal of the parent category of the category

View File

@ -10,6 +10,7 @@ import java.util.logging.Level;
import java.util.logging.Logger; import java.util.logging.Logger;
import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.Consts.LoadFullPathOnly; import org.apache.lucene.facet.taxonomy.directory.Consts.LoadFullPathOnly;
import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.CorruptIndexException;
@ -333,7 +334,7 @@ public class DirectoryTaxonomyReader implements TaxonomyReader {
// Note that refresh() is synchronized (it is the only synchronized // Note that refresh() is synchronized (it is the only synchronized
// method in this class) to ensure that it never gets called concurrently // method in this class) to ensure that it never gets called concurrently
// with itself. // with itself.
public synchronized void refresh() throws IOException { public synchronized boolean refresh() throws IOException, InconsistentTaxonomyException {
ensureOpen(); ensureOpen();
/* /*
* Since refresh() can be a lengthy operation, it is very important that we * Since refresh() can be a lengthy operation, it is very important that we
@ -349,7 +350,24 @@ public class DirectoryTaxonomyReader implements TaxonomyReader {
// no other thread can be writing at this time (this method is the // no other thread can be writing at this time (this method is the
// only possible writer, and it is "synchronized" to avoid this case). // only possible writer, and it is "synchronized" to avoid this case).
IndexReader r2 = IndexReader.openIfChanged(indexReader); IndexReader r2 = IndexReader.openIfChanged(indexReader);
if (r2 != null) { if (r2 == null) {
return false; // no changes, nothing to do
}
// validate that a refresh is valid at this point, i.e. that the taxonomy
// was not recreated since this reader was last opened or refresshed.
String t1 = indexReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
String t2 = r2.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
if (t1==null) {
if (t2!=null) {
r2.close();
throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2);
}
} else if (!t1.equals(t2)) {
r2.close();
throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2+" != "+t1);
}
IndexReader oldreader = indexReader; IndexReader oldreader = indexReader;
// we can close the old searcher, but need to synchronize this // we can close the old searcher, but need to synchronize this
// so that we don't close it in the middle that another routine // so that we don't close it in the middle that another routine
@ -392,7 +410,7 @@ public class DirectoryTaxonomyReader implements TaxonomyReader {
i.remove(); i.remove();
} }
} }
} return true;
} }
public void close() throws IOException { public void close() throws IOException {

View File

@ -9,6 +9,7 @@ import java.io.FileInputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.util.HashMap;
import java.util.Map; import java.util.Map;
import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.analysis.core.KeywordAnalyzer;
@ -83,6 +84,14 @@ import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
*/ */
public class DirectoryTaxonomyWriter implements TaxonomyWriter { public class DirectoryTaxonomyWriter implements TaxonomyWriter {
/**
* Property name of user commit data that contains the creation time of a taxonomy index.
* <p>
* Applications making use of {@link TaxonomyWriter#commit(Map)} should not use this
* particular property name.
*/
public static final String INDEX_CREATE_TIME = "index.create.time";
private IndexWriter indexWriter; private IndexWriter indexWriter;
private int nextID; private int nextID;
private char delimiter = Consts.DEFAULT_DELIMITER; private char delimiter = Consts.DEFAULT_DELIMITER;
@ -105,6 +114,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
private IndexReader reader; private IndexReader reader;
private int cacheMisses; private int cacheMisses;
/**
* When a taxonomy is created, we mark that its create time should be committed in the
* next commit.
*/
private String taxoIndexCreateTime = null;
/** /**
* setDelimiter changes the character that the taxonomy uses in its internal * setDelimiter changes the character that the taxonomy uses in its internal
* storage as a delimiter between category components. Do not use this * storage as a delimiter between category components. Do not use this
@ -172,6 +187,10 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
throws CorruptIndexException, LockObtainFailedException, throws CorruptIndexException, LockObtainFailedException,
IOException { IOException {
if (!IndexReader.indexExists(directory) || openMode==OpenMode.CREATE) {
taxoIndexCreateTime = Long.toString(System.nanoTime());
}
indexWriter = openIndexWriter(directory, openMode); indexWriter = openIndexWriter(directory, openMode);
reader = null; reader = null;
@ -280,10 +299,17 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
@Override @Override
public synchronized void close() throws CorruptIndexException, IOException { public synchronized void close() throws CorruptIndexException, IOException {
if (indexWriter != null) { if (indexWriter != null) {
indexWriter.close(); if (taxoIndexCreateTime != null) {
indexWriter = null; indexWriter.commit(combinedCommitData(null));
taxoIndexCreateTime = null;
}
doClose();
}
} }
private void doClose() throws CorruptIndexException, IOException {
indexWriter.close();
indexWriter = null;
closeResources(); closeResources();
} }
@ -584,10 +610,27 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
@Override @Override
public synchronized void commit() throws CorruptIndexException, IOException { public synchronized void commit() throws CorruptIndexException, IOException {
ensureOpen(); ensureOpen();
if (taxoIndexCreateTime != null) {
indexWriter.commit(combinedCommitData(null));
taxoIndexCreateTime = null;
} else {
indexWriter.commit(); indexWriter.commit();
}
refreshReader(); refreshReader();
} }
/**
* Combine original user data with that of the taxonomy creation time
*/
private Map<String,String> combinedCommitData(Map<String,String> userData) {
Map<String,String> m = new HashMap<String, String>();
if (userData != null) {
m.putAll(userData);
}
m.put(INDEX_CREATE_TIME, taxoIndexCreateTime);
return m;
}
/** /**
* Like commit(), but also store properties with the index. These properties * Like commit(), but also store properties with the index. These properties
* are retrievable by {@link DirectoryTaxonomyReader#getCommitUserData}. * are retrievable by {@link DirectoryTaxonomyReader#getCommitUserData}.
@ -596,7 +639,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
@Override @Override
public synchronized void commit(Map<String,String> commitUserData) throws CorruptIndexException, IOException { public synchronized void commit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
ensureOpen(); ensureOpen();
if (taxoIndexCreateTime != null) {
indexWriter.commit(combinedCommitData(commitUserData));
taxoIndexCreateTime = null;
} else {
indexWriter.commit(commitUserData); indexWriter.commit(commitUserData);
}
refreshReader(); refreshReader();
} }
@ -607,8 +655,13 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
@Override @Override
public synchronized void prepareCommit() throws CorruptIndexException, IOException { public synchronized void prepareCommit() throws CorruptIndexException, IOException {
ensureOpen(); ensureOpen();
if (taxoIndexCreateTime != null) {
indexWriter.prepareCommit(combinedCommitData(null));
taxoIndexCreateTime = null;
} else {
indexWriter.prepareCommit(); indexWriter.prepareCommit();
} }
}
/** /**
* Like above, and also prepares to store user data with the index. * Like above, and also prepares to store user data with the index.
@ -617,8 +670,13 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
@Override @Override
public synchronized void prepareCommit(Map<String,String> commitUserData) throws CorruptIndexException, IOException { public synchronized void prepareCommit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
ensureOpen(); ensureOpen();
if (taxoIndexCreateTime != null) {
indexWriter.prepareCommit(combinedCommitData(commitUserData));
taxoIndexCreateTime = null;
} else {
indexWriter.prepareCommit(commitUserData); indexWriter.prepareCommit(commitUserData);
} }
}
/** /**
* getSize() returns the number of categories in the taxonomy. * getSize() returns the number of categories in the taxonomy.
@ -1037,10 +1095,10 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
* will yield an {@link AlreadyClosedException}). * will yield an {@link AlreadyClosedException}).
*/ */
@Override @Override
public void rollback() throws IOException { public synchronized void rollback() throws IOException {
ensureOpen(); ensureOpen();
indexWriter.rollback(); indexWriter.rollback();
close(); doClose();
} }
} }

View File

@ -1,10 +1,17 @@
package org.apache.lucene.facet.taxonomy.directory; package org.apache.lucene.facet.taxonomy.directory;
import java.util.Random;
import org.apache.lucene.facet.taxonomy.CategoryPath; import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory; import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase; import org.apache.lucene.util.LuceneTestCase;
import org.junit.Test; import org.junit.Test;
@ -59,6 +66,36 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
dir.close(); dir.close();
} }
/**
* Test the boolean returned by TR.refresh
* @throws Exception
*/
@Test
public void testReaderRefreshResult() throws Exception {
Directory dir = null;
DirectoryTaxonomyWriter ltw = null;
DirectoryTaxonomyReader ltr = null;
try {
dir = newDirectory();
ltw = new DirectoryTaxonomyWriter(dir);
ltw.addCategory(new CategoryPath("a"));
ltw.commit();
ltr = new DirectoryTaxonomyReader(dir);
assertFalse("Nothing has changed",ltr.refresh());
ltw.addCategory(new CategoryPath("b"));
ltw.commit();
assertTrue("changes were committed",ltr.refresh());
assertFalse("Nothing has changed",ltr.refresh());
} finally {
IOUtils.close(ltw, ltr, dir);
}
}
@Test @Test
public void testAlreadyClosed() throws Exception { public void testAlreadyClosed() throws Exception {
Directory dir = newDirectory(); Directory dir = newDirectory();
@ -77,4 +114,68 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
dir.close(); dir.close();
} }
/**
* recreating a taxonomy should work well with a freshly opened taxonomy reader
*/
@Test
public void testFreshReadRecreatedTaxonomy() throws Exception {
doTestReadRecreatedTaxono(random, true);
}
/**
* recreating a taxonomy should work well with a refreshed taxonomy reader
*/
@Test
public void testRefreshReadRecreatedTaxonomy() throws Exception {
doTestReadRecreatedTaxono(random, false);
}
private void doTestReadRecreatedTaxono(Random random, boolean closeReader) throws Exception {
Directory dir = null;
TaxonomyWriter tw = null;
TaxonomyReader tr = null;
// prepare a few categories
int n = 10;
CategoryPath[] cp = new CategoryPath[n];
for (int i=0; i<n; i++) {
cp[i] = new CategoryPath("a", Integer.toString(i));
}
try {
dir = newDirectory();
tw = new DirectoryTaxonomyWriter(dir);
tw.addCategory(new CategoryPath("a"));
tw.close();
tr = new DirectoryTaxonomyReader(dir);
int baseNumcategories = tr.getSize();
for (int i=0; i<n; i++) {
int k = random.nextInt(n);
tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
for (int j=0; j<=k; j++) {
tw.addCategory(new CategoryPath(cp[j]));
}
tw.close();
if (closeReader) {
tr.close();
tr = new DirectoryTaxonomyReader(dir);
} else {
try {
tr.refresh();
fail("Expected InconsistentTaxonomyException");
} catch (InconsistentTaxonomyException e) {
tr.close();
tr = new DirectoryTaxonomyReader(dir);
}
}
assertEquals("Wrong #categories in taxonomy (i="+i+", k="+k+")", baseNumcategories + 1 + k, tr.getSize());
}
} finally {
IOUtils.close(tr, tw, dir);
}
}
} }