mirror of https://github.com/apache/lucene.git
LUCENE-3573: TaxonomyReader.refresh() was broken if taxonomy index recreated since taxo reader last opened or refreshed.
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1202754 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0605da2de1
commit
0780806efe
|
@ -18,7 +18,6 @@ package org.apache.lucene.index;
|
|||
*/
|
||||
|
||||
import org.apache.lucene.index.codecs.PerDocValues;
|
||||
import org.apache.lucene.index.IndexReader.ReaderContext;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
|
@ -441,6 +440,11 @@ public class FilterIndexReader extends IndexReader {
|
|||
return in.getTopReaderContext();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Map<String, String> getCommitUserData() {
|
||||
return in.getCommitUserData();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Fields fields() throws IOException {
|
||||
ensureOpen();
|
||||
|
|
|
@ -779,6 +779,12 @@ example) so a thread which is in the middle of a search needs to continue using
|
|||
<code>TaxonomyReader</code>, however, we are guaranteed that existing categories are never deleted or modified -
|
||||
the only thing that can happen is that new categories are added. Since search threads do not care if new categories
|
||||
are added in the middle of a search, there is no reason to keep around the old object, and the new one suffices.
|
||||
<br><b>However</b>, if the taxonomy index was recreated since the <code>TaxonomyReader</code> was opened or
|
||||
refreshed, this assumption (that categories are forevr) no longer holds, and <code>refresh()</code> will
|
||||
throw an <code>InconsistentTaxonomyException</code>, guiding the application to open
|
||||
a new <code>TaxonomyReader</code> for up-to-date taxonomy data. (Old one can
|
||||
be closed as soon as it is no more used.)
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
|
|
@ -0,0 +1,40 @@
|
|||
package org.apache.lucene.facet.taxonomy;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Exception indicating that a certain operation could not be performed
|
||||
* on a taxonomy related object because of an inconsistency.
|
||||
* <p>
|
||||
* For example, trying to refresh a taxonomy reader might fail in case
|
||||
* the underlying taxonomy was meanwhile modified in a manner which
|
||||
* does not allow to perform such a refresh. (See {@link TaxonomyReader#refresh()}.)
|
||||
*
|
||||
* @lucene.experimental
|
||||
*/
|
||||
public class InconsistentTaxonomyException extends Exception {
|
||||
|
||||
public InconsistentTaxonomyException(String message) {
|
||||
super(message);
|
||||
}
|
||||
|
||||
public InconsistentTaxonomyException() {
|
||||
super();
|
||||
}
|
||||
|
||||
}
|
|
@ -120,6 +120,14 @@ public interface TaxonomyReader extends Closeable {
|
|||
* faceted search, the taxonomy reader's refresh() should be called only after
|
||||
* a reopen() of the main index.
|
||||
* <P>
|
||||
* Refreshing the taxonomy might fail in some cases, for example
|
||||
* if the taxonomy was recreated since this instance was opened or last refreshed.
|
||||
* In this case an {@link InconsistentTaxonomyException} is thrown,
|
||||
* suggesting that in order to obtain up-to-date taxonomy data a new
|
||||
* {@link TaxonomyReader} should be opened. Note: This {@link TaxonomyReader}
|
||||
* instance remains unchanged and usable in this case, and the application can
|
||||
* continue to use it, and should still {@link #close()} when no longer needed.
|
||||
* <P>
|
||||
* It should be noted that refresh() is similar in purpose to
|
||||
* IndexReader.reopen(), but the two methods behave differently. refresh()
|
||||
* refreshes the existing TaxonomyReader object, rather than opening a new one
|
||||
|
@ -129,8 +137,9 @@ public interface TaxonomyReader extends Closeable {
|
|||
* of the taxonomy open - refreshing the taxonomy to the newest data and using
|
||||
* this new snapshots in all threads (whether new or old) is fine. This saves
|
||||
* us needing to keep multiple copies of the taxonomy open in memory.
|
||||
* @return true if anything has changed, false otherwise.
|
||||
*/
|
||||
public void refresh() throws IOException;
|
||||
public boolean refresh() throws IOException, InconsistentTaxonomyException;
|
||||
|
||||
/**
|
||||
* getParent() returns the ordinal of the parent category of the category
|
||||
|
|
|
@ -10,6 +10,7 @@ import java.util.logging.Level;
|
|||
import java.util.logging.Logger;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.Consts.LoadFullPathOnly;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
|
@ -333,7 +334,7 @@ public class DirectoryTaxonomyReader implements TaxonomyReader {
|
|||
// Note that refresh() is synchronized (it is the only synchronized
|
||||
// method in this class) to ensure that it never gets called concurrently
|
||||
// with itself.
|
||||
public synchronized void refresh() throws IOException {
|
||||
public synchronized boolean refresh() throws IOException, InconsistentTaxonomyException {
|
||||
ensureOpen();
|
||||
/*
|
||||
* Since refresh() can be a lengthy operation, it is very important that we
|
||||
|
@ -349,7 +350,24 @@ public class DirectoryTaxonomyReader implements TaxonomyReader {
|
|||
// no other thread can be writing at this time (this method is the
|
||||
// only possible writer, and it is "synchronized" to avoid this case).
|
||||
IndexReader r2 = IndexReader.openIfChanged(indexReader);
|
||||
if (r2 != null) {
|
||||
if (r2 == null) {
|
||||
return false; // no changes, nothing to do
|
||||
}
|
||||
|
||||
// validate that a refresh is valid at this point, i.e. that the taxonomy
|
||||
// was not recreated since this reader was last opened or refresshed.
|
||||
String t1 = indexReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
|
||||
String t2 = r2.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
|
||||
if (t1==null) {
|
||||
if (t2!=null) {
|
||||
r2.close();
|
||||
throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2);
|
||||
}
|
||||
} else if (!t1.equals(t2)) {
|
||||
r2.close();
|
||||
throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2+" != "+t1);
|
||||
}
|
||||
|
||||
IndexReader oldreader = indexReader;
|
||||
// we can close the old searcher, but need to synchronize this
|
||||
// so that we don't close it in the middle that another routine
|
||||
|
@ -392,7 +410,7 @@ public class DirectoryTaxonomyReader implements TaxonomyReader {
|
|||
i.remove();
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public void close() throws IOException {
|
||||
|
|
|
@ -9,6 +9,7 @@ import java.io.FileInputStream;
|
|||
import java.io.FileNotFoundException;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||
|
@ -83,6 +84,14 @@ import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
|
|||
*/
|
||||
public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
||||
|
||||
/**
|
||||
* Property name of user commit data that contains the creation time of a taxonomy index.
|
||||
* <p>
|
||||
* Applications making use of {@link TaxonomyWriter#commit(Map)} should not use this
|
||||
* particular property name.
|
||||
*/
|
||||
public static final String INDEX_CREATE_TIME = "index.create.time";
|
||||
|
||||
private IndexWriter indexWriter;
|
||||
private int nextID;
|
||||
private char delimiter = Consts.DEFAULT_DELIMITER;
|
||||
|
@ -105,6 +114,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
private IndexReader reader;
|
||||
private int cacheMisses;
|
||||
|
||||
/**
|
||||
* When a taxonomy is created, we mark that its create time should be committed in the
|
||||
* next commit.
|
||||
*/
|
||||
private String taxoIndexCreateTime = null;
|
||||
|
||||
/**
|
||||
* setDelimiter changes the character that the taxonomy uses in its internal
|
||||
* storage as a delimiter between category components. Do not use this
|
||||
|
@ -172,6 +187,10 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
throws CorruptIndexException, LockObtainFailedException,
|
||||
IOException {
|
||||
|
||||
if (!IndexReader.indexExists(directory) || openMode==OpenMode.CREATE) {
|
||||
taxoIndexCreateTime = Long.toString(System.nanoTime());
|
||||
}
|
||||
|
||||
indexWriter = openIndexWriter(directory, openMode);
|
||||
reader = null;
|
||||
|
||||
|
@ -280,10 +299,17 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
@Override
|
||||
public synchronized void close() throws CorruptIndexException, IOException {
|
||||
if (indexWriter != null) {
|
||||
indexWriter.close();
|
||||
indexWriter = null;
|
||||
if (taxoIndexCreateTime != null) {
|
||||
indexWriter.commit(combinedCommitData(null));
|
||||
taxoIndexCreateTime = null;
|
||||
}
|
||||
doClose();
|
||||
}
|
||||
}
|
||||
|
||||
private void doClose() throws CorruptIndexException, IOException {
|
||||
indexWriter.close();
|
||||
indexWriter = null;
|
||||
closeResources();
|
||||
}
|
||||
|
||||
|
@ -584,10 +610,27 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
@Override
|
||||
public synchronized void commit() throws CorruptIndexException, IOException {
|
||||
ensureOpen();
|
||||
if (taxoIndexCreateTime != null) {
|
||||
indexWriter.commit(combinedCommitData(null));
|
||||
taxoIndexCreateTime = null;
|
||||
} else {
|
||||
indexWriter.commit();
|
||||
}
|
||||
refreshReader();
|
||||
}
|
||||
|
||||
/**
|
||||
* Combine original user data with that of the taxonomy creation time
|
||||
*/
|
||||
private Map<String,String> combinedCommitData(Map<String,String> userData) {
|
||||
Map<String,String> m = new HashMap<String, String>();
|
||||
if (userData != null) {
|
||||
m.putAll(userData);
|
||||
}
|
||||
m.put(INDEX_CREATE_TIME, taxoIndexCreateTime);
|
||||
return m;
|
||||
}
|
||||
|
||||
/**
|
||||
* Like commit(), but also store properties with the index. These properties
|
||||
* are retrievable by {@link DirectoryTaxonomyReader#getCommitUserData}.
|
||||
|
@ -596,7 +639,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
@Override
|
||||
public synchronized void commit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
|
||||
ensureOpen();
|
||||
if (taxoIndexCreateTime != null) {
|
||||
indexWriter.commit(combinedCommitData(commitUserData));
|
||||
taxoIndexCreateTime = null;
|
||||
} else {
|
||||
indexWriter.commit(commitUserData);
|
||||
}
|
||||
refreshReader();
|
||||
}
|
||||
|
||||
|
@ -607,8 +655,13 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
@Override
|
||||
public synchronized void prepareCommit() throws CorruptIndexException, IOException {
|
||||
ensureOpen();
|
||||
if (taxoIndexCreateTime != null) {
|
||||
indexWriter.prepareCommit(combinedCommitData(null));
|
||||
taxoIndexCreateTime = null;
|
||||
} else {
|
||||
indexWriter.prepareCommit();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Like above, and also prepares to store user data with the index.
|
||||
|
@ -617,8 +670,13 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
@Override
|
||||
public synchronized void prepareCommit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
|
||||
ensureOpen();
|
||||
if (taxoIndexCreateTime != null) {
|
||||
indexWriter.prepareCommit(combinedCommitData(commitUserData));
|
||||
taxoIndexCreateTime = null;
|
||||
} else {
|
||||
indexWriter.prepareCommit(commitUserData);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* getSize() returns the number of categories in the taxonomy.
|
||||
|
@ -1037,10 +1095,10 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
* will yield an {@link AlreadyClosedException}).
|
||||
*/
|
||||
@Override
|
||||
public void rollback() throws IOException {
|
||||
public synchronized void rollback() throws IOException {
|
||||
ensureOpen();
|
||||
indexWriter.rollback();
|
||||
close();
|
||||
doClose();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,10 +1,17 @@
|
|||
package org.apache.lucene.facet.taxonomy.directory;
|
||||
|
||||
import java.util.Random;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
|
@ -59,6 +66,36 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Test the boolean returned by TR.refresh
|
||||
* @throws Exception
|
||||
*/
|
||||
@Test
|
||||
public void testReaderRefreshResult() throws Exception {
|
||||
Directory dir = null;
|
||||
DirectoryTaxonomyWriter ltw = null;
|
||||
DirectoryTaxonomyReader ltr = null;
|
||||
|
||||
try {
|
||||
dir = newDirectory();
|
||||
ltw = new DirectoryTaxonomyWriter(dir);
|
||||
|
||||
ltw.addCategory(new CategoryPath("a"));
|
||||
ltw.commit();
|
||||
|
||||
ltr = new DirectoryTaxonomyReader(dir);
|
||||
assertFalse("Nothing has changed",ltr.refresh());
|
||||
|
||||
ltw.addCategory(new CategoryPath("b"));
|
||||
ltw.commit();
|
||||
|
||||
assertTrue("changes were committed",ltr.refresh());
|
||||
assertFalse("Nothing has changed",ltr.refresh());
|
||||
} finally {
|
||||
IOUtils.close(ltw, ltr, dir);
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testAlreadyClosed() throws Exception {
|
||||
Directory dir = newDirectory();
|
||||
|
@ -77,4 +114,68 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
|
|||
dir.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* recreating a taxonomy should work well with a freshly opened taxonomy reader
|
||||
*/
|
||||
@Test
|
||||
public void testFreshReadRecreatedTaxonomy() throws Exception {
|
||||
doTestReadRecreatedTaxono(random, true);
|
||||
}
|
||||
|
||||
/**
|
||||
* recreating a taxonomy should work well with a refreshed taxonomy reader
|
||||
*/
|
||||
@Test
|
||||
public void testRefreshReadRecreatedTaxonomy() throws Exception {
|
||||
doTestReadRecreatedTaxono(random, false);
|
||||
}
|
||||
|
||||
private void doTestReadRecreatedTaxono(Random random, boolean closeReader) throws Exception {
|
||||
Directory dir = null;
|
||||
TaxonomyWriter tw = null;
|
||||
TaxonomyReader tr = null;
|
||||
|
||||
// prepare a few categories
|
||||
int n = 10;
|
||||
CategoryPath[] cp = new CategoryPath[n];
|
||||
for (int i=0; i<n; i++) {
|
||||
cp[i] = new CategoryPath("a", Integer.toString(i));
|
||||
}
|
||||
|
||||
try {
|
||||
dir = newDirectory();
|
||||
|
||||
tw = new DirectoryTaxonomyWriter(dir);
|
||||
tw.addCategory(new CategoryPath("a"));
|
||||
tw.close();
|
||||
|
||||
tr = new DirectoryTaxonomyReader(dir);
|
||||
int baseNumcategories = tr.getSize();
|
||||
|
||||
for (int i=0; i<n; i++) {
|
||||
int k = random.nextInt(n);
|
||||
tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE);
|
||||
for (int j=0; j<=k; j++) {
|
||||
tw.addCategory(new CategoryPath(cp[j]));
|
||||
}
|
||||
tw.close();
|
||||
if (closeReader) {
|
||||
tr.close();
|
||||
tr = new DirectoryTaxonomyReader(dir);
|
||||
} else {
|
||||
try {
|
||||
tr.refresh();
|
||||
fail("Expected InconsistentTaxonomyException");
|
||||
} catch (InconsistentTaxonomyException e) {
|
||||
tr.close();
|
||||
tr = new DirectoryTaxonomyReader(dir);
|
||||
}
|
||||
}
|
||||
assertEquals("Wrong #categories in taxonomy (i="+i+", k="+k+")", baseNumcategories + 1 + k, tr.getSize());
|
||||
}
|
||||
} finally {
|
||||
IOUtils.close(tr, tw, dir);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue