LUCENE-3794: DirectoryTaxonomyWriter can lose the INDEX_CREATE_TIME property, causing DirTaxoReader.refresh() to falsely succeed (or fail)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1244964 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2012-02-16 12:54:56 +00:00
parent dddbe9456d
commit 242092c929
3 changed files with 137 additions and 61 deletions

View File

@ -32,6 +32,7 @@ import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.AlreadyClosedException;
@ -87,13 +88,14 @@ import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
public class DirectoryTaxonomyWriter implements TaxonomyWriter {
/**
* Property name of user commit data that contains the creation time of a taxonomy index.
* Property name of user commit data that contains the creation time of a
* taxonomy index.
* <p>
* Applications making use of {@link TaxonomyWriter#commit(Map)} should not use this
* particular property name.
* Applications should not use this property in their commit data because it
* will be overridden by this taxonomy writer.
*/
public static final String INDEX_CREATE_TIME = "index.create.time";
private IndexWriter indexWriter;
private int nextID;
private char delimiter = Consts.DEFAULT_DELIMITER;
@ -116,11 +118,15 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
private DirectoryReader reader;
private int cacheMisses;
/**
* When a taxonomy is created, we mark that its create time should be committed in the
* next commit.
*/
private String taxoIndexCreateTime = null;
/** Records the taxonomy index creation time. */
private final String createTime;
/** Reads the commit data from a Directory. */
private static Map<String, String> readCommitData(Directory dir) throws IOException {
SegmentInfos infos = new SegmentInfos();
infos.read(dir);
return infos.getUserData();
}
/**
* setDelimiter changes the character that the taxonomy uses in its internal
@ -185,12 +191,20 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
* if another error occurred.
*/
public DirectoryTaxonomyWriter(Directory directory, OpenMode openMode,
TaxonomyWriterCache cache)
throws CorruptIndexException, LockObtainFailedException,
IOException {
TaxonomyWriterCache cache) throws IOException {
if (!DirectoryReader.indexExists(directory) || openMode==OpenMode.CREATE) {
taxoIndexCreateTime = Long.toString(System.nanoTime());
createTime = Long.toString(System.nanoTime());
} else {
Map<String, String> commitData = readCommitData(directory);
if (commitData != null) {
// It is ok if an existing index doesn't have commitData, or the
// INDEX_CREATE_TIME property. If ever it will be recreated, we'll set
// createTime accordingly in the above 'if'.
createTime = commitData.get(INDEX_CREATE_TIME);
} else {
createTime = null;
}
}
IndexWriterConfig config = createIndexWriterConfig(openMode);
@ -209,7 +223,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
this.nextID = indexWriter.maxDoc();
if (cache==null) {
if (cache == null) {
cache = defaultTaxonomyWriterCache();
}
this.cache = cache;
@ -323,10 +337,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
@Override
public synchronized void close() throws CorruptIndexException, IOException {
if (indexWriter != null) {
if (taxoIndexCreateTime != null) {
indexWriter.commit(combinedCommitData(null));
taxoIndexCreateTime = null;
}
indexWriter.commit(combinedCommitData(null));
doClose();
}
}
@ -636,12 +647,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
@Override
public synchronized void commit() throws CorruptIndexException, IOException {
ensureOpen();
if (taxoIndexCreateTime != null) {
indexWriter.commit(combinedCommitData(null));
taxoIndexCreateTime = null;
} else {
indexWriter.commit();
}
indexWriter.commit(combinedCommitData(null));
refreshReader();
}
@ -653,7 +659,9 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
if (userData != null) {
m.putAll(userData);
}
m.put(INDEX_CREATE_TIME, taxoIndexCreateTime);
if (createTime != null) {
m.put(INDEX_CREATE_TIME, createTime);
}
return m;
}
@ -665,12 +673,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
@Override
public synchronized void commit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
ensureOpen();
if (taxoIndexCreateTime != null) {
indexWriter.commit(combinedCommitData(commitUserData));
taxoIndexCreateTime = null;
} else {
indexWriter.commit(commitUserData);
}
indexWriter.commit(combinedCommitData(commitUserData));
refreshReader();
}
@ -681,12 +684,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
@Override
public synchronized void prepareCommit() throws CorruptIndexException, IOException {
ensureOpen();
if (taxoIndexCreateTime != null) {
indexWriter.prepareCommit(combinedCommitData(null));
taxoIndexCreateTime = null;
} else {
indexWriter.prepareCommit();
}
indexWriter.prepareCommit(combinedCommitData(null));
}
/**
@ -696,12 +694,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
@Override
public synchronized void prepareCommit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
ensureOpen();
if (taxoIndexCreateTime != null) {
indexWriter.prepareCommit(combinedCommitData(commitUserData));
taxoIndexCreateTime = null;
} else {
indexWriter.prepareCommit(commitUserData);
}
indexWriter.prepareCommit(combinedCommitData(commitUserData));
}
/**
@ -864,7 +857,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
TermsEnum[] othertes = new TermsEnum[taxonomies.length];
DocsEnum[] otherdocsEnum = new DocsEnum[taxonomies.length]; // just for reuse
for (int i=0; i<taxonomies.length; i++) {
otherreaders[i] = IndexReader.open(taxonomies[i]);
otherreaders[i] = DirectoryReader.open(taxonomies[i]);
terms = MultiFields.getTerms(otherreaders[i], Consts.FULL);
assert terms != null; // TODO (Facet): explicit check / throw exception?
othertes[i] = terms.iterator(null);

View File

@ -151,7 +151,7 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
tw.close();
tr = new DirectoryTaxonomyReader(dir);
int baseNumcategories = tr.getSize();
int baseNumCategories = tr.getSize();
for (int i=0; i<n; i++) {
int k = random.nextInt(n);
@ -172,7 +172,7 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase {
tr = new DirectoryTaxonomyReader(dir);
}
}
assertEquals("Wrong #categories in taxonomy (i="+i+", k="+k+")", baseNumcategories + 1 + k, tr.getSize());
assertEquals("Wrong #categories in taxonomy (i="+i+", k="+k+")", baseNumCategories + 1 + k, tr.getSize());
}
} finally {
IOUtils.close(tr, tw, dir);

View File

@ -1,19 +1,21 @@
package org.apache.lucene.facet.taxonomy.directory;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.junit.Test;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
import org.junit.Test;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -59,7 +61,7 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
ltw.commit(); // first commit, so that an index will be created
ltw.addCategory(new CategoryPath("a"));
IndexReader r = IndexReader.open(dir);
IndexReader r = DirectoryReader.open(dir);
assertEquals("No categories should have been committed to the underlying directory", 1, r.numDocs());
r.close();
ltw.close();
@ -68,23 +70,38 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
@Test
public void testCommitUserData() throws Exception {
// Verifies that committed data is retrievable
// Verifies taxonomy commit data
Directory dir = newDirectory();
DirectoryTaxonomyWriter ltw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, new NoOpCache());
assertFalse(DirectoryReader.indexExists(dir));
ltw.commit(); // first commit, so that an index will be created
ltw.addCategory(new CategoryPath("a"));
ltw.addCategory(new CategoryPath("b"));
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, new NoOpCache());
taxoWriter.addCategory(new CategoryPath("a"));
taxoWriter.addCategory(new CategoryPath("b"));
Map <String, String> userCommitData = new HashMap<String, String>();
userCommitData.put("testing", "1 2 3");
ltw.commit(userCommitData);
ltw.close();
DirectoryReader r = IndexReader.open(dir);
taxoWriter.commit(userCommitData);
taxoWriter.close();
DirectoryReader r = DirectoryReader.open(dir);
assertEquals("2 categories plus root should have been committed to the underlying directory", 3, r.numDocs());
Map <String, String> readUserCommitData = r.getIndexCommit().getUserData();
assertTrue("wrong value extracted from commit data",
"1 2 3".equals(readUserCommitData.get("testing")));
assertNotNull("index.create.time not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME));
r.close();
// open DirTaxoWriter again and commit, INDEX_CREATE_TIME should still exist
// in the commit data, otherwise DirTaxoReader.refresh() might not detect
// that the taxonomy index has been recreated.
taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, new NoOpCache());
taxoWriter.addCategory(new CategoryPath("c")); // add a category so that commit will happen
taxoWriter.commit(new HashMap<String, String>(){{
put("just", "data");
}});
taxoWriter.close();
r = DirectoryReader.open(dir);
readUserCommitData = r.getIndexCommit().getUserData();
assertNotNull("index.create.time not found in commitData", readUserCommitData.get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME));
r.close();
dir.close();
}
@ -118,5 +135,71 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
}
dir.close();
}
private void touchTaxo(DirectoryTaxonomyWriter taxoWriter, CategoryPath cp) throws IOException {
taxoWriter.addCategory(cp);
taxoWriter.commit(new HashMap<String, String>(){{
put("just", "data");
}});
}
@Test
public void testRecreateAndRefresh() throws Exception {
// DirTaxoWriter lost the INDEX_CREATE_TIME property if it was opened in
// CREATE_OR_APPEND (or commit(userData) called twice), which could lead to
// DirTaxoReader succeeding to refresh().
Directory dir = newDirectory();
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, new NoOpCache());
touchTaxo(taxoWriter, new CategoryPath("a"));
DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
touchTaxo(taxoWriter, new CategoryPath("b"));
// this should not fail
taxoReader.refresh();
// now recreate the taxonomy, and check that the timestamp is preserved after opening DirTW again.
taxoWriter.close();
taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, new NoOpCache());
touchTaxo(taxoWriter, new CategoryPath("c"));
taxoWriter.close();
taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, new NoOpCache());
touchTaxo(taxoWriter, new CategoryPath("d"));
taxoWriter.close();
// this should fail
try {
taxoReader.refresh();
fail("IconsistentTaxonomyException should have been thrown");
} catch (InconsistentTaxonomyException e) {
// ok, expected
}
taxoReader.close();
dir.close();
}
@Test
public void testUndefinedCreateTime() throws Exception {
// tests that if the taxonomy index doesn't have the INDEX_CREATE_TIME
// property (supports pre-3.6 indexes), all still works.
Directory dir = newDirectory();
// create an empty index first, so that DirTaxoWriter initializes createTime to null.
new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, null)).close();
DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE_OR_APPEND, new NoOpCache());
// we cannot commit null keys/values, this ensures that if DirTW.createTime is null, we can still commit.
taxoWriter.close();
DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
taxoReader.refresh();
taxoReader.close();
dir.close();
}
}