diff --git a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java index cd5e816e476..09004bbb963 100644 --- a/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java +++ b/lucene/src/java/org/apache/lucene/index/FilterIndexReader.java @@ -18,7 +18,6 @@ package org.apache.lucene.index; */ import org.apache.lucene.index.codecs.PerDocValues; -import org.apache.lucene.index.IndexReader.ReaderContext; import org.apache.lucene.store.Directory; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; @@ -441,6 +440,11 @@ public class FilterIndexReader extends IndexReader { return in.getTopReaderContext(); } + @Override + public Map getCommitUserData() { + return in.getCommitUserData(); + } + @Override public Fields fields() throws IOException { ensureOpen(); diff --git a/modules/facet/docs/userguide.html b/modules/facet/docs/userguide.html index 684d498f0dc..5b2d9c7eab6 100644 --- a/modules/facet/docs/userguide.html +++ b/modules/facet/docs/userguide.html @@ -779,6 +779,12 @@ example) so a thread which is in the middle of a search needs to continue using TaxonomyReader, however, we are guaranteed that existing categories are never deleted or modified - the only thing that can happen is that new categories are added. Since search threads do not care if new categories are added in the middle of a search, there is no reason to keep around the old object, and the new one suffices. +
However, if the taxonomy index was recreated since the TaxonomyReader was opened or +refreshed, this assumption (that categories are forevr) no longer holds, and refresh() will +throw an InconsistentTaxonomyException, guiding the application to open +a new TaxonomyReader for up-to-date taxonomy data. (Old one can +be closed as soon as it is no more used.) + diff --git a/modules/facet/src/java/org/apache/lucene/facet/taxonomy/InconsistentTaxonomyException.java b/modules/facet/src/java/org/apache/lucene/facet/taxonomy/InconsistentTaxonomyException.java new file mode 100644 index 00000000000..168736ab775 --- /dev/null +++ b/modules/facet/src/java/org/apache/lucene/facet/taxonomy/InconsistentTaxonomyException.java @@ -0,0 +1,40 @@ +package org.apache.lucene.facet.taxonomy; + +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * Exception indicating that a certain operation could not be performed + * on a taxonomy related object because of an inconsistency. + *

+ * For example, trying to refresh a taxonomy reader might fail in case + * the underlying taxonomy was meanwhile modified in a manner which + * does not allow to perform such a refresh. (See {@link TaxonomyReader#refresh()}.) + * + * @lucene.experimental + */ +public class InconsistentTaxonomyException extends Exception { + + public InconsistentTaxonomyException(String message) { + super(message); + } + + public InconsistentTaxonomyException() { + super(); + } + +} diff --git a/modules/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java b/modules/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java index 2d9649f399f..e17de23ec0c 100644 --- a/modules/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java +++ b/modules/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java @@ -120,6 +120,14 @@ public interface TaxonomyReader extends Closeable { * faceted search, the taxonomy reader's refresh() should be called only after * a reopen() of the main index. *

+ * Refreshing the taxonomy might fail in some cases, for example + * if the taxonomy was recreated since this instance was opened or last refreshed. + * In this case an {@link InconsistentTaxonomyException} is thrown, + * suggesting that in order to obtain up-to-date taxonomy data a new + * {@link TaxonomyReader} should be opened. Note: This {@link TaxonomyReader} + * instance remains unchanged and usable in this case, and the application can + * continue to use it, and should still {@link #close()} when no longer needed. + *

* It should be noted that refresh() is similar in purpose to * IndexReader.reopen(), but the two methods behave differently. refresh() * refreshes the existing TaxonomyReader object, rather than opening a new one @@ -129,8 +137,9 @@ public interface TaxonomyReader extends Closeable { * of the taxonomy open - refreshing the taxonomy to the newest data and using * this new snapshots in all threads (whether new or old) is fine. This saves * us needing to keep multiple copies of the taxonomy open in memory. + * @return true if anything has changed, false otherwise. */ - public void refresh() throws IOException; + public boolean refresh() throws IOException, InconsistentTaxonomyException; /** * getParent() returns the ordinal of the parent category of the category diff --git a/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java b/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java index b5c21846bbb..fe9ce199709 100644 --- a/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java +++ b/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyReader.java @@ -10,6 +10,7 @@ import java.util.logging.Level; import java.util.logging.Logger; import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException; import org.apache.lucene.facet.taxonomy.TaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.Consts.LoadFullPathOnly; import org.apache.lucene.index.CorruptIndexException; @@ -333,7 +334,7 @@ public class DirectoryTaxonomyReader implements TaxonomyReader { // Note that refresh() is synchronized (it is the only synchronized // method in this class) to ensure that it never gets called concurrently // with itself. - public synchronized void refresh() throws IOException { + public synchronized boolean refresh() throws IOException, InconsistentTaxonomyException { ensureOpen(); /* * Since refresh() can be a lengthy operation, it is very important that we @@ -349,7 +350,24 @@ public class DirectoryTaxonomyReader implements TaxonomyReader { // no other thread can be writing at this time (this method is the // only possible writer, and it is "synchronized" to avoid this case). IndexReader r2 = IndexReader.openIfChanged(indexReader); - if (r2 != null) { + if (r2 == null) { + return false; // no changes, nothing to do + } + + // validate that a refresh is valid at this point, i.e. that the taxonomy + // was not recreated since this reader was last opened or refresshed. + String t1 = indexReader.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME); + String t2 = r2.getCommitUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME); + if (t1==null) { + if (t2!=null) { + r2.close(); + throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2); + } + } else if (!t1.equals(t2)) { + r2.close(); + throw new InconsistentTaxonomyException("Taxonomy was recreated at: "+t2+" != "+t1); + } + IndexReader oldreader = indexReader; // we can close the old searcher, but need to synchronize this // so that we don't close it in the middle that another routine @@ -392,8 +410,8 @@ public class DirectoryTaxonomyReader implements TaxonomyReader { i.remove(); } } + return true; } - } public void close() throws IOException { if (!closed) { diff --git a/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java b/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java index f5bcb59d23b..c3cc4c577c3 100644 --- a/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java +++ b/modules/facet/src/java/org/apache/lucene/facet/taxonomy/directory/DirectoryTaxonomyWriter.java @@ -9,6 +9,7 @@ import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; +import java.util.HashMap; import java.util.Map; import org.apache.lucene.analysis.core.KeywordAnalyzer; @@ -83,6 +84,14 @@ import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache; */ public class DirectoryTaxonomyWriter implements TaxonomyWriter { + /** + * Property name of user commit data that contains the creation time of a taxonomy index. + *

+ * Applications making use of {@link TaxonomyWriter#commit(Map)} should not use this + * particular property name. + */ + public static final String INDEX_CREATE_TIME = "index.create.time"; + private IndexWriter indexWriter; private int nextID; private char delimiter = Consts.DEFAULT_DELIMITER; @@ -105,6 +114,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter { private IndexReader reader; private int cacheMisses; + /** + * When a taxonomy is created, we mark that its create time should be committed in the + * next commit. + */ + private String taxoIndexCreateTime = null; + /** * setDelimiter changes the character that the taxonomy uses in its internal * storage as a delimiter between category components. Do not use this @@ -172,6 +187,10 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter { throws CorruptIndexException, LockObtainFailedException, IOException { + if (!IndexReader.indexExists(directory) || openMode==OpenMode.CREATE) { + taxoIndexCreateTime = Long.toString(System.nanoTime()); + } + indexWriter = openIndexWriter(directory, openMode); reader = null; @@ -280,10 +299,17 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter { @Override public synchronized void close() throws CorruptIndexException, IOException { if (indexWriter != null) { - indexWriter.close(); - indexWriter = null; + if (taxoIndexCreateTime != null) { + indexWriter.commit(combinedCommitData(null)); + taxoIndexCreateTime = null; + } + doClose(); } - + } + + private void doClose() throws CorruptIndexException, IOException { + indexWriter.close(); + indexWriter = null; closeResources(); } @@ -584,10 +610,27 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter { @Override public synchronized void commit() throws CorruptIndexException, IOException { ensureOpen(); - indexWriter.commit(); + if (taxoIndexCreateTime != null) { + indexWriter.commit(combinedCommitData(null)); + taxoIndexCreateTime = null; + } else { + indexWriter.commit(); + } refreshReader(); } + /** + * Combine original user data with that of the taxonomy creation time + */ + private Map combinedCommitData(Map userData) { + Map m = new HashMap(); + if (userData != null) { + m.putAll(userData); + } + m.put(INDEX_CREATE_TIME, taxoIndexCreateTime); + return m; + } + /** * Like commit(), but also store properties with the index. These properties * are retrievable by {@link DirectoryTaxonomyReader#getCommitUserData}. @@ -596,7 +639,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter { @Override public synchronized void commit(Map commitUserData) throws CorruptIndexException, IOException { ensureOpen(); - indexWriter.commit(commitUserData); + if (taxoIndexCreateTime != null) { + indexWriter.commit(combinedCommitData(commitUserData)); + taxoIndexCreateTime = null; + } else { + indexWriter.commit(commitUserData); + } refreshReader(); } @@ -607,7 +655,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter { @Override public synchronized void prepareCommit() throws CorruptIndexException, IOException { ensureOpen(); - indexWriter.prepareCommit(); + if (taxoIndexCreateTime != null) { + indexWriter.prepareCommit(combinedCommitData(null)); + taxoIndexCreateTime = null; + } else { + indexWriter.prepareCommit(); + } } /** @@ -617,7 +670,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter { @Override public synchronized void prepareCommit(Map commitUserData) throws CorruptIndexException, IOException { ensureOpen(); - indexWriter.prepareCommit(commitUserData); + if (taxoIndexCreateTime != null) { + indexWriter.prepareCommit(combinedCommitData(commitUserData)); + taxoIndexCreateTime = null; + } else { + indexWriter.prepareCommit(commitUserData); + } } /** @@ -1037,10 +1095,10 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter { * will yield an {@link AlreadyClosedException}). */ @Override - public void rollback() throws IOException { + public synchronized void rollback() throws IOException { ensureOpen(); indexWriter.rollback(); - close(); + doClose(); } } diff --git a/modules/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java b/modules/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java index 2af37fcabfb..cd10a479f5b 100644 --- a/modules/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java +++ b/modules/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java @@ -1,10 +1,17 @@ package org.apache.lucene.facet.taxonomy.directory; +import java.util.Random; + import org.apache.lucene.facet.taxonomy.CategoryPath; +import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException; +import org.apache.lucene.facet.taxonomy.TaxonomyReader; +import org.apache.lucene.facet.taxonomy.TaxonomyWriter; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader; import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; import org.apache.lucene.store.AlreadyClosedException; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.LuceneTestCase; import org.junit.Test; @@ -59,6 +66,36 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase { dir.close(); } + /** + * Test the boolean returned by TR.refresh + * @throws Exception + */ + @Test + public void testReaderRefreshResult() throws Exception { + Directory dir = null; + DirectoryTaxonomyWriter ltw = null; + DirectoryTaxonomyReader ltr = null; + + try { + dir = newDirectory(); + ltw = new DirectoryTaxonomyWriter(dir); + + ltw.addCategory(new CategoryPath("a")); + ltw.commit(); + + ltr = new DirectoryTaxonomyReader(dir); + assertFalse("Nothing has changed",ltr.refresh()); + + ltw.addCategory(new CategoryPath("b")); + ltw.commit(); + + assertTrue("changes were committed",ltr.refresh()); + assertFalse("Nothing has changed",ltr.refresh()); + } finally { + IOUtils.close(ltw, ltr, dir); + } + } + @Test public void testAlreadyClosed() throws Exception { Directory dir = newDirectory(); @@ -77,4 +114,68 @@ public class TestDirectoryTaxonomyReader extends LuceneTestCase { dir.close(); } + /** + * recreating a taxonomy should work well with a freshly opened taxonomy reader + */ + @Test + public void testFreshReadRecreatedTaxonomy() throws Exception { + doTestReadRecreatedTaxono(random, true); + } + + /** + * recreating a taxonomy should work well with a refreshed taxonomy reader + */ + @Test + public void testRefreshReadRecreatedTaxonomy() throws Exception { + doTestReadRecreatedTaxono(random, false); + } + + private void doTestReadRecreatedTaxono(Random random, boolean closeReader) throws Exception { + Directory dir = null; + TaxonomyWriter tw = null; + TaxonomyReader tr = null; + + // prepare a few categories + int n = 10; + CategoryPath[] cp = new CategoryPath[n]; + for (int i=0; i