LUCENE-4108: add replaceTaxonomy to DirectoryTaxonomyWriter

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1346784 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Shai Erera 2012-06-06 08:46:10 +00:00
parent 5fb2e4a751
commit 8b5af86c45
6 changed files with 101 additions and 5 deletions

View File

@ -6,7 +6,10 @@ http://s.apache.org/luceneversions
======================= Lucene 5.0.0 =======================
(No changes)
New features
* LUCENE-4108: add replaceTaxonomy to DirectoryTaxonomyWriter, which replaces
the taxonomy in place with the given one. (Shai Erera)
======================= Lucene 4.0.0 =======================

View File

@ -93,6 +93,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
*/
public static final String INDEX_CREATE_TIME = "index.create.time";
private Directory dir;
private IndexWriter indexWriter;
private int nextID;
private char delimiter = Consts.DEFAULT_DELIMITER;
@ -115,8 +116,8 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
private DirectoryReader reader;
private int cacheMisses;
/** Records the taxonomy index creation time. */
private final String createTime;
/** Records the taxonomy index creation time, updated on replaceTaxonomy as well. */
private String createTime;
/** Reads the commit data from a Directory. */
private static Map<String, String> readCommitData(Directory dir) throws IOException {
@ -204,8 +205,9 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
}
}
dir = directory;
IndexWriterConfig config = createIndexWriterConfig(openMode);
indexWriter = openIndexWriter(directory, config);
indexWriter = openIndexWriter(dir, config);
// verify (to some extent) that merge policy in effect would preserve category docids
assert !(indexWriter.getConfig().getMergePolicy() instanceof TieredMergePolicy) :
@ -282,7 +284,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
// Make sure we use a MergePolicy which always merges adjacent segments and thus
// keeps the doc IDs ordered as well (this is crucial for the taxonomy index).
// The code that opens the index writer with this config additionally asserts
// that the merge policy in effect is not a TieredMergePolicy.
// NOTE(review): the two consecutive "return new IndexWriterConfig(" lines below
// look like the before (LUCENE_40) and after (LUCENE_50) sides of this change
// shown together -- confirm that only the LUCENE_50 line is in the final code.
return new IndexWriterConfig(Version.LUCENE_40,
return new IndexWriterConfig(Version.LUCENE_50,
new KeywordAnalyzer()).setOpenMode(openMode).setMergePolicy(
new LogByteSizeMergePolicy());
}
@ -1021,4 +1023,32 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
doClose();
}
/**
* Replaces the current taxonomy with the given one. This method should
* generally be called in conjunction with
* {@link IndexWriter#addIndexes(Directory...)} to replace both the taxonomy
* as well as the search index content.
* <p>
* All documents held by the internal index writer are deleted and the content
* of {@code taxoDir} is added in their place; afterwards the category cache is
* cleared and the recorded creation time is reset.
*
* @param taxoDir directory holding the taxonomy whose content is copied in
* @throws IOException if one of the underlying index operations fails
*/
public void replaceTaxonomy(Directory taxoDir) throws IOException {
// replace the taxonomy by doing IW optimized operations
indexWriter.deleteAll();
indexWriter.addIndexes(taxoDir);
// refresh the internal reader after the content swap (presumably so that
// subsequent lookups see the new taxonomy -- confirm against refreshInternalReader)
refreshInternalReader();
// continue numbering right after the documents that are now in the index
nextID = indexWriter.maxDoc();
// need to clear the cache, so that addCategory won't accidentally return
// old categories that are in the cache.
cache.clear();
// reset the cache bookkeeping flags together with the cache itself
cacheIsComplete = false;
alreadyCalledFillCache = false;
// update createTime, since replacing a taxonomy is just like recreating it
createTime = Long.toString(System.nanoTime());
}
/**
 * Gives access to the {@link Directory} this taxonomy writer operates on.
 *
 * @return the directory held by this writer
 */
public Directory getDirectory() {
  return this.dir;
}
}

View File

@ -112,4 +112,10 @@ public interface TaxonomyWriterCache {
*/
public boolean hasRoom(int numberOfEntries);
/**
* Clears the content of the cache. Unlike {@link #close()}, the caller can
* assume that the cache is still operable after this method returns, i.e. the
* same instance may keep being used afterwards.
*/
public void clear();
}

View File

@ -34,12 +34,23 @@ import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
public class Cl2oTaxonomyWriterCache implements TaxonomyWriterCache {
private final ReadWriteLock lock = new ReentrantReadWriteLock();
private final int initialCapcity, numHashArrays;
private final float loadFactor;
private CompactLabelToOrdinal cache;
/**
 * Creates a cache backed by a {@link CompactLabelToOrdinal} built from the
 * given settings. The settings are kept in fields as well, so that
 * {@link #clear()} can build a new backing structure with the same values.
 *
 * @param initialCapcity initial capacity handed to the backing structure
 * @param loadFactor load factor handed to the backing structure
 * @param numHashArrays number of hash arrays handed to the backing structure
 */
public Cl2oTaxonomyWriterCache(int initialCapcity, float loadFactor, int numHashArrays) {
  // remember the construction settings first ...
  this.initialCapcity = initialCapcity;
  this.loadFactor = loadFactor;
  this.numHashArrays = numHashArrays;
  // ... then build the backing structure from those same arguments
  this.cache = new CompactLabelToOrdinal(initialCapcity, loadFactor, numHashArrays);
}
/**
 * Discards the cached content by swapping in a newly constructed
 * {@link CompactLabelToOrdinal} that uses the settings given at construction.
 * <p>
 * The swap is performed while holding the write lock of this cache's
 * {@link ReadWriteLock}, so that the reassignment of the backing structure is
 * not interleaved with other code that guards its access with the same lock.
 */
@Override
public void clear() {
  lock.writeLock().lock();
  try {
    cache = new CompactLabelToOrdinal(initialCapcity, loadFactor, numHashArrays);
  } finally {
    // always release, even if constructing the new structure fails
    lock.writeLock().unlock();
  }
}
@Override
public synchronized void close() {
cache = null;

View File

@ -65,6 +65,11 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
return n <= (cache.getMaxSize() - cache.getSize());
}
/**
 * Removes all entries from the underlying cache; the cache itself stays usable.
 * <p>
 * Declared {@code synchronized} to match {@link #close()}, which performs the
 * same {@code cache.clear()} call while holding the instance monitor.
 */
@Override
public synchronized void clear() {
  cache.clear();
}
@Override
public synchronized void close() {
cache.clear();

View File

@ -17,6 +17,7 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
@ -59,6 +60,8 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
// stub: stores nothing and always returns true
public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) { return true; }
// stub: always reports that there is no room, whatever number of entries is asked for
@Override
public boolean hasRoom(int numberOfEntries) { return false; }
// stub: nothing to clear, since put() above never stores anything
@Override
public void clear() {}
}
@ -255,5 +258,43 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
dir.close();
}
/**
 * Reads the segment infos of the given taxonomy directory and returns the value
 * stored in their user data under
 * {@link DirectoryTaxonomyWriter#INDEX_CREATE_TIME}.
 *
 * @param taxoDir the taxonomy directory to inspect
 * @return the recorded create time, or whatever the user data map returns for
 *         a missing key
 * @throws IOException if the segment infos cannot be read
 */
private String getCreateTime(Directory taxoDir) throws IOException {
  final SegmentInfos segmentInfos = new SegmentInfos();
  segmentInfos.read(taxoDir);
  final String createTimeKey = DirectoryTaxonomyWriter.INDEX_CREATE_TIME;
  return segmentInfos.getUserData().get(createTimeKey);
}
/**
 * Verifies that {@code replaceTaxonomy} swaps in the content of another
 * taxonomy: a category that existed before the replace must be re-added under
 * a new ordinal, and the create time recorded in the commit data must change.
 */
@Test
public void testReplaceTaxonomy() throws Exception {
  // build the taxonomy that will be swapped in; only category "a" is added to it
  Directory input = newDirectory();
  DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(input);
  taxoWriter.addCategory(new CategoryPath("a"));
  taxoWriter.close();

  // build the taxonomy that will be replaced; categories "b" and "c" are added to it
  Directory dir = newDirectory();
  taxoWriter = new DirectoryTaxonomyWriter(dir);
  int ordinal = taxoWriter.addCategory(new CategoryPath("b"));
  taxoWriter.addCategory(new CategoryPath("c"));
  taxoWriter.commit();

  String origCreateTime = getCreateTime(dir);

  // replace the taxonomy with the input one
  taxoWriter.replaceTaxonomy(input);

  // add the same category again -- it should not receive the same ordinal !
  int newOrdinal = taxoWriter.addCategory(new CategoryPath("b"));
  // compare the int values directly: assertNotSame would box them and compare
  // object identity, which is not what this check is about
  assertTrue("new ordinal cannot be the original ordinal", ordinal != newOrdinal);
  assertEquals("ordinal should have been 2 since only one category was added by replaceTaxonomy", 2, newOrdinal);

  taxoWriter.close();

  String newCreateTime = getCreateTime(dir);
  assertNotNull("create time should be recorded after replaceTaxonomy", newCreateTime);
  // compare by value: the two strings are read separately from the commit data,
  // so a reference comparison (assertNotSame) would pass even for equal values
  assertFalse("create time should have been changed after replaceTaxonomy", newCreateTime.equals(origCreateTime));

  dir.close();
  input.close();
}
}