mirror of https://github.com/apache/lucene.git
LUCENE-4108: add replaceTaxonomy to DirectoryTaxonomyWriter
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1346784 13f79535-47bb-0310-9956-ffa450edef68
parent 5fb2e4a751
commit 8b5af86c45
@@ -6,7 +6,10 @@ http://s.apache.org/luceneversions
 
 ======================= Lucene 5.0.0 =======================
 
-(No changes)
+New features
+
+* LUCENE-4108: add replaceTaxonomy to DirectoryTaxonomyWriter, which replaces
+  the taxonomy in place with the given one. (Shai Erera)
 
 ======================= Lucene 4.0.0 =======================
 
@@ -93,6 +93,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
    */
   public static final String INDEX_CREATE_TIME = "index.create.time";
 
+  private Directory dir;
   private IndexWriter indexWriter;
   private int nextID;
   private char delimiter = Consts.DEFAULT_DELIMITER;
@@ -115,8 +116,8 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
   private DirectoryReader reader;
   private int cacheMisses;
 
-  /** Records the taxonomy index creation time. */
-  private final String createTime;
+  /** Records the taxonomy index creation time, updated on replaceTaxonomy as well. */
+  private String createTime;
 
   /** Reads the commit data from a Directory. */
   private static Map<String, String> readCommitData(Directory dir) throws IOException {
@@ -204,8 +205,9 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
       }
     }
 
+    dir = directory;
     IndexWriterConfig config = createIndexWriterConfig(openMode);
-    indexWriter = openIndexWriter(directory, config);
+    indexWriter = openIndexWriter(dir, config);
 
     // verify (to some extent) that merge policy in effect would preserve category docids
     assert !(indexWriter.getConfig().getMergePolicy() instanceof TieredMergePolicy) :
@@ -282,7 +284,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
   protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
     // Make sure we use a MergePolicy which always merges adjacent segments and thus
     // keeps the doc IDs ordered as well (this is crucial for the taxonomy index).
-    return new IndexWriterConfig(Version.LUCENE_40,
+    return new IndexWriterConfig(Version.LUCENE_50,
         new KeywordAnalyzer()).setOpenMode(openMode).setMergePolicy(
         new LogByteSizeMergePolicy());
   }
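The comment in the hunk above reflects a real constraint: the taxonomy index maps category ordinals to document IDs, so only a merge policy that merges adjacent segments (such as LogByteSizeMergePolicy) keeps that mapping intact. A minimal sketch of how a subclass might customize the writer configuration while preserving that constraint; the class name TunedTaxonomyWriter and the RAM-buffer setting are illustrative assumptions, not part of this commit, and import paths follow the APIs of this era:

import java.io.IOException;

import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Version;

public class TunedTaxonomyWriter extends DirectoryTaxonomyWriter {

  public TunedTaxonomyWriter(Directory dir) throws IOException {
    super(dir);
  }

  @Override
  protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
    // Tune buffering as needed, but keep a log merge policy: it only merges
    // adjacent segments, which preserves the ordinal-to-docID mapping that
    // the taxonomy index relies on.
    return new IndexWriterConfig(Version.LUCENE_50, new KeywordAnalyzer())
        .setOpenMode(openMode)
        .setRAMBufferSizeMB(64) // illustrative tuning knob
        .setMergePolicy(new LogByteSizeMergePolicy());
  }
}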
@@ -1021,4 +1023,32 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
     doClose();
   }
 
+  /**
+   * Replaces the current taxonomy with the given one. This method should
+   * generally be called in conjunction with
+   * {@link IndexWriter#addIndexes(Directory...)} to replace both the taxonomy
+   * as well as the search index content.
+   */
+  public void replaceTaxonomy(Directory taxoDir) throws IOException {
+    // replace the taxonomy by doing IW optimized operations
+    indexWriter.deleteAll();
+    indexWriter.addIndexes(taxoDir);
+    refreshInternalReader();
+    nextID = indexWriter.maxDoc();
+
+    // need to clear the cache, so that addCategory won't accidentally return
+    // old categories that are in the cache.
+    cache.clear();
+    cacheIsComplete = false;
+    alreadyCalledFillCache = false;
+
+    // update createTime as a taxonomy replace is just like it has been recreated
+    createTime = Long.toString(System.nanoTime());
+  }
+
+  /** Returns the {@link Directory} of this taxonomy writer. */
+  public Directory getDirectory() {
+    return dir;
+  }
+
 }
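As the new javadoc notes, replaceTaxonomy is meant to be paired with IndexWriter#addIndexes so the search index and its taxonomy are swapped together. A minimal usage sketch, assuming hypothetical side directories sideIndexDir and sideTaxoDir that hold a freshly rebuilt index and its matching taxonomy (the method and variable names are illustrative, not part of this commit); committing the taxonomy before the search index follows the usual faceting rule that documents must never reference ordinals missing from the taxonomy:

import java.io.IOException;

import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;

public class ReplaceTaxonomyExample {

  public static void swapInRebuiltIndex(IndexWriter mainWriter,
      DirectoryTaxonomyWriter mainTaxoWriter,
      Directory sideIndexDir, Directory sideTaxoDir) throws IOException {
    // Drop the old search index content and pull in the rebuilt segments.
    mainWriter.deleteAll();
    mainWriter.addIndexes(sideIndexDir);

    // Swap the taxonomy in place so facet ordinals match the rebuilt content.
    mainTaxoWriter.replaceTaxonomy(sideTaxoDir);

    // Commit the taxonomy before the search index, so a newly opened reader
    // never sees documents whose ordinals are missing from the taxonomy.
    mainTaxoWriter.commit();
    mainWriter.commit();
  }
}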
@@ -112,4 +112,10 @@ public interface TaxonomyWriterCache {
    */
   public boolean hasRoom(int numberOfEntries);
 
+  /**
+   * Clears the content of the cache. Unlike {@link #close()}, the caller can
+   * assume that the cache is still operable after this method returns.
+   */
+  public void clear();
+
 }
@@ -34,12 +34,23 @@ import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
 public class Cl2oTaxonomyWriterCache implements TaxonomyWriterCache {
 
   private final ReadWriteLock lock = new ReentrantReadWriteLock();
+  private final int initialCapcity, numHashArrays;
+  private final float loadFactor;
+
   private CompactLabelToOrdinal cache;
 
   public Cl2oTaxonomyWriterCache(int initialCapcity, float loadFactor, int numHashArrays) {
     this.cache = new CompactLabelToOrdinal(initialCapcity, loadFactor, numHashArrays);
+    this.initialCapcity = initialCapcity;
+    this.numHashArrays = numHashArrays;
+    this.loadFactor = loadFactor;
   }
 
+  @Override
+  public void clear() {
+    cache = new CompactLabelToOrdinal(initialCapcity, loadFactor, numHashArrays);
+  }
+
   @Override
   public synchronized void close() {
     cache = null;
@@ -65,6 +65,11 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
     return n <= (cache.getMaxSize() - cache.getSize());
   }
 
+  @Override
+  public void clear() {
+    cache.clear();
+  }
+
   @Override
   public synchronized void close() {
     cache.clear();
@@ -17,6 +17,7 @@ import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.SegmentInfos;
 import org.apache.lucene.store.AlreadyClosedException;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.util.LuceneTestCase;
@@ -59,6 +60,8 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
     public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) { return true; }
     @Override
     public boolean hasRoom(int numberOfEntries) { return false; }
+    @Override
+    public void clear() {}
 
   }
 
@@ -255,5 +258,43 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
 
     dir.close();
   }
 
+  private String getCreateTime(Directory taxoDir) throws IOException {
+    SegmentInfos infos = new SegmentInfos();
+    infos.read(taxoDir);
+    return infos.getUserData().get(DirectoryTaxonomyWriter.INDEX_CREATE_TIME);
+  }
+
+  @Test
+  public void testReplaceTaxonomy() throws Exception {
+    Directory input = newDirectory();
+    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(input);
+    taxoWriter.addCategory(new CategoryPath("a"));
+    taxoWriter.close();
+
+    Directory dir = newDirectory();
+    taxoWriter = new DirectoryTaxonomyWriter(dir);
+    int ordinal = taxoWriter.addCategory(new CategoryPath("b"));
+    taxoWriter.addCategory(new CategoryPath("c"));
+    taxoWriter.commit();
+
+    String origCreateTime = getCreateTime(dir);
+
+    // replace the taxonomy with the input one
+    taxoWriter.replaceTaxonomy(input);
+
+    // add the same category again -- it should not receive the same ordinal !
+    int newOrdinal = taxoWriter.addCategory(new CategoryPath("b"));
+    assertNotSame("new ordinal cannot be the original ordinal", ordinal, newOrdinal);
+    assertEquals("ordinal should have been 2 since only one category was added by replaceTaxonomy", 2, newOrdinal);
+
+    taxoWriter.close();
+
+    String newCreateTime = getCreateTime(dir);
+    assertNotSame("create time should have been changed after replaceTaxonomy", origCreateTime, newCreateTime);
+
+    dir.close();
+    input.close();
+  }
+
 }