mirror of https://github.com/apache/lucene.git

LUCENE-5246: SegmentInfoPerCommit continues to list unneeded updatesFiles

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1527361 13f79535-47bb-0310-9956-ffa450edef68

commit c534c0b3b3 (parent da15f78a59)
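The gist of the change: SegmentInfoPerCommit used to track field-updates files in a single, append-only Set<String>, so files written for superseded fieldInfos generations stayed referenced and were never cleaned up. After this commit the files are tracked per generation in a Map<Long,Set<String>>, ReadersAndLiveDocs rebuilds that map after writing a new generation so that only generations still referenced by some field's docValues gen survive, and SegmentInfos serializes the map into segments_N. A minimal, self-contained sketch of the pruning idea (simplified names and signature, not the actual Lucene classes):

import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

class UpdatesFilesPruning {
  /**
   * Rebuild the gen -> files map, keeping only generations some field still
   * points to. liveGens are the docValues generations recorded in the current
   * FieldInfos (-1 means "no updates"); currentGen is the generation that was
   * just written and justWritten the files created for it.
   */
  static Map<Long,Set<String>> prune(Map<Long,Set<String>> oldGenFiles,
                                     Collection<Long> liveGens,
                                     long currentGen,
                                     Set<String> justWritten) {
    Map<Long,Set<String>> newGenFiles = new HashMap<Long,Set<String>>();
    for (long gen : liveGens) {
      if (gen == -1 || newGenFiles.containsKey(gen)) {
        continue; // field has no updates, or this generation was already recorded
      }
      if (gen == currentGen) {
        newGenFiles.put(gen, justWritten);           // files written in this round
      } else {
        newGenFiles.put(gen, oldGenFiles.get(gen));  // carry over a still-live older gen
      }
    }
    // generations that no field references any more are simply not copied over,
    // so their files stop being listed once the segments file is written
    return newGenFiles;
  }
}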
ReadersAndLiveDocs.java

@@ -23,6 +23,7 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.codecs.Codec;

@@ -351,6 +352,7 @@ class ReadersAndLiveDocs { // TODO (DVU_RENAME) to ReaderAndUpdates
    // We can write directly to the actual name (vs to a
    // .tmp & renaming it) because the file is not live
    // until segments file is written:
    FieldInfos fieldInfos = null;
    boolean success = false;
    try {
      Codec codec = info.info.getCodec();

@@ -385,7 +387,7 @@ class ReadersAndLiveDocs { // TODO (DVU_RENAME) to ReaderAndUpdates
        builder.addOrUpdate(f, NumericDocValuesField.TYPE);
      }

      final FieldInfos fieldInfos = builder.finish();
      fieldInfos = builder.finish();
      final long nextFieldInfosGen = info.getNextFieldInfosGen();
      final String segmentSuffix = Long.toString(nextFieldInfosGen, Character.MAX_RADIX);
      final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, null, IOContext.DEFAULT, segmentSuffix);

@@ -502,10 +504,25 @@ class ReadersAndLiveDocs { // TODO (DVU_RENAME) to ReaderAndUpdates
        copyUpdatesToMerging();
      }
      numericUpdates.clear();

      // create a new map, keeping only the gens that are in use
      Map<Long,Set<String>> genUpdatesFiles = info.getUpdatesFiles();
      Map<Long,Set<String>> newGenUpdatesFiles = new HashMap<Long,Set<String>>();
      final long fieldInfosGen = info.getFieldInfosGen();
      for (FieldInfo fi : fieldInfos) {
        long dvGen = fi.getDocValuesGen();
        if (dvGen != -1 && !newGenUpdatesFiles.containsKey(dvGen)) {
          if (dvGen == fieldInfosGen) {
            newGenUpdatesFiles.put(fieldInfosGen, trackingDir.getCreatedFiles());
          } else {
            newGenUpdatesFiles.put(dvGen, genUpdatesFiles.get(dvGen));
          }
        }
      }

      info.setGenUpdatesFiles(newGenUpdatesFiles);
    }

    info.addUpdatesFiles(trackingDir.getCreatedFiles());

    return true;
  }

SegmentInfoPerCommit.java

@@ -19,7 +19,11 @@ package org.apache.lucene.index;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.lucene.store.Directory;

@@ -51,8 +55,8 @@ public class SegmentInfoPerCommit { // TODO (DVU_RENAME) to SegmentCommitInfo
  // write
  private long nextWriteFieldInfosGen;

  // Tracks the files with field updates
  private Set<String> updatesFiles = new HashSet<String>();
  // Track the per-generation updates files
  private final Map<Long,Set<String>> genUpdatesFiles = new HashMap<Long,Set<String>>();

  private volatile long sizeInBytes = -1;

@@ -86,14 +90,15 @@ public class SegmentInfoPerCommit { // TODO (DVU_RENAME) to SegmentCommitInfo
    }
  }

  /** Returns the files which contains field updates. */
  public Set<String> getUpdatesFiles() {
    return new HashSet<String>(updatesFiles);
  /** Returns the per generation updates files. */
  public Map<Long,Set<String>> getUpdatesFiles() {
    return Collections.unmodifiableMap(genUpdatesFiles);
  }

  /** Called when we succeed in writing field updates. */
  public void addUpdatesFiles(Set<String> files) {
    updatesFiles.addAll(files);
  /** Sets the updates file names per generation. Does not deep clone the map. */
  public void setGenUpdatesFiles(Map<Long,Set<String>> genUpdatesFiles) {
    this.genUpdatesFiles.clear();
    this.genUpdatesFiles.putAll(genUpdatesFiles);
  }

  /** Called when we succeed in writing deletes */

@@ -151,7 +156,9 @@ public class SegmentInfoPerCommit { // TODO (DVU_RENAME) to SegmentCommitInfo
    info.getCodec().liveDocsFormat().files(this, files);

    // Must separately add any field updates files
    files.addAll(updatesFiles);
    for (Set<String> updateFiles : genUpdatesFiles.values()) {
      files.addAll(updateFiles);
    }

    return files;
  }

@@ -248,7 +255,10 @@ public class SegmentInfoPerCommit { // TODO (DVU_RENAME) to SegmentCommitInfo
    other.nextWriteDelGen = nextWriteDelGen;
    other.nextWriteFieldInfosGen = nextWriteFieldInfosGen;

    other.updatesFiles.addAll(updatesFiles);
    // deep clone
    for (Entry<Long,Set<String>> e : genUpdatesFiles.entrySet()) {
      other.genUpdatesFiles.put(e.getKey(), new HashSet<String>(e.getValue()));
    }

    return other;
  }

SegmentInfos.java

@@ -28,6 +28,7 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.lucene.codecs.Codec;

@@ -349,7 +350,17 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfoPerCom
      }
      SegmentInfoPerCommit siPerCommit = new SegmentInfoPerCommit(info, delCount, delGen, fieldInfosGen);
      if (format >= VERSION_46) {
        siPerCommit.addUpdatesFiles(input.readStringSet());
        int numGensUpdatesFiles = input.readInt();
        final Map<Long,Set<String>> genUpdatesFiles;
        if (numGensUpdatesFiles == 0) {
          genUpdatesFiles = Collections.emptyMap();
        } else {
          genUpdatesFiles = new HashMap<Long,Set<String>>(numGensUpdatesFiles);
          for (int i = 0; i < numGensUpdatesFiles; i++) {
            genUpdatesFiles.put(input.readLong(), input.readStringSet());
          }
        }
        siPerCommit.setGenUpdatesFiles(genUpdatesFiles);
      }
      add(siPerCommit);
    }

@@ -420,7 +431,12 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfoPerCom
      segnOutput.writeLong(siPerCommit.getDelGen());
      segnOutput.writeInt(siPerCommit.getDelCount());
      segnOutput.writeLong(siPerCommit.getFieldInfosGen());
      segnOutput.writeStringSet(siPerCommit.getUpdatesFiles());
      final Map<Long,Set<String>> genUpdatesFiles = siPerCommit.getUpdatesFiles();
      segnOutput.writeInt(genUpdatesFiles.size());
      for (Entry<Long,Set<String>> e : genUpdatesFiles.entrySet()) {
        segnOutput.writeLong(e.getKey());
        segnOutput.writeStringSet(e.getValue());
      }
      assert si.dir == directory;

      assert siPerCommit.getDelCount() <= si.getDocCount();

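For the VERSION_46 segments_N format, the single string set of updates files is replaced by an int count followed by one (long generation, string set of files) record per live generation, as the read and write hunks above show. A rough, self-contained sketch of that record layout using plain java.io streams as a stand-in for Lucene's IndexInput/IndexOutput (the real code uses readStringSet/writeStringSet instead of the per-string loop below):

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.*;

class GenUpdatesFilesRecord {

  // write: int count, then per generation a long gen and a set of file names
  static void write(DataOutput out, Map<Long,Set<String>> genUpdatesFiles) throws IOException {
    out.writeInt(genUpdatesFiles.size());
    for (Map.Entry<Long,Set<String>> e : genUpdatesFiles.entrySet()) {
      out.writeLong(e.getKey());
      out.writeInt(e.getValue().size());
      for (String file : e.getValue()) {
        out.writeUTF(file); // stand-in for writeStringSet
      }
    }
  }

  // read: mirror of write()
  static Map<Long,Set<String>> read(DataInput in) throws IOException {
    int numGens = in.readInt();
    if (numGens == 0) {
      return Collections.emptyMap();
    }
    Map<Long,Set<String>> genUpdatesFiles = new HashMap<Long,Set<String>>(numGens);
    for (int i = 0; i < numGens; i++) {
      long gen = in.readLong();
      int numFiles = in.readInt();
      Set<String> files = new HashSet<String>(numFiles);
      for (int j = 0; j < numFiles; j++) {
        files.add(in.readUTF()); // stand-in for readStringSet
      }
      genUpdatesFiles.put(gen, files);
    }
    return genUpdatesFiles;
  }
}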
SegmentReader.java

@@ -110,7 +110,7 @@ public final class SegmentReader extends AtomicReader {
      final DocValuesFormat dvFormat = codec.docValuesFormat();
      // initialize the per generation numericDVProducers and put the correct
      // DVProducer for each field
      final Map<Long,List<FieldInfo>> genInfos = getGenInfos(si);
      final Map<Long,List<FieldInfo>> genInfos = getGenInfos();

      // System.out.println("[" + Thread.currentThread().getName() + "] SR.init: new reader: " + si + "; gens=" + genInfos.keySet());

@@ -178,7 +178,7 @@ public final class SegmentReader extends AtomicReader {
    final Directory dir = core.cfsReader != null ? core.cfsReader : si.info.dir;

    final DocValuesFormat dvFormat = codec.docValuesFormat();
    final Map<Long,List<FieldInfo>> genInfos = getGenInfos(si);
    final Map<Long,List<FieldInfo>> genInfos = getGenInfos();

    for (Entry<Long,List<FieldInfo>> e : genInfos.entrySet()) {
      Long gen = e.getKey();

@@ -244,7 +244,7 @@ public final class SegmentReader extends AtomicReader {
  }

  // returns a gen->List<FieldInfo> mapping. Fields without DV updates have gen=-1
  private Map<Long,List<FieldInfo>> getGenInfos(SegmentInfoPerCommit si) {
  private Map<Long,List<FieldInfo>> getGenInfos() {
    final Map<Long,List<FieldInfo>> genInfos = new HashMap<Long,List<FieldInfo>>();
    for (FieldInfo fi : fieldInfos) {
      if (fi.getDocValuesType() == null) {

TestNumericDocValuesUpdates.java

@@ -5,6 +5,7 @@ import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;

@@ -28,8 +29,8 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util._TestUtil;
import org.junit.Test;

import com.carrotsearch.randomizedtesting.generators.RandomPicks;

@@ -917,10 +918,11 @@ public class TestNumericDocValuesUpdates extends LuceneTestCase {
    final IndexWriter writer = new IndexWriter(dir, conf);

    // create index
    final int numThreads = atLeast(3);
    final int numThreads = _TestUtil.nextInt(random(), 3, 6);
    final int numDocs = atLeast(2000);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      doc.add(new StringField("id", "doc" + i, Store.NO));
      double group = random().nextDouble();
      String g;
      if (group < 0.1) g = "g0";

@@ -937,20 +939,21 @@ public class TestNumericDocValuesUpdates extends LuceneTestCase {
    }

    final CountDownLatch done = new CountDownLatch(numThreads);
    final AtomicInteger numUpdates = new AtomicInteger(atLeast(100));

    // same thread updates a field as well as reopens
    Thread[] threads = new Thread[numThreads];
    for (int i = 0; i < threads.length; i++) {
      final String f = "f" + i;
      final String cf = "cf" + i;
      final int numThreadUpdates = atLeast(40);
      threads[i] = new Thread("UpdateThread-" + i) {
        @Override
        public void run() {
          DirectoryReader reader = null;
          boolean success = false;
          try {
            Random random = random();
            int numUpdates = numThreadUpdates;
            while (numUpdates-- > 0) {
            while (numUpdates.getAndDecrement() > 0) {
              double group = random.nextDouble();
              Term t;
              if (group < 0.1) t = new Term("updKey", "g0");

@@ -965,20 +968,43 @@ public class TestNumericDocValuesUpdates extends LuceneTestCase {
              if (random.nextDouble() < 0.2) {
                // delete a random document
                int doc = random.nextInt(numDocs);
                // System.out.println("[" + Thread.currentThread().getName() + "] deleteDoc=doc" + doc);
                writer.deleteDocuments(new Term("id", "doc" + doc));
              }

              if (random.nextDouble() < 0.1) {
                writer.commit(); // rarely commit

              if (random.nextDouble() < 0.05) { // commit every 20 updates on average
                // System.out.println("[" + Thread.currentThread().getName() + "] commit");
                writer.commit();
              }

              if (random.nextDouble() < 0.3) { // obtain NRT reader (apply updates)
                DirectoryReader.open(writer, true).close();
              if (random.nextDouble() < 0.1) { // reopen NRT reader (apply updates), on average once every 10 updates
                if (reader == null) {
                  // System.out.println("[" + Thread.currentThread().getName() + "] open NRT");
                  reader = DirectoryReader.open(writer, true);
                } else {
                  // System.out.println("[" + Thread.currentThread().getName() + "] reopen NRT");
                  DirectoryReader r2 = DirectoryReader.openIfChanged(reader, writer, true);
                  if (r2 != null) {
                    reader.close();
                    reader = r2;
                  }
                }
              }
            }
            // System.out.println("[" + Thread.currentThread().getName() + "] DONE");
            success = true;
          } catch (IOException e) {
            throw new RuntimeException(e);
          } finally {
            if (reader != null) {
              try {
                reader.close();
              } catch (IOException e) {
                if (success) { // suppress this exception only if there was another exception
                  throw new RuntimeException(e);
                }
              }
            }
            done.countDown();
          }
        }

@@ -1152,5 +1178,31 @@ public class TestNumericDocValuesUpdates extends LuceneTestCase {

    IOUtils.close(dir1, dir2);
  }

  @Test
  public void testDeleteUnusedUpdatesFiles() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
    IndexWriter writer = new IndexWriter(dir, conf);

    Document doc = new Document();
    doc.add(new StringField("id", "d0", Store.NO));
    doc.add(new NumericDocValuesField("f", 1L));
    writer.addDocument(doc);

    // create _0_1.fnm
    writer.updateNumericDocValue(new Term("id", "d0"), "f", 2L);
    writer.commit();

    // create _0_2.fnm, and _0_1.fnm should be deleted
    writer.updateNumericDocValue(new Term("id", "d0"), "f", 2L);
    writer.commit();

    assertTrue(dir.fileExists("_0_2.fnm"));
    assertFalse("old generation field infos file should not exist in the directory: _0_1.fnm", dir.fileExists("_0_1.fnm"));

    writer.close();
    dir.close();
  }

}

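The new test leans on the generation-suffixed file names seen earlier: each fieldInfos generation is written with the suffix Long.toString(gen, Character.MAX_RADIX) (see the segmentSuffix line in the ReadersAndLiveDocs hunk), so for segment _0 the first two update generations yield _0_1.fnm and _0_2.fnm, which is what the assertions check. A small hypothetical helper, just to show how those names come about (not an actual Lucene method):

class GenFileNames {
  // e.g. genFieldInfosName("_0", 1) -> "_0_1.fnm", genFieldInfosName("_0", 2) -> "_0_2.fnm"
  static String genFieldInfosName(String segmentName, long fieldInfosGen) {
    // Character.MAX_RADIX is 36, so gens 1..9 keep their decimal digits, 10 becomes "a", etc.
    String suffix = Long.toString(fieldInfosGen, Character.MAX_RADIX);
    return segmentName + "_" + suffix + ".fnm";
  }

  public static void main(String[] args) {
    System.out.println(genFieldInfosName("_0", 1)); // _0_1.fnm
    System.out.println(genFieldInfosName("_0", 2)); // _0_2.fnm
  }
}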