LUCENE-5246: SegmentInfoPerCommit continues to list unneeded updatesFiles

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1527361 13f79535-47bb-0310-9956-ffa450edef68
Shai Erera 2013-09-29 17:37:02 +00:00
parent da15f78a59
commit c534c0b3b3
5 changed files with 123 additions and 28 deletions
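In short: before this change, SegmentInfoPerCommit kept a single flat set of update file names and only ever added to it, so files() kept listing files of superseded field-infos generations and they were never deleted from the directory. The patch replaces the flat set with a generation-to-files map that is rebuilt after every successful write of field updates, so only generations still referenced by some field remain listed. An illustration (the .fnm names match the new test at the bottom of this commit; the exact set contents are assumptions):

// Segment _0, numeric doc-values field "f" updated and committed twice:
//   update 1 writes field-infos gen 1 -> _0_1.fnm (plus gen-1 doc-values files)
//   update 2 writes field-infos gen 2 -> _0_2.fnm (plus gen-2 doc-values files)
//
// before: updatesFiles    = { _0_1.fnm, _0_2.fnm, ... }  // gen-1 files listed forever
// after:  genUpdatesFiles = { 2 -> { _0_2.fnm, ... } }   // gen 1 dropped, its files deletable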

org/apache/lucene/index/ReadersAndLiveDocs.java

@@ -23,6 +23,7 @@ import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
+import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.codecs.Codec;
@@ -351,6 +352,7 @@ class ReadersAndLiveDocs { // TODO (DVU_RENAME) to ReaderAndUpdates
// We can write directly to the actual name (vs to a
// .tmp & renaming it) because the file is not live
// until segments file is written:
+FieldInfos fieldInfos = null;
boolean success = false;
try {
Codec codec = info.info.getCodec();
@@ -385,7 +387,7 @@ class ReadersAndLiveDocs { // TODO (DVU_RENAME) to ReaderAndUpdates
builder.addOrUpdate(f, NumericDocValuesField.TYPE);
}
-final FieldInfos fieldInfos = builder.finish();
+fieldInfos = builder.finish();
final long nextFieldInfosGen = info.getNextFieldInfosGen();
final String segmentSuffix = Long.toString(nextFieldInfosGen, Character.MAX_RADIX);
final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, null, IOContext.DEFAULT, segmentSuffix);
@@ -502,10 +504,25 @@ class ReadersAndLiveDocs { // TODO (DVU_RENAME) to ReaderAndUpdates
copyUpdatesToMerging();
}
numericUpdates.clear();
+// create a new map, keeping only the gens that are in use
+Map<Long,Set<String>> genUpdatesFiles = info.getUpdatesFiles();
+Map<Long,Set<String>> newGenUpdatesFiles = new HashMap<Long,Set<String>>();
+final long fieldInfosGen = info.getFieldInfosGen();
+for (FieldInfo fi : fieldInfos) {
+long dvGen = fi.getDocValuesGen();
+if (dvGen != -1 && !newGenUpdatesFiles.containsKey(dvGen)) {
+if (dvGen == fieldInfosGen) {
+newGenUpdatesFiles.put(fieldInfosGen, trackingDir.getCreatedFiles());
+} else {
+newGenUpdatesFiles.put(dvGen, genUpdatesFiles.get(dvGen));
+}
+}
+}
+info.setGenUpdatesFiles(newGenUpdatesFiles);
}
-info.addUpdatesFiles(trackingDir.getCreatedFiles());
return true;
}
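The loop above decides, per field, which generation's files stay alive: the generation just written gets the files created by this flush, any other generation still referenced by some field keeps its previous entry, and generations no field references anymore simply drop out of the new map. The same idea as a standalone sketch (plain Java with invented names, not the actual Lucene types):

import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

class PruneUpdatesFilesSketch {
  // Rebuilds the gen->files map, keeping only generations some field still
  // points at. dvGens holds each field's docValuesGen; -1 means the field
  // has no updates and contributes nothing.
  static Map<Long,Set<String>> prune(Map<Long,Set<String>> oldGenFiles,
                                     Collection<Long> dvGens,
                                     long justWrittenGen,
                                     Set<String> justCreatedFiles) {
    Map<Long,Set<String>> pruned = new HashMap<Long,Set<String>>();
    for (long dvGen : dvGens) {
      if (dvGen != -1 && !pruned.containsKey(dvGen)) {
        pruned.put(dvGen, dvGen == justWrittenGen ? justCreatedFiles : oldGenFiles.get(dvGen));
      }
    }
    return pruned; // unreferenced generations are gone; their files become deletable
  }
}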

org/apache/lucene/index/SegmentInfoPerCommit.java

@@ -19,7 +19,11 @@ package org.apache.lucene.index;
import java.io.IOException;
import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
import java.util.HashSet;
+import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
import org.apache.lucene.store.Directory;
@@ -51,8 +55,8 @@ public class SegmentInfoPerCommit { // TODO (DVU_RENAME) to SegmentCommitInfo
// write
private long nextWriteFieldInfosGen;
-// Tracks the files with field updates
-private Set<String> updatesFiles = new HashSet<String>();
+// Track the per-generation updates files
+private final Map<Long,Set<String>> genUpdatesFiles = new HashMap<Long,Set<String>>();
private volatile long sizeInBytes = -1;
@@ -86,14 +90,15 @@ public class SegmentInfoPerCommit { // TODO (DVU_RENAME) to SegmentCommitInfo
}
}
-/** Returns the files which contains field updates. */
-public Set<String> getUpdatesFiles() {
-return new HashSet<String>(updatesFiles);
+/** Returns the per generation updates files. */
+public Map<Long,Set<String>> getUpdatesFiles() {
+return Collections.unmodifiableMap(genUpdatesFiles);
}
-/** Called when we succeed in writing field updates. */
-public void addUpdatesFiles(Set<String> files) {
-updatesFiles.addAll(files);
+/** Sets the updates file names per generation. Does not deep clone the map. */
+public void setGenUpdatesFiles(Map<Long,Set<String>> genUpdatesFiles) {
+this.genUpdatesFiles.clear();
+this.genUpdatesFiles.putAll(genUpdatesFiles);
}
/** Called when we succeed in writing deletes */
@@ -151,7 +156,9 @@ public class SegmentInfoPerCommit { // TODO (DVU_RENAME) to SegmentCommitInfo
info.getCodec().liveDocsFormat().files(this, files);
// Must separately add any field updates files
-files.addAll(updatesFiles);
+for (Set<String> updateFiles : genUpdatesFiles.values()) {
+files.addAll(updateFiles);
+}
return files;
}
@@ -248,7 +255,10 @@ public class SegmentInfoPerCommit { // TODO (DVU_RENAME) to SegmentCommitInfo
other.nextWriteDelGen = nextWriteDelGen;
other.nextWriteFieldInfosGen = nextWriteFieldInfosGen;
-other.updatesFiles.addAll(updatesFiles);
+// deep clone
+for (Entry<Long,Set<String>> e : genUpdatesFiles.entrySet()) {
+other.genUpdatesFiles.put(e.getKey(), new HashSet<String>(e.getValue()));
+}
return other;
}
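The deep clone matters: copying the map with new HashMap<Long,Set<String>>(genUpdatesFiles) would still share the inner Set instances between the two commit infos, so mutating one would silently change the other. A minimal standalone demonstration of the difference (plain Java, names invented, not part of the patch):

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

class DeepCloneDemo {
  public static void main(String[] args) {
    Map<Long,Set<String>> original = new HashMap<Long,Set<String>>();
    original.put(1L, new HashSet<String>(Arrays.asList("_0_1.fnm")));

    // Shallow copy: both maps point at the same Set instance.
    Map<Long,Set<String>> shallow = new HashMap<Long,Set<String>>(original);
    shallow.get(1L).add("extra");
    System.out.println(original.get(1L)); // [_0_1.fnm, extra]: the original changed too

    // Deep copy, as clone() does above: every Set is duplicated first.
    original.get(1L).remove("extra");
    Map<Long,Set<String>> deep = new HashMap<Long,Set<String>>();
    for (Map.Entry<Long,Set<String>> e : original.entrySet()) {
      deep.put(e.getKey(), new HashSet<String>(e.getValue()));
    }
    deep.get(1L).add("extra");
    System.out.println(original.get(1L)); // [_0_1.fnm]: unaffected
  }
}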

org/apache/lucene/index/SegmentInfos.java

@@ -28,6 +28,7 @@ import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
+import java.util.Map.Entry;
import java.util.Set;
import org.apache.lucene.codecs.Codec;
@@ -349,7 +350,17 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfoPerCommit>
}
SegmentInfoPerCommit siPerCommit = new SegmentInfoPerCommit(info, delCount, delGen, fieldInfosGen);
if (format >= VERSION_46) {
-siPerCommit.addUpdatesFiles(input.readStringSet());
+int numGensUpdatesFiles = input.readInt();
+final Map<Long,Set<String>> genUpdatesFiles;
+if (numGensUpdatesFiles == 0) {
+genUpdatesFiles = Collections.emptyMap();
+} else {
+genUpdatesFiles = new HashMap<Long,Set<String>>(numGensUpdatesFiles);
+for (int i = 0; i < numGensUpdatesFiles; i++) {
+genUpdatesFiles.put(input.readLong(), input.readStringSet());
+}
+}
+siPerCommit.setGenUpdatesFiles(genUpdatesFiles);
}
add(siPerCommit);
}
@@ -420,7 +431,12 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfoPerCommit>
segnOutput.writeLong(siPerCommit.getDelGen());
segnOutput.writeInt(siPerCommit.getDelCount());
segnOutput.writeLong(siPerCommit.getFieldInfosGen());
-segnOutput.writeStringSet(siPerCommit.getUpdatesFiles());
+final Map<Long,Set<String>> genUpdatesFiles = siPerCommit.getUpdatesFiles();
+segnOutput.writeInt(genUpdatesFiles.size());
+for (Entry<Long,Set<String>> e : genUpdatesFiles.entrySet()) {
+segnOutput.writeLong(e.getKey());
+segnOutput.writeStringSet(e.getValue());
+}
assert si.dir == directory;
assert siPerCommit.getDelCount() <= si.getDocCount();
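On disk (segments_N, format VERSION_46) the per-commit updates section is now a count followed by one (generation, file set) pair per live generation. A sketch of that layout using java.io streams as a stand-in for Lucene's DataOutput/DataInput (Lucene's writeStringSet/readStringSet encode the set themselves; the count-plus-writeUTF encoding here is an assumption for illustration only):

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

class GenUpdatesFilesFormatSketch {
  static void write(DataOutput out, Map<Long,Set<String>> genUpdatesFiles) throws IOException {
    out.writeInt(genUpdatesFiles.size());          // number of generations
    for (Map.Entry<Long,Set<String>> e : genUpdatesFiles.entrySet()) {
      out.writeLong(e.getKey());                   // the fieldInfos generation
      out.writeInt(e.getValue().size());           // stand-in for writeStringSet
      for (String file : e.getValue()) {
        out.writeUTF(file);
      }
    }
  }

  static Map<Long,Set<String>> read(DataInput in) throws IOException {
    int numGens = in.readInt();
    Map<Long,Set<String>> genUpdatesFiles = new HashMap<Long,Set<String>>(numGens);
    for (int i = 0; i < numGens; i++) {
      long gen = in.readLong();
      int numFiles = in.readInt();                 // stand-in for readStringSet
      Set<String> files = new HashSet<String>(numFiles);
      for (int j = 0; j < numFiles; j++) {
        files.add(in.readUTF());
      }
      genUpdatesFiles.put(gen, files);
    }
    return genUpdatesFiles;
  }
}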

org/apache/lucene/index/SegmentReader.java

@@ -110,7 +110,7 @@ public final class SegmentReader extends AtomicReader {
final DocValuesFormat dvFormat = codec.docValuesFormat();
// initialize the per generation numericDVProducers and put the correct
// DVProducer for each field
-final Map<Long,List<FieldInfo>> genInfos = getGenInfos(si);
+final Map<Long,List<FieldInfo>> genInfos = getGenInfos();
// System.out.println("[" + Thread.currentThread().getName() + "] SR.init: new reader: " + si + "; gens=" + genInfos.keySet());
@@ -178,7 +178,7 @@ public final class SegmentReader extends AtomicReader {
final Directory dir = core.cfsReader != null ? core.cfsReader : si.info.dir;
final DocValuesFormat dvFormat = codec.docValuesFormat();
-final Map<Long,List<FieldInfo>> genInfos = getGenInfos(si);
+final Map<Long,List<FieldInfo>> genInfos = getGenInfos();
for (Entry<Long,List<FieldInfo>> e : genInfos.entrySet()) {
Long gen = e.getKey();
@@ -244,7 +244,7 @@ public final class SegmentReader extends AtomicReader {
}
// returns a gen->List<FieldInfo> mapping. Fields without DV updates have gen=-1
-private Map<Long,List<FieldInfo>> getGenInfos(SegmentInfoPerCommit si) {
+private Map<Long,List<FieldInfo>> getGenInfos() {
final Map<Long,List<FieldInfo>> genInfos = new HashMap<Long,List<FieldInfo>>();
for (FieldInfo fi : fieldInfos) {
if (fi.getDocValuesType() == null) {

org/apache/lucene/index/TestNumericDocValuesUpdates.java

@@ -5,6 +5,7 @@ import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.codecs.Codec;
@@ -28,8 +29,8 @@ import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util._TestUtil;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util._TestUtil;
import org.junit.Test;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
@@ -917,10 +918,11 @@ public class TestNumericDocValuesUpdates extends LuceneTestCase {
final IndexWriter writer = new IndexWriter(dir, conf);
// create index
-final int numThreads = atLeast(3);
+final int numThreads = _TestUtil.nextInt(random(), 3, 6);
final int numDocs = atLeast(2000);
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
doc.add(new StringField("id", "doc" + i, Store.NO));
double group = random().nextDouble();
String g;
if (group < 0.1) g = "g0";
@@ -937,20 +939,21 @@ public class TestNumericDocValuesUpdates extends LuceneTestCase {
}
final CountDownLatch done = new CountDownLatch(numThreads);
+final AtomicInteger numUpdates = new AtomicInteger(atLeast(100));
// same thread updates a field as well as reopens
Thread[] threads = new Thread[numThreads];
for (int i = 0; i < threads.length; i++) {
final String f = "f" + i;
final String cf = "cf" + i;
-final int numThreadUpdates = atLeast(40);
threads[i] = new Thread("UpdateThread-" + i) {
@Override
public void run() {
+DirectoryReader reader = null;
+boolean success = false;
try {
Random random = random();
-int numUpdates = numThreadUpdates;
-while (numUpdates-- > 0) {
+while (numUpdates.getAndDecrement() > 0) {
double group = random.nextDouble();
Term t;
if (group < 0.1) t = new Term("updKey", "g0");
@@ -965,20 +968,43 @@ public class TestNumericDocValuesUpdates extends LuceneTestCase {
if (random.nextDouble() < 0.2) {
// delete a random document
int doc = random.nextInt(numDocs);
+// System.out.println("[" + Thread.currentThread().getName() + "] deleteDoc=doc" + doc);
writer.deleteDocuments(new Term("id", "doc" + doc));
}
-if (random.nextDouble() < 0.1) {
-writer.commit(); // rarely commit
+if (random.nextDouble() < 0.05) { // commit every 20 updates on average
+// System.out.println("[" + Thread.currentThread().getName() + "] commit");
+writer.commit();
}
-if (random.nextDouble() < 0.3) { // obtain NRT reader (apply updates)
-DirectoryReader.open(writer, true).close();
+if (random.nextDouble() < 0.1) { // reopen NRT reader (apply updates), on average once every 10 updates
+if (reader == null) {
+// System.out.println("[" + Thread.currentThread().getName() + "] open NRT");
+reader = DirectoryReader.open(writer, true);
+} else {
+// System.out.println("[" + Thread.currentThread().getName() + "] reopen NRT");
+DirectoryReader r2 = DirectoryReader.openIfChanged(reader, writer, true);
+if (r2 != null) {
+reader.close();
+reader = r2;
+}
+}
}
}
+// System.out.println("[" + Thread.currentThread().getName() + "] DONE");
+success = true;
} catch (IOException e) {
throw new RuntimeException(e);
} finally {
+if (reader != null) {
+try {
+reader.close();
+} catch (IOException e) {
+if (success) { // suppress this exception only if there was another exception
+throw new RuntimeException(e);
+}
+}
+}
done.countDown();
}
}
@@ -1152,5 +1178,31 @@ public class TestNumericDocValuesUpdates extends LuceneTestCase {
IOUtils.close(dir1, dir2);
}
+@Test
+public void testDeleteUnusedUpdatesFiles() throws Exception {
+Directory dir = newDirectory();
+IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+IndexWriter writer = new IndexWriter(dir, conf);
+Document doc = new Document();
+doc.add(new StringField("id", "d0", Store.NO));
+doc.add(new NumericDocValuesField("f", 1L));
+writer.addDocument(doc);
+// create _0_1.fnm
+writer.updateNumericDocValue(new Term("id", "d0"), "f", 2L);
+writer.commit();
+// create _0_2.fnm, and _0_1.fnm should be deleted
+writer.updateNumericDocValue(new Term("id", "d0"), "f", 2L);
+writer.commit();
+assertTrue(dir.fileExists("_0_2.fnm"));
+assertFalse("old generation field infos file should not exist in the directory: _0_1.fnm", dir.fileExists("_0_1.fnm"));
+writer.close();
+dir.close();
+}
}