From c534c0b3b39454fda9bd65138cc7f7078545b2d9 Mon Sep 17 00:00:00 2001
From: Shai Erera <shaie@apache.org>
Date: Sun, 29 Sep 2013 17:37:02 +0000
Subject: [PATCH] LUCENE-5246: SegmentInfoPerCommit continues to list unneeded
 updatesFiles

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1527361 13f79535-47bb-0310-9956-ffa450edef68
---
 .../lucene/index/ReadersAndLiveDocs.java      | 23 +++++-
 .../lucene/index/SegmentInfoPerCommit.java    | 30 +++++---
 .../org/apache/lucene/index/SegmentInfos.java | 20 +++++-
 .../apache/lucene/index/SegmentReader.java    |  6 +-
 .../index/TestNumericDocValuesUpdates.java    | 72 ++++++++++++++++---
 5 files changed, 123 insertions(+), 28 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java b/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
index d6f8e22087b..1b9962a2ea5 100644
--- a/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
+++ b/lucene/core/src/java/org/apache/lucene/index/ReadersAndLiveDocs.java
@@ -23,6 +23,7 @@ import java.util.Iterator;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.NoSuchElementException;
+import java.util.Set;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.lucene.codecs.Codec;
@@ -351,6 +352,7 @@ class ReadersAndLiveDocs { // TODO (DVU_RENAME) to ReaderAndUpdates
     // We can write directly to the actual name (vs to a
     // .tmp & renaming it) because the file is not live
     // until segments file is written:
+    FieldInfos fieldInfos = null;
     boolean success = false;
     try {
       Codec codec = info.info.getCodec();
@@ -385,7 +387,7 @@ class ReadersAndLiveDocs { // TODO (DVU_RENAME) to ReaderAndUpdates
           builder.addOrUpdate(f, NumericDocValuesField.TYPE);
         }
         
-        final FieldInfos fieldInfos = builder.finish();
+        fieldInfos = builder.finish();
         final long nextFieldInfosGen = info.getNextFieldInfosGen();
         final String segmentSuffix = Long.toString(nextFieldInfosGen, Character.MAX_RADIX);
         final SegmentWriteState state = new SegmentWriteState(null, trackingDir, info.info, fieldInfos, null, IOContext.DEFAULT, segmentSuffix);
@@ -502,10 +504,25 @@ class ReadersAndLiveDocs { // TODO (DVU_RENAME) to ReaderAndUpdates
         copyUpdatesToMerging();
       }
       numericUpdates.clear();
+      
+      // create a new map, keeping only the gens that are in use
+      Map<Long,Set<String>> genUpdatesFiles = info.getUpdatesFiles();
+      Map<Long,Set<String>> newGenUpdatesFiles = new HashMap<Long,Set<String>>();
+      final long fieldInfosGen = info.getFieldInfosGen();
+      for (FieldInfo fi : fieldInfos) {
+        long dvGen = fi.getDocValuesGen();
+        if (dvGen != -1 && !newGenUpdatesFiles.containsKey(dvGen)) {
+          if (dvGen == fieldInfosGen) {
+            newGenUpdatesFiles.put(fieldInfosGen, trackingDir.getCreatedFiles());
+          } else {
+            newGenUpdatesFiles.put(dvGen, genUpdatesFiles.get(dvGen));
+          }
+        }
+      }
+      
+      info.setGenUpdatesFiles(newGenUpdatesFiles);
     }
     
-    info.addUpdatesFiles(trackingDir.getCreatedFiles());
-    
     return true;
   }
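[Editor's note] The last hunk above is the heart of the fix: instead of forever appending trackingDir.getCreatedFiles() to the commit, writeLiveDocs now rebuilds the per-generation map from scratch and keeps only generations that some FieldInfo still references. Once a generation disappears from the map, files() stops reporting its files, their reference counts can drop to zero, and the deletion policy can reclaim them. A minimal, self-contained sketch of the pruning idea follows; the class and method names are illustrative, not Lucene API:

  import java.util.HashMap;
  import java.util.Map;
  import java.util.Set;
  
  class PruneUpdatesFilesSketch {
    /**
     * oldGens  - gen -> update files currently recorded by the commit
     * liveGens - docValues generations still referenced by the latest
     *            FieldInfos (gen -1, "never updated", already filtered out)
     * newGen   - the generation just written, whose files are newFiles
     */
    static Map<Long,Set<String>> prune(Map<Long,Set<String>> oldGens,
                                       Set<Long> liveGens,
                                       long newGen, Set<String> newFiles) {
      Map<Long,Set<String>> kept = new HashMap<Long,Set<String>>();
      for (Long gen : liveGens) {
        if (gen.longValue() == newGen) {
          kept.put(gen, newFiles);          // files created by this round of updates
        } else if (oldGens.containsKey(gen)) {
          kept.put(gen, oldGens.get(gen));  // an older gen some field still uses
        }
      }
      return kept;                          // gens absent here become deletable
    }
  }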
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java
index da1bdd769fa..1b13307bf8f 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfoPerCommit.java
@@ -19,7 +19,11 @@ package org.apache.lucene.index;
 
 import java.io.IOException;
 import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Set;
 
 import org.apache.lucene.store.Directory;
@@ -51,8 +55,8 @@ public class SegmentInfoPerCommit { // TODO (DVU_RENAME) to SegmentCommitInfo
   // write
   private long nextWriteFieldInfosGen;
   
-  // Tracks the files with field updates
-  private Set<String> updatesFiles = new HashSet<String>();
+  // Track the per-generation updates files
+  private final Map<Long,Set<String>> genUpdatesFiles = new HashMap<Long,Set<String>>();
   
   private volatile long sizeInBytes = -1;
@@ -86,14 +90,15 @@ public class SegmentInfoPerCommit { // TODO (DVU_RENAME) to SegmentCommitInfo
     }
   }
   
-  /** Returns the files which contains field updates. */
-  public Set<String> getUpdatesFiles() {
-    return new HashSet<String>(updatesFiles);
+  /** Returns the per generation updates files. */
+  public Map<Long,Set<String>> getUpdatesFiles() {
+    return Collections.unmodifiableMap(genUpdatesFiles);
   }
   
-  /** Called when we succeed in writing field updates. */
-  public void addUpdatesFiles(Set<String> files) {
-    updatesFiles.addAll(files);
+  /** Sets the updates file names per generation. Does not deep clone the map. */
+  public void setGenUpdatesFiles(Map<Long,Set<String>> genUpdatesFiles) {
+    this.genUpdatesFiles.clear();
+    this.genUpdatesFiles.putAll(genUpdatesFiles);
   }
   
   /** Called when we succeed in writing deletes */
@@ -151,7 +156,9 @@ public class SegmentInfoPerCommit { // TODO (DVU_RENAME) to SegmentCommitInfo
     info.getCodec().liveDocsFormat().files(this, files);
     
     // Must separately add any field updates files
-    files.addAll(updatesFiles);
+    for (Set<String> updateFiles : genUpdatesFiles.values()) {
+      files.addAll(updateFiles);
+    }
     
     return files;
   }
@@ -248,7 +255,10 @@ public class SegmentInfoPerCommit { // TODO (DVU_RENAME) to SegmentCommitInfo
     other.nextWriteDelGen = nextWriteDelGen;
     other.nextWriteFieldInfosGen = nextWriteFieldInfosGen;
     
-    other.updatesFiles.addAll(updatesFiles);
+    // deep clone
+    for (Entry<Long,Set<String>> e : genUpdatesFiles.entrySet()) {
+      other.genUpdatesFiles.put(e.getKey(), new HashSet<String>(e.getValue()));
+    }
     
     return other;
   }
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
index 44a26025885..b8c8e088332 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentInfos.java
@@ -28,6 +28,7 @@ import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
+import java.util.Map.Entry;
 import java.util.Set;
 
 import org.apache.lucene.codecs.Codec;
@@ -349,7 +350,17 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfoPerCommit>
         }
         SegmentInfoPerCommit siPerCommit = new SegmentInfoPerCommit(info, delCount, delGen, fieldInfosGen);
         if (format >= VERSION_46) {
-          siPerCommit.addUpdatesFiles(input.readStringSet());
+          int numGensUpdatesFiles = input.readInt();
+          final Map<Long,Set<String>> genUpdatesFiles;
+          if (numGensUpdatesFiles == 0) {
+            genUpdatesFiles = Collections.emptyMap();
+          } else {
+            genUpdatesFiles = new HashMap<Long,Set<String>>(numGensUpdatesFiles);
+            for (int i = 0; i < numGensUpdatesFiles; i++) {
+              genUpdatesFiles.put(input.readLong(), input.readStringSet());
+            }
+          }
+          siPerCommit.setGenUpdatesFiles(genUpdatesFiles);
         }
         add(siPerCommit);
       }
@@ -420,7 +431,12 @@ public final class SegmentInfos implements Cloneable, Iterable<SegmentInfoPerCommit>
       segnOutput.writeLong(siPerCommit.getDelGen());
       segnOutput.writeInt(siPerCommit.getDelCount());
       segnOutput.writeLong(siPerCommit.getFieldInfosGen());
-      segnOutput.writeStringSet(siPerCommit.getUpdatesFiles());
+      final Map<Long,Set<String>> genUpdatesFiles = siPerCommit.getUpdatesFiles();
+      segnOutput.writeInt(genUpdatesFiles.size());
+      for (Entry<Long,Set<String>> e : genUpdatesFiles.entrySet()) {
+        segnOutput.writeLong(e.getKey());
+        segnOutput.writeStringSet(e.getValue());
+      }
       assert si.dir == directory;
       
       assert siPerCommit.getDelCount() <= si.getDocCount();
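[Editor's note] On disk, a segment's entry in segments_N (format VERSION_46 and up) now ends with an int count of generations followed by (long generation, string set of file names) pairs, replacing the single flat string set. The read and write hunks above must stay exactly symmetric. The hypothetical helpers below restate that layout with the same DataOutput/DataInput calls the patch itself uses; only the class and method names are invented:

  import java.io.IOException;
  import java.util.HashMap;
  import java.util.Map;
  import java.util.Set;
  
  import org.apache.lucene.store.DataInput;
  import org.apache.lucene.store.DataOutput;
  
  class GenUpdatesFilesCodecSketch {
    static void write(DataOutput out, Map<Long,Set<String>> genUpdatesFiles) throws IOException {
      out.writeInt(genUpdatesFiles.size());           // number of generations
      for (Map.Entry<Long,Set<String>> e : genUpdatesFiles.entrySet()) {
        out.writeLong(e.getKey());                    // fieldInfos generation
        out.writeStringSet(e.getValue());             // files written by that generation
      }
    }
    
    static Map<Long,Set<String>> read(DataInput in) throws IOException {
      int numGens = in.readInt();
      Map<Long,Set<String>> genUpdatesFiles = new HashMap<Long,Set<String>>(numGens);
      for (int i = 0; i < numGens; i++) {
        genUpdatesFiles.put(in.readLong(), in.readStringSet());
      }
      return genUpdatesFiles;
    }
  }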
diff --git a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
index 855117cdd0c..5b3b3efce3a 100644
--- a/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
+++ b/lucene/core/src/java/org/apache/lucene/index/SegmentReader.java
@@ -110,7 +110,7 @@ public final class SegmentReader extends AtomicReader {
       final DocValuesFormat dvFormat = codec.docValuesFormat();
       // initialize the per generation numericDVProducers and put the correct
       // DVProducer for each field
-      final Map<Long,List<FieldInfo>> genInfos = getGenInfos(si);
+      final Map<Long,List<FieldInfo>> genInfos = getGenInfos();
       
 //      System.out.println("[" + Thread.currentThread().getName() + "] SR.init: new reader: " + si + "; gens=" + genInfos.keySet());
       
@@ -178,7 +178,7 @@ public final class SegmentReader extends AtomicReader {
     final Directory dir = core.cfsReader != null ? core.cfsReader : si.info.dir;
     
     final DocValuesFormat dvFormat = codec.docValuesFormat();
-    final Map<Long,List<FieldInfo>> genInfos = getGenInfos(si);
+    final Map<Long,List<FieldInfo>> genInfos = getGenInfos();
     
     for (Entry<Long,List<FieldInfo>> e : genInfos.entrySet()) {
       Long gen = e.getKey();
@@ -244,7 +244,7 @@ public final class SegmentReader extends AtomicReader {
   }
   
   // returns a gen->List<FieldInfo> mapping. Fields without DV updates have gen=-1
-  private Map<Long,List<FieldInfo>> getGenInfos(SegmentInfoPerCommit si) {
+  private Map<Long,List<FieldInfo>> getGenInfos() {
     final Map<Long,List<FieldInfo>> genInfos = new HashMap<Long,List<FieldInfo>>();
     for (FieldInfo fi : fieldInfos) {
       if (fi.getDocValuesType() == null) {
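[Editor's note] Dropping the SegmentInfoPerCommit parameter works because getGenInfos() needs nothing beyond the reader's own fieldInfos: it buckets doc-values fields by their docValues generation so that one producer can be opened per generation. Roughly, as a sketch against the 4.x FieldInfo/FieldInfos API (not the method's exact body):

  import java.util.ArrayList;
  import java.util.HashMap;
  import java.util.List;
  import java.util.Map;
  
  import org.apache.lucene.index.FieldInfo;
  import org.apache.lucene.index.FieldInfos;
  
  class GenInfosSketch {
    // Bucket doc-values fields by generation; gen -1 means the field's values
    // were written with the segment itself and have never been updated.
    static Map<Long,List<FieldInfo>> groupByGen(FieldInfos fieldInfos) {
      Map<Long,List<FieldInfo>> genInfos = new HashMap<Long,List<FieldInfo>>();
      for (FieldInfo fi : fieldInfos) {
        if (fi.getDocValuesType() == null) {
          continue; // not a doc-values field
        }
        long gen = fi.getDocValuesGen();
        List<FieldInfo> infos = genInfos.get(gen);
        if (infos == null) {
          infos = new ArrayList<FieldInfo>();
          genInfos.put(gen, infos);
        }
        infos.add(fi);
      }
      return genInfos;
    }
  }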
diff --git a/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java b/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
index 6d51b7107d4..4aa7d953fa1 100644
--- a/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
+++ b/lucene/core/src/test/org/apache/lucene/index/TestNumericDocValuesUpdates.java
@@ -5,6 +5,7 @@ import java.util.HashSet;
 import java.util.Random;
 import java.util.Set;
 import java.util.concurrent.CountDownLatch;
+import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.codecs.Codec;
@@ -28,8 +29,8 @@ import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.LuceneTestCase;
-import org.apache.lucene.util._TestUtil;
 import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
+import org.apache.lucene.util._TestUtil;
 import org.junit.Test;
 
 import com.carrotsearch.randomizedtesting.generators.RandomPicks;
@@ -917,10 +918,11 @@ public class TestNumericDocValuesUpdates extends LuceneTestCase {
     final IndexWriter writer = new IndexWriter(dir, conf);
     
     // create index
-    final int numThreads = atLeast(3);
+    final int numThreads = _TestUtil.nextInt(random(), 3, 6);
     final int numDocs = atLeast(2000);
     for (int i = 0; i < numDocs; i++) {
       Document doc = new Document();
+      doc.add(new StringField("id", "doc" + i, Store.NO));
       double group = random().nextDouble();
       String g;
       if (group < 0.1) g = "g0";
@@ -937,20 +939,21 @@ public class TestNumericDocValuesUpdates extends LuceneTestCase {
     }
     
     final CountDownLatch done = new CountDownLatch(numThreads);
+    final AtomicInteger numUpdates = new AtomicInteger(atLeast(100));
     
     // same thread updates a field as well as reopens
     Thread[] threads = new Thread[numThreads];
     for (int i = 0; i < threads.length; i++) {
       final String f = "f" + i;
       final String cf = "cf" + i;
-      final int numThreadUpdates = atLeast(40);
       threads[i] = new Thread("UpdateThread-" + i) {
         @Override
         public void run() {
+          DirectoryReader reader = null;
+          boolean success = false;
           try {
             Random random = random();
-            int numUpdates = numThreadUpdates;
-            while (numUpdates-- > 0) {
+            while (numUpdates.getAndDecrement() > 0) {
               double group = random.nextDouble();
               Term t;
               if (group < 0.1) t = new Term("updKey", "g0");
@@ -965,20 +968,43 @@ public class TestNumericDocValuesUpdates extends LuceneTestCase {
               if (random.nextDouble() < 0.2) {
                 // delete a random document
                 int doc = random.nextInt(numDocs);
+//                System.out.println("[" + Thread.currentThread().getName() + "] deleteDoc=doc" + doc);
                 writer.deleteDocuments(new Term("id", "doc" + doc));
               }
-              
-              if (random.nextDouble() < 0.1) {
-                writer.commit(); // rarely commit
+              
+              if (random.nextDouble() < 0.05) { // commit every 20 updates on average
+//                System.out.println("[" + Thread.currentThread().getName() + "] commit");
+                writer.commit();
               }
               
-              if (random.nextDouble() < 0.3) { // obtain NRT reader (apply updates)
-                DirectoryReader.open(writer, true).close();
+              if (random.nextDouble() < 0.1) { // reopen NRT reader (apply updates), on average once every 10 updates
+                if (reader == null) {
+//                  System.out.println("[" + Thread.currentThread().getName() + "] open NRT");
+                  reader = DirectoryReader.open(writer, true);
+                } else {
+//                  System.out.println("[" + Thread.currentThread().getName() + "] reopen NRT");
+                  DirectoryReader r2 = DirectoryReader.openIfChanged(reader, writer, true);
+                  if (r2 != null) {
+                    reader.close();
+                    reader = r2;
+                  }
+                }
               }
             }
+//            System.out.println("[" + Thread.currentThread().getName() + "] DONE");
+            success = true;
           } catch (IOException e) {
             throw new RuntimeException(e);
           } finally {
+            if (reader != null) {
+              try {
+                reader.close();
+              } catch (IOException e) {
+                if (success) { // suppress this exception only if there was another exception
+                  throw new RuntimeException(e);
+                }
+              }
+            }
             done.countDown();
           }
         }
@@ -1152,5 +1178,31 @@ public class TestNumericDocValuesUpdates extends LuceneTestCase {
     
     IOUtils.close(dir1, dir2);
   }
+  
+  @Test
+  public void testDeleteUnusedUpdatesFiles() throws Exception {
+    Directory dir = newDirectory();
+    IndexWriterConfig conf = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
+    IndexWriter writer = new IndexWriter(dir, conf);
+    
+    Document doc = new Document();
+    doc.add(new StringField("id", "d0", Store.NO));
+    doc.add(new NumericDocValuesField("f", 1L));
+    writer.addDocument(doc);
+    
+    // create _0_1.fnm
+    writer.updateNumericDocValue(new Term("id", "d0"), "f", 2L);
+    writer.commit();
+    
+    // create _0_2.fnm, and _0_1.fnm should be deleted
+    writer.updateNumericDocValue(new Term("id", "d0"), "f", 2L);
+    writer.commit();
+    
+    assertTrue(dir.fileExists("_0_2.fnm"));
+    assertFalse("old generation field infos file should not exist in the directory: _0_1.fnm", dir.fileExists("_0_1.fnm"));
+    
+    writer.close();
+    dir.close();
+  }
 }
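[Editor's note] The new test asserts against the directory contents directly. Another way to observe the fix, sketched below against the 4.x SegmentInfos API, is to list the files the latest commit references: after the second commit, _0_1.fnm should appear in neither the directory nor this list. The class name and printed message are illustrative only:

  import java.util.Collection;
  
  import org.apache.lucene.index.SegmentInfos;
  import org.apache.lucene.store.Directory;
  
  class ListCommitFilesSketch {
    static void printReferencedFiles(Directory dir) throws Exception {
      SegmentInfos sis = new SegmentInfos();
      sis.read(dir);                                   // reads the latest segments_N
      Collection<String> files = sis.files(dir, true); // include the segments file itself
      // Before this fix, "_0_1.fnm" stayed in this list forever (and so was
      // never deleted); with it, only the live generation's files remain.
      System.out.println("commit references: " + files);
    }
  }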