feat: soft delete optimize (#12339)

This commit is contained in:
fudongying 2023-06-09 17:41:28 +08:00 committed by GitHub
parent 9a2d19324f
commit 2934899ca6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 141 additions and 4 deletions

View File

@ -177,6 +177,8 @@ Optimizations
* GITHUB#12328: Optimize ConjunctionDISI.createConjunction (Armin Braun)
* GITHUB#12339: Avoid part of the duplicate numDeletesToMerge calculations in the merge phase (fudongying)
Bug Fixes
---------------------

View File

@ -0,0 +1,61 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.HashMap;
import java.util.Set;
import org.apache.lucene.util.InfoStream;
/**
 * A {@link MergePolicy.MergeContext} decorator that memoizes the result of {@link
 * #numDeletesToMerge(SegmentCommitInfo)} per segment, so that repeated lookups for the same
 * segment reach the wrapped context only once. Every other method is forwarded unchanged.
 *
 * <p>Entries are never evicted or refreshed: once a value has been computed for a segment, this
 * instance keeps returning that value.
 */
class CachingMergeContext implements MergePolicy.MergeContext {
  /** The wrapped context that all calls are ultimately delegated to. */
  final MergePolicy.MergeContext mergeContext;

  /** Values previously returned by the wrapped context's numDeletesToMerge, keyed by segment. */
  final HashMap<SegmentCommitInfo, Integer> cachedNumDeletesToMerge = new HashMap<>();

  /**
   * Creates a caching view over the given context.
   *
   * @param mergeContext the context to delegate to
   */
  CachingMergeContext(MergePolicy.MergeContext mergeContext) {
    this.mergeContext = mergeContext;
  }

  /**
   * Returns the cached value for {@code info} if one exists; otherwise asks the wrapped context,
   * stores the answer and returns it.
   *
   * @param info the segment to look up
   * @return the (possibly cached) value reported by the wrapped context
   * @throws IOException if the wrapped context throws; nothing is cached in that case
   */
  @Override
  public final int numDeletesToMerge(SegmentCommitInfo info) throws IOException {
    Integer cached = cachedNumDeletesToMerge.get(info);
    if (cached == null) {
      // First request for this segment: compute once and remember the answer.
      cached = mergeContext.numDeletesToMerge(info);
      cachedNumDeletesToMerge.put(info, cached);
    }
    return cached;
  }

  // The remaining methods are plain pass-throughs; nothing is cached for them.

  @Override
  public final int numDeletedDocs(SegmentCommitInfo info) {
    return mergeContext.numDeletedDocs(info);
  }

  @Override
  public final Set<SegmentCommitInfo> getMergingSegments() {
    return mergeContext.getMergingSegments();
  }

  @Override
  public final InfoStream getInfoStream() {
    return mergeContext.getInfoStream();
  }
}

View File

@ -2214,10 +2214,11 @@ public class IndexWriter
}
final MergePolicy mergePolicy = config.getMergePolicy();
final CachingMergeContext cachingMergeContext = new CachingMergeContext(this);
MergePolicy.MergeSpecification spec;
boolean newMergesFound = false;
synchronized (this) {
spec = mergePolicy.findForcedDeletesMerges(segmentInfos, this);
spec = mergePolicy.findForcedDeletesMerges(segmentInfos, cachingMergeContext);
newMergesFound = spec != null;
if (newMergesFound) {
final int numMerges = spec.merges.size();
@ -2327,6 +2328,7 @@ public class IndexWriter
}
final MergePolicy.MergeSpecification spec;
final CachingMergeContext cachingMergeContext = new CachingMergeContext(this);
if (maxNumSegments != UNBOUNDED_MAX_MERGE_SEGMENTS) {
assert trigger == MergeTrigger.EXPLICIT || trigger == MergeTrigger.MERGE_FINISHED
: "Expected EXPLICT or MERGE_FINISHED as trigger even with maxNumSegments set but was: "
@ -2334,7 +2336,10 @@ public class IndexWriter
spec =
mergePolicy.findForcedMerges(
segmentInfos, maxNumSegments, Collections.unmodifiableMap(segmentsToMerge), this);
segmentInfos,
maxNumSegments,
Collections.unmodifiableMap(segmentsToMerge),
cachingMergeContext);
if (spec != null) {
final int numMerges = spec.merges.size();
for (int i = 0; i < numMerges; i++) {
@ -2346,7 +2351,7 @@ public class IndexWriter
switch (trigger) {
case GET_READER:
case COMMIT:
spec = mergePolicy.findFullFlushMerges(trigger, segmentInfos, this);
spec = mergePolicy.findFullFlushMerges(trigger, segmentInfos, cachingMergeContext);
break;
case ADD_INDEXES:
throw new IllegalStateException(
@ -2358,7 +2363,7 @@ public class IndexWriter
case SEGMENT_FLUSH:
case CLOSING:
default:
spec = mergePolicy.findMerges(trigger, segmentInfos, this);
spec = mergePolicy.findMerges(trigger, segmentInfos, cachingMergeContext);
}
}
if (spec != null) {

View File

@ -0,0 +1,69 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;
import java.io.IOException;
import java.util.Set;
import org.apache.lucene.tests.util.LuceneTestCase;
import org.apache.lucene.util.InfoStream;
/** Verifies that {@link CachingMergeContext} caches the result of numDeletesToMerge. */
public class TestCachingMergeContext extends LuceneTestCase {

  /**
   * The first lookup must reach the wrapped context and populate the cache; later lookups must be
   * served from the cache even though the wrapped context would now return a different value.
   */
  public void testNumDeletesToMerge() throws IOException {
    MockMergeContext mergeContext = new MockMergeContext();
    CachingMergeContext cachingMergeContext = new CachingMergeContext(mergeContext);

    // First call: cache miss, delegate is invoked once and its answer (1) is stored.
    assertEquals(1, cachingMergeContext.numDeletesToMerge(null));
    assertEquals(1, cachingMergeContext.cachedNumDeletesToMerge.size());
    assertEquals(
        Integer.valueOf(1), cachingMergeContext.cachedNumDeletesToMerge.getOrDefault(null, -1));
    assertEquals(1, mergeContext.count);

    // Call the mock directly so that its next answer would differ from the cached one.
    mergeContext.numDeletesToMerge(null);
    assertEquals(2, mergeContext.count);

    // The caching context must still report the originally cached value of 1.
    assertEquals(1, cachingMergeContext.numDeletesToMerge(null));
    assertEquals(1, cachingMergeContext.cachedNumDeletesToMerge.size());
    assertEquals(
        Integer.valueOf(1), cachingMergeContext.cachedNumDeletesToMerge.getOrDefault(null, -1));
  }

  /**
   * Merge context stub whose numDeletesToMerge returns an invocation counter, so each call to it
   * yields a new value and cache hits can be told apart from delegate calls.
   */
  private static final class MockMergeContext implements MergePolicy.MergeContext {
    /** Number of times {@link #numDeletesToMerge(SegmentCommitInfo)} has been invoked. */
    int count = 0;

    @Override
    public final int numDeletesToMerge(SegmentCommitInfo info) throws IOException {
      this.count += 1;
      return this.count;
    }

    @Override
    public int numDeletedDocs(SegmentCommitInfo info) {
      return 0;
    }

    // Not exercised by this test; the stub returns null.
    @Override
    public InfoStream getInfoStream() {
      return null;
    }

    // Not exercised by this test; the stub returns null.
    @Override
    public Set<SegmentCommitInfo> getMergingSegments() {
      return null;
    }
  }
}