GITHUB-11761: Move minimum TieredMergePolicy delete percentage and change default value (#11831)

Move minimum TieredMergePolicy delete percentage from 20% to 5%

and change the deletesPctAllowed default from 33% to 20%

Co-authored-by: Marc D'Mello <dmellomd@amazon.com>
This commit is contained in:
Marc D'Mello 2022-10-05 15:33:12 -07:00 committed by GitHub
parent f54fddc89f
commit d966adcb62
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 14 additions and 4 deletions

View File

@ -98,6 +98,9 @@ API Changes
* GITHUB#11804: FacetsCollector#collect is no longer final, allowing extension. (Greg Miller)
* GITHUB#11761: TieredMergePolicy now allows the maximum allowable deletes percentage to be set as low as 5%, and the
default maximum allowable deletes percentage is changed from 33% to 20%. (Marc D'Mello)
Improvements
---------------------
* GITHUB#11778: Detailed part-of-speech information for particle(조사) and ending(어미) on Nori

View File

@ -92,7 +92,7 @@ public class TieredMergePolicy extends MergePolicy {
private long floorSegmentBytes = 2 * 1024 * 1024L;
private double segsPerTier = 10.0;
private double forceMergeDeletesPctAllowed = 10.0;
private double deletesPctAllowed = 33.0;
private double deletesPctAllowed = 20.0;
/** Sole constructor, setting all settings to their defaults. */
public TieredMergePolicy() {
@ -151,12 +151,17 @@ public class TieredMergePolicy extends MergePolicy {
/**
* Controls the maximum percentage of deleted documents that is tolerated in the index. Lower
* values make the index more space efficient at the expense of increased CPU and I/O activity.
* Values must be between 20 and 50. Default value is 33.
* Values must be between 5 and 50. Default value is 20.
*
* <p>When the maximum delete percentage is lowered, the indexing thread will call for merges more
* often, meaning that the write amplification factor will be increased. The write amplification
* factor measures the number of times each document in the index is written. A higher write
* amplification factor will lead to higher CPU and I/O activity, as indicated above.
*/
public TieredMergePolicy setDeletesPctAllowed(double v) {
if (v < 20 || v > 50) {
if (v < 5 || v > 50) {
throw new IllegalArgumentException(
"indexPctDeletedTarget must be >= 20.0 and <= 50 (got " + v + ")");
"indexPctDeletedTarget must be >= 5.0 and <= 50 (got " + v + ")");
}
deletesPctAllowed = v;
return this;

View File

@ -1345,6 +1345,7 @@ public class TestIndexWriterDelete extends LuceneTestCase {
w.addDocument(doc);
w.close();
iwc = new IndexWriterConfig(new MockAnalyzer(random()));
((TieredMergePolicy) iwc.getMergePolicy()).setDeletesPctAllowed(33.0);
iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
w = new IndexWriter(d, iwc);
IndexReader r = DirectoryReader.open(w, false, false);

View File

@ -310,6 +310,7 @@ public class TestTieredMergePolicy extends BaseMergePolicyTestCase {
((1024.0 * 1024.0)); // fudge it up, we're trying to catch egregious errors and segbytes
// don't really reflect the number for original merges.
tmp.setMaxMergedSegmentMB(mbSize);
tmp.setDeletesPctAllowed(33.0);
conf.setMaxBufferedDocs(100);
conf.setMergePolicy(tmp);