LUCENE-2086: resolve deletes-by-term in term sorted order for better performance

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@882672 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-11-20 18:37:42 +00:00
parent 85fbeddf44
commit f3b9bea18d
3 changed files with 27 additions and 8 deletions

View File

@ -17,6 +17,9 @@ New features
Optimizations
* LUCENE-2086: When resolving deleted terms, do so in term sort order
for better performance (Bogdan Ghidireac via Mike McCandless)
Build
======================= Release 3.0.0 2009-11-25 =======================

View File

@ -18,6 +18,8 @@ package org.apache.lucene.index;
*/
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map.Entry;
@ -33,10 +35,20 @@ import org.apache.lucene.search.Query;
* previously flushed segments. */
class BufferedDeletes {
int numTerms;
HashMap<Term,Num> terms = new HashMap<Term,Num>();
HashMap<Query,Integer> queries = new HashMap<Query,Integer>();
Map<Term,Num> terms;
Map<Query,Integer> queries = new HashMap<Query,Integer>();
List<Integer> docIDs = new ArrayList<Integer>();
long bytesUsed;
private final boolean doTermSort;
/** Creates a new buffered-deletes container.
 *
 *  @param doTermSort when true, pending delete-by-term entries are
 *         kept in a sorted map so they can be resolved in term sort
 *         order (per LUCENE-2086); when false an unordered hash map
 *         is used instead. */
public BufferedDeletes(boolean doTermSort) {
  this.doTermSort = doTermSort;
  // Sorted vs. hashed storage is the only behavioral difference.
  terms = doTermSort ? new TreeMap<Term,Num>() : new HashMap<Term,Num>();
}
// Number of documents a delete term applies to.
final static class Num {
@ -104,11 +116,15 @@ class BufferedDeletes {
MergePolicy.OneMerge merge,
int mergeDocCount) {
final HashMap<Term,Num> newDeleteTerms;
final Map<Term,Num> newDeleteTerms;
// Remap delete-by-term
if (terms.size() > 0) {
newDeleteTerms = new HashMap<Term, Num>();
if (doTermSort) {
newDeleteTerms = new TreeMap<Term,Num>();
} else {
newDeleteTerms = new HashMap<Term,Num>();
}
for(Entry<Term,Num> entry : terms.entrySet()) {
Num num = entry.getValue();
newDeleteTerms.put(entry.getKey(),

View File

@ -23,6 +23,7 @@ import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
@ -219,11 +220,11 @@ final class DocumentsWriter {
// Deletes done after the last flush; these are discarded
// on abort
private BufferedDeletes deletesInRAM = new BufferedDeletes();
private BufferedDeletes deletesInRAM = new BufferedDeletes(false);
// Deletes done before the last flush; these are still
// kept on abort
private BufferedDeletes deletesFlushed = new BufferedDeletes();
private BufferedDeletes deletesFlushed = new BufferedDeletes(true);
// The max number of delete terms that can be buffered before
// they must be flushed to disk.
@ -828,7 +829,7 @@ final class DocumentsWriter {
}
// for testing
synchronized HashMap<Term,BufferedDeletes.Num> getBufferedDeleteTerms() {
// Returns the delete terms buffered in RAM since the last flush.
// Declared as Map (not HashMap) because the backing map may be a
// TreeMap or a HashMap depending on how deletesInRAM was constructed.
// NOTE(review): returns the live internal map, not a copy — callers
// (tests) must not mutate it.
synchronized Map<Term,BufferedDeletes.Num> getBufferedDeleteTerms() {
return deletesInRAM.terms;
}
@ -974,7 +975,6 @@ final class DocumentsWriter {
try {
for (Entry<Term, BufferedDeletes.Num> entry: deletesFlushed.terms.entrySet()) {
Term term = entry.getKey();
docs.seek(term);
int limit = entry.getValue().getNum();
while (docs.next()) {