mirror of https://github.com/apache/lucene.git
LUCENE-2086: resolve deletes-by-term in term sorted order for better performance
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@882672 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
85fbeddf44
commit
f3b9bea18d
|
@ -17,6 +17,9 @@ New features
|
|||
|
||||
Optimizations
|
||||
|
||||
* LUCENE-2086: When resolving deleted terms, do so in term sort order
|
||||
for better performance (Bogdan Ghidireac via Mike McCandless)
|
||||
|
||||
Build
|
||||
|
||||
======================= Release 3.0.0 2009-11-25 =======================
|
||||
|
|
|
@ -18,6 +18,8 @@ package org.apache.lucene.index;
|
|||
*/
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map.Entry;
|
||||
|
@ -33,10 +35,20 @@ import org.apache.lucene.search.Query;
|
|||
* previously flushed segments. */
|
||||
class BufferedDeletes {
|
||||
int numTerms;
|
||||
HashMap<Term,Num> terms = new HashMap<Term,Num>();
|
||||
HashMap<Query,Integer> queries = new HashMap<Query,Integer>();
|
||||
Map<Term,Num> terms;
|
||||
Map<Query,Integer> queries = new HashMap<Query,Integer>();
|
||||
List<Integer> docIDs = new ArrayList<Integer>();
|
||||
long bytesUsed;
|
||||
private final boolean doTermSort;
|
||||
|
||||
public BufferedDeletes(boolean doTermSort) {
|
||||
this.doTermSort = doTermSort;
|
||||
if (doTermSort) {
|
||||
terms = new TreeMap<Term,Num>();
|
||||
} else {
|
||||
terms = new HashMap<Term,Num>();
|
||||
}
|
||||
}
|
||||
|
||||
// Number of documents a delete term applies to.
|
||||
final static class Num {
|
||||
|
@ -104,11 +116,15 @@ class BufferedDeletes {
|
|||
MergePolicy.OneMerge merge,
|
||||
int mergeDocCount) {
|
||||
|
||||
final HashMap<Term,Num> newDeleteTerms;
|
||||
final Map<Term,Num> newDeleteTerms;
|
||||
|
||||
// Remap delete-by-term
|
||||
if (terms.size() > 0) {
|
||||
newDeleteTerms = new HashMap<Term, Num>();
|
||||
if (doTermSort) {
|
||||
newDeleteTerms = new TreeMap<Term,Num>();
|
||||
} else {
|
||||
newDeleteTerms = new HashMap<Term,Num>();
|
||||
}
|
||||
for(Entry<Term,Num> entry : terms.entrySet()) {
|
||||
Num num = entry.getValue();
|
||||
newDeleteTerms.put(entry.getKey(),
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.text.NumberFormat;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map.Entry;
|
||||
|
@ -219,11 +220,11 @@ final class DocumentsWriter {
|
|||
|
||||
// Deletes done after the last flush; these are discarded
|
||||
// on abort
|
||||
private BufferedDeletes deletesInRAM = new BufferedDeletes();
|
||||
private BufferedDeletes deletesInRAM = new BufferedDeletes(false);
|
||||
|
||||
// Deletes done before the last flush; these are still
|
||||
// kept on abort
|
||||
private BufferedDeletes deletesFlushed = new BufferedDeletes();
|
||||
private BufferedDeletes deletesFlushed = new BufferedDeletes(true);
|
||||
|
||||
// The max number of delete terms that can be buffered before
|
||||
// they must be flushed to disk.
|
||||
|
@ -828,7 +829,7 @@ final class DocumentsWriter {
|
|||
}
|
||||
|
||||
// for testing
|
||||
synchronized HashMap<Term,BufferedDeletes.Num> getBufferedDeleteTerms() {
|
||||
synchronized Map<Term,BufferedDeletes.Num> getBufferedDeleteTerms() {
|
||||
return deletesInRAM.terms;
|
||||
}
|
||||
|
||||
|
@ -974,7 +975,6 @@ final class DocumentsWriter {
|
|||
try {
|
||||
for (Entry<Term, BufferedDeletes.Num> entry: deletesFlushed.terms.entrySet()) {
|
||||
Term term = entry.getKey();
|
||||
|
||||
docs.seek(term);
|
||||
int limit = entry.getValue().getNum();
|
||||
while (docs.next()) {
|
||||
|
|
Loading…
Reference in New Issue