mirror of https://github.com/apache/lucene.git
LUCENE-3342: improve FrozenBufferedDeletes ram efficiency
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1152055 13f79535-47bb-0310-9956-ffa450edef68
parent b6de525da5
commit 963bce23d1
@@ -18,17 +18,14 @@ package org.apache.lucene.index;
  */
 
 import java.util.ArrayList;
-import java.util.Iterator;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
-import java.util.TreeMap;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.AtomicInteger;
 
 import org.apache.lucene.search.Query;
 import org.apache.lucene.util.RamUsageEstimator;
-import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit;
 
 /* Holds buffered deletes, by docID, term or query for a
  * single segment.  This is used to hold buffered pending
@@ -67,7 +64,7 @@ class BufferedDeletes {
   final static int BYTES_PER_DEL_QUERY = 5*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 2*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 2*RamUsageEstimator.NUM_BYTES_INT + 24;
 
   final AtomicInteger numTermDeletes = new AtomicInteger();
-  final Map<Term,Integer> terms;
+  final Map<Term,Integer> terms = new HashMap<Term,Integer>();
   final Map<Query,Integer> queries = new HashMap<Query,Integer>();
   final List<Integer> docIDs = new ArrayList<Integer>();
 
@@ -78,18 +75,13 @@ class BufferedDeletes {
   private final static boolean VERBOSE_DELETES = false;
 
   long gen;
-  public BufferedDeletes(boolean sortTerms) {
-    this(sortTerms, new AtomicLong());
+  public BufferedDeletes() {
+    this(new AtomicLong());
   }
 
-  BufferedDeletes(boolean sortTerms, AtomicLong bytesUsed) {
+  BufferedDeletes(AtomicLong bytesUsed) {
     assert bytesUsed != null;
     this.bytesUsed = bytesUsed;
-    if (sortTerms) {
-      terms = new TreeMap<Term,Integer>();
-    } else {
-      terms = new HashMap<Term,Integer>();
-    }
   }
 
   @Override
@@ -117,50 +109,6 @@ class BufferedDeletes {
     }
   }
 
-  void update(BufferedDeletes in) {
-    numTermDeletes.addAndGet(in.numTermDeletes.get());
-    for (Map.Entry<Term,Integer> ent : in.terms.entrySet()) {
-      final Term term = ent.getKey();
-      if (!terms.containsKey(term)) {
-        // only incr bytesUsed if this term wasn't already buffered:
-        bytesUsed.addAndGet(BYTES_PER_DEL_TERM);
-      }
-      terms.put(term, MAX_INT);
-    }
-
-    for (Map.Entry<Query,Integer> ent : in.queries.entrySet()) {
-      final Query query = ent.getKey();
-      if (!queries.containsKey(query)) {
-        // only incr bytesUsed if this query wasn't already buffered:
-        bytesUsed.addAndGet(BYTES_PER_DEL_QUERY);
-      }
-      queries.put(query, MAX_INT);
-    }
-
-    // docIDs never move across segments and the docIDs
-    // should already be cleared
-  }
-
-  void update(FrozenBufferedDeletes in) {
-    numTermDeletes.addAndGet(in.numTermDeletes);
-    for(Term term : in.terms) {
-      if (!terms.containsKey(term)) {
-        // only incr bytesUsed if this term wasn't already buffered:
-        bytesUsed.addAndGet(BYTES_PER_DEL_TERM);
-      }
-      terms.put(term, MAX_INT);
-    }
-
-    for(int queryIdx=0;queryIdx<in.queries.length;queryIdx++) {
-      final Query query = in.queries[queryIdx];
-      if (!queries.containsKey(query)) {
-        // only incr bytesUsed if this query wasn't already buffered:
-        bytesUsed.addAndGet(BYTES_PER_DEL_QUERY);
-      }
-      queries.put(query, MAX_INT);
-    }
-  }
-
   public void addQuery(Query query, int docIDUpto) {
     Integer current = queries.put(query, docIDUpto);
     // increment bytes used only if the query wasn't added so far.
@@ -193,44 +141,7 @@ class BufferedDeletes {
       bytesUsed.addAndGet(BYTES_PER_DEL_TERM + term.bytes.length + (RamUsageEstimator.NUM_BYTES_CHAR * term.field().length()));
     }
   }
 
-  public Iterable<Term> termsIterable() {
-    return new Iterable<Term>() {
-      @Override
-      public Iterator<Term> iterator() {
-        return terms.keySet().iterator();
-      }
-    };
-  }
-
-  public Iterable<QueryAndLimit> queriesIterable() {
-    return new Iterable<QueryAndLimit>() {
-
-      @Override
-      public Iterator<QueryAndLimit> iterator() {
-        return new Iterator<QueryAndLimit>() {
-          private final Iterator<Map.Entry<Query,Integer>> iter = queries.entrySet().iterator();
-
-          @Override
-          public boolean hasNext() {
-            return iter.hasNext();
-          }
-
-          @Override
-          public QueryAndLimit next() {
-            final Map.Entry<Query,Integer> ent = iter.next();
-            return new QueryAndLimit(ent.getKey(), ent.getValue());
-          }
-
-          @Override
-          public void remove() {
-            throw new UnsupportedOperationException();
-          }
-        };
-      }
-    };
-  }
-
   void clear() {
     terms.clear();
     queries.clear();
@@ -33,6 +33,7 @@ import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.QueryWrapperFilter;
 import org.apache.lucene.store.IOContext;
+import org.apache.lucene.util.BytesRef;
 
 /* Tracks the stream of {@link BufferedDeletes}.
  * When DocumentsWriterPerThread flushes, its buffered
@@ -182,7 +183,7 @@ class BufferedDeletesStream {
     infos2.addAll(infos);
     Collections.sort(infos2, sortSegInfoByDelGen);
 
-    BufferedDeletes coalescedDeletes = null;
+    CoalescedDeletes coalescedDeletes = null;
     boolean anyNewDeletes = false;
 
     int infosIDX = infos2.size()-1;
@@ -200,7 +201,7 @@ class BufferedDeletesStream {
       if (packet != null && segGen < packet.delGen()) {
         //System.out.println("  coalesce");
         if (coalescedDeletes == null) {
-          coalescedDeletes = new BufferedDeletes(true);
+          coalescedDeletes = new CoalescedDeletes();
         }
         if (!packet.isSegmentPrivate) {
           /*
@@ -251,7 +252,7 @@ class BufferedDeletesStream {
       }
 
       if (coalescedDeletes == null) {
-        coalescedDeletes = new BufferedDeletes(true);
+        coalescedDeletes = new CoalescedDeletes();
       }
 
       /*
@@ -463,7 +464,8 @@ class BufferedDeletesStream {
     if (term != null) {
       assert lastDeleteTerm == null || term.compareTo(lastDeleteTerm) > 0: "lastTerm=" + lastDeleteTerm + " vs term=" + term;
     }
-    lastDeleteTerm = term;
+    // TODO: we re-use term now in our merged iterable, but we shouldn't clone, instead copy for this assert
+    lastDeleteTerm = term == null ? null : new Term(term.field(), new BytesRef(term.bytes));
     return true;
   }
 
@@ -0,0 +1,191 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.search.Query;
+import org.apache.lucene.util.PriorityQueue;
+import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit;
+
+class CoalescedDeletes {
+  final Map<Query,Integer> queries = new HashMap<Query,Integer>();
+  final List<Iterable<Term>> iterables = new ArrayList<Iterable<Term>>();
+
+  @Override
+  public String toString() {
+    // note: we could add/collect more debugging information
+    return "CoalescedDeletes(termSets=" + iterables.size() + ",queries=" + queries.size() + ")";
+  }
+
+  void update(FrozenBufferedDeletes in) {
+    iterables.add(in.termsIterable());
+
+    for(int queryIdx=0;queryIdx<in.queries.length;queryIdx++) {
+      final Query query = in.queries[queryIdx];
+      queries.put(query, BufferedDeletes.MAX_INT);
+    }
+  }
+
+  public Iterable<Term> termsIterable() {
+    return new Iterable<Term>() {
+      @Override
+      public Iterator<Term> iterator() {
+        ArrayList<Iterator<Term>> subs = new ArrayList<Iterator<Term>>(iterables.size());
+        for (Iterable<Term> iterable : iterables) {
+          subs.add(iterable.iterator());
+        }
+        return mergedIterator(subs);
+      }
+    };
+  }
+
+  public Iterable<QueryAndLimit> queriesIterable() {
+    return new Iterable<QueryAndLimit>() {
+
+      @Override
+      public Iterator<QueryAndLimit> iterator() {
+        return new Iterator<QueryAndLimit>() {
+          private final Iterator<Map.Entry<Query,Integer>> iter = queries.entrySet().iterator();
+
+          @Override
+          public boolean hasNext() {
+            return iter.hasNext();
+          }
+
+          @Override
+          public QueryAndLimit next() {
+            final Map.Entry<Query,Integer> ent = iter.next();
+            return new QueryAndLimit(ent.getKey(), ent.getValue());
+          }
+
+          @Override
+          public void remove() {
+            throw new UnsupportedOperationException();
+          }
+        };
+      }
+    };
+  }
+
+  /** provides a merged view across multiple iterators */
+  static Iterator<Term> mergedIterator(final List<Iterator<Term>> iterators) {
+    return new Iterator<Term>() {
+      Term current;
+      TermMergeQueue queue = new TermMergeQueue(iterators.size());
+      SubIterator[] top = new SubIterator[iterators.size()];
+      int numTop;
+
+      {
+        int index = 0;
+        for (Iterator<Term> iterator : iterators) {
+          if (iterator.hasNext()) {
+            SubIterator sub = new SubIterator();
+            sub.current = iterator.next();
+            sub.iterator = iterator;
+            sub.index = index++;
+            queue.add(sub);
+          }
+        }
+      }
+
+      public boolean hasNext() {
+        if (queue.size() > 0) {
+          return true;
+        }
+
+        for (int i = 0; i < numTop; i++) {
+          if (top[i].iterator.hasNext()) {
+            return true;
+          }
+        }
+        return false;
+      }
+
+      public Term next() {
+        // restore queue
+        pushTop();
+
+        // gather equal top fields
+        if (queue.size() > 0) {
+          pullTop();
+        } else {
+          current = null;
+        }
+        return current;
+      }
+
+      public void remove() {
+        throw new UnsupportedOperationException();
+      }
+
+      private void pullTop() {
+        // extract all subs from the queue that have the same top term
+        assert numTop == 0;
+        while (true) {
+          top[numTop++] = queue.pop();
+          if (queue.size() == 0
+              || !(queue.top()).current.equals(top[0].current)) {
+            break;
+          }
+        }
+        current = top[0].current;
+      }
+
+      private void pushTop() {
+        // call next() on each top, and put back into queue
+        for (int i = 0; i < numTop; i++) {
+          if (top[i].iterator.hasNext()) {
+            top[i].current = top[i].iterator.next();
+            queue.add(top[i]);
+          } else {
+            // no more terms
+            top[i].current = null;
+          }
+        }
+        numTop = 0;
+      }
+    };
+  }
+
+  private static class SubIterator {
+    Iterator<Term> iterator;
+    Term current;
+    int index;
+  }
+
+  private static class TermMergeQueue extends PriorityQueue<SubIterator> {
+    TermMergeQueue(int size) {
+      super(size);
+    }
+
+    @Override
+    protected boolean lessThan(SubIterator a, SubIterator b) {
+      final int cmp = a.current.compareTo(b.current);
+      if (cmp != 0) {
+        return cmp < 0;
+      } else {
+        return a.index < b.index;
+      }
+    }
+  }
+}
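A minimal sketch (not part of the commit) of how the new mergedIterator behaves: each sub-iterator must already be sorted, and a term sitting at the top of several sub-iterators is popped from all of them but returned only once. The field and term values here are hypothetical, and the sketch assumes it is compiled in the org.apache.lucene.index package, since CoalescedDeletes is package-private.

package org.apache.lucene.index;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class MergedIteratorSketch {
  public static void main(String[] args) {
    // Two sorted term streams; f:c appears in both:
    List<Iterator<Term>> subs = new ArrayList<Iterator<Term>>();
    subs.add(Arrays.asList(new Term("f", "a"), new Term("f", "c")).iterator());
    subs.add(Arrays.asList(new Term("f", "b"), new Term("f", "c")).iterator());

    Iterator<Term> merged = CoalescedDeletes.mergedIterator(subs);
    while (merged.hasNext()) {
      // Prints f:a, f:b, f:c -- pullTop() collapses the duplicate f:c.
      System.out.println(merged.next());
    }
  }
}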
@@ -81,7 +81,7 @@ final class DocumentsWriterDeleteQueue {
   }
 
   DocumentsWriterDeleteQueue(long generation) {
-    this(new BufferedDeletes(false), generation);
+    this(new BufferedDeletes(), generation);
   }
 
   DocumentsWriterDeleteQueue(BufferedDeletes globalBufferedDeletes, long generation) {
@@ -188,7 +188,7 @@ public class DocumentsWriterPerThread {
     bytesUsed = new AtomicLong(0);
     byteBlockAllocator = new DirectTrackingAllocator(bytesUsed);
     consumer = indexingChain.getChain(this);
-    pendingDeletes = new BufferedDeletes(false);
+    pendingDeletes = new BufferedDeletes();
     initialize();
   }
 
@@ -476,7 +476,7 @@ public class DocumentsWriterPerThread {
       segmentDeletes = null;
     } else {
       segmentDeletes = pendingDeletes;
-      pendingDeletes = new BufferedDeletes(false);
+      pendingDeletes = new BufferedDeletes();
    }
 
     if (infoStream != null) {
@@ -21,31 +21,23 @@ import java.util.Iterator;
 import java.util.Map;
 
 import org.apache.lucene.search.Query;
+import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.index.BufferedDeletesStream.QueryAndLimit;
 
 /** Holds buffered deletes by term or query, once pushed.
- * Pushed deltes are write-once, so we shift to more
+ * Pushed deletes are write-once, so we shift to more
  * memory efficient data structure to hold them.  We don't
  * hold docIDs because these are applied on flush. */
 
 class FrozenBufferedDeletes {
 
-  /* Rough logic: Term is object w/
-     String field and BytesRef text (OBJ_HEADER + 2*POINTER).
-     String field is (OBJ_HEADER + 4*INT +
-     POINTER + OBJ_HEADER + CHAR*field.length).
-     Term's text is BytesRef (OBJ_HEADER + 2*INT + POINTER +
-     OBJ_HEADER + bytes.length). */
-  final static int BYTES_PER_DEL_TERM = 4*RamUsageEstimator.NUM_BYTES_OBJECT_REF + 5*RamUsageEstimator.NUM_BYTES_OBJECT_HEADER + 6*RamUsageEstimator.NUM_BYTES_INT;
-
   /* Query we often undercount (say 24 bytes), plus int. */
   final static int BYTES_PER_DEL_QUERY = RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_INT + 24;
 
   // Terms, in sorted order:
-  // TODO: would be more RAM efficient to store BytesRef[],
-  // per field:
-  final Term[] terms;
+  final PrefixCodedTerms terms;
+  int termCount; // just for debugging
 
   // Parallel array of deleted query, and the docIDUpto for
   // each
@@ -62,7 +54,15 @@ class FrozenBufferedDeletes {
   public FrozenBufferedDeletes(BufferedDeletes deletes, boolean isSegmentPrivate) {
     this.isSegmentPrivate = isSegmentPrivate;
     assert !isSegmentPrivate || deletes.terms.size() == 0 : "segment private package should only have del queries";
-    terms = deletes.terms.keySet().toArray(new Term[deletes.terms.size()]);
+    Term termsArray[] = deletes.terms.keySet().toArray(new Term[deletes.terms.size()]);
+    termCount = termsArray.length;
+    ArrayUtil.mergeSort(termsArray);
+    PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
+    for (Term term : termsArray) {
+      builder.add(term);
+    }
+    terms = builder.finish();
+
     queries = new Query[deletes.queries.size()];
     queryLimits = new int[deletes.queries.size()];
     int upto = 0;
@@ -71,13 +71,8 @@ class FrozenBufferedDeletes {
       queryLimits[upto] = ent.getValue();
       upto++;
     }
-    int termDataBytes = 0;
-    for(Map.Entry<Term,Integer> ent : deletes.terms.entrySet()) {
-      final Term term = ent.getKey();
-      termDataBytes += term.bytes().length;
-      termDataBytes += term.field().length() * RamUsageEstimator.NUM_BYTES_CHAR;
-    }
-    bytesUsed = terms.length * BYTES_PER_DEL_TERM + queries.length * BYTES_PER_DEL_QUERY + termDataBytes;
+
+    bytesUsed = (int) terms.getSizeInBytes() + queries.length * BYTES_PER_DEL_QUERY;
     numTermDeletes = deletes.numTermDeletes.get();
   }
 
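To see why this accounting change matters, here is a back-of-the-envelope sketch (my arithmetic, not part of the commit; the RamUsageEstimator constants are JVM-dependent, so the concrete numbers below assume a 32-bit JVM with ref=4, header=8, int=4):

import org.apache.lucene.util.RamUsageEstimator;

public class DeleteTermRamSketch {
  public static void main(String[] args) {
    // Overhead the old Term[]-based estimate charged per buffered delete
    // term (the removed BYTES_PER_DEL_TERM), before even counting the
    // term's bytes and field chars:
    int oldOverheadPerTerm = 4 * RamUsageEstimator.NUM_BYTES_OBJECT_REF
                           + 5 * RamUsageEstimator.NUM_BYTES_OBJECT_HEADER
                           + 6 * RamUsageEstimator.NUM_BYTES_INT;
    // With 32-bit constants this is 16 + 40 + 24 = 80 bytes per term; the
    // prefix-coded buffer replaces it with roughly two vInts plus the
    // non-shared suffix bytes per term, which terms.getSizeInBytes() reports.
    System.out.println("old overhead per deleted term: " + oldOverheadPerTerm);
  }
}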
@@ -95,24 +90,7 @@ class FrozenBufferedDeletes {
     return new Iterable<Term>() {
       @Override
       public Iterator<Term> iterator() {
-        return new Iterator<Term>() {
-          private int upto;
-
-          @Override
-          public boolean hasNext() {
-            return upto < terms.length;
-          }
-
-          @Override
-          public Term next() {
-            return terms[upto++];
-          }
-
-          @Override
-          public void remove() {
-            throw new UnsupportedOperationException();
-          }
-        };
+        return terms.iterator();
       }
     };
   }
@@ -149,7 +127,7 @@ class FrozenBufferedDeletes {
   public String toString() {
     String s = "";
     if (numTermDeletes != 0) {
-      s += " " + numTermDeletes + " deleted terms (unique count=" + terms.length + ")";
+      s += " " + numTermDeletes + " deleted terms (unique count=" + termCount + ")";
    }
    if (queries.length != 0) {
      s += " " + queries.length + " deleted queries";
@@ -162,6 +140,6 @@ class FrozenBufferedDeletes {
   }
 
   boolean any() {
-    return terms.length > 0 || queries.length > 0;
+    return termCount > 0 || queries.length > 0;
   }
 }
@@ -0,0 +1,146 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.IOException;
+import java.util.Iterator;
+
+import org.apache.lucene.index.Term;
+import org.apache.lucene.store.IndexInput;
+import org.apache.lucene.store.RAMFile;
+import org.apache.lucene.store.RAMInputStream;
+import org.apache.lucene.store.RAMOutputStream;
+import org.apache.lucene.util.BytesRef;
+
+/**
+ * Prefix codes term instances (prefixes are shared)
+ * @lucene.experimental
+ */
+class PrefixCodedTerms implements Iterable<Term> {
+  final RAMFile buffer;
+
+  private PrefixCodedTerms(RAMFile buffer) {
+    this.buffer = buffer;
+  }
+
+  /** @return size in bytes */
+  public long getSizeInBytes() {
+    return buffer.getSizeInBytes();
+  }
+
+  /** @return iterator over the bytes */
+  public Iterator<Term> iterator() {
+    return new PrefixCodedTermsIterator();
+  }
+
+  class PrefixCodedTermsIterator implements Iterator<Term> {
+    final IndexInput input;
+    String field = "";
+    BytesRef bytes = new BytesRef();
+    Term term = new Term(field, bytes);
+
+    PrefixCodedTermsIterator() {
+      try {
+        input = new RAMInputStream(buffer);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    public boolean hasNext() {
+      return input.getFilePointer() < input.length();
+    }
+
+    public Term next() {
+      assert hasNext();
+      try {
+        int code = input.readVInt();
+        if ((code & 1) != 0) {
+          // new field
+          field = input.readString();
+        }
+        int prefix = code >>> 1;
+        int suffix = input.readVInt();
+        bytes.grow(prefix + suffix);
+        input.readBytes(bytes.bytes, prefix, suffix);
+        bytes.length = prefix + suffix;
+        term.set(field, bytes);
+        return term;
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    public void remove() {
+      throw new UnsupportedOperationException();
+    }
+  }
+
+  /** Builds a PrefixCodedTerms: call add repeatedly, then finish. */
+  public static class Builder {
+    private RAMFile buffer = new RAMFile();
+    private RAMOutputStream output = new RAMOutputStream(buffer);
+    private Term lastTerm = new Term("");
+
+    /** add a term */
+    public void add(Term term) {
+      assert lastTerm.equals(new Term("")) || term.compareTo(lastTerm) > 0;
+
+      try {
+        int prefix = sharedPrefix(lastTerm.bytes, term.bytes);
+        int suffix = term.bytes.length - prefix;
+        if (term.field.equals(lastTerm.field)) {
+          output.writeVInt(prefix << 1);
+        } else {
+          output.writeVInt(prefix << 1 | 1);
+          output.writeString(term.field);
+        }
+        output.writeVInt(suffix);
+        output.writeBytes(term.bytes.bytes, term.bytes.offset + prefix, suffix);
+        lastTerm.bytes.copy(term.bytes);
+        lastTerm.field = term.field;
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    /** return finalized form */
+    public PrefixCodedTerms finish() {
+      try {
+        output.close();
+        return new PrefixCodedTerms(buffer);
+      } catch (IOException e) {
+        throw new RuntimeException(e);
+      }
+    }
+
+    private int sharedPrefix(BytesRef term1, BytesRef term2) {
+      int pos1 = 0;
+      int pos1End = pos1 + Math.min(term1.length, term2.length);
+      int pos2 = 0;
+      while(pos1 < pos1End) {
+        if (term1.bytes[term1.offset + pos1] != term2.bytes[term2.offset + pos2]) {
+          return pos1;
+        }
+        pos1++;
+        pos2++;
+      }
+      return pos1;
+    }
+  }
+}
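A small usage sketch (my assumptions, mirroring the new tests below): terms must be added in sorted Term order, and each add writes a vInt code (sharedPrefix << 1, with the low bit set when the field changes), then the field string if it changed, then a vInt suffix length and the non-shared suffix bytes. The sketch assumes it is compiled in the org.apache.lucene.index package, since PrefixCodedTerms is package-private.

package org.apache.lucene.index;

import java.util.Iterator;

public class PrefixCodedTermsSketch {
  public static void main(String[] args) {
    PrefixCodedTerms.Builder builder = new PrefixCodedTerms.Builder();
    builder.add(new Term("body", "apple"));  // code 0<<1|1, "body", 5, "apple"
    builder.add(new Term("body", "apples")); // code 5<<1 (shares "apple"), 1, "s"
    builder.add(new Term("title", "apple")); // code 5<<1|1 (prefix vs previous bytes), "title", 0
    PrefixCodedTerms terms = builder.finish();

    // The iterator decodes the buffer back into one reused Term instance:
    for (Iterator<Term> it = terms.iterator(); it.hasNext();) {
      System.out.println(it.next()); // body:apple, body:apples, title:apple
    }
  }
}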
@@ -29,7 +29,7 @@ public class RAMFile {
   private long lastModified = System.currentTimeMillis();
 
   // File used as buffer, in no RAMDirectory
-  protected RAMFile() {}
+  public RAMFile() {}
 
   RAMFile(RAMDirectory directory) {
     this.directory = directory;
@@ -19,8 +19,10 @@ package org.apache.lucene.store;
 
 import java.io.IOException;
 
-/** A memory-resident {@link IndexInput} implementation. */
-class RAMInputStream extends IndexInput implements Cloneable {
+/** A memory-resident {@link IndexInput} implementation.
+ *
+ *  @lucene.internal */
+public class RAMInputStream extends IndexInput implements Cloneable {
   static final int BUFFER_SIZE = RAMOutputStream.BUFFER_SIZE;
 
   private RAMFile file;
@@ -33,7 +35,7 @@ class RAMInputStream extends IndexInput implements Cloneable {
   private long bufferStart;
   private int bufferLength;
 
-  RAMInputStream(RAMFile f) throws IOException {
+  public RAMInputStream(RAMFile f) throws IOException {
     file = f;
     length = file.length;
     if (length/BUFFER_SIZE >= Integer.MAX_VALUE) {
@@ -41,8 +41,8 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
     }
     DeleteSlice slice1 = queue.newSlice();
     DeleteSlice slice2 = queue.newSlice();
-    BufferedDeletes bd1 = new BufferedDeletes(false);
-    BufferedDeletes bd2 = new BufferedDeletes(false);
+    BufferedDeletes bd1 = new BufferedDeletes();
+    BufferedDeletes bd2 = new BufferedDeletes();
     int last1 = 0;
     int last2 = 0;
     Set<Term> uniqueValues = new HashSet<Term>();
@@ -70,8 +70,11 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
     }
     assertEquals(uniqueValues, bd1.terms.keySet());
     assertEquals(uniqueValues, bd2.terms.keySet());
-    assertEquals(uniqueValues, new HashSet<Term>(Arrays.asList(queue
-        .freezeGlobalBuffer(null).terms)));
+    HashSet<Term> frozenSet = new HashSet<Term>();
+    for (Term t : queue.freezeGlobalBuffer(null).termsIterable()) {
+      frozenSet.add(t);
+    }
+    assertEquals(uniqueValues, frozenSet);
     assertEquals("num deletes must be 0 after freeze", 0, queue
         .numGlobalTermDeletes());
   }
@@ -129,7 +132,7 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
       if (random.nextInt(5) == 0) {
         FrozenBufferedDeletes freezeGlobalBuffer = queue
             .freezeGlobalBuffer(null);
-        assertEquals(termsSinceFreeze, freezeGlobalBuffer.terms.length);
+        assertEquals(termsSinceFreeze, freezeGlobalBuffer.termCount);
         assertEquals(queriesSinceFreeze, freezeGlobalBuffer.queries.length);
         queriesSinceFreeze = 0;
         termsSinceFreeze = 0;
@@ -168,8 +171,11 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
       assertEquals(uniqueValues, deletes.terms.keySet());
     }
     queue.tryApplyGlobalSlice();
-    assertEquals(uniqueValues, new HashSet<Term>(Arrays.asList(queue
-        .freezeGlobalBuffer(null).terms)));
+    HashSet<Term> frozenSet = new HashSet<Term>();
+    for (Term t : queue.freezeGlobalBuffer(null).termsIterable()) {
+      frozenSet.add(t);
+    }
+    assertEquals(uniqueValues, frozenSet);
     assertEquals("num deletes must be 0 after freeze", 0, queue
         .numGlobalTermDeletes());
   }
@@ -188,7 +194,7 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
       this.index = index;
      this.ids = ids;
      this.slice = queue.newSlice();
-      deletes = new BufferedDeletes(false);
+      deletes = new BufferedDeletes();
      this.latch = latch;
    }
 
@@ -954,9 +954,10 @@ public class TestIndexWriterDelete extends LuceneTestCase {
     Directory dir = newDirectory();
     // Cannot use RandomIndexWriter because we don't want to
     // ever call commit() for this test:
+    // note: tiny rambuffer used, as with a 1MB buffer the test is too slow (flush @ 128,999)
     IndexWriter w = new IndexWriter(dir,
                                     newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random))
-                                    .setRAMBufferSizeMB(1.0f).setMaxBufferedDocs(1000).setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES).setReaderPooling(false));
+                                    .setRAMBufferSizeMB(0.2f).setMaxBufferedDocs(1000).setMergePolicy(NoMergePolicy.NO_COMPOUND_FILES).setReaderPooling(false));
     w.setInfoStream(VERBOSE ? System.out : null);
     int count = 0;
     while(true) {
@@ -981,7 +982,7 @@ public class TestIndexWriterDelete extends LuceneTestCase {
       }
       count++;
 
-      // Today we applyDelets @ count=7199; even if we make
+      // Today we applyDeletes @ count=21553; even if we make
       // sizable improvements to RAM efficiency of buffered
       // del term we're unlikely to go over 100K:
       if (count > 100000) {
@@ -0,0 +1,136 @@
+package org.apache.lucene.index;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Set;
+import java.util.TreeSet;
+
+import org.apache.lucene.util.LuceneTestCase;
+import org.apache.lucene.util._TestUtil;
+
+public class TestPrefixCodedTerms extends LuceneTestCase {
+
+  public void testEmpty() {
+    PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
+    PrefixCodedTerms pb = b.finish();
+    assertFalse(pb.iterator().hasNext());
+  }
+
+  public void testOne() {
+    Term term = new Term("foo", "bogus");
+    PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
+    b.add(term);
+    PrefixCodedTerms pb = b.finish();
+    Iterator<Term> iterator = pb.iterator();
+    assertTrue(iterator.hasNext());
+    assertEquals(term, iterator.next());
+  }
+
+  public void testRandom() {
+    Set<Term> terms = new TreeSet<Term>();
+    int nterms = atLeast(10000);
+    for (int i = 0; i < nterms; i++) {
+      Term term = new Term(_TestUtil.randomUnicodeString(random, 2), _TestUtil.randomUnicodeString(random));
+      terms.add(term);
+    }
+
+    PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
+    for (Term ref: terms) {
+      b.add(ref);
+    }
+    PrefixCodedTerms pb = b.finish();
+
+    Iterator<Term> expected = terms.iterator();
+    for (Term t : pb) {
+      assertTrue(expected.hasNext());
+      assertEquals(expected.next(), t);
+    }
+    assertFalse(expected.hasNext());
+  }
+
+  public void testMergeEmpty() {
+    List<Iterator<Term>> subs = Collections.emptyList();
+    assertFalse(CoalescedDeletes.mergedIterator(subs).hasNext());
+
+    subs = new ArrayList<Iterator<Term>>();
+    subs.add(new PrefixCodedTerms.Builder().finish().iterator());
+    subs.add(new PrefixCodedTerms.Builder().finish().iterator());
+    Iterator<Term> merged = CoalescedDeletes.mergedIterator(subs);
+    assertFalse(merged.hasNext());
+  }
+
+  public void testMergeOne() {
+    Term t1 = new Term("foo", "a");
+    PrefixCodedTerms.Builder b1 = new PrefixCodedTerms.Builder();
+    b1.add(t1);
+    PrefixCodedTerms pb1 = b1.finish();
+
+    Term t2 = new Term("foo", "b");
+    PrefixCodedTerms.Builder b2 = new PrefixCodedTerms.Builder();
+    b2.add(t2);
+    PrefixCodedTerms pb2 = b2.finish();
+
+    List<Iterator<Term>> subs = new ArrayList<Iterator<Term>>();
+    subs.add(pb1.iterator());
+    subs.add(pb2.iterator());
+
+    Iterator<Term> merged = CoalescedDeletes.mergedIterator(subs);
+    assertTrue(merged.hasNext());
+    assertEquals(t1, merged.next());
+    assertTrue(merged.hasNext());
+    assertEquals(t2, merged.next());
+  }
+
+  public void testMergeRandom() {
+    PrefixCodedTerms pb[] = new PrefixCodedTerms[_TestUtil.nextInt(random, 2, 10)];
+    Set<Term> superSet = new TreeSet<Term>();
+
+    for (int i = 0; i < pb.length; i++) {
+      Set<Term> terms = new TreeSet<Term>();
+      int nterms = _TestUtil.nextInt(random, 0, 10000);
+      for (int j = 0; j < nterms; j++) {
+        Term term = new Term(_TestUtil.randomUnicodeString(random, 2), _TestUtil.randomUnicodeString(random, 4));
+        terms.add(term);
+      }
+      superSet.addAll(terms);
+
+      PrefixCodedTerms.Builder b = new PrefixCodedTerms.Builder();
+      for (Term ref: terms) {
+        b.add(ref);
+      }
+      pb[i] = b.finish();
+    }
+
+    List<Iterator<Term>> subs = new ArrayList<Iterator<Term>>();
+    for (int i = 0; i < pb.length; i++) {
+      subs.add(pb[i].iterator());
+    }
+
+    Iterator<Term> expected = superSet.iterator();
+    Iterator<Term> actual = CoalescedDeletes.mergedIterator(subs);
+    while (actual.hasNext()) {
+      assertTrue(expected.hasNext());
+      assertEquals(expected.next(), actual.next());
+    }
+    assertFalse(expected.hasNext());
+  }
+}