No need for deepCopy on makeSafe for paged bytes field data

Since it's a reference to a buffer in the PagedBytes, we don't need to deep copy it on makeSafe; a shallow copy is enough.
This commit is contained in:
Shay Banon 2013-05-10 17:24:53 +02:00
parent 2be23d2427
commit 455b5da52f
5 changed files with 78 additions and 80 deletions

View File

@ -26,16 +26,16 @@ import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;
/**
*/
public abstract class BytesValues {
public abstract class BytesValues {
public static final BytesValues EMPTY = new Empty();
private boolean multiValued;
protected final BytesRef scratch = new BytesRef();
protected BytesValues(boolean multiValued) {
this.multiValued = multiValued;
}
/**
* Is at least one of the documents in this field data multi valued?
*/
@ -49,7 +49,9 @@ public abstract class BytesValues {
public abstract boolean hasValue(int docId);
/**
* Converts the provided bytes to "safe" ones from a "non" safe call made (if needed).
* Converts the provided bytes to "safe" ones from a "non" safe call made (if needed). Note,
* this call makes the bytes safe for *reads*, not writes (into the same BytesRef). For example,
* it makes them safe to be placed in a map.
*/
public BytesRef makeSafe(BytesRef bytes) {
return BytesRef.deepCopyOf(bytes);
@ -61,7 +63,7 @@ public abstract class BytesValues {
public BytesRef getValue(int docId) {
if (hasValue(docId)) {
return getValueScratch(docId, scratch);
}
}
return null;
}
@ -71,8 +73,8 @@ public abstract class BytesValues {
* Note, the bytes are not "safe".
*/
public abstract BytesRef getValueScratch(int docId, BytesRef ret);
/**
* Fills the given spare for the given doc ID and returns the hashcode of the reference as defined by
* {@link BytesRef#hashCode()}
@ -85,14 +87,14 @@ public abstract class BytesValues {
* Returns a bytes value iterator for a docId. Note, the content of it might be shared across invocation.
*/
public abstract Iter getIter(int docId); // TODO: maybe this should return null for no values so we can safe one call?
public static interface Iter {
boolean hasNext();
BytesRef next();
int hash();
public static class Empty implements Iter {
@ -139,12 +141,12 @@ public abstract class BytesValues {
done = true;
return value;
}
public int hash() {
return value.hashCode();
}
}
static class Multi implements Iter {
protected int innerOrd;
@ -152,10 +154,11 @@ public abstract class BytesValues {
protected BytesValues.WithOrdinals withOrds;
protected Ordinals.Docs.Iter ordsIter;
protected final BytesRef scratch = new BytesRef();
public Multi(WithOrdinals withOrds) {
this.withOrds = withOrds;
assert withOrds.isMultiValued();
}
public Multi reset(Ordinals.Docs.Iter ordsIter) {
@ -176,7 +179,7 @@ public abstract class BytesValues {
innerOrd = ordsIter.next();
return scratch;
}
public int hash() {
return scratch.hashCode();
}
@ -184,7 +187,7 @@ public abstract class BytesValues {
}
public static class Empty extends BytesValues {
public Empty() {
super(false);
}
@ -211,7 +214,7 @@ public abstract class BytesValues {
* Bytes values that are based on ordinals.
*/
public static abstract class WithOrdinals extends BytesValues {
protected final Docs ordinals;
protected WithOrdinals(Ordinals.Docs ordinals) {
@ -226,12 +229,12 @@ public abstract class BytesValues {
public BytesRef getValueByOrd(int ord) {
// Resolve the ordinal using the scratch reference as the target; no new BytesRef is allocated here,
// so presumably the result may be overwritten by later calls - verify against the
// getValueScratchByOrd implementations.
return getValueScratchByOrd(ord, scratch);
}
/**
 * Creates a new {@link Iter.Multi} iterator backed by this instance.
 * Asserts that this instance reports itself as multi valued.
 */
protected Iter.Multi newMultiIter() {
assert this.isMultiValued();
return new Iter.Multi(this);
}
protected Iter.Single newSingleIter() {
assert !this.isMultiValued();
return new Iter.Single();
@ -241,21 +244,21 @@ public abstract class BytesValues {
public boolean hasValue(int docId) {
return ordinals.getOrd(docId) != 0;
}
@Override
public BytesRef getValue(int docId) {
final int ord = ordinals.getOrd(docId);
if (ord == 0) {
if (ord == 0) {
return null;
}
return getValueScratchByOrd(ord, scratch);
}
@Override
public BytesRef getValueScratch(int docId, BytesRef ret) {
// Look up the ordinal for the doc and resolve it into the caller supplied ret.
// NOTE(review): unlike getValue, there is no check for ordinal 0 here - confirm that
// getValueScratchByOrd handles it.
return getValueScratchByOrd(ordinals.getOrd(docId), ret);
}
/**
 * Resolves the value for the given ordinal, passing a newly allocated {@link BytesRef}
 * as the target instead of the shared scratch instance used by {@code getValueByOrd}.
 * NOTE(review): whether the underlying bytes are copied depends on the
 * {@code getValueScratchByOrd} implementation - confirm per subclass.
 */
public BytesRef getSafeValueByOrd(int ord) {
return getValueScratchByOrd(ord, new BytesRef());
}

View File

@ -19,8 +19,6 @@
package org.elasticsearch.index.fielddata.plain;
import java.io.IOException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.fst.BytesRefFSTEnum;
@ -35,6 +33,8 @@ import org.elasticsearch.index.fielddata.ordinals.EmptyOrdinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals;
import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;
import java.io.IOException;
/**
*/
public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<ScriptDocValues.Strings> {
@ -92,14 +92,13 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
return ordinals.isMultiValued() ? new BytesValues.Multi(fst, ordinals.ordinals()) : new BytesValues.Single(fst, ordinals.ordinals());
}
@Override
public ScriptDocValues.Strings getScriptValues() {
// fst is expected to be set at this point (only checked when assertions are enabled)
assert fst != null;
// wrap the bytes values of this field data for script access
return new ScriptDocValues.Strings(getBytesValues());
}
@Override
public org.elasticsearch.index.fielddata.BytesValues.WithOrdinals getHashedBytesValues() {
assert fst != null;
@ -112,8 +111,8 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
hashes[0] = new BytesRef().hashCode();
int i = 1;
try {
while((next = fstEnum.next()) != null) {
hashes[i++] = next.input.hashCode();
while ((next = fstEnum.next()) != null) {
hashes[i++] = next.input.hashCode();
}
} catch (IOException ex) {
//bogus
@ -128,9 +127,8 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
protected final FST<Long> fst;
protected final Ordinals.Docs ordinals;
protected final BytesRef scratch = new BytesRef();
// per-thread resources
protected final BytesReader in ;
protected final BytesReader in;
protected final Arc<Long> firstArc = new Arc<Long>();
protected final Arc<Long> scratchArc = new Arc<Long>();
protected final IntsRef scratchInts = new IntsRef();
@ -169,7 +167,7 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
assert !ordinals.isMultiValued();
this.iter = newSingleIter();
}
@Override
public Iter getIter(int docId) {
int ord = ordinals.getOrd(docId);
@ -177,14 +175,15 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
return iter.reset(getValueByOrd(ord), ord);
}
}
static final class SingleHashed extends Single {
private final int[] hashes;
SingleHashed(FST<Long> fst, Docs ordinals, int[] hashes) {
super(fst, ordinals);
this.hashes = hashes;
}
@Override
protected Iter.Single newSingleIter() {
return new Iter.Single() {
@ -193,7 +192,7 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
}
};
}
@Override
public int getValueHashed(int docId, BytesRef ret) {
final int ord = ordinals.getOrd(docId);
@ -211,14 +210,14 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
assert ordinals.isMultiValued();
this.iter = newMultiIter();
}
@Override
public Iter getIter(int docId) {
return iter.reset(ordinals.getIter(docId));
}
}
static final class MultiHashed extends Multi {
private final int[] hashes;
@ -226,7 +225,7 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
super(fst, ordinals);
this.hashes = hashes;
}
@Override
protected Iter.Multi newMultiIter() {
return new Iter.Multi(this) {
@ -242,11 +241,10 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
getValueScratchByOrd(ord, ret);
return hashes[ord];
}
}
}
static class Empty extends FSTBytesAtomicFieldData {
@ -286,7 +284,4 @@ public class FSTBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<Scr
}
}

View File

@ -85,7 +85,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
}
return size;
}
private final int[] getHashes() {
if (hashes == null) {
int numberOfValues = termOrdToBytesOffset.size();
@ -105,14 +105,14 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
return ordinals.isMultiValued() ? new BytesValues.Multi(bytes, termOrdToBytesOffset, ordinals.ordinals()) : new BytesValues.Single(
bytes, termOrdToBytesOffset, ordinals.ordinals());
}
@Override
public org.elasticsearch.index.fielddata.BytesValues.WithOrdinals getHashedBytesValues() {
final int[] hashes = getHashes();
return ordinals.isMultiValued() ? new BytesValues.MultiHashed(hashes, bytes, termOrdToBytesOffset, ordinals.ordinals())
: new BytesValues.SingleHashed(hashes, bytes, termOrdToBytesOffset, ordinals.ordinals());
}
@Override
public ScriptDocValues.Strings getScriptValues() {
return new ScriptDocValues.Strings(getBytesValues());
@ -133,6 +133,13 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
this.ordinals = ordinals;
}
@Override
public BytesRef makeSafe(BytesRef bytes) {
// When we fill from the paged bytes, we just reference an existing buffer slice, so it's enough
// to create a shallow copy of the bytes to be safe for "reads". The returned BytesRef shares
// the same byte[] as the argument; only the offset/length holder is new.
return new BytesRef(bytes.bytes, bytes.offset, bytes.length);
}
@Override
public Ordinals.Docs ordinals() {
return this.ordinals;
@ -154,7 +161,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
assert !ordinals.isMultiValued();
iter = newSingleIter();
}
@Override
public Iter getIter(int docId) {
int ord = ordinals.getOrd(docId);
@ -164,7 +171,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
}
}
static final class SingleHashed extends Single {
private final int[] hashes;
@ -172,7 +179,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
super(bytes, termOrdToBytesOffset, ordinals);
this.hashes = hashes;
}
@Override
protected Iter.Single newSingleIter() {
return new Iter.Single() {
@ -181,16 +188,16 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
}
};
}
@Override
public int getValueHashed(int docId, BytesRef ret) {
final int ord = ordinals.getOrd(docId);
// fill ret with the value for this ordinal, then return the hash stored
// for that ordinal in the hashes array (indexed by ordinal)
getValueScratchByOrd(ord, ret);
return hashes[ord];
}
}
static class Multi extends BytesValues {
@ -207,7 +214,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
return iter.reset(ordinals.getIter(docId));
}
}
static final class MultiHashed extends Multi {
private final int[] hashes;
@ -232,7 +239,7 @@ public class PagedBytesAtomicFieldData implements AtomicFieldData.WithOrdinals<S
getValueScratchByOrd(ord, ret);
return hashes[ord];
}
}
}

View File

@ -18,10 +18,8 @@
*/
package org.elasticsearch.search.facet.terms.strings;
import com.google.common.collect.ImmutableList;
import gnu.trove.map.hash.TObjectIntHashMap;
import java.util.Arrays;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
@ -33,7 +31,7 @@ import org.elasticsearch.search.facet.InternalFacet;
import org.elasticsearch.search.facet.terms.TermsFacet;
import org.elasticsearch.search.facet.terms.support.EntryPriorityQueue;
import com.google.common.collect.ImmutableList;
import java.util.Arrays;
public class HashedAggregator {
private int missing;
@ -57,13 +55,9 @@ public class HashedAggregator {
}
}
protected BytesRef makesSafe(BytesRef ref, BytesValues values) {
return values.makeSafe(ref);
}
public void addValue(BytesRef value, int hashCode, BytesValues values) {
final boolean added = hash.addNoCount(value, hashCode, values);
assert assertHash.addNoCount(value, hashCode, values) == added : "asserting counter diverged from current counter - value: "
assert assertHash.addNoCount(value, hashCode, values) == added : "asserting counter diverged from current counter - value: "
+ value + " hash: " + hashCode;
}
@ -107,9 +101,9 @@ public class HashedAggregator {
}
public static InternalFacet buildFacet(String facetName, int size, long missing, long total, TermsFacet.ComparatorType comparatorType,
HashedAggregator aggregator) {
HashedAggregator aggregator) {
if (aggregator.isEmpty()) {
return new InternalStringTermsFacet(facetName, comparatorType, size, ImmutableList.<InternalStringTermsFacet.TermEntry> of(),
return new InternalStringTermsFacet(facetName, comparatorType, size, ImmutableList.<InternalStringTermsFacet.TermEntry>of(),
missing, total);
} else {
if (size < EntryPriorityQueue.LIMIT) {
@ -175,7 +169,7 @@ public class HashedAggregator {
}
return (counts[key]++) == 0;
}
public boolean addNoCount(BytesRef value, int hashCode, BytesValues values) {
int key = hash.add(value, hashCode);
final boolean added = key >= 0;
@ -240,9 +234,9 @@ public class HashedAggregator {
}
private static final class AssertingHashCount implements HashCount { // simple
// implementation
// for
// assertions
// implementation
// for
// assertions
private final TObjectIntHashMap<HashedBytesRef> valuesAndCount = new TObjectIntHashMap<HashedBytesRef>();
private HashedBytesRef spare = new HashedBytesRef();
@ -251,7 +245,7 @@ public class HashedAggregator {
int adjustedValue = valuesAndCount.adjustOrPutValue(spare.reset(value, hashCode), 1, 1);
assert adjustedValue >= 1;
if (adjustedValue == 1) { // only if we added the spare we create a
// new instance
// new instance
spare.bytes = values.makeSafe(spare.bytes);
spare = new HashedBytesRef();
return true;

View File

@ -19,11 +19,8 @@
package org.elasticsearch.search.facet.termsstats.strings;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
@ -42,8 +39,10 @@ import org.elasticsearch.search.facet.terms.strings.HashedAggregator;
import org.elasticsearch.search.facet.termsstats.TermsStatsFacet;
import org.elasticsearch.search.internal.SearchContext;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
public class TermsStatsStringFacetExecutor extends FacetExecutor {
@ -158,7 +157,7 @@ public class TermsStatsStringFacetExecutor extends FacetExecutor {
spare.reset(value, hashCode);
InternalTermsStatsStringFacet.StringEntry stringEntry = entries.get(spare);
if (stringEntry == null) {
HashedBytesRef theValue = new HashedBytesRef(makesSafe(value, values), hashCode);
HashedBytesRef theValue = new HashedBytesRef(values.makeSafe(value), hashCode);
stringEntry = new InternalTermsStatsStringFacet.StringEntry(theValue, 0, 0, 0, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY);
entries.put(theValue, stringEntry);
}
@ -198,7 +197,7 @@ public class TermsStatsStringFacetExecutor extends FacetExecutor {
spare.reset(value, hashCode);
InternalTermsStatsStringFacet.StringEntry stringEntry = entries.get(spare);
if (stringEntry == null) {
HashedBytesRef theValue = new HashedBytesRef(makesSafe(value, values), hashCode);
HashedBytesRef theValue = new HashedBytesRef(values.makeSafe(value), hashCode);
stringEntry = new InternalTermsStatsStringFacet.StringEntry(theValue, 1, 0, 0, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY);
entries.put(theValue, stringEntry);
} else {