fielddata: Binary script doc values should make a deep copy of the BytesRef before populating it in the values array.

Added common base class for ScriptDocValues.Strings and ScriptDocValues.BytesRefs now that these classes are very similar.

Also cleaned up the BinaryDVFieldDataTests:
* Use junit assertions instead of hamcrest
* Use BytesRef directly instead of byte[]

Closes #24785
This commit is contained in:
Martijn van Groningen 2017-06-08 10:57:36 +02:00
parent eeac4b9721
commit 326fa33d4e
No known key found for this signature in database
GPG Key ID: AB236F4FCF2AF12A
2 changed files with 129 additions and 125 deletions

View File

@ -85,76 +85,6 @@ public abstract class ScriptDocValues<T> extends AbstractList<T> {
throw new UnsupportedOperationException("doc values are unmodifiable");
}
public static final class Strings extends ScriptDocValues<String> {
private final SortedBinaryDocValues in;
private BytesRefBuilder[] values = new BytesRefBuilder[0];
private int count;
public Strings(SortedBinaryDocValues in) {
this.in = in;
}
@Override
public void setNextDocId(int docId) throws IOException {
if (in.advanceExact(docId)) {
resize(in.docValueCount());
for (int i = 0; i < count; i++) {
values[i].copyBytes(in.nextValue());
}
} else {
resize(0);
}
}
/**
* Set the {@link #size()} and ensure that the {@link #values} array can
* store at least that many entries.
*/
protected void resize(int newSize) {
count = newSize;
if (newSize > values.length) {
final int oldLength = values.length;
values = ArrayUtil.grow(values, count);
for (int i = oldLength; i < values.length; ++i) {
values[i] = new BytesRefBuilder();
}
}
}
public SortedBinaryDocValues getInternalValues() {
return this.in;
}
public BytesRef getBytesValue() {
if (size() > 0) {
return values[0].get();
} else {
return null;
}
}
public String getValue() {
BytesRef value = getBytesValue();
if (value == null) {
return null;
} else {
return value.utf8ToString();
}
}
@Override
public String get(int index) {
return values[index].get().utf8ToString();
}
@Override
public int size() {
return count;
}
}
public static final class Longs extends ScriptDocValues<Long> {
protected static final DeprecationLogger deprecationLogger = new DeprecationLogger(ESLoggerFactory.getLogger(Longs.class));
@ -570,13 +500,13 @@ public abstract class ScriptDocValues<T> extends AbstractList<T> {
}
public static final class BytesRefs extends ScriptDocValues<BytesRef> {
abstract static class BinaryScriptDocValues<T> extends ScriptDocValues<T> {
private final SortedBinaryDocValues in;
private BytesRef[] values;
private int count;
protected BytesRefBuilder[] values = new BytesRefBuilder[0];
protected int count;
public BytesRefs(SortedBinaryDocValues in) {
BinaryScriptDocValues(SortedBinaryDocValues in) {
this.in = in;
}
@ -585,7 +515,10 @@ public abstract class ScriptDocValues<T> extends AbstractList<T> {
if (in.advanceExact(docId)) {
resize(in.docValueCount());
for (int i = 0; i < count; i++) {
values[i] = in.nextValue();
// We need to make a copy here, because BytesBinaryDVAtomicFieldData's SortedBinaryDocValues
// implementation reuses the returned BytesRef. Otherwise we would end up with the same BytesRef
// instance for all slots in the values array.
values[i].copyBytes(in.nextValue());
}
} else {
resize(0);
@ -598,32 +531,69 @@ public abstract class ScriptDocValues<T> extends AbstractList<T> {
*/
protected void resize(int newSize) {
count = newSize;
if (values == null) {
values = new BytesRef[newSize];
} else {
if (newSize > values.length) {
final int oldLength = values.length;
values = ArrayUtil.grow(values, count);
for (int i = oldLength; i < values.length; ++i) {
values[i] = new BytesRefBuilder();
}
}
}
public SortedBinaryDocValues getInternalValues() {
return this.in;
}
public BytesRef getValue() {
if (count == 0) {
return new BytesRef();
}
return values[0];
}
@Override
public BytesRef get(int index) {
return values[index];
}
@Override
public int size() {
return count;
}
}
public static final class Strings extends BinaryScriptDocValues<String> {
public Strings(SortedBinaryDocValues in) {
super(in);
}
@Override
public String get(int index) {
return values[index].get().utf8ToString();
}
public BytesRef getBytesValue() {
if (size() > 0) {
return values[0].get();
} else {
return null;
}
}
public String getValue() {
BytesRef value = getBytesValue();
if (value == null) {
return null;
} else {
return value.utf8ToString();
}
}
}
public static final class BytesRefs extends BinaryScriptDocValues<BytesRef> {
public BytesRefs(SortedBinaryDocValues in) {
super(in);
}
@Override
public BytesRef get(int index) {
return values[index].get();
}
public BytesRef getValue() {
if (count == 0) {
return new BytesRef();
}
return values[0].get();
}
}
}

View File

@ -19,11 +19,9 @@
package org.elasticsearch.index.fielddata;
import com.carrotsearch.hppc.ObjectArrayList;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.compress.CompressedXContent;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.common.xcontent.XContentType;
@ -31,8 +29,8 @@ import org.elasticsearch.index.mapper.DocumentMapper;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.SourceToParse;
import java.util.ArrayList;
import java.util.List;
import static org.hamcrest.Matchers.equalTo;
public class BinaryDVFieldDataTests extends AbstractFieldDataTestCase {
@Override
@ -53,15 +51,21 @@ public class BinaryDVFieldDataTests extends AbstractFieldDataTestCase {
final DocumentMapper mapper = mapperService.documentMapperParser().parse("test", new CompressedXContent(mapping));
ObjectArrayList<byte[]> bytesList1 = new ObjectArrayList<>(2);
List<BytesRef> bytesList1 = new ArrayList<>(2);
bytesList1.add(randomBytes());
bytesList1.add(randomBytes());
XContentBuilder doc = XContentFactory.jsonBuilder().startObject().startArray("field").value(bytesList1.get(0)).value(bytesList1.get(1)).endArray().endObject();
ParsedDocument d = mapper.parse(SourceToParse.source("test", "test", "1",
doc.bytes(), XContentType.JSON));
XContentBuilder doc = XContentFactory.jsonBuilder().startObject();
{
doc.startArray("field");
doc.value(bytesList1.get(0));
doc.value(bytesList1.get(1));
doc.endArray();
}
doc.endObject();
ParsedDocument d = mapper.parse(SourceToParse.source("test", "test", "1", doc.bytes(), XContentType.JSON));
writer.addDocument(d.rootDoc());
byte[] bytes1 = randomBytes();
BytesRef bytes1 = randomBytes();
doc = XContentFactory.jsonBuilder().startObject().field("field", bytes1).endObject();
d = mapper.parse(SourceToParse.source("test", "test", "2", doc.bytes(), XContentType.JSON));
writer.addDocument(d.rootDoc());
@ -71,45 +75,75 @@ public class BinaryDVFieldDataTests extends AbstractFieldDataTestCase {
writer.addDocument(d.rootDoc());
// test remove duplicate value
ObjectArrayList<byte[]> bytesList2 = new ObjectArrayList<>(2);
List<BytesRef> bytesList2 = new ArrayList<>(2);
bytesList2.add(randomBytes());
bytesList2.add(randomBytes());
doc = XContentFactory.jsonBuilder().startObject().startArray("field").value(bytesList2.get(0)).value(bytesList2.get(1)).value(bytesList2.get(0)).endArray().endObject();
doc = XContentFactory.jsonBuilder().startObject();
{
doc.startArray("field");
doc.value(bytesList2.get(0));
doc.value(bytesList2.get(1));
doc.value(bytesList2.get(0));
doc.endArray();
}
doc.endObject();
d = mapper.parse(SourceToParse.source("test", "test", "4", doc.bytes(), XContentType.JSON));
writer.addDocument(d.rootDoc());
List<LeafReaderContext> readers = refreshReader();
IndexFieldData<?> indexFieldData = getForField("field");
for (LeafReaderContext reader : readers) {
AtomicFieldData fieldData = indexFieldData.load(reader);
List<LeafReaderContext> readers = refreshReader();
assertEquals(1, readers.size());
LeafReaderContext reader = readers.get(0);
SortedBinaryDocValues bytesValues = fieldData.getBytesValues();
bytesList1.sort(null);
bytesList2.sort(null);
CollectionUtils.sortAndDedup(bytesList1);
assertTrue(bytesValues.advanceExact(0));
assertThat(bytesValues.docValueCount(), equalTo(2));
assertThat(bytesValues.nextValue(), equalTo(new BytesRef(bytesList1.get(0))));
assertThat(bytesValues.nextValue(), equalTo(new BytesRef(bytesList1.get(1))));
// Test SortedBinaryDocValues's decoding:
AtomicFieldData fieldData = indexFieldData.load(reader);
SortedBinaryDocValues bytesValues = fieldData.getBytesValues();
assertTrue(bytesValues.advanceExact(1));
assertThat(bytesValues.docValueCount(), equalTo(1));
assertThat(bytesValues.nextValue(), equalTo(new BytesRef(bytes1)));
assertTrue(bytesValues.advanceExact(0));
assertEquals(2, bytesValues.docValueCount());
assertEquals(bytesList1.get(0), bytesValues.nextValue());
assertEquals(bytesList1.get(1), bytesValues.nextValue());
assertFalse(bytesValues.advanceExact(2));
assertTrue(bytesValues.advanceExact(1));
assertEquals(1, bytesValues.docValueCount());
assertEquals(bytes1, bytesValues.nextValue());
CollectionUtils.sortAndDedup(bytesList2);
assertTrue(bytesValues.advanceExact(3));
assertThat(bytesValues.docValueCount(), equalTo(2));
assertThat(bytesValues.nextValue(), equalTo(new BytesRef(bytesList2.get(0))));
assertThat(bytesValues.nextValue(), equalTo(new BytesRef(bytesList2.get(1))));
}
assertFalse(bytesValues.advanceExact(2));
assertTrue(bytesValues.advanceExact(3));
assertEquals(2, bytesValues.docValueCount());
assertEquals(bytesList2.get(0), bytesValues.nextValue());
assertEquals(bytesList2.get(1), bytesValues.nextValue());
// Test whether ScriptDocValues.BytesRefs makes a deepcopy
fieldData = indexFieldData.load(reader);
ScriptDocValues<?> scriptValues = fieldData.getScriptValues();
scriptValues.setNextDocId(0);
assertEquals(2, scriptValues.size());
assertEquals(bytesList1.get(0), scriptValues.get(0));
assertEquals(bytesList1.get(1), scriptValues.get(1));
scriptValues.setNextDocId(1);
assertEquals(1, scriptValues.size());
assertEquals(bytes1, scriptValues.get(0));
scriptValues.setNextDocId(2);
assertEquals(0, scriptValues.size());
scriptValues.setNextDocId(3);
assertEquals(2, scriptValues.size());
assertEquals(bytesList2.get(0), scriptValues.get(0));
assertEquals(bytesList2.get(1), scriptValues.get(1));
}
private byte[] randomBytes() {
private static BytesRef randomBytes() {
int size = randomIntBetween(10, 1000);
byte[] bytes = new byte[size];
random().nextBytes(bytes);
return bytes;
return new BytesRef(bytes);
}
@Override