SOLR-12983: JavabinLoader should avoid creating String Objects and create UTF8CharSequence fields from byte[]

This commit is contained in:
Noble Paul 2019-01-08 15:27:48 +11:00
parent 5a513fab83
commit 3932a4cc6c
18 changed files with 404 additions and 167 deletions

View File

@ -265,6 +265,8 @@ Improvements
`solr.max.booleanClauses` sysprop is specified, that will override the 1024 default. This enables users to
update this property across the board more easily. (Jason Gerlowski)
* SOLR-12983: JavabinLoader should avoid creating String Objects and create UTF8CharSequence fields from byte[] (noble)
Other Changes
----------------------

View File

@ -109,8 +109,8 @@ public class DatePointField extends PointField implements DateValueFieldType {
@Override
public Object toNativeType(Object val) {
if (val instanceof String) {
return DateMathParser.parseMath(null, (String) val);
if (val instanceof CharSequence) {
return DateMathParser.parseMath(null, val.toString());
}
return super.toNativeType(val);
}

View File

@ -49,7 +49,7 @@ public class DoublePointField extends PointField implements DoubleValueFieldType
public Object toNativeType(Object val) {
if (val == null) return null;
if (val instanceof Number) return ((Number) val).doubleValue();
if (val instanceof String) return Double.parseDouble((String) val);
if (val instanceof CharSequence) return Double.parseDouble( val.toString());
return super.toNativeType(val);
}

View File

@ -49,7 +49,7 @@ public class FloatPointField extends PointField implements FloatValueFieldType {
public Object toNativeType(Object val) {
if (val == null) return null;
if (val instanceof Number) return ((Number) val).floatValue();
if (val instanceof String) return Float.parseFloat((String) val);
if (val instanceof CharSequence) return Float.parseFloat(val.toString());
return super.toNativeType(val);
}

View File

@ -49,9 +49,9 @@ public class IntPointField extends PointField implements IntValueFieldType {
if (val == null) return null;
if (val instanceof Number) return ((Number) val).intValue();
try {
if (val instanceof String) return Integer.parseInt((String) val);
if (val instanceof CharSequence) return Integer.parseInt( val.toString());
} catch (NumberFormatException e) {
Float v = Float.parseFloat((String) val);
Float v = Float.parseFloat(val.toString());
return v.intValue();
}
return super.toNativeType(val);

View File

@ -48,9 +48,9 @@ public class LongPointField extends PointField implements LongValueFieldType {
if (val == null) return null;
if (val instanceof Number) return ((Number) val).longValue();
try {
if (val instanceof String) return Long.parseLong((String) val);
if (val instanceof CharSequence) return Long.parseLong(val.toString());
} catch (NumberFormatException e) {
Double v = Double.parseDouble((String) val);
Double v = Double.parseDouble(val.toString());
return v.longValue();
}
return super.toNativeType(val);

View File

@ -96,7 +96,7 @@ public class TrieDateField extends TrieField implements DateValueFieldType {
@Override
public Object toNativeType(Object val) {
if (val instanceof String) {
if (val instanceof CharSequence) {
// val is only known to be a CharSequence here (e.g. a Utf8CharSequence), so casting it to
// String could throw ClassCastException; convert it explicitly instead.
return DateMathParser.parseMath(null, val.toString());
}
return super.toNativeType(val);

View File

@ -61,7 +61,7 @@ public class TrieDoubleField extends TrieField implements DoubleValueFieldType {
public Object toNativeType(Object val) {
if(val==null) return null;
if (val instanceof Number) return ((Number) val).doubleValue();
if (val instanceof String) return Double.parseDouble((String) val);
if (val instanceof CharSequence) return Double.parseDouble(val.toString());
return super.toNativeType(val);
}

View File

@ -61,7 +61,7 @@ public class TrieFloatField extends TrieField implements FloatValueFieldType {
public Object toNativeType(Object val) {
if(val==null) return null;
if (val instanceof Number) return ((Number) val).floatValue();
if (val instanceof String) return Float.parseFloat((String) val);
if (val instanceof CharSequence) return Float.parseFloat(val.toString());
return super.toNativeType(val);
}

View File

@ -55,9 +55,9 @@ public class TrieIntField extends TrieField implements IntValueFieldType {
if(val==null) return null;
if (val instanceof Number) return ((Number) val).intValue();
try {
if (val instanceof String) return Integer.parseInt((String) val);
if (val instanceof CharSequence) return Integer.parseInt(val.toString());
} catch (NumberFormatException e) {
Float v = Float.parseFloat((String) val);
Float v = Float.parseFloat(val.toString());
return v.intValue();
}
return super.toNativeType(val);

View File

@ -55,9 +55,9 @@ public class TrieLongField extends TrieField implements LongValueFieldType {
if(val==null) return null;
if (val instanceof Number) return ((Number) val).longValue();
try {
if (val instanceof String) return Long.parseLong((String) val);
if (val instanceof CharSequence) return Long.parseLong(val.toString());
} catch (NumberFormatException e) {
Double v = Double.parseDouble((String) val);
// The value that failed Long.parseLong is a CharSequence, not necessarily a String;
// convert rather than cast so the fallback parse cannot throw ClassCastException.
Double v = Double.parseDouble(val.toString());
return v.longValue();
}
return super.toNativeType(val);

View File

@ -29,6 +29,7 @@ import org.apache.solr.common.SolrDocumentBase;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.util.ByteArrayUtf8CharSequence;
import org.apache.solr.schema.CopyField;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
@ -165,6 +166,7 @@ public class DocumentBuilder {
if( v == null ) {
continue;
}
v = ByteArrayUtf8CharSequence.convertCharSeq(v);
hasField = true;
if (sfield != null) {
used = true;

View File

@ -23,12 +23,14 @@ import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.SolrInputField;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams;
@ -38,6 +40,8 @@ import org.apache.solr.common.util.NamedList;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.solr.common.util.ByteArrayUtf8CharSequence.convertCharSeq;
/**
* Provides methods for marshalling an UpdateRequest to a NamedList which can be serialized in the javabin format and
* vice versa.
@ -111,87 +115,7 @@ public class JavaBinUpdateRequestCodec {
Map<String,Map<String,Object>> delByIdMap;
List<String> delByQ;
final NamedList[] namedList = new NamedList[1];
try (JavaBinCodec codec = new JavaBinCodec() {
// NOTE: this only works because this is an anonymous inner class
// which will only ever be used on a single stream -- if this class
// is ever refactored, this will not work.
private boolean seenOuterMostDocIterator = false;
@Override
public NamedList readNamedList(DataInputInputStream dis) throws IOException {
int sz = readSize(dis);
NamedList nl = new NamedList();
if (namedList[0] == null) {
namedList[0] = nl;
}
for (int i = 0; i < sz; i++) {
String name = (String) readVal(dis);
Object val = readVal(dis);
nl.add(name, val);
}
return nl;
}
@Override
public List readIterator(DataInputInputStream fis) throws IOException {
// default behavior for reading any regular Iterator in the stream
if (seenOuterMostDocIterator) return super.readIterator(fis);
// special treatment for first outermost Iterator
// (the list of documents)
seenOuterMostDocIterator = true;
return readOuterMostDocIterator(fis);
}
private List readOuterMostDocIterator(DataInputInputStream fis) throws IOException {
NamedList params = (NamedList) namedList[0].get("params");
updateRequest.setParams(new ModifiableSolrParams(params.toSolrParams()));
if (handler == null) return super.readIterator(fis);
Integer commitWithin = null;
Boolean overwrite = null;
Object o = null;
while (true) {
if (o == null) {
o = readVal(fis);
}
if (o == END_OBJ) {
break;
}
SolrInputDocument sdoc = null;
if (o instanceof List) {
sdoc = listToSolrInputDocument((List<NamedList>) o);
} else if (o instanceof NamedList) {
UpdateRequest req = new UpdateRequest();
req.setParams(new ModifiableSolrParams(((NamedList) o).toSolrParams()));
handler.update(null, req, null, null);
} else if (o instanceof Map.Entry){
sdoc = (SolrInputDocument) ((Map.Entry) o).getKey();
Map p = (Map) ((Map.Entry) o).getValue();
if (p != null) {
commitWithin = (Integer) p.get(UpdateRequest.COMMIT_WITHIN);
overwrite = (Boolean) p.get(UpdateRequest.OVERWRITE);
}
} else {
sdoc = (SolrInputDocument) o;
}
// peek at the next object to see if we're at the end
o = readVal(fis);
if (o == END_OBJ) {
// indicate that we've hit the last doc in the batch, used to enable optimizations when doing replication
updateRequest.lastDocInBatch();
}
handler.update(sdoc, updateRequest, commitWithin, overwrite);
}
return Collections.EMPTY_LIST;
}
};) {
try (JavaBinCodec codec = new StreamingCodec(namedList, updateRequest, handler)) {
codec.unmarshal(is);
}
@ -248,43 +172,169 @@ public class JavaBinUpdateRequestCodec {
return updateRequest;
}
private SolrInputDocument listToSolrInputDocument(List<NamedList> namedList) {
SolrInputDocument doc = new SolrInputDocument();
for (int i = 0; i < namedList.size(); i++) {
NamedList nl = namedList.get(i);
if (i == 0) {
Float boost = (Float) nl.getVal(0);
if (boost != null && boost.floatValue() != 1f) {
String message = "Ignoring document boost: " + boost + " as index-time boosts are not supported anymore";
if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) {
log.warn(message);
} else {
log.debug(message);
}
}
} else {
Float boost = (Float) nl.getVal(2);
if (boost != null && boost.floatValue() != 1f) {
String message = "Ignoring field boost: " + boost + " as index-time boosts are not supported anymore";
if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) {
log.warn(message);
} else {
log.debug(message);
}
}
doc.addField((String) nl.getVal(0),
nl.getVal(1));
}
}
return doc;
}
private NamedList solrParamsToNamedList(SolrParams params) {
if (params == null) return new NamedList();
return params.toNamedList();
}
public static interface StreamingUpdateHandler {
public void update(SolrInputDocument document, UpdateRequest req, Integer commitWithin, Boolean override);
/**
 * Callback invoked for each item as an update request is decoded from a stream.
 */
public interface StreamingUpdateHandler {
/**
 * Receives one decoded item.
 *
 * @param document     the decoded document; callers in this file also pass {@code null} when the
 *                     item carried only request parameters
 * @param req          the update request the item belongs to
 * @param commitWithin commitWithin value read alongside the document, or {@code null}
 * @param override     the overwrite flag read alongside the document, or {@code null}
 */
void update(SolrInputDocument document, UpdateRequest req, Integer commitWithin, Boolean override);
}
/**
 * SolrInputDocument variant whose {@link #getFieldValue(String)} passes the stored value through
 * {@code convertCharSeq} before returning it.
 * NOTE(review): presumably this hides Utf8CharSequence values behind plain Strings for callers --
 * confirm against ByteArrayUtf8CharSequence.convertCharSeq.
 */
static class MaskCharSequenceSolrInputDoc extends SolrInputDocument {
/**
 * @param fields the map backing this document, handed straight to the superclass
 */
public MaskCharSequenceSolrInputDoc(Map<String, SolrInputField> fields) {
super(fields);
}
/** Returns the superclass' field value after running it through {@code convertCharSeq}. */
@Override
public Object getFieldValue(String name) {
return convertCharSeq(super.getFieldValue(name));
}
}
/**
 * JavaBinCodec that hands the elements of the first (outermost) iterator in the stream -- the list
 * of documents of an update request -- to a {@link StreamingUpdateHandler} one at a time instead of
 * collecting them into a list.
 */
class StreamingCodec extends JavaBinCodec {

  // One-element holder: slot 0 receives the first NamedList read from the stream.
  private final NamedList[] namedList;
  private final UpdateRequest updateRequest;
  private final StreamingUpdateHandler handler;
  // NOTE: this flag is per-instance state that is set once and never reset, so an instance
  // must only ever be used to read a single stream.
  private boolean seenOuterMostDocIterator;

  /**
   * @param namedList     one-element holder that will receive the first NamedList read
   * @param updateRequest request whose params are populated from the stream
   * @param handler       receiver of the streamed documents; when {@code null} the document
   *                      iterator is read like any other iterator
   */
  public StreamingCodec(NamedList[] namedList, UpdateRequest updateRequest, StreamingUpdateHandler handler) {
    this.namedList = namedList;
    this.updateRequest = updateRequest;
    this.handler = handler;
    seenOuterMostDocIterator = false;
  }

  /** Creates documents whose {@code getFieldValue} runs values through {@code convertCharSeq}. */
  @Override
  protected SolrInputDocument createSolrInputDocument(int sz) {
    return new MaskCharSequenceSolrInputDoc(new LinkedHashMap<>(sz));
  }

  /** Reads a NamedList and records the first one read in slot 0 of the shared holder. */
  @Override
  public NamedList readNamedList(DataInputInputStream dis) throws IOException {
    int sz = readSize(dis);
    NamedList nl = new NamedList();
    if (namedList[0] == null) {
      namedList[0] = nl;
    }
    for (int i = 0; i < sz; i++) {
      String name = (String) readVal(dis);
      Object val = readVal(dis);
      nl.add(name, val);
    }
    return nl;
  }

  /**
   * Converts the list-of-NamedList document representation into a SolrInputDocument.
   * Element 0 carries the document boost; every later element is (name, value, boost).
   * Boosts other than 1 are ignored and only logged.
   */
  private SolrInputDocument listToSolrInputDocument(List<NamedList> namedList) {
    SolrInputDocument doc = new SolrInputDocument();
    for (int i = 0; i < namedList.size(); i++) {
      NamedList nl = namedList.get(i);
      if (i == 0) {
        logIgnoredBoost("document", (Float) nl.getVal(0));
      } else {
        logIgnoredBoost("field", (Float) nl.getVal(2));
        doc.addField((String) nl.getVal(0), nl.getVal(1));
      }
    }
    return doc;
  }

  /** Logs (warn the first time, debug afterwards) that a boost other than 1 is being ignored. */
  private void logIgnoredBoost(String kind, Float boost) {
    if (boost == null || boost.floatValue() == 1f) {
      return;
    }
    String message = "Ignoring " + kind + " boost: " + boost + " as index-time boosts are not supported anymore";
    if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) {
      log.warn(message);
    } else {
      log.debug(message);
    }
  }

  /** Streams the first iterator encountered; any later iterator is read the default way. */
  @Override
  public List readIterator(DataInputInputStream fis) throws IOException {
    // default behavior for reading any regular Iterator in the stream
    if (seenOuterMostDocIterator) return super.readIterator(fis);

    // special treatment for first outermost Iterator
    // (the list of documents)
    seenOuterMostDocIterator = true;
    return readOuterMostDocIterator(fis);
  }

  /**
   * Reads the document iterator element by element and forwards each element to the handler.
   * Strings are read as char sequences for the duration of this call only.
   *
   * @return an empty list when a handler consumed the documents, otherwise whatever the default
   *         iterator reading returns
   */
  private List readOuterMostDocIterator(DataInputInputStream fis) throws IOException {
    NamedList params = (NamedList) namedList[0].get("params");
    updateRequest.setParams(new ModifiableSolrParams(params.toSolrParams()));
    if (handler == null) return super.readIterator(fis);
    Integer commitWithin = null;
    Boolean overwrite = null;
    Object o = null;
    super.readStringAsCharSeq = true;
    try {
      while (true) {
        if (o == null) {
          o = readVal(fis);
        }

        if (o == END_OBJ) {
          break;
        }

        SolrInputDocument sdoc = null;
        if (o instanceof List) {
          sdoc = listToSolrInputDocument((List<NamedList>) o);
        } else if (o instanceof NamedList) {
          UpdateRequest req = new UpdateRequest();
          req.setParams(new ModifiableSolrParams(((NamedList) o).toSolrParams()));
          handler.update(null, req, null, null);
          // NOTE(review): sdoc stays null here, so the handler is invoked a second time below
          // with a null document -- presumably handlers ignore null documents; confirm.
        } else if (o instanceof Map.Entry) {
          sdoc = (SolrInputDocument) ((Entry) o).getKey();
          Map p = (Map) ((Entry) o).getValue();
          if (p != null) {
            commitWithin = (Integer) p.get(UpdateRequest.COMMIT_WITHIN);
            overwrite = (Boolean) p.get(UpdateRequest.OVERWRITE);
          }
        } else {
          sdoc = (SolrInputDocument) o;
        }

        // peek at the next object to see if we're at the end
        o = readVal(fis);
        if (o == END_OBJ) {
          // indicate that we've hit the last doc in the batch, used to enable optimizations when doing replication
          updateRequest.lastDocInBatch();
        }

        handler.update(sdoc, updateRequest, commitWithin, overwrite);
      }
      return Collections.emptyList();
    } finally {
      // always restore the default so values read after the document list come back as Strings
      super.readStringAsCharSeq = false;
    }
  }
}
}

View File

@ -23,6 +23,7 @@ import java.util.Collection;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import org.noggit.CharArr;
@ -38,6 +39,7 @@ public class ByteArrayUtf8CharSequence implements Utf8CharSequence {
protected int hashCode = Integer.MIN_VALUE;
protected int length;
protected volatile String utf16;
public Function<ByteArrayUtf8CharSequence, String> stringProvider;
public ByteArrayUtf8CharSequence(String utf16) {
buf = new byte[Math.multiplyExact(utf16.length(), 3)];
@ -51,21 +53,39 @@ public class ByteArrayUtf8CharSequence implements Utf8CharSequence {
assert isValid();
}
/** Returns the backing byte array itself (no copy is made). */
public byte[] getBuf() {
return buf;
}
/** Returns the index in the backing array at which this sequence's bytes start. */
public int offset() {
return offset;
}
/**
 * Wraps a region of an existing byte array; the array is stored as-is, not copied.
 *
 * @param buf    array holding the bytes
 * @param offset index of the first byte of this sequence within {@code buf}
 * @param length number of bytes belonging to this sequence
 */
public ByteArrayUtf8CharSequence(byte[] buf, int offset, int length) {
this.buf = buf;
this.offset = offset;
this.length = length;
}
/**
 * Returns the byte at position {@code idx}, counted from the start of this sequence.
 *
 * @throws ArrayIndexOutOfBoundsException if {@code idx} is negative or not less than the length
 */
@Override
public byte byteAt(int idx) {
if (idx >= length || idx < 0) throw new ArrayIndexOutOfBoundsException("idx must be >=0 and < " + length);
return buf[offset + idx];
}
/** Returns the current value of the {@code utf16} field, which may be {@code null}. */
public String getStringOrNull() {
return utf16;
}
@Override
public int write(int start, byte[] buffer, int pos) {
if (start == -1 || start >= length) return -1;
if (length == 0) return 0;
int writableBytes = Math.min(length - start, buffer.length - pos);
System.arraycopy(buf, offset + start, buffer, pos, writableBytes);
return _writeBytes(buf, offset, length, start, buffer, pos);
}
/**
 * Copies as many bytes as fit from a region of {@code src} into {@code buffer}.
 *
 * @param src       array holding the source bytes
 * @param srcOffset index in {@code src} where the source region begins
 * @param srcLength number of bytes in the source region
 * @param start     position within the source region from which to start copying
 * @param buffer    destination array
 * @param pos       index in {@code buffer} at which to start writing
 * @return the number of bytes copied, or -1 when {@code start} is negative or at/after the end
 *         of the region (or {@code srcOffset} is -1)
 */
static int _writeBytes(byte[] src, int srcOffset, int srcLength, int start, byte[] buffer, int pos) {
  // A negative start is not a valid position: reject it instead of copying bytes that lie
  // in front of the region. The srcOffset == -1 check is kept for backward compatibility.
  if (srcOffset == -1 || start < 0 || start >= srcLength) return -1;
  // Copy only what is left in the region and what still fits into the destination.
  int writableBytes = Math.min(srcLength - start, buffer.length - pos);
  System.arraycopy(src, srcOffset + start, buffer, pos, writableBytes);
  return writableBytes;
}
@ -97,15 +117,26 @@ public class ByteArrayUtf8CharSequence implements Utf8CharSequence {
@Override
public boolean equals(Object other) {
if (other == null) {
if (other instanceof Utf8CharSequence) {
if (size() != ((Utf8CharSequence) other).size()) return false;
if (other instanceof ByteArrayUtf8CharSequence) {
if (this.length != ((ByteArrayUtf8CharSequence) other).length) return false;
ByteArrayUtf8CharSequence that = (ByteArrayUtf8CharSequence) other;
return _equals(this.buf, this.offset, this.offset + this.length,
that.buf, that.offset, that.offset + that.length);
}
return utf8Equals(this, (Utf8CharSequence) other);
} else {
return false;
}
if (other instanceof ByteArrayUtf8CharSequence) {
ByteArrayUtf8CharSequence that = (ByteArrayUtf8CharSequence) other;
return _equals(this.buf, this.offset, this.offset + this.length,
that.buf, that.offset, that.offset + that.length);
}
public static boolean utf8Equals(Utf8CharSequence utf8_1, Utf8CharSequence utf8_2) {
if (utf8_1.size() != utf8_2.size()) return false;
for (int i = 0; i < utf8_1.size(); i++) {
if (utf8_1.byteAt(i) != utf8_2.byteAt(i)) return false;
}
return false;
return true;
}
@ -115,14 +146,16 @@ public class ByteArrayUtf8CharSequence implements Utf8CharSequence {
}
private String _getStr() {
String utf16 = this.utf16;
if (utf16 == null) {
synchronized (this) {
if (utf16 == null) {
CharArr arr = new CharArr();
ByteUtils.UTF8toUTF16(buf, offset, length, arr);
utf16 = arr.toString();
}
if (stringProvider != null) {
this.utf16 = utf16 = stringProvider.apply(this);
} else {
CharArr arr = new CharArr();
ByteUtils.UTF8toUTF16(buf, offset, length, arr);
this.utf16 = utf16 = arr.toString();
}
}
return utf16;
}

View File

@ -0,0 +1,65 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.common.util;
/**
 * Hands out consecutive regions of a byte[] so that many small values can share one array.
 * Each call to {@link #expand(int)} reserves the next region; the reserved region starts at
 * {@link #getStartPos()} of the returned block and lives in that block's {@link #getBuf()}.
 */
public class BytesBlock {
  // size used whenever this block allocates a buffer
  private final int bufSize;
  public byte[] buf;
  // current position: index of the first byte that has not been handed out yet
  private int pos;
  // start position of the region reserved by the most recent expand()
  private int startPos = 0;

  /**
   * @param sz size of the buffer this block allocates
   */
  public BytesBlock(int sz) {
    this.bufSize = sz;
    create();
  }

  /** Returns the index just past the most recently reserved region. */
  public int getPos() {
    return pos;
  }

  /** Returns the index at which the most recently reserved region begins. */
  public int getStartPos() {
    return startPos;
  }

  /** Returns the current backing array itself (no copy is made). */
  public byte[] getBuf() {
    return buf;
  }

  /**
   * Reserves {@code sz} bytes and returns the block that holds the reserved region. Callers must
   * use the returned block, which is not always {@code this}:
   * <ul>
   *   <li>if the request fits into the remaining space, it is reserved in the current buffer;</li>
   *   <li>otherwise, if the request is larger than a quarter of the buffer size, a new dedicated
   *       block of exactly {@code sz} bytes is returned and this block is left untouched;</li>
   *   <li>otherwise this block switches to a freshly allocated buffer and reserves from its
   *       start.</li>
   * </ul>
   *
   * @param sz number of bytes to reserve, must not be negative
   * @return the block containing the reserved region
   * @throws IllegalArgumentException if {@code sz} is negative
   */
  public BytesBlock expand(int sz) {
    // A negative size would move pos backwards and hand out bytes that are already in use.
    if (sz < 0) throw new IllegalArgumentException("sz must be >= 0, got " + sz);
    if (bufSize - pos >= sz) {
      return markPositions(sz);
    }
    if (sz > (bufSize / 4)) return new BytesBlock(sz).expand(sz);// a reasonably large block, create new
    create();
    return markPositions(sz);
  }

  /** Records the start of the new region and advances the current position past it. */
  private BytesBlock markPositions(int sz) {
    this.startPos = pos;
    pos += sz;
    return this;
  }

  /** Allocates a fresh buffer and rewinds both positions to its beginning. */
  private void create() {
    buf = new byte[bufSize];
    startPos = pos = 0;
  }
}

View File

@ -36,6 +36,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.BiConsumer;
import java.util.function.Function;
import org.apache.solr.common.ConditionalMapWriter;
import org.apache.solr.common.EnumFieldValue;
@ -117,7 +118,7 @@ public class JavaBinCodec implements PushWriter {
private WritableDocFields writableDocFields;
private boolean alreadyMarshalled;
private boolean alreadyUnmarshalled;
private boolean readStringAsCharSeq = false;
protected boolean readStringAsCharSeq = false;
public JavaBinCodec() {
resolver =null;
@ -284,7 +285,7 @@ public class JavaBinCodec implements PushWriter {
// OK, try type + size in single byte
switch (tagByte >>> 5) {
case STR >>> 5:
return readStr(dis);
return readStr(dis, stringCache, readStringAsCharSeq);
case SINT >>> 5:
return readSmallInt(dis);
case SLONG >>> 5:
@ -355,6 +356,9 @@ public class JavaBinCodec implements PushWriter {
writeSolrDocumentList((SolrDocumentList) val);
return true;
}
if (val instanceof SolrInputField) {
return writeKnownType(((SolrInputField) val).getValue());
}
if (val instanceof IteratorWriter) {
writeIterator((IteratorWriter) val);
return true;
@ -612,7 +616,7 @@ public class JavaBinCodec implements PushWriter {
log.debug(message);
}
}
SolrInputDocument sdoc = new SolrInputDocument(new LinkedHashMap<>(sz));
SolrInputDocument sdoc = createSolrInputDocument(sz);
for (int i = 0; i < sz; i++) {
String fieldName;
Object obj = readVal(dis); // could be a boost, a field name, or a child document
@ -639,15 +643,16 @@ public class JavaBinCodec implements PushWriter {
return sdoc;
}
/**
 * Factory for the document instance that is populated while a SolrInputDocument is read;
 * being protected, it lets subclasses supply their own SolrInputDocument subtype.
 *
 * @param sz initial capacity handed to the LinkedHashMap backing the document
 */
protected SolrInputDocument createSolrInputDocument(int sz) {
return new SolrInputDocument(new LinkedHashMap<>(sz));
}
public void writeSolrInputDocument(SolrInputDocument sdoc) throws IOException {
List<SolrInputDocument> children = sdoc.getChildDocuments();
int sz = sdoc.size() + (children==null ? 0 : children.size());
writeTag(SOLRINPUTDOC, sz);
writeFloat(1f); // document boost
for (SolrInputField inputField : sdoc.values()) {
writeExternString(inputField.getName());
writeVal(inputField.getValue());
}
sdoc.writeMap(ew);
if (children != null) {
for (SolrInputDocument child : children) {
writeSolrInputDocument(child);
@ -891,28 +896,55 @@ public class JavaBinCodec implements PushWriter {
private StringBytes bytesRef = new StringBytes(bytes,0,0);
public CharSequence readStr(DataInputInputStream dis) throws IOException {
return readStr(dis,null);
return readStr(dis, null, readStringAsCharSeq);
}
public CharSequence readStr(DataInputInputStream dis, StringCache stringCache) throws IOException {
public CharSequence readStr(DataInputInputStream dis, StringCache stringCache, boolean readStringAsCharSeq) throws IOException {
if (readStringAsCharSeq) {
return readUtf8(dis);
}
int sz = readSize(dis);
return _readStr(dis, stringCache, sz);
}
private CharSequence _readStr(DataInputInputStream dis, StringCache stringCache, int sz) throws IOException {
if (bytes == null || bytes.length < sz) bytes = new byte[sz];
dis.readFully(bytes, 0, sz);
if (stringCache != null) {
return stringCache.get(bytesRef.reset(bytes, 0, sz));
} else {
arr.reset();
if (readStringAsCharSeq) {
byte[] copyBuf = new byte[sz];
System.arraycopy(bytes, 0, copyBuf, 0, sz);
return new ByteArrayUtf8CharSequence(copyBuf, 0, sz);
} else {
ByteUtils.UTF8toUTF16(bytes, 0, sz, arr);
return arr.toString();
}
ByteUtils.UTF8toUTF16(bytes, 0, sz, arr);
return arr.toString();
}
}
/////////// code to optimize reading UTF8
static final int MAX_UTF8_SZ = 1024 * 64;//too big strings can cause too much memory allocation
private Function<ByteArrayUtf8CharSequence, String> stringProvider;
private BytesBlock bytesBlock;
/**
 * Reads a string value as raw UTF-8 bytes and wraps them in a ByteArrayUtf8CharSequence without
 * decoding. Values longer than MAX_UTF8_SZ are delegated to _readStr instead. The bytes are read
 * into a region reserved from a BytesBlock, and the returned sequence is given a string provider
 * that decodes it on demand using this codec's scratch buffer.
 */
protected CharSequence readUtf8(DataInputInputStream dis) throws IOException {
  final int byteCount = readSize(dis);
  if (byteCount > MAX_UTF8_SZ) {
    return _readStr(dis, null, byteCount);
  }

  if (bytesBlock == null) {
    bytesBlock = new BytesBlock(1024 * 4);
  }
  // expand() may hand back a different block for large requests, so read through its result
  final BytesBlock region = bytesBlock.expand(byteCount);
  final byte[] target = region.getBuf();
  final int from = region.getStartPos();
  dis.readFully(target, from, byteCount);

  if (stringProvider == null) {
    // created once per codec and shared by every sequence this codec produces
    stringProvider = seq -> {
      synchronized (JavaBinCodec.this) {
        arr.reset();
        ByteUtils.UTF8toUTF16(seq.buf, seq.offset(), seq.size(), arr);
        return arr.toString();
      }
    };
  }

  final ByteArrayUtf8CharSequence utf8 = new ByteArrayUtf8CharSequence(target, from, byteCount);
  utf8.stringProvider = stringProvider;
  return utf8;
}
public void writeInt(int val) throws IOException {
if (val > 0) {
int b = SINT | (val & 0x0f);
@ -973,6 +1005,7 @@ public class JavaBinCodec implements PushWriter {
return true;
} else if (val instanceof Utf8CharSequence) {
writeUTF8Str((Utf8CharSequence) val);
return true;
} else if (val instanceof CharSequence) {
writeStr((CharSequence) val);
return true;
@ -1133,7 +1166,7 @@ public class JavaBinCodec implements PushWriter {
return stringsList.get(idx - 1);
} else {// idx == 0 means it has a string value
tagByte = fis.readByte();
CharSequence s = readStr(fis, stringCache);
CharSequence s = readStr(fis, stringCache, false);
if (s != null) s = s.toString();
if (stringsList == null) stringsList = new ArrayList<>();
stringsList.add(s);

View File

@ -20,10 +20,10 @@ package org.apache.solr.common.util;
import java.io.IOException;
import java.io.OutputStream;
/**A byte[] backed String
*
/**
* A byte[] backed String
*/
public interface Utf8CharSequence extends CharSequence {
public interface Utf8CharSequence extends CharSequence , Comparable {
/**
* Write the bytes into a buffer. The objective is to avoid the local bytes being exposed to
@ -31,20 +31,31 @@ public interface Utf8CharSequence extends CharSequence {
* possible into the buffer and then return how many bytes were written. It's the responsibility
* of the caller to call this method repeatedly and ensure that everything is completely written
*
* @param start position from which to start writing
* @param start position from which to start writing
* @param buffer the buffer to which to write to
* @param pos position to start writing
* @return no:of bytes written
*/
int write(int start, byte[] buffer, int pos);
/** The size of utf8 bytes
/**
* The size of utf8 bytes
*
* @return the size
*/
int size();
byte byteAt(int idx);
/**
 * Orders this sequence against another object by comparing the two {@code toString()} values.
 * A {@code null} argument is treated as smaller than this sequence (the method returns 1).
 * NOTE(review): the general Comparable contract asks for a NullPointerException on null --
 * confirm that callers rely on the lenient behavior before changing it.
 */
@Override
default int compareTo(Object o) {
if(o == null) return 1;
return toString().compareTo(o.toString());
}
/**
* Creates a byte[] and copy to it first before writing it out to the output
*
* @param os The sink
*/
default void write(OutputStream os) throws IOException {

View File

@ -58,4 +58,45 @@ public class Utf8CharSequenceTest extends SolrTestCaseJ4 {
utf81 = (ByteArrayUtf8CharSequence) m1.get("str");
assertTrue(utf81.equals(utf8));
}
/**
 * Marshals a NamedList of short strings plus one string longer than 4K, reads it back with
 * strings returned as char sequences, and checks that the short values share one backing array
 * while the long value does not.
 */
public void testUnMarshal() throws IOException {
  NamedList nl = new NamedList();
  String str = " The value!";
  // key0..key4: the index followed by i repetitions of str
  for (int i = 0; i < 5; i++) {
    StringBuilder sb = new StringBuilder();
    sb.append(i);
    for (int j = 0; j < i; j++) {
      sb.append(str);
    }
    nl.add("key" + i, sb.toString());
  }
  // a value longer than 4K characters
  StringBuilder sb = new StringBuilder();
  for (; ; ) {
    sb.append(str);
    if (sb.length() > 1024 * 4) break;
  }
  nl.add("key_long", sb.toString());
  nl.add("key5", "5" + str);
  ByteArrayOutputStream baos = new ByteArrayOutputStream();
  new JavaBinCodec().marshal(nl, baos);
  byte[] bytes = baos.toByteArray();

  NamedList nl1 = (NamedList) new JavaBinCodec()
      .setReadStringAsCharSeq(true)
      .unmarshal(new FastInputStream(null, bytes, 0, bytes.length));
  byte[] buf = ((ByteArrayUtf8CharSequence) nl1.getVal(0)).getBuf();
  ByteArrayUtf8CharSequence valLong = (ByteArrayUtf8CharSequence) nl1.get("key_long");
  // the long value must not live in the array shared by the short values
  assertNotSame(buf, valLong.getBuf());

  for (int i = 1; i < 6; i++) {
    ByteArrayUtf8CharSequence val = (ByteArrayUtf8CharSequence) nl1.get("key" + i);
    // identity, not content: the short values are expected to be backed by the very same array
    assertSame(buf, val.getBuf());
    String s = val.toString();
    assertTrue(s.startsWith("" + i));
    assertTrue(s, s.endsWith(str));
  }
}
}