mirror of https://github.com/apache/lucene.git
SOLR-12983: JavabinLoader should avoid creating String Objects and create UTF8CharSequence fields from byte[]
This commit is contained in:
parent
5a513fab83
commit
3932a4cc6c
|
@ -265,6 +265,8 @@ Improvements
|
|||
`solr.max.booleanClauses` sysprop is specified, that will override the 1024 default. This enables users to
|
||||
update this property across the board more easily. (Jason Gerlowski)
|
||||
|
||||
* SOLR-12983: JavabinLoader should avoid creating String Objects and create UTF8CharSequence fields from byte[] (noble)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -109,8 +109,8 @@ public class DatePointField extends PointField implements DateValueFieldType {
|
|||
|
||||
@Override
|
||||
public Object toNativeType(Object val) {
|
||||
if (val instanceof String) {
|
||||
return DateMathParser.parseMath(null, (String) val);
|
||||
if (val instanceof CharSequence) {
|
||||
return DateMathParser.parseMath(null, val.toString());
|
||||
}
|
||||
return super.toNativeType(val);
|
||||
}
|
||||
|
|
|
@ -49,7 +49,7 @@ public class DoublePointField extends PointField implements DoubleValueFieldType
|
|||
public Object toNativeType(Object val) {
|
||||
if (val == null) return null;
|
||||
if (val instanceof Number) return ((Number) val).doubleValue();
|
||||
if (val instanceof String) return Double.parseDouble((String) val);
|
||||
if (val instanceof CharSequence) return Double.parseDouble( val.toString());
|
||||
return super.toNativeType(val);
|
||||
}
|
||||
|
||||
|
|
|
@ -49,7 +49,7 @@ public class FloatPointField extends PointField implements FloatValueFieldType {
|
|||
public Object toNativeType(Object val) {
|
||||
if (val == null) return null;
|
||||
if (val instanceof Number) return ((Number) val).floatValue();
|
||||
if (val instanceof String) return Float.parseFloat((String) val);
|
||||
if (val instanceof CharSequence) return Float.parseFloat(val.toString());
|
||||
return super.toNativeType(val);
|
||||
}
|
||||
|
||||
|
|
|
@ -49,9 +49,9 @@ public class IntPointField extends PointField implements IntValueFieldType {
|
|||
if (val == null) return null;
|
||||
if (val instanceof Number) return ((Number) val).intValue();
|
||||
try {
|
||||
if (val instanceof String) return Integer.parseInt((String) val);
|
||||
if (val instanceof CharSequence) return Integer.parseInt( val.toString());
|
||||
} catch (NumberFormatException e) {
|
||||
Float v = Float.parseFloat((String) val);
|
||||
Float v = Float.parseFloat(val.toString());
|
||||
return v.intValue();
|
||||
}
|
||||
return super.toNativeType(val);
|
||||
|
|
|
@ -48,9 +48,9 @@ public class LongPointField extends PointField implements LongValueFieldType {
|
|||
if (val == null) return null;
|
||||
if (val instanceof Number) return ((Number) val).longValue();
|
||||
try {
|
||||
if (val instanceof String) return Long.parseLong((String) val);
|
||||
if (val instanceof CharSequence) return Long.parseLong(val.toString());
|
||||
} catch (NumberFormatException e) {
|
||||
Double v = Double.parseDouble((String) val);
|
||||
Double v = Double.parseDouble(val.toString());
|
||||
return v.longValue();
|
||||
}
|
||||
return super.toNativeType(val);
|
||||
|
|
|
@ -96,7 +96,7 @@ public class TrieDateField extends TrieField implements DateValueFieldType {
|
|||
|
||||
@Override
|
||||
public Object toNativeType(Object val) {
|
||||
if (val instanceof String) {
|
||||
if (val instanceof CharSequence) {
|
||||
// The guard above only establishes that val is a CharSequence, so it may not be a
// String; convert with toString() instead of casting to avoid a ClassCastException.
return DateMathParser.parseMath(null, val.toString());
|
||||
}
|
||||
return super.toNativeType(val);
|
||||
|
|
|
@ -61,7 +61,7 @@ public class TrieDoubleField extends TrieField implements DoubleValueFieldType {
|
|||
public Object toNativeType(Object val) {
|
||||
if(val==null) return null;
|
||||
if (val instanceof Number) return ((Number) val).doubleValue();
|
||||
if (val instanceof String) return Double.parseDouble((String) val);
|
||||
if (val instanceof CharSequence) return Double.parseDouble(val.toString());
|
||||
return super.toNativeType(val);
|
||||
}
|
||||
|
||||
|
|
|
@ -61,7 +61,7 @@ public class TrieFloatField extends TrieField implements FloatValueFieldType {
|
|||
public Object toNativeType(Object val) {
|
||||
if(val==null) return null;
|
||||
if (val instanceof Number) return ((Number) val).floatValue();
|
||||
if (val instanceof String) return Float.parseFloat((String) val);
|
||||
if (val instanceof CharSequence) return Float.parseFloat(val.toString());
|
||||
return super.toNativeType(val);
|
||||
}
|
||||
|
||||
|
|
|
@ -55,9 +55,9 @@ public class TrieIntField extends TrieField implements IntValueFieldType {
|
|||
if(val==null) return null;
|
||||
if (val instanceof Number) return ((Number) val).intValue();
|
||||
try {
|
||||
if (val instanceof String) return Integer.parseInt((String) val);
|
||||
if (val instanceof CharSequence) return Integer.parseInt(val.toString());
|
||||
} catch (NumberFormatException e) {
|
||||
Float v = Float.parseFloat((String) val);
|
||||
Float v = Float.parseFloat(val.toString());
|
||||
return v.intValue();
|
||||
}
|
||||
return super.toNativeType(val);
|
||||
|
|
|
@ -55,9 +55,9 @@ public class TrieLongField extends TrieField implements LongValueFieldType {
|
|||
if(val==null) return null;
|
||||
if (val instanceof Number) return ((Number) val).longValue();
|
||||
try {
|
||||
if (val instanceof String) return Long.parseLong((String) val);
|
||||
if (val instanceof CharSequence) return Long.parseLong(val.toString());
|
||||
} catch (NumberFormatException e) {
|
||||
Double v = Double.parseDouble((String) val);
|
||||
// val is only known to be a CharSequence here (see the guard in the try block), so use
// toString() rather than a (String) cast, which would throw ClassCastException.
Double v = Double.parseDouble(val.toString());
|
||||
return v.longValue();
|
||||
}
|
||||
return super.toNativeType(val);
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.apache.solr.common.SolrDocumentBase;
|
|||
import org.apache.solr.common.SolrException;
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
import org.apache.solr.common.util.ByteArrayUtf8CharSequence;
|
||||
import org.apache.solr.schema.CopyField;
|
||||
import org.apache.solr.schema.IndexSchema;
|
||||
import org.apache.solr.schema.SchemaField;
|
||||
|
@ -165,6 +166,7 @@ public class DocumentBuilder {
|
|||
if( v == null ) {
|
||||
continue;
|
||||
}
|
||||
v = ByteArrayUtf8CharSequence.convertCharSeq(v);
|
||||
hasField = true;
|
||||
if (sfield != null) {
|
||||
used = true;
|
||||
|
|
|
@ -23,12 +23,14 @@ import java.lang.invoke.MethodHandles;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedHashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.solr.common.SolrInputDocument;
|
||||
import org.apache.solr.common.SolrInputField;
|
||||
import org.apache.solr.common.params.ModifiableSolrParams;
|
||||
import org.apache.solr.common.params.ShardParams;
|
||||
import org.apache.solr.common.params.SolrParams;
|
||||
|
@ -38,6 +40,8 @@ import org.apache.solr.common.util.NamedList;
|
|||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import static org.apache.solr.common.util.ByteArrayUtf8CharSequence.convertCharSeq;
|
||||
|
||||
/**
|
||||
* Provides methods for marshalling an UpdateRequest to a NamedList which can be serialized in the javabin format and
|
||||
* vice versa.
|
||||
|
@ -111,87 +115,7 @@ public class JavaBinUpdateRequestCodec {
|
|||
Map<String,Map<String,Object>> delByIdMap;
|
||||
List<String> delByQ;
|
||||
final NamedList[] namedList = new NamedList[1];
|
||||
try (JavaBinCodec codec = new JavaBinCodec() {
|
||||
|
||||
// NOTE: this only works because this is an anonymous inner class
|
||||
// which will only ever be used on a single stream -- if this class
|
||||
// is ever refactored, this will not work.
|
||||
private boolean seenOuterMostDocIterator = false;
|
||||
|
||||
@Override
|
||||
public NamedList readNamedList(DataInputInputStream dis) throws IOException {
|
||||
int sz = readSize(dis);
|
||||
NamedList nl = new NamedList();
|
||||
if (namedList[0] == null) {
|
||||
namedList[0] = nl;
|
||||
}
|
||||
for (int i = 0; i < sz; i++) {
|
||||
String name = (String) readVal(dis);
|
||||
Object val = readVal(dis);
|
||||
nl.add(name, val);
|
||||
}
|
||||
return nl;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List readIterator(DataInputInputStream fis) throws IOException {
|
||||
// default behavior for reading any regular Iterator in the stream
|
||||
if (seenOuterMostDocIterator) return super.readIterator(fis);
|
||||
|
||||
// special treatment for first outermost Iterator
|
||||
// (the list of documents)
|
||||
seenOuterMostDocIterator = true;
|
||||
return readOuterMostDocIterator(fis);
|
||||
}
|
||||
|
||||
private List readOuterMostDocIterator(DataInputInputStream fis) throws IOException {
|
||||
NamedList params = (NamedList) namedList[0].get("params");
|
||||
updateRequest.setParams(new ModifiableSolrParams(params.toSolrParams()));
|
||||
if (handler == null) return super.readIterator(fis);
|
||||
Integer commitWithin = null;
|
||||
Boolean overwrite = null;
|
||||
Object o = null;
|
||||
while (true) {
|
||||
if (o == null) {
|
||||
o = readVal(fis);
|
||||
}
|
||||
|
||||
if (o == END_OBJ) {
|
||||
break;
|
||||
}
|
||||
|
||||
SolrInputDocument sdoc = null;
|
||||
if (o instanceof List) {
|
||||
sdoc = listToSolrInputDocument((List<NamedList>) o);
|
||||
} else if (o instanceof NamedList) {
|
||||
UpdateRequest req = new UpdateRequest();
|
||||
req.setParams(new ModifiableSolrParams(((NamedList) o).toSolrParams()));
|
||||
handler.update(null, req, null, null);
|
||||
} else if (o instanceof Map.Entry){
|
||||
sdoc = (SolrInputDocument) ((Map.Entry) o).getKey();
|
||||
Map p = (Map) ((Map.Entry) o).getValue();
|
||||
if (p != null) {
|
||||
commitWithin = (Integer) p.get(UpdateRequest.COMMIT_WITHIN);
|
||||
overwrite = (Boolean) p.get(UpdateRequest.OVERWRITE);
|
||||
}
|
||||
} else {
|
||||
sdoc = (SolrInputDocument) o;
|
||||
}
|
||||
|
||||
// peek at the next object to see if we're at the end
|
||||
o = readVal(fis);
|
||||
if (o == END_OBJ) {
|
||||
// indicate that we've hit the last doc in the batch, used to enable optimizations when doing replication
|
||||
updateRequest.lastDocInBatch();
|
||||
}
|
||||
|
||||
handler.update(sdoc, updateRequest, commitWithin, overwrite);
|
||||
}
|
||||
return Collections.EMPTY_LIST;
|
||||
}
|
||||
|
||||
};) {
|
||||
|
||||
try (JavaBinCodec codec = new StreamingCodec(namedList, updateRequest, handler)) {
|
||||
codec.unmarshal(is);
|
||||
}
|
||||
|
||||
|
@ -248,43 +172,169 @@ public class JavaBinUpdateRequestCodec {
|
|||
return updateRequest;
|
||||
}
|
||||
|
||||
private SolrInputDocument listToSolrInputDocument(List<NamedList> namedList) {
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
for (int i = 0; i < namedList.size(); i++) {
|
||||
NamedList nl = namedList.get(i);
|
||||
if (i == 0) {
|
||||
Float boost = (Float) nl.getVal(0);
|
||||
if (boost != null && boost.floatValue() != 1f) {
|
||||
String message = "Ignoring document boost: " + boost + " as index-time boosts are not supported anymore";
|
||||
if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) {
|
||||
log.warn(message);
|
||||
} else {
|
||||
log.debug(message);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Float boost = (Float) nl.getVal(2);
|
||||
if (boost != null && boost.floatValue() != 1f) {
|
||||
String message = "Ignoring field boost: " + boost + " as index-time boosts are not supported anymore";
|
||||
if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) {
|
||||
log.warn(message);
|
||||
} else {
|
||||
log.debug(message);
|
||||
}
|
||||
}
|
||||
doc.addField((String) nl.getVal(0),
|
||||
nl.getVal(1));
|
||||
}
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
private NamedList solrParamsToNamedList(SolrParams params) {
|
||||
if (params == null) return new NamedList();
|
||||
return params.toNamedList();
|
||||
}
|
||||
|
||||
public static interface StreamingUpdateHandler {
|
||||
public void update(SolrInputDocument document, UpdateRequest req, Integer commitWithin, Boolean override);
|
||||
public interface StreamingUpdateHandler {
|
||||
void update(SolrInputDocument document, UpdateRequest req, Integer commitWithin, Boolean override);
|
||||
}
|
||||
|
||||
static class MaskCharSequenceSolrInputDoc extends SolrInputDocument {
|
||||
public MaskCharSequenceSolrInputDoc(Map<String, SolrInputField> fields) {
|
||||
super(fields);
|
||||
}
|
||||
|
||||
@Override
|
||||
public Object getFieldValue(String name) {
|
||||
return convertCharSeq(super.getFieldValue(name));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
class StreamingCodec extends JavaBinCodec {
|
||||
|
||||
private final NamedList[] namedList;
|
||||
private final UpdateRequest updateRequest;
|
||||
private final StreamingUpdateHandler handler;
|
||||
// NOTE: this only works because each StreamingCodec instance
|
||||
// is used on a single stream -- if an instance is ever reused
|
||||
// for another stream, this flag is not reset and this will not work.
|
||||
private boolean seenOuterMostDocIterator;
|
||||
|
||||
public StreamingCodec(NamedList[] namedList, UpdateRequest updateRequest, StreamingUpdateHandler handler) {
|
||||
this.namedList = namedList;
|
||||
this.updateRequest = updateRequest;
|
||||
this.handler = handler;
|
||||
seenOuterMostDocIterator = false;
|
||||
}
|
||||
|
||||
@Override
|
||||
protected SolrInputDocument createSolrInputDocument(int sz) {
|
||||
return new MaskCharSequenceSolrInputDoc(new LinkedHashMap(sz));
|
||||
}
|
||||
|
||||
@Override
|
||||
public NamedList readNamedList(DataInputInputStream dis) throws IOException {
|
||||
int sz = readSize(dis);
|
||||
NamedList nl = new NamedList();
|
||||
if (namedList[0] == null) {
|
||||
namedList[0] = nl;
|
||||
}
|
||||
for (int i = 0; i < sz; i++) {
|
||||
String name = (String) readVal(dis);
|
||||
Object val = readVal(dis);
|
||||
nl.add(name, val);
|
||||
}
|
||||
return nl;
|
||||
}
|
||||
|
||||
private SolrInputDocument listToSolrInputDocument(List<NamedList> namedList) {
|
||||
SolrInputDocument doc = new SolrInputDocument();
|
||||
for (int i = 0; i < namedList.size(); i++) {
|
||||
NamedList nl = namedList.get(i);
|
||||
if (i == 0) {
|
||||
Float boost = (Float) nl.getVal(0);
|
||||
if (boost != null && boost.floatValue() != 1f) {
|
||||
String message = "Ignoring document boost: " + boost + " as index-time boosts are not supported anymore";
|
||||
if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) {
|
||||
log.warn(message);
|
||||
} else {
|
||||
log.debug(message);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Float boost = (Float) nl.getVal(2);
|
||||
if (boost != null && boost.floatValue() != 1f) {
|
||||
String message = "Ignoring field boost: " + boost + " as index-time boosts are not supported anymore";
|
||||
if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) {
|
||||
log.warn(message);
|
||||
} else {
|
||||
log.debug(message);
|
||||
}
|
||||
}
|
||||
doc.addField((String) nl.getVal(0),
|
||||
nl.getVal(1));
|
||||
}
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List readIterator(DataInputInputStream fis) throws IOException {
|
||||
// default behavior for reading any regular Iterator in the stream
|
||||
if (seenOuterMostDocIterator) return super.readIterator(fis);
|
||||
|
||||
// special treatment for first outermost Iterator
|
||||
// (the list of documents)
|
||||
seenOuterMostDocIterator = true;
|
||||
return readOuterMostDocIterator(fis);
|
||||
}
|
||||
|
||||
|
||||
/* @Override
|
||||
protected Object readDocumentFieldVal(String fieldName, DataInputInputStream dis) throws IOException {
|
||||
super.readStringAsCharSeq = utf8FieldPredicate != null && utf8FieldPredicate.test(fieldName);
|
||||
try {
|
||||
return super.readDocumentFieldVal(fieldName, dis);
|
||||
} finally {
|
||||
super.readStringAsCharSeq = false;
|
||||
}
|
||||
}*/
|
||||
|
||||
private List readOuterMostDocIterator(DataInputInputStream fis) throws IOException {
|
||||
NamedList params = (NamedList) namedList[0].get("params");
|
||||
updateRequest.setParams(new ModifiableSolrParams(params.toSolrParams()));
|
||||
if (handler == null) return super.readIterator(fis);
|
||||
Integer commitWithin = null;
|
||||
Boolean overwrite = null;
|
||||
Object o = null;
|
||||
super.readStringAsCharSeq = true;
|
||||
try {
|
||||
while (true) {
|
||||
if (o == null) {
|
||||
o = readVal(fis);
|
||||
}
|
||||
|
||||
if (o == END_OBJ) {
|
||||
break;
|
||||
}
|
||||
|
||||
SolrInputDocument sdoc = null;
|
||||
if (o instanceof List) {
|
||||
sdoc = listToSolrInputDocument((List<NamedList>) o);
|
||||
} else if (o instanceof NamedList) {
|
||||
UpdateRequest req = new UpdateRequest();
|
||||
req.setParams(new ModifiableSolrParams(((NamedList) o).toSolrParams()));
|
||||
handler.update(null, req, null, null);
|
||||
} else if (o instanceof Map.Entry) {
|
||||
sdoc = (SolrInputDocument) ((Entry) o).getKey();
|
||||
Map p = (Map) ((Entry) o).getValue();
|
||||
if (p != null) {
|
||||
commitWithin = (Integer) p.get(UpdateRequest.COMMIT_WITHIN);
|
||||
overwrite = (Boolean) p.get(UpdateRequest.OVERWRITE);
|
||||
}
|
||||
} else {
|
||||
sdoc = (SolrInputDocument) o;
|
||||
}
|
||||
|
||||
// peek at the next object to see if we're at the end
|
||||
o = readVal(fis);
|
||||
if (o == END_OBJ) {
|
||||
// indicate that we've hit the last doc in the batch, used to enable optimizations when doing replication
|
||||
updateRequest.lastDocInBatch();
|
||||
}
|
||||
|
||||
handler.update(sdoc, updateRequest, commitWithin, overwrite);
|
||||
}
|
||||
return Collections.EMPTY_LIST;
|
||||
} finally {
|
||||
super.readStringAsCharSeq = false;
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ import java.util.Collection;
|
|||
import java.util.HashSet;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.function.Function;
|
||||
|
||||
import org.noggit.CharArr;
|
||||
|
||||
|
@ -38,6 +39,7 @@ public class ByteArrayUtf8CharSequence implements Utf8CharSequence {
|
|||
protected int hashCode = Integer.MIN_VALUE;
|
||||
protected int length;
|
||||
protected volatile String utf16;
|
||||
public Function<ByteArrayUtf8CharSequence, String> stringProvider;
|
||||
|
||||
public ByteArrayUtf8CharSequence(String utf16) {
|
||||
buf = new byte[Math.multiplyExact(utf16.length(), 3)];
|
||||
|
@ -51,21 +53,39 @@ public class ByteArrayUtf8CharSequence implements Utf8CharSequence {
|
|||
assert isValid();
|
||||
}
|
||||
|
||||
public byte[] getBuf() {
|
||||
return buf;
|
||||
}
|
||||
|
||||
public int offset() {
|
||||
return offset;
|
||||
}
|
||||
|
||||
public ByteArrayUtf8CharSequence(byte[] buf, int offset, int length) {
|
||||
this.buf = buf;
|
||||
this.offset = offset;
|
||||
this.length = length;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte byteAt(int idx) {
|
||||
if (idx >= length || idx < 0) throw new ArrayIndexOutOfBoundsException("idx must be >=0 and < " + length);
|
||||
return buf[offset + idx];
|
||||
}
|
||||
|
||||
public String getStringOrNull() {
|
||||
return utf16;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int write(int start, byte[] buffer, int pos) {
|
||||
if (start == -1 || start >= length) return -1;
|
||||
if (length == 0) return 0;
|
||||
int writableBytes = Math.min(length - start, buffer.length - pos);
|
||||
System.arraycopy(buf, offset + start, buffer, pos, writableBytes);
|
||||
return _writeBytes(buf, offset, length, start, buffer, pos);
|
||||
}
|
||||
|
||||
static int _writeBytes(byte[] src, int srcOffset, int srcLength, int start, byte[] buffer, int pos) {
|
||||
if (srcOffset == -1 || start >= srcLength) return -1;
|
||||
int writableBytes = Math.min(srcLength - start, buffer.length - pos);
|
||||
System.arraycopy(src, srcOffset + start, buffer, pos, writableBytes);
|
||||
return writableBytes;
|
||||
}
|
||||
|
||||
|
@ -97,15 +117,26 @@ public class ByteArrayUtf8CharSequence implements Utf8CharSequence {
|
|||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other == null) {
|
||||
if (other instanceof Utf8CharSequence) {
|
||||
if (size() != ((Utf8CharSequence) other).size()) return false;
|
||||
if (other instanceof ByteArrayUtf8CharSequence) {
|
||||
if (this.length != ((ByteArrayUtf8CharSequence) other).length) return false;
|
||||
ByteArrayUtf8CharSequence that = (ByteArrayUtf8CharSequence) other;
|
||||
return _equals(this.buf, this.offset, this.offset + this.length,
|
||||
that.buf, that.offset, that.offset + that.length);
|
||||
}
|
||||
return utf8Equals(this, (Utf8CharSequence) other);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
if (other instanceof ByteArrayUtf8CharSequence) {
|
||||
ByteArrayUtf8CharSequence that = (ByteArrayUtf8CharSequence) other;
|
||||
return _equals(this.buf, this.offset, this.offset + this.length,
|
||||
that.buf, that.offset, that.offset + that.length);
|
||||
}
|
||||
|
||||
public static boolean utf8Equals(Utf8CharSequence utf8_1, Utf8CharSequence utf8_2) {
|
||||
if (utf8_1.size() != utf8_2.size()) return false;
|
||||
for (int i = 0; i < utf8_1.size(); i++) {
|
||||
if (utf8_1.byteAt(i) != utf8_2.byteAt(i)) return false;
|
||||
}
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
|
@ -115,14 +146,16 @@ public class ByteArrayUtf8CharSequence implements Utf8CharSequence {
|
|||
}
|
||||
|
||||
private String _getStr() {
|
||||
String utf16 = this.utf16;
|
||||
if (utf16 == null) {
|
||||
synchronized (this) {
|
||||
if (utf16 == null) {
|
||||
CharArr arr = new CharArr();
|
||||
ByteUtils.UTF8toUTF16(buf, offset, length, arr);
|
||||
utf16 = arr.toString();
|
||||
}
|
||||
if (stringProvider != null) {
|
||||
this.utf16 = utf16 = stringProvider.apply(this);
|
||||
} else {
|
||||
CharArr arr = new CharArr();
|
||||
ByteUtils.UTF8toUTF16(buf, offset, length, arr);
|
||||
this.utf16 = utf16 = arr.toString();
|
||||
}
|
||||
|
||||
}
|
||||
return utf16;
|
||||
}
|
||||
|
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.common.util;
|
||||
|
||||
/**
 * A byte buffer from which consecutive regions are reserved one after another.
 *
 * <p>Each call to {@link #expand(int)} reserves {@code sz} bytes and returns the block that
 * holds them; the reserved region starts at {@link #getStartPos()} of the returned block's
 * {@link #getBuf()}. When the current buffer cannot hold a request, either a separate block
 * is returned (large requests) or this block switches to a fresh buffer (small requests).
 * A previously returned buffer is never modified by this class afterwards, so references to
 * earlier regions stay valid.
 *
 * <p>This class performs no synchronization.
 */
public class BytesBlock {
  /** Size of every buffer this block allocates. */
  private final int bufSize;
  /** The buffer currently being filled; replaced when a small request no longer fits. */
  public byte[] buf;
  //current position
  private int pos;
  //start position of the most recently reserved region
  private int startPos = 0;

  /**
   * Creates a block whose buffers are {@code sz} bytes long.
   *
   * @param sz the buffer size in bytes
   */
  public BytesBlock(int sz) {
    this.bufSize = sz;
    create();
  }

  /**
   * @return the index just past the most recently reserved region
   */
  public int getPos() {
    return pos;
  }

  /**
   * @return the index at which the most recently reserved region starts
   */
  public int getStartPos() {
    return startPos;
  }

  /**
   * @return the buffer currently backing this block
   */
  public byte[] getBuf() {
    return buf;
  }

  /**
   * Reserves {@code sz} bytes.
   *
   * @param sz number of bytes to reserve; must not be negative
   * @return the block holding the reserved region: this block if the region fits in the
   *         current buffer or in a fresh buffer of the same size, otherwise a new block
   *         sized exactly for the request
   * @throws IllegalArgumentException if {@code sz} is negative
   */
  public BytesBlock expand(int sz) {
    // A negative size would move pos backwards over regions already handed out.
    if (sz < 0) {
      throw new IllegalArgumentException("sz must be >= 0, got " + sz);
    }
    if (bufSize - pos >= sz) {
      return markPositions(sz);
    }
    // a reasonably large block, create new; this block keeps its remaining space
    if (sz > (bufSize / 4)) {
      return new BytesBlock(sz).expand(sz);
    }
    // small request: abandon the tail of the current buffer and start a fresh one
    create();
    return markPositions(sz);
  }

  /** Records the start of the new region and advances the current position past it. */
  private BytesBlock markPositions(int sz) {
    this.startPos = pos;
    pos += sz;
    return this;
  }

  /** Allocates a fresh buffer and resets both positions to its beginning. */
  private void create() {
    buf = new byte[bufSize];
    startPos = pos = 0;
  }
}
|
|
@ -36,6 +36,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
|
|||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.function.BiConsumer;
|
||||
import java.util.function.Function;
|
||||
|
||||
import org.apache.solr.common.ConditionalMapWriter;
|
||||
import org.apache.solr.common.EnumFieldValue;
|
||||
|
@ -117,7 +118,7 @@ public class JavaBinCodec implements PushWriter {
|
|||
private WritableDocFields writableDocFields;
|
||||
private boolean alreadyMarshalled;
|
||||
private boolean alreadyUnmarshalled;
|
||||
private boolean readStringAsCharSeq = false;
|
||||
protected boolean readStringAsCharSeq = false;
|
||||
|
||||
public JavaBinCodec() {
|
||||
resolver =null;
|
||||
|
@ -284,7 +285,7 @@ public class JavaBinCodec implements PushWriter {
|
|||
// OK, try type + size in single byte
|
||||
switch (tagByte >>> 5) {
|
||||
case STR >>> 5:
|
||||
return readStr(dis);
|
||||
return readStr(dis, stringCache, readStringAsCharSeq);
|
||||
case SINT >>> 5:
|
||||
return readSmallInt(dis);
|
||||
case SLONG >>> 5:
|
||||
|
@ -355,6 +356,9 @@ public class JavaBinCodec implements PushWriter {
|
|||
writeSolrDocumentList((SolrDocumentList) val);
|
||||
return true;
|
||||
}
|
||||
if (val instanceof SolrInputField) {
|
||||
return writeKnownType(((SolrInputField) val).getValue());
|
||||
}
|
||||
if (val instanceof IteratorWriter) {
|
||||
writeIterator((IteratorWriter) val);
|
||||
return true;
|
||||
|
@ -612,7 +616,7 @@ public class JavaBinCodec implements PushWriter {
|
|||
log.debug(message);
|
||||
}
|
||||
}
|
||||
SolrInputDocument sdoc = new SolrInputDocument(new LinkedHashMap<>(sz));
|
||||
SolrInputDocument sdoc = createSolrInputDocument(sz);
|
||||
for (int i = 0; i < sz; i++) {
|
||||
String fieldName;
|
||||
Object obj = readVal(dis); // could be a boost, a field name, or a child document
|
||||
|
@ -639,15 +643,16 @@ public class JavaBinCodec implements PushWriter {
|
|||
return sdoc;
|
||||
}
|
||||
|
||||
protected SolrInputDocument createSolrInputDocument(int sz) {
|
||||
return new SolrInputDocument(new LinkedHashMap<>(sz));
|
||||
}
|
||||
|
||||
public void writeSolrInputDocument(SolrInputDocument sdoc) throws IOException {
|
||||
List<SolrInputDocument> children = sdoc.getChildDocuments();
|
||||
int sz = sdoc.size() + (children==null ? 0 : children.size());
|
||||
writeTag(SOLRINPUTDOC, sz);
|
||||
writeFloat(1f); // document boost
|
||||
for (SolrInputField inputField : sdoc.values()) {
|
||||
writeExternString(inputField.getName());
|
||||
writeVal(inputField.getValue());
|
||||
}
|
||||
sdoc.writeMap(ew);
|
||||
if (children != null) {
|
||||
for (SolrInputDocument child : children) {
|
||||
writeSolrInputDocument(child);
|
||||
|
@ -891,28 +896,55 @@ public class JavaBinCodec implements PushWriter {
|
|||
private StringBytes bytesRef = new StringBytes(bytes,0,0);
|
||||
|
||||
public CharSequence readStr(DataInputInputStream dis) throws IOException {
|
||||
return readStr(dis,null);
|
||||
return readStr(dis, null, readStringAsCharSeq);
|
||||
}
|
||||
|
||||
public CharSequence readStr(DataInputInputStream dis, StringCache stringCache) throws IOException {
|
||||
public CharSequence readStr(DataInputInputStream dis, StringCache stringCache, boolean readStringAsCharSeq) throws IOException {
|
||||
if (readStringAsCharSeq) {
|
||||
return readUtf8(dis);
|
||||
}
|
||||
int sz = readSize(dis);
|
||||
return _readStr(dis, stringCache, sz);
|
||||
}
|
||||
|
||||
private CharSequence _readStr(DataInputInputStream dis, StringCache stringCache, int sz) throws IOException {
|
||||
if (bytes == null || bytes.length < sz) bytes = new byte[sz];
|
||||
dis.readFully(bytes, 0, sz);
|
||||
if (stringCache != null) {
|
||||
return stringCache.get(bytesRef.reset(bytes, 0, sz));
|
||||
} else {
|
||||
arr.reset();
|
||||
if (readStringAsCharSeq) {
|
||||
byte[] copyBuf = new byte[sz];
|
||||
System.arraycopy(bytes, 0, copyBuf, 0, sz);
|
||||
return new ByteArrayUtf8CharSequence(copyBuf, 0, sz);
|
||||
} else {
|
||||
ByteUtils.UTF8toUTF16(bytes, 0, sz, arr);
|
||||
return arr.toString();
|
||||
}
|
||||
ByteUtils.UTF8toUTF16(bytes, 0, sz, arr);
|
||||
return arr.toString();
|
||||
}
|
||||
}
|
||||
|
||||
/////////// code to optimize reading UTF8
|
||||
static final int MAX_UTF8_SZ = 1024 * 64;//too big strings can cause too much memory allocation
|
||||
private Function<ByteArrayUtf8CharSequence, String> stringProvider;
|
||||
private BytesBlock bytesBlock;
|
||||
|
||||
protected CharSequence readUtf8(DataInputInputStream dis) throws IOException {
|
||||
int sz = readSize(dis);
|
||||
if (sz > MAX_UTF8_SZ) return _readStr(dis, null, sz);
|
||||
if (bytesBlock == null) bytesBlock = new BytesBlock(1024 * 4);
|
||||
BytesBlock block = this.bytesBlock.expand(sz);
|
||||
dis.readFully(block.getBuf(), block.getStartPos(), sz);
|
||||
|
||||
ByteArrayUtf8CharSequence result = new ByteArrayUtf8CharSequence(block.getBuf(), block.getStartPos(), sz);
|
||||
if (stringProvider == null) {
|
||||
stringProvider = butf8cs -> {
|
||||
synchronized (JavaBinCodec.this) {
|
||||
arr.reset();
|
||||
ByteUtils.UTF8toUTF16(butf8cs.buf, butf8cs.offset(), butf8cs.size(), arr);
|
||||
return arr.toString();
|
||||
}
|
||||
};
|
||||
}
|
||||
result.stringProvider = this.stringProvider;
|
||||
return result;
|
||||
}
|
||||
|
||||
public void writeInt(int val) throws IOException {
|
||||
if (val > 0) {
|
||||
int b = SINT | (val & 0x0f);
|
||||
|
@ -973,6 +1005,7 @@ public class JavaBinCodec implements PushWriter {
|
|||
return true;
|
||||
} else if (val instanceof Utf8CharSequence) {
|
||||
writeUTF8Str((Utf8CharSequence) val);
|
||||
return true;
|
||||
} else if (val instanceof CharSequence) {
|
||||
writeStr((CharSequence) val);
|
||||
return true;
|
||||
|
@ -1133,7 +1166,7 @@ public class JavaBinCodec implements PushWriter {
|
|||
return stringsList.get(idx - 1);
|
||||
} else {// idx == 0 means it has a string value
|
||||
tagByte = fis.readByte();
|
||||
CharSequence s = readStr(fis, stringCache);
|
||||
CharSequence s = readStr(fis, stringCache, false);
|
||||
if (s != null) s = s.toString();
|
||||
if (stringsList == null) stringsList = new ArrayList<>();
|
||||
stringsList.add(s);
|
||||
|
|
|
@ -20,10 +20,10 @@ package org.apache.solr.common.util;
|
|||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
||||
/**A byte[] backed String
|
||||
*
|
||||
/**
|
||||
* A byte[] backed String
|
||||
*/
|
||||
public interface Utf8CharSequence extends CharSequence {
|
||||
public interface Utf8CharSequence extends CharSequence , Comparable {
|
||||
|
||||
/**
|
||||
* Write the bytes into a buffer. The objective is to avoid the local bytes being exposed to
|
||||
|
@ -31,20 +31,31 @@ public interface Utf8CharSequence extends CharSequence {
|
|||
* possible into the buffer and then return how many bytes were written. It's the responsibility
|
||||
* of the caller to call this method repeatedly and ensure that everything is completely written
|
||||
*
|
||||
* @param start position from which to start writing
|
||||
* @param start position from which to start writing
|
||||
* @param buffer the buffer to which to write to
|
||||
* @param pos position to start writing
|
||||
* @return no:of bytes written
|
||||
*/
|
||||
int write(int start, byte[] buffer, int pos);
|
||||
|
||||
/** The size of utf8 bytes
|
||||
/**
|
||||
* The size of utf8 bytes
|
||||
*
|
||||
* @return the size
|
||||
*/
|
||||
int size();
|
||||
|
||||
byte byteAt(int idx);
|
||||
|
||||
@Override
|
||||
default int compareTo(Object o) {
|
||||
if(o == null) return 1;
|
||||
return toString().compareTo(o.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a byte[] and copy to it first before writing it out to the output
|
||||
*
|
||||
* @param os The sink
|
||||
*/
|
||||
default void write(OutputStream os) throws IOException {
|
||||
|
|
|
@ -58,4 +58,45 @@ public class Utf8CharSequenceTest extends SolrTestCaseJ4 {
|
|||
utf81 = (ByteArrayUtf8CharSequence) m1.get("str");
|
||||
assertTrue(utf81.equals(utf8));
|
||||
}
|
||||
|
||||
public void testUnMarshal() throws IOException {
|
||||
NamedList nl = new NamedList();
|
||||
String str = " The value!";
|
||||
for (int i = 0; i < 5; i++) {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append(i);
|
||||
for (int j = 0; j < i; j++) {
|
||||
sb.append(str);
|
||||
}
|
||||
nl.add("key" + i, sb.toString());
|
||||
}
|
||||
StringBuffer sb = new StringBuffer();
|
||||
for (; ; ) {
|
||||
sb.append(str);
|
||||
if (sb.length() > 1024 * 4) break;
|
||||
}
|
||||
nl.add("key_long", sb.toString());
|
||||
nl.add("key5", "5" + str);
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
new JavaBinCodec().marshal(nl, baos);
|
||||
byte[] bytes = baos.toByteArray();
|
||||
|
||||
NamedList nl1 = (NamedList) new JavaBinCodec()
|
||||
.setReadStringAsCharSeq(true)
|
||||
.unmarshal(new FastInputStream(null, bytes, 0, bytes.length));
|
||||
byte[] buf = ((ByteArrayUtf8CharSequence) nl1.getVal(0)).getBuf();
|
||||
ByteArrayUtf8CharSequence valLong = (ByteArrayUtf8CharSequence) nl1.get("key_long");
|
||||
assertFalse(valLong.getBuf() == buf);
|
||||
|
||||
for (int i = 1; i < 6; i++) {
|
||||
ByteArrayUtf8CharSequence val = (ByteArrayUtf8CharSequence) nl1.get("key" + i);
|
||||
assertEquals(buf, val.getBuf());
|
||||
String s = val.toString();
|
||||
assertTrue(s.startsWith("" + i));
|
||||
assertTrue(s, s.endsWith(str));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue