SOLR-12983: JavabinLoader should avoid creating String Objects and create UTF8CharSequence fields from byte[]

2019-01-08 15:27:48 +11:00 · 2019-01-08 15:27:48 +11:00 · 3932a4cc6c
parent 5a513fab83
commit 3932a4cc6c
18 changed files with 404 additions and 167 deletions
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@ -265,6 +265,8 @@ Improvements
  `solr.max.booleanClauses` sysprop is specified, that will override the 1024 default.  This enables users to
  update this property across the board more easily. (Jason Gerlowski)

+* SOLR-12983: JavabinLoader should avoid creating String Objects and create UTF8CharSequence fields from byte[] (noble)
+
 Other Changes
 ----------------------

--- a/solr/core/src/java/org/apache/solr/schema/DatePointField.java
+++ b/solr/core/src/java/org/apache/solr/schema/DatePointField.java
@ -109,8 +109,8 @@ public class DatePointField extends PointField implements DateValueFieldType {

  @Override
  public Object toNativeType(Object val) {
-    if (val instanceof String) {
-      return DateMathParser.parseMath(null, (String) val);
+    if (val instanceof CharSequence) {
+      return DateMathParser.parseMath(null, val.toString());
    }
    return super.toNativeType(val);
  }
--- a/solr/core/src/java/org/apache/solr/schema/DoublePointField.java
+++ b/solr/core/src/java/org/apache/solr/schema/DoublePointField.java
@ -49,7 +49,7 @@ public class DoublePointField extends PointField implements DoubleValueFieldType
  public Object toNativeType(Object val) {
    if (val == null) return null;
    if (val instanceof Number) return ((Number) val).doubleValue();
-    if (val instanceof String) return Double.parseDouble((String) val);
+    if (val instanceof CharSequence) return Double.parseDouble( val.toString());
    return super.toNativeType(val);
  }

--- a/solr/core/src/java/org/apache/solr/schema/FloatPointField.java
+++ b/solr/core/src/java/org/apache/solr/schema/FloatPointField.java
@ -49,7 +49,7 @@ public class FloatPointField extends PointField implements FloatValueFieldType {
  public Object toNativeType(Object val) {
    if (val == null) return null;
    if (val instanceof Number) return ((Number) val).floatValue();
-    if (val instanceof String) return Float.parseFloat((String) val);
+    if (val instanceof CharSequence) return Float.parseFloat(val.toString());
    return super.toNativeType(val);
  }

--- a/solr/core/src/java/org/apache/solr/schema/IntPointField.java
+++ b/solr/core/src/java/org/apache/solr/schema/IntPointField.java
@ -49,9 +49,9 @@ public class IntPointField extends PointField implements IntValueFieldType {
    if (val == null) return null;
    if (val instanceof Number) return ((Number) val).intValue();
    try {
-      if (val instanceof String) return Integer.parseInt((String) val);
+      if (val instanceof CharSequence) return Integer.parseInt( val.toString());
    } catch (NumberFormatException e) {
-      Float v = Float.parseFloat((String) val);
+      Float v = Float.parseFloat(val.toString());
      return v.intValue();
    }
    return super.toNativeType(val);
--- a/solr/core/src/java/org/apache/solr/schema/LongPointField.java
+++ b/solr/core/src/java/org/apache/solr/schema/LongPointField.java
@ -48,9 +48,9 @@ public class LongPointField extends PointField implements LongValueFieldType {
    if (val == null) return null;
    if (val instanceof Number) return ((Number) val).longValue();
    try {
-      if (val instanceof String) return Long.parseLong((String) val);
+      if (val instanceof CharSequence) return Long.parseLong(val.toString());
    } catch (NumberFormatException e) {
-      Double v = Double.parseDouble((String) val);
+      Double v = Double.parseDouble(val.toString());
      return v.longValue();
    }
    return super.toNativeType(val);
--- a/solr/core/src/java/org/apache/solr/schema/TrieDateField.java
+++ b/solr/core/src/java/org/apache/solr/schema/TrieDateField.java
@ -96,7 +96,7 @@ public class TrieDateField extends TrieField implements DateValueFieldType {

  @Override
  public Object toNativeType(Object val) {
-    if (val instanceof String) {
+    if (val instanceof CharSequence) {
-      return DateMathParser.parseMath(null, (String)val);
+      return DateMathParser.parseMath(null, val.toString()); // val may be a non-String CharSequence; a (String) cast would throw
    }
    return super.toNativeType(val);
--- a/solr/core/src/java/org/apache/solr/schema/TrieDoubleField.java
+++ b/solr/core/src/java/org/apache/solr/schema/TrieDoubleField.java
@ -61,7 +61,7 @@ public class TrieDoubleField extends TrieField implements DoubleValueFieldType {
  public Object toNativeType(Object val) {
    if(val==null) return null;
    if (val instanceof Number) return ((Number) val).doubleValue();
-    if (val instanceof String) return Double.parseDouble((String) val);
+    if (val instanceof CharSequence) return Double.parseDouble(val.toString());
    return super.toNativeType(val);
  }

--- a/solr/core/src/java/org/apache/solr/schema/TrieFloatField.java
+++ b/solr/core/src/java/org/apache/solr/schema/TrieFloatField.java
@ -61,7 +61,7 @@ public class TrieFloatField extends TrieField implements FloatValueFieldType {
  public Object toNativeType(Object val) {
    if(val==null) return null;
    if (val instanceof Number) return ((Number) val).floatValue();
-    if (val instanceof String) return Float.parseFloat((String) val);
+    if (val instanceof CharSequence) return Float.parseFloat(val.toString());
    return super.toNativeType(val);
  }

--- a/solr/core/src/java/org/apache/solr/schema/TrieIntField.java
+++ b/solr/core/src/java/org/apache/solr/schema/TrieIntField.java
@ -55,9 +55,9 @@ public class TrieIntField extends TrieField implements IntValueFieldType {
    if(val==null) return null;
    if (val instanceof Number) return ((Number) val).intValue();
    try {
-      if (val instanceof String) return Integer.parseInt((String) val);
+      if (val instanceof CharSequence) return Integer.parseInt(val.toString());
    } catch (NumberFormatException e) {
-      Float v = Float.parseFloat((String) val);
+      Float v = Float.parseFloat(val.toString());
      return v.intValue();
    }
    return super.toNativeType(val);
--- a/solr/core/src/java/org/apache/solr/schema/TrieLongField.java
+++ b/solr/core/src/java/org/apache/solr/schema/TrieLongField.java
@ -55,9 +55,9 @@ public class TrieLongField extends TrieField implements LongValueFieldType {
    if(val==null) return null;
    if (val instanceof Number) return ((Number) val).longValue();
    try {
-      if (val instanceof String) return Long.parseLong((String) val);
+      if (val instanceof CharSequence) return Long.parseLong(val.toString());
    } catch (NumberFormatException e) {
-      Double v = Double.parseDouble((String) val);
+      Double v = Double.parseDouble(val.toString()); // val is only known to be a CharSequence here, so do not cast to String
      return v.longValue();
    }
    return super.toNativeType(val);
--- a/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
+++ b/solr/core/src/java/org/apache/solr/update/DocumentBuilder.java
@ -29,6 +29,7 @@ import org.apache.solr.common.SolrDocumentBase;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrInputDocument;
 import org.apache.solr.common.SolrInputField;
+import org.apache.solr.common.util.ByteArrayUtf8CharSequence;
 import org.apache.solr.schema.CopyField;
 import org.apache.solr.schema.IndexSchema;
 import org.apache.solr.schema.SchemaField;
@ -165,6 +166,7 @@ public class DocumentBuilder {
          if( v == null ) {
            continue;
          }
+          v = ByteArrayUtf8CharSequence.convertCharSeq(v);
          hasField = true;
          if (sfield != null) {
            used = true;
--- a/solr/solrj/src/java/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java
+++ b/solr/solrj/src/java/org/apache/solr/client/solrj/request/JavaBinUpdateRequestCodec.java
@ -23,12 +23,14 @@ import java.lang.invoke.MethodHandles;
 import java.util.ArrayList;
 import java.util.Collections;
 import java.util.Iterator;
+import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
 import java.util.concurrent.atomic.AtomicBoolean;

 import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.SolrInputField;
 import org.apache.solr.common.params.ModifiableSolrParams;
 import org.apache.solr.common.params.ShardParams;
 import org.apache.solr.common.params.SolrParams;
@ -38,6 +40,8 @@ import org.apache.solr.common.util.NamedList;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

+import static org.apache.solr.common.util.ByteArrayUtf8CharSequence.convertCharSeq;
+
 /**
 * Provides methods for marshalling an UpdateRequest to a NamedList which can be serialized in the javabin format and
 * vice versa.
@ -111,87 +115,7 @@ public class JavaBinUpdateRequestCodec {
    Map<String,Map<String,Object>> delByIdMap;
    List<String> delByQ;
    final NamedList[] namedList = new NamedList[1];
-    try (JavaBinCodec codec = new JavaBinCodec() {
-
-      // NOTE: this only works because this is an anonymous inner class 
-      // which will only ever be used on a single stream -- if this class 
-      // is ever refactored, this will not work.
-      private boolean seenOuterMostDocIterator = false;
-        
-      @Override
-      public NamedList readNamedList(DataInputInputStream dis) throws IOException {
-        int sz = readSize(dis);
-        NamedList nl = new NamedList();
-        if (namedList[0] == null) {
-          namedList[0] = nl;
-        }
-        for (int i = 0; i < sz; i++) {
-          String name = (String) readVal(dis);
-          Object val = readVal(dis);
-          nl.add(name, val);
-        }
-        return nl;
-      }
-
-      @Override
-      public List readIterator(DataInputInputStream fis) throws IOException {
-        // default behavior for reading any regular Iterator in the stream
-        if (seenOuterMostDocIterator) return super.readIterator(fis);
-
-        // special treatment for first outermost Iterator 
-        // (the list of documents)
-        seenOuterMostDocIterator = true;
-        return readOuterMostDocIterator(fis);
-      }
-
-      private List readOuterMostDocIterator(DataInputInputStream fis) throws IOException {
-        NamedList params = (NamedList) namedList[0].get("params");
-        updateRequest.setParams(new ModifiableSolrParams(params.toSolrParams()));
-        if (handler == null) return super.readIterator(fis);
-        Integer commitWithin = null;
-        Boolean overwrite = null;
-        Object o = null;
-        while (true) {
-          if (o == null) {
-            o = readVal(fis);
-          }
-
-          if (o == END_OBJ) {
-            break;
-          }
-
-          SolrInputDocument sdoc = null;
-          if (o instanceof List) {
-            sdoc = listToSolrInputDocument((List<NamedList>) o);
-          } else if (o instanceof NamedList)  {
-            UpdateRequest req = new UpdateRequest();
-            req.setParams(new ModifiableSolrParams(((NamedList) o).toSolrParams()));
-            handler.update(null, req, null, null);
-          } else if (o instanceof Map.Entry){
-            sdoc = (SolrInputDocument) ((Map.Entry) o).getKey();
-            Map p = (Map) ((Map.Entry) o).getValue();
-            if (p != null) {
-              commitWithin = (Integer) p.get(UpdateRequest.COMMIT_WITHIN);
-              overwrite = (Boolean) p.get(UpdateRequest.OVERWRITE);
-            }
-          } else  {
-            sdoc = (SolrInputDocument) o;
-          }
-
-          // peek at the next object to see if we're at the end
-          o = readVal(fis);
-          if (o == END_OBJ) {
-            // indicate that we've hit the last doc in the batch, used to enable optimizations when doing replication
-            updateRequest.lastDocInBatch();
-          }
-
-          handler.update(sdoc, updateRequest, commitWithin, overwrite);
-        }
-        return Collections.EMPTY_LIST;
-      }
-
-    };) {
-
+    try (JavaBinCodec codec = new StreamingCodec(namedList, updateRequest, handler)) {
      codec.unmarshal(is);
    }
    
@ -248,43 +172,169 @@ public class JavaBinUpdateRequestCodec {
    return updateRequest;
  }

-  private SolrInputDocument listToSolrInputDocument(List<NamedList> namedList) {
-    SolrInputDocument doc = new SolrInputDocument();
-    for (int i = 0; i < namedList.size(); i++) {
-      NamedList nl = namedList.get(i);
-      if (i == 0) {
-        Float boost = (Float) nl.getVal(0);
-        if (boost != null && boost.floatValue() != 1f) {
-          String message = "Ignoring document boost: " + boost + " as index-time boosts are not supported anymore";
-          if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) {
-            log.warn(message);
-          } else {
-            log.debug(message);
-          }
-        }
-      } else {
-        Float boost = (Float) nl.getVal(2);
-        if (boost != null && boost.floatValue() != 1f) {
-          String message = "Ignoring field boost: " + boost + " as index-time boosts are not supported anymore";
-          if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) {
-            log.warn(message);
-          } else {
-            log.debug(message);
-          }
-        }
-        doc.addField((String) nl.getVal(0),
-                nl.getVal(1));
-      }
-    }
-    return doc;
-  }

  private NamedList solrParamsToNamedList(SolrParams params) {
    if (params == null) return new NamedList();
    return params.toNamedList();
  }

-  public static interface StreamingUpdateHandler {
-    public void update(SolrInputDocument document, UpdateRequest req, Integer commitWithin, Boolean override);
+  public interface StreamingUpdateHandler {
+    void update(SolrInputDocument document, UpdateRequest req, Integer commitWithin, Boolean override);
+  }
+
+  static class MaskCharSequenceSolrInputDoc extends SolrInputDocument {
+    public MaskCharSequenceSolrInputDoc(Map<String, SolrInputField> fields) {
+      super(fields);
+    }
+
+    @Override
+    public Object getFieldValue(String name) {
+      return convertCharSeq(super.getFieldValue(name));
+    }
+
+  }
+
+  class StreamingCodec extends JavaBinCodec {
+
+    private final NamedList[] namedList;
+    private final UpdateRequest updateRequest;
+    private final StreamingUpdateHandler handler;
+    // NOTE: this flag is set on the first call to readIterator and is never
+    // reset, so a StreamingCodec instance must only ever be used on a single
+    // stream -- create a new instance for every stream that is unmarshalled.
+    private boolean seenOuterMostDocIterator;
+
+    public StreamingCodec(NamedList[] namedList, UpdateRequest updateRequest, StreamingUpdateHandler handler) {
+      this.namedList = namedList;
+      this.updateRequest = updateRequest;
+      this.handler = handler;
+      seenOuterMostDocIterator = false;
+    }
+
+    @Override
+    protected SolrInputDocument createSolrInputDocument(int sz) {
+      return new MaskCharSequenceSolrInputDoc(new LinkedHashMap<>(sz));
+    }
+
+    @Override
+    public NamedList readNamedList(DataInputInputStream dis) throws IOException {
+      int sz = readSize(dis);
+      NamedList nl = new NamedList();
+      if (namedList[0] == null) {
+        namedList[0] = nl;
+      }
+      for (int i = 0; i < sz; i++) {
+        String name = (String) readVal(dis);
+        Object val = readVal(dis);
+        nl.add(name, val);
+      }
+      return nl;
+    }
+
+    private SolrInputDocument listToSolrInputDocument(List<NamedList> namedList) {
+      SolrInputDocument doc = new SolrInputDocument();
+      for (int i = 0; i < namedList.size(); i++) {
+        NamedList nl = namedList.get(i);
+        if (i == 0) {
+          Float boost = (Float) nl.getVal(0);
+          if (boost != null && boost.floatValue() != 1f) {
+            String message = "Ignoring document boost: " + boost + " as index-time boosts are not supported anymore";
+            if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) {
+              log.warn(message);
+            } else {
+              log.debug(message);
+            }
+          }
+        } else {
+          Float boost = (Float) nl.getVal(2);
+          if (boost != null && boost.floatValue() != 1f) {
+            String message = "Ignoring field boost: " + boost + " as index-time boosts are not supported anymore";
+            if (WARNED_ABOUT_INDEX_TIME_BOOSTS.compareAndSet(false, true)) {
+              log.warn(message);
+            } else {
+              log.debug(message);
+            }
+          }
+          doc.addField((String) nl.getVal(0),
+              nl.getVal(1));
+        }
+      }
+      return doc;
+    }
+
+    @Override
+    public List readIterator(DataInputInputStream fis) throws IOException {
+      // default behavior for reading any regular Iterator in the stream
+      if (seenOuterMostDocIterator) return super.readIterator(fis);
+
+      // special treatment for first outermost Iterator
+      // (the list of documents)
+      seenOuterMostDocIterator = true;
+      return readOuterMostDocIterator(fis);
+    }
+
+
+   /* @Override
+    protected Object readDocumentFieldVal(String fieldName, DataInputInputStream dis) throws IOException {
+      super.readStringAsCharSeq = utf8FieldPredicate != null && utf8FieldPredicate.test(fieldName);
+      try {
+        return super.readDocumentFieldVal(fieldName, dis);
+      } finally {
+        super.readStringAsCharSeq = false;
+      }
+    }*/
+
+    private List readOuterMostDocIterator(DataInputInputStream fis) throws IOException {
+      NamedList params = (NamedList) namedList[0].get("params");
+      updateRequest.setParams(new ModifiableSolrParams(params.toSolrParams()));
+      if (handler == null) return super.readIterator(fis);
+      Integer commitWithin = null;
+      Boolean overwrite = null;
+      Object o = null;
+      super.readStringAsCharSeq = true;
+      try {
+        while (true) {
+          if (o == null) {
+            o = readVal(fis);
+          }
+
+          if (o == END_OBJ) {
+            break;
+          }
+
+          SolrInputDocument sdoc = null;
+          if (o instanceof List) {
+            sdoc = listToSolrInputDocument((List<NamedList>) o);
+          } else if (o instanceof NamedList) {
+            UpdateRequest req = new UpdateRequest();
+            req.setParams(new ModifiableSolrParams(((NamedList) o).toSolrParams()));
+            handler.update(null, req, null, null);
+          } else if (o instanceof Map.Entry) {
+            sdoc = (SolrInputDocument) ((Entry) o).getKey();
+            Map p = (Map) ((Entry) o).getValue();
+            if (p != null) {
+              commitWithin = (Integer) p.get(UpdateRequest.COMMIT_WITHIN);
+              overwrite = (Boolean) p.get(UpdateRequest.OVERWRITE);
+            }
+          } else {
+            sdoc = (SolrInputDocument) o;
+          }
+
+          // peek at the next object to see if we're at the end
+          o = readVal(fis);
+          if (o == END_OBJ) {
+            // indicate that we've hit the last doc in the batch, used to enable optimizations when doing replication
+            updateRequest.lastDocInBatch();
+          }
+
+          handler.update(sdoc, updateRequest, commitWithin, overwrite);
+        }
+        return Collections.EMPTY_LIST;
+      } finally {
+        super.readStringAsCharSeq = false;
+
+      }
+    }
+
  }
 }
--- a/solr/solrj/src/java/org/apache/solr/common/util/ByteArrayUtf8CharSequence.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/ByteArrayUtf8CharSequence.java
@ -23,6 +23,7 @@ import java.util.Collection;
 import java.util.HashSet;
 import java.util.Map;
 import java.util.Set;
+import java.util.function.Function;

 import org.noggit.CharArr;

@ -38,6 +39,7 @@ public class ByteArrayUtf8CharSequence implements Utf8CharSequence {
  protected int hashCode = Integer.MIN_VALUE;
  protected int length;
  protected volatile String utf16;
+  public Function<ByteArrayUtf8CharSequence, String> stringProvider;

  public ByteArrayUtf8CharSequence(String utf16) {
    buf = new byte[Math.multiplyExact(utf16.length(), 3)];
@ -51,21 +53,39 @@ public class ByteArrayUtf8CharSequence implements Utf8CharSequence {
    assert isValid();
  }

+  public byte[] getBuf() {
+    return buf;
+  }
+
+  public int offset() {
+    return offset;
+  }
+
  public ByteArrayUtf8CharSequence(byte[] buf, int offset, int length) {
    this.buf = buf;
    this.offset = offset;
    this.length = length;
  }

+  @Override
+  public byte byteAt(int idx) {
+    if (idx >= length || idx < 0) throw new ArrayIndexOutOfBoundsException("idx must be >=0 and < " + length);
+    return buf[offset + idx];
+  }
+
  public String getStringOrNull() {
    return utf16;
  }
+
  @Override
  public int write(int start, byte[] buffer, int pos) {
-    if (start == -1 || start >= length) return -1;
-    if (length == 0) return 0;
-    int writableBytes = Math.min(length - start, buffer.length - pos);
-    System.arraycopy(buf, offset + start, buffer, pos, writableBytes);
+    return _writeBytes(buf, offset, length, start, buffer, pos);
+  }
+
+  static int _writeBytes(byte[] src, int srcOffset, int srcLength, int start, byte[] buffer, int pos) {
+    if (srcOffset < 0 || start < 0 || start >= srcLength) return -1; // keep rejecting start == -1, as write() did before this refactoring
+    int writableBytes = Math.min(srcLength - start, buffer.length - pos);
+    System.arraycopy(src, srcOffset + start, buffer, pos, writableBytes);
    return writableBytes;
  }

@ -97,15 +117,26 @@ public class ByteArrayUtf8CharSequence implements Utf8CharSequence {

  @Override
  public boolean equals(Object other) {
-    if (other == null) {
+    if (other instanceof Utf8CharSequence) {
+      if (size() != ((Utf8CharSequence) other).size()) return false;
+      if (other instanceof ByteArrayUtf8CharSequence) {
+        if (this.length != ((ByteArrayUtf8CharSequence) other).length) return false;
+        ByteArrayUtf8CharSequence that = (ByteArrayUtf8CharSequence) other;
+        return _equals(this.buf, this.offset, this.offset + this.length,
+            that.buf, that.offset, that.offset + that.length);
+      }
+      return utf8Equals(this, (Utf8CharSequence) other);
+    } else {
      return false;
    }
-    if (other instanceof ByteArrayUtf8CharSequence) {
-      ByteArrayUtf8CharSequence that = (ByteArrayUtf8CharSequence) other;
-      return _equals(this.buf, this.offset, this.offset + this.length,
-          that.buf, that.offset, that.offset + that.length);
+  }
+
+  public static boolean utf8Equals(Utf8CharSequence utf8_1, Utf8CharSequence utf8_2) {
+    if (utf8_1.size() != utf8_2.size()) return false;
+    for (int i = 0; i < utf8_1.size(); i++) {
+      if (utf8_1.byteAt(i) != utf8_2.byteAt(i)) return false;
    }
-    return false;
+    return true;
  }


@ -115,14 +146,16 @@ public class ByteArrayUtf8CharSequence implements Utf8CharSequence {
  }

  private String _getStr() {
+    String utf16 = this.utf16;
    if (utf16 == null) {
-      synchronized (this) {
-        if (utf16 == null) {
-          CharArr arr = new CharArr();
-          ByteUtils.UTF8toUTF16(buf, offset, length, arr);
-          utf16 = arr.toString();
-        }
+      if (stringProvider != null) {
+        this.utf16 = utf16 = stringProvider.apply(this);
+      } else {
+        CharArr arr = new CharArr();
+        ByteUtils.UTF8toUTF16(buf, offset, length, arr);
+        this.utf16 = utf16 = arr.toString();
      }
+
    }
    return utf16;
  }
--- a/solr/solrj/src/java/org/apache/solr/common/util/BytesBlock.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/BytesBlock.java
@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.common.util;
+
+public class BytesBlock { // a byte[] from which callers reserve consecutive regions through expand(int)
+  private int bufSize; // length of every array allocated by create()
+  public byte[] buf;
+  // index in buf just past the most recently reserved region; the next region starts here
+  private int pos;
+  // index in buf where the most recently reserved region starts
+  private int startPos = 0;
+
+  public BytesBlock(int sz) { // sz is the length of the backing array
+    this.bufSize = sz;
+    create();
+  }
+
+  public int getPos() {
+    return pos;
+  }
+
+  public int getStartPos() {
+    return startPos;
+  }
+
+  public byte[] getBuf() {
+    return buf;
+  }
+
+  public BytesBlock expand(int sz) { // reserves sz bytes and returns the block holding them, which is not always this one
+    if (bufSize - pos >= sz) { // the request fits in what is left of the current array
+      return markPositions(sz);
+    }
+    if (sz > (bufSize / 4)) return new BytesBlock(sz).expand(sz);// a reasonably large request: give it a new block of its own and leave this one untouched
+    create(); // small request that does not fit: replace this block's array with a fresh one
+    return markPositions(sz);
+  }
+
+  private BytesBlock markPositions(int sz) { // records the reserved region as [startPos, startPos + sz)
+    this.startPos = pos;
+    pos += sz;
+    return this;
+  }
+
+  // Allocates a new array of bufSize bytes and rewinds both positions to 0; the previous array is not copied.
+
+  private void create() {
+    buf = new byte[bufSize];
+    startPos = pos = 0;
+  }
+}
--- a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java
@ -36,6 +36,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.function.BiConsumer;
+import java.util.function.Function;

 import org.apache.solr.common.ConditionalMapWriter;
 import org.apache.solr.common.EnumFieldValue;
@ -117,7 +118,7 @@ public class JavaBinCodec implements PushWriter {
  private WritableDocFields writableDocFields;
  private boolean alreadyMarshalled;
  private boolean alreadyUnmarshalled;
-  private boolean readStringAsCharSeq = false;
+  protected boolean readStringAsCharSeq = false;

  public JavaBinCodec() {
    resolver =null;
@ -284,7 +285,7 @@ public class JavaBinCodec implements PushWriter {
    // OK, try type + size in single byte
    switch (tagByte >>> 5) {
      case STR >>> 5:
-        return readStr(dis);
+        return readStr(dis, stringCache, readStringAsCharSeq);
      case SINT >>> 5:
        return readSmallInt(dis);
      case SLONG >>> 5:
@ -355,6 +356,9 @@ public class JavaBinCodec implements PushWriter {
      writeSolrDocumentList((SolrDocumentList) val);
      return true;
    }
+    if (val instanceof SolrInputField) {
+      return writeKnownType(((SolrInputField) val).getValue());
+    }
    if (val instanceof IteratorWriter) {
      writeIterator((IteratorWriter) val);
      return true;
@ -612,7 +616,7 @@ public class JavaBinCodec implements PushWriter {
        log.debug(message);
      }
    }
-    SolrInputDocument sdoc = new SolrInputDocument(new LinkedHashMap<>(sz));
+    SolrInputDocument sdoc = createSolrInputDocument(sz);
    for (int i = 0; i < sz; i++) {
      String fieldName;
      Object obj = readVal(dis); // could be a boost, a field name, or a child document
@ -639,15 +643,16 @@ public class JavaBinCodec implements PushWriter {
    return sdoc;
  }

+  protected SolrInputDocument createSolrInputDocument(int sz) {
+    return new SolrInputDocument(new LinkedHashMap<>(sz));
+  }
+
  public void writeSolrInputDocument(SolrInputDocument sdoc) throws IOException {
    List<SolrInputDocument> children = sdoc.getChildDocuments();
    int sz = sdoc.size() + (children==null ? 0 : children.size());
    writeTag(SOLRINPUTDOC, sz);
    writeFloat(1f); // document boost
-    for (SolrInputField inputField : sdoc.values()) {
-      writeExternString(inputField.getName());
-      writeVal(inputField.getValue());
-    }
+    sdoc.writeMap(ew);
    if (children != null) {
      for (SolrInputDocument child : children) {
        writeSolrInputDocument(child);
@ -891,28 +896,55 @@ public class JavaBinCodec implements PushWriter {
  private StringBytes bytesRef = new StringBytes(bytes,0,0);

  public CharSequence readStr(DataInputInputStream dis) throws IOException {
-    return readStr(dis,null);
+    return readStr(dis, null, readStringAsCharSeq);
  }

-  public CharSequence readStr(DataInputInputStream dis, StringCache stringCache) throws IOException {
+  public CharSequence readStr(DataInputInputStream dis, StringCache stringCache, boolean readStringAsCharSeq) throws IOException {
+    if (readStringAsCharSeq) {
+      return readUtf8(dis);
+    }
    int sz = readSize(dis);
+    return _readStr(dis, stringCache, sz);
+  }
+
+  private CharSequence _readStr(DataInputInputStream dis, StringCache stringCache, int sz) throws IOException {
    if (bytes == null || bytes.length < sz) bytes = new byte[sz];
    dis.readFully(bytes, 0, sz);
    if (stringCache != null) {
      return stringCache.get(bytesRef.reset(bytes, 0, sz));
    } else {
      arr.reset();
-      if (readStringAsCharSeq) {
-        byte[] copyBuf = new byte[sz];
-        System.arraycopy(bytes, 0, copyBuf, 0, sz);
-        return new ByteArrayUtf8CharSequence(copyBuf, 0, sz);
-      } else {
-        ByteUtils.UTF8toUTF16(bytes, 0, sz, arr);
-        return arr.toString();
-      }
+      ByteUtils.UTF8toUTF16(bytes, 0, sz, arr);
+      return arr.toString();
    }
  }

+  /////////// code to optimize reading UTF8
+  static final int MAX_UTF8_SZ = 1024 * 64;//too big strings can cause too much memory allocation
+  private Function<ByteArrayUtf8CharSequence, String> stringProvider;
+  private BytesBlock bytesBlock;
+
+  protected CharSequence readUtf8(DataInputInputStream dis) throws IOException {
+    int sz = readSize(dis);
+    if (sz > MAX_UTF8_SZ) return _readStr(dis, null, sz);
+    if (bytesBlock == null) bytesBlock = new BytesBlock(1024 * 4);
+    BytesBlock block = this.bytesBlock.expand(sz);
+    dis.readFully(block.getBuf(), block.getStartPos(), sz);
+
+    ByteArrayUtf8CharSequence result = new ByteArrayUtf8CharSequence(block.getBuf(), block.getStartPos(), sz);
+    if (stringProvider == null) {
+      stringProvider = butf8cs -> {
+        synchronized (JavaBinCodec.this) {
+          arr.reset();
+          ByteUtils.UTF8toUTF16(butf8cs.buf, butf8cs.offset(), butf8cs.size(), arr);
+          return arr.toString();
+        }
+      };
+    }
+    result.stringProvider = this.stringProvider;
+    return result;
+  }
+
  public void writeInt(int val) throws IOException {
    if (val > 0) {
      int b = SINT | (val & 0x0f);
@ -973,6 +1005,7 @@ public class JavaBinCodec implements PushWriter {
      return true;
    } else if (val instanceof Utf8CharSequence) {
      writeUTF8Str((Utf8CharSequence) val);
+      return true;
    } else if (val instanceof CharSequence) {
      writeStr((CharSequence) val);
      return true;
@ -1133,7 +1166,7 @@ public class JavaBinCodec implements PushWriter {
      return stringsList.get(idx - 1);
    } else {// idx == 0 means it has a string value
      tagByte = fis.readByte();
-      CharSequence s = readStr(fis, stringCache);
+      CharSequence s = readStr(fis, stringCache, false);
      if (s != null) s = s.toString();
      if (stringsList == null) stringsList = new ArrayList<>();
      stringsList.add(s);
--- a/solr/solrj/src/java/org/apache/solr/common/util/Utf8CharSequence.java
+++ b/solr/solrj/src/java/org/apache/solr/common/util/Utf8CharSequence.java
@ -20,10 +20,10 @@ package org.apache.solr.common.util;
 import java.io.IOException;
 import java.io.OutputStream;

-/**A byte[] backed String
- *
+/**
+ * A byte[] backed String
 */
-public interface Utf8CharSequence extends CharSequence {
+public interface Utf8CharSequence extends CharSequence , Comparable {

  /**
   * Write the bytes into a buffer. The objective is to avoid the local bytes being exposed to
@ -31,20 +31,31 @@ public interface Utf8CharSequence extends CharSequence {
   * possible into the buffer and then return how many bytes were written. It's the responsibility
   * of the caller to call this method repeatedly and ensure that everything is completely written
   *
-   * @param start position from which to start writing
+   * @param start  position from which to start writing
   * @param buffer the buffer to which to write to
   * @param pos    position to start writing
   * @return no:of bytes written
   */
  int write(int start, byte[] buffer, int pos);

-  /** The size of utf8 bytes
+  /**
+   * The size of utf8 bytes
+   *
   * @return the size
   */
  int size();

+  byte byteAt(int idx);
+
+  @Override
+  default int compareTo(Object o) {
+    if(o == null) return 1;
+    return toString().compareTo(o.toString());
+  }
+
  /**
   * Creates  a byte[] and copy to it first before writing it out to the output
+   *
   * @param os The sink
   */
  default void write(OutputStream os) throws IOException {
--- a/solr/solrj/src/test/org/apache/solr/common/util/Utf8CharSequenceTest.java
+++ b/solr/solrj/src/test/org/apache/solr/common/util/Utf8CharSequenceTest.java
@ -58,4 +58,45 @@ public class Utf8CharSequenceTest extends SolrTestCaseJ4 {
    utf81 = (ByteArrayUtf8CharSequence) m1.get("str");
    assertTrue(utf81.equals(utf8));
  }
+
+  public void testUnMarshal() throws IOException { // checks which backing arrays the unmarshalled strings use
+    NamedList nl = new NamedList();
+    String str = " The value!";
+    for (int i = 0; i < 5; i++) {
+      StringBuilder sb = new StringBuilder();
+      sb.append(i);
+      for (int j = 0; j < i; j++) {
+        sb.append(str);
+      }
+      nl.add("key" + i, sb.toString());
+    }
+    StringBuilder sb = new StringBuilder();
+    for (; ; ) {
+      sb.append(str);
+      if (sb.length() > 1024 * 4) break; // make the value longer than 4 KB
+    }
+    nl.add("key_long", sb.toString());
+    nl.add("key5", "5" + str);
+    ByteArrayOutputStream baos = new ByteArrayOutputStream();
+    new JavaBinCodec().marshal(nl, baos);
+    byte[] bytes = baos.toByteArray();
+
+    NamedList nl1 = (NamedList) new JavaBinCodec()
+        .setReadStringAsCharSeq(true)
+        .unmarshal(new FastInputStream(null, bytes, 0, bytes.length));
+    byte[] buf = ((ByteArrayUtf8CharSequence) nl1.getVal(0)).getBuf();
+    ByteArrayUtf8CharSequence valLong = (ByteArrayUtf8CharSequence) nl1.get("key_long");
+    assertNotSame(valLong.getBuf(), buf); // the long value must not share the first value's array
+
+    for (int i = 1; i < 6; i++) {
+      ByteArrayUtf8CharSequence val = (ByteArrayUtf8CharSequence) nl1.get("key" + i);
+      assertSame(buf, val.getBuf()); // identity, not content: the short values share one array
+      String s = val.toString();
+      assertTrue(s.startsWith("" + i));
+      assertTrue(s, s.endsWith(str));
+    }
+
+  }
+
+
 }