diff --git a/lucene/core/src/java/org/apache/lucene/document/Field.java b/lucene/core/src/java/org/apache/lucene/document/Field.java index 467fec7c237..2f0f13bd4fb 100644 --- a/lucene/core/src/java/org/apache/lucene/document/Field.java +++ b/lucene/core/src/java/org/apache/lucene/document/Field.java @@ -230,7 +230,7 @@ public class Field implements IndexableField { * is null, or if the field's type is neither indexed() nor stored(), * or if indexed() is false but storeTermVectors() is true. */ - public Field(String name, String value, IndexableFieldType type) { + public Field(String name, CharSequence value, IndexableFieldType type) { if (name == null) { throw new IllegalArgumentException("name must not be null"); } @@ -256,13 +256,19 @@ public class Field implements IndexableField { */ @Override public String stringValue() { - if (fieldsData instanceof String || fieldsData instanceof Number) { + if (fieldsData instanceof CharSequence || fieldsData instanceof Number) { return fieldsData.toString(); } else { return null; } } - + + @Override + public CharSequence getCharSequenceValue() { + return fieldsData instanceof CharSequence ? + (CharSequence) fieldsData : stringValue(); + } + /** * The value of the field as a Reader, or null. If null, the String value or * binary value is used. Exactly one of stringValue(), readerValue(), and @@ -446,7 +452,7 @@ public class Field implements IndexableField { return null; } } - + /** Prints a Field for human consumption. */ @Override public String toString() { diff --git a/lucene/core/src/java/org/apache/lucene/document/StoredField.java b/lucene/core/src/java/org/apache/lucene/document/StoredField.java index 7dc5a99f5be..19fc231b19e 100644 --- a/lucene/core/src/java/org/apache/lucene/document/StoredField.java +++ b/lucene/core/src/java/org/apache/lucene/document/StoredField.java @@ -127,6 +127,19 @@ public class StoredField extends Field { super(name, value, type); } + /** + * Expert: allows you to customize the {@link + * FieldType}. + * @param name field name + * @param value CharSequence value + * @param type custom {@link FieldType} for this field + * @throws IllegalArgumentException if the field name, value or type + * is null. + */ + public StoredField(String name, CharSequence value, FieldType type) { + super(name, value, type); + } + // TODO: not great but maybe not a big problem? /** * Create a stored-only field with the given integer value. diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexableField.java b/lucene/core/src/java/org/apache/lucene/index/IndexableField.java index f08eab50676..4039f1f8199 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexableField.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexableField.java @@ -61,6 +61,13 @@ public interface IndexableField { /** Non-null if this field has a string value */ public String stringValue(); + /** + * Non-null if this field has a string value + */ + default CharSequence getCharSequenceValue() { + return stringValue(); + } + /** Non-null if this field has a Reader value */ public Reader readerValue(); diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt index ff003945f58..62370de8993 100644 --- a/solr/CHANGES.txt +++ b/solr/CHANGES.txt @@ -167,6 +167,8 @@ Improvements * SOLR-12833: Add configurable timeout to VersionBucket lock. (Jeffery Yuan, Mark Miller) +* SOLR-12885: BinaryResponseWriter (javabin format) should directly copy from BytesRef to output (noble) + Other Changes ---------------------- diff --git a/solr/core/src/java/org/apache/solr/response/BinaryResponseWriter.java b/solr/core/src/java/org/apache/solr/response/BinaryResponseWriter.java index 81172f399e0..be6317bcb19 100644 --- a/solr/core/src/java/org/apache/solr/response/BinaryResponseWriter.java +++ b/solr/core/src/java/org/apache/solr/response/BinaryResponseWriter.java @@ -22,16 +22,20 @@ import java.io.OutputStream; import java.io.Writer; import java.lang.invoke.MethodHandles; import java.util.ArrayList; +import java.util.Collection; import java.util.Iterator; import java.util.List; +import java.util.function.Consumer; import org.apache.commons.io.output.ByteArrayOutputStream; +import org.apache.lucene.document.StoredField; import org.apache.lucene.index.IndexableField; import org.apache.solr.client.solrj.impl.BinaryResponseParser; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.params.CommonParams; import org.apache.solr.common.util.JavaBinCodec; import org.apache.solr.common.util.NamedList; +import org.apache.solr.common.util.Utf8CharSequence; import org.apache.solr.request.SolrQueryRequest; import org.apache.solr.schema.IndexSchema; import org.apache.solr.schema.SchemaField; @@ -40,8 +44,11 @@ import org.apache.solr.search.ReturnFields; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import static org.apache.solr.common.util.ByteArrayUtf8CharSequence.convertCharSeq; + public class BinaryResponseWriter implements BinaryQueryResponseWriter { +// public static boolean useUtf8CharSeq = true; private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @Override @@ -80,14 +87,32 @@ public class BinaryResponseWriter implements BinaryQueryResponseWriter { @Override public Object resolve(Object o, JavaBinCodec codec) throws IOException { + if (o instanceof StoredField) { + CharSequence val = ((StoredField) o).getCharSequenceValue(); + if (val instanceof Utf8CharSequence) { + codec.writeUTF8Str((Utf8CharSequence) val); + return null; + } + } if (o instanceof ResultContext) { ReturnFields orig = returnFields; ResultContext res = (ResultContext)o; if(res.getReturnFields()!=null) { returnFields = res.getReturnFields(); } - writeResults(res, codec); +// if (useUtf8CharSeq) { + ResultContext.READASBYTES.set(fieldName -> { + SchemaField fld = res.getRequest().getSchema().getFieldOrNull(fieldName); + return fld != null && fld.getType().isUtf8Field(); + }); + + try { + writeResults(res, codec); + } finally { + ResultContext.READASBYTES.remove(); + } returnFields = orig; + return null; // null means we completely handled it } if (o instanceof DocList) { @@ -176,4 +201,98 @@ public class BinaryResponseWriter implements BinaryQueryResponseWriter { } } + static class MaskCharSeqSolrDocument extends SolrDocument { + /** + * Get the value or collection of values for a given field. + */ + @Override + public Object getFieldValue(String name) { + return convertCharSeq(_fields.get(name)); + } + + /** + * Get a collection of values for a given field name + */ + @SuppressWarnings("unchecked") + @Override + public Collection getFieldValues(String name) { + Object v = _fields.get(name); + if (v instanceof Collection) { + return convertCharSeq((Collection) v); + } + if (v != null) { + ArrayList arr = new ArrayList<>(1); + arr.add(convertCharSeq(v)); + return arr; + } + return null; + } + + public Collection getRawFieldValues(String name) { + Object v = _fields.get(name); + if (v instanceof Collection) { + return (Collection) v; + } + if (v != null) { + ArrayList arr = new ArrayList<>(1); + arr.add(v); + return arr; + } + return null; + } + + + /** + * Iterate of String->Object keys + */ + @Override + public Iterator> iterator() { + Iterator> it = _fields.entrySet().iterator(); + return new Iterator>() { + @Override + public boolean hasNext() { + return it.hasNext(); + } + + @Override + public Entry next() { + return convertCharSeq(it.next()); + } + }; + } + + + /////////////////////////////////////////////////////////////////// + // Get the field values + /////////////////////////////////////////////////////////////////// + + /** + * returns the first value for a field + */ + @Override + public Object getFirstValue(String name) { + Object v = _fields.get(name); + if (v == null || !(v instanceof Collection)) return convertCharSeq(v); + Collection c = (Collection) v; + if (c.size() > 0) { + return convertCharSeq(c.iterator().next()); + } + return null; + } + + @Override + public Object get(Object key) { + return convertCharSeq(_fields.get(key)); + } + + public Object getRaw(Object key) { + return _fields.get(key); + } + + @Override + public void forEach(Consumer> action) { + super.forEach(action); + } + } + } diff --git a/solr/core/src/java/org/apache/solr/response/DocsStreamer.java b/solr/core/src/java/org/apache/solr/response/DocsStreamer.java index 67892cbbae4..3d1976e143c 100644 --- a/solr/core/src/java/org/apache/solr/response/DocsStreamer.java +++ b/solr/core/src/java/org/apache/solr/response/DocsStreamer.java @@ -147,8 +147,10 @@ public class DocsStreamer implements Iterator { // can't just use fields.wantsField(String) // because that doesn't include extra fields needed by transformers final Set fieldNamesNeeded = fields.getLuceneFieldNames(); - - final SolrDocument out = new SolrDocument(); + + final SolrDocument out = ResultContext.READASBYTES.get() == null ? + new SolrDocument() : + new BinaryResponseWriter.MaskCharSeqSolrDocument(); // NOTE: it would be tempting to try and optimize this to loop over fieldNamesNeeded // when it's smaller then the IndexableField[] in the Document -- but that's actually *less* effecient diff --git a/solr/core/src/java/org/apache/solr/response/ResultContext.java b/solr/core/src/java/org/apache/solr/response/ResultContext.java index a9aff25e05e..ccc2c5daebe 100644 --- a/solr/core/src/java/org/apache/solr/response/ResultContext.java +++ b/solr/core/src/java/org/apache/solr/response/ResultContext.java @@ -17,6 +17,7 @@ package org.apache.solr.response; import java.util.Iterator; +import java.util.function.Predicate; import org.apache.lucene.search.Query; import org.apache.solr.common.SolrDocument; @@ -54,6 +55,7 @@ public abstract class ResultContext { public Iterator getProcessedDocuments() { return new DocsStreamer(this); } + public static final ThreadLocal> READASBYTES = new ThreadLocal<>(); } diff --git a/solr/core/src/java/org/apache/solr/schema/FieldType.java b/solr/core/src/java/org/apache/solr/schema/FieldType.java index 5919c9e1994..8bcf839aaf3 100644 --- a/solr/core/src/java/org/apache/solr/schema/FieldType.java +++ b/solr/core/src/java/org/apache/solr/schema/FieldType.java @@ -62,6 +62,7 @@ import org.apache.lucene.util.CharsRefBuilder; import org.apache.lucene.util.Version; import org.apache.solr.analysis.SolrAnalyzer; import org.apache.solr.analysis.TokenizerChain; +import org.apache.solr.common.IteratorWriter; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.util.Base64; @@ -133,6 +134,8 @@ public abstract class FieldType extends FieldProperties { return false; } + public boolean isUtf8Field(){return false;} + /** * Returns true if the fields' docValues should be used for obtaining stored value */ @@ -157,6 +160,10 @@ public abstract class FieldType extends FieldProperties { } + public boolean write(IteratorWriter.ItemWriter itemWriter) { + return false; + } + // Handle additional arguments... protected void setArgs(IndexSchema schema, Map args) { // default to STORED, INDEXED, OMIT_TF_POSITIONS and MULTIVALUED depending on schema version diff --git a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java index b31f6e92a95..098b3f07312 100644 --- a/solr/core/src/java/org/apache/solr/schema/IndexSchema.java +++ b/solr/core/src/java/org/apache/solr/schema/IndexSchema.java @@ -37,6 +37,7 @@ import java.util.Set; import java.util.SortedMap; import java.util.TreeMap; import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; import java.util.function.Function; import java.util.regex.Pattern; import java.util.stream.Collectors; @@ -139,6 +140,8 @@ public class IndexSchema { protected volatile DynamicField[] dynamicFields; public DynamicField[] getDynamicFields() { return dynamicFields; } + protected Map dynamicFieldCache = new ConcurrentHashMap<>(); + private Analyzer indexAnalyzer; private Analyzer queryAnalyzer; @@ -1195,9 +1198,14 @@ public class IndexSchema { public SchemaField getFieldOrNull(String fieldName) { SchemaField f = fields.get(fieldName); if (f != null) return f; + f = dynamicFieldCache.get(fieldName); + if (f != null) return f; for (DynamicField df : dynamicFields) { - if (df.matches(fieldName)) return df.makeSchemaField(fieldName); + if (df.matches(fieldName)) { + dynamicFieldCache.put(fieldName, f = df.makeSchemaField(fieldName)); + break; + } } return f; diff --git a/solr/core/src/java/org/apache/solr/schema/StrField.java b/solr/core/src/java/org/apache/solr/schema/StrField.java index d9b51d17c29..a8ec62cbaac 100644 --- a/solr/core/src/java/org/apache/solr/schema/StrField.java +++ b/solr/core/src/java/org/apache/solr/schema/StrField.java @@ -68,6 +68,12 @@ public class StrField extends PrimitiveFieldType { return Collections.singletonList(fval); } + + @Override + public boolean isUtf8Field() { + return true; + } + @Override public SortField getSortField(SchemaField field,boolean reverse) { return getStringSort(field,reverse); diff --git a/solr/core/src/java/org/apache/solr/schema/TextField.java b/solr/core/src/java/org/apache/solr/schema/TextField.java index 8920c53a2dd..0d44eb7293a 100644 --- a/solr/core/src/java/org/apache/solr/schema/TextField.java +++ b/solr/core/src/java/org/apache/solr/schema/TextField.java @@ -209,4 +209,9 @@ public class TextField extends FieldType { public Object unmarshalSortValue(Object value) { return unmarshalStringSortValue(value); } + + @Override + public boolean isUtf8Field() { + return true; + } } diff --git a/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java b/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java index 45c1cf200a0..4d5018d5732 100644 --- a/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java +++ b/solr/core/src/java/org/apache/solr/search/SolrDocumentFetcher.java @@ -30,6 +30,7 @@ import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Set; +import java.util.function.Predicate; import java.util.function.Supplier; import org.apache.commons.collections.CollectionUtils; @@ -37,7 +38,10 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.DocumentStoredFieldVisitor; +import org.apache.lucene.document.FieldType; import org.apache.lucene.document.LazyDocument; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.BinaryDocValues; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.DocValuesType; @@ -58,11 +62,13 @@ import org.apache.lucene.util.NumericUtils; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentBase; import org.apache.solr.common.SolrException; +import org.apache.solr.common.util.ByteArrayUtf8CharSequence; import org.apache.solr.core.SolrConfig; import org.apache.solr.response.DocsStreamer; +import org.apache.solr.response.ResultContext; +import org.apache.solr.schema.AbstractEnumField; import org.apache.solr.schema.BoolField; import org.apache.solr.schema.LatLonPointSpatialField; -import org.apache.solr.schema.AbstractEnumField; import org.apache.solr.schema.NumberType; import org.apache.solr.schema.SchemaField; import org.slf4j.Logger; @@ -291,6 +297,22 @@ public class SolrDocumentFetcher { // When: toLoad is one single-valued field, no lazyFieldProducer } + + @Override + public void stringField(FieldInfo fieldInfo, byte[] value) throws IOException { + Predicate readAsBytes = ResultContext.READASBYTES.get(); + if (readAsBytes != null && readAsBytes.test(fieldInfo.name)) { + final FieldType ft = new FieldType(TextField.TYPE_STORED); + ft.setStoreTermVectors(fieldInfo.hasVectors()); + ft.setOmitNorms(fieldInfo.omitsNorms()); + ft.setIndexOptions(fieldInfo.getIndexOptions()); + doc.add(new StoredField(fieldInfo.name, new ByteArrayUtf8CharSequence(value, 0, value.length), ft)); + } else { + super.stringField(fieldInfo, value); + } + + } + @Override public Status needsField(FieldInfo fieldInfo) throws IOException { Status status = super.needsField(fieldInfo); diff --git a/solr/core/src/test/org/apache/solr/response/TestBinaryResponseWriter.java b/solr/core/src/test/org/apache/solr/response/TestBinaryResponseWriter.java index 5db49ca1e4a..bbc4985ad1d 100644 --- a/solr/core/src/test/org/apache/solr/response/TestBinaryResponseWriter.java +++ b/solr/core/src/test/org/apache/solr/response/TestBinaryResponseWriter.java @@ -18,19 +18,27 @@ package org.apache.solr.response; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; import java.util.Locale; import java.util.Map; import java.util.UUID; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.util.BytesRef; +import org.apache.solr.SolrTestCaseJ4; import org.apache.solr.common.SolrDocument; import org.apache.solr.common.SolrDocumentList; import org.apache.solr.common.params.CommonParams; +import org.apache.solr.common.util.ByteArrayUtf8CharSequence; +import org.apache.solr.common.util.ByteUtils; import org.apache.solr.common.util.JavaBinCodec; import org.apache.solr.common.util.NamedList; import org.apache.solr.request.LocalSolrQueryRequest; import org.apache.solr.response.BinaryResponseWriter.Resolver; import org.apache.solr.search.SolrReturnFields; -import org.apache.solr.SolrTestCaseJ4; +import org.apache.solr.util.SimplePostTool; import org.junit.BeforeClass; /** @@ -48,6 +56,41 @@ public class TestBinaryResponseWriter extends SolrTestCaseJ4 { initCore("solrconfig.xml", "schema12.xml"); } + public void testBytesRefWriting() { + compareStringFormat("ThisIsUTF8String"); + compareStringFormat("Thailand (ประเทศไทย)"); + compareStringFormat("LIVE: सबरीमाला मंदिर के पास पहुंची दो महिलाएं, जमकर हो रहा विरोध-प्रदर्शन"); + } + + public void testJavabinCodecWithCharSeq() throws IOException { + SolrDocument document = new SolrDocument(); + document.put("id", "1"); + String text = "नए लुक में धमाल मचाने आ रहे हैं MS Dhoni, कुछ यूं दिखाया हेलीकॉप्टर शॉट"; + document.put("desc", new StoredField("desc", new ByteArrayUtf8CharSequence(text) { + }, TextField.TYPE_STORED)); + + NamedList nl = new NamedList(); + nl.add("doc1", document); + SimplePostTool.BAOS baos = new SimplePostTool.BAOS(); + new JavaBinCodec(new BinaryResponseWriter.Resolver(null, null)).marshal(nl, baos); + ByteBuffer byteBuffer = baos.getByteBuffer(); + nl = (NamedList) new JavaBinCodec().unmarshal(new ByteArrayInputStream(byteBuffer.array(), 0, byteBuffer.limit())); + assertEquals(text, nl._get("doc1/desc", null)); + + + } + + private void compareStringFormat(String input) { + byte[] bytes1 = new byte[1024]; + int len1 = ByteUtils.UTF16toUTF8(input, 0, input.length(), bytes1, 0); + BytesRef bytesref = new BytesRef(input); + System.out.println(); + assertEquals(len1, bytesref.length); + for (int i = 0; i < len1; i++) { + assertEquals(input + " not matching char at :" + i, bytesref.bytes[i], bytes1[i]); + } + } + /** * Tests known types implementation by asserting correct encoding/decoding of UUIDField */ diff --git a/solr/solrj/src/java/org/apache/solr/common/ConditionalMapWriter.java b/solr/solrj/src/java/org/apache/solr/common/ConditionalMapWriter.java index 1b6b03e3dac..54785685cda 100644 --- a/solr/solrj/src/java/org/apache/solr/common/ConditionalMapWriter.java +++ b/solr/solrj/src/java/org/apache/solr/common/ConditionalMapWriter.java @@ -30,11 +30,13 @@ public class ConditionalMapWriter implements MapWriter { this.predicate = predicate; } - private class EntryWriterWrapper implements EntryWriter { + public static class EntryWriterWrapper implements EntryWriter { private final EntryWriter delegate; + private final BiPredicate predicate; - EntryWriterWrapper(EntryWriter delegate) { + public EntryWriterWrapper(EntryWriter delegate, BiPredicate predicate) { this.delegate = delegate; + this.predicate = predicate; } @Override @@ -71,7 +73,7 @@ public class ConditionalMapWriter implements MapWriter { @Override public void writeMap(EntryWriter ew) throws IOException { - if(delegate!=null) delegate.writeMap(new EntryWriterWrapper(ew)); + if (delegate != null) delegate.writeMap(new EntryWriterWrapper(ew, predicate)); } public static BiPredicate dedupeKeyPredicate(Set keys) { diff --git a/solr/solrj/src/java/org/apache/solr/common/LinkedHashMapWriter.java b/solr/solrj/src/java/org/apache/solr/common/LinkedHashMapWriter.java index fe434001946..8d07babe156 100644 --- a/solr/solrj/src/java/org/apache/solr/common/LinkedHashMapWriter.java +++ b/solr/solrj/src/java/org/apache/solr/common/LinkedHashMapWriter.java @@ -38,7 +38,7 @@ public class LinkedHashMapWriter extends LinkedHashMap implements @Override public void writeMap(EntryWriter ew) throws IOException { - forEach((k, v) -> ew.putNoEx(k, v)); + forEach(ew.getBiConsumer()); } @Override diff --git a/solr/solrj/src/java/org/apache/solr/common/MapWriter.java b/solr/solrj/src/java/org/apache/solr/common/MapWriter.java index a378e1d7e6e..926cf4c360d 100644 --- a/solr/solrj/src/java/org/apache/solr/common/MapWriter.java +++ b/solr/solrj/src/java/org/apache/solr/common/MapWriter.java @@ -23,6 +23,7 @@ import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.function.BiConsumer; import java.util.function.BiPredicate; import org.apache.solr.common.util.Utils; @@ -154,5 +155,9 @@ public interface MapWriter extends MapSerializable , NavigableObject { return this; } + + default BiConsumer getBiConsumer(){ + return (k, v) -> putNoEx(k,v); + } } } diff --git a/solr/solrj/src/java/org/apache/solr/common/MapWriterMap.java b/solr/solrj/src/java/org/apache/solr/common/MapWriterMap.java index e0dd0ac4196..f6b978fded7 100644 --- a/solr/solrj/src/java/org/apache/solr/common/MapWriterMap.java +++ b/solr/solrj/src/java/org/apache/solr/common/MapWriterMap.java @@ -30,7 +30,7 @@ public class MapWriterMap implements MapWriter { @Override public void writeMap(EntryWriter ew) throws IOException { - delegate.forEach((k, v) -> ew.putNoEx(k == null ? null : k.toString(), v)); + delegate.forEach(ew.getBiConsumer()); } @Override diff --git a/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java b/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java index 910ec5186c7..0563c9e0749 100644 --- a/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java +++ b/solr/solrj/src/java/org/apache/solr/common/SolrDocument.java @@ -16,6 +16,7 @@ */ package org.apache.solr.common; +import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; @@ -27,6 +28,8 @@ import java.util.Set; import org.apache.solr.common.util.NamedList; +import static org.apache.solr.common.util.ByteArrayUtf8CharSequence.convertCharSeq; + /** * A concrete representation of a document within a Solr index. Unlike a lucene @@ -41,7 +44,7 @@ import org.apache.solr.common.util.NamedList; */ public class SolrDocument extends SolrDocumentBase implements Iterable> { - private final Map _fields; + protected final Map _fields; private List _childDocuments; @@ -50,6 +53,11 @@ public class SolrDocument extends SolrDocumentBase impleme _fields = new LinkedHashMap<>(); } + @Override + public void writeMap(EntryWriter ew) throws IOException { + _fields.forEach(ew.getBiConsumer()); + } + public SolrDocument(Map fields) { this._fields = fields; } @@ -281,14 +289,14 @@ public class SolrDocument extends SolrDocumentBase impleme /** Get the field Value */ @Override public Object get(Object key) { - return getFirstValue( (String)key ); + return convertCharSeq(getFirstValue( (String)key)); } // Easily Supported methods @Override public boolean containsKey(Object key) { return _fields.containsKey( key ); } @Override - public Set keySet() { return _fields.keySet(); } + public Set keySet() { return (Set) convertCharSeq(_fields.keySet()); } @Override public int size() { return _fields.size(); } @Override @@ -360,7 +368,7 @@ public class SolrDocument extends SolrDocumentBase impleme @Override public Object remove(Object key) { - return _fields.remove(key); + return convertCharSeq(_fields.remove(key)); } @Override @@ -370,7 +378,7 @@ public class SolrDocument extends SolrDocumentBase impleme @Override public Collection values() { - return _fields.values(); + return convertCharSeq(_fields.values()); } @Override diff --git a/solr/solrj/src/java/org/apache/solr/common/SolrDocumentBase.java b/solr/solrj/src/java/org/apache/solr/common/SolrDocumentBase.java index 4429fbef64c..21d79a97fd4 100644 --- a/solr/solrj/src/java/org/apache/solr/common/SolrDocumentBase.java +++ b/solr/solrj/src/java/org/apache/solr/common/SolrDocumentBase.java @@ -21,7 +21,7 @@ import java.util.Collection; import java.util.List; import java.util.Map; -public abstract class SolrDocumentBase implements Map, Serializable { +public abstract class SolrDocumentBase implements Map, Serializable, MapWriter { /** Get all field names. */ diff --git a/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java b/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java index 172606f1b93..e1fa311b24f 100644 --- a/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java +++ b/solr/solrj/src/java/org/apache/solr/common/SolrInputDocument.java @@ -16,6 +16,7 @@ */ package org.apache.solr.common; +import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; @@ -44,7 +45,12 @@ public class SolrInputDocument extends SolrDocumentBase fields) { _fields = fields; } diff --git a/solr/solrj/src/java/org/apache/solr/common/util/ByteArrayUtf8CharSequence.java b/solr/solrj/src/java/org/apache/solr/common/util/ByteArrayUtf8CharSequence.java index aecb50d9b19..4ac48f23590 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/ByteArrayUtf8CharSequence.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/ByteArrayUtf8CharSequence.java @@ -20,11 +20,14 @@ package org.apache.solr.common.util; import java.util.AbstractMap; import java.util.ArrayList; import java.util.Collection; +import java.util.HashSet; import java.util.Map; +import java.util.Set; import org.noggit.CharArr; /**A mutable byte[] backed Utf8CharSequence. This is quite similar to the BytesRef of Lucene + * Do not alter the contents of the byte[] . it may be inconsistent with the cached String * This is designed for single-threaded use * */ @@ -60,6 +63,7 @@ public class ByteArrayUtf8CharSequence implements Utf8CharSequence { @Override public int write(int start, byte[] buffer, int pos) { if (start == -1 || start >= length) return -1; + if (length == 0) return 0; int writableBytes = Math.min(length - start, buffer.length - pos); System.arraycopy(buf, offset + start, buffer, pos, writableBytes); return writableBytes; @@ -139,11 +143,11 @@ public class ByteArrayUtf8CharSequence implements Utf8CharSequence { return new ByteArrayUtf8CharSequence(bytes, 0, length, utf16, hashCode); } - public static Map.Entry convertCharSeq(Map.Entry result) { - if (result.getKey() instanceof Utf8CharSequence || result.getValue() instanceof Utf8CharSequence) { - return new AbstractMap.SimpleEntry(convertCharSeq(result.getKey()), convertCharSeq(result.getValue())); + public static Map.Entry convertCharSeq(Map.Entry e) { + if (e.getKey() instanceof Utf8CharSequence || e.getValue() instanceof Utf8CharSequence) { + return new AbstractMap.SimpleEntry(convertCharSeq(e.getKey()), convertCharSeq(e.getValue())); } - return result; + return e; } @@ -157,7 +161,12 @@ public class ByteArrayUtf8CharSequence implements Utf8CharSequence { } } if (needsCopy) { - ArrayList copy = new ArrayList(vals.size()); + Collection copy = null; + if (vals instanceof Set){ + copy = new HashSet(vals.size()); + } else { + copy = new ArrayList(vals.size()); + } for (Object o : vals) copy.add(convertCharSeq(o)); return copy; } diff --git a/solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java b/solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java index 7a2d00e89ef..442f9ac4c1a 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/FastOutputStream.java @@ -238,11 +238,11 @@ public class FastOutputStream extends OutputStream implements DataOutput { int start = 0; int totalWritten = 0; for (; ; ) { + if (totalWritten >= utf8.size()) break; if (pos >= buf.length) flushBuffer(); int sz = utf8.write(start, buf, pos); pos += sz; totalWritten += sz; - if (totalWritten >= utf8.size()) break; start += sz; } } diff --git a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java index ca8c80fdecf..586adfbb9a2 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/JavaBinCodec.java @@ -35,7 +35,9 @@ import java.util.Map.Entry; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicLong; +import java.util.function.BiConsumer; +import org.apache.solr.common.ConditionalMapWriter; import org.apache.solr.common.EnumFieldValue; import org.apache.solr.common.IteratorWriter; import org.apache.solr.common.IteratorWriter.ItemWriter; @@ -471,9 +473,17 @@ public class JavaBinCodec implements PushWriter { writeStr(v); return this; } + + private BiConsumer biConsumer; + + @Override + public BiConsumer getBiConsumer() { + if (biConsumer == null) biConsumer = MapWriter.EntryWriter.super.getBiConsumer(); + return biConsumer; + } } - private final MapWriter.EntryWriter ew = new BinEntryWriter(); + public final BinEntryWriter ew = new BinEntryWriter(); public void writeMap(MapWriter val) throws IOException { @@ -514,6 +524,7 @@ public class JavaBinCodec implements PushWriter { //use this to ignore the writable interface because , child docs will ignore the fl flag // is it a good design? private boolean ignoreWritable =false; + private ConditionalMapWriter.EntryWriterWrapper cew; public void writeSolrDocument(SolrDocument doc) throws IOException { List children = doc.getChildDocuments(); @@ -528,14 +539,8 @@ public class JavaBinCodec implements PushWriter { int sz = fieldsCount + (children==null ? 0 : children.size()); writeTag(SOLRDOC); writeTag(ORDERED_MAP, sz); - for (Map.Entry entry : doc) { - String name = entry.getKey(); - if(toWrite(name)) { - writeExternString(name); - Object val = entry.getValue(); - writeVal(val); - } - } + if (cew == null) cew = new ConditionalMapWriter.EntryWriterWrapper(ew, (k, o) -> toWrite(k.toString())); + doc.writeMap(cew); if (children != null) { try { ignoreWritable = true; @@ -683,7 +688,7 @@ public class JavaBinCodec implements PushWriter { return m; } - private final ItemWriter itemWriter = new ItemWriter() { + public final ItemWriter itemWriter = new ItemWriter() { @Override public ItemWriter add(Object o) throws IOException { writeVal(o); diff --git a/solr/solrj/src/java/org/apache/solr/common/util/NamedList.java b/solr/solrj/src/java/org/apache/solr/common/util/NamedList.java index 71cb78b5072..ec40f9764cf 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/NamedList.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/NamedList.java @@ -79,7 +79,7 @@ public class NamedList implements Cloneable, Serializable, Iterable= size()) break; int sz = write(start, buf, 0); totalWritten += sz; if (sz > 0) os.write(buf, 0, sz); - if (totalWritten >= size()) break; start += sz; } } diff --git a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleJettyTest.java b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleJettyTest.java index 43a31532e92..5fe8c9b68f7 100644 --- a/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleJettyTest.java +++ b/solr/solrj/src/test/org/apache/solr/client/solrj/embedded/SolrExampleJettyTest.java @@ -17,8 +17,13 @@ package org.apache.solr.client.solrj.embedded; import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; import java.util.Map; +import org.apache.commons.io.IOUtils; import org.apache.http.HttpResponse; import org.apache.http.client.HttpClient; import org.apache.http.client.methods.HttpPost; @@ -26,12 +31,17 @@ import org.apache.http.entity.InputStreamEntity; import org.apache.solr.SolrTestCaseJ4.SuppressSSL; import org.apache.solr.client.solrj.SolrExampleTests; import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.SolrServerException; +import org.apache.solr.client.solrj.impl.BinaryResponseParser; import org.apache.solr.client.solrj.impl.HttpClientUtil; import org.apache.solr.client.solrj.impl.HttpSolrClient; import org.apache.solr.client.solrj.response.QueryResponse; import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.common.util.NamedList; import org.junit.Assert; import org.junit.BeforeClass; +import org.junit.Ignore; import org.junit.Test; import org.noggit.ObjectBuilder; @@ -101,4 +111,59 @@ public class SolrExampleJettyTest extends SolrExampleTests { baseURL.replace("/collection1", "/____v2/cores/collection1/update") : baseURL + "/update/json/docs"; } + + @Ignore + public void testUtf8QueryPerf() throws Exception { + HttpSolrClient client = (HttpSolrClient) getSolrClient(); + client.deleteByQuery("*:*"); + client.commit(); + List docs = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + SolrInputDocument doc2 = new SolrInputDocument(); + doc2.addField("id", "" + i); + doc2.addField("fld1_s", "1 value 1 value 1 value 1 value 1 value 1 value 1 value "); + doc2.addField("fld2_s", "2 value 2 value 2 value 2 value 2 value 2 value 2 value 2 value 2 value 2 value "); + doc2.addField("fld3_s", "3 value 3 value 3 value 3 value 3 value 3 value 3 value 3 value 3 value 3 value 3 value 3 value 3 value 3 value "); + doc2.addField("fld4_s", "4 value 4 value 4 value 4 value 4 value 4 value 4 value 4 value 4 value "); + doc2.addField("fld5_s", "5 value 5 value 5 value 5 value 5 value 5 value 5 value 5 value 5 value 5 value 5 value 5 value "); + docs.add(doc2); + } + client.add(docs); + client.commit(); + QueryResponse rsp = client.query(new SolrQuery("*:*")); + assertEquals(10, rsp.getResults().getNumFound()); + + + client.setParser(new BinaryResponseParser() { + @Override + public NamedList processResponse(InputStream body, String encoding) { + try { + IOUtils.skip(body, 1024 * 1000); + } catch (IOException e) { + e.printStackTrace(); + } + return rsp.getResponse(); + } + }); + + + runQueries(client, 1000, true); + /*BinaryResponseWriter.useUtf8CharSeq = false; + System.out.println("BinaryResponseWriter.useUtf8CharSeq = " + BinaryResponseWriter.useUtf8CharSeq); + runQueries(client, 10000, false); + BinaryResponseWriter.useUtf8CharSeq = true; + System.out.println("BinaryResponseWriter.useUtf8CharSeq = " + BinaryResponseWriter.useUtf8CharSeq);*/ + runQueries(client, 10000, false); + } + + + private void runQueries(HttpSolrClient client, int count, boolean warmup) throws SolrServerException, IOException { + long start = System.nanoTime(); + for (int i = 0; i < count; i++) { + client.query(new SolrQuery("*:*")); + } + if (warmup) return; + System.out.println("time taken : " + ((System.nanoTime() - start)) / (1000 * 1000)); + } + }