mirror of https://github.com/apache/lucene.git
SOLR-12992: When using binary format, ExportWriter to directly copy BytesRef instead of creating new String
This commit is contained in:
parent
4efaecac34
commit
25bca6f165
|
@ -134,6 +134,9 @@ Improvements
|
|||
|
||||
* SOLR-12881: Remove unneeded import statements (Peter Somogyi via Erick Erickson)
|
||||
|
||||
* SOLR-12992: When using binary format, ExportWriter to directly copy BytesRef instead of
|
||||
creating new String (noble)
|
||||
|
||||
================== 7.6.0 ==================
|
||||
|
||||
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
|
||||
|
|
|
@ -25,12 +25,25 @@ import org.apache.lucene.index.SortedDocValues;
|
|||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.CharsRefBuilder;
|
||||
import org.apache.solr.common.MapWriter;
|
||||
import org.apache.solr.common.util.ByteArrayUtf8CharSequence;
|
||||
import org.apache.solr.common.util.JavaBinCodec;
|
||||
import org.apache.solr.schema.FieldType;
|
||||
|
||||
class StringFieldWriter extends FieldWriter {
|
||||
private String field;
|
||||
private FieldType fieldType;
|
||||
private CharsRefBuilder cref = new CharsRefBuilder();
|
||||
final ByteArrayUtf8CharSequence utf8 = new ByteArrayUtf8CharSequence(new byte[0], 0, 0) {
|
||||
@Override
|
||||
public String toString() {
|
||||
String str = super.utf16;
|
||||
if (str != null) return str;
|
||||
fieldType.indexedToReadable(new BytesRef(super.buf, super.offset, super.length), cref);
|
||||
str = cref.toString();
|
||||
super.utf16 = str;
|
||||
return str;
|
||||
}
|
||||
};
|
||||
|
||||
public StringFieldWriter(String field, FieldType fieldType) {
|
||||
this.field = field;
|
||||
|
@ -55,8 +68,14 @@ class StringFieldWriter extends FieldWriter {
|
|||
int ord = vals.ordValue();
|
||||
ref = vals.lookupOrd(ord);
|
||||
}
|
||||
fieldType.indexedToReadable(ref, cref);
|
||||
ew.put(this.field, cref.toString());
|
||||
|
||||
if (ew instanceof JavaBinCodec.BinEntryWriter) {
|
||||
ew.put(this.field, utf8.reset(ref.bytes, ref.offset, ref.length, null));
|
||||
} else {
|
||||
fieldType.indexedToReadable(ref, cref);
|
||||
String v = cref.toString();
|
||||
ew.put(this.field, v);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -125,7 +125,7 @@ public class TransactionLog implements Closeable {
|
|||
}
|
||||
|
||||
@Override
|
||||
public String readExternString(DataInputInputStream fis) throws IOException {
|
||||
public CharSequence readExternString(DataInputInputStream fis) throws IOException {
|
||||
int idx = readSize(fis);
|
||||
if (idx != 0) {// idx != 0 is the index of the extern string
|
||||
// no need to synchronize globalStringList - it's only updated before the first record is written to the log
|
||||
|
|
|
@ -145,5 +145,14 @@ public interface MapWriter extends MapSerializable , NavigableObject {
|
|||
put(k, (Boolean) v);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**This is an optimization to avoid the instanceof checks.
|
||||
*
|
||||
*/
|
||||
default EntryWriter put(CharSequence k, CharSequence v) throws IOException {
|
||||
put(k, (Object)v);
|
||||
return this;
|
||||
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,251 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.common.util;
|
||||
|
||||
import java.util.AbstractMap;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.Map;
|
||||
|
||||
import org.noggit.CharArr;
|
||||
|
||||
/**A mutable byte[] backed Utf8CharSequence. This is quite similar to the BytesRef of Lucene
|
||||
* This is designed for single-threaded use
|
||||
*
|
||||
*/
|
||||
public class ByteArrayUtf8CharSequence implements Utf8CharSequence {
|
||||
|
||||
protected byte[] buf;
|
||||
protected int offset;
|
||||
protected int hashCode = Integer.MIN_VALUE;
|
||||
protected int length;
|
||||
protected volatile String utf16;
|
||||
|
||||
public ByteArrayUtf8CharSequence(String utf16) {
|
||||
buf = new byte[Math.multiplyExact(utf16.length(), 3)];
|
||||
offset = 0;
|
||||
length = ByteUtils.UTF16toUTF8(utf16, 0, utf16.length(), buf, 0);
|
||||
if (buf.length > length) {
|
||||
byte[] copy = new byte[length];
|
||||
System.arraycopy(buf, 0, copy, 0, length);
|
||||
buf = copy;
|
||||
}
|
||||
assert isValid();
|
||||
}
|
||||
|
||||
public ByteArrayUtf8CharSequence(byte[] buf, int offset, int length) {
|
||||
this.buf = buf;
|
||||
this.offset = offset;
|
||||
this.length = length;
|
||||
}
|
||||
|
||||
public String getStringOrNull() {
|
||||
return utf16;
|
||||
}
|
||||
@Override
|
||||
public int write(int start, byte[] buffer, int pos) {
|
||||
if (start == -1 || start >= length) return -1;
|
||||
int writableBytes = Math.min(length - start, buffer.length - pos);
|
||||
System.arraycopy(buf, offset + start, buffer, pos, writableBytes);
|
||||
return writableBytes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return length;
|
||||
}
|
||||
|
||||
private ByteArrayUtf8CharSequence(byte[] buf, int offset, int length, String utf16, int hashCode) {
|
||||
this.buf = buf;
|
||||
this.offset = offset;
|
||||
this.length = length;
|
||||
this.utf16 = utf16;
|
||||
this.hashCode = hashCode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
if (hashCode == Integer.MIN_VALUE) {
|
||||
hashCode = MurmurHash2.hash32(buf, offset, length);
|
||||
}
|
||||
return hashCode;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int length() {
|
||||
return _getStr().length();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object other) {
|
||||
if (other == null) {
|
||||
return false;
|
||||
}
|
||||
if (other instanceof ByteArrayUtf8CharSequence) {
|
||||
ByteArrayUtf8CharSequence that = (ByteArrayUtf8CharSequence) other;
|
||||
return _equals(this.buf, this.offset, this.offset + this.length,
|
||||
that.buf, that.offset, that.offset + that.length);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public char charAt(int index) {
|
||||
return _getStr().charAt(index);
|
||||
}
|
||||
|
||||
private String _getStr() {
|
||||
if (utf16 == null) {
|
||||
synchronized (this) {
|
||||
if (utf16 == null) {
|
||||
CharArr arr = new CharArr();
|
||||
ByteUtils.UTF8toUTF16(buf, offset, length, arr);
|
||||
utf16 = arr.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
return utf16;
|
||||
}
|
||||
|
||||
@Override
|
||||
public CharSequence subSequence(int start, int end) {
|
||||
return new ByteArrayUtf8CharSequence(_getStr().subSequence(start, end).toString());
|
||||
}
|
||||
|
||||
@Override
|
||||
public ByteArrayUtf8CharSequence clone() {
|
||||
return new ByteArrayUtf8CharSequence(buf, offset, length, utf16, hashCode);
|
||||
}
|
||||
|
||||
public ByteArrayUtf8CharSequence deepCopy() {
|
||||
byte[] bytes = new byte[length];
|
||||
System.arraycopy(buf, offset, bytes, 0, length);
|
||||
return new ByteArrayUtf8CharSequence(bytes, 0, length, utf16, hashCode);
|
||||
}
|
||||
|
||||
public static Map.Entry convertCharSeq(Map.Entry result) {
|
||||
if (result.getKey() instanceof Utf8CharSequence || result.getValue() instanceof Utf8CharSequence) {
|
||||
return new AbstractMap.SimpleEntry(convertCharSeq(result.getKey()), convertCharSeq(result.getValue()));
|
||||
}
|
||||
return result;
|
||||
|
||||
}
|
||||
|
||||
public static Collection convertCharSeq(Collection vals) {
|
||||
if (vals == null) return vals;
|
||||
boolean needsCopy = false;
|
||||
for (Object o : vals) {
|
||||
if (o instanceof Utf8CharSequence) {
|
||||
needsCopy = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (needsCopy) {
|
||||
ArrayList copy = new ArrayList(vals.size());
|
||||
for (Object o : vals) copy.add(convertCharSeq(o));
|
||||
return copy;
|
||||
}
|
||||
return vals;
|
||||
}
|
||||
|
||||
public static Object convertCharSeq(Object o) {
|
||||
if (o == null) return null;
|
||||
if (o instanceof Utf8CharSequence) return ((Utf8CharSequence) o).toString();
|
||||
return o;
|
||||
}
|
||||
|
||||
|
||||
// methods in Arrays are defined stupid: they cannot use Objects.checkFromToIndex
|
||||
// they throw IAE (vs IOOBE) in the case of fromIndex > toIndex.
|
||||
// so this method works just like checkFromToIndex, but with that stupidity added.
|
||||
private static void checkFromToIndex(int fromIndex, int toIndex, int length) {
|
||||
if (fromIndex > toIndex) {
|
||||
throw new IllegalArgumentException("fromIndex " + fromIndex + " > toIndex " + toIndex);
|
||||
}
|
||||
if (fromIndex < 0 || toIndex > length) {
|
||||
throw new IndexOutOfBoundsException("Range [" + fromIndex + ", " + toIndex + ") out-of-bounds for length " + length);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return _getStr();
|
||||
}
|
||||
|
||||
/**
|
||||
* Behaves like Java 9's Arrays.equals
|
||||
*
|
||||
* @see <a href="http://download.java.net/java/jdk9/docs/api/java/util/Arrays.html#equals-byte:A-int-int-byte:A-int-int-">Arrays.equals</a>
|
||||
*/
|
||||
public static boolean _equals(byte[] a, int aFromIndex, int aToIndex, byte[] b, int bFromIndex, int bToIndex) {
|
||||
checkFromToIndex(aFromIndex, aToIndex, a.length);
|
||||
checkFromToIndex(bFromIndex, bToIndex, b.length);
|
||||
int aLen = aToIndex - aFromIndex;
|
||||
int bLen = bToIndex - bFromIndex;
|
||||
// lengths differ: cannot be equal
|
||||
if (aLen != bLen) {
|
||||
return false;
|
||||
}
|
||||
for (int i = 0; i < aLen; i++) {
|
||||
if (a[i + aFromIndex] != b[i + bFromIndex]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
public ByteArrayUtf8CharSequence reset(byte[] bytes, int offset, int length, String str) {
|
||||
this.buf = bytes;
|
||||
this.offset = offset;
|
||||
this.length = length;
|
||||
this.utf16 = str;
|
||||
this.hashCode = Integer.MIN_VALUE;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Performs internal consistency checks.
|
||||
* Always returns true (or throws IllegalStateException)
|
||||
*/
|
||||
public boolean isValid() {
|
||||
if (buf == null) {
|
||||
throw new IllegalStateException("bytes is null");
|
||||
}
|
||||
if (length < 0) {
|
||||
throw new IllegalStateException("length is negative: " + length);
|
||||
}
|
||||
if (length > buf.length) {
|
||||
throw new IllegalStateException("length is out of bounds: " + length + ",bytes.length=" + buf.length);
|
||||
}
|
||||
if (offset < 0) {
|
||||
throw new IllegalStateException("offset is negative: " + offset);
|
||||
}
|
||||
if (offset > buf.length) {
|
||||
throw new IllegalStateException("offset out of bounds: " + offset + ",bytes.length=" + buf.length);
|
||||
}
|
||||
if (offset + length < 0) {
|
||||
throw new IllegalStateException("offset+length is negative: offset=" + offset + ",length=" + length);
|
||||
}
|
||||
if (offset + length > buf.length) {
|
||||
throw new IllegalStateException("offset+length out of bounds: offset=" + offset + ",length=" + length + ",bytes.length=" + buf.length);
|
||||
}
|
||||
return true;
|
||||
}
|
||||
}
|
|
@ -142,6 +142,7 @@ public class ByteUtils {
|
|||
if (upto > scratch.length - 4) {
|
||||
// a code point may take upto 4 bytes and we don't have enough space, so reset
|
||||
totalBytes += upto;
|
||||
if(fos == null) throw new IOException("buffer over flow");
|
||||
fos.write(scratch, 0, upto);
|
||||
upto = 0;
|
||||
}
|
||||
|
@ -180,7 +181,7 @@ public class ByteUtils {
|
|||
}
|
||||
|
||||
totalBytes += upto;
|
||||
fos.write(scratch, 0, upto);
|
||||
if(fos != null) fos.write(scratch, 0, upto);
|
||||
|
||||
return totalBytes;
|
||||
}
|
||||
|
|
|
@ -16,7 +16,10 @@
|
|||
*/
|
||||
package org.apache.solr.common.util;
|
||||
|
||||
import java.io.*;
|
||||
import java.io.DataOutput;
|
||||
import java.io.DataOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
||||
/** Single threaded buffered OutputStream
|
||||
* Internal Solr use only, subject to change.
|
||||
|
@ -229,4 +232,18 @@ public class FastOutputStream extends OutputStream implements DataOutput {
|
|||
this.written = written;
|
||||
}
|
||||
|
||||
/**Copies a {@link Utf8CharSequence} without making extra copies
|
||||
*/
|
||||
public void writeUtf8CharSeq(Utf8CharSequence utf8) throws IOException {
|
||||
int start = 0;
|
||||
int totalWritten = 0;
|
||||
for (; ; ) {
|
||||
if (pos >= buf.length) flushBuffer();
|
||||
int sz = utf8.write(start, buf, pos);
|
||||
pos += sz;
|
||||
totalWritten += sz;
|
||||
if (totalWritten >= utf8.size()) break;
|
||||
start += sz;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -115,12 +115,18 @@ public class JavaBinCodec implements PushWriter {
|
|||
private WritableDocFields writableDocFields;
|
||||
private boolean alreadyMarshalled;
|
||||
private boolean alreadyUnmarshalled;
|
||||
private boolean readStringAsCharSeq = false;
|
||||
|
||||
public JavaBinCodec() {
|
||||
resolver =null;
|
||||
writableDocFields =null;
|
||||
}
|
||||
|
||||
public JavaBinCodec setReadStringAsCharSeq(boolean flag) {
|
||||
readStringAsCharSeq = flag;
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Use this to use this as a PushWriter. ensure that close() is called explicitly after use
|
||||
*
|
||||
|
@ -403,7 +409,7 @@ public class JavaBinCodec implements PushWriter {
|
|||
return false;
|
||||
}
|
||||
|
||||
private final MapWriter.EntryWriter ew = new MapWriter.EntryWriter() {
|
||||
public class BinEntryWriter implements MapWriter.EntryWriter {
|
||||
@Override
|
||||
public MapWriter.EntryWriter put(CharSequence k, Object v) throws IOException {
|
||||
writeExternString(k);
|
||||
|
@ -445,7 +451,16 @@ public class JavaBinCodec implements PushWriter {
|
|||
writeBoolean(v);
|
||||
return this;
|
||||
}
|
||||
};
|
||||
|
||||
@Override
|
||||
public MapWriter.EntryWriter put(CharSequence k, CharSequence v) throws IOException {
|
||||
writeExternString(k);
|
||||
writeStr(v);
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
private final MapWriter.EntryWriter ew = new BinEntryWriter();
|
||||
|
||||
|
||||
public void writeMap(MapWriter val) throws IOException {
|
||||
|
@ -832,6 +847,10 @@ public class JavaBinCodec implements PushWriter {
|
|||
writeTag(NULL);
|
||||
return;
|
||||
}
|
||||
if (s instanceof Utf8CharSequence) {
|
||||
writeUTF8Str((Utf8CharSequence) s);
|
||||
return;
|
||||
}
|
||||
int end = s.length();
|
||||
int maxSize = end * ByteUtils.MAX_UTF8_BYTES_PER_CHAR;
|
||||
|
||||
|
@ -853,11 +872,11 @@ public class JavaBinCodec implements PushWriter {
|
|||
CharArr arr = new CharArr();
|
||||
private StringBytes bytesRef = new StringBytes(bytes,0,0);
|
||||
|
||||
public String readStr(DataInputInputStream dis) throws IOException {
|
||||
public CharSequence readStr(DataInputInputStream dis) throws IOException {
|
||||
return readStr(dis,null);
|
||||
}
|
||||
|
||||
public String readStr(DataInputInputStream dis, StringCache stringCache) throws IOException {
|
||||
public CharSequence readStr(DataInputInputStream dis, StringCache stringCache) throws IOException {
|
||||
int sz = readSize(dis);
|
||||
if (bytes == null || bytes.length < sz) bytes = new byte[sz];
|
||||
dis.readFully(bytes, 0, sz);
|
||||
|
@ -865,8 +884,14 @@ public class JavaBinCodec implements PushWriter {
|
|||
return stringCache.get(bytesRef.reset(bytes, 0, sz));
|
||||
} else {
|
||||
arr.reset();
|
||||
ByteUtils.UTF8toUTF16(bytes, 0, sz, arr);
|
||||
return arr.toString();
|
||||
if (readStringAsCharSeq) {
|
||||
byte[] copyBuf = new byte[sz];
|
||||
System.arraycopy(bytes, 0, copyBuf, 0, sz);
|
||||
return new ByteArrayUtf8CharSequence(copyBuf, 0, sz);
|
||||
} else {
|
||||
ByteUtils.UTF8toUTF16(bytes, 0, sz, arr);
|
||||
return arr.toString();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -928,6 +953,8 @@ public class JavaBinCodec implements PushWriter {
|
|||
if (val == null) {
|
||||
daos.writeByte(NULL);
|
||||
return true;
|
||||
} else if (val instanceof Utf8CharSequence) {
|
||||
writeUTF8Str((Utf8CharSequence) val);
|
||||
} else if (val instanceof CharSequence) {
|
||||
writeStr((CharSequence) val);
|
||||
return true;
|
||||
|
@ -990,6 +1017,10 @@ public class JavaBinCodec implements PushWriter {
|
|||
|
||||
public void writeMap(Map<?,?> val) throws IOException {
|
||||
writeTag(MAP, val.size());
|
||||
if (val instanceof MapWriter) {
|
||||
((MapWriter) val).writeMap(ew);
|
||||
return;
|
||||
}
|
||||
for (Map.Entry<?,?> entry : val.entrySet()) {
|
||||
Object key = entry.getKey();
|
||||
if (key instanceof String) {
|
||||
|
@ -1060,7 +1091,7 @@ public class JavaBinCodec implements PushWriter {
|
|||
|
||||
private int stringsCount = 0;
|
||||
private Map<String, Integer> stringsMap;
|
||||
private List<String> stringsList;
|
||||
private List<CharSequence> stringsList;
|
||||
|
||||
public void writeExternString(CharSequence s) throws IOException {
|
||||
if (s == null) {
|
||||
|
@ -1078,19 +1109,33 @@ public class JavaBinCodec implements PushWriter {
|
|||
|
||||
}
|
||||
|
||||
public String readExternString(DataInputInputStream fis) throws IOException {
|
||||
public CharSequence readExternString(DataInputInputStream fis) throws IOException {
|
||||
int idx = readSize(fis);
|
||||
if (idx != 0) {// idx != 0 is the index of the extern string
|
||||
return stringsList.get(idx - 1);
|
||||
} else {// idx == 0 means it has a string value
|
||||
tagByte = fis.readByte();
|
||||
String s = readStr(fis, stringCache);
|
||||
CharSequence s = readStr(fis, stringCache);
|
||||
if (s != null) s = s.toString();
|
||||
if (stringsList == null) stringsList = new ArrayList<>();
|
||||
stringsList.add(s);
|
||||
return s;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public void writeUTF8Str(Utf8CharSequence utf8) throws IOException {
|
||||
writeTag(STR, utf8.size());
|
||||
daos.writeUtf8CharSeq(utf8);
|
||||
}
|
||||
|
||||
public long getTotalBytesWritten() {
|
||||
if (daos != null) {
|
||||
return daos.written;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Allows extension of {@link JavaBinCodec} to support serialization of arbitrary data types.
|
||||
* <p>
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.common.util;
|
||||
|
||||
/**
 * This is a very fast, non-cryptographic hash suitable for general hash-based
 * lookup. See http://murmurhash.googlepages.com/ for more details.
 * <p>
 * The C version of MurmurHash 2.0 found at that site was ported to Java by
 * Andrzej Bialecki (ab at getopt org).
 * </p>
 * <p>
 * The code from getopt.org was adapted by Mark Harwood in the form here as one of a pluggable choice of
 * hashing functions as the core function had to be adapted to work with BytesRefs with offsets and lengths
 * rather than raw byte arrays.
 * </p>
 * @lucene.experimental
 */
public final class MurmurHash2 {

  public static final MurmurHash2 INSTANCE = new MurmurHash2();

  private MurmurHash2() {}

  /**
   * Generates a 32 bit hash of {@code len} bytes of {@code data} starting at {@code offset}.
   *
   * @param data byte array to hash
   * @param seed initial value mixed into the hash
   * @param offset the start position in the array to hash
   * @param len number of bytes to hash
   * @return 32 bit hash of the given range
   */
  public static int hash(byte[] data, int seed, int offset, int len) {
    final int m = 0x5bd1e995;
    final int r = 24;
    final int wordCount = len >> 2;

    int h = seed ^ len;

    // Body: consume the input four bytes at a time, assembled as a little-endian int.
    for (int word = 0; word < wordCount; word++) {
      final int base = offset + (word << 2);
      int k = (data[base + 3] << 24)
          | ((data[base + 2] & 0xff) << 16)
          | ((data[base + 1] & 0xff) << 8)
          | (data[base] & 0xff);
      k *= m;
      k ^= k >>> r;
      k *= m;
      h *= m;
      h ^= k;
    }

    // Tail: mix in the 1-3 bytes left over after the last whole word. The bytes are
    // deliberately not masked, so existing hash values stay unchanged.
    final int tail = len - (wordCount << 2);
    switch (tail) {
      case 3:
        h ^= data[offset + len - 3] << 16;
        // fall through
      case 2:
        h ^= data[offset + len - 2] << 8;
        // fall through
      case 1:
        h ^= data[offset + len - 1];
        h *= m;
        break;
      default:
        break;
    }

    // Final avalanche.
    h ^= h >>> 13;
    h *= m;
    h ^= h >>> 15;
    return h;
  }

  /**
   * Generates 32 bit hash from byte array with default seed value.
   *
   * @param data
   *          byte array to hash
   * @param offset
   *          the start position in the array to hash
   * @param len
   *          length of the array elements to hash
   * @return 32 bit hash of the given array
   */
  public static final int hash32(final byte[] data, int offset, int len) {
    final int defaultSeed = 0x9747b28c;
    return hash(data, defaultSeed, offset, len);
  }

  /** Returns the simple class name. */
  @Override
  public String toString() {
    return getClass().getSimpleName();
  }
}
|
|
@ -48,7 +48,7 @@ public interface TextWriter extends PushWriter {
|
|||
// go in order of most common to least common, however some of the more general types like Map belong towards the end
|
||||
if (val == null) {
|
||||
writeNull(name);
|
||||
} else if (val instanceof String) {
|
||||
} else if (val instanceof CharSequence) {
|
||||
writeStr(name, val.toString(), true);
|
||||
// micro-optimization... using toString() avoids a cast first
|
||||
} else if (val instanceof Number) {
|
||||
|
|
|
@ -0,0 +1,63 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.common.util;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
|
||||
/**
 * A {@link CharSequence} whose content is also available as UTF-8 encoded bytes
 * (a byte[] backed String).
 */
public interface Utf8CharSequence extends CharSequence {

  /**
   * Write the bytes into a buffer. The objective is to avoid the local bytes being exposed to
   * other classes if the implementation is expected to be immutable. It writes as many bytes as
   * possible into the buffer and then returns how many bytes were written. It's the
   * responsibility of the caller to call this method repeatedly and ensure that everything is
   * completely written.
   *
   * @param start position (in UTF-8 bytes) from which to start writing
   * @param buffer the buffer to write to
   * @param pos position in {@code buffer} at which to start writing
   * @return number of bytes written
   */
  int write(int start, byte[] buffer, int pos);

  /**
   * The size of the content in UTF-8 bytes.
   *
   * @return the size
   */
  int size();

  /**
   * Copies the content to the given stream in chunks, going through a temporary byte[].
   *
   * <p>The remaining byte count is checked before every chunk, so an empty sequence writes
   * nothing and {@link #write(int, byte[], int)} is never called once everything has been
   * written.
   *
   * @param os The sink
   * @throws IOException if the sink fails, or if {@link #write(int, byte[], int)} reports no
   *     progress while bytes remain (which would otherwise never terminate)
   */
  default void write(OutputStream os) throws IOException {
    final int total = size();
    final byte[] buf = new byte[1024];
    int start = 0;
    while (start < total) {
      int sz = write(start, buf, 0);
      if (sz <= 0) {
        throw new IOException("Utf8CharSequence made no progress at byte " + start + " of " + total);
      }
      os.write(buf, 0, sz);
      start += sz;
    }
  }
}
|
|
@ -0,0 +1,61 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.solr.common.util;
|
||||
|
||||
import java.io.ByteArrayInputStream;
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.solr.SolrTestCaseJ4;
|
||||
|
||||
public class Utf8CharSequenceTest extends SolrTestCaseJ4 {
|
||||
|
||||
public void testLargeString() throws IOException {
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for (int i = 0; i < 100; i++) {
|
||||
sb.append("Hello World!");
|
||||
}
|
||||
ByteArrayUtf8CharSequence utf8 = new ByteArrayUtf8CharSequence(sb.toString());
|
||||
ByteArrayOutputStream baos = new ByteArrayOutputStream();
|
||||
byte[] buf = new byte[256];
|
||||
FastOutputStream fos = new FastOutputStream(baos, buf, 0);
|
||||
fos.writeUtf8CharSeq(utf8);
|
||||
fos.flush();
|
||||
byte[] result = baos.toByteArray();
|
||||
ByteArrayUtf8CharSequence utf81 = new ByteArrayUtf8CharSequence(result, 0, result.length);
|
||||
assertTrue(utf81.equals(utf8));
|
||||
baos.reset();
|
||||
utf8.write(baos);
|
||||
result = baos.toByteArray();
|
||||
utf81 = new ByteArrayUtf8CharSequence(result, 0, result.length);
|
||||
assertTrue(utf81.equals(utf8));
|
||||
|
||||
Map m0 = new HashMap();
|
||||
m0.put("str", utf8);
|
||||
baos.reset();
|
||||
new JavaBinCodec().marshal(m0, baos);
|
||||
result = baos.toByteArray();
|
||||
Map m1 = (Map) new JavaBinCodec()
|
||||
.setReadStringAsCharSeq(true)
|
||||
.unmarshal(new ByteArrayInputStream(result));
|
||||
utf81 = (ByteArrayUtf8CharSequence) m1.get("str");
|
||||
assertTrue(utf81.equals(utf8));
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue