Index ids in binary form. (#25352)
Indexing ids in binary form should help with indexing speed, since fewer bytes need to be compared when sorting; it should help with the memory usage of the live version map, since keys will be shorter; and it might help with disk usage, depending on how efficiently the terms dictionary compresses terms. Since we can only expect base64 ids in the auto-generated case, this PR uses an encoding that makes the binary id equal to the base64-decoded id in the majority of cases (253 out of 256). It also specializes numeric ids, since these are common when content stored in Elasticsearch comes from another database that uses e.g. auto-increment ids. Another option would be to require base64 ids all the time; that would make things simpler, but I'm not sure users would welcome the requirement. This PR should bring some benefits on its own, but I expect it to be mostly useful when coupled with something like #24615. Closes #18154
This commit is contained in:
parent 17a587e709
commit 40bb1663ee
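For orientation before the diff: the encoding picks the densest representation that can still be decoded back unambiguously. Numeric ids are packed two decimal digits per byte behind a 0xfe marker, canonical URL-safe base64 ids without padding are stored as their decoded bytes (with a 0xfd escape prepended in the 3/256 cases where the first byte would collide with a marker), and anything else falls back to UTF-8 behind a 0xff marker. Below is a minimal standalone sketch of that dispatch; the class name is invented, and it checks base64 canonicality by decoding and re-encoding instead of the hand-rolled isURLBase64WithoutPadding check used in the actual Uid change further down.

    import java.nio.charset.StandardCharsets;
    import java.util.Base64;

    public class IdEncodingSketch {

        // Marker bytes; plain base64 ids are stored without a marker.
        private static final int UTF8 = 0xff;
        private static final int NUMERIC = 0xfe;
        private static final int BASE64_ESCAPE = 0xfd;

        public static byte[] encode(String id) {
            if (id.isEmpty()) {
                throw new IllegalArgumentException("id must not be empty");
            }
            if (id.chars().allMatch(c -> c >= '0' && c <= '9')) {
                // Numeric id: two digits per byte, 0x0f marks an odd number of digits.
                byte[] b = new byte[1 + (id.length() + 1) / 2];
                b[0] = (byte) NUMERIC;
                for (int i = 0; i < id.length(); i += 2) {
                    int hi = id.charAt(i) - '0';
                    int lo = i + 1 < id.length() ? id.charAt(i + 1) - '0' : 0x0f;
                    b[1 + i / 2] = (byte) ((hi << 4) | lo);
                }
                return b;
            }
            try {
                // Canonical URL-safe base64 without padding: store the decoded bytes,
                // escaping only when the first byte would look like a marker.
                byte[] decoded = Base64.getUrlDecoder().decode(id);
                if (Base64.getUrlEncoder().withoutPadding().encodeToString(decoded).equals(id)) {
                    if (Byte.toUnsignedInt(decoded[0]) >= BASE64_ESCAPE) {
                        byte[] escaped = new byte[decoded.length + 1];
                        escaped[0] = (byte) BASE64_ESCAPE;
                        System.arraycopy(decoded, 0, escaped, 1, decoded.length);
                        return escaped;
                    }
                    return decoded;
                }
            } catch (IllegalArgumentException notBase64) {
                // not valid base64, fall through to the UTF-8 case
            }
            // Everything else: UTF-8 bytes behind a marker byte.
            byte[] utf8 = id.getBytes(StandardCharsets.UTF_8);
            byte[] b = new byte[1 + utf8.length];
            b[0] = (byte) UTF8;
            System.arraycopy(utf8, 0, b, 1, utf8.length);
            return b;
        }
    }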
@@ -19,17 +19,13 @@
package org.elasticsearch.index.fielddata;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.SortField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
import org.elasticsearch.index.fielddata.plain.AbstractAtomicOrdinalsFieldData;
import org.elasticsearch.index.mapper.UidFieldMapper;
import org.elasticsearch.search.MultiValueMode;

@@ -42,16 +38,14 @@ import java.io.IOException;
 * already using: this is just a view.
 * TODO: Remove fielddata access on _uid and _id, or add doc values to _id.
 */
public final class UidIndexFieldData implements IndexOrdinalsFieldData {
public final class UidIndexFieldData implements IndexFieldData<AtomicFieldData> {

    private final Index index;
    private final String type;
    private final BytesRef prefix;
    private final IndexOrdinalsFieldData idFieldData;
    private final IndexFieldData<?> idFieldData;

    public UidIndexFieldData(Index index, String type, IndexOrdinalsFieldData idFieldData) {
    public UidIndexFieldData(Index index, String type, IndexFieldData<?> idFieldData) {
        this.index = index;
        this.type = type;
        BytesRefBuilder prefix = new BytesRefBuilder();
        prefix.append(new BytesRef(type));
        prefix.append((byte) '#');

@@ -76,12 +70,12 @@ public final class UidIndexFieldData implements IndexOrdinalsFieldData {
    }

    @Override
    public AtomicOrdinalsFieldData load(LeafReaderContext context) {
    public AtomicFieldData load(LeafReaderContext context) {
        return new UidAtomicFieldData(prefix, idFieldData.load(context));
    }

    @Override
    public AtomicOrdinalsFieldData loadDirect(LeafReaderContext context) throws Exception {
    public AtomicFieldData loadDirect(LeafReaderContext context) throws Exception {
        return new UidAtomicFieldData(prefix, idFieldData.loadDirect(context));
    }

@@ -90,39 +84,19 @@ public final class UidIndexFieldData implements IndexOrdinalsFieldData {
        idFieldData.clear();
    }

    @Override
    public IndexOrdinalsFieldData loadGlobal(DirectoryReader indexReader) {
        return new UidIndexFieldData(index, type, idFieldData.loadGlobal(indexReader));
    }

    @Override
    public IndexOrdinalsFieldData localGlobalDirect(DirectoryReader indexReader) throws Exception {
        return new UidIndexFieldData(index, type, idFieldData.localGlobalDirect(indexReader));
    }

    @Override
    public MultiDocValues.OrdinalMap getOrdinalMap() {
        return idFieldData.getOrdinalMap();
    }

    static final class UidAtomicFieldData implements AtomicOrdinalsFieldData {
    static final class UidAtomicFieldData implements AtomicFieldData {

        private final BytesRef prefix;
        private final AtomicOrdinalsFieldData idFieldData;
        private final AtomicFieldData idFieldData;

        UidAtomicFieldData(BytesRef prefix, AtomicOrdinalsFieldData idFieldData) {
        UidAtomicFieldData(BytesRef prefix, AtomicFieldData idFieldData) {
            this.prefix = prefix;
            this.idFieldData = idFieldData;
        }

        @Override
        public ScriptDocValues<?> getScriptValues() {
            return AbstractAtomicOrdinalsFieldData.DEFAULT_SCRIPT_FUNCTION.apply(getOrdinalsValues());
        }

        @Override
        public SortedBinaryDocValues getBytesValues() {
            return FieldData.toString(getOrdinalsValues());
            return new ScriptDocValues.Strings(getBytesValues());
        }

        @Override

@@ -136,54 +110,30 @@ public final class UidIndexFieldData implements IndexOrdinalsFieldData {
        }

        @Override
        public SortedSetDocValues getOrdinalsValues() {
            SortedSetDocValues idValues = idFieldData.getOrdinalsValues();
            return new SortedSetDocValues() {
        public SortedBinaryDocValues getBytesValues() {
            SortedBinaryDocValues idValues = idFieldData.getBytesValues();
            return new SortedBinaryDocValues() {

                private final BytesRefBuilder scratch = new BytesRefBuilder();

                @Override
                public int nextDoc() throws IOException {
                    return idValues.nextDoc();
                public boolean advanceExact(int doc) throws IOException {
                    return idValues.advanceExact(doc);
                }

                @Override
                public int docID() {
                    return idValues.docID();
                public int docValueCount() {
                    return idValues.docValueCount();
                }

                @Override
                public long cost() {
                    return idValues.cost();
                }

                @Override
                public int advance(int target) throws IOException {
                    return idValues.advance(target);
                }

                @Override
                public boolean advanceExact(int target) throws IOException {
                    return idValues.advanceExact(target);
                }

                @Override
                public long nextOrd() throws IOException {
                    return idValues.nextOrd();
                }

                @Override
                public BytesRef lookupOrd(long ord) throws IOException {
                    scratch.setLength(0);
                    scratch.append(prefix);
                    scratch.append(idValues.lookupOrd(ord));
                public BytesRef nextValue() throws IOException {
                    BytesRef nextID = idValues.nextValue();
                    scratch.copyBytes(prefix);
                    scratch.append(nextID);
                    return scratch.get();
                }

                @Override
                public long getValueCount() {
                    return idValues.getValueCount();
                }
            };
        }
@@ -105,6 +105,8 @@ public class FieldsVisitor extends StoredFieldVisitor {
    public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
        if (SourceFieldMapper.NAME.equals(fieldInfo.name)) {
            source = new BytesArray(value);
        } else if (IdFieldMapper.NAME.equals(fieldInfo.name)) {
            id = Uid.decodeId(value);
        } else {
            addValue(fieldInfo.name, new BytesRef(value));
        }

@@ -114,10 +116,14 @@ public class FieldsVisitor extends StoredFieldVisitor {
    public void stringField(FieldInfo fieldInfo, byte[] bytes) throws IOException {
        final String value = new String(bytes, StandardCharsets.UTF_8);
        if (UidFieldMapper.NAME.equals(fieldInfo.name)) {
            // 5.x-only
            // TODO: Remove when we are on 7.x
            Uid uid = Uid.createUid(value);
            type = uid.type();
            id = uid.id();
        } else if (IdFieldMapper.NAME.equals(fieldInfo.name)) {
            // only applies to 5.x indices that have single_type = true
            // TODO: Remove when we are on 7.x
            id = value;
        } else {
            addValue(fieldInfo.name, value);
@@ -436,7 +436,13 @@ final class DocumentParser {
        if (idField != null) {
            // We just need to store the id as indexed field, so that IndexWriter#deleteDocuments(term) can then
            // delete it when the root document is deleted too.
            nestedDoc.add(new Field(IdFieldMapper.NAME, idField.stringValue(), IdFieldMapper.Defaults.NESTED_FIELD_TYPE));
            if (idField.stringValue() != null) {
                // backward compat with 5.x
                // TODO: Remove on 7.0
                nestedDoc.add(new Field(IdFieldMapper.NAME, idField.stringValue(), IdFieldMapper.Defaults.NESTED_FIELD_TYPE));
            } else {
                nestedDoc.add(new Field(IdFieldMapper.NAME, idField.binaryValue(), IdFieldMapper.Defaults.NESTED_FIELD_TYPE));
            }
        } else {
            throw new IllegalStateException("The root document of a nested document should have an id field");
        }
@@ -22,15 +22,28 @@ package org.elasticsearch.index.mapper;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermInSetQuery;
import org.elasticsearch.common.Nullable;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.Version;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.Index;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.fielddata.AtomicFieldData;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexFieldData.XFieldComparatorSource.Nested;
import org.elasticsearch.index.fielddata.fieldcomparator.BytesRefFieldComparatorSource;
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
import org.elasticsearch.index.fielddata.ScriptDocValues;
import org.elasticsearch.index.fielddata.SortedBinaryDocValues;
import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData;
import org.elasticsearch.index.query.QueryShardContext;
import org.elasticsearch.indices.breaker.CircuitBreakerService;
import org.elasticsearch.search.MultiValueMode;

import java.io.IOException;
import java.util.Arrays;

@@ -109,15 +122,31 @@ public class IdFieldMapper extends MetadataFieldMapper {
        }

        @Override
        public Query termQuery(Object value, @Nullable QueryShardContext context) {
        public Query termQuery(Object value, QueryShardContext context) {
            return termsQuery(Arrays.asList(value), context);
        }

        @Override
        public Query termsQuery(List<?> values, @Nullable QueryShardContext context) {
        public Query termsQuery(List<?> values, QueryShardContext context) {
            if (indexOptions() != IndexOptions.NONE) {
                // 6.x index, _id is indexed
                return super.termsQuery(values, context);
                failIfNotIndexed();
                BytesRef[] bytesRefs = new BytesRef[values.size()];
                final boolean is5xIndex = context.indexVersionCreated().before(Version.V_6_0_0_alpha3);
                for (int i = 0; i < bytesRefs.length; i++) {
                    BytesRef id;
                    if (is5xIndex) {
                        // 5.x index with index.mapping.single_type = true
                        id = BytesRefs.toBytesRef(values.get(i));
                    } else {
                        Object idObject = values.get(i);
                        if (idObject instanceof BytesRef) {
                            idObject = ((BytesRef) idObject).utf8ToString();
                        }
                        id = Uid.encodeId(idObject.toString());
                    }
                    bytesRefs[i] = id;
                }
                return new TermInSetQuery(name(), bytesRefs);
            }
            // 5.x index, _uid is indexed
            return new TermInSetQuery(UidFieldMapper.NAME, Uid.createUidsForTypesAndIds(context.queryTypes(), values));

@@ -128,13 +157,106 @@ public class IdFieldMapper extends MetadataFieldMapper {
            if (indexOptions() == IndexOptions.NONE) {
                throw new IllegalArgumentException("Fielddata access on the _uid field is disallowed");
            }
            return new PagedBytesIndexFieldData.Builder(
            final IndexFieldData.Builder fieldDataBuilder = new PagedBytesIndexFieldData.Builder(
                    TextFieldMapper.Defaults.FIELDDATA_MIN_FREQUENCY,
                    TextFieldMapper.Defaults.FIELDDATA_MAX_FREQUENCY,
                    TextFieldMapper.Defaults.FIELDDATA_MIN_SEGMENT_SIZE);
            return new IndexFieldData.Builder() {
                @Override
                public IndexFieldData<?> build(IndexSettings indexSettings, MappedFieldType fieldType, IndexFieldDataCache cache,
                        CircuitBreakerService breakerService, MapperService mapperService) {
                    final IndexFieldData<?> fieldData = fieldDataBuilder.build(indexSettings, fieldType, cache, breakerService, mapperService);
                    if (indexSettings.getIndexVersionCreated().before(Version.V_6_0_0_alpha3)) {
                        // ids were indexed as utf-8
                        return fieldData;
                    }
                    return new IndexFieldData<AtomicFieldData>() {

                        @Override
                        public Index index() {
                            return fieldData.index();
                        }

                        @Override
                        public String getFieldName() {
                            return fieldData.getFieldName();
                        }

                        @Override
                        public AtomicFieldData load(LeafReaderContext context) {
                            return wrap(fieldData.load(context));
                        }

                        @Override
                        public AtomicFieldData loadDirect(LeafReaderContext context) throws Exception {
                            return wrap(fieldData.loadDirect(context));
                        }

                        @Override
                        public SortField sortField(Object missingValue, MultiValueMode sortMode, Nested nested, boolean reverse) {
                            XFieldComparatorSource source = new BytesRefFieldComparatorSource(this, missingValue, sortMode, nested);
                            return new SortField(getFieldName(), source, reverse);
                        }

                        @Override
                        public void clear() {
                            fieldData.clear();
                        }

                    };
                }
            };
        }
    }

    private static AtomicFieldData wrap(AtomicFieldData in) {
        return new AtomicFieldData() {

            @Override
            public void close() {
                in.close();
            }

            @Override
            public long ramBytesUsed() {
                return in.ramBytesUsed();
            }

            @Override
            public ScriptDocValues<?> getScriptValues() {
                return new ScriptDocValues.Strings(getBytesValues());
            }

            @Override
            public SortedBinaryDocValues getBytesValues() {
                SortedBinaryDocValues inValues = in.getBytesValues();
                return new SortedBinaryDocValues() {

                    @Override
                    public BytesRef nextValue() throws IOException {
                        BytesRef encoded = inValues.nextValue();
                        return new BytesRef(Uid.decodeId(
                                Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length)));
                    }

                    @Override
                    public int docValueCount() {
                        final int count = inValues.docValueCount();
                        // If the count is not 1 then the impl is not correct as the binary representation
                        // does not preserve order. But id fields only have one value per doc so we are good.
                        assert count == 1;
                        return inValues.docValueCount();
                    }

                    @Override
                    public boolean advanceExact(int doc) throws IOException {
                        return inValues.advanceExact(doc);
                    }
                };
            }
        };
    }

    static MappedFieldType defaultFieldType(IndexSettings indexSettings) {
        MappedFieldType defaultFieldType = Defaults.FIELD_TYPE.clone();
        if (indexSettings.isSingleType()) {

@@ -166,8 +288,12 @@ public class IdFieldMapper extends MetadataFieldMapper {
    @Override
    protected void parseCreateField(ParseContext context, List<IndexableField> fields) throws IOException {
        if (fieldType.indexOptions() != IndexOptions.NONE || fieldType.stored()) {
            Field id = new Field(NAME, context.sourceToParse().id(), fieldType);
            fields.add(id);
            if (context.mapperService().getIndexSettings().getIndexVersionCreated().onOrAfter(Version.V_6_0_0_alpha3)) {
                BytesRef id = Uid.encodeId(context.sourceToParse().id());
                fields.add(new Field(NAME, id, fieldType));
            } else {
                fields.add(new Field(NAME, context.sourceToParse().id(), fieldType));
            }
        }
    }
@@ -805,7 +805,10 @@ public class MapperService extends AbstractIndexComponent implements Closeable {
        if (hasMapping(type) == false) {
            return null;
        }
        if (indexSettings.isSingleType()) {
        if (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_6_0_0_alpha3)) {
            assert indexSettings.isSingleType();
            return new Term(IdFieldMapper.NAME, Uid.encodeId(id));
        } else if (indexSettings.isSingleType()) {
            return new Term(IdFieldMapper.NAME, id);
        } else {
            return new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(type, id));
@@ -21,8 +21,11 @@ package org.elasticsearch.index.mapper;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.common.lucene.BytesRefs;

import java.util.Arrays;
import java.util.Base64;
import java.util.Collection;
import java.util.Collections;

@@ -123,4 +126,169 @@ public final class Uid {
        return type + DELIMITER + id;
    }

    private static final int UTF8 = 0xff;
    private static final int NUMERIC = 0xfe;
    private static final int BASE64_ESCAPE = 0xfd;

    static boolean isURLBase64WithoutPadding(String id) {
        // We are not lenient about padding chars ('=') otherwise
        // 'xxx=' and 'xxx' could be considered the same id
        final int length = id.length();
        switch (length & 0x03) {
            case 0:
                break;
            case 1:
                return false;
            case 2:
                // the last 2 symbols (12 bits) are encoding 1 byte (8 bits)
                // so the last symbol only actually uses 8-6=2 bits and can only take 4 values
                char last = id.charAt(length - 1);
                if (last != 'A' && last != 'Q' && last != 'g' && last != 'w') {
                    return false;
                }
                break;
            case 3:
                // The last 3 symbols (18 bits) are encoding 2 bytes (16 bits)
                // so the last symbol only actually uses 16-12=4 bits and can only take 16 values
                last = id.charAt(length - 1);
                if (last != 'A' && last != 'E' && last != 'I' && last != 'M' && last != 'Q'&& last != 'U'&& last != 'Y'
                        && last != 'c'&& last != 'g'&& last != 'k' && last != 'o' && last != 's' && last != 'w'
                        && last != '0' && last != '4' && last != '8') {
                    return false;
                }
                break;
            default:
                // number & 0x03 is always in [0,3]
                throw new AssertionError("Impossible case");
        }
        for (int i = 0; i < length; ++i) {
            final char c = id.charAt(i);
            final boolean allowed =
                    (c >= '0' && c <= '9') ||
                    (c >= 'A' && c <= 'Z') ||
                    (c >= 'a' && c <= 'z') ||
                    c == '-' || c == '_';
            if (allowed == false) {
                return false;
            }
        }
        return true;
    }

    static boolean isPositiveNumeric(String id) {
        for (int i = 0; i < id.length(); ++i) {
            final char c = id.charAt(i);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }

    /** With numeric ids, we just fold two consecutive chars in a single byte
     * and use 0x0f as an end marker. */
    private static BytesRef encodeNumericId(String id) {
        byte[] b = new byte[1 + (id.length() + 1) / 2];
        b[0] = (byte) NUMERIC;
        for (int i = 0; i < id.length(); i += 2) {
            int b1 = id.charAt(i) - '0';
            int b2;
            if (i + 1 == id.length()) {
                b2 = 0x0f; // end marker
            } else {
                b2 = id.charAt(i + 1) - '0';
            }
            b[1 + i/2] = (byte) ((b1 << 4) | b2);
        }
        return new BytesRef(b);
    }

    /** With base64 ids, we decode and prepend an escape char in the cases that
     * it could be mixed up with numeric or utf8 encoding. In the majority of
     * cases (253/256) the encoded id is exactly the binary form. */
    private static BytesRef encodeBase64Id(String id) {
        byte[] b = Base64.getUrlDecoder().decode(id);
        if (Byte.toUnsignedInt(b[0]) >= BASE64_ESCAPE) {
            byte[] newB = new byte[b.length + 1];
            newB[0] = (byte) BASE64_ESCAPE;
            System.arraycopy(b, 0, newB, 1, b.length);
            b = newB;
        }
        return new BytesRef(b, 0, b.length);
    }

    private static BytesRef encodeUtf8Id(String id) {
        byte[] b = new byte[1 + UnicodeUtil.maxUTF8Length(id.length())];
        // Prepend a byte that indicates that the content is an utf8 string
        b[0] = (byte) UTF8;
        int length = UnicodeUtil.UTF16toUTF8(id, 0, id.length(), b, 1);
        return new BytesRef(b, 0, length);
    }

    /** Encode an id for storage in the index. This encoding is optimized for
     * numeric and base64 ids, which are encoded in a much denser way than
     * what UTF8 would do.
     * @see #decodeId */
    public static BytesRef encodeId(String id) {
        if (id.isEmpty()) {
            throw new IllegalArgumentException("Ids can't be empty");
        }
        if (isPositiveNumeric(id)) {
            // common for ids that come from databases with auto-increments
            return encodeNumericId(id);
        } else if (isURLBase64WithoutPadding(id)) {
            // common since it applies to autogenerated ids
            return encodeBase64Id(id);
        } else {
            return encodeUtf8Id(id);
        }
    }

    private static String decodeNumericId(byte[] idBytes) {
        assert Byte.toUnsignedInt(idBytes[0]) == NUMERIC;
        int length = (idBytes.length - 1) * 2;
        char[] chars = new char[length];
        for (int i = 1; i < idBytes.length; ++i) {
            final int b = Byte.toUnsignedInt(idBytes[i]);
            final int b1 = (b >>> 4);
            final int b2 = b & 0x0f;
            chars[(i - 1) * 2] = (char) (b1 + '0');
            if (i == idBytes.length - 1 && b2 == 0x0f) {
                length--;
                break;
            }
            chars[(i - 1) * 2 + 1] = (char) (b2 + '0');
        }
        return new String(chars, 0, length);
    }

    private static String decodeUtf8Id(byte[] idBytes) {
        assert Byte.toUnsignedInt(idBytes[0]) == UTF8;
        return new BytesRef(idBytes, 1, idBytes.length - 1).utf8ToString();
    }

    private static String decodeBase64Id(byte[] idBytes) {
        assert Byte.toUnsignedInt(idBytes[0]) <= BASE64_ESCAPE;
        if (Byte.toUnsignedInt(idBytes[0]) == BASE64_ESCAPE) {
            idBytes = Arrays.copyOfRange(idBytes, 1, idBytes.length);
        }
        return Base64.getUrlEncoder().withoutPadding().encodeToString(idBytes);
    }

    /** Decode an indexed id back to its original form.
     * @see #encodeId */
    public static String decodeId(byte[] idBytes) {
        if (idBytes.length == 0) {
            throw new IllegalArgumentException("Ids can't be empty");
        }
        final int magicChar = Byte.toUnsignedInt(idBytes[0]);
        switch (magicChar) {
            case NUMERIC:
                return decodeNumericId(idBytes);
            case UTF8:
                return decodeUtf8Id(idBytes);
            default:
                return decodeBase64Id(idBytes);
        }
    }
}
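The two public entry points above are symmetric: encodeId picks the densest representation and decodeId dispatches on the first byte to undo it. A small round-trip sketch of how the callers changed elsewhere in this commit use the pair (a hypothetical standalone snippet; it only assumes the Uid class from the hunk above and Lucene's BytesRef on the classpath):

    import java.util.Arrays;

    import org.apache.lucene.util.BytesRef;
    import org.elasticsearch.index.mapper.Uid;

    public class UidRoundTripExample {
        public static void main(String[] args) {
            String[] ids = {"0123456789", "dGhlIHF1aWNrIGJyb3duIGZveA", "weird id ✓"};
            for (String id : ids) {
                // Dense binary form, as indexed and stored in the _id field from 6.0.0-alpha3 on.
                BytesRef encoded = Uid.encodeId(id);
                byte[] bytes = Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length);
                // Back to the string form, as done e.g. by FieldsVisitor#binaryField.
                String decoded = Uid.decodeId(bytes);
                if (id.equals(decoded) == false) {
                    throw new IllegalStateException(id + " did not round-trip");
                }
                System.out.println(id + " -> " + encoded.length + " byte(s)");
            }
        }
    }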
@@ -26,8 +26,7 @@ import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermInSetQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.StringHelper;
import org.elasticsearch.Version;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.logging.DeprecationLogger;
import org.elasticsearch.common.logging.Loggers;

@@ -36,7 +35,6 @@ import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.fielddata.IndexFieldData;
import org.elasticsearch.index.fielddata.IndexFieldDataCache;
import org.elasticsearch.index.fielddata.IndexOrdinalsFieldData;
import org.elasticsearch.index.fielddata.UidIndexFieldData;
import org.elasticsearch.index.fielddata.plain.PagedBytesIndexFieldData;
import org.elasticsearch.index.query.QueryShardContext;

@@ -120,7 +118,7 @@ public class UidFieldMapper extends MetadataFieldMapper {
        public IndexFieldData<?> build(IndexSettings indexSettings, MappedFieldType fieldType, IndexFieldDataCache cache,
                CircuitBreakerService breakerService, MapperService mapperService) {
            MappedFieldType idFieldType = mapperService.fullName(IdFieldMapper.NAME);
            IndexOrdinalsFieldData idFieldData = (IndexOrdinalsFieldData) idFieldType.fielddataBuilder()
            IndexFieldData<?> idFieldData = idFieldType.fielddataBuilder()
                    .build(indexSettings, idFieldType, cache, breakerService, mapperService);
            final String type = mapperService.types().iterator().next();
            return new UidIndexFieldData(indexSettings.getIndex(), type, idFieldData);

@@ -150,20 +148,22 @@ public class UidFieldMapper extends MetadataFieldMapper {
                return new MatchNoDocsQuery("No types");
            }
            assert indexTypes.size() == 1;
            BytesRef indexType = indexedValueForSearch(indexTypes.iterator().next());
            BytesRefBuilder prefixBuilder = new BytesRefBuilder();
            prefixBuilder.append(indexType);
            prefixBuilder.append((byte) '#');
            BytesRef expectedPrefix = prefixBuilder.get();
            final String expectedPrefix = indexTypes.iterator().next() + "#";
            List<BytesRef> ids = new ArrayList<>();
            for (Object uid : values) {
                BytesRef uidBytes = indexedValueForSearch(uid);
                if (StringHelper.startsWith(uidBytes, expectedPrefix)) {
                    BytesRef id = new BytesRef();
                    id.bytes = uidBytes.bytes;
                    id.offset = uidBytes.offset + expectedPrefix.length;
                    id.length = uidBytes.length - expectedPrefix.length;
                    ids.add(id);
                if (uid instanceof BytesRef) {
                    uid = ((BytesRef) uid).utf8ToString();
                }
                String uidString = uid.toString();
                if (uidString.startsWith(expectedPrefix)) {
                    String id = uidString.substring(expectedPrefix.length(), uidString.length());
                    BytesRef encodedId;
                    if (context.indexVersionCreated().onOrAfter(Version.V_6_0_0_alpha3)) {
                        encodedId = Uid.encodeId(id);
                    } else {
                        encodedId = new BytesRef(id);
                    }
                    ids.add(encodedId);
                }
            }
            return new TermInSetQuery(IdFieldMapper.NAME, ids);
@@ -30,6 +30,7 @@ import org.apache.lucene.search.ReferenceManager;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.UsageTrackingQueryCachingPolicy;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.ThreadInterruptedException;
import org.elasticsearch.Assertions;

@@ -636,7 +637,8 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
        ensureWriteAllowed(origin);
        Engine.Index operation;
        try {
            operation = prepareIndex(docMapper(sourceToParse.type()), sourceToParse, seqNo, opPrimaryTerm, version, versionType, origin,
            operation = prepareIndex(docMapper(sourceToParse.type()), indexSettings.getIndexVersionCreated(), sourceToParse, seqNo,
                opPrimaryTerm, version, versionType, origin,
                autoGeneratedTimeStamp, isRetry);
            Mapping update = operation.parsedDoc().dynamicMappingsUpdate();
            if (update != null) {

@@ -653,15 +655,18 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
        return index(getEngine(), operation);
    }

    public static Engine.Index prepareIndex(DocumentMapperForType docMapper, SourceToParse source, long seqNo, long primaryTerm, long version,
        VersionType versionType, Engine.Operation.Origin origin, long autoGeneratedIdTimestamp, boolean isRetry) {
    public static Engine.Index prepareIndex(DocumentMapperForType docMapper, Version indexCreatedVersion, SourceToParse source, long seqNo,
        long primaryTerm, long version, VersionType versionType, Engine.Operation.Origin origin, long autoGeneratedIdTimestamp,
        boolean isRetry) {
        long startTime = System.nanoTime();
        ParsedDocument doc = docMapper.getDocumentMapper().parse(source);
        if (docMapper.getMapping() != null) {
            doc.addDynamicMappingsUpdate(docMapper.getMapping());
        }
        Term uid;
        if (docMapper.getDocumentMapper().idFieldMapper().fieldType().indexOptions() != IndexOptions.NONE) {
        if (indexCreatedVersion.onOrAfter(Version.V_6_0_0_alpha3)) {
            uid = new Term(IdFieldMapper.NAME, Uid.encodeId(doc.id()));
        } else if (docMapper.getDocumentMapper().idFieldMapper().fieldType().indexOptions() != IndexOptions.NONE) {
            uid = new Term(IdFieldMapper.NAME, doc.id());
        } else {
            uid = new Term(UidFieldMapper.NAME, Uid.createUidAsBytes(doc.type(), doc.id()));

@@ -756,7 +761,13 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl
    }

    private Term extractUidForDelete(String type, String id) {
        if (indexSettings.isSingleType()) {
        if (indexSettings.getIndexVersionCreated().onOrAfter(Version.V_6_0_0_alpha3)) {
            assert indexSettings.isSingleType();
            // This is only correct because we create types dynamically on delete operations
            // otherwise this could match the same _id from a different type
            BytesRef idBytes = Uid.encodeId(id);
            return new Term(IdFieldMapper.NAME, idBytes);
        } else if (indexSettings.isSingleType()) {
            // This is only correct because we create types dynamically on delete operations
            // otherwise this could match the same _id from a different type
            return new Term(IdFieldMapper.NAME, id);
@@ -1192,7 +1192,11 @@ public class Translog extends AbstractIndexShardComponent implements IndexShardC
            if (format >= FORMAT_SINGLE_TYPE) {
                type = in.readString();
                id = in.readString();
                uid = new Term(in.readString(), in.readString());
                if (format >= FORMAT_SEQ_NO) {
                    uid = new Term(in.readString(), in.readBytesRef());
                } else {
                    uid = new Term(in.readString(), in.readString());
                }
            } else {
                uid = new Term(in.readString(), in.readString());
                // the uid was constructed from the type and id so we can

@@ -1283,7 +1287,7 @@ public class Translog extends AbstractIndexShardComponent implements IndexShardC
            out.writeString(type);
            out.writeString(id);
            out.writeString(uid.field());
            out.writeString(uid.text());
            out.writeBytesRef(uid.bytes());
            out.writeLong(version);
            out.writeByte(versionType.getValue());
            out.writeLong(seqNo);
@@ -301,7 +301,7 @@ public class InternalEngineTests extends ESTestCase {
    }

    private static ParsedDocument testParsedDocument(String id, String routing, Document document, BytesReference source, Mapping mappingUpdate) {
        Field uidField = new Field("_id", id, IdFieldMapper.Defaults.FIELD_TYPE);
        Field uidField = new Field("_id", Uid.encodeId(id), IdFieldMapper.Defaults.FIELD_TYPE);
        Field versionField = new NumericDocValuesField("_version", 0);
        SeqNoFieldMapper.SequenceIDFields seqID = SeqNoFieldMapper.SequenceIDFields.emptySeqID();
        document.add(uidField);

@@ -2354,11 +2354,11 @@ public class InternalEngineTests extends ESTestCase {
    }

    protected Term newUid(String id) {
        return new Term("_id", id);
        return new Term("_id", Uid.encodeId(id));
    }

    protected Term newUid(ParsedDocument doc) {
        return new Term("_id", doc.id());
        return newUid(doc.id());
    }

    protected Engine.Get newGet(boolean realtime, ParsedDocument doc) {

@@ -2819,6 +2819,7 @@ public class InternalEngineTests extends ESTestCase {
                final Translog.Index index = (Translog.Index) operation;
                final String indexName = mapperService.index().getName();
                final Engine.Index engineIndex = IndexShard.prepareIndex(docMapper(index.type()),
                        mapperService.getIndexSettings().getIndexVersionCreated(),
                        source(indexName, index.type(), index.id(), index.source(), XContentFactory.xContentType(index.source()))
                            .routing(index.routing()).parent(index.parent()), index.seqNo(), index.primaryTerm(),
                        index.version(), index.versionType().versionTypeForReplicationAndRecovery(), origin,
@@ -111,18 +111,15 @@ public class UidFieldDataTests extends ESTestCase {
    }

    public void testSortedSetValues() throws Exception {
        AtomicOrdinalsFieldData fd = new UidIndexFieldData.UidAtomicFieldData(new BytesRef("type#"), new DummyAtomicOrdinalsFieldData());
        SortedSetDocValues dv = fd.getOrdinalsValues();
        AtomicFieldData fd = new UidIndexFieldData.UidAtomicFieldData(new BytesRef("type#"), new DummyAtomicOrdinalsFieldData());
        SortedBinaryDocValues dv = fd.getBytesValues();
        assertTrue(dv.advanceExact(30));
        assertEquals(30, dv.nextOrd());
        assertEquals(SortedSetDocValues.NO_MORE_ORDS, dv.nextOrd());
        assertEquals(new BytesRef("type#030"), dv.lookupOrd(30));
        assertEquals(30, dv.lookupTerm(new BytesRef("type#030")));
        assertEquals(-1 - 31, dv.lookupTerm(new BytesRef("type#0305")));
        assertEquals(1, dv.docValueCount());
        assertEquals(new BytesRef("type#030"), dv.nextValue());
    }

    public void testScriptValues() throws IOException {
        AtomicOrdinalsFieldData fd = new UidIndexFieldData.UidAtomicFieldData(new BytesRef("type#"), new DummyAtomicOrdinalsFieldData());
        AtomicFieldData fd = new UidIndexFieldData.UidAtomicFieldData(new BytesRef("type#"), new DummyAtomicOrdinalsFieldData());
        ScriptDocValues<?> values = fd.getScriptValues();
        values.setNextDocId(30);
        assertEquals(Collections.singletonList("type#030"), values);
@@ -220,7 +220,7 @@ public class DocumentParserTests extends ESSingleNodeTestCase {
        // Nested document:
        assertNull(result.docs().get(0).getField(UidFieldMapper.NAME));
        assertNotNull(result.docs().get(0).getField(IdFieldMapper.NAME));
        assertEquals("1", result.docs().get(0).getField(IdFieldMapper.NAME).stringValue());
        assertEquals(Uid.encodeId("1"), result.docs().get(0).getField(IdFieldMapper.NAME).binaryValue());
        assertEquals(IdFieldMapper.Defaults.NESTED_FIELD_TYPE, result.docs().get(0).getField(IdFieldMapper.NAME).fieldType());
        assertNotNull(result.docs().get(0).getField(TypeFieldMapper.NAME));
        assertEquals("__foo", result.docs().get(0).getField(TypeFieldMapper.NAME).stringValue());

@@ -228,7 +228,7 @@ public class DocumentParserTests extends ESSingleNodeTestCase {
        // Root document:
        assertNull(result.docs().get(1).getField(UidFieldMapper.NAME));
        assertNotNull(result.docs().get(1).getField(IdFieldMapper.NAME));
        assertEquals("1", result.docs().get(1).getField(IdFieldMapper.NAME).stringValue());
        assertEquals(Uid.encodeId("1"), result.docs().get(1).getField(IdFieldMapper.NAME).binaryValue());
        assertEquals(IdFieldMapper.Defaults.FIELD_TYPE, result.docs().get(1).getField(IdFieldMapper.NAME).fieldType());
        assertNull(result.docs().get(1).getField(TypeFieldMapper.NAME));
        assertEquals("value2", result.docs().get(1).getField("baz").binaryValue().utf8ToString());

@@ -1039,7 +1039,7 @@ public class DocumentParserTests extends ESSingleNodeTestCase {
        DocumentMapper builtDocMapper = parser.parse("person", new CompressedXContent(builtMapping));
        BytesReference json = new BytesArray(copyToBytesFromClasspath("/org/elasticsearch/index/mapper/simple/test1.json"));
        Document doc = builtDocMapper.parse(SourceToParse.source("test", "person", "1", json, XContentType.JSON)).rootDoc();
        assertThat(doc.get(docMapper.idFieldMapper().fieldType().name()), equalTo("1"));
        assertThat(doc.getBinaryValue(docMapper.idFieldMapper().fieldType().name()), equalTo(Uid.encodeId("1")));
        assertThat(doc.get(docMapper.mappers().getMapper("name.first").fieldType().name()), equalTo("shay"));
    }

@@ -1051,7 +1051,7 @@ public class DocumentParserTests extends ESSingleNodeTestCase {

        BytesReference json = new BytesArray(copyToBytesFromClasspath("/org/elasticsearch/index/mapper/simple/test1.json"));
        Document doc = docMapper.parse(SourceToParse.source("test", "person", "1", json, XContentType.JSON)).rootDoc();
        assertThat(doc.get(docMapper.idFieldMapper().fieldType().name()), equalTo("1"));
        assertThat(doc.getBinaryValue(docMapper.idFieldMapper().fieldType().name()), equalTo(Uid.encodeId("1")));
        assertThat(doc.get(docMapper.mappers().getMapper("name.first").fieldType().name()), equalTo("shay"));
    }

@@ -1060,7 +1060,7 @@ public class DocumentParserTests extends ESSingleNodeTestCase {
        DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse("person", new CompressedXContent(mapping));
        BytesReference json = new BytesArray(copyToBytesFromClasspath("/org/elasticsearch/index/mapper/simple/test1-notype-noid.json"));
        Document doc = docMapper.parse(SourceToParse.source("test", "person", "1", json, XContentType.JSON)).rootDoc();
        assertThat(doc.get(docMapper.idFieldMapper().fieldType().name()), equalTo("1"));
        assertThat(doc.getBinaryValue(docMapper.idFieldMapper().fieldType().name()), equalTo(Uid.encodeId("1")));
        assertThat(doc.get(docMapper.mappers().getMapper("name.first").fieldType().name()), equalTo("shay"));
    }
@@ -19,7 +19,6 @@

package org.elasticsearch.index.mapper;

import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.elasticsearch.Version;

@@ -77,6 +76,7 @@ public class IdFieldMapperTests extends ESSingleNodeTestCase {
        assertEquals(1, fields.length);
        assertEquals(IndexOptions.DOCS, fields[0].fieldType().indexOptions());
        assertTrue(fields[0].fieldType().stored());
        assertEquals("id", fields[0].stringValue());
        assertEquals(Uid.encodeId("id"), fields[0].binaryValue());
    }

}
@@ -74,6 +74,28 @@ public class IdFieldTypeTests extends FieldTypeTestCase {
    }

    public void testTermsQueryWhenTypesAreDisabled() throws Exception {
        QueryShardContext context = Mockito.mock(QueryShardContext.class);
        Settings indexSettings = Settings.builder()
                .put(IndexSettings.INDEX_MAPPING_SINGLE_TYPE_SETTING_KEY, true)
                .put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_5_6_0)
                .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
                .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
                .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()).build();
        IndexMetaData indexMetaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(indexSettings).build();
        IndexSettings mockSettings = new IndexSettings(indexMetaData, Settings.EMPTY);
        Mockito.when(context.getIndexSettings()).thenReturn(mockSettings);
        Mockito.when(context.indexVersionCreated()).thenReturn(indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, null));

        MapperService mapperService = Mockito.mock(MapperService.class);
        Collection<String> types = Collections.singleton("type");
        Mockito.when(context.queryTypes()).thenReturn(types);
        Mockito.when(context.getMapperService()).thenReturn(mapperService);
        MappedFieldType ft = IdFieldMapper.defaultFieldType(mockSettings);
        Query query = ft.termQuery("id", context);
        assertEquals(new TermInSetQuery("_id", new BytesRef("id")), query);
    }

    public void testTermsQuery() throws Exception {
        QueryShardContext context = Mockito.mock(QueryShardContext.class);
        Settings indexSettings = Settings.builder()
                .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)

@@ -83,6 +105,7 @@ public class IdFieldTypeTests extends FieldTypeTestCase {
        IndexMetaData indexMetaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(indexSettings).build();
        IndexSettings mockSettings = new IndexSettings(indexMetaData, Settings.EMPTY);
        Mockito.when(context.getIndexSettings()).thenReturn(mockSettings);
        Mockito.when(context.indexVersionCreated()).thenReturn(indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, null));

        MapperService mapperService = Mockito.mock(MapperService.class);
        Collection<String> types = Collections.emptySet();

@@ -92,11 +115,11 @@ public class IdFieldTypeTests extends FieldTypeTestCase {
        MappedFieldType ft = IdFieldMapper.defaultFieldType(mockSettings);
        ft.setName(IdFieldMapper.NAME);
        Query query = ft.termQuery("id", context);
        assertEquals(new TermInSetQuery("_id", new BytesRef("id")), query);
        assertEquals(new TermInSetQuery("_id", Uid.encodeId("id")), query);

        types = Collections.singleton("type");
        Mockito.when(context.queryTypes()).thenReturn(types);
        query = ft.termQuery("id", context);
        assertEquals(new TermInSetQuery("_id", new BytesRef("id")), query);
        assertEquals(new TermInSetQuery("_id", Uid.encodeId("id")), query);
    }
}
@@ -75,7 +75,8 @@ public class UidFieldTypeTests extends FieldTypeTestCase {
    public void testTermsQueryWhenTypesAreDisabled() throws Exception {
        QueryShardContext context = Mockito.mock(QueryShardContext.class);
        Settings indexSettings = Settings.builder()
                .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
                .put(IndexSettings.INDEX_MAPPING_SINGLE_TYPE_SETTING_KEY, true)
                .put(IndexMetaData.SETTING_VERSION_CREATED, Version.V_5_6_0)
                .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
                .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
                .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()).build();

@@ -87,6 +88,7 @@ public class UidFieldTypeTests extends FieldTypeTestCase {
        Collection<String> types = Collections.emptySet();
        Mockito.when(mapperService.types()).thenReturn(types);
        Mockito.when(context.getMapperService()).thenReturn(mapperService);
        Mockito.when(context.indexVersionCreated()).thenReturn(indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, null));

        MappedFieldType ft = UidFieldMapper.defaultFieldType(mockSettings);
        ft.setName(UidFieldMapper.NAME);

@@ -100,4 +102,34 @@ public class UidFieldTypeTests extends FieldTypeTestCase {
        query = ft.termQuery("type2#id", context);
        assertEquals(new TermInSetQuery("_id"), query);
    }

    public void testTermsQuery() throws Exception {
        QueryShardContext context = Mockito.mock(QueryShardContext.class);
        Settings indexSettings = Settings.builder()
                .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
                .put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
                .put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1)
                .put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID()).build();
        IndexMetaData indexMetaData = IndexMetaData.builder(IndexMetaData.INDEX_UUID_NA_VALUE).settings(indexSettings).build();
        IndexSettings mockSettings = new IndexSettings(indexMetaData, Settings.EMPTY);
        Mockito.when(context.getIndexSettings()).thenReturn(mockSettings);

        MapperService mapperService = Mockito.mock(MapperService.class);
        Collection<String> types = Collections.emptySet();
        Mockito.when(mapperService.types()).thenReturn(types);
        Mockito.when(context.getMapperService()).thenReturn(mapperService);
        Mockito.when(context.indexVersionCreated()).thenReturn(indexSettings.getAsVersion(IndexMetaData.SETTING_VERSION_CREATED, null));

        MappedFieldType ft = UidFieldMapper.defaultFieldType(mockSettings);
        ft.setName(UidFieldMapper.NAME);
        Query query = ft.termQuery("type#id", context);
        assertEquals(new MatchNoDocsQuery(), query);

        types = Collections.singleton("type");
        Mockito.when(mapperService.types()).thenReturn(types);
        query = ft.termQuery("type#id", context);
        assertEquals(new TermInSetQuery("_id", Uid.encodeId("id")), query);
        query = ft.termQuery("type2#id", context);
        assertEquals(new TermInSetQuery("_id"), query);
    }
}
@@ -19,8 +19,12 @@
package org.elasticsearch.index.mapper;

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TestUtil;
import org.elasticsearch.test.ESTestCase;

import java.util.Arrays;
import java.util.Base64;

import static org.hamcrest.Matchers.equalTo;

public class UidTests extends ESTestCase {

@@ -59,4 +63,51 @@ public class UidTests extends ESTestCase {
            new BytesRef(uid.bytes, idStart, limit - idStart)
        };
    }

    public void testIsURLBase64WithoutPadding() {
        assertTrue(Uid.isURLBase64WithoutPadding(""));
        assertFalse(Uid.isURLBase64WithoutPadding("a"));
        assertFalse(Uid.isURLBase64WithoutPadding("aa"));
        assertTrue(Uid.isURLBase64WithoutPadding("aw"));
        assertFalse(Uid.isURLBase64WithoutPadding("aaa"));
        assertTrue(Uid.isURLBase64WithoutPadding("aac"));
        assertTrue(Uid.isURLBase64WithoutPadding("aaaa"));
    }

    public void testEncodeUTF8Ids() {
        final int iters = 10000;
        for (int iter = 0; iter < iters; ++iter) {
            final String id = TestUtil.randomRealisticUnicodeString(random(), 1, 10);
            BytesRef encoded = Uid.encodeId(id);
            assertEquals(id, Uid.decodeId(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length)));
            assertTrue(encoded.length <= 1 + new BytesRef(id).length);
        }
    }

    public void testEncodeNumericIds() {
        final int iters = 10000;
        for (int iter = 0; iter < iters; ++iter) {
            String id = Long.toString(TestUtil.nextLong(random(), 0, 1L << randomInt(62)));
            if (randomBoolean()) {
                // prepend a zero to make sure leading zeros are not ignored
                id = "0" + id;
            }
            BytesRef encoded = Uid.encodeId(id);
            assertEquals(id, Uid.decodeId(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length)));
            assertEquals(1 + (id.length() + 1) / 2, encoded.length);
        }
    }

    public void testEncodeBase64Ids() {
        final int iters = 10000;
        for (int iter = 0; iter < iters; ++iter) {
            final byte[] binaryId = new byte[TestUtil.nextInt(random(), 1, 10)];
            random().nextBytes(binaryId);
            final String id = Base64.getUrlEncoder().withoutPadding().encodeToString(binaryId);
            BytesRef encoded = Uid.encodeId(id);
            assertEquals(id, Uid.decodeId(Arrays.copyOfRange(encoded.bytes, encoded.offset, encoded.offset + encoded.length)));
            assertTrue(encoded.length <= 1 + binaryId.length);
        }
    }

}
@@ -80,6 +80,7 @@ import org.elasticsearch.index.mapper.ParseContext;
import org.elasticsearch.index.mapper.ParsedDocument;
import org.elasticsearch.index.mapper.SeqNoFieldMapper;
import org.elasticsearch.index.mapper.SourceToParse;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.seqno.SequenceNumbers;
import org.elasticsearch.index.seqno.SequenceNumbersService;
import org.elasticsearch.index.snapshots.IndexShardSnapshotStatus;

@@ -986,7 +987,8 @@ public class IndexShardTests extends IndexShardTestCase {
        }
        long refreshCount = shard.refreshStats().getTotal();
        indexDoc(shard, "test", "test");
        try (Engine.GetResult ignored = shard.get(new Engine.Get(true, "test", "test", new Term("_id", "test")))) {
        try (Engine.GetResult ignored = shard.get(new Engine.Get(true, "test", "test",
            new Term(IdFieldMapper.NAME, Uid.encodeId("test"))))) {
            assertThat(shard.refreshStats().getTotal(), equalTo(refreshCount + 1));
        }
        closeShards(shard);

@@ -1520,7 +1522,7 @@ public class IndexShardTests extends IndexShardTestCase {
        indexDoc(shard, "test", "1", "{\"foobar\" : \"bar\"}");
        shard.refresh("test");

        Engine.GetResult getResult = shard.get(new Engine.Get(false, "test", "1", new Term(IdFieldMapper.NAME, "1")));
        Engine.GetResult getResult = shard.get(new Engine.Get(false, "test", "1", new Term(IdFieldMapper.NAME, Uid.encodeId("1"))));
        assertTrue(getResult.exists());
        assertNotNull(getResult.searcher());
        getResult.release();

@@ -1553,7 +1555,7 @@ public class IndexShardTests extends IndexShardTestCase {
            search = searcher.searcher().search(new TermQuery(new Term("foobar", "bar")), 10);
            assertEquals(search.totalHits, 1);
        }
        getResult = newShard.get(new Engine.Get(false, "test", "1", new Term(IdFieldMapper.NAME, "1")));
        getResult = newShard.get(new Engine.Get(false, "test", "1", new Term(IdFieldMapper.NAME, Uid.encodeId("1"))));
        assertTrue(getResult.exists());
        assertNotNull(getResult.searcher()); // make sure get uses the wrapped reader
        assertTrue(getResult.searcher().reader() instanceof FieldMaskingReader);
@@ -38,6 +38,7 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.query.IdsQueryBuilder;
import org.elasticsearch.index.query.InnerHitBuilder;
import org.elasticsearch.index.query.InnerHitContextBuilder;

@@ -312,7 +313,7 @@ public class HasChildQueryBuilderTests extends AbstractQueryTestCase<HasChildQue
        assertThat(booleanTermsQuery.clauses().get(0).getQuery(), instanceOf(TermQuery.class));
        TermQuery termQuery = (TermQuery) booleanTermsQuery.clauses().get(0).getQuery();
        assertThat(termQuery.getTerm().field(), equalTo(IdFieldMapper.NAME));
        assertThat(termQuery.getTerm().bytes().utf8ToString(), equalTo(id));
        assertThat(termQuery.getTerm().bytes(), equalTo(Uid.encodeId(id)));
        //check the type filter
        assertThat(booleanQuery.clauses().get(1).getOccur(), equalTo(BooleanClause.Occur.FILTER));
        assertEquals(new TermQuery(new Term("join_field", type)), booleanQuery.clauses().get(1).getQuery());
@@ -24,6 +24,7 @@ import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.Version;

@@ -57,6 +58,7 @@ import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.MapperService;
import org.elasticsearch.index.mapper.Mapping;
import org.elasticsearch.index.mapper.SourceToParse;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.seqno.SequenceNumbersService;
import org.elasticsearch.index.similarity.SimilarityService;
import org.elasticsearch.index.store.DirectoryService;

@@ -455,7 +457,8 @@ public abstract class IndexShardTestCase extends ESTestCase {
                for (int i = 0; i < reader.maxDoc(); i++) {
                    if (liveDocs == null || liveDocs.get(i)) {
                        Document uuid = reader.document(i, Collections.singleton(IdFieldMapper.NAME));
                        ids.add(uuid.get(IdFieldMapper.NAME));
                        BytesRef binaryID = uuid.getBinaryValue(IdFieldMapper.NAME);
                        ids.add(Uid.decodeId(Arrays.copyOfRange(binaryID.bytes, binaryID.offset, binaryID.offset + binaryID.length)));
                    }
                }
            }