lucene 4: Upgraded PercolatorExecutor
This commit is contained in:
parent
22c14c7354
commit
fcc4fe263e
|
@ -237,28 +237,6 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// LUCENE 4 UPGRADE: This logic should move to the Uid class. The Uid class should be BytesRef based instead of String based.
|
|
||||||
private static HashedBytesArray[] splitUidIntoTypeAndId(BytesRef term) {
|
|
||||||
int loc = -1;
|
|
||||||
for (int i = term.offset; i < term.length; i++) {
|
|
||||||
if (term.bytes[i] == 0x23) { // 0x23 is equal to '#'
|
|
||||||
loc = i;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (loc == -1) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
byte[] type = new byte[loc - term.offset];
|
|
||||||
System.arraycopy(term.bytes, term.offset, type, 0, type.length);
|
|
||||||
|
|
||||||
byte[] id = new byte[term.length - type.length -1];
|
|
||||||
System.arraycopy(term.bytes, loc + 1, id, 0, id.length);
|
|
||||||
return new HashedBytesArray[]{new HashedBytesArray(type), new HashedBytesArray(id)};
|
|
||||||
}
|
|
||||||
|
|
||||||
static class TypeBuilder {
|
static class TypeBuilder {
|
||||||
final ExtTObjectIntHasMap<HashedBytesArray> idToDoc = new ExtTObjectIntHasMap<HashedBytesArray>(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1);
|
final ExtTObjectIntHasMap<HashedBytesArray> idToDoc = new ExtTObjectIntHasMap<HashedBytesArray>(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1);
|
||||||
final HashedBytesArray[] docToId;
|
final HashedBytesArray[] docToId;
|
||||||
|
|
|
@ -19,6 +19,9 @@
|
||||||
|
|
||||||
package org.elasticsearch.index.mapper;
|
package org.elasticsearch.index.mapper;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
|
import org.elasticsearch.common.bytes.HashedBytesArray;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
@ -77,6 +80,14 @@ public final class Uid {
|
||||||
return uid.substring(delimiterIndex + 1);
|
return uid.substring(delimiterIndex + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public static HashedBytesArray idFromUid(BytesRef uid) {
|
||||||
|
return splitUidIntoTypeAndId(uid)[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
public static HashedBytesArray typeFromUid(BytesRef uid) {
|
||||||
|
return splitUidIntoTypeAndId(uid)[0];
|
||||||
|
}
|
||||||
|
|
||||||
public static String typeFromUid(String uid) {
|
public static String typeFromUid(String uid) {
|
||||||
int delimiterIndex = uid.indexOf(DELIMITER); // type is not allowed to have # in it..., ids can
|
int delimiterIndex = uid.indexOf(DELIMITER); // type is not allowed to have # in it..., ids can
|
||||||
return uid.substring(0, delimiterIndex);
|
return uid.substring(0, delimiterIndex);
|
||||||
|
@ -94,4 +105,27 @@ public final class Uid {
|
||||||
public static String createUid(StringBuilder sb, String type, String id) {
|
public static String createUid(StringBuilder sb, String type, String id) {
|
||||||
return sb.append(type).append(DELIMITER).append(id).toString();
|
return sb.append(type).append(DELIMITER).append(id).toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// LUCENE 4 UPGRADE: HashedBytesArray or BytesRef as return type?
|
||||||
|
private static HashedBytesArray[] splitUidIntoTypeAndId(BytesRef uid) {
|
||||||
|
int loc = -1;
|
||||||
|
for (int i = uid.offset; i < uid.length; i++) {
|
||||||
|
if (uid.bytes[i] == 0x23) { // 0x23 is equal to '#'
|
||||||
|
loc = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (loc == -1) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
byte[] type = new byte[loc - uid.offset];
|
||||||
|
System.arraycopy(uid.bytes, uid.offset, type, 0, type.length);
|
||||||
|
|
||||||
|
byte[] id = new byte[uid.length - type.length -1];
|
||||||
|
System.arraycopy(uid.bytes, loc + 1, id, 0, id.length);
|
||||||
|
return new HashedBytesArray[]{new HashedBytesArray(type), new HashedBytesArray(id)};
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,13 +20,14 @@
|
||||||
package org.elasticsearch.index.percolator;
|
package org.elasticsearch.index.percolator;
|
||||||
|
|
||||||
import org.apache.lucene.analysis.TokenStream;
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
import org.apache.lucene.document.Fieldable;
|
import org.apache.lucene.index.AtomicReaderContext;
|
||||||
import org.apache.lucene.index.IndexReader;
|
import org.apache.lucene.index.IndexableField;
|
||||||
import org.apache.lucene.index.memory.CustomMemoryIndex;
|
import org.apache.lucene.index.memory.CustomMemoryIndex;
|
||||||
import org.apache.lucene.search.Collector;
|
import org.apache.lucene.search.Collector;
|
||||||
import org.apache.lucene.search.IndexSearcher;
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
import org.apache.lucene.search.Query;
|
import org.apache.lucene.search.Query;
|
||||||
import org.apache.lucene.search.Scorer;
|
import org.apache.lucene.search.Scorer;
|
||||||
|
import org.apache.lucene.util.BytesRef;
|
||||||
import org.elasticsearch.ElasticSearchException;
|
import org.elasticsearch.ElasticSearchException;
|
||||||
import org.elasticsearch.common.Nullable;
|
import org.elasticsearch.common.Nullable;
|
||||||
import org.elasticsearch.common.Preconditions;
|
import org.elasticsearch.common.Preconditions;
|
||||||
|
@ -292,22 +293,29 @@ public class PercolatorExecutor extends AbstractIndexComponent {
|
||||||
final CustomMemoryIndex memoryIndex = new CustomMemoryIndex();
|
final CustomMemoryIndex memoryIndex = new CustomMemoryIndex();
|
||||||
|
|
||||||
// TODO: This means percolation does not support nested docs...
|
// TODO: This means percolation does not support nested docs...
|
||||||
for (Fieldable field : request.doc().rootDoc().getFields()) {
|
for (IndexableField field : request.doc().rootDoc().getFields()) {
|
||||||
if (!field.isIndexed()) {
|
if (!field.fieldType().indexed()) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// no need to index the UID field
|
// no need to index the UID field
|
||||||
if (field.name().equals(UidFieldMapper.NAME)) {
|
if (field.name().equals(UidFieldMapper.NAME)) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
TokenStream tokenStream = field.tokenStreamValue();
|
TokenStream tokenStream;
|
||||||
|
try {
|
||||||
|
tokenStream = field.tokenStream(
|
||||||
|
mapperService.documentMapper(request.doc().type()).mappers().smartNameFieldMapper(field.name()).indexAnalyzer()
|
||||||
|
);
|
||||||
|
} catch (IOException e) {
|
||||||
|
throw new ElasticSearchException("Failed to create token stream", e);
|
||||||
|
}
|
||||||
if (tokenStream != null) {
|
if (tokenStream != null) {
|
||||||
memoryIndex.addField(field.name(), tokenStream, field.getBoost());
|
memoryIndex.addField(field.name(), tokenStream, field.boost());
|
||||||
} else {
|
} else {
|
||||||
Reader reader = field.readerValue();
|
Reader reader = field.readerValue();
|
||||||
if (reader != null) {
|
if (reader != null) {
|
||||||
try {
|
try {
|
||||||
memoryIndex.addField(field.name(), request.doc().analyzer().reusableTokenStream(field.name(), reader), field.getBoost() * request.doc().rootDoc().getBoost());
|
memoryIndex.addField(field.name(), request.doc().analyzer().tokenStream(field.name(), reader), field.boost() /** request.doc().rootDoc().getBoost()*/);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new MapperParsingException("Failed to analyze field [" + field.name() + "]", e);
|
throw new MapperParsingException("Failed to analyze field [" + field.name() + "]", e);
|
||||||
}
|
}
|
||||||
|
@ -315,7 +323,7 @@ public class PercolatorExecutor extends AbstractIndexComponent {
|
||||||
String value = field.stringValue();
|
String value = field.stringValue();
|
||||||
if (value != null) {
|
if (value != null) {
|
||||||
try {
|
try {
|
||||||
memoryIndex.addField(field.name(), request.doc().analyzer().reusableTokenStream(field.name(), new FastStringReader(value)), field.getBoost() * request.doc().rootDoc().getBoost());
|
memoryIndex.addField(field.name(), request.doc().analyzer().tokenStream(field.name(), new FastStringReader(value)), field.boost() /** request.doc().rootDoc().getBoost()*/);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
throw new MapperParsingException("Failed to analyze field [" + field.name() + "]", e);
|
throw new MapperParsingException("Failed to analyze field [" + field.name() + "]", e);
|
||||||
}
|
}
|
||||||
|
@ -398,11 +406,11 @@ public class PercolatorExecutor extends AbstractIndexComponent {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void collect(int doc) throws IOException {
|
public void collect(int doc) throws IOException {
|
||||||
String uid = fieldData.stringValue(doc);
|
BytesRef uid = fieldData.stringValue(doc);
|
||||||
if (uid == null) {
|
if (uid == null) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
String id = Uid.idFromUid(uid);
|
String id = Uid.idFromUid(uid).toUtf8();
|
||||||
Query query = queries.get(id);
|
Query query = queries.get(id);
|
||||||
if (query == null) {
|
if (query == null) {
|
||||||
// log???
|
// log???
|
||||||
|
@ -421,9 +429,9 @@ public class PercolatorExecutor extends AbstractIndexComponent {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void setNextReader(IndexReader reader, int docBase) throws IOException {
|
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||||
// we use the UID because id might not be indexed
|
// we use the UID because id might not be indexed
|
||||||
fieldData = percolatorIndex.cache().fieldData().cache(FieldDataType.DefaultTypes.STRING, reader, UidFieldMapper.NAME);
|
fieldData = percolatorIndex.cache().fieldData().cache(FieldDataType.DefaultTypes.STRING, context.reader(), UidFieldMapper.NAME);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
Loading…
Reference in New Issue