lucene 4: Upgraded PercolatorExecutor

This commit is contained in:
Martijn van Groningen 2012-10-30 11:33:57 +01:00 committed by Shay Banon
parent 22c14c7354
commit fcc4fe263e
3 changed files with 54 additions and 34 deletions

View File

@ -237,28 +237,6 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se
return false; return false;
} }
// LUCENE 4 UPGRADE: This logic should move to the Uid class. The Uid class should be BytesRef based instead of String based.
/**
 * Splits a uid term of the form {@code type#id} at the first {@code '#'} byte.
 *
 * @param term the uid bytes; only the slice starting at {@code term.offset} and
 *             spanning {@code term.length} bytes of {@code term.bytes} is read
 * @return a two element array holding the type at index 0 and the id at index 1,
 *         each copied into its own byte array, or {@code null} when the slice
 *         contains no {@code '#'} byte
 */
private static HashedBytesArray[] splitUidIntoTypeAndId(BytesRef term) {
    // The slice ends at offset + length; using length alone as the bound would stop
    // the scan too early (or skip it entirely) whenever offset is non-zero.
    final int end = term.offset + term.length;
    int loc = -1;
    for (int i = term.offset; i < end; i++) {
        if (term.bytes[i] == 0x23) { // 0x23 is equal to '#'
            loc = i;
            break;
        }
    }

    if (loc == -1) {
        return null;
    }

    // Everything before the delimiter is the type.
    byte[] type = new byte[loc - term.offset];
    System.arraycopy(term.bytes, term.offset, type, 0, type.length);

    // Everything after the delimiter is the id (slice length minus type minus the '#').
    byte[] id = new byte[term.length - type.length - 1];
    System.arraycopy(term.bytes, loc + 1, id, 0, id.length);
    return new HashedBytesArray[]{new HashedBytesArray(type), new HashedBytesArray(id)};
}
static class TypeBuilder { static class TypeBuilder {
final ExtTObjectIntHasMap<HashedBytesArray> idToDoc = new ExtTObjectIntHasMap<HashedBytesArray>(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1); final ExtTObjectIntHasMap<HashedBytesArray> idToDoc = new ExtTObjectIntHasMap<HashedBytesArray>(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1);
final HashedBytesArray[] docToId; final HashedBytesArray[] docToId;

View File

@ -19,6 +19,9 @@
package org.elasticsearch.index.mapper; package org.elasticsearch.index.mapper;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.bytes.HashedBytesArray;
/** /**
* *
*/ */
@ -77,6 +80,14 @@ public final class Uid {
return uid.substring(delimiterIndex + 1); return uid.substring(delimiterIndex + 1);
} }
/**
 * Extracts the id part of a {@code type#id} uid.
 *
 * @param uid the uid bytes
 * @return the second element produced by {@code splitUidIntoTypeAndId}, i.e. the id
 * @throws NullPointerException if the split yields {@code null}, which happens when
 *                              no {@code '#'} delimiter is found
 */
public static HashedBytesArray idFromUid(BytesRef uid) {
    final HashedBytesArray[] typeAndId = splitUidIntoTypeAndId(uid);
    return typeAndId[1];
}
/**
 * Extracts the type part of a {@code type#id} uid.
 *
 * @param uid the uid bytes
 * @return the first element produced by {@code splitUidIntoTypeAndId}, i.e. the type
 * @throws NullPointerException if the split yields {@code null}, which happens when
 *                              no {@code '#'} delimiter is found
 */
public static HashedBytesArray typeFromUid(BytesRef uid) {
    final HashedBytesArray[] typeAndId = splitUidIntoTypeAndId(uid);
    return typeAndId[0];
}
public static String typeFromUid(String uid) { public static String typeFromUid(String uid) {
int delimiterIndex = uid.indexOf(DELIMITER); // type is not allowed to have # in it..., ids can int delimiterIndex = uid.indexOf(DELIMITER); // type is not allowed to have # in it..., ids can
return uid.substring(0, delimiterIndex); return uid.substring(0, delimiterIndex);
@ -94,4 +105,27 @@ public final class Uid {
public static String createUid(StringBuilder sb, String type, String id) { public static String createUid(StringBuilder sb, String type, String id) {
return sb.append(type).append(DELIMITER).append(id).toString(); return sb.append(type).append(DELIMITER).append(id).toString();
} }
// LUCENE 4 UPGRADE: HashedBytesArray or BytesRef as return type?
/**
 * Splits a uid of the form {@code type#id} at the first {@code '#'} byte.
 *
 * @param uid the uid bytes; only the slice starting at {@code uid.offset} and
 *            spanning {@code uid.length} bytes of {@code uid.bytes} is read
 * @return a two element array holding the type at index 0 and the id at index 1,
 *         each copied into its own byte array, or {@code null} when the slice
 *         contains no {@code '#'} byte
 */
private static HashedBytesArray[] splitUidIntoTypeAndId(BytesRef uid) {
    // The slice ends at offset + length; using length alone as the bound would stop
    // the scan too early (or skip it entirely) whenever offset is non-zero.
    final int end = uid.offset + uid.length;
    int loc = -1;
    for (int i = uid.offset; i < end; i++) {
        if (uid.bytes[i] == 0x23) { // 0x23 is equal to '#'
            loc = i;
            break;
        }
    }

    if (loc == -1) {
        return null;
    }

    // Everything before the delimiter is the type.
    byte[] type = new byte[loc - uid.offset];
    System.arraycopy(uid.bytes, uid.offset, type, 0, type.length);

    // Everything after the delimiter is the id (slice length minus type minus the '#').
    byte[] id = new byte[uid.length - type.length - 1];
    System.arraycopy(uid.bytes, loc + 1, id, 0, id.length);
    return new HashedBytesArray[]{new HashedBytesArray(type), new HashedBytesArray(id)};
}
} }

View File

@ -20,13 +20,14 @@
package org.elasticsearch.index.percolator; package org.elasticsearch.index.percolator;
import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.memory.CustomMemoryIndex; import org.apache.lucene.index.memory.CustomMemoryIndex;
import org.apache.lucene.search.Collector; import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Scorer;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticSearchException; import org.elasticsearch.ElasticSearchException;
import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.Preconditions; import org.elasticsearch.common.Preconditions;
@ -292,22 +293,29 @@ public class PercolatorExecutor extends AbstractIndexComponent {
final CustomMemoryIndex memoryIndex = new CustomMemoryIndex(); final CustomMemoryIndex memoryIndex = new CustomMemoryIndex();
// TODO: This means percolation does not support nested docs... // TODO: This means percolation does not support nested docs...
for (Fieldable field : request.doc().rootDoc().getFields()) { for (IndexableField field : request.doc().rootDoc().getFields()) {
if (!field.isIndexed()) { if (!field.fieldType().indexed()) {
continue; continue;
} }
// no need to index the UID field // no need to index the UID field
if (field.name().equals(UidFieldMapper.NAME)) { if (field.name().equals(UidFieldMapper.NAME)) {
continue; continue;
} }
TokenStream tokenStream = field.tokenStreamValue(); TokenStream tokenStream;
try {
tokenStream = field.tokenStream(
mapperService.documentMapper(request.doc().type()).mappers().smartNameFieldMapper(field.name()).indexAnalyzer()
);
} catch (IOException e) {
throw new ElasticSearchException("Failed to create token stream", e);
}
if (tokenStream != null) { if (tokenStream != null) {
memoryIndex.addField(field.name(), tokenStream, field.getBoost()); memoryIndex.addField(field.name(), tokenStream, field.boost());
} else { } else {
Reader reader = field.readerValue(); Reader reader = field.readerValue();
if (reader != null) { if (reader != null) {
try { try {
memoryIndex.addField(field.name(), request.doc().analyzer().reusableTokenStream(field.name(), reader), field.getBoost() * request.doc().rootDoc().getBoost()); memoryIndex.addField(field.name(), request.doc().analyzer().tokenStream(field.name(), reader), field.boost() /** request.doc().rootDoc().getBoost()*/);
} catch (IOException e) { } catch (IOException e) {
throw new MapperParsingException("Failed to analyze field [" + field.name() + "]", e); throw new MapperParsingException("Failed to analyze field [" + field.name() + "]", e);
} }
@ -315,7 +323,7 @@ public class PercolatorExecutor extends AbstractIndexComponent {
String value = field.stringValue(); String value = field.stringValue();
if (value != null) { if (value != null) {
try { try {
memoryIndex.addField(field.name(), request.doc().analyzer().reusableTokenStream(field.name(), new FastStringReader(value)), field.getBoost() * request.doc().rootDoc().getBoost()); memoryIndex.addField(field.name(), request.doc().analyzer().tokenStream(field.name(), new FastStringReader(value)), field.boost() /** request.doc().rootDoc().getBoost()*/);
} catch (IOException e) { } catch (IOException e) {
throw new MapperParsingException("Failed to analyze field [" + field.name() + "]", e); throw new MapperParsingException("Failed to analyze field [" + field.name() + "]", e);
} }
@ -398,11 +406,11 @@ public class PercolatorExecutor extends AbstractIndexComponent {
@Override @Override
public void collect(int doc) throws IOException { public void collect(int doc) throws IOException {
String uid = fieldData.stringValue(doc); BytesRef uid = fieldData.stringValue(doc);
if (uid == null) { if (uid == null) {
return; return;
} }
String id = Uid.idFromUid(uid); String id = Uid.idFromUid(uid).toUtf8();
Query query = queries.get(id); Query query = queries.get(id);
if (query == null) { if (query == null) {
// log??? // log???
@ -421,9 +429,9 @@ public class PercolatorExecutor extends AbstractIndexComponent {
} }
@Override @Override
public void setNextReader(IndexReader reader, int docBase) throws IOException { public void setNextReader(AtomicReaderContext context) throws IOException {
// we use the UID because id might not be indexed // we use the UID because id might not be indexed
fieldData = percolatorIndex.cache().fieldData().cache(FieldDataType.DefaultTypes.STRING, reader, UidFieldMapper.NAME); fieldData = percolatorIndex.cache().fieldData().cache(FieldDataType.DefaultTypes.STRING, context.reader(), UidFieldMapper.NAME);
} }
@Override @Override