lucene 4: Upgraded PercolatorExecutor

Martijn van Groningen 2012-10-30 11:33:57 +01:00 committed by Shay Banon
parent 22c14c7354
commit fcc4fe263e
3 changed files with 54 additions and 34 deletions

SimpleIdCache.java

@@ -237,28 +237,6 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se
         return false;
     }
 
-    // LUCENE 4 UPGRADE: This logic should go to Uid class. Uid class should BR based instead of string
-    private static HashedBytesArray[] splitUidIntoTypeAndId(BytesRef term) {
-        int loc = -1;
-        for (int i = term.offset; i < term.length; i++) {
-            if (term.bytes[i] == 0x23) { // 0x23 is equal to '#'
-                loc = i;
-                break;
-            }
-        }
-
-        if (loc == -1) {
-            return null;
-        }
-
-        byte[] type = new byte[loc - term.offset];
-        System.arraycopy(term.bytes, term.offset, type, 0, type.length);
-
-        byte[] id = new byte[term.length - type.length - 1];
-        System.arraycopy(term.bytes, loc + 1, id, 0, id.length);
-        return new HashedBytesArray[]{new HashedBytesArray(type), new HashedBytesArray(id)};
-    }
-
     static class TypeBuilder {
         final ExtTObjectIntHasMap<HashedBytesArray> idToDoc = new ExtTObjectIntHasMap<HashedBytesArray>(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1);
         final HashedBytesArray[] docToId;
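
For reference, the removed helper splits a uid of the form type#id at the first 0x23 ('#') byte. A minimal standalone sketch of the same idea using only JDK classes (names are illustrative, not part of this commit):

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

public class UidSplitSketch {
    // Split "type#id" into its two halves at the first '#' (0x23),
    // the same scan-and-copy approach the removed method takes.
    static byte[][] split(byte[] uid) {
        int loc = -1;
        for (int i = 0; i < uid.length; i++) {
            if (uid[i] == 0x23) { // '#'
                loc = i;
                break;
            }
        }
        if (loc == -1) {
            return null; // no separator: not a valid uid
        }
        byte[] type = Arrays.copyOfRange(uid, 0, loc);
        byte[] id = Arrays.copyOfRange(uid, loc + 1, uid.length);
        return new byte[][]{type, id};
    }

    public static void main(String[] args) {
        byte[][] parts = split("user#123".getBytes(StandardCharsets.UTF_8));
        System.out.println(new String(parts[0], StandardCharsets.UTF_8)); // user
        System.out.println(new String(parts[1], StandardCharsets.UTF_8)); // 123
    }
}

As the removed comment says, this logic now moves onto the Uid class below, operating on BytesRef instead of String.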

Uid.java

@@ -19,6 +19,9 @@
 package org.elasticsearch.index.mapper;
 
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.bytes.HashedBytesArray;
+
 /**
  *
  */
@@ -77,6 +80,14 @@ public final class Uid {
         return uid.substring(delimiterIndex + 1);
     }
 
+    public static HashedBytesArray idFromUid(BytesRef uid) {
+        return splitUidIntoTypeAndId(uid)[1];
+    }
+
+    public static HashedBytesArray typeFromUid(BytesRef uid) {
+        return splitUidIntoTypeAndId(uid)[0];
+    }
+
     public static String typeFromUid(String uid) {
         int delimiterIndex = uid.indexOf(DELIMITER); // type is not allowed to have # in it..., ids can
         return uid.substring(0, delimiterIndex);
@@ -94,4 +105,27 @@ public final class Uid {
     public static String createUid(StringBuilder sb, String type, String id) {
         return sb.append(type).append(DELIMITER).append(id).toString();
     }
+
+    // LUCENE 4 UPGRADE: HashedBytesArray or BytesRef as return type?
+    private static HashedBytesArray[] splitUidIntoTypeAndId(BytesRef uid) {
+        int loc = -1;
+        for (int i = uid.offset; i < uid.length; i++) {
+            if (uid.bytes[i] == 0x23) { // 0x23 is equal to '#'
+                loc = i;
+                break;
+            }
+        }
+
+        if (loc == -1) {
+            return null;
+        }
+
+        byte[] type = new byte[loc - uid.offset];
+        System.arraycopy(uid.bytes, uid.offset, type, 0, type.length);
+
+        byte[] id = new byte[uid.length - type.length - 1];
+        System.arraycopy(uid.bytes, loc + 1, id, 0, id.length);
+        return new HashedBytesArray[]{new HashedBytesArray(type), new HashedBytesArray(id)};
+    }
 }
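
A hedged usage sketch of the new BytesRef-based accessors (the toUtf8() call mirrors the PercolatorExecutor change below; new BytesRef(String) is stock Lucene 4):

import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.bytes.HashedBytesArray;
import org.elasticsearch.index.mapper.Uid;

BytesRef uid = new BytesRef("user#123");       // uid bytes are "type#id"
HashedBytesArray type = Uid.typeFromUid(uid);  // bytes of "user"
HashedBytesArray id = Uid.idFromUid(uid);      // bytes of "123"
String idAsString = id.toUtf8();               // as the percolator collector does below

One caveat worth flagging: the loop bound i < uid.length treats length as an end offset, but BytesRef.length counts bytes starting at offset, so for a slice with a non-zero offset the bound would arguably need to be uid.offset + uid.length.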

PercolatorExecutor.java

@@ -20,13 +20,14 @@
 package org.elasticsearch.index.percolator;
 
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.memory.CustomMemoryIndex;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.Scorer;
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.ElasticSearchException;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.Preconditions;
@@ -292,22 +293,29 @@ public class PercolatorExecutor extends AbstractIndexComponent {
         final CustomMemoryIndex memoryIndex = new CustomMemoryIndex();
 
         // TODO: This means percolation does not support nested docs...
-        for (Fieldable field : request.doc().rootDoc().getFields()) {
-            if (!field.isIndexed()) {
+        for (IndexableField field : request.doc().rootDoc().getFields()) {
+            if (!field.fieldType().indexed()) {
                 continue;
             }
             // no need to index the UID field
             if (field.name().equals(UidFieldMapper.NAME)) {
                 continue;
             }
-            TokenStream tokenStream = field.tokenStreamValue();
+            TokenStream tokenStream;
+            try {
+                tokenStream = field.tokenStream(
+                        mapperService.documentMapper(request.doc().type()).mappers().smartNameFieldMapper(field.name()).indexAnalyzer()
+                );
+            } catch (IOException e) {
+                throw new ElasticSearchException("Failed to create token stream", e);
+            }
+
             if (tokenStream != null) {
-                memoryIndex.addField(field.name(), tokenStream, field.getBoost());
+                memoryIndex.addField(field.name(), tokenStream, field.boost());
             } else {
                 Reader reader = field.readerValue();
                 if (reader != null) {
                     try {
-                        memoryIndex.addField(field.name(), request.doc().analyzer().reusableTokenStream(field.name(), reader), field.getBoost() * request.doc().rootDoc().getBoost());
+                        memoryIndex.addField(field.name(), request.doc().analyzer().tokenStream(field.name(), reader), field.boost() /** request.doc().rootDoc().getBoost()*/);
                     } catch (IOException e) {
                         throw new MapperParsingException("Failed to analyze field [" + field.name() + "]", e);
                     }
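
The surrounding loop feeds each indexable field of the incoming document into a single-document in-memory index, against which the registered percolator queries are then run one by one. A rough sketch of that pattern with stock Lucene 4 classes (CustomMemoryIndex is an Elasticsearch-internal variant of Lucene's MemoryIndex; this example is illustrative, not the actual percolator code):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.Version;

// One document, indexed entirely in memory.
MemoryIndex memoryIndex = new MemoryIndex();
memoryIndex.addField("body", "the quick brown fox", new StandardAnalyzer(Version.LUCENE_40));

// A percolator query registered earlier; a score > 0 means the document matches it.
Query query = new TermQuery(new Term("body", "fox"));
boolean matches = memoryIndex.search(query) > 0.0f;
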
@@ -315,7 +323,7 @@ public class PercolatorExecutor extends AbstractIndexComponent {
                     String value = field.stringValue();
                     if (value != null) {
                         try {
-                            memoryIndex.addField(field.name(), request.doc().analyzer().reusableTokenStream(field.name(), new FastStringReader(value)), field.getBoost() * request.doc().rootDoc().getBoost());
+                            memoryIndex.addField(field.name(), request.doc().analyzer().tokenStream(field.name(), new FastStringReader(value)), field.boost() /** request.doc().rootDoc().getBoost()*/);
                         } catch (IOException e) {
                             throw new MapperParsingException("Failed to analyze field [" + field.name() + "]", e);
                         }
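
Both addField call sites drop reusableTokenStream in favor of tokenStream: Lucene 4 removed the former and made per-thread reuse an internal concern of the Analyzer. For reference, the Lucene 4 contract for consuming a stream obtained this way (a generic sketch, not percolator code):

import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.util.Version;

StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
TokenStream stream = analyzer.tokenStream("body", new StringReader("quick brown fox"));
CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
stream.reset();                          // mandatory before the first incrementToken()
while (stream.incrementToken()) {
    System.out.println(term.toString()); // quick, brown, fox
}
stream.end();
stream.close();

MemoryIndex.addField(String, TokenStream, float), as used in the hunks above, takes ownership of the stream and drives this loop itself.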
@@ -398,11 +406,11 @@ public class PercolatorExecutor extends AbstractIndexComponent {
             @Override
             public void collect(int doc) throws IOException {
-                String uid = fieldData.stringValue(doc);
+                BytesRef uid = fieldData.stringValue(doc);
                 if (uid == null) {
                     return;
                 }
-                String id = Uid.idFromUid(uid);
+                String id = Uid.idFromUid(uid).toUtf8();
                 Query query = queries.get(id);
                 if (query == null) {
                     // log???
@@ -421,9 +429,9 @@ public class PercolatorExecutor extends AbstractIndexComponent {
             }
 
             @Override
-            public void setNextReader(IndexReader reader, int docBase) throws IOException {
+            public void setNextReader(AtomicReaderContext context) throws IOException {
                 // we use the UID because id might not be indexed
-                fieldData = percolatorIndex.cache().fieldData().cache(FieldDataType.DefaultTypes.STRING, reader, UidFieldMapper.NAME);
+                fieldData = percolatorIndex.cache().fieldData().cache(FieldDataType.DefaultTypes.STRING, context.reader(), UidFieldMapper.NAME);
             }
 
             @Override
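
The collector changes track Lucene 4's Collector API, where per-segment setup receives an AtomicReaderContext rather than an (IndexReader, docBase) pair. A bare-bones Lucene 4 collector showing the shape of the new contract (illustrative only):

import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;

public class MatchCountingCollector extends Collector {
    private int docBase;
    private int count;

    @Override
    public void setScorer(Scorer scorer) throws IOException {
        // scores are not needed for counting
    }

    @Override
    public void collect(int doc) throws IOException {
        count++; // doc is segment-relative; docBase + doc is the global doc id
    }

    @Override
    public void setNextReader(AtomicReaderContext context) throws IOException {
        this.docBase = context.docBase; // per-segment state goes here, e.g. field data lookups
    }

    @Override
    public boolean acceptsDocsOutOfOrder() {
        return true;
    }
}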