From 55a31f7ac595c9ca4f76b5846545c883aa13aae9 Mon Sep 17 00:00:00 2001
From: Shay Banon
Date: Thu, 25 Oct 2012 19:19:04 +0200
Subject: [PATCH 001/146] change to lucene 4.0 dependency

upgrade has begun...
---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 2d89361c1ed..74a6e5d4614 100644
--- a/pom.xml
+++ b/pom.xml
@@ -30,7 +30,7 @@
-        <lucene.version>3.6.1</lucene.version>
+        <lucene.version>4.0.0</lucene.version>

From f9b0fcb3a3be970941dd0b192544aa4cf80dedfa Mon Sep 17 00:00:00 2001
From: Shay Banon
Date: Thu, 25 Oct 2012 19:19:59 +0200
Subject: [PATCH 002/146] remove BufferedDeletesStream

by default, we will put bloom filter code on the _uid field, so there is
no need for the optimization of using bloom filters when trying to delete
a doc by _uid term per segment
---
 .../lucene/index/BufferedDeletesStream.java | 451 ------------------
 1 file changed, 451 deletions(-)
 delete mode 100644 src/main/java/org/apache/lucene/index/BufferedDeletesStream.java

diff --git a/src/main/java/org/apache/lucene/index/BufferedDeletesStream.java b/src/main/java/org/apache/lucene/index/BufferedDeletesStream.java
deleted file mode 100644
index cf2f88c05af..00000000000
--- a/src/main/java/org/apache/lucene/index/BufferedDeletesStream.java
+++ /dev/null
@@ -1,451 +0,0 @@
-package org.apache.lucene.index;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import org.apache.lucene.search.DocIdSet;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.QueryWrapperFilter;
-import org.apache.lucene.util.UnicodeUtil;
-import org.elasticsearch.common.Unicode;
-import org.elasticsearch.common.bloom.BloomFilter;
-import org.elasticsearch.index.cache.bloom.BloomCache;
-import org.elasticsearch.index.mapper.internal.UidFieldMapper;
-
-import java.io.IOException;
-import java.io.PrintStream;
-import java.util.*;
-import java.util.concurrent.atomic.AtomicInteger;
-import java.util.concurrent.atomic.AtomicLong;
-
-/* Tracks the stream of {@link BufferedDeletes}.
- * When DocumentsWriter flushes, its buffered
- * deletes are appended to this stream. We later
- * apply these deletes (resolve them to the actual
- * docIDs, per segment) when a merge is started
- * (only to the to-be-merged segments). We
- * also apply to all segments when NRT reader is pulled,
- * commit/close is called, or when too many deletes are
- * buffered and must be flushed (by RAM usage or by count).
- *
- * Each packet is assigned a generation, and each flushed or
- * merged segment is also assigned a generation, so we can
- * track which BufferedDeletes packets to apply to any given
- * segment. 
*/ - -// LUCENE MONITOR: We copied this class from Lucene, effectively overriding it with our implementation -// if it comes first in the classpath, allowing for faster apply deletes based on terms -class BufferedDeletesStream implements XIndexWriter.XBufferedDeletesStream { - - // TODO: maybe linked list? - private final List deletes = new ArrayList(); - - // Starts at 1 so that SegmentInfos that have never had - // deletes applied (whose bufferedDelGen defaults to 0) - // will be correct: - private long nextGen = 1; - - // used only by assert - private Term lastDeleteTerm; - - private PrintStream infoStream; - private final AtomicLong bytesUsed = new AtomicLong(); - private final AtomicInteger numTerms = new AtomicInteger(); - private final int messageID; - - private BloomCache bloomCache; - - public BufferedDeletesStream(int messageID) { - this.messageID = messageID; - } - - private synchronized void message(String message) { - if (infoStream != null) { - infoStream.println("BD " + messageID + " [" + new Date() + "; " + Thread.currentThread().getName() + "]: " + message); - } - } - - public synchronized void setInfoStream(PrintStream infoStream) { - this.infoStream = infoStream; - } - - public void setBloomCache(BloomCache bloomCache) { - this.bloomCache = bloomCache; - } - - // Appends a new packet of buffered deletes to the stream, - // setting its generation: - public synchronized void push(FrozenBufferedDeletes packet) { - assert packet.any(); - assert checkDeleteStats(); - assert packet.gen < nextGen; - deletes.add(packet); - numTerms.addAndGet(packet.numTermDeletes); - bytesUsed.addAndGet(packet.bytesUsed); - if (infoStream != null) { - message("push deletes " + packet + " delGen=" + packet.gen + " packetCount=" + deletes.size()); - } - assert checkDeleteStats(); - } - - public synchronized void clear() { - deletes.clear(); - nextGen = 1; - numTerms.set(0); - bytesUsed.set(0); - } - - public boolean any() { - return bytesUsed.get() != 0; - } - - public int numTerms() { - return numTerms.get(); - } - - public long bytesUsed() { - return bytesUsed.get(); - } - - public static class ApplyDeletesResult { - // True if any actual deletes took place: - public final boolean anyDeletes; - - // Current gen, for the merged segment: - public final long gen; - - // If non-null, contains segments that are 100% deleted - public final List allDeleted; - - ApplyDeletesResult(boolean anyDeletes, long gen, List allDeleted) { - this.anyDeletes = anyDeletes; - this.gen = gen; - this.allDeleted = allDeleted; - } - } - - // Sorts SegmentInfos from smallest to biggest bufferedDelGen: - private static final Comparator sortByDelGen = new Comparator() { - // @Override -- not until Java 1.6 - public int compare(SegmentInfo si1, SegmentInfo si2) { - final long cmp = si1.getBufferedDeletesGen() - si2.getBufferedDeletesGen(); - if (cmp > 0) { - return 1; - } else if (cmp < 0) { - return -1; - } else { - return 0; - } - } - }; - - /** - * Resolves the buffered deleted Term/Query/docIDs, into - * actual deleted docIDs in the deletedDocs BitVector for - * each SegmentReader. 
- */ - public synchronized ApplyDeletesResult applyDeletes(IndexWriter.ReaderPool readerPool, List infos) throws IOException { - final long t0 = System.currentTimeMillis(); - - if (infos.size() == 0) { - return new ApplyDeletesResult(false, nextGen++, null); - } - - assert checkDeleteStats(); - - if (!any()) { - message("applyDeletes: no deletes; skipping"); - return new ApplyDeletesResult(false, nextGen++, null); - } - - if (infoStream != null) { - message("applyDeletes: infos=" + infos + " packetCount=" + deletes.size()); - } - - List infos2 = new ArrayList(); - infos2.addAll(infos); - Collections.sort(infos2, sortByDelGen); - - CoalescedDeletes coalescedDeletes = null; - boolean anyNewDeletes = false; - - int infosIDX = infos2.size() - 1; - int delIDX = deletes.size() - 1; - - List allDeleted = null; - - while (infosIDX >= 0) { - //System.out.println("BD: cycle delIDX=" + delIDX + " infoIDX=" + infosIDX); - - final FrozenBufferedDeletes packet = delIDX >= 0 ? deletes.get(delIDX) : null; - final SegmentInfo info = infos2.get(infosIDX); - final long segGen = info.getBufferedDeletesGen(); - - if (packet != null && segGen < packet.gen) { - //System.out.println(" coalesce"); - if (coalescedDeletes == null) { - coalescedDeletes = new CoalescedDeletes(); - } - coalescedDeletes.update(packet); - delIDX--; - } else if (packet != null && segGen == packet.gen) { - //System.out.println(" eq"); - - // Lock order: IW -> BD -> RP - assert readerPool.infoIsLive(info); - SegmentReader reader = readerPool.get(info, false); - int delCount = 0; - final boolean segAllDeletes; - try { - if (coalescedDeletes != null) { - //System.out.println(" del coalesced"); - delCount += applyTermDeletes(coalescedDeletes.termsIterable(), reader); - delCount += applyQueryDeletes(coalescedDeletes.queriesIterable(), reader); - } - //System.out.println(" del exact"); - // Don't delete by Term here; DocumentsWriter - // already did that on flush: - delCount += applyQueryDeletes(packet.queriesIterable(), reader); - segAllDeletes = reader.numDocs() == 0; - } finally { - readerPool.release(reader); - } - anyNewDeletes |= delCount > 0; - - if (segAllDeletes) { - if (allDeleted == null) { - allDeleted = new ArrayList(); - } - allDeleted.add(info); - } - - if (infoStream != null) { - message("seg=" + info + " segGen=" + segGen + " segDeletes=[" + packet + "]; coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] delCount=" + delCount + (segAllDeletes ? " 100% deleted" : "")); - } - - if (coalescedDeletes == null) { - coalescedDeletes = new CoalescedDeletes(); - } - coalescedDeletes.update(packet); - delIDX--; - infosIDX--; - info.setBufferedDeletesGen(nextGen); - - } else { - //System.out.println(" gt"); - - if (coalescedDeletes != null) { - // Lock order: IW -> BD -> RP - assert readerPool.infoIsLive(info); - SegmentReader reader = readerPool.get(info, false); - int delCount = 0; - final boolean segAllDeletes; - try { - delCount += applyTermDeletes(coalescedDeletes.termsIterable(), reader); - delCount += applyQueryDeletes(coalescedDeletes.queriesIterable(), reader); - segAllDeletes = reader.numDocs() == 0; - } finally { - readerPool.release(reader); - } - anyNewDeletes |= delCount > 0; - - if (segAllDeletes) { - if (allDeleted == null) { - allDeleted = new ArrayList(); - } - allDeleted.add(info); - } - - if (infoStream != null) { - message("seg=" + info + " segGen=" + segGen + " coalesced deletes=[" + (coalescedDeletes == null ? "null" : coalescedDeletes) + "] delCount=" + delCount + (segAllDeletes ? 
" 100% deleted" : "")); - } - } - info.setBufferedDeletesGen(nextGen); - - infosIDX--; - } - } - - assert checkDeleteStats(); - if (infoStream != null) { - message("applyDeletes took " + (System.currentTimeMillis() - t0) + " msec"); - } - // assert infos != segmentInfos || !any() : "infos=" + infos + " segmentInfos=" + segmentInfos + " any=" + any; - - return new ApplyDeletesResult(anyNewDeletes, nextGen++, allDeleted); - } - - public synchronized long getNextGen() { - return nextGen++; - } - - // Lock order IW -> BD - /* Removes any BufferedDeletes that we no longer need to - * store because all segments in the index have had the - * deletes applied. */ - public synchronized void prune(SegmentInfos segmentInfos) { - assert checkDeleteStats(); - long minGen = Long.MAX_VALUE; - for (SegmentInfo info : segmentInfos) { - minGen = Math.min(info.getBufferedDeletesGen(), minGen); - } - - if (infoStream != null) { - message("prune sis=" + segmentInfos + " minGen=" + minGen + " packetCount=" + deletes.size()); - } - - final int limit = deletes.size(); - for (int delIDX = 0; delIDX < limit; delIDX++) { - if (deletes.get(delIDX).gen >= minGen) { - prune(delIDX); - assert checkDeleteStats(); - return; - } - } - - // All deletes pruned - prune(limit); - assert !any(); - assert checkDeleteStats(); - } - - private synchronized void prune(int count) { - if (count > 0) { - if (infoStream != null) { - message("pruneDeletes: prune " + count + " packets; " + (deletes.size() - count) + " packets remain"); - } - for (int delIDX = 0; delIDX < count; delIDX++) { - final FrozenBufferedDeletes packet = deletes.get(delIDX); - numTerms.addAndGet(-packet.numTermDeletes); - assert numTerms.get() >= 0; - bytesUsed.addAndGet(-packet.bytesUsed); - assert bytesUsed.get() >= 0; - } - deletes.subList(0, count).clear(); - } - } - - // ES CHANGE: Add bloom filter usage - // Delete by Term - private synchronized long applyTermDeletes(Iterable termsIter, SegmentReader reader) throws IOException { - long delCount = 0; - - assert checkDeleteTerm(null); - - BloomFilter filter = bloomCache == null ? 
BloomFilter.NONE : bloomCache.filter(reader, UidFieldMapper.NAME, true); - UnicodeUtil.UTF8Result utf8 = new UnicodeUtil.UTF8Result(); - - TermDocs docs = reader.termDocs(); - - for (Term term : termsIter) { - - if (term.field() == UidFieldMapper.NAME) { - Unicode.fromStringAsUtf8(term.text(), utf8); - if (!filter.isPresent(utf8.result, 0, utf8.length)) { - continue; - } - } - if (docs == null) { - docs = reader.termDocs(); - } - - // Since we visit terms sorted, we gain performance - // by re-using the same TermsEnum and seeking only - // forwards - assert checkDeleteTerm(term); - docs.seek(term); - - while (docs.next()) { - final int docID = docs.doc(); - reader.deleteDocument(docID); - // TODO: we could/should change - // reader.deleteDocument to return boolean - // true if it did in fact delete, because here - // we could be deleting an already-deleted doc - // which makes this an upper bound: - delCount++; - } - } - - return delCount; - } - - public static class QueryAndLimit { - public final Query query; - public final int limit; - - public QueryAndLimit(Query query, int limit) { - this.query = query; - this.limit = limit; - } - } - - // Delete by query - private synchronized long applyQueryDeletes(Iterable queriesIter, SegmentReader reader) throws IOException { - long delCount = 0; - - for (QueryAndLimit ent : queriesIter) { - Query query = ent.query; - int limit = ent.limit; - final DocIdSet docs = new QueryWrapperFilter(query).getDocIdSet(reader); - if (docs != null) { - final DocIdSetIterator it = docs.iterator(); - if (it != null) { - while (true) { - int doc = it.nextDoc(); - if (doc >= limit) - break; - - reader.deleteDocument(doc); - // TODO: we could/should change - // reader.deleteDocument to return boolean - // true if it did in fact delete, because here - // we could be deleting an already-deleted doc - // which makes this an upper bound: - delCount++; - } - } - } - } - - return delCount; - } - - // used only by assert - private boolean checkDeleteTerm(Term term) { - if (term != null) { - assert lastDeleteTerm == null || term.compareTo(lastDeleteTerm) > 0 : "lastTerm=" + lastDeleteTerm + " vs term=" + term; - } - // TODO: we re-use term now in our merged iterable, but we shouldn't clone, instead copy for this assert - lastDeleteTerm = term == null ? 
null : new Term(term.field(), term.text());
-        return true;
-    }
-
-    // only for assert
-    private boolean checkDeleteStats() {
-        int numTerms2 = 0;
-        long bytesUsed2 = 0;
-        for (FrozenBufferedDeletes packet : deletes) {
-            numTerms2 += packet.numTermDeletes;
-            bytesUsed2 += packet.bytesUsed;
-        }
-        assert numTerms2 == numTerms.get() : "numTerms2=" + numTerms2 + " vs " + numTerms.get();
-        assert bytesUsed2 == bytesUsed.get() : "bytesUsed2=" + bytesUsed2 + " vs " + bytesUsed;
-        return true;
-    }
-}

From 3d4ca81c29a27db3df6b45acefbd45c3466fd210 Mon Sep 17 00:00:00 2001
From: Shay Banon
Date: Thu, 25 Oct 2012 19:22:45 +0200
Subject: [PATCH 003/146] remove XIndexWriter

with the buffered deletes bloom filter removed, we no longer need to set
the bloom filter on the index writer
---
 .../org/apache/lucene/index/XIndexWriter.java | 29 -------------------
 .../index/engine/robin/RobinEngine.java | 2 +-
 2 files changed, 1 insertion(+), 30 deletions(-)
 delete mode 100644 src/main/java/org/apache/lucene/index/XIndexWriter.java

diff --git a/src/main/java/org/apache/lucene/index/XIndexWriter.java b/src/main/java/org/apache/lucene/index/XIndexWriter.java
deleted file mode 100644
index 72f052a8259..00000000000
--- a/src/main/java/org/apache/lucene/index/XIndexWriter.java
+++ /dev/null
@@ -1,29 +0,0 @@
-package org.apache.lucene.index;
-
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.LockObtainFailedException;
-import org.elasticsearch.common.logging.ESLogger;
-import org.elasticsearch.index.cache.bloom.BloomCache;
-
-import java.io.IOException;
-
-/**
- */
-public class XIndexWriter extends IndexWriter {
-
-    private final ESLogger logger;
-
-    public XIndexWriter(Directory d, IndexWriterConfig conf, ESLogger logger, BloomCache bloomCache) throws CorruptIndexException, LockObtainFailedException, IOException {
-        super(d, conf);
-        this.logger = logger;
-        if (bufferedDeletesStream instanceof XBufferedDeletesStream) {
-            logger.debug("using bloom filter enhanced delete handling");
-            ((XBufferedDeletesStream) bufferedDeletesStream).setBloomCache(bloomCache);
-        }
-    }
-
-    public static interface XBufferedDeletesStream {
-
-        void setBloomCache(BloomCache bloomCache);
-    }
-}

diff --git a/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java b/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java
index 872a1862777..b52dbb878a8 100644
--- a/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java
+++ b/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java
@@ -1362,7 +1362,7 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine {
             config.setReaderTermsIndexDivisor(termIndexDivisor);
             config.setMaxThreadStates(indexConcurrency);
-            indexWriter = new XIndexWriter(store.directory(), config, logger, bloomCache);
+            indexWriter = new IndexWriter(store.directory(), config);
         } catch (IOException e) {
             safeClose(indexWriter);
             throw e;

From 7aacc8d44859887fbc72a22cd78e22d75fd5b554 Mon Sep 17 00:00:00 2001
From: Shay Banon
Date: Thu, 25 Oct 2012 21:49:36 +0200
Subject: [PATCH 004/146] lucene 4: upgrade store/dir

---
 .../apache/lucene/store/XFSIndexOutput.java | 2 +-
 .../apache/lucene/store/XMMapFSDirectory.java | 6 +-
 .../apache/lucene/store/XNIOFSDirectory.java | 6 +-
 .../lucene/store/XSimpleFSDirectory.java | 6 +-
 .../store/bytebuffer/ByteBufferDirectory.java | 39 +-------
 .../bytebuffer/ByteBufferIndexInput.java | 2 +-
 .../blobstore/BlobStoreIndexShardGateway.java | 4 +-
 .../org/elasticsearch/index/store/Store.java | 90 ++++++-------
.../index/store/StoreFileMetaData.java | 23 ++--- .../store/support/ForceSyncDirectory.java | 4 +- .../indices/recovery/RecoverySource.java | 4 +- 11 files changed, 58 insertions(+), 128 deletions(-) diff --git a/src/main/java/org/apache/lucene/store/XFSIndexOutput.java b/src/main/java/org/apache/lucene/store/XFSIndexOutput.java index 479c27287cc..bedc8066ed5 100644 --- a/src/main/java/org/apache/lucene/store/XFSIndexOutput.java +++ b/src/main/java/org/apache/lucene/store/XFSIndexOutput.java @@ -13,7 +13,7 @@ class XFSIndexOutput extends FSDirectory.FSIndexOutput { private final StoreRateLimiting.Listener rateListener; XFSIndexOutput(FSDirectory parent, String name, RateLimiter rateLimiter, StoreRateLimiting.Listener rateListener) throws IOException { - super(parent, name); + super(parent, name, null /* we have our own rate limiter */); this.rateLimiter = rateLimiter; this.rateListener = rateListener; } diff --git a/src/main/java/org/apache/lucene/store/XMMapFSDirectory.java b/src/main/java/org/apache/lucene/store/XMMapFSDirectory.java index 132858c6f5f..d9367280628 100644 --- a/src/main/java/org/apache/lucene/store/XMMapFSDirectory.java +++ b/src/main/java/org/apache/lucene/store/XMMapFSDirectory.java @@ -40,12 +40,12 @@ public class XMMapFSDirectory extends MMapDirectory { } @Override - public IndexOutput createOutput(String name) throws IOException { + public IndexOutput createOutput(String name, IOContext context) throws IOException { StoreRateLimiting rateLimiting = rateLimitingProvider.rateLimiting(); StoreRateLimiting.Type type = rateLimiting.getType(); RateLimiter limiter = rateLimiting.getRateLimiter(); if (type == StoreRateLimiting.Type.NONE || limiter == null) { - return super.createOutput(name); + return super.createOutput(name, context); } if (TrackingMergeScheduler.getCurrentMerge() != null) { // we are mering, and type is either MERGE or ALL, rate limit... @@ -59,6 +59,6 @@ public class XMMapFSDirectory extends MMapDirectory { return new XFSIndexOutput(this, name, limiter, rateListener); } // we shouldn't really get here... - return super.createOutput(name); + return super.createOutput(name, context); } } diff --git a/src/main/java/org/apache/lucene/store/XNIOFSDirectory.java b/src/main/java/org/apache/lucene/store/XNIOFSDirectory.java index 32c936d1fe9..987ffb1dd0c 100644 --- a/src/main/java/org/apache/lucene/store/XNIOFSDirectory.java +++ b/src/main/java/org/apache/lucene/store/XNIOFSDirectory.java @@ -40,12 +40,12 @@ public class XNIOFSDirectory extends NIOFSDirectory { } @Override - public IndexOutput createOutput(String name) throws IOException { + public IndexOutput createOutput(String name, IOContext context) throws IOException { StoreRateLimiting rateLimiting = rateLimitingProvider.rateLimiting(); StoreRateLimiting.Type type = rateLimiting.getType(); RateLimiter limiter = rateLimiting.getRateLimiter(); if (type == StoreRateLimiting.Type.NONE || limiter == null) { - return super.createOutput(name); + return super.createOutput(name, context); } if (TrackingMergeScheduler.getCurrentMerge() != null) { // we are mering, and type is either MERGE or ALL, rate limit... @@ -59,6 +59,6 @@ public class XNIOFSDirectory extends NIOFSDirectory { return new XFSIndexOutput(this, name, limiter, rateListener); } // we shouldn't really get here... 
- return super.createOutput(name); + return super.createOutput(name, context); } } diff --git a/src/main/java/org/apache/lucene/store/XSimpleFSDirectory.java b/src/main/java/org/apache/lucene/store/XSimpleFSDirectory.java index 860123b2f34..9766e80fa70 100644 --- a/src/main/java/org/apache/lucene/store/XSimpleFSDirectory.java +++ b/src/main/java/org/apache/lucene/store/XSimpleFSDirectory.java @@ -40,12 +40,12 @@ public class XSimpleFSDirectory extends SimpleFSDirectory { } @Override - public IndexOutput createOutput(String name) throws IOException { + public IndexOutput createOutput(String name, IOContext context) throws IOException { StoreRateLimiting rateLimiting = rateLimitingProvider.rateLimiting(); StoreRateLimiting.Type type = rateLimiting.getType(); RateLimiter limiter = rateLimiting.getRateLimiter(); if (type == StoreRateLimiting.Type.NONE || limiter == null) { - return super.createOutput(name); + return super.createOutput(name, context); } if (TrackingMergeScheduler.getCurrentMerge() != null) { // we are mering, and type is either MERGE or ALL, rate limit... @@ -59,6 +59,6 @@ public class XSimpleFSDirectory extends SimpleFSDirectory { return new XFSIndexOutput(this, name, limiter, rateListener); } // we shouldn't really get here... - return super.createOutput(name); + return super.createOutput(name, context); } } diff --git a/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferDirectory.java b/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferDirectory.java index 8ca016cf367..e3925ad1acc 100644 --- a/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferDirectory.java +++ b/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferDirectory.java @@ -17,10 +17,7 @@ package org.apache.lucene.store.bytebuffer; * limitations under the License. 
*/ -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.SingleInstanceLockFactory; +import org.apache.lucene.store.*; import java.io.FileNotFoundException; import java.io.IOException; @@ -98,36 +95,6 @@ public class ByteBufferDirectory extends Directory { return files.containsKey(name); } - @Override - public long fileModified(String name) throws IOException { - ByteBufferFile file = files.get(name); - if (file == null) - throw new FileNotFoundException(name); - return file.getLastModified(); - } - - @Override - public void touchFile(String name) throws IOException { - ByteBufferFile file = files.get(name); - if (file == null) - throw new FileNotFoundException(name); - - long ts2, ts1 = System.currentTimeMillis(); - do { - try { - Thread.sleep(0, 1); - } catch (java.lang.InterruptedException ie) { - // In 3.0 we will change this to throw - // InterruptedException instead - Thread.currentThread().interrupt(); - throw new RuntimeException(ie); - } - ts2 = System.currentTimeMillis(); - } while (ts1 == ts2); - - file.setLastModified(ts2); - } - @Override public void deleteFile(String name) throws IOException { ByteBufferFile file = files.remove(name); @@ -146,7 +113,7 @@ public class ByteBufferDirectory extends Directory { } @Override - public IndexOutput createOutput(String name) throws IOException { + public IndexOutput createOutput(String name, IOContext context) throws IOException { ByteBufferAllocator.Type allocatorType = ByteBufferAllocator.Type.LARGE; if (name.contains("segments") || name.endsWith(".del")) { allocatorType = ByteBufferAllocator.Type.SMALL; @@ -166,7 +133,7 @@ public class ByteBufferDirectory extends Directory { } @Override - public IndexInput openInput(String name) throws IOException { + public IndexInput openInput(String name, IOContext context) throws IOException { ByteBufferFile file = files.get(name); if (file == null) throw new FileNotFoundException(name); diff --git a/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferIndexInput.java b/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferIndexInput.java index 7cb186dee69..aeba5535be2 100644 --- a/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferIndexInput.java +++ b/src/main/java/org/apache/lucene/store/bytebuffer/ByteBufferIndexInput.java @@ -186,7 +186,7 @@ public class ByteBufferIndexInput extends IndexInput { } @Override - public Object clone() { + public IndexInput clone() { ByteBufferIndexInput cloned = (ByteBufferIndexInput) super.clone(); cloned.file.incRef(); // inc ref on cloned one if (currentBuffer != EMPTY_BUFFER) { diff --git a/src/main/java/org/elasticsearch/index/gateway/blobstore/BlobStoreIndexShardGateway.java b/src/main/java/org/elasticsearch/index/gateway/blobstore/BlobStoreIndexShardGateway.java index 153bd4844ae..b247cbaad7f 100644 --- a/src/main/java/org/elasticsearch/index/gateway/blobstore/BlobStoreIndexShardGateway.java +++ b/src/main/java/org/elasticsearch/index/gateway/blobstore/BlobStoreIndexShardGateway.java @@ -24,6 +24,7 @@ import com.google.common.collect.Iterables; import com.google.common.collect.Lists; import org.apache.lucene.index.IndexReader; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.elasticsearch.ElasticSearchException; @@ -752,7 +753,8 @@ public abstract class BlobStoreIndexShardGateway extends 
AbstractIndexShardCompo IndexInput indexInput = null; try { - indexInput = indexShard.store().openInputRaw(fileInfo.physicalName()); + // TODO: maybe use IOContext.READONCE? + indexInput = indexShard.store().openInputRaw(fileInfo.physicalName(), IOContext.READ); indexInput.seek(partNumber * chunkBytes); InputStreamIndexInput is = new ThreadSafeInputStreamIndexInput(indexInput, chunkBytes); diff --git a/src/main/java/org/elasticsearch/index/store/Store.java b/src/main/java/org/elasticsearch/index/store/Store.java index 14e32a860fa..a7fef4f058e 100644 --- a/src/main/java/org/elasticsearch/index/store/Store.java +++ b/src/main/java/org/elasticsearch/index/store/Store.java @@ -149,7 +149,7 @@ public class Store extends AbstractIndexShardComponent { return null; } // IndexOutput not closed, does not exists - if (md.lastModified() == -1 || md.length() == -1) { + if (md.length() == -1) { return null; } return md; @@ -202,7 +202,7 @@ public class Store extends AbstractIndexShardComponent { throw new FileNotFoundException(from); } directoryService.renameFile(fromMetaData.directory(), from, to); - StoreFileMetaData toMetaData = new StoreFileMetaData(to, fromMetaData.length(), fromMetaData.lastModified(), fromMetaData.checksum(), fromMetaData.directory()); + StoreFileMetaData toMetaData = new StoreFileMetaData(to, fromMetaData.length(), fromMetaData.checksum(), fromMetaData.directory()); filesMetadata = MapBuilder.newMapBuilder(filesMetadata).remove(from).put(to, toMetaData).immutableMap(); files = filesMetadata.keySet().toArray(new String[filesMetadata.size()]); } @@ -246,7 +246,7 @@ public class Store extends AbstractIndexShardComponent { if (lastFound == -1) { return defaultValue; } - IndexInput indexInput = lastDir.openInput(CHECKSUMS_PREFIX + lastFound); + IndexInput indexInput = lastDir.openInput(CHECKSUMS_PREFIX + lastFound, IOContext.READONCE); try { indexInput.readInt(); // version return indexInput.readStringStringMap(); @@ -268,7 +268,7 @@ public class Store extends AbstractIndexShardComponent { checksums.put(metaData.name(), metaData.checksum()); } } - IndexOutput output = directory.createOutput(checksumName, true); + IndexOutput output = directory.createOutput(checksumName, IOContext.DEFAULT, true); output.writeInt(0); // version output.writeStringStringMap(checksums); output.close(); @@ -300,25 +300,25 @@ public class Store extends AbstractIndexShardComponent { * Creates a raw output, no checksum is computed, and no compression if enabled. */ public IndexOutput createOutputRaw(String name) throws IOException { - return directory.createOutput(name, true); + return directory.createOutput(name, IOContext.DEFAULT, true); } /** * Opened an index input in raw form, no decompression for example. 
*/ - public IndexInput openInputRaw(String name) throws IOException { + public IndexInput openInputRaw(String name, IOContext context) throws IOException { StoreFileMetaData metaData = filesMetadata.get(name); if (metaData == null) { throw new FileNotFoundException(name); } - return metaData.directory().openInput(name); + return metaData.directory().openInput(name, context); } public void writeChecksum(String name, String checksum) throws IOException { // update the metadata to include the checksum and write a new checksums file synchronized (mutex) { StoreFileMetaData metaData = filesMetadata.get(name); - metaData = new StoreFileMetaData(metaData.name(), metaData.length(), metaData.lastModified(), checksum, metaData.directory()); + metaData = new StoreFileMetaData(metaData.name(), metaData.length(), checksum, metaData.directory()); filesMetadata = MapBuilder.newMapBuilder(filesMetadata).put(name, metaData).immutableMap(); writeChecksums(); } @@ -329,7 +329,7 @@ public class Store extends AbstractIndexShardComponent { synchronized (mutex) { for (Map.Entry entry : checksums.entrySet()) { StoreFileMetaData metaData = filesMetadata.get(entry.getKey()); - metaData = new StoreFileMetaData(metaData.name(), metaData.length(), metaData.lastModified(), entry.getValue(), metaData.directory()); + metaData = new StoreFileMetaData(metaData.name(), metaData.length(), entry.getValue(), metaData.directory()); filesMetadata = MapBuilder.newMapBuilder(filesMetadata).put(entry.getKey(), metaData).immutableMap(); } writeChecksums(); @@ -351,7 +351,7 @@ public class Store extends AbstractIndexShardComponent { for (Directory delegate : delegates) { for (String file : delegate.listAll()) { String checksum = checksums.get(file); - builder.put(file, new StoreFileMetaData(file, delegate.fileLength(file), delegate.fileModified(file), checksum, delegate)); + builder.put(file, new StoreFileMetaData(file, delegate.fileLength(file), checksum, delegate)); } } filesMetadata = builder.immutableMap(); @@ -363,6 +363,12 @@ public class Store extends AbstractIndexShardComponent { return delegates; } + @Override + public void copy(Directory to, String src, String dest, IOContext context) throws IOException { + // lets the default implementation happen, so we properly open an input and create an output + super.copy(to, src, dest, context); + } + @Override public String[] listAll() throws IOException { return files; @@ -373,31 +379,6 @@ public class Store extends AbstractIndexShardComponent { return filesMetadata.containsKey(name); } - @Override - public long fileModified(String name) throws IOException { - StoreFileMetaData metaData = filesMetadata.get(name); - if (metaData == null) { - throw new FileNotFoundException(name); - } - // not set yet (IndexOutput not closed) - if (metaData.lastModified() != -1) { - return metaData.lastModified(); - } - return metaData.directory().fileModified(name); - } - - @Override - public void touchFile(String name) throws IOException { - synchronized (mutex) { - StoreFileMetaData metaData = filesMetadata.get(name); - if (metaData != null) { - metaData.directory().touchFile(name); - metaData = new StoreFileMetaData(metaData.name(), metaData.length(), metaData.directory().fileModified(name), metaData.checksum(), metaData.directory()); - filesMetadata = MapBuilder.newMapBuilder(filesMetadata).put(name, metaData).immutableMap(); - } - } - } - public void deleteFileChecksum(String name) throws IOException { StoreFileMetaData metaData = filesMetadata.get(name); if (metaData != null) { @@ -455,11 
+436,11 @@ public class Store extends AbstractIndexShardComponent { } @Override - public IndexOutput createOutput(String name) throws IOException { - return createOutput(name, false); + public IndexOutput createOutput(String name, IOContext context) throws IOException { + return createOutput(name, context, false); } - public IndexOutput createOutput(String name, boolean raw) throws IOException { + public IndexOutput createOutput(String name, IOContext context, boolean raw) throws IOException { Directory directory = null; if (isChecksum(name)) { directory = delegates[0]; @@ -484,9 +465,9 @@ public class Store extends AbstractIndexShardComponent { } } } - IndexOutput out = directory.createOutput(name); + IndexOutput out = directory.createOutput(name, context); synchronized (mutex) { - StoreFileMetaData metaData = new StoreFileMetaData(name, -1, -1, null, directory); + StoreFileMetaData metaData = new StoreFileMetaData(name, -1, null, directory); filesMetadata = MapBuilder.newMapBuilder(filesMetadata).put(name, metaData).immutableMap(); files = filesMetadata.keySet().toArray(new String[filesMetadata.size()]); boolean computeChecksum = !raw; @@ -513,12 +494,12 @@ public class Store extends AbstractIndexShardComponent { } @Override - public IndexInput openInput(String name) throws IOException { + public IndexInput openInput(String name, IOContext context) throws IOException { StoreFileMetaData metaData = filesMetadata.get(name); if (metaData == null) { throw new FileNotFoundException(name); } - IndexInput in = metaData.directory().openInput(name); + IndexInput in = metaData.directory().openInput(name, context); if (name.endsWith(".fdt") || name.endsWith(".tvf")) { Compressor compressor = CompressorFactory.compressor(in); if (compressor != null) { @@ -529,19 +510,17 @@ public class Store extends AbstractIndexShardComponent { } @Override - public IndexInput openInput(String name, int bufferSize) throws IOException { + public IndexInputSlicer createSlicer(String name, IOContext context) throws IOException { StoreFileMetaData metaData = filesMetadata.get(name); if (metaData == null) { throw new FileNotFoundException(name); } - IndexInput in = metaData.directory().openInput(name, bufferSize); if (name.endsWith(".fdt") || name.endsWith(".tvf")) { - Compressor compressor = CompressorFactory.compressor(in); - if (compressor != null) { - in = compressor.indexInput(in); - } + // rely on the slicer from the base class that uses an input, since they might be compressed... 
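+            // (byte offsets in a compressed file do not match logical offsets, so any slice has to be taken over the decompressing IndexInput rather than over the raw file bytes)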
+ // note, it seems like slicers are only used in compound file format..., so not relevant for now + return super.createSlicer(name, context); } - return in; + return metaData.directory().createSlicer(name, context); } @Override @@ -609,17 +588,6 @@ public class Store extends AbstractIndexShardComponent { } } - @Override - public void sync(String name) throws IOException { - if (sync) { - sync(ImmutableList.of(name)); - } - // write the checksums file when we sync on the segments file (committed) - if (!name.equals("segments.gen") && name.startsWith("segments")) { - writeChecksums(); - } - } - @Override public void forceSync(String name) throws IOException { sync(ImmutableList.of(name)); @@ -654,7 +622,7 @@ public class Store extends AbstractIndexShardComponent { checksum = Long.toString(((ChecksumIndexOutput) underlying).digest().getValue(), Character.MAX_RADIX); } synchronized (mutex) { - StoreFileMetaData md = new StoreFileMetaData(name, metaData.directory().fileLength(name), metaData.directory().fileModified(name), checksum, metaData.directory()); + StoreFileMetaData md = new StoreFileMetaData(name, metaData.directory().fileLength(name), checksum, metaData.directory()); filesMetadata = MapBuilder.newMapBuilder(filesMetadata).put(name, md).immutableMap(); files = filesMetadata.keySet().toArray(new String[filesMetadata.size()]); } diff --git a/src/main/java/org/elasticsearch/index/store/StoreFileMetaData.java b/src/main/java/org/elasticsearch/index/store/StoreFileMetaData.java index 138f6735a8e..9bc46cdb80a 100644 --- a/src/main/java/org/elasticsearch/index/store/StoreFileMetaData.java +++ b/src/main/java/org/elasticsearch/index/store/StoreFileMetaData.java @@ -34,8 +34,6 @@ public class StoreFileMetaData implements Streamable { private String name; - private long lastModified; - // the actual file size on "disk", if compressed, the compressed size private long length; @@ -43,16 +41,15 @@ public class StoreFileMetaData implements Streamable { private transient Directory directory; - StoreFileMetaData() { + private StoreFileMetaData() { } - public StoreFileMetaData(String name, long length, long lastModified, String checksum) { - this(name, length, lastModified, checksum, null); + public StoreFileMetaData(String name, long length, String checksum) { + this(name, length, checksum, null); } - public StoreFileMetaData(String name, long length, long lastModified, String checksum, @Nullable Directory directory) { + public StoreFileMetaData(String name, long length, String checksum, @Nullable Directory directory) { this.name = name; - this.lastModified = lastModified; this.length = length; this.checksum = checksum; this.directory = directory; @@ -66,10 +63,6 @@ public class StoreFileMetaData implements Streamable { return name; } - public long lastModified() { - return this.lastModified; - } - /** * the actual file size on "disk", if compressed, the compressed size */ @@ -102,22 +95,22 @@ public class StoreFileMetaData implements Streamable { @Override public void readFrom(StreamInput in) throws IOException { - name = in.readUTF(); + name = in.readString(); length = in.readVLong(); if (in.readBoolean()) { - checksum = in.readUTF(); + checksum = in.readString(); } } @Override public void writeTo(StreamOutput out) throws IOException { - out.writeUTF(name); + out.writeString(name); out.writeVLong(length); if (checksum == null) { out.writeBoolean(false); } else { out.writeBoolean(true); - out.writeUTF(checksum); + out.writeString(checksum); } } } diff --git 
a/src/main/java/org/elasticsearch/index/store/support/ForceSyncDirectory.java b/src/main/java/org/elasticsearch/index/store/support/ForceSyncDirectory.java index ec916a08815..ef2235d97a4 100644 --- a/src/main/java/org/elasticsearch/index/store/support/ForceSyncDirectory.java +++ b/src/main/java/org/elasticsearch/index/store/support/ForceSyncDirectory.java @@ -23,13 +23,11 @@ import java.io.IOException; /** * A custom directory that allows to forceSync (since the actual directory might disable it) - * - * */ public interface ForceSyncDirectory { /** - * Similar to {@link org.apache.lucene.store.Directory#sync(String)} but forces it even if its + * Similar to {@link org.apache.lucene.store.Directory#sync(java.util.Collection)} but forces it even if its * disabled. */ void forceSync(String name) throws IOException; diff --git a/src/main/java/org/elasticsearch/indices/recovery/RecoverySource.java b/src/main/java/org/elasticsearch/indices/recovery/RecoverySource.java index 0069c758c65..aefa5a0d1b8 100644 --- a/src/main/java/org/elasticsearch/indices/recovery/RecoverySource.java +++ b/src/main/java/org/elasticsearch/indices/recovery/RecoverySource.java @@ -21,6 +21,7 @@ package org.elasticsearch.indices.recovery; import com.google.common.collect.Lists; import com.google.common.collect.Sets; +import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.common.StopWatch; @@ -135,7 +136,8 @@ public class RecoverySource extends AbstractComponent { final int BUFFER_SIZE = (int) recoverySettings.fileChunkSize().bytes(); byte[] buf = new byte[BUFFER_SIZE]; StoreFileMetaData md = shard.store().metaData(name); - indexInput = shard.store().openInputRaw(name); + // TODO: maybe use IOContext.READONCE? 
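+        // (READONCE would hint that recovery streams each file sequentially exactly once, letting the directory skip caching; READ is kept here as the conservative default)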
+            indexInput = shard.store().openInputRaw(name, IOContext.READ);
             boolean shouldCompressRequest = recoverySettings.compress();
             if (CompressorFactory.isCompressed(indexInput)) {
                 shouldCompressRequest = false;

From 05138bb2fbbf38a1c1e14469dcf33f8e857f5adf Mon Sep 17 00:00:00 2001
From: Igor Motov
Date: Thu, 25 Oct 2012 15:51:34 -0400
Subject: [PATCH 005/146] lucene 4: upgrade analyzers

---
 pom.xml | 2 +-
 .../analysis/miscellaneous/UniqueTokenFilter.java | 2 +-
 .../miscellaneous/WordDelimiterFilter.java | 2 +-
 .../org/elasticsearch/common/lucene/Lucene.java | 2 +-
 .../index/analysis/StandardHtmlStripAnalyzer.java | 6 +++++-
 .../indices/analysis/IndicesAnalysisService.java | 14 +++++++++-----
 6 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/pom.xml b/pom.xml
index 74a6e5d4614..e41b516d3fd 100644
--- a/pom.xml
+++ b/pom.xml
@@ -51,7 +51,7 @@
         <dependency>
             <groupId>org.apache.lucene</groupId>
-            <artifactId>lucene-analyzers</artifactId>
+            <artifactId>lucene-analyzers-common</artifactId>
             <version>${lucene.version}</version>
             <scope>compile</scope>
         </dependency>

diff --git a/src/main/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilter.java b/src/main/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilter.java
index 020c26b0d2f..0c85ea9fd4c 100644
--- a/src/main/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilter.java
+++ b/src/main/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilter.java
@@ -19,11 +19,11 @@
 package org.apache.lucene.analysis.miscellaneous;
 
-import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.util.Version;
 
 import java.io.IOException;

diff --git a/src/main/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java b/src/main/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
index e5d05e99482..930a09a1100 100644
--- a/src/main/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
+++ b/src/main/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java
@@ -19,13 +19,13 @@
 package org.apache.lucene.analysis.miscellaneous;
 
-import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.analysis.util.CharArraySet;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.RamUsageEstimator;

diff --git a/src/main/java/org/elasticsearch/common/lucene/Lucene.java b/src/main/java/org/elasticsearch/common/lucene/Lucene.java
index 67c6f52c2f9..6929b163224 100644
--- a/src/main/java/org/elasticsearch/common/lucene/Lucene.java
+++ b/src/main/java/org/elasticsearch/common/lucene/Lucene.java
@@ -19,7 +19,7 @@
 package org.elasticsearch.common.lucene;
 
-import org.apache.lucene.analysis.KeywordAnalyzer;
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.analysis.standard.StandardAnalyzer;
 import org.apache.lucene.index.*;
 import org.apache.lucene.search.*;

diff --git a/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzer.java 
b/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzer.java index b77861c551b..2b03fc82999 100644 --- a/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzer.java +++ b/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzer.java @@ -19,10 +19,14 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.LowerCaseFilter; +import org.apache.lucene.analysis.core.StopAnalyzer; +import org.apache.lucene.analysis.core.StopFilter; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardFilter; import org.apache.lucene.analysis.standard.StandardTokenizer; +import org.apache.lucene.analysis.util.StopwordAnalyzerBase; import org.apache.lucene.util.Version; import java.io.IOException; diff --git a/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java b/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java index 57ed7cfc46a..49d7a2f3d9f 100644 --- a/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java +++ b/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java @@ -19,7 +19,9 @@ package org.elasticsearch.indices.analysis; -import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.analysis.ar.ArabicAnalyzer; import org.apache.lucene.analysis.ar.ArabicStemFilter; import org.apache.lucene.analysis.bg.BulgarianAnalyzer; @@ -29,6 +31,7 @@ import org.apache.lucene.analysis.ca.CatalanAnalyzer; import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; import org.apache.lucene.analysis.cjk.CJKAnalyzer; import org.apache.lucene.analysis.cn.ChineseAnalyzer; +import org.apache.lucene.analysis.core.*; import org.apache.lucene.analysis.cz.CzechAnalyzer; import org.apache.lucene.analysis.cz.CzechStemFilter; import org.apache.lucene.analysis.da.DanishAnalyzer; @@ -37,11 +40,11 @@ import org.apache.lucene.analysis.de.GermanStemFilter; import org.apache.lucene.analysis.el.GreekAnalyzer; import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.en.KStemFilter; +import org.apache.lucene.analysis.en.PorterStemFilter; import org.apache.lucene.analysis.es.SpanishAnalyzer; import org.apache.lucene.analysis.eu.BasqueAnalyzer; import org.apache.lucene.analysis.fa.PersianAnalyzer; import org.apache.lucene.analysis.fi.FinnishAnalyzer; -import org.apache.lucene.analysis.fr.ElisionFilter; import org.apache.lucene.analysis.fr.FrenchAnalyzer; import org.apache.lucene.analysis.fr.FrenchStemFilter; import org.apache.lucene.analysis.ga.IrishAnalyzer; @@ -66,7 +69,6 @@ import org.apache.lucene.analysis.pt.PortugueseAnalyzer; import org.apache.lucene.analysis.reverse.ReverseStringFilter; import org.apache.lucene.analysis.ro.RomanianAnalyzer; import org.apache.lucene.analysis.ru.RussianAnalyzer; -import org.apache.lucene.analysis.ru.RussianStemFilter; import org.apache.lucene.analysis.shingle.ShingleFilter; import org.apache.lucene.analysis.snowball.SnowballAnalyzer; import org.apache.lucene.analysis.snowball.SnowballFilter; @@ -74,6 +76,7 @@ import org.apache.lucene.analysis.standard.*; import org.apache.lucene.analysis.sv.SwedishAnalyzer; import org.apache.lucene.analysis.th.ThaiAnalyzer; import org.apache.lucene.analysis.tr.TurkishAnalyzer; 
+import org.apache.lucene.analysis.util.ElisionFilter;
 import org.elasticsearch.ElasticSearchIllegalStateException;
 import org.elasticsearch.common.component.AbstractComponent;
 import org.elasticsearch.common.inject.Inject;
@@ -565,7 +568,8 @@ public class IndicesAnalysisService extends AbstractComponent {
 
             @Override
             public TokenStream create(TokenStream tokenStream) {
-                return new ElisionFilter(Lucene.ANALYZER_VERSION, tokenStream);
+                // LUCENE 4 UPGRADE: French default for now, make set of articles configurable
+                return new ElisionFilter(tokenStream, FrenchAnalyzer.DEFAULT_ARTICLES);
             }
         }));
         tokenFilterFactories.put("arabic_stem", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
@@ -642,7 +646,7 @@ public class IndicesAnalysisService extends AbstractComponent {
 
             @Override
             public TokenStream create(TokenStream tokenStream) {
-                return new RussianStemFilter(tokenStream);
+                return new SnowballFilter(tokenStream, "Russian");
             }
         }));

From a4d0e3a0e8fa29f127edb46ed74f12ba6a712ba1 Mon Sep 17 00:00:00 2001
From: Shay Banon
Date: Thu, 25 Oct 2012 22:43:32 +0200
Subject: [PATCH 006/146] lucene 4: add codecs dependency

---
 pom.xml | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/pom.xml b/pom.xml
index e41b516d3fd..d35ea38bdef 100644
--- a/pom.xml
+++ b/pom.xml
@@ -55,6 +55,12 @@
             <version>${lucene.version}</version>
             <scope>compile</scope>
         </dependency>
+        <dependency>
+            <groupId>org.apache.lucene</groupId>
+            <artifactId>lucene-codecs</artifactId>
+            <version>${lucene.version}</version>
+            <scope>compile</scope>
+        </dependency>
         <dependency>
             <groupId>org.apache.lucene</groupId>
             <artifactId>lucene-queries</artifactId>

From 386c2ebdb9ec94096c657661c8bcd14e997bb098 Mon Sep 17 00:00:00 2001
From: Shay Banon
Date: Fri, 26 Oct 2012 00:11:12 +0200
Subject: [PATCH 007/146] lucene 4: remove bloom cache

we can add the bloom cache back, if we need it, as a codec on the uid
field; we still need to rewrite the UidFilter to not use bloom, but that
will be the regular one
---
 .../cache/clear/ClearIndicesCacheRequest.java | 16 +-
 .../ClearIndicesCacheRequestBuilder.java | 5 -
 .../clear/ShardClearIndicesCacheRequest.java | 8 -
 .../TransportClearIndicesCacheAction.java | 4 -
 .../common/bloom/BloomCalculations.java | 172 ------------
 .../common/bloom/BloomFilter.java | 64 -----
 .../common/bloom/BloomFilterFactory.java | 98 -------
 .../common/bloom/ObsBloomFilter.java | 97 -------
 .../common/lucene/uid/UidField.java | 22 +-
 .../elasticsearch/index/cache/CacheStats.java | 29 +-
 .../elasticsearch/index/cache/IndexCache.java | 17 +-
 .../index/cache/IndexCacheModule.java | 2 -
 .../index/cache/bloom/BloomCache.java | 45 ----
 .../index/cache/bloom/BloomCacheModule.java | 48 ----
 .../cache/bloom/none/NoneBloomCache.java | 73 -----
 .../cache/bloom/simple/SimpleBloomCache.java | 250 ------------------
 .../index/engine/robin/RobinEngine.java | 38 +--
 .../elasticsearch/index/search/UidFilter.java | 1 +
 .../clear/RestClearIndicesCacheAction.java | 1 -
 19 files changed, 28 insertions(+), 962 deletions(-)
 delete mode 100644 src/main/java/org/elasticsearch/common/bloom/BloomCalculations.java
 delete mode 100644 src/main/java/org/elasticsearch/common/bloom/BloomFilter.java
 delete mode 100644 src/main/java/org/elasticsearch/common/bloom/BloomFilterFactory.java
 delete mode 100644 src/main/java/org/elasticsearch/common/bloom/ObsBloomFilter.java
 delete mode 100644 src/main/java/org/elasticsearch/index/cache/bloom/BloomCache.java
 delete mode 100644 src/main/java/org/elasticsearch/index/cache/bloom/BloomCacheModule.java
 delete mode 100644 src/main/java/org/elasticsearch/index/cache/bloom/none/NoneBloomCache.java
 delete mode 100644 src/main/java/org/elasticsearch/index/cache/bloom/simple/SimpleBloomCache.java

diff --git 
a/src/main/java/org/elasticsearch/action/admin/indices/cache/clear/ClearIndicesCacheRequest.java b/src/main/java/org/elasticsearch/action/admin/indices/cache/clear/ClearIndicesCacheRequest.java index 6bf1193596e..df8153bc5d9 100644 --- a/src/main/java/org/elasticsearch/action/admin/indices/cache/clear/ClearIndicesCacheRequest.java +++ b/src/main/java/org/elasticsearch/action/admin/indices/cache/clear/ClearIndicesCacheRequest.java @@ -34,7 +34,6 @@ public class ClearIndicesCacheRequest extends BroadcastOperationRequest 0) { fields = new String[size]; for (int i = 0; i < size; i++) { - fields[i] = in.readUTF(); + fields[i] = in.readString(); } } } @@ -111,13 +100,12 @@ public class ClearIndicesCacheRequest extends BroadcastOperationRequest listener) { ((IndicesAdminClient) client).clearCache(request, listener); diff --git a/src/main/java/org/elasticsearch/action/admin/indices/cache/clear/ShardClearIndicesCacheRequest.java b/src/main/java/org/elasticsearch/action/admin/indices/cache/clear/ShardClearIndicesCacheRequest.java index 02ed102db1f..0a0f315624e 100644 --- a/src/main/java/org/elasticsearch/action/admin/indices/cache/clear/ShardClearIndicesCacheRequest.java +++ b/src/main/java/org/elasticsearch/action/admin/indices/cache/clear/ShardClearIndicesCacheRequest.java @@ -33,7 +33,6 @@ class ShardClearIndicesCacheRequest extends BroadcastShardOperationRequest { private boolean filterCache = false; private boolean fieldDataCache = false; private boolean idCache = false; - private boolean bloomCache = false; private String[] fields = null; ShardClearIndicesCacheRequest() { @@ -44,7 +43,6 @@ class ShardClearIndicesCacheRequest extends BroadcastShardOperationRequest { filterCache = request.filterCache(); fieldDataCache = request.fieldDataCache(); idCache = request.idCache(); - bloomCache = request.bloomCache(); fields = request.fields(); } @@ -60,10 +58,6 @@ class ShardClearIndicesCacheRequest extends BroadcastShardOperationRequest { return this.idCache; } - public boolean bloomCache() { - return this.bloomCache; - } - public String[] fields() { return this.fields; } @@ -79,7 +73,6 @@ class ShardClearIndicesCacheRequest extends BroadcastShardOperationRequest { filterCache = in.readBoolean(); fieldDataCache = in.readBoolean(); idCache = in.readBoolean(); - bloomCache = in.readBoolean(); int size = in.readVInt(); if (size > 0) { fields = new String[size]; @@ -95,7 +88,6 @@ class ShardClearIndicesCacheRequest extends BroadcastShardOperationRequest { out.writeBoolean(filterCache); out.writeBoolean(fieldDataCache); out.writeBoolean(idCache); - out.writeBoolean(bloomCache); if (fields == null) { out.writeVInt(0); } else { diff --git a/src/main/java/org/elasticsearch/action/admin/indices/cache/clear/TransportClearIndicesCacheAction.java b/src/main/java/org/elasticsearch/action/admin/indices/cache/clear/TransportClearIndicesCacheAction.java index 85b1a7edfa9..ea40b7fd4dd 100644 --- a/src/main/java/org/elasticsearch/action/admin/indices/cache/clear/TransportClearIndicesCacheAction.java +++ b/src/main/java/org/elasticsearch/action/admin/indices/cache/clear/TransportClearIndicesCacheAction.java @@ -138,10 +138,6 @@ public class TransportClearIndicesCacheAction extends TransportBroadcastOperatio clearedAtLeastOne = true; service.cache().idCache().clear(); } - if (request.bloomCache()) { - clearedAtLeastOne = true; - service.cache().bloomCache().clear(); - } if (!clearedAtLeastOne) { if (request.fields() != null && request.fields().length > 0) { // only clear caches relating to the specified fields diff --git 
a/src/main/java/org/elasticsearch/common/bloom/BloomCalculations.java b/src/main/java/org/elasticsearch/common/bloom/BloomCalculations.java deleted file mode 100644 index 1cde133deaf..00000000000 --- a/src/main/java/org/elasticsearch/common/bloom/BloomCalculations.java +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Licensed to Elastic Search and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Elastic Search licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.common.bloom; - -/** - * The following calculations are taken from: - * http://www.cs.wisc.edu/~cao/papers/summary-cache/node8.html - * "Bloom Filters - the math" - *

- * This class's static methods are meant to facilitate the use of the Bloom - * Filter class by helping to choose correct values of 'bits per element' and - * 'number of hash functions, k'. - */ -class BloomCalculations { - - private static final int minBuckets = 2; - private static final int minK = 1; - - private static final int EXCESS = 20; - - /** - * In the following table, the row 'i' shows false positive rates if i buckets - * per element are used. Column 'j' shows false positive rates if j hash - * functions are used. The first row is 'i=0', the first column is 'j=0'. - * Each cell (i,j) the false positive rate determined by using i buckets per - * element and j hash functions. - */ - static final double[][] probs = new double[][]{ - {1.0}, // dummy row representing 0 buckets per element - {1.0, 1.0}, // dummy row representing 1 buckets per element - {1.0, 0.393, 0.400}, - {1.0, 0.283, 0.237, 0.253}, - {1.0, 0.221, 0.155, 0.147, 0.160}, - {1.0, 0.181, 0.109, 0.092, 0.092, 0.101}, // 5 - {1.0, 0.154, 0.0804, 0.0609, 0.0561, 0.0578, 0.0638}, - {1.0, 0.133, 0.0618, 0.0423, 0.0359, 0.0347, 0.0364}, - {1.0, 0.118, 0.0489, 0.0306, 0.024, 0.0217, 0.0216, 0.0229}, - {1.0, 0.105, 0.0397, 0.0228, 0.0166, 0.0141, 0.0133, 0.0135, 0.0145}, - {1.0, 0.0952, 0.0329, 0.0174, 0.0118, 0.00943, 0.00844, 0.00819, 0.00846}, // 10 - {1.0, 0.0869, 0.0276, 0.0136, 0.00864, 0.0065, 0.00552, 0.00513, 0.00509}, - {1.0, 0.08, 0.0236, 0.0108, 0.00646, 0.00459, 0.00371, 0.00329, 0.00314}, - {1.0, 0.074, 0.0203, 0.00875, 0.00492, 0.00332, 0.00255, 0.00217, 0.00199, 0.00194}, - {1.0, 0.0689, 0.0177, 0.00718, 0.00381, 0.00244, 0.00179, 0.00146, 0.00129, 0.00121, 0.0012}, - {1.0, 0.0645, 0.0156, 0.00596, 0.003, 0.00183, 0.00128, 0.001, 0.000852, 0.000775, 0.000744}, // 15 - {1.0, 0.0606, 0.0138, 0.005, 0.00239, 0.00139, 0.000935, 0.000702, 0.000574, 0.000505, 0.00047, 0.000459}, - {1.0, 0.0571, 0.0123, 0.00423, 0.00193, 0.00107, 0.000692, 0.000499, 0.000394, 0.000335, 0.000302, 0.000287, 0.000284}, - {1.0, 0.054, 0.0111, 0.00362, 0.00158, 0.000839, 0.000519, 0.00036, 0.000275, 0.000226, 0.000198, 0.000183, 0.000176}, - {1.0, 0.0513, 0.00998, 0.00312, 0.0013, 0.000663, 0.000394, 0.000264, 0.000194, 0.000155, 0.000132, 0.000118, 0.000111, 0.000109}, - {1.0, 0.0488, 0.00906, 0.0027, 0.00108, 0.00053, 0.000303, 0.000196, 0.00014, 0.000108, 8.89e-05, 7.77e-05, 7.12e-05, 6.79e-05, 6.71e-05} // 20 - }; // the first column is a dummy column representing K=0. - - /** - * The optimal number of hashes for a given number of bits per element. - * These values are automatically calculated from the data above. - */ - private static final int[] optKPerBuckets = new int[probs.length]; - - static { - for (int i = 0; i < probs.length; i++) { - double min = Double.MAX_VALUE; - double[] prob = probs[i]; - for (int j = 0; j < prob.length; j++) { - if (prob[j] < min) { - min = prob[j]; - optKPerBuckets[i] = Math.max(minK, j); - } - } - } - } - - /** - * Given the number of buckets that can be used per element, return a - * specification that minimizes the false positive rate. - * - * @param bucketsPerElement The number of buckets per element for the filter. - * @return A spec that minimizes the false positive rate. 
- */ - public static BloomSpecification computeBloomSpec(int bucketsPerElement) { - assert bucketsPerElement >= 1; - assert bucketsPerElement <= probs.length - 1; - return new BloomSpecification(optKPerBuckets[bucketsPerElement], bucketsPerElement); - } - - /** - * A wrapper class that holds two key parameters for a Bloom Filter: the - * number of hash functions used, and the number of buckets per element used. - */ - public static class BloomSpecification { - final int K; // number of hash functions. - final int bucketsPerElement; - - public BloomSpecification(int k, int bucketsPerElement) { - K = k; - this.bucketsPerElement = bucketsPerElement; - } - } - - /** - * Given a maximum tolerable false positive probability, compute a Bloom - * specification which will give less than the specified false positive rate, - * but minimize the number of buckets per element and the number of hash - * functions used. Because bandwidth (and therefore total bitvector size) - * is considered more expensive than computing power, preference is given - * to minimizing buckets per element rather than number of hash functions. - * - * @param maxBucketsPerElement The maximum number of buckets available for the filter. - * @param maxFalsePosProb The maximum tolerable false positive rate. - * @return A Bloom Specification which would result in a false positive rate - * less than specified by the function call - * @throws UnsupportedOperationException if a filter satisfying the parameters cannot be met - */ - public static BloomSpecification computeBloomSpec(int maxBucketsPerElement, double maxFalsePosProb) { - assert maxBucketsPerElement >= 1; - assert maxBucketsPerElement <= probs.length - 1; - int maxK = probs[maxBucketsPerElement].length - 1; - - // Handle the trivial cases - if (maxFalsePosProb >= probs[minBuckets][minK]) { - return new BloomSpecification(2, optKPerBuckets[2]); - } - if (maxFalsePosProb < probs[maxBucketsPerElement][maxK]) { - throw new UnsupportedOperationException(String.format("Unable to satisfy %s with %s buckets per element", - maxFalsePosProb, maxBucketsPerElement)); - } - - // First find the minimal required number of buckets: - int bucketsPerElement = 2; - int K = optKPerBuckets[2]; - while (probs[bucketsPerElement][K] > maxFalsePosProb) { - bucketsPerElement++; - K = optKPerBuckets[bucketsPerElement]; - } - // Now that the number of buckets is sufficient, see if we can relax K - // without losing too much precision. - while (probs[bucketsPerElement][K - 1] <= maxFalsePosProb) { - K--; - } - - return new BloomSpecification(K, bucketsPerElement); - } - - /** - * Calculates the maximum number of buckets per element that this implementation - * can support. Crucially, it will lower the bucket count if necessary to meet - * BitSet's size restrictions. 
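 * (Two further notes, not part of the deleted source: the size restriction in question is
 * that numElements * bucketsPerElement + EXCESS stay below Long.MAX_VALUE, the addressable
 * limit of the backing OpenBitSet, as the method body below shows; and in
 * computeBloomSpec(int, double) above, the trivial-case return of
 * new BloomSpecification(2, optKPerBuckets[2]) appears to pass the (k, bucketsPerElement)
 * constructor arguments in swapped order.)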
- */ - public static int maxBucketsPerElement(long numElements) { - numElements = Math.max(1, numElements); - double v = (Long.MAX_VALUE - EXCESS) / (double) numElements; - if (v < 1.0) { - throw new UnsupportedOperationException("Cannot compute probabilities for " + numElements + " elements."); - } - return Math.min(BloomCalculations.probs.length - 1, (int) v); - } -} diff --git a/src/main/java/org/elasticsearch/common/bloom/BloomFilter.java b/src/main/java/org/elasticsearch/common/bloom/BloomFilter.java deleted file mode 100644 index 067277651fa..00000000000 --- a/src/main/java/org/elasticsearch/common/bloom/BloomFilter.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to Elastic Search and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Elastic Search licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.common.bloom; - -/** - * - */ -public interface BloomFilter { - - public static final BloomFilter NONE = new BloomFilter() { - @Override - public void add(byte[] key, int offset, int length) { - } - - @Override - public boolean isPresent(byte[] key, int offset, int length) { - return true; - } - - @Override - public long sizeInBytes() { - return 0; - } - }; - - public static final BloomFilter EMPTY = new BloomFilter() { - @Override - public void add(byte[] key, int offset, int length) { - } - - @Override - public boolean isPresent(byte[] key, int offset, int length) { - return false; - } - - @Override - public long sizeInBytes() { - return 0; - } - }; - - void add(byte[] key, int offset, int length); - - boolean isPresent(byte[] key, int offset, int length); - - long sizeInBytes(); -} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/common/bloom/BloomFilterFactory.java b/src/main/java/org/elasticsearch/common/bloom/BloomFilterFactory.java deleted file mode 100644 index 264d7b65e4a..00000000000 --- a/src/main/java/org/elasticsearch/common/bloom/BloomFilterFactory.java +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Licensed to Elastic Search and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Elastic Search licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.common.bloom; - -import org.elasticsearch.common.UUID; -import org.elasticsearch.common.logging.ESLogger; -import org.elasticsearch.common.logging.ESLoggerFactory; -import org.elasticsearch.common.unit.ByteSizeValue; -import org.elasticsearch.common.unit.SizeValue; -import org.elasticsearch.common.unit.TimeValue; - -import java.io.UnsupportedEncodingException; - -/** - * - */ -public class BloomFilterFactory { - - private static ESLogger logger = ESLoggerFactory.getLogger(BloomFilterFactory.class.getName()); - - private static final int EXCESS = 20; - - /** - * @return A BloomFilter with the lowest practical false positive probability - * for the given number of elements. - */ - public static BloomFilter getFilter(long numElements, int targetBucketsPerElem) { - int maxBucketsPerElement = Math.max(1, BloomCalculations.maxBucketsPerElement(numElements)); - int bucketsPerElement = Math.min(targetBucketsPerElem, maxBucketsPerElement); - if (bucketsPerElement < targetBucketsPerElem) { - logger.warn(String.format("Cannot provide an optimal BloomFilter for %d elements (%d/%d buckets per element).", - numElements, bucketsPerElement, targetBucketsPerElem)); - } - BloomCalculations.BloomSpecification spec = BloomCalculations.computeBloomSpec(bucketsPerElement); - return new ObsBloomFilter(spec.K, bucketsFor(numElements, spec.bucketsPerElement)); - } - - /** - * @return The smallest BloomFilter that can provide the given false positive - * probability rate for the given number of elements. - *

- * Asserts that the given probability can be satisfied using this filter. - */ - public static BloomFilter getFilter(long numElements, double maxFalsePosProbability) { - assert maxFalsePosProbability <= 1.0 : "Invalid probability"; - int bucketsPerElement = BloomCalculations.maxBucketsPerElement(numElements); - BloomCalculations.BloomSpecification spec = BloomCalculations.computeBloomSpec(bucketsPerElement, maxFalsePosProbability); - return new ObsBloomFilter(spec.K, bucketsFor(numElements, spec.bucketsPerElement)); - } - - private static long bucketsFor(long numElements, int bucketsPer) { - return numElements * bucketsPer + EXCESS; - } - - public static void main(String[] args) throws UnsupportedEncodingException { - long elements = SizeValue.parseSizeValue("100m").singles(); - BloomFilter filter = BloomFilterFactory.getFilter(elements, 15); - System.out.println("Filter size: " + new ByteSizeValue(filter.sizeInBytes())); - for (long i = 0; i < elements; i++) { - byte[] utf8s = UUID.randomBase64UUID().getBytes("UTF8"); - filter.add(utf8s, 0, utf8s.length); - } - long falsePositives = 0; - for (long i = 0; i < elements; i++) { - byte[] utf8s = UUID.randomBase64UUID().getBytes("UTF8"); - if (filter.isPresent(utf8s, 0, utf8s.length)) { - falsePositives++; - } - } - System.out.println("false positives: " + falsePositives); - - byte[] utf8s = UUID.randomBase64UUID().getBytes("UTF8"); - long time = System.currentTimeMillis(); - for (long i = 0; i < elements; i++) { - if (filter.isPresent(utf8s, 0, utf8s.length)) { - } - } - long timeSize = System.currentTimeMillis() - time; - System.out.println("Indexed in " + new TimeValue(timeSize) + ", TPS: " + (elements / timeSize) + " per millisecond"); - } -} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/common/bloom/ObsBloomFilter.java b/src/main/java/org/elasticsearch/common/bloom/ObsBloomFilter.java deleted file mode 100644 index d9cb31b6012..00000000000 --- a/src/main/java/org/elasticsearch/common/bloom/ObsBloomFilter.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Licensed to Elastic Search and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Elastic Search licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.common.bloom; - -import org.apache.lucene.util.OpenBitSet; -import org.elasticsearch.common.MurmurHash; -import org.elasticsearch.common.RamUsage; - -public class ObsBloomFilter implements BloomFilter { - - private final int hashCount; - - private final OpenBitSet bitset; - private final long size; - - ObsBloomFilter(int hashCount, long size) { - this.hashCount = hashCount; - this.bitset = new OpenBitSet(size); - this.size = size; - } - - long emptyBuckets() { - long n = 0; - for (long i = 0; i < buckets(); i++) { - if (!bitset.get(i)) { - n++; - } - } - return n; - } - - private long buckets() { - return size; - } - - private long[] getHashBuckets(byte[] key, int offset, int length) { - return getHashBuckets(key, offset, length, hashCount, buckets()); - } - - static long[] getHashBuckets(byte[] b, int offset, int length, int hashCount, long max) { - long[] result = new long[hashCount]; - long[] hash = MurmurHash.hash3_x64_128(b, offset, length, 0L); - for (int i = 0; i < hashCount; ++i) { - result[i] = Math.abs((hash[0] + (long) i * hash[1]) % max); - } - return result; - } - - @Override - public void add(byte[] key, int offset, int length) { - // inline the hash buckets so we don't have to create the int[] each time... - long[] hash = MurmurHash.hash3_x64_128(key, offset, length, 0L); - for (int i = 0; i < hashCount; ++i) { - long bucketIndex = Math.abs((hash[0] + (long) i * hash[1]) % size); - bitset.fastSet(bucketIndex); - } - } - - @Override - public boolean isPresent(byte[] key, int offset, int length) { - // inline the hash buckets so we don't have to create the int[] each time... - long[] hash = MurmurHash.hash3_x64_128(key, offset, length, 0L); - for (int i = 0; i < hashCount; ++i) { - long bucketIndex = Math.abs((hash[0] + (long) i * hash[1]) % size); - if (!bitset.fastGet(bucketIndex)) { - return false; - } - } - return true; - } - - public void clear() { - bitset.clear(0, bitset.size()); - } - - @Override - public long sizeInBytes() { - return bitset.getBits().length * RamUsage.NUM_BYTES_LONG + RamUsage.NUM_BYTES_ARRAY_HEADER + RamUsage.NUM_BYTES_INT /* wlen */; - } -} diff --git a/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java b/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java index 2fd24860281..f0d56957b44 100644 --- a/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java +++ b/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java @@ -24,7 +24,9 @@ import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; import org.apache.lucene.document.AbstractField; import org.apache.lucene.document.Field; -import org.apache.lucene.index.*; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.Term; import org.elasticsearch.common.Numbers; import org.elasticsearch.common.lucene.Lucene; @@ -38,25 +40,23 @@ public class UidField extends AbstractField { public static class DocIdAndVersion { public final int docId; - public final int docStart; public final long version; - public final IndexReader reader; + public final AtomicReaderContext reader; - public DocIdAndVersion(int docId, long version, IndexReader reader, int docStart) { + public DocIdAndVersion(int docId, long version, AtomicReaderContext reader) { this.docId = docId; this.version = version; this.reader = reader; - this.docStart = docStart; } } // this works fine for nested docs since they don't 
have the payload which has the version // so we iterate till we find the one with the payload - public static DocIdAndVersion loadDocIdAndVersion(IndexReader subReader, int docStart, Term term) { + public static DocIdAndVersion loadDocIdAndVersion(AtomicReaderContext reader, Term term) { int docId = Lucene.NO_DOC; TermPositions uid = null; try { - uid = subReader.termPositions(term); + uid = reader.termPositions(term); if (!uid.next()) { return null; // no doc } @@ -72,11 +72,11 @@ public class UidField extends AbstractField { continue; } byte[] payload = uid.getPayload(new byte[8], 0); - return new DocIdAndVersion(docId, Numbers.bytesToLong(payload), subReader, docStart); + return new DocIdAndVersion(docId, Numbers.bytesToLong(payload), reader); } while (uid.next()); - return new DocIdAndVersion(docId, -2, subReader, docStart); + return new DocIdAndVersion(docId, -2, reader); } catch (Exception e) { - return new DocIdAndVersion(docId, -2, subReader, docStart); + return new DocIdAndVersion(docId, -2, reader); } finally { if (uid != null) { try { @@ -92,7 +92,7 @@ public class UidField extends AbstractField { * Load the version for the uid from the reader, returning -1 if no doc exists, or -2 if * no version is available (for backward comp.) */ - public static long loadVersion(IndexReader reader, Term term) { + public static long loadVersion(AtomicReaderContext reader, Term term) { TermPositions uid = null; try { uid = reader.termPositions(term); diff --git a/src/main/java/org/elasticsearch/index/cache/CacheStats.java b/src/main/java/org/elasticsearch/index/cache/CacheStats.java index 03cc05db419..eb1c420ae4b 100644 --- a/src/main/java/org/elasticsearch/index/cache/CacheStats.java +++ b/src/main/java/org/elasticsearch/index/cache/CacheStats.java @@ -39,19 +39,17 @@ public class CacheStats implements Streamable, ToXContent { long filterCount; long fieldSize; long filterSize; - long bloomSize; long idCacheSize; public CacheStats() { } - public CacheStats(long fieldEvictions, long filterEvictions, long fieldSize, long filterSize, long filterCount, long bloomSize, long idCacheSize) { + public CacheStats(long fieldEvictions, long filterEvictions, long fieldSize, long filterSize, long filterCount, long idCacheSize) { this.fieldEvictions = fieldEvictions; this.filterEvictions = filterEvictions; this.fieldSize = fieldSize; this.filterSize = filterSize; this.filterCount = filterCount; - this.bloomSize = bloomSize; this.idCacheSize = idCacheSize; } @@ -61,7 +59,6 @@ public class CacheStats implements Streamable, ToXContent { this.fieldSize += stats.fieldSize; this.filterSize += stats.filterSize; this.filterCount += stats.filterCount; - this.bloomSize += stats.bloomSize; this.idCacheSize += stats.idCacheSize; } @@ -129,22 +126,6 @@ public class CacheStats implements Streamable, ToXContent { return filterSize(); } - public long bloomSizeInBytes() { - return this.bloomSize; - } - - public long getBloomSizeInBytes() { - return this.bloomSize; - } - - public ByteSizeValue bloomSize() { - return new ByteSizeValue(bloomSize); - } - - public ByteSizeValue getBloomSize() { - return bloomSize(); - } - public long idCacheSizeInBytes() { return idCacheSize; } @@ -158,7 +139,7 @@ public class CacheStats implements Streamable, ToXContent { } public ByteSizeValue getIdCacheSize() { - return bloomSize(); + return idCacheSize(); } @Override @@ -171,8 +152,6 @@ public class CacheStats implements Streamable, ToXContent { builder.field(Fields.FILTER_EVICTIONS, filterEvictions); builder.field(Fields.FILTER_SIZE, 
filterSize().toString()); builder.field(Fields.FILTER_SIZE_IN_BYTES, filterSize); - builder.field(Fields.BLOOM_SIZE, bloomSize().toString()); - builder.field(Fields.BLOOM_SIZE_IN_BYTES, bloomSize); builder.field(Fields.ID_CACHE_SIZE, idCacheSize().toString()); builder.field(Fields.ID_CACHE_SIZE_IN_BYTES, idCacheSize); builder.endObject(); @@ -188,8 +167,6 @@ public class CacheStats implements Streamable, ToXContent { static final XContentBuilderString FILTER_COUNT = new XContentBuilderString("filter_count"); static final XContentBuilderString FILTER_SIZE = new XContentBuilderString("filter_size"); static final XContentBuilderString FILTER_SIZE_IN_BYTES = new XContentBuilderString("filter_size_in_bytes"); - static final XContentBuilderString BLOOM_SIZE = new XContentBuilderString("bloom_size"); - static final XContentBuilderString BLOOM_SIZE_IN_BYTES = new XContentBuilderString("bloom_size_in_bytes"); static final XContentBuilderString ID_CACHE_SIZE = new XContentBuilderString("id_cache_size"); static final XContentBuilderString ID_CACHE_SIZE_IN_BYTES = new XContentBuilderString("id_cache_size_in_bytes"); } @@ -207,7 +184,6 @@ public class CacheStats implements Streamable, ToXContent { fieldSize = in.readVLong(); filterSize = in.readVLong(); filterCount = in.readVLong(); - bloomSize = in.readVLong(); idCacheSize = in.readVLong(); } @@ -218,7 +194,6 @@ public class CacheStats implements Streamable, ToXContent { out.writeVLong(fieldSize); out.writeVLong(filterSize); out.writeVLong(filterCount); - out.writeVLong(bloomSize); out.writeVLong(idCacheSize); } } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/cache/IndexCache.java b/src/main/java/org/elasticsearch/index/cache/IndexCache.java index 8350a69bbbc..0e2957bdded 100644 --- a/src/main/java/org/elasticsearch/index/cache/IndexCache.java +++ b/src/main/java/org/elasticsearch/index/cache/IndexCache.java @@ -31,7 +31,6 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.index.AbstractIndexComponent; import org.elasticsearch.index.Index; -import org.elasticsearch.index.cache.bloom.BloomCache; import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.cache.filter.FilterCache; import org.elasticsearch.index.cache.id.IdCache; @@ -51,8 +50,6 @@ public class IndexCache extends AbstractIndexComponent implements CloseableCompo private final IdCache idCache; - private final BloomCache bloomCache; - private final TimeValue refreshInterval; private ClusterService clusterService; @@ -62,13 +59,12 @@ public class IndexCache extends AbstractIndexComponent implements CloseableCompo @Inject public IndexCache(Index index, @IndexSettings Settings indexSettings, FilterCache filterCache, FieldDataCache fieldDataCache, - QueryParserCache queryParserCache, IdCache idCache, BloomCache bloomCache) { + QueryParserCache queryParserCache, IdCache idCache) { super(index, indexSettings); this.filterCache = filterCache; this.fieldDataCache = fieldDataCache; this.queryParserCache = queryParserCache; this.idCache = idCache; - this.bloomCache = bloomCache; this.refreshInterval = componentSettings.getAsTime("stats.refresh_interval", TimeValue.timeValueSeconds(1)); @@ -85,7 +81,7 @@ public class IndexCache extends AbstractIndexComponent implements CloseableCompo public synchronized void invalidateCache() { FilterCache.EntriesStats filterEntriesStats = filterCache.entriesStats(); - latestCacheStats = new 
CacheStats(fieldDataCache.evictions(), filterCache.evictions(), fieldDataCache.sizeInBytes(), filterEntriesStats.sizeInBytes, filterEntriesStats.count, bloomCache.sizeInBytes(), idCache.sizeInBytes()); + latestCacheStats = new CacheStats(fieldDataCache.evictions(), filterCache.evictions(), fieldDataCache.sizeInBytes(), filterEntriesStats.sizeInBytes, filterEntriesStats.count, idCache.sizeInBytes()); latestCacheStatsTimestamp = System.currentTimeMillis(); } @@ -93,7 +89,7 @@ public class IndexCache extends AbstractIndexComponent implements CloseableCompo long timestamp = System.currentTimeMillis(); if ((timestamp - latestCacheStatsTimestamp) > refreshInterval.millis()) { FilterCache.EntriesStats filterEntriesStats = filterCache.entriesStats(); - latestCacheStats = new CacheStats(fieldDataCache.evictions(), filterCache.evictions(), fieldDataCache.sizeInBytes(), filterEntriesStats.sizeInBytes, filterEntriesStats.count, bloomCache.sizeInBytes(), idCache.sizeInBytes()); + latestCacheStats = new CacheStats(fieldDataCache.evictions(), filterCache.evictions(), fieldDataCache.sizeInBytes(), filterEntriesStats.sizeInBytes, filterEntriesStats.count, idCache.sizeInBytes()); latestCacheStatsTimestamp = timestamp; } return latestCacheStats; @@ -111,10 +107,6 @@ public class IndexCache extends AbstractIndexComponent implements CloseableCompo return this.idCache; } - public BloomCache bloomCache() { - return this.bloomCache; - } - public QueryParserCache queryParserCache() { return this.queryParserCache; } @@ -125,7 +117,6 @@ public class IndexCache extends AbstractIndexComponent implements CloseableCompo fieldDataCache.close(); idCache.close(); queryParserCache.close(); - bloomCache.close(); if (clusterService != null) { clusterService.remove(this); } @@ -135,7 +126,6 @@ public class IndexCache extends AbstractIndexComponent implements CloseableCompo filterCache.clear(reader); fieldDataCache.clear(reader); idCache.clear(reader); - bloomCache.clear(reader); } public void clear(String reason) { @@ -143,7 +133,6 @@ public class IndexCache extends AbstractIndexComponent implements CloseableCompo fieldDataCache.clear(reason); idCache.clear(); queryParserCache.clear(); - bloomCache.clear(); } @Override diff --git a/src/main/java/org/elasticsearch/index/cache/IndexCacheModule.java b/src/main/java/org/elasticsearch/index/cache/IndexCacheModule.java index e89b1aba6e9..c95535c2b70 100644 --- a/src/main/java/org/elasticsearch/index/cache/IndexCacheModule.java +++ b/src/main/java/org/elasticsearch/index/cache/IndexCacheModule.java @@ -21,7 +21,6 @@ package org.elasticsearch.index.cache; import org.elasticsearch.common.inject.AbstractModule; import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.cache.bloom.BloomCacheModule; import org.elasticsearch.index.cache.field.data.FieldDataCacheModule; import org.elasticsearch.index.cache.filter.FilterCacheModule; import org.elasticsearch.index.cache.id.IdCacheModule; @@ -44,7 +43,6 @@ public class IndexCacheModule extends AbstractModule { new FieldDataCacheModule(settings).configure(binder()); new IdCacheModule(settings).configure(binder()); new QueryParserCacheModule(settings).configure(binder()); - new BloomCacheModule(settings).configure(binder()); bind(IndexCache.class).asEagerSingleton(); } diff --git a/src/main/java/org/elasticsearch/index/cache/bloom/BloomCache.java b/src/main/java/org/elasticsearch/index/cache/bloom/BloomCache.java deleted file mode 100644 index a50c1ca419a..00000000000 --- 
a/src/main/java/org/elasticsearch/index/cache/bloom/BloomCache.java +++ /dev/null @@ -1,45 +0,0 @@ -/* - * Licensed to Elastic Search and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Elastic Search licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.index.cache.bloom; - -import org.apache.lucene.index.IndexReader; -import org.elasticsearch.common.bloom.BloomFilter; -import org.elasticsearch.common.component.CloseableComponent; -import org.elasticsearch.index.IndexComponent; - -/** - * - */ -public interface BloomCache extends IndexComponent, CloseableComponent { - - /** - * *Async* loads a bloom filter for the field name. Note, this one only supports - * fields that have a single term per doc. - */ - BloomFilter filter(IndexReader reader, String fieldName, boolean asyncLoad); - - void clear(); - - void clear(IndexReader reader); - - long sizeInBytes(); - - long sizeInBytes(String fieldName); -} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/cache/bloom/BloomCacheModule.java b/src/main/java/org/elasticsearch/index/cache/bloom/BloomCacheModule.java deleted file mode 100644 index 55d7182b498..00000000000 --- a/src/main/java/org/elasticsearch/index/cache/bloom/BloomCacheModule.java +++ /dev/null @@ -1,48 +0,0 @@ -/* - * Licensed to Elastic Search and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Elastic Search licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License.
- */ - -package org.elasticsearch.index.cache.bloom; - -import org.elasticsearch.common.inject.AbstractModule; -import org.elasticsearch.common.inject.Scopes; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.cache.bloom.simple.SimpleBloomCache; - -/** - * - */ -public class BloomCacheModule extends AbstractModule { - - public static final class BloomCacheSettings { - public static final String TYPE = "index.cache.bloom.type"; - } - - private final Settings settings; - - public BloomCacheModule(Settings settings) { - this.settings = settings; - } - - @Override - protected void configure() { - bind(BloomCache.class) - .to(settings.getAsClass(BloomCacheSettings.TYPE, SimpleBloomCache.class, "org.elasticsearch.index.cache.bloom.", "BloomCache")) - .in(Scopes.SINGLETON); - } -} diff --git a/src/main/java/org/elasticsearch/index/cache/bloom/none/NoneBloomCache.java b/src/main/java/org/elasticsearch/index/cache/bloom/none/NoneBloomCache.java deleted file mode 100644 index 1255d84260b..00000000000 --- a/src/main/java/org/elasticsearch/index/cache/bloom/none/NoneBloomCache.java +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Licensed to Elastic Search and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Elastic Search licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.cache.bloom.none; - -import org.apache.lucene.index.IndexReader; -import org.elasticsearch.ElasticSearchException; -import org.elasticsearch.common.bloom.BloomFilter; -import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.common.settings.ImmutableSettings; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.index.AbstractIndexComponent; -import org.elasticsearch.index.Index; -import org.elasticsearch.index.cache.bloom.BloomCache; -import org.elasticsearch.index.settings.IndexSettings; - -/** - * - */ -public class NoneBloomCache extends AbstractIndexComponent implements BloomCache { - - public NoneBloomCache(Index index) { - super(index, ImmutableSettings.Builder.EMPTY_SETTINGS); - } - - @Inject - public NoneBloomCache(Index index, @IndexSettings Settings indexSettings) { - super(index, indexSettings); - } - - @Override - public BloomFilter filter(IndexReader reader, String fieldName, boolean asyncLoad) { - return BloomFilter.NONE; - } - - @Override - public void clear() { - } - - @Override - public void clear(IndexReader reader) { - } - - @Override - public long sizeInBytes() { - return 0; - } - - @Override - public long sizeInBytes(String fieldName) { - return 0; - } - - @Override - public void close() throws ElasticSearchException { - } -} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/cache/bloom/simple/SimpleBloomCache.java b/src/main/java/org/elasticsearch/index/cache/bloom/simple/SimpleBloomCache.java deleted file mode 100644 index f84a8b3e6c0..00000000000 --- a/src/main/java/org/elasticsearch/index/cache/bloom/simple/SimpleBloomCache.java +++ /dev/null @@ -1,250 +0,0 @@ -/* - * Licensed to Elastic Search and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Elastic Search licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.index.cache.bloom.simple; - -import org.apache.lucene.index.*; -import org.apache.lucene.store.AlreadyClosedException; -import org.apache.lucene.util.StringHelper; -import org.apache.lucene.util.UnicodeUtil; -import org.elasticsearch.ElasticSearchException; -import org.elasticsearch.common.Unicode; -import org.elasticsearch.common.bloom.BloomFilter; -import org.elasticsearch.common.bloom.BloomFilterFactory; -import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.common.settings.Settings; -import org.elasticsearch.common.unit.SizeUnit; -import org.elasticsearch.common.unit.SizeValue; -import org.elasticsearch.common.util.concurrent.ConcurrentCollections; -import org.elasticsearch.index.AbstractIndexComponent; -import org.elasticsearch.index.Index; -import org.elasticsearch.index.cache.bloom.BloomCache; -import org.elasticsearch.index.settings.IndexSettings; -import org.elasticsearch.threadpool.ThreadPool; - -import java.nio.channels.ClosedChannelException; -import java.util.concurrent.ConcurrentMap; -import java.util.concurrent.atomic.AtomicBoolean; - -/** - * - */ -public class SimpleBloomCache extends AbstractIndexComponent implements BloomCache, SegmentReader.CoreClosedListener { - - private final ThreadPool threadPool; - - private final long maxSize; - - private final ConcurrentMap<Object, ConcurrentMap<String, BloomFilterEntry>> cache; - - private final Object creationMutex = new Object(); - - @Inject - public SimpleBloomCache(Index index, @IndexSettings Settings indexSettings, ThreadPool threadPool) { - super(index, indexSettings); - this.threadPool = threadPool; - - this.maxSize = indexSettings.getAsSize("index.cache.bloom.max_size", new SizeValue(500, SizeUnit.MEGA)).singles(); - this.cache = ConcurrentCollections.newConcurrentMap(); - } - - @Override - public void close() throws ElasticSearchException { - clear(); - } - - @Override - public void clear() { - cache.clear(); - } - - @Override - public void onClose(SegmentReader owner) { - clear(owner); - } - - @Override - public void clear(IndexReader reader) { - ConcurrentMap<String, BloomFilterEntry> map = cache.remove(reader.getCoreCacheKey()); - // help soft/weak handling GC - if (map != null) { - map.clear(); - } - } - - @Override - public long sizeInBytes() { - // the overhead of the map is not really relevant...
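// (sizing note: per ObsBloomFilter.sizeInBytes() above, each entry's cost is dominated by
// the OpenBitSet's backing long[], so ignoring the map bookkeeping here loses little precision)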
- long sizeInBytes = 0; - for (ConcurrentMap<String, BloomFilterEntry> map : cache.values()) { - for (BloomFilterEntry filter : map.values()) { - sizeInBytes += filter.filter.sizeInBytes(); - } - } - return sizeInBytes; - } - - @Override - public long sizeInBytes(String fieldName) { - long sizeInBytes = 0; - for (ConcurrentMap<String, BloomFilterEntry> map : cache.values()) { - BloomFilterEntry filter = map.get(fieldName); - if (filter != null) { - sizeInBytes += filter.filter.sizeInBytes(); - } - } - return sizeInBytes; - } - - @Override - public BloomFilter filter(IndexReader reader, String fieldName, boolean asyncLoad) { - int currentNumDocs = reader.numDocs(); - if (currentNumDocs == 0) { - return BloomFilter.EMPTY; - } - ConcurrentMap<String, BloomFilterEntry> fieldCache = cache.get(reader.getCoreCacheKey()); - if (fieldCache == null) { - synchronized (creationMutex) { - fieldCache = cache.get(reader.getCoreCacheKey()); - if (fieldCache == null) { - if (reader instanceof SegmentReader) { - ((SegmentReader) reader).addCoreClosedListener(this); - } - fieldCache = ConcurrentCollections.newConcurrentMap(); - cache.put(reader.getCoreCacheKey(), fieldCache); - } - } - } - BloomFilterEntry filter = fieldCache.get(fieldName); - if (filter == null) { - synchronized (fieldCache) { - filter = fieldCache.get(fieldName); - if (filter == null) { - filter = new BloomFilterEntry(currentNumDocs, BloomFilter.NONE); - fieldCache.put(fieldName, filter); - // now, do the async load of it... - if (currentNumDocs < maxSize) { - filter.loading.set(true); - BloomFilterLoader loader = new BloomFilterLoader(reader, fieldName); - if (asyncLoad) { - threadPool.executor(ThreadPool.Names.CACHE).execute(loader); - } else { - loader.run(); - filter = fieldCache.get(fieldName); - } - } - } - } - } - // if we have too many deletes, we need to reload the bloom filter so it will be more effective - if (filter.numDocs > 1000 && filter.numDocs < maxSize && (currentNumDocs / filter.numDocs) < 0.6) { - if (filter.loading.compareAndSet(false, true)) { - // do the async loading - BloomFilterLoader loader = new BloomFilterLoader(reader, fieldName); - if (asyncLoad) { - threadPool.executor(ThreadPool.Names.CACHE).execute(loader); - } else { - loader.run(); - filter = fieldCache.get(fieldName); - } - } - } - return filter.filter; - } - - class BloomFilterLoader implements Runnable { - private final IndexReader reader; - private final String field; - - BloomFilterLoader(IndexReader reader, String field) { - this.reader = reader; - this.field = StringHelper.intern(field); - } - - @SuppressWarnings({"StringEquality"}) - @Override - public void run() { - TermDocs termDocs = null; - TermEnum termEnum = null; - try { - UnicodeUtil.UTF8Result utf8Result = new UnicodeUtil.UTF8Result(); - BloomFilter filter = BloomFilterFactory.getFilter(reader.numDocs(), 15); - termDocs = reader.termDocs(); - termEnum = reader.terms(new Term(field)); - do { - Term term = termEnum.term(); - if (term == null || term.field() != field) break; - - // LUCENE MONITOR: 4.0, move to use bytes!
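// Two asides on SimpleBloomCache, beyond the patch itself: the reload check above divides
// two ints, so (currentNumDocs / filter.numDocs) < 0.6 only fires once currentNumDocs drops
// below filter.numDocs; and a hypothetical Lucene 4 form of this loader (assumed API, not
// part of this patch) would walk the terms as raw bytes, skipping the String round-trip:
//   Terms terms = MultiFields.getTerms(reader, field);
//   if (terms != null) {
//     TermsEnum termsEnum = terms.iterator(null);
//     for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
//       filter.add(term.bytes, term.offset, term.length);
//     }
//   }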
- Unicode.fromStringAsUtf8(term.text(), utf8Result); - termDocs.seek(termEnum); - while (termDocs.next()) { - // when traversing, make sure to ignore deleted docs, so the key->docId will be correct - if (!reader.isDeleted(termDocs.doc())) { - filter.add(utf8Result.result, 0, utf8Result.length); - } - } - } while (termEnum.next()); - ConcurrentMap fieldCache = cache.get(reader.getCoreCacheKey()); - if (fieldCache != null) { - if (fieldCache.containsKey(field)) { - BloomFilterEntry filterEntry = new BloomFilterEntry(reader.numDocs(), filter); - filterEntry.loading.set(false); - fieldCache.put(field, filterEntry); - } - } - } catch (AlreadyClosedException e) { - // ignore, we are getting closed - } catch (ClosedChannelException e) { - // ignore, we are getting closed - } catch (Exception e) { - // ignore failures that result from a closed reader... - if (reader.getRefCount() > 0) { - logger.warn("failed to load bloom filter for [{}]", e, field); - } - } finally { - try { - if (termDocs != null) { - termDocs.close(); - } - } catch (Exception e) { - // ignore - } - try { - if (termEnum != null) { - termEnum.close(); - } - } catch (Exception e) { - // ignore - } - } - } - } - - static class BloomFilterEntry { - final int numDocs; - final BloomFilter filter; - final AtomicBoolean loading = new AtomicBoolean(); - - public BloomFilterEntry(int numDocs, BloomFilter filter) { - this.numDocs = numDocs; - this.filter = filter; - } - } -} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java b/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java index b52dbb878a8..103ecca8917 100644 --- a/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java +++ b/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java @@ -23,15 +23,12 @@ import com.google.common.collect.Lists; import org.apache.lucene.index.*; import org.apache.lucene.search.*; import org.apache.lucene.store.AlreadyClosedException; -import org.apache.lucene.util.UnicodeUtil; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.ElasticSearchIllegalStateException; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.routing.operation.hash.djb.DjbHashFunction; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Preconditions; -import org.elasticsearch.common.Unicode; -import org.elasticsearch.common.bloom.BloomFilter; import org.elasticsearch.common.collect.MapBuilder; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.Lucene; @@ -43,12 +40,10 @@ import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.ConcurrentCollections; import org.elasticsearch.index.VersionType; import org.elasticsearch.index.analysis.AnalysisService; -import org.elasticsearch.index.cache.bloom.BloomCache; import org.elasticsearch.index.deletionpolicy.SnapshotDeletionPolicy; import org.elasticsearch.index.deletionpolicy.SnapshotIndexCommit; import org.elasticsearch.index.engine.*; import org.elasticsearch.index.indexing.ShardIndexingService; -import org.elasticsearch.index.mapper.internal.UidFieldMapper; import org.elasticsearch.index.merge.policy.EnableMergePolicy; import org.elasticsearch.index.merge.policy.MergePolicyProvider; import org.elasticsearch.index.merge.scheduler.MergeSchedulerProvider; @@ -91,7 +86,6 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { private volatile int indexConcurrency; 
private long gcDeletesInMillis; private volatile boolean enableGcDeletes = true; - private final boolean asyncLoadBloomFilter; private final ThreadPool threadPool; @@ -106,7 +100,6 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { private final MergeSchedulerProvider mergeScheduler; private final AnalysisService analysisService; private final SimilarityService similarityService; - private final BloomCache bloomCache; private final ReadWriteLock rwl = new ReentrantReadWriteLock(); @@ -154,8 +147,7 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { IndexSettingsService indexSettingsService, ShardIndexingService indexingService, @Nullable IndicesWarmer warmer, Store store, SnapshotDeletionPolicy deletionPolicy, Translog translog, MergePolicyProvider mergePolicyProvider, MergeSchedulerProvider mergeScheduler, - AnalysisService analysisService, SimilarityService similarityService, - BloomCache bloomCache) throws EngineException { + AnalysisService analysisService, SimilarityService similarityService) throws EngineException { super(shardId, indexSettings); Preconditions.checkNotNull(store, "Store must be provided to the engine"); Preconditions.checkNotNull(deletionPolicy, "Snapshot deletion policy must be provided to the engine"); @@ -165,7 +157,6 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { this.indexingBufferSize = componentSettings.getAsBytesSize("index_buffer_size", new ByteSizeValue(64, ByteSizeUnit.MB)); // not really important, as it is set by the IndexingMemory manager this.termIndexInterval = indexSettings.getAsInt("index.term_index_interval", IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL); this.termIndexDivisor = indexSettings.getAsInt("index.term_index_divisor", 1); // IndexReader#DEFAULT_TERMS_INDEX_DIVISOR - this.asyncLoadBloomFilter = componentSettings.getAsBoolean("async_load_bloom", true); // Here for testing, should always be true this.threadPool = threadPool; this.indexSettingsService = indexSettingsService; @@ -178,7 +169,6 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { this.mergeScheduler = mergeScheduler; this.analysisService = analysisService; this.similarityService = similarityService; - this.bloomCache = bloomCache; this.indexConcurrency = indexSettings.getAsInt("index.index_concurrency", IndexWriterConfig.DEFAULT_MAX_THREAD_STATES); this.versionMap = ConcurrentCollections.newConcurrentMap(); @@ -328,16 +318,10 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { // no version, get the version from the index, we know that we refresh on flush Searcher searcher = searcher(); try { - UnicodeUtil.UTF8Result utf8 = Unicode.fromStringAsUtf8(get.uid().text()); - for (int i = 0; i < searcher.searcher().subReaders().length; i++) { - IndexReader subReader = searcher.searcher().subReaders()[i]; - BloomFilter filter = bloomCache.filter(subReader, UidFieldMapper.NAME, asyncLoadBloomFilter); - // we know that it's not there...
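// (The + lines of this hunk drop the bloom short-circuit entirely: the Lucene 4 replacement
// simply walks searcher.reader().leaves() and resolves the uid against each
// AtomicReaderContext in turn.)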
- if (!filter.isPresent(utf8.result, 0, utf8.length)) { - continue; - } - int docStart = searcher.searcher().docStarts()[i]; - UidField.DocIdAndVersion docIdAndVersion = UidField.loadDocIdAndVersion(subReader, docStart, get.uid()); + List<AtomicReaderContext> readers = searcher.reader().leaves(); + for (int i = 0; i < readers.size(); i++) { + AtomicReaderContext readerContext = readers.get(i); + UidField.DocIdAndVersion docIdAndVersion = UidField.loadDocIdAndVersion(readerContext, get.uid()); if (docIdAndVersion != null && docIdAndVersion.docId != Lucene.NO_DOC) { return new GetResult(searcher, docIdAndVersion); } @@ -1321,16 +1305,12 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { } private long loadCurrentVersionFromIndex(Term uid) { - UnicodeUtil.UTF8Result utf8 = Unicode.fromStringAsUtf8(uid.text()); Searcher searcher = searcher(); try { - for (IndexReader reader : searcher.searcher().subReaders()) { - BloomFilter filter = bloomCache.filter(reader, UidFieldMapper.NAME, asyncLoadBloomFilter); - // we know that it's not there... - if (!filter.isPresent(utf8.result, 0, utf8.length)) { - continue; - } - long version = UidField.loadVersion(reader, uid); + List<AtomicReaderContext> readers = searcher.reader().leaves(); + for (int i = 0; i < readers.size(); i++) { + AtomicReaderContext readerContext = readers.get(i); + long version = UidField.loadVersion(readerContext, uid); // either -2 (it's there, but no version associated), or an actual version if (version != -1) { return version; diff --git a/src/main/java/org/elasticsearch/index/search/UidFilter.java b/src/main/java/org/elasticsearch/index/search/UidFilter.java index 79e65d896b2..99d320114bb 100644 --- a/src/main/java/org/elasticsearch/index/search/UidFilter.java +++ b/src/main/java/org/elasticsearch/index/search/UidFilter.java @@ -43,6 +43,7 @@ public class UidFilter extends Filter { private final BloomCache bloomCache; + // LUCENE 4 UPGRADE: We removed the bloom cache, so once we rewrite this filter, do it without the bloom cache public UidFilter(Collection<String> types, List<String> ids, BloomCache bloomCache) { this.bloomCache = bloomCache; this.uids = new Term[types.size() * ids.size()]; diff --git a/src/main/java/org/elasticsearch/rest/action/admin/indices/cache/clear/RestClearIndicesCacheAction.java b/src/main/java/org/elasticsearch/rest/action/admin/indices/cache/clear/RestClearIndicesCacheAction.java index 7d530459a65..63e36ad4594 100644 --- a/src/main/java/org/elasticsearch/rest/action/admin/indices/cache/clear/RestClearIndicesCacheAction.java +++ b/src/main/java/org/elasticsearch/rest/action/admin/indices/cache/clear/RestClearIndicesCacheAction.java @@ -66,7 +66,6 @@ public class RestClearIndicesCacheAction extends BaseRestHandler { clearIndicesCacheRequest.filterCache(request.paramAsBoolean("filter", clearIndicesCacheRequest.filterCache())); clearIndicesCacheRequest.fieldDataCache(request.paramAsBoolean("field_data", clearIndicesCacheRequest.fieldDataCache())); clearIndicesCacheRequest.idCache(request.paramAsBoolean("id", clearIndicesCacheRequest.idCache())); - clearIndicesCacheRequest.bloomCache(request.paramAsBoolean("bloom", clearIndicesCacheRequest.bloomCache())); clearIndicesCacheRequest.fields(request.paramAsStringArray("fields", clearIndicesCacheRequest.fields())); BroadcastOperationThreading operationThreading = BroadcastOperationThreading.fromString(request.param("operationThreading"), BroadcastOperationThreading.SINGLE_THREAD); From 7972f6f95903602f329dc3317ffab258d75911e1 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Fri, 26 Oct 2012 00:17:59 +0200
Subject: [PATCH 008/146] lucene 4: fix call to expungeDeletes --- .../java/org/elasticsearch/index/engine/robin/RobinEngine.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java b/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java index 103ecca8917..7505aa2e9da 100644 --- a/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java +++ b/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java @@ -1018,7 +1018,7 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { ((EnableMergePolicy) indexWriter.getConfig().getMergePolicy()).enableMerge(); } if (optimize.onlyExpungeDeletes()) { - indexWriter.expungeDeletes(false); + indexWriter.forceMergeDeletes(false); } else if (optimize.maxNumSegments() <= 0) { indexWriter.maybeMerge(); possibleMergeNeeded = false; From f4418fb181881d5e5158fc97b01291210599e7a8 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Fri, 26 Oct 2012 00:27:29 +0200 Subject: [PATCH 009/146] lucene 4: fix segments info usage --- .../elasticsearch/common/lucene/Lucene.java | 4 +- .../index/engine/robin/RobinEngine.java | 41 ++++++++----------- 2 files changed, 20 insertions(+), 25 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/Lucene.java b/src/main/java/org/elasticsearch/common/lucene/Lucene.java index 6929b163224..1e7e8688cb4 100644 --- a/src/main/java/org/elasticsearch/common/lucene/Lucene.java +++ b/src/main/java/org/elasticsearch/common/lucene/Lucene.java @@ -312,9 +312,9 @@ public class Lucene { segmentReaderSegmentInfoField = segmentReaderSegmentInfoFieldX; } - public static SegmentInfo getSegmentInfo(SegmentReader reader) { + public static SegmentInfoPerCommit getSegmentInfo(SegmentReader reader) { try { - return (SegmentInfo) segmentReaderSegmentInfoField.get(reader); + return (SegmentInfoPerCommit) segmentReaderSegmentInfoField.get(reader); } catch (IllegalAccessException e) { return null; } diff --git a/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java b/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java index 7505aa2e9da..0734d7e589f 100644 --- a/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java +++ b/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java @@ -1179,21 +1179,20 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { // first, go over and compute the search ones... 
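// (Lucene 4.0 background for the hunk below: per-commit segment metadata moved into
// SegmentInfoPerCommit, which wraps the immutable SegmentInfo as its info field and
// tracks the delete count, hence the info.info.name and info.getDelCount() accesses
// in the + lines.)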
Searcher searcher = searcher(); try { - IndexReader[] readers = searcher.reader().getSequentialSubReaders(); - for (IndexReader reader : readers) { - assert reader instanceof SegmentReader; - SegmentInfo info = Lucene.getSegmentInfo((SegmentReader) reader); - assert !segments.containsKey(info.name); - Segment segment = new Segment(info.name); + for (AtomicReaderContext reader : searcher.reader().leaves()) { + assert reader.reader() instanceof SegmentReader; + SegmentInfoPerCommit info = Lucene.getSegmentInfo((SegmentReader) reader.reader()); + assert !segments.containsKey(info.info.name); + Segment segment = new Segment(info.info.name); segment.search = true; - segment.docCount = reader.numDocs(); - segment.delDocCount = reader.numDeletedDocs(); + segment.docCount = reader.reader().numDocs(); + segment.delDocCount = reader.reader().numDeletedDocs(); try { - segment.sizeInBytes = info.sizeInBytes(true); + segment.sizeInBytes = info.sizeInBytes(); } catch (IOException e) { - logger.trace("failed to get size for [{}]", e, info.name); + logger.trace("failed to get size for [{}]", e, info.info.name); } - segments.put(info.name, segment); + segments.put(info.info.name, segment); } } finally { searcher.release(); @@ -1202,24 +1201,20 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { // now, correlate or add the committed ones... if (lastCommittedSegmentInfos != null) { SegmentInfos infos = lastCommittedSegmentInfos; - for (SegmentInfo info : infos) { - Segment segment = segments.get(info.name); + for (SegmentInfoPerCommit info : infos) { + Segment segment = segments.get(info.info.name); if (segment == null) { - segment = new Segment(info.name); + segment = new Segment(info.info.name); segment.search = false; segment.committed = true; - segment.docCount = info.docCount; + segment.docCount = info.info.getDocCount(); + segment.delDocCount = info.getDelCount(); try { - segment.delDocCount = indexWriter.numDeletedDocs(info); + segment.sizeInBytes = info.sizeInBytes(); } catch (IOException e) { - logger.trace("failed to get deleted docs for committed segment", e); + logger.trace("failed to get size for [{}]", e, info.info.name); } - try { - segment.sizeInBytes = info.sizeInBytes(true); - } catch (IOException e) { - logger.trace("failed to get size for [{}]", e, info.name); - } - segments.put(info.name, segment); + segments.put(info.info.name, segment); } else { segment.committed = true; } From 0c24928ef49a07f190724116a913ef1a92390870 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Fri, 26 Oct 2012 00:29:50 +0200 Subject: [PATCH 010/146] lucene 4: fix similarity packaging --- .../index/similarity/AbstractSimilarityProvider.java | 2 +- .../index/similarity/DefaultSimilarityProvider.java | 2 +- .../org/elasticsearch/index/similarity/SimilarityProvider.java | 2 +- .../org/elasticsearch/index/similarity/SimilarityService.java | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/similarity/AbstractSimilarityProvider.java b/src/main/java/org/elasticsearch/index/similarity/AbstractSimilarityProvider.java index 88b0ab3d7e5..829f0adebc7 100644 --- a/src/main/java/org/elasticsearch/index/similarity/AbstractSimilarityProvider.java +++ b/src/main/java/org/elasticsearch/index/similarity/AbstractSimilarityProvider.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.similarity; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.similarities.Similarity; import org.elasticsearch.common.settings.Settings; 
import org.elasticsearch.index.AbstractIndexComponent; import org.elasticsearch.index.Index; diff --git a/src/main/java/org/elasticsearch/index/similarity/DefaultSimilarityProvider.java b/src/main/java/org/elasticsearch/index/similarity/DefaultSimilarityProvider.java index afbe4fc0d2c..9750e93a4eb 100644 --- a/src/main/java/org/elasticsearch/index/similarity/DefaultSimilarityProvider.java +++ b/src/main/java/org/elasticsearch/index/similarity/DefaultSimilarityProvider.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.similarity; -import org.apache.lucene.search.DefaultSimilarity; +import org.apache.lucene.search.similarities.DefaultSimilarity; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; diff --git a/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java b/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java index b867f70352e..e26c471956e 100644 --- a/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java +++ b/src/main/java/org/elasticsearch/index/similarity/SimilarityProvider.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.similarity; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.similarities.Similarity; import org.elasticsearch.common.inject.Provider; import org.elasticsearch.index.IndexComponent; diff --git a/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java b/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java index ca39c78a26e..c215dd605a7 100644 --- a/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java +++ b/src/main/java/org/elasticsearch/index/similarity/SimilarityService.java @@ -20,7 +20,7 @@ package org.elasticsearch.index.similarity; import com.google.common.collect.ImmutableMap; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.similarities.Similarity; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.ImmutableSettings; From 81d148b4e4d95243ec0d74a5e31504e5200d05f9 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Fri, 26 Oct 2012 00:51:18 +0200 Subject: [PATCH 011/146] lucene 4: fix warmup process also removed ExtendedIndexSearcher, we should do what's needed with the new context and leaves methods --- .../lucene/index/ExtendedIndexSearcher.java | 49 ------------------- .../elasticsearch/index/engine/Engine.java | 7 ++- .../index/engine/robin/RobinEngine.java | 26 ++++------ .../search/internal/ContextIndexSearcher.java | 7 ++- 4 files changed, 16 insertions(+), 73 deletions(-) delete mode 100644 src/main/java/org/apache/lucene/index/ExtendedIndexSearcher.java diff --git a/src/main/java/org/apache/lucene/index/ExtendedIndexSearcher.java b/src/main/java/org/apache/lucene/index/ExtendedIndexSearcher.java deleted file mode 100644 index a3bd3846771..00000000000 --- a/src/main/java/org/apache/lucene/index/ExtendedIndexSearcher.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License.
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.lucene.index; - -import org.apache.lucene.search.IndexSearcher; - -/** - * - */ -public class ExtendedIndexSearcher extends IndexSearcher { - - public ExtendedIndexSearcher(ExtendedIndexSearcher searcher) { - super(searcher.getIndexReader(), searcher.subReaders(), searcher.docStarts()); - setSimilarity(searcher.getSimilarity()); - } - - public ExtendedIndexSearcher(IndexReader r) { - super(r); - } - - public IndexReader[] subReaders() { - return this.subReaders; - } - - public int[] docStarts() { - return this.docStarts; - } - - public int readerIndex(int doc) { - return DirectoryReader.readerIndex(doc, docStarts, subReaders.length); - } -} diff --git a/src/main/java/org/elasticsearch/index/engine/Engine.java b/src/main/java/org/elasticsearch/index/engine/Engine.java index 0a5f15e7138..e08d2cb4f0c 100644 --- a/src/main/java/org/elasticsearch/index/engine/Engine.java +++ b/src/main/java/org/elasticsearch/index/engine/Engine.java @@ -21,7 +21,6 @@ package org.elasticsearch.index.engine; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.index.ExtendedIndexSearcher; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.Filter; @@ -156,7 +155,7 @@ public interface Engine extends IndexShardComponent, CloseableComponent { IndexReader reader(); - ExtendedIndexSearcher searcher(); + IndexSearcher searcher(); } static class SimpleSearcher implements Searcher { @@ -173,8 +172,8 @@ public interface Engine extends IndexShardComponent, CloseableComponent { } @Override - public ExtendedIndexSearcher searcher() { - return (ExtendedIndexSearcher) searcher; + public IndexSearcher searcher() { + return searcher; } @Override diff --git a/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java b/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java index 0734d7e589f..449ac47798d 100644 --- a/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java +++ b/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java @@ -1419,8 +1419,8 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { } @Override - public ExtendedIndexSearcher searcher() { - return (ExtendedIndexSearcher) searcher; + public IndexSearcher searcher() { + return searcher; } @Override @@ -1468,13 +1468,13 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { @Override public IndexSearcher newSearcher(IndexReader reader) throws IOException { - ExtendedIndexSearcher searcher = new ExtendedIndexSearcher(reader); + IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(similarityService.defaultSearchSimilarity()); if (warmer != null) { // we need to pass a custom searcher that does not release anything on Engine.Search Release, // we will release explicitly Searcher currentSearcher = null; - ExtendedIndexSearcher newSearcher = null; + IndexSearcher newSearcher = null; boolean closeNewSearcher = false; try { if (searcherManager == null) { @@ -1484,21 +1484,21 @@ 
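The hunk below rewrites the warmer against the Lucene 4 reader API. The deletion of ExtendedIndexSearcher works because everything it exposed is now built in: IndexReader.leaves() replaces subReaders(), and each AtomicReaderContext carries the docBase that docStarts() and readerIndex(doc) were derived from. A small sketch of that mapping, using a linear scan for clarity (Lucene's own ReaderUtil does a binary search):

import java.util.List;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;

public class LeafContextSketch {

    // Equivalent of the removed ExtendedIndexSearcher.readerIndex(doc): find
    // the leaf (segment) whose docBase range contains a top-level docID.
    static int readerIndex(IndexReader reader, int doc) {
        List<AtomicReaderContext> leaves = reader.leaves();
        for (int i = leaves.size() - 1; i >= 0; i--) {
            if (leaves.get(i).docBase <= doc) {
                return i;
            }
        }
        return 0;
    }
}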
public class RobinEngine extends AbstractIndexShardComponent implements Engine { currentSearcher = searcher(); // figure out the newSearcher, with only the new readers that are relevant for us List readers = Lists.newArrayList(); - for (IndexReader subReader : searcher.subReaders()) { + for (AtomicReaderContext newReaderContext : searcher.getIndexReader().leaves()) { boolean found = false; - for (IndexReader currentReader : currentSearcher.searcher().subReaders()) { - if (currentReader.getCoreCacheKey().equals(subReader.getCoreCacheKey())) { + for (AtomicReaderContext currentReaderContext : currentSearcher.reader().leaves()) { + if (currentReaderContext.reader().getCoreCacheKey().equals(newReaderContext.reader().getCoreCacheKey())) { found = true; break; } } if (!found) { - readers.add(subReader); + readers.add(newReaderContext.reader()); } } if (!readers.isEmpty()) { // we don't want to close the inner readers, just increase ref on them - newSearcher = new ExtendedIndexSearcher(new MultiReader(readers.toArray(new IndexReader[readers.size()]), false)); + newSearcher = new IndexSearcher(new MultiReader(readers.toArray(new IndexReader[readers.size()]), false)); closeNewSearcher = true; } } @@ -1520,13 +1520,7 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { } if (newSearcher != null && closeNewSearcher) { try { - newSearcher.close(); - } catch (Exception e) { - // ignore - } - try { - // close the reader as well, since closing the searcher does nothing - // and we want to decRef the inner readers + // close the reader since we want decRef the inner readers newSearcher.getIndexReader().close(); } catch (IOException e) { // ignore diff --git a/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java b/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java index 3056befe761..ab5f2c46271 100644 --- a/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java +++ b/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java @@ -22,7 +22,6 @@ package org.elasticsearch.search.internal; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import com.google.common.collect.Maps; -import org.apache.lucene.index.ExtendedIndexSearcher; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.*; import org.elasticsearch.common.lucene.MinimumScoreCollector; @@ -39,7 +38,7 @@ import java.util.Map; /** * */ -public class ContextIndexSearcher extends ExtendedIndexSearcher { +public class ContextIndexSearcher extends IndexSearcher { public static final class Scopes { public static final String MAIN = "_main_"; @@ -58,9 +57,9 @@ public class ContextIndexSearcher extends ExtendedIndexSearcher { private String processingScope; public ContextIndexSearcher(SearchContext searchContext, Engine.Searcher searcher) { - super(searcher.searcher()); + super(searcher.reader()); this.searchContext = searchContext; - this.reader = searcher.searcher().getIndexReader(); + this.reader = searcher.reader(); } public void dfSource(CachedDfSource dfSource) { From 454954e7be9fecf80311fbbfd184e22cd827f4e2 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 26 Oct 2012 01:13:47 +0200 Subject: [PATCH 012/146] lucene 4: Fix field data, facets and field comparators --- .../common/bytes/BytesArray.java | 18 +++++ .../common/bytes/BytesReference.java | 53 +++++++++++++++ .../elasticsearch/common/lucene/Lucene.java | 15 +++- .../common/xcontent/XContentBuilder.java | 7 ++ 
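To restate the RobinEngine warmer logic above in isolation: the leaves of the new searcher are matched against the current searcher's leaves by core cache key, and only the unmatched (genuinely new) segments are wrapped for warming. A self-contained sketch under that reading; the names are illustrative, not the engine's code:

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;

public class WarmupScopeSketch {

    // Returns a reader over only the segments of newReader that the current
    // searcher has not seen yet, identified by their core cache keys.
    static IndexReader newSegmentsOnly(IndexReader currentReader, IndexReader newReader) {
        Set<Object> warmCores = new HashSet<Object>();
        for (AtomicReaderContext ctx : currentReader.leaves()) {
            warmCores.add(ctx.reader().getCoreCacheKey());
        }
        List<IndexReader> fresh = new ArrayList<IndexReader>();
        for (AtomicReaderContext ctx : newReader.leaves()) {
            if (!warmCores.contains(ctx.reader().getCoreCacheKey())) {
                fresh.add(ctx.reader());
            }
        }
        // closeSubReaders=false: the wrapper incRefs the segment readers, and
        // closing it merely decRefs them again, which matches the engine's
        // comment that closing the wrapped reader releases the inner readers.
        return new MultiReader(fresh.toArray(new IndexReader[fresh.size()]), false);
    }
}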
.../cache/field/data/FieldDataCache.java | 3 +- .../field/data/none/NoneFieldDataCache.java | 3 +- .../AbstractConcurrentMapFieldDataCache.java | 3 +- .../index/field/data/DocFieldData.java | 6 +- .../index/field/data/FieldData.java | 11 +-- .../index/field/data/FieldDataType.java | 7 +- .../index/field/data/bytes/ByteFieldData.java | 12 ++-- .../data/bytes/ByteFieldDataComparator.java | 14 +++- .../bytes/ByteFieldDataMissingComparator.java | 17 ++++- .../field/data/bytes/ByteFieldDataType.java | 20 +++--- .../data/bytes/MultiValueByteFieldData.java | 3 +- .../data/bytes/SingleValueByteFieldData.java | 3 +- .../field/data/doubles/DoubleFieldData.java | 12 ++-- .../doubles/DoubleFieldDataComparator.java | 14 +++- .../DoubleFieldDataMissingComparator.java | 17 ++++- .../data/doubles/DoubleFieldDataType.java | 20 +++--- .../doubles/MultiValueDoubleFieldData.java | 3 +- .../doubles/SingleValueDoubleFieldData.java | 3 +- .../field/data/floats/FloatFieldData.java | 12 ++-- .../data/floats/FloatFieldDataComparator.java | 14 +++- .../FloatFieldDataMissingComparator.java | 17 ++++- .../field/data/floats/FloatFieldDataType.java | 20 +++--- .../data/floats/MultiValueFloatFieldData.java | 3 +- .../floats/SingleValueFloatFieldData.java | 3 +- .../index/field/data/ints/IntFieldData.java | 12 ++-- .../data/ints/IntFieldDataComparator.java | 14 +++- .../ints/IntFieldDataMissingComparator.java | 17 ++++- .../field/data/ints/IntFieldDataType.java | 20 +++--- .../data/ints/MultiValueIntFieldData.java | 3 +- .../data/ints/SingleValueIntFieldData.java | 3 +- .../index/field/data/longs/LongFieldData.java | 13 ++-- .../data/longs/LongFieldDataComparator.java | 14 +++- .../longs/LongFieldDataMissingComparator.java | 15 +++- .../field/data/longs/LongFieldDataType.java | 20 +++--- .../data/longs/MultiValueLongFieldData.java | 3 +- .../data/longs/SingleValueLongFieldData.java | 3 +- .../data/shorts/MultiValueShortFieldData.java | 3 +- .../field/data/shorts/ShortFieldData.java | 13 ++-- .../data/shorts/ShortFieldDataComparator.java | 15 +++- .../ShortFieldDataMissingComparator.java | 18 ++++- .../field/data/shorts/ShortFieldDataType.java | 20 +++--- .../shorts/SingleValueShortFieldData.java | 3 +- .../strings/MultiValueStringFieldData.java | 32 +++++---- .../strings/SingleValueStringFieldData.java | 20 +++--- .../data/strings/StringDocFieldData.java | 5 +- .../field/data/strings/StringFieldData.java | 29 ++++---- .../data/strings/StringFieldDataType.java | 8 +-- .../StringOrdValFieldDataComparator.java | 68 +++++++++++++++---- .../strings/StringValFieldDataComparator.java | 37 +++++++--- .../field/data/support/FieldDataLoader.java | 65 +++++++----------- .../support/NumericFieldDataComparator.java | 8 ++- .../DoubleFieldsFunctionDataComparator.java | 2 +- .../StringFieldsFunctionDataComparator.java | 2 +- .../index/mapper/geo/GeoPointFieldData.java | 20 +++--- .../mapper/geo/GeoPointFieldDataType.java | 8 +-- .../search/geo/GeoDistanceDataComparator.java | 2 +- .../search/facet/AbstractFacetCollector.java | 10 +-- .../CountDateHistogramFacetCollector.java | 6 +- .../ValueDateHistogramFacetCollector.java | 8 +-- ...alueScriptDateHistogramFacetCollector.java | 8 +-- .../facet/filter/FilterFacetCollector.java | 6 +- .../GeoDistanceFacetCollector.java | 6 +- .../ScriptGeoDistanceFacetCollector.java | 8 +-- .../ValueGeoDistanceFacetCollector.java | 8 +-- .../BoundedCountHistogramFacetCollector.java | 6 +- .../BoundedValueHistogramFacetCollector.java | 8 +-- ...dedValueScriptHistogramFacetCollector.java | 8 +-- 
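The long list of comparator files above changes along a single template, worth stating once: Lucene 4 makes FieldComparator generic over its slot type, switches setNextReader to AtomicReaderContext, and adds an abstract compareDocToValue(doc, value) hook. A compilable sketch of that template with the field-data loading stubbed out; the class is hypothetical, not one of the files listed:

import java.io.IOException;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.FieldComparator;

public class IntComparatorSketch extends FieldComparator<Integer> {

    private final int[] values;
    private int bottom;
    private int[] currentDocValues; // stand-in for the per-segment field data

    public IntComparatorSketch(int numHits) {
        this.values = new int[numHits];
    }

    @Override
    public int compare(int slot1, int slot2) {
        return values[slot1] - values[slot2]; // safe for the small illustrative values here
    }

    @Override
    public void setBottom(int slot) {
        bottom = values[slot];
    }

    @Override
    public int compareBottom(int doc) {
        return bottom - currentDocValues[doc];
    }

    @Override
    public void copy(int slot, int doc) {
        values[slot] = currentDocValues[doc];
    }

    @Override
    public FieldComparator<Integer> setNextReader(AtomicReaderContext context) throws IOException {
        // a real comparator loads per-segment field data from context.reader() here
        this.currentDocValues = new int[context.reader().maxDoc()];
        return this;
    }

    @Override
    public Integer value(int slot) {
        return values[slot]; // autoboxes; Lucene 4 makes the slot type explicit
    }

    @Override
    public int compareDocToValue(int doc, Integer otherValue) {
        return currentDocValues[doc] - otherValue; // the hook added in Lucene 4
    }
}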
.../CountHistogramFacetCollector.java | 6 +- .../FullHistogramFacetCollector.java | 6 +- .../ScriptHistogramFacetCollector.java | 8 +-- .../ValueHistogramFacetCollector.java | 8 +-- .../ValueScriptHistogramFacetCollector.java | 8 +-- .../facet/query/QueryFacetCollector.java | 7 +- .../range/KeyValueRangeFacetCollector.java | 8 +-- .../facet/range/RangeFacetCollector.java | 6 +- .../range/ScriptRangeFacetCollector.java | 8 +-- .../ScriptStatisticalFacetCollector.java | 6 +- .../StatisticalFacetCollector.java | 6 +- .../StatisticalFieldsFacetCollector.java | 6 +- .../search/facet/terms/TermsFacet.java | 6 +- .../facet/terms/TermsFacetProcessor.java | 7 +- .../terms/bytes/InternalByteTermsFacet.java | 13 ++-- .../terms/bytes/TermsByteFacetCollector.java | 22 +++--- .../TermsByteOrdinalsFacetCollector.java | 17 ++--- .../doubles/InternalDoubleTermsFacet.java | 13 ++-- .../doubles/TermsDoubleFacetCollector.java | 22 +++--- .../TermsDoubleOrdinalsFacetCollector.java | 17 ++--- .../terms/floats/InternalFloatTermsFacet.java | 9 ++- .../floats/TermsFloatFacetCollector.java | 21 +++--- .../TermsFloatOrdinalsFacetCollector.java | 17 ++--- .../terms/index/IndexNameFacetCollector.java | 6 +- .../terms/ints/InternalIntTermsFacet.java | 9 ++- .../terms/ints/TermsIntFacetCollector.java | 21 +++--- .../ints/TermsIntOrdinalsFacetCollector.java | 17 ++--- .../facet/terms/ip/InternalIpTermsFacet.java | 9 ++- .../facet/terms/ip/TermsIpFacetCollector.java | 12 ++-- .../ip/TermsIpOrdinalsFacetCollector.java | 10 +-- .../terms/longs/InternalLongTermsFacet.java | 8 ++- .../terms/longs/TermsLongFacetCollector.java | 21 +++--- .../TermsLongOrdinalsFacetCollector.java | 17 ++--- .../terms/shorts/InternalShortTermsFacet.java | 9 ++- .../shorts/TermsShortFacetCollector.java | 21 +++--- .../TermsShortOrdinalsFacetCollector.java | 17 ++--- .../FieldsTermsStringFacetCollector.java | 61 +++++++++-------- .../strings/InternalStringTermsFacet.java | 56 +++++++++------ .../ScriptTermsStringFieldFacetCollector.java | 38 ++++++----- .../strings/TermsStringFacetCollector.java | 61 +++++++++-------- .../TermsStringOrdinalsFacetCollector.java | 39 ++++++----- .../terms/support/EntryPriorityQueue.java | 2 +- .../facet/termsstats/TermsStatsFacet.java | 6 +- .../InternalTermsStatsDoubleFacet.java | 8 ++- .../TermsStatsDoubleFacetCollector.java | 10 +-- .../longs/InternalTermsStatsLongFacet.java | 8 ++- .../longs/TermsStatsLongFacetCollector.java | 10 +-- .../InternalTermsStatsStringFacet.java | 25 ++++--- .../TermsStatsStringFacetCollector.java | 18 ++--- .../data/strings/StringFieldDataTests.java | 63 +++++++++-------- 121 files changed, 1071 insertions(+), 688 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/bytes/BytesArray.java b/src/main/java/org/elasticsearch/common/bytes/BytesArray.java index 8005035cb59..a274c70e03f 100644 --- a/src/main/java/org/elasticsearch/common/bytes/BytesArray.java +++ b/src/main/java/org/elasticsearch/common/bytes/BytesArray.java @@ -20,6 +20,7 @@ package org.elasticsearch.common.bytes; import com.google.common.base.Charsets; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.Bytes; import org.elasticsearch.common.io.stream.BytesStreamInput; @@ -43,6 +44,23 @@ public class BytesArray implements BytesReference { this(bytes.getBytes(Charsets.UTF_8)); } + public BytesArray(BytesRef bytesRef) { + this(bytesRef, false); + } + + public BytesArray(BytesRef bytesRef, boolean deepCopy) { + if (deepCopy) { + 
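// deepCopy=true snapshots the bytes via Lucene's BytesRef.deepCopyOf, so this
// BytesArray remains valid even if the shared buffer behind the incoming
// BytesRef is later reused (term enumeration reuses BytesRef instances).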
BytesRef copy = BytesRef.deepCopyOf(bytesRef); + bytes = copy.bytes; + offset = copy.offset; + length = copy.length; + } else { + bytes = bytesRef.bytes; + offset = bytesRef.offset; + length = bytesRef.length; + } + } + public BytesArray(byte[] bytes) { this.bytes = bytes; this.offset = 0; diff --git a/src/main/java/org/elasticsearch/common/bytes/BytesReference.java b/src/main/java/org/elasticsearch/common/bytes/BytesReference.java index dc5650d9e9f..9502caa130f 100644 --- a/src/main/java/org/elasticsearch/common/bytes/BytesReference.java +++ b/src/main/java/org/elasticsearch/common/bytes/BytesReference.java @@ -24,6 +24,7 @@ import org.jboss.netty.buffer.ChannelBuffer; import java.io.IOException; import java.io.OutputStream; +import java.util.Comparator; /** * A reference to bytes. @@ -94,4 +95,56 @@ public interface BytesReference { * Converts to a string based on utf8. */ String toUtf8(); + + + // LUCENE 4 UPGRADE: Used by facets to order. Perhaps make this call implement Comparable. + public final static Comparator utf8SortedAsUnicodeSortOrder = new UTF8SortedAsUnicodeComparator(); + + public static class UTF8SortedAsUnicodeComparator implements Comparator { + + // Only singleton + private UTF8SortedAsUnicodeComparator() {} + + public int compare(BytesReference a, BytesReference b) { + if (a.hasArray() && b.hasArray()) { + final byte[] aBytes = a.array(); + int aUpto = a.arrayOffset(); + final byte[] bBytes = b.array(); + int bUpto = b.arrayOffset(); + + final int aStop = aUpto + Math.min(a.length(), b.length()); + while(aUpto < aStop) { + int aByte = aBytes[aUpto++] & 0xff; + int bByte = bBytes[bUpto++] & 0xff; + + int diff = aByte - bByte; + if (diff != 0) { + return diff; + } + } + + // One is a prefix of the other, or, they are equal: + return a.length() - b.length(); + } else { + final byte[] aBytes = a.toBytes(); + int aUpto = 0; + final byte[] bBytes = b.toBytes(); + int bUpto = 0; + + final int aStop = aUpto + Math.min(a.length(), b.length()); + while(aUpto < aStop) { + int aByte = aBytes[aUpto++] & 0xff; + int bByte = bBytes[bUpto++] & 0xff; + + int diff = aByte - bByte; + if (diff != 0) { + return diff; + } + } + + // One is a prefix of the other, or, they are equal: + return a.length() - b.length(); + } + } + } } diff --git a/src/main/java/org/elasticsearch/common/lucene/Lucene.java b/src/main/java/org/elasticsearch/common/lucene/Lucene.java index 1e7e8688cb4..b275145fc93 100644 --- a/src/main/java/org/elasticsearch/common/lucene/Lucene.java +++ b/src/main/java/org/elasticsearch/common/lucene/Lucene.java @@ -134,7 +134,7 @@ public class Lucene { if (in.readBoolean()) { field = in.readUTF(); } - fields[i] = new SortField(field, in.readVInt(), in.readBoolean()); + fields[i] = new SortField(field, readSortType(in), in.readBoolean()); } FieldDoc[] fieldDocs = new FieldDoc[in.readVInt()]; @@ -201,9 +201,9 @@ public class Lucene { out.writeUTF(sortField.getField()); } if (sortField.getComparatorSource() != null) { - out.writeVInt(((FieldDataType.ExtendedFieldComparatorSource) sortField.getComparatorSource()).reducedType()); + writeSortType(out, ((FieldDataType.ExtendedFieldComparatorSource) sortField.getComparatorSource()).reducedType()); } else { - out.writeVInt(sortField.getType()); + writeSortType(out, sortField.getType()); } out.writeBoolean(sortField.getReverse()); } @@ -271,6 +271,15 @@ public class Lucene { } } + // LUCENE 4 UPGRADE: We might want to maintain our own ordinal, instead of Lucene's ordinal + public static SortField.Type readSortType(StreamInput in) throws 
IOException { + return SortField.Type.values()[in.readVInt()]; + } + + public static void writeSortType(StreamOutput out, SortField.Type sortType) throws IOException { + out.writeVInt(sortType.ordinal()); + } + public static Explanation readExplanation(StreamInput in) throws IOException { float value = in.readFloat(); String description = in.readUTF(); diff --git a/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java b/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java index 1df7e046085..85668c0930e 100644 --- a/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java +++ b/src/main/java/org/elasticsearch/common/xcontent/XContentBuilder.java @@ -20,6 +20,7 @@ package org.elasticsearch.common.xcontent; import com.google.common.base.Charsets; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesArray; @@ -526,6 +527,12 @@ public final class XContentBuilder implements BytesStream { return this; } + public XContentBuilder field(XContentBuilderString name, BytesRef value) throws IOException { + field(name); + generator.writeUTF8String(value.bytes, value.offset, value.length); + return this; + } + public XContentBuilder field(String name, Text value) throws IOException { field(name); if (value.hasBytes() && value.bytes().hasArray()) { diff --git a/src/main/java/org/elasticsearch/index/cache/field/data/FieldDataCache.java b/src/main/java/org/elasticsearch/index/cache/field/data/FieldDataCache.java index d0deeea4b41..a1ba5b4cc41 100644 --- a/src/main/java/org/elasticsearch/index/cache/field/data/FieldDataCache.java +++ b/src/main/java/org/elasticsearch/index/cache/field/data/FieldDataCache.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.cache.field.data; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; import org.elasticsearch.common.component.CloseableComponent; import org.elasticsearch.index.IndexComponent; @@ -32,7 +33,7 @@ import java.io.IOException; */ public interface FieldDataCache extends IndexComponent, CloseableComponent { - FieldData cache(FieldDataType type, IndexReader reader, String fieldName) throws IOException; + FieldData cache(FieldDataType type, AtomicReader reader, String fieldName) throws IOException; String type(); diff --git a/src/main/java/org/elasticsearch/index/cache/field/data/none/NoneFieldDataCache.java b/src/main/java/org/elasticsearch/index/cache/field/data/none/NoneFieldDataCache.java index c6350907963..04c404f0343 100644 --- a/src/main/java/org/elasticsearch/index/cache/field/data/none/NoneFieldDataCache.java +++ b/src/main/java/org/elasticsearch/index/cache/field/data/none/NoneFieldDataCache.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.cache.field.data.none; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.common.inject.Inject; @@ -44,7 +45,7 @@ public class NoneFieldDataCache extends AbstractIndexComponent implements FieldD } @Override - public FieldData cache(FieldDataType type, IndexReader reader, String fieldName) throws IOException { + public FieldData cache(FieldDataType type, AtomicReader reader, String fieldName) throws IOException { return FieldData.load(type, reader, fieldName); } diff --git a/src/main/java/org/elasticsearch/index/cache/field/data/support/AbstractConcurrentMapFieldDataCache.java 
b/src/main/java/org/elasticsearch/index/cache/field/data/support/AbstractConcurrentMapFieldDataCache.java index f3ef6b4fd40..7edb9f90ac6 100644 --- a/src/main/java/org/elasticsearch/index/cache/field/data/support/AbstractConcurrentMapFieldDataCache.java +++ b/src/main/java/org/elasticsearch/index/cache/field/data/support/AbstractConcurrentMapFieldDataCache.java @@ -20,6 +20,7 @@ package org.elasticsearch.index.cache.field.data.support; import com.google.common.cache.Cache; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.SegmentReader; import org.elasticsearch.ElasticSearchException; @@ -106,7 +107,7 @@ public abstract class AbstractConcurrentMapFieldDataCache extends AbstractIndexC } @Override - public FieldData cache(FieldDataType type, IndexReader reader, String fieldName) throws IOException { + public FieldData cache(FieldDataType type, AtomicReader reader, String fieldName) throws IOException { Cache fieldDataCache = cache.get(reader.getCoreCacheKey()); if (fieldDataCache == null) { synchronized (creationMutex) { diff --git a/src/main/java/org/elasticsearch/index/field/data/DocFieldData.java b/src/main/java/org/elasticsearch/index/field/data/DocFieldData.java index 8c4f834ad63..c8e435aa9ee 100644 --- a/src/main/java/org/elasticsearch/index/field/data/DocFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/DocFieldData.java @@ -19,6 +19,8 @@ package org.elasticsearch.index.field.data; +import org.apache.lucene.util.BytesRef; + /** * */ @@ -44,11 +46,11 @@ public abstract class DocFieldData { return !fieldData.hasValue(docId); } - public String stringValue() { + public BytesRef stringValue() { return fieldData.stringValue(docId); } - public String getStringValue() { + public BytesRef getStringValue() { return stringValue(); } diff --git a/src/main/java/org/elasticsearch/index/field/data/FieldData.java b/src/main/java/org/elasticsearch/index/field/data/FieldData.java index 3a200f59a71..acb7b96e9da 100644 --- a/src/main/java/org/elasticsearch/index/field/data/FieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/FieldData.java @@ -19,7 +19,8 @@ package org.elasticsearch.index.field.data; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.util.concurrent.ThreadLocals; import java.io.IOException; @@ -80,18 +81,18 @@ public abstract class FieldData { */ public abstract boolean hasValue(int docId); - public abstract String stringValue(int docId); + public abstract BytesRef stringValue(int docId); public abstract void forEachValue(StringValueProc proc); public static interface StringValueProc { - void onValue(String value); + void onValue(BytesRef value); } public abstract void forEachValueInDoc(int docId, StringValueInDocProc proc); public static interface StringValueInDocProc { - void onValue(int docId, String value); + void onValue(int docId, BytesRef value); void onMissing(int docId); } @@ -107,7 +108,7 @@ public abstract class FieldData { */ public abstract FieldDataType type(); - public static FieldData load(FieldDataType type, IndexReader reader, String fieldName) throws IOException { + public static FieldData load(FieldDataType type, AtomicReader reader, String fieldName) throws IOException { return type.load(reader, fieldName); } } diff --git a/src/main/java/org/elasticsearch/index/field/data/FieldDataType.java 
b/src/main/java/org/elasticsearch/index/field/data/FieldDataType.java index 55e160b42f8..d2d724f2a4b 100644 --- a/src/main/java/org/elasticsearch/index/field/data/FieldDataType.java +++ b/src/main/java/org/elasticsearch/index/field/data/FieldDataType.java @@ -19,8 +19,9 @@ package org.elasticsearch.index.field.data; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.search.FieldComparatorSource; +import org.apache.lucene.search.SortField; import org.elasticsearch.common.Nullable; import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.field.data.bytes.ByteFieldDataType; @@ -50,13 +51,13 @@ public interface FieldDataType { ExtendedFieldComparatorSource newFieldComparatorSource(FieldDataCache cache, @Nullable String missing); - T load(IndexReader reader, String fieldName) throws IOException; + T load(AtomicReader reader, String fieldName) throws IOException; // we need this extended source we we have custom comparators to reuse our field data // in this case, we need to reduce type that will be used when search results are reduced // on another node (we don't have the custom source them...) public abstract class ExtendedFieldComparatorSource extends FieldComparatorSource { - public abstract int reducedType(); + public abstract SortField.Type reducedType(); } } diff --git a/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldData.java b/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldData.java index c2069ea3992..a3e602e719a 100644 --- a/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldData.java @@ -20,8 +20,10 @@ package org.elasticsearch.index.field.data.bytes; import gnu.trove.list.array.TByteArrayList; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.FieldCache; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.index.field.data.FieldDataType; import org.elasticsearch.index.field.data.NumericFieldData; @@ -69,13 +71,13 @@ public abstract class ByteFieldData extends NumericFieldData { @Override public void forEachValue(StringValueProc proc) { for (int i = 1; i < values.length; i++) { - proc.onValue(Byte.toString(values[i])); + proc.onValue(new BytesRef(Byte.toString(values[i]))); } } @Override - public String stringValue(int docId) { - return Byte.toString(value(docId)); + public BytesRef stringValue(int docId) { + return new BytesRef(Byte.toString(value(docId))); } @Override @@ -131,7 +133,7 @@ public abstract class ByteFieldData extends NumericFieldData { void onMissing(int docID); } - public static ByteFieldData load(IndexReader reader, String field) throws IOException { + public static ByteFieldData load(AtomicReader reader, String field) throws IOException { return FieldDataLoader.load(reader, field, new ByteTypeLoader()); } @@ -146,7 +148,7 @@ public abstract class ByteFieldData extends NumericFieldData { } @Override - public void collectTerm(String term) { + public void collectTerm(BytesRef term) { terms.add((byte) FieldCache.NUMERIC_UTILS_INT_PARSER.parseInt(term)); } diff --git a/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldDataComparator.java b/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldDataComparator.java index e192e2fc106..c7abe4b95f3 100644 --- 
a/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldDataComparator.java +++ b/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldDataComparator.java @@ -23,11 +23,13 @@ import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.field.data.FieldDataType; import org.elasticsearch.index.field.data.support.NumericFieldDataComparator; +import java.io.IOException; + /** * */ // LUCENE MONITOR: Monitor against FieldComparator.Short -public class ByteFieldDataComparator extends NumericFieldDataComparator { +public class ByteFieldDataComparator extends NumericFieldDataComparator { private final byte[] values; private short bottom; @@ -52,6 +54,12 @@ public class ByteFieldDataComparator extends NumericFieldDataComparator { return bottom - currentFieldData.byteValue(doc); } + @Override + public int compareDocToValue(int doc, Byte val2) throws IOException { + byte val1 = currentFieldData.byteValue(doc); + return val1 - val2; + } + @Override public void copy(int slot, int doc) { values[slot] = currentFieldData.byteValue(doc); @@ -63,7 +71,7 @@ public class ByteFieldDataComparator extends NumericFieldDataComparator { } @Override - public Comparable value(int slot) { - return Byte.valueOf(values[slot]); + public Byte value(int slot) { + return values[slot]; } } diff --git a/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldDataMissingComparator.java b/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldDataMissingComparator.java index 491b2eeea46..fe699897f8f 100644 --- a/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldDataMissingComparator.java +++ b/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldDataMissingComparator.java @@ -23,11 +23,13 @@ import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.field.data.FieldDataType; import org.elasticsearch.index.field.data.support.NumericFieldDataComparator; +import java.io.IOException; + /** * */ // LUCENE MONITOR: Monitor against FieldComparator.Short -public class ByteFieldDataMissingComparator extends NumericFieldDataComparator { +public class ByteFieldDataMissingComparator extends NumericFieldDataComparator { private final byte[] values; private short bottom; @@ -58,6 +60,15 @@ public class ByteFieldDataMissingComparator extends NumericFieldDataComparator { return bottom - value; } + @Override + public int compareDocToValue(int doc, Byte val2) throws IOException { + byte val1 = missingValue; + if (currentFieldData.hasValue(doc)) { + val1 = currentFieldData.byteValue(doc); + } + return val1 - val2; + } + @Override public void copy(int slot, int doc) { byte value = missingValue; @@ -73,7 +84,7 @@ public class ByteFieldDataMissingComparator extends NumericFieldDataComparator { } @Override - public Comparable value(int slot) { - return Byte.valueOf(values[slot]); + public Byte value(int slot) { + return values[slot]; } } diff --git a/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldDataType.java b/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldDataType.java index 5e9264873a5..40201d1116e 100644 --- a/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldDataType.java +++ b/src/main/java/org/elasticsearch/index/field/data/bytes/ByteFieldDataType.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.field.data.bytes; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.search.FieldComparator; import 
org.apache.lucene.search.SortField; import org.elasticsearch.index.cache.field.data.FieldDataCache; @@ -42,8 +42,8 @@ public class ByteFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.BYTE; + public SortField.Type reducedType() { + return SortField.Type.BYTE; } }; } @@ -55,8 +55,8 @@ public class ByteFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.BYTE; + public SortField.Type reducedType() { + return SortField.Type.BYTE; } }; } @@ -68,8 +68,8 @@ public class ByteFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.BYTE; + public SortField.Type reducedType() { + return SortField.Type.BYTE; } }; } @@ -80,14 +80,14 @@ public class ByteFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.BYTE; + public SortField.Type reducedType() { + return SortField.Type.BYTE; } }; } @Override - public ByteFieldData load(IndexReader reader, String fieldName) throws IOException { + public ByteFieldData load(AtomicReader reader, String fieldName) throws IOException { return ByteFieldData.load(reader, fieldName); } } diff --git a/src/main/java/org/elasticsearch/index/field/data/bytes/MultiValueByteFieldData.java b/src/main/java/org/elasticsearch/index/field/data/bytes/MultiValueByteFieldData.java index f72b0d003e6..e699fd0bb5f 100644 --- a/src/main/java/org/elasticsearch/index/field/data/bytes/MultiValueByteFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/bytes/MultiValueByteFieldData.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.field.data.bytes; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.util.concurrent.ThreadLocals; import org.elasticsearch.index.field.data.doubles.DoubleFieldData; @@ -95,7 +96,7 @@ public class MultiValueByteFieldData extends ByteFieldData { } break; } - proc.onValue(docId, Byte.toString(values[loc])); + proc.onValue(docId, new BytesRef(Byte.toString(values[loc]))); } } diff --git a/src/main/java/org/elasticsearch/index/field/data/bytes/SingleValueByteFieldData.java b/src/main/java/org/elasticsearch/index/field/data/bytes/SingleValueByteFieldData.java index 8d3cf2581d8..e258b0b419f 100644 --- a/src/main/java/org/elasticsearch/index/field/data/bytes/SingleValueByteFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/bytes/SingleValueByteFieldData.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.field.data.bytes; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.util.concurrent.ThreadLocals; import org.elasticsearch.index.field.data.doubles.DoubleFieldData; @@ -73,7 +74,7 @@ public class SingleValueByteFieldData extends ByteFieldData { proc.onMissing(docId); return; } - proc.onValue(docId, Byte.toString(values[loc])); + proc.onValue(docId, new BytesRef(Byte.toString(values[loc]))); } @Override diff --git a/src/main/java/org/elasticsearch/index/field/data/doubles/DoubleFieldData.java b/src/main/java/org/elasticsearch/index/field/data/doubles/DoubleFieldData.java index f6906de98b0..26747293d72 100644 --- a/src/main/java/org/elasticsearch/index/field/data/doubles/DoubleFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/doubles/DoubleFieldData.java @@ -20,8 +20,10 @@ package org.elasticsearch.index.field.data.doubles; import gnu.trove.list.array.TDoubleArrayList; +import 
org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.FieldCache; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.index.field.data.FieldDataType; import org.elasticsearch.index.field.data.NumericFieldData; @@ -67,14 +69,14 @@ public abstract class DoubleFieldData extends NumericFieldData { private final double[] values; private double bottom; @@ -67,6 +69,12 @@ public class DoubleFieldDataComparator extends NumericFieldDataComparator { } } + @Override + public int compareDocToValue(int doc, Double val2) throws IOException { + double val1 = currentFieldData.doubleValue(doc); + return Double.compare(val1, val2); + } + @Override public void copy(int slot, int doc) { values[slot] = currentFieldData.doubleValue(doc); @@ -78,7 +86,7 @@ public class DoubleFieldDataComparator extends NumericFieldDataComparator { } @Override - public Comparable value(int slot) { - return Double.valueOf(values[slot]); + public Double value(int slot) { + return values[slot]; } } diff --git a/src/main/java/org/elasticsearch/index/field/data/doubles/DoubleFieldDataMissingComparator.java b/src/main/java/org/elasticsearch/index/field/data/doubles/DoubleFieldDataMissingComparator.java index 4e36137b7d6..425b37bef45 100644 --- a/src/main/java/org/elasticsearch/index/field/data/doubles/DoubleFieldDataMissingComparator.java +++ b/src/main/java/org/elasticsearch/index/field/data/doubles/DoubleFieldDataMissingComparator.java @@ -23,11 +23,13 @@ import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.field.data.FieldDataType; import org.elasticsearch.index.field.data.support.NumericFieldDataComparator; +import java.io.IOException; + /** * */ // LUCENE MONITOR: Monitor against FieldComparator.Double -public class DoubleFieldDataMissingComparator extends NumericFieldDataComparator { +public class DoubleFieldDataMissingComparator extends NumericFieldDataComparator { private final double[] values; private double bottom; @@ -72,6 +74,15 @@ public class DoubleFieldDataMissingComparator extends NumericFieldDataComparator } } + @Override + public int compareDocToValue(int doc, Double val2) throws IOException { + double val1 = missingValue; + if (currentFieldData.hasValue(doc)) { + val1 = currentFieldData.doubleValue(doc); + } + return Double.compare(val1, val2); + } + @Override public void copy(int slot, int doc) { double value = missingValue; @@ -87,7 +98,7 @@ public class DoubleFieldDataMissingComparator extends NumericFieldDataComparator } @Override - public Comparable value(int slot) { - return Double.valueOf(values[slot]); + public Double value(int slot) { + return values[slot]; } } diff --git a/src/main/java/org/elasticsearch/index/field/data/doubles/DoubleFieldDataType.java b/src/main/java/org/elasticsearch/index/field/data/doubles/DoubleFieldDataType.java index 7c3830316f6..d7bd9370937 100644 --- a/src/main/java/org/elasticsearch/index/field/data/doubles/DoubleFieldDataType.java +++ b/src/main/java/org/elasticsearch/index/field/data/doubles/DoubleFieldDataType.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.field.data.doubles; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.SortField; import org.elasticsearch.index.cache.field.data.FieldDataCache; @@ -42,8 +42,8 @@ public class DoubleFieldDataType implements FieldDataType { } @Override - 
public int reducedType() { - return SortField.DOUBLE; + public SortField.Type reducedType() { + return SortField.Type.DOUBLE; } }; } @@ -55,8 +55,8 @@ public class DoubleFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.DOUBLE; + public SortField.Type reducedType() { + return SortField.Type.DOUBLE; } }; } @@ -68,8 +68,8 @@ public class DoubleFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.DOUBLE; + public SortField.Type reducedType() { + return SortField.Type.DOUBLE; } }; } @@ -80,14 +80,14 @@ public class DoubleFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.DOUBLE; + public SortField.Type reducedType() { + return SortField.Type.DOUBLE; } }; } @Override - public DoubleFieldData load(IndexReader reader, String fieldName) throws IOException { + public DoubleFieldData load(AtomicReader reader, String fieldName) throws IOException { return DoubleFieldData.load(reader, fieldName); } } diff --git a/src/main/java/org/elasticsearch/index/field/data/doubles/MultiValueDoubleFieldData.java b/src/main/java/org/elasticsearch/index/field/data/doubles/MultiValueDoubleFieldData.java index 373464ce621..1b3bcfd2739 100644 --- a/src/main/java/org/elasticsearch/index/field/data/doubles/MultiValueDoubleFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/doubles/MultiValueDoubleFieldData.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.field.data.doubles; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.util.concurrent.ThreadLocals; @@ -83,7 +84,7 @@ public class MultiValueDoubleFieldData extends DoubleFieldData { } break; } - proc.onValue(docId, Double.toString(values[loc])); + proc.onValue(docId, new BytesRef(Double.toString(values[loc]))); } } diff --git a/src/main/java/org/elasticsearch/index/field/data/doubles/SingleValueDoubleFieldData.java b/src/main/java/org/elasticsearch/index/field/data/doubles/SingleValueDoubleFieldData.java index f79f1748b93..829a352a641 100644 --- a/src/main/java/org/elasticsearch/index/field/data/doubles/SingleValueDoubleFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/doubles/SingleValueDoubleFieldData.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.field.data.doubles; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.util.concurrent.ThreadLocals; @@ -65,7 +66,7 @@ public class SingleValueDoubleFieldData extends DoubleFieldData { proc.onMissing(docId); return; } - proc.onValue(docId, Double.toString(values[loc])); + proc.onValue(docId, new BytesRef(Double.toString(values[loc]))); } @Override diff --git a/src/main/java/org/elasticsearch/index/field/data/floats/FloatFieldData.java b/src/main/java/org/elasticsearch/index/field/data/floats/FloatFieldData.java index 36f69229ccd..fe4c555ab12 100644 --- a/src/main/java/org/elasticsearch/index/field/data/floats/FloatFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/floats/FloatFieldData.java @@ -20,8 +20,10 @@ package org.elasticsearch.index.field.data.floats; import gnu.trove.list.array.TFloatArrayList; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.FieldCache; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.index.field.data.FieldDataType; import 
org.elasticsearch.index.field.data.NumericFieldData; @@ -67,14 +69,14 @@ public abstract class FloatFieldData extends NumericFieldData } @Override - public String stringValue(int docId) { - return Float.toString(value(docId)); + public BytesRef stringValue(int docId) { + return new BytesRef(Float.toString(value(docId))); } @Override public void forEachValue(StringValueProc proc) { for (int i = 1; i < values.length; i++) { - proc.onValue(Float.toString(values[i])); + proc.onValue(new BytesRef(Float.toString(values[i]))); } } @@ -131,7 +133,7 @@ public abstract class FloatFieldData extends NumericFieldData void onMissing(int docId); } - public static FloatFieldData load(IndexReader reader, String field) throws IOException { + public static FloatFieldData load(AtomicReader reader, String field) throws IOException { return FieldDataLoader.load(reader, field, new FloatTypeLoader()); } @@ -146,7 +148,7 @@ public abstract class FloatFieldData extends NumericFieldData } @Override - public void collectTerm(String term) { + public void collectTerm(BytesRef term) { terms.add(FieldCache.NUMERIC_UTILS_FLOAT_PARSER.parseFloat(term)); } diff --git a/src/main/java/org/elasticsearch/index/field/data/floats/FloatFieldDataComparator.java b/src/main/java/org/elasticsearch/index/field/data/floats/FloatFieldDataComparator.java index 7c76685c867..8341043fbdb 100644 --- a/src/main/java/org/elasticsearch/index/field/data/floats/FloatFieldDataComparator.java +++ b/src/main/java/org/elasticsearch/index/field/data/floats/FloatFieldDataComparator.java @@ -23,11 +23,13 @@ import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.field.data.FieldDataType; import org.elasticsearch.index.field.data.support.NumericFieldDataComparator; +import java.io.IOException; + /** * */ // LUCENE MONITOR - Monitor against FieldComparator.Float -public class FloatFieldDataComparator extends NumericFieldDataComparator { +public class FloatFieldDataComparator extends NumericFieldDataComparator { private final float[] values; private float bottom; @@ -71,6 +73,12 @@ public class FloatFieldDataComparator extends NumericFieldDataComparator { } } + @Override + public int compareDocToValue(int doc, Float val2) throws IOException { + float val1 = currentFieldData.floatValue(doc); + return Float.compare(val1, val2); + } + @Override public void copy(int slot, int doc) { values[slot] = currentFieldData.floatValue(doc); @@ -82,7 +90,7 @@ public class FloatFieldDataComparator extends NumericFieldDataComparator { } @Override - public Comparable value(int slot) { - return Float.valueOf(values[slot]); + public Float value(int slot) { + return values[slot]; } } diff --git a/src/main/java/org/elasticsearch/index/field/data/floats/FloatFieldDataMissingComparator.java b/src/main/java/org/elasticsearch/index/field/data/floats/FloatFieldDataMissingComparator.java index df2bd5f621d..540a7fbbf00 100644 --- a/src/main/java/org/elasticsearch/index/field/data/floats/FloatFieldDataMissingComparator.java +++ b/src/main/java/org/elasticsearch/index/field/data/floats/FloatFieldDataMissingComparator.java @@ -23,11 +23,13 @@ import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.field.data.FieldDataType; import org.elasticsearch.index.field.data.support.NumericFieldDataComparator; +import java.io.IOException; + /** * */ // LUCENE MONITOR - Monitor against FieldComparator.Float -public class FloatFieldDataMissingComparator extends NumericFieldDataComparator { +public class 
FloatFieldDataMissingComparator extends NumericFieldDataComparator { private final float[] values; private float bottom; @@ -76,6 +78,15 @@ public class FloatFieldDataMissingComparator extends NumericFieldDataComparator } } + @Override + public int compareDocToValue(int doc, Float val2) throws IOException { + float val1 = missingValue; + if (currentFieldData.hasValue(doc)) { + val1 = currentFieldData.floatValue(doc); + } + return Float.compare(val1, val2); + } + @Override public void copy(int slot, int doc) { float value = missingValue; @@ -91,7 +102,7 @@ public class FloatFieldDataMissingComparator extends NumericFieldDataComparator } @Override - public Comparable value(int slot) { - return Float.valueOf(values[slot]); + public Float value(int slot) { + return values[slot]; } } diff --git a/src/main/java/org/elasticsearch/index/field/data/floats/FloatFieldDataType.java b/src/main/java/org/elasticsearch/index/field/data/floats/FloatFieldDataType.java index b2c830e39b6..037ee25f8f5 100644 --- a/src/main/java/org/elasticsearch/index/field/data/floats/FloatFieldDataType.java +++ b/src/main/java/org/elasticsearch/index/field/data/floats/FloatFieldDataType.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.field.data.floats; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.SortField; import org.elasticsearch.index.cache.field.data.FieldDataCache; @@ -42,8 +42,8 @@ public class FloatFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.FLOAT; + public SortField.Type reducedType() { + return SortField.Type.FLOAT; } }; } @@ -55,8 +55,8 @@ public class FloatFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.FLOAT; + public SortField.Type reducedType() { + return SortField.Type.FLOAT; } }; } @@ -68,8 +68,8 @@ public class FloatFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.FLOAT; + public SortField.Type reducedType() { + return SortField.Type.FLOAT; } }; } @@ -80,14 +80,14 @@ public class FloatFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.FLOAT; + public SortField.Type reducedType() { + return SortField.Type.FLOAT; } }; } @Override - public FloatFieldData load(IndexReader reader, String fieldName) throws IOException { + public FloatFieldData load(AtomicReader reader, String fieldName) throws IOException { return FloatFieldData.load(reader, fieldName); } } diff --git a/src/main/java/org/elasticsearch/index/field/data/floats/MultiValueFloatFieldData.java b/src/main/java/org/elasticsearch/index/field/data/floats/MultiValueFloatFieldData.java index 1da88e1f501..0b7ce83d0a4 100644 --- a/src/main/java/org/elasticsearch/index/field/data/floats/MultiValueFloatFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/floats/MultiValueFloatFieldData.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.field.data.floats; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.util.concurrent.ThreadLocals; import org.elasticsearch.index.field.data.doubles.DoubleFieldData; @@ -95,7 +96,7 @@ public class MultiValueFloatFieldData extends FloatFieldData { } break; } - proc.onValue(docId, Double.toString(values[loc])); + proc.onValue(docId, new BytesRef(Double.toString(values[loc]))); } } diff --git 
a/src/main/java/org/elasticsearch/index/field/data/floats/SingleValueFloatFieldData.java b/src/main/java/org/elasticsearch/index/field/data/floats/SingleValueFloatFieldData.java index 36eded25658..8814ba50303 100644 --- a/src/main/java/org/elasticsearch/index/field/data/floats/SingleValueFloatFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/floats/SingleValueFloatFieldData.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.field.data.floats; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.util.concurrent.ThreadLocals; import org.elasticsearch.index.field.data.doubles.DoubleFieldData; @@ -73,7 +74,7 @@ public class SingleValueFloatFieldData extends FloatFieldData { proc.onMissing(docId); return; } - proc.onValue(docId, Float.toString(values[loc])); + proc.onValue(docId, new BytesRef(Float.toString(values[loc]))); } @Override diff --git a/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldData.java b/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldData.java index 0bed01060fb..05a77687d75 100644 --- a/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldData.java @@ -20,8 +20,10 @@ package org.elasticsearch.index.field.data.ints; import gnu.trove.list.array.TIntArrayList; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.FieldCache; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.index.field.data.FieldDataType; import org.elasticsearch.index.field.data.NumericFieldData; @@ -67,14 +69,14 @@ public abstract class IntFieldData extends NumericFieldData { } @Override - public String stringValue(int docId) { - return Integer.toString(value(docId)); + public BytesRef stringValue(int docId) { + return new BytesRef(Integer.toString(value(docId))); } @Override public void forEachValue(StringValueProc proc) { for (int i = 1; i < values.length; i++) { - proc.onValue(Integer.toString(values[i])); + proc.onValue(new BytesRef(Integer.toString(values[i]))); } } @@ -131,7 +133,7 @@ public abstract class IntFieldData extends NumericFieldData { void onMissing(int docId); } - public static IntFieldData load(IndexReader reader, String field) throws IOException { + public static IntFieldData load(AtomicReader reader, String field) throws IOException { return FieldDataLoader.load(reader, field, new IntTypeLoader()); } @@ -146,7 +148,7 @@ public abstract class IntFieldData extends NumericFieldData { } @Override - public void collectTerm(String term) { + public void collectTerm(BytesRef term) { terms.add(FieldCache.NUMERIC_UTILS_INT_PARSER.parseInt(term)); } diff --git a/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldDataComparator.java b/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldDataComparator.java index 548180687cd..963f116ae11 100644 --- a/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldDataComparator.java +++ b/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldDataComparator.java @@ -23,11 +23,13 @@ import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.field.data.FieldDataType; import org.elasticsearch.index.field.data.support.NumericFieldDataComparator; +import java.io.IOException; + /** * */ // LUCENE MONITOR - Monitor against FieldComparator.Int -public class 
IntFieldDataComparator extends NumericFieldDataComparator { +public class IntFieldDataComparator extends NumericFieldDataComparator { private final int[] values; @@ -77,6 +79,12 @@ public class IntFieldDataComparator extends NumericFieldDataComparator { } } + @Override + public int compareDocToValue(int doc, Integer val2) throws IOException { + int val1 = currentFieldData.intValue(doc); + return val1 - val2; + } + @Override public void copy(int slot, int doc) { values[slot] = currentFieldData.intValue(doc); @@ -88,7 +96,7 @@ public class IntFieldDataComparator extends NumericFieldDataComparator { } @Override - public Comparable value(int slot) { - return Integer.valueOf(values[slot]); + public Integer value(int slot) { + return values[slot]; } } diff --git a/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldDataMissingComparator.java b/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldDataMissingComparator.java index d5955ba00ec..a96cdac5aa1 100644 --- a/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldDataMissingComparator.java +++ b/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldDataMissingComparator.java @@ -23,11 +23,13 @@ import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.field.data.FieldDataType; import org.elasticsearch.index.field.data.support.NumericFieldDataComparator; +import java.io.IOException; + /** * */ // LUCENE MONITOR - Monitor against FieldComparator.Int -public class IntFieldDataMissingComparator extends NumericFieldDataComparator { +public class IntFieldDataMissingComparator extends NumericFieldDataComparator { private final int[] values; @@ -82,6 +84,15 @@ public class IntFieldDataMissingComparator extends NumericFieldDataComparator { } } + @Override + public int compareDocToValue(int doc, Integer val2) throws IOException { + int val1 = missingValue; + if (currentFieldData.hasValue(doc)) { + val1 = currentFieldData.intValue(doc); + } + return val1 - val2; + } + @Override public void copy(int slot, int doc) { int value = missingValue; @@ -97,7 +108,7 @@ public class IntFieldDataMissingComparator extends NumericFieldDataComparator { } @Override - public Comparable value(int slot) { - return Integer.valueOf(values[slot]); + public Integer value(int slot) { + return values[slot]; } } diff --git a/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldDataType.java b/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldDataType.java index bd3614a9f0f..4b8adc65b99 100644 --- a/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldDataType.java +++ b/src/main/java/org/elasticsearch/index/field/data/ints/IntFieldDataType.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.field.data.ints; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.SortField; import org.elasticsearch.index.cache.field.data.FieldDataCache; @@ -42,8 +42,8 @@ public class IntFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.INT; + public SortField.Type reducedType() { + return SortField.Type.INT; } }; } @@ -55,8 +55,8 @@ public class IntFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.INT; + public SortField.Type reducedType() { + return SortField.Type.INT; } }; } @@ -68,8 +68,8 @@ public class IntFieldDataType implements FieldDataType { } @Override - public int 
reducedType() { - return SortField.INT; + public SortField.Type reducedType() { + return SortField.Type.INT; } }; } @@ -80,14 +80,14 @@ public class IntFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.INT; + public SortField.Type reducedType() { + return SortField.Type.INT; } }; } @Override - public IntFieldData load(IndexReader reader, String fieldName) throws IOException { + public IntFieldData load(AtomicReader reader, String fieldName) throws IOException { return IntFieldData.load(reader, fieldName); } } diff --git a/src/main/java/org/elasticsearch/index/field/data/ints/MultiValueIntFieldData.java b/src/main/java/org/elasticsearch/index/field/data/ints/MultiValueIntFieldData.java index 7c28608d6ad..26f825b12cf 100644 --- a/src/main/java/org/elasticsearch/index/field/data/ints/MultiValueIntFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/ints/MultiValueIntFieldData.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.field.data.ints; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.util.concurrent.ThreadLocals; import org.elasticsearch.index.field.data.doubles.DoubleFieldData; @@ -95,7 +96,7 @@ public class MultiValueIntFieldData extends IntFieldData { } break; } - proc.onValue(docId, Integer.toString(values[loc])); + proc.onValue(docId, new BytesRef(Integer.toString(values[loc]))); } } diff --git a/src/main/java/org/elasticsearch/index/field/data/ints/SingleValueIntFieldData.java b/src/main/java/org/elasticsearch/index/field/data/ints/SingleValueIntFieldData.java index 50ece5f3397..499066b6935 100644 --- a/src/main/java/org/elasticsearch/index/field/data/ints/SingleValueIntFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/ints/SingleValueIntFieldData.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.field.data.ints; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.util.concurrent.ThreadLocals; import org.elasticsearch.index.field.data.doubles.DoubleFieldData; @@ -73,7 +74,7 @@ public class SingleValueIntFieldData extends IntFieldData { proc.onMissing(docId); return; } - proc.onValue(docId, Integer.toString(values[loc])); + proc.onValue(docId, new BytesRef(Integer.toString(values[loc]))); } @Override diff --git a/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldData.java b/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldData.java index c5127bd3dfd..736d90c6448 100644 --- a/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldData.java @@ -20,8 +20,9 @@ package org.elasticsearch.index.field.data.longs; import gnu.trove.list.array.TLongArrayList; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.search.FieldCache; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.util.concurrent.ThreadLocals; import org.elasticsearch.index.field.data.FieldDataType; @@ -92,13 +93,13 @@ public abstract class LongFieldData extends NumericFieldData { @Override public void forEachValue(StringValueProc proc) { for (int i = 1; i < values.length; i++) { - proc.onValue(Long.toString(values[i])); + proc.onValue(new BytesRef(Long.toString(values[i]))); } } @Override - public String stringValue(int docId) { - return Long.toString(docId); + 
public BytesRef stringValue(int docId) { + return new BytesRef(Long.toString(value(docId))); } @Override @@ -162,7 +163,7 @@ public abstract class LongFieldData extends NumericFieldData { void onValue(int docId, MutableDateTime dateTime); } - public static LongFieldData load(IndexReader reader, String field) throws IOException { + public static LongFieldData load(AtomicReader reader, String field) throws IOException { return FieldDataLoader.load(reader, field, new LongTypeLoader()); } @@ -177,7 +178,7 @@ public abstract class LongFieldData extends NumericFieldData { } @Override - public void collectTerm(String term) { + public void collectTerm(BytesRef term) { terms.add(FieldCache.NUMERIC_UTILS_LONG_PARSER.parseLong(term)); } diff --git a/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldDataComparator.java b/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldDataComparator.java index fd396bc51fd..6de15ea4033 100644 --- a/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldDataComparator.java +++ b/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldDataComparator.java @@ -23,11 +23,13 @@ import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.field.data.FieldDataType; import org.elasticsearch.index.field.data.support.NumericFieldDataComparator; +import java.io.IOException; + /** * */ // LUCENE MONITOR - Monitor against FieldComparator.Long -public class LongFieldDataComparator extends NumericFieldDataComparator { +public class LongFieldDataComparator extends NumericFieldDataComparator<Long> { private final long[] values; private long bottom; @@ -72,6 +74,12 @@ public class LongFieldDataComparator extends NumericFieldDataComparator { } } + @Override + public int compareDocToValue(int doc, Long val2) throws IOException { + long val1 = currentFieldData.longValue(doc); + return val1 < val2 ? -1 : (val1 == val2 ? 0 : 1); + } + @Override public void copy(int slot, int doc) { values[slot] = currentFieldData.longValue(doc); @@ -83,8 +91,8 @@ public class LongFieldDataComparator extends NumericFieldDataComparator { } @Override - public Comparable value(int slot) { - return Long.valueOf(values[slot]); + public Long value(int slot) { + return values[slot]; } } diff --git a/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldDataMissingComparator.java b/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldDataMissingComparator.java index 2289729959a..fb2617d48f2 100644 --- a/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldDataMissingComparator.java +++ b/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldDataMissingComparator.java @@ -23,11 +23,13 @@ import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.field.data.FieldDataType; import org.elasticsearch.index.field.data.support.NumericFieldDataComparator; +import java.io.IOException; + /** * */ // LUCENE MONITOR - Monitor against FieldComparator.Long -public class LongFieldDataMissingComparator extends NumericFieldDataComparator { +public class LongFieldDataMissingComparator extends NumericFieldDataComparator<Long> { private final long[] values; private long bottom; @@ -77,6 +79,15 @@ public class LongFieldDataMissingComparator extends NumericFieldDataComparator { } } + @Override + public int compareDocToValue(int doc, Long val2) throws IOException { + long val1 = missingValue; + if (currentFieldData.hasValue(doc)) { + val1 = currentFieldData.longValue(doc); + } + return val1 < val2 ? -1 : (val1 == val2 ? 0 : 1); + } +
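LUCENE 4 UPGRADE NOTE: the compareDocToValue implementations above avoid returning a plain subtraction: int subtraction wraps for operands more than Integer.MAX_VALUE apart, and a long difference cast to int can truncate or even flip sign. A minimal standalone sketch of the overflow-safe idiom (illustrative class, not part of this patch; Integer.compare/Long.compare only arrived in Java 7, hence the explicit ternaries):

public final class SafeCompare {

    private SafeCompare() {
    }

    // (a - b) would overflow for e.g. a = Integer.MIN_VALUE, b = 1 and report the wrong order.
    public static int compareInts(int a, int b) {
        return a < b ? -1 : (a == b ? 0 : 1);
    }

    // (int) (a - b) can truncate to 0 or flip sign for distant longs.
    public static int compareLongs(long a, long b) {
        return a < b ? -1 : (a == b ? 0 : 1);
    }

    public static void main(String[] args) {
        System.out.println(compareInts(Integer.MIN_VALUE, 1)); // -1; subtraction would yield a positive value
        System.out.println(compareLongs(Long.MIN_VALUE, Long.MAX_VALUE)); // -1
    }
}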
@Override public void copy(int slot, int doc) { long value = missingValue; @@ -92,7 +103,7 @@ public class LongFieldDataMissingComparator extends NumericFieldDataComparator { } @Override - public Comparable value(int slot) { + public Long value(int slot) { return Long.valueOf(values[slot]); } diff --git a/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldDataType.java b/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldDataType.java index 7fcff5d0457..545ad883fba 100644 --- a/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldDataType.java +++ b/src/main/java/org/elasticsearch/index/field/data/longs/LongFieldDataType.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.field.data.longs; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.SortField; import org.elasticsearch.index.cache.field.data.FieldDataCache; @@ -42,8 +42,8 @@ public class LongFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.LONG; + public SortField.Type reducedType() { + return SortField.Type.LONG; } }; } @@ -55,8 +55,8 @@ public class LongFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.LONG; + public SortField.Type reducedType() { + return SortField.Type.LONG; } }; } @@ -68,8 +68,8 @@ public class LongFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.LONG; + public SortField.Type reducedType() { + return SortField.Type.LONG; } }; } @@ -80,14 +80,14 @@ public class LongFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.LONG; + public SortField.Type reducedType() { + return SortField.Type.LONG; } }; } @Override - public LongFieldData load(IndexReader reader, String fieldName) throws IOException { + public LongFieldData load(AtomicReader reader, String fieldName) throws IOException { return LongFieldData.load(reader, fieldName); } } diff --git a/src/main/java/org/elasticsearch/index/field/data/longs/MultiValueLongFieldData.java b/src/main/java/org/elasticsearch/index/field/data/longs/MultiValueLongFieldData.java index 0a9619d7e2a..edaabc8f1e3 100644 --- a/src/main/java/org/elasticsearch/index/field/data/longs/MultiValueLongFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/longs/MultiValueLongFieldData.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.field.data.longs; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.util.concurrent.ThreadLocals; import org.elasticsearch.index.field.data.doubles.DoubleFieldData; @@ -112,7 +113,7 @@ public class MultiValueLongFieldData extends LongFieldData { } break; } - proc.onValue(docId, Long.toString(values[loc])); + proc.onValue(docId, new BytesRef(Long.toString(values[loc]))); } } diff --git a/src/main/java/org/elasticsearch/index/field/data/longs/SingleValueLongFieldData.java b/src/main/java/org/elasticsearch/index/field/data/longs/SingleValueLongFieldData.java index 2b0beaf0d54..29561e9ed20 100644 --- a/src/main/java/org/elasticsearch/index/field/data/longs/SingleValueLongFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/longs/SingleValueLongFieldData.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.field.data.longs; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import 
org.elasticsearch.common.util.concurrent.ThreadLocals; import org.elasticsearch.index.field.data.doubles.DoubleFieldData; @@ -84,7 +85,7 @@ public class SingleValueLongFieldData extends LongFieldData { proc.onMissing(docId); return; } - proc.onValue(docId, Long.toString(values[loc])); + proc.onValue(docId, new BytesRef(Long.toString(values[loc]))); } @Override diff --git a/src/main/java/org/elasticsearch/index/field/data/shorts/MultiValueShortFieldData.java b/src/main/java/org/elasticsearch/index/field/data/shorts/MultiValueShortFieldData.java index 6ce48478582..12ee6191614 100644 --- a/src/main/java/org/elasticsearch/index/field/data/shorts/MultiValueShortFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/shorts/MultiValueShortFieldData.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.field.data.shorts; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.util.concurrent.ThreadLocals; import org.elasticsearch.index.field.data.doubles.DoubleFieldData; @@ -92,7 +93,7 @@ public class MultiValueShortFieldData extends ShortFieldData { int loc = ordinal[docId]; if (loc != 0) { found = true; - proc.onValue(docId, Short.toString(values[loc])); + proc.onValue(docId, new BytesRef(Short.toString(values[loc]))); } } if (!found) { diff --git a/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldData.java b/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldData.java index deec92dcb49..3c02442255a 100644 --- a/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldData.java @@ -20,8 +20,9 @@ package org.elasticsearch.index.field.data.shorts; import gnu.trove.list.array.TShortArrayList; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.search.FieldCache; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.index.field.data.FieldDataType; import org.elasticsearch.index.field.data.NumericFieldData; @@ -69,13 +70,13 @@ public abstract class ShortFieldData extends NumericFieldData @Override public void forEachValue(StringValueProc proc) { for (int i = 1; i < values.length; i++) { - proc.onValue(Short.toString(values[i])); + proc.onValue(new BytesRef(Short.toString(values[i]))); } } @Override - public String stringValue(int docId) { - return Short.toString(value(docId)); + public BytesRef stringValue(int docId) { + return new BytesRef(Short.toString(value(docId))); } @Override @@ -131,7 +132,7 @@ public abstract class ShortFieldData extends NumericFieldData void onMissing(int docId); } - public static ShortFieldData load(IndexReader reader, String field) throws IOException { + public static ShortFieldData load(AtomicReader reader, String field) throws IOException { return FieldDataLoader.load(reader, field, new ShortTypeLoader()); } @@ -146,7 +147,7 @@ public abstract class ShortFieldData extends NumericFieldData } @Override - public void collectTerm(String term) { + public void collectTerm(BytesRef term) { terms.add((short) FieldCache.NUMERIC_UTILS_INT_PARSER.parseInt(term)); } diff --git a/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldDataComparator.java b/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldDataComparator.java index bb7fd43da90..0d437899625 100644 --- 
a/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldDataComparator.java +++ b/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldDataComparator.java @@ -19,15 +19,18 @@ package org.elasticsearch.index.field.data.shorts; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.field.data.FieldDataType; import org.elasticsearch.index.field.data.support.NumericFieldDataComparator; +import java.io.IOException; + /** * */ // LUCENE MONITOR: Monitor against FieldComparator.Short -public class ShortFieldDataComparator extends NumericFieldDataComparator { +public class ShortFieldDataComparator extends NumericFieldDataComparator { private final short[] values; private short bottom; @@ -63,7 +66,13 @@ public class ShortFieldDataComparator extends NumericFieldDataComparator { } @Override - public Comparable value(int slot) { - return Short.valueOf(values[slot]); + public int compareDocToValue(int doc, Short val2) throws IOException { + short val1 = currentFieldData.shortValue(doc); + return val1 - val2; + } + + @Override + public Short value(int slot) { + return values[slot]; } } diff --git a/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldDataMissingComparator.java b/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldDataMissingComparator.java index 0f03d35cc2c..a85e3510217 100644 --- a/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldDataMissingComparator.java +++ b/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldDataMissingComparator.java @@ -19,15 +19,18 @@ package org.elasticsearch.index.field.data.shorts; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.field.data.FieldDataType; import org.elasticsearch.index.field.data.support.NumericFieldDataComparator; +import java.io.IOException; + /** * */ // LUCENE MONITOR: Monitor against FieldComparator.Short -public class ShortFieldDataMissingComparator extends NumericFieldDataComparator { +public class ShortFieldDataMissingComparator extends NumericFieldDataComparator { private final short[] values; private short bottom; @@ -73,7 +76,16 @@ public class ShortFieldDataMissingComparator extends NumericFieldDataComparator } @Override - public Comparable value(int slot) { - return Short.valueOf(values[slot]); + public int compareDocToValue(int doc, Short val2) throws IOException { + short val1 = missingValue; + if (currentFieldData.hasValue(doc)) { + val1 = currentFieldData.shortValue(doc); + } + return val1 - val2; + } + + @Override + public Short value(int slot) { + return values[slot]; } } diff --git a/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldDataType.java b/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldDataType.java index c1d10a70226..b69d47ef031 100644 --- a/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldDataType.java +++ b/src/main/java/org/elasticsearch/index/field/data/shorts/ShortFieldDataType.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.field.data.shorts; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.SortField; import org.elasticsearch.index.cache.field.data.FieldDataCache; @@ -42,8 +42,8 @@ public class ShortFieldDataType implements FieldDataType { } @Override - public int reducedType() { 
- return SortField.SHORT; + public SortField.Type reducedType() { + return SortField.Type.SHORT; } }; } @@ -55,8 +55,8 @@ public class ShortFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.SHORT; + public SortField.Type reducedType() { + return SortField.Type.SHORT; } }; } @@ -68,8 +68,8 @@ public class ShortFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.SHORT; + public SortField.Type reducedType() { + return SortField.Type.SHORT; } }; } @@ -80,14 +80,14 @@ public class ShortFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.SHORT; + public SortField.Type reducedType() { + return SortField.Type.SHORT; } }; } @Override - public ShortFieldData load(IndexReader reader, String fieldName) throws IOException { + public ShortFieldData load(AtomicReader reader, String fieldName) throws IOException { return ShortFieldData.load(reader, fieldName); } } diff --git a/src/main/java/org/elasticsearch/index/field/data/shorts/SingleValueShortFieldData.java b/src/main/java/org/elasticsearch/index/field/data/shorts/SingleValueShortFieldData.java index 6a9f0791675..732a0cd81ec 100644 --- a/src/main/java/org/elasticsearch/index/field/data/shorts/SingleValueShortFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/shorts/SingleValueShortFieldData.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.field.data.shorts; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.util.concurrent.ThreadLocals; import org.elasticsearch.index.field.data.doubles.DoubleFieldData; @@ -73,7 +74,7 @@ public class SingleValueShortFieldData extends ShortFieldData { proc.onMissing(docId); return; } - proc.onValue(docId, Short.toString(values[loc])); + proc.onValue(docId, new BytesRef(Short.toString(values[loc]))); } @Override diff --git a/src/main/java/org/elasticsearch/index/field/data/strings/MultiValueStringFieldData.java b/src/main/java/org/elasticsearch/index/field/data/strings/MultiValueStringFieldData.java index 020a17e612e..1cbf77876ea 100644 --- a/src/main/java/org/elasticsearch/index/field/data/strings/MultiValueStringFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/strings/MultiValueStringFieldData.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.field.data.strings; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; -import org.elasticsearch.common.Strings; import org.elasticsearch.common.util.concurrent.ThreadLocals; /** @@ -28,23 +28,25 @@ import org.elasticsearch.common.util.concurrent.ThreadLocals; */ public class MultiValueStringFieldData extends StringFieldData { + private static final BytesRef[] EMPTY_ARRAY = new BytesRef[0]; + private static final int VALUE_CACHE_SIZE = 100; - private static ThreadLocal> valuesCache = new ThreadLocal>() { + private static ThreadLocal> valuesCache = new ThreadLocal>() { @Override - protected ThreadLocals.CleanableValue initialValue() { - String[][] value = new String[VALUE_CACHE_SIZE][]; + protected ThreadLocals.CleanableValue initialValue() { + BytesRef[][] value = new BytesRef[VALUE_CACHE_SIZE][]; for (int i = 0; i < value.length; i++) { - value[i] = new String[i]; + value[i] = new BytesRef[i]; } - return new ThreadLocals.CleanableValue(value); + return new ThreadLocals.CleanableValue(value); } }; // order with value 0 indicates no value private final int[][] ordinals; - public 
MultiValueStringFieldData(String fieldName, int[][] ordinals, String[] values) { + public MultiValueStringFieldData(String fieldName, int[][] ordinals, BytesRef[] values) { super(fieldName, values); this.ordinals = ordinals; } @@ -103,7 +105,7 @@ public class MultiValueStringFieldData extends StringFieldData { } @Override - public String value(int docId) { + public BytesRef value(int docId) { for (int[] ordinal : ordinals) { int loc = ordinal[docId]; if (loc != 0) { @@ -114,7 +116,7 @@ public class MultiValueStringFieldData extends StringFieldData { } @Override - public String[] values(int docId) { + public BytesRef[] values(int docId) { int length = 0; for (int[] ordinal : ordinals) { if (ordinal[docId] == 0) { @@ -123,17 +125,17 @@ public class MultiValueStringFieldData extends StringFieldData { length++; } if (length == 0) { - return Strings.EMPTY_ARRAY; + return EMPTY_ARRAY; } - String[] strings; + BytesRef[] refs; if (length < VALUE_CACHE_SIZE) { - strings = valuesCache.get().get()[length]; + refs = valuesCache.get().get()[length]; } else { - strings = new String[length]; + refs = new BytesRef[length]; } for (int i = 0; i < length; i++) { - strings[i] = values[ordinals[i][docId]]; + refs[i] = values[ordinals[i][docId]]; } - return strings; + return refs; } } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/field/data/strings/SingleValueStringFieldData.java b/src/main/java/org/elasticsearch/index/field/data/strings/SingleValueStringFieldData.java index bc325cc1329..b13c6cf4dfd 100644 --- a/src/main/java/org/elasticsearch/index/field/data/strings/SingleValueStringFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/strings/SingleValueStringFieldData.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.field.data.strings; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; -import org.elasticsearch.common.Strings; import org.elasticsearch.common.util.concurrent.ThreadLocals; /** @@ -28,17 +28,19 @@ import org.elasticsearch.common.util.concurrent.ThreadLocals; */ public class SingleValueStringFieldData extends StringFieldData { - private static ThreadLocal> valuesCache = new ThreadLocal>() { + private static final BytesRef[] EMPTY_ARRAY = new BytesRef[0]; + + private static ThreadLocal> valuesCache = new ThreadLocal>() { @Override - protected ThreadLocals.CleanableValue initialValue() { - return new ThreadLocals.CleanableValue(new String[1]); + protected ThreadLocals.CleanableValue initialValue() { + return new ThreadLocals.CleanableValue(new BytesRef[1]); } }; // order with value 0 indicates no value private final int[] ordinals; - public SingleValueStringFieldData(String fieldName, int[] ordinals, String[] values) { + public SingleValueStringFieldData(String fieldName, int[] ordinals, BytesRef[] values) { super(fieldName, values); this.ordinals = ordinals; } @@ -79,17 +81,17 @@ public class SingleValueStringFieldData extends StringFieldData { } @Override - public String value(int docId) { + public BytesRef value(int docId) { return values[ordinals[docId]]; } @Override - public String[] values(int docId) { + public BytesRef[] values(int docId) { int loc = ordinals[docId]; if (loc == 0) { - return Strings.EMPTY_ARRAY; + return EMPTY_ARRAY; } - String[] ret = valuesCache.get().get(); + BytesRef[] ret = valuesCache.get().get(); ret[0] = values[loc]; return ret; } diff --git a/src/main/java/org/elasticsearch/index/field/data/strings/StringDocFieldData.java 
b/src/main/java/org/elasticsearch/index/field/data/strings/StringDocFieldData.java index d135391c344..a4edcdf8e01 100644 --- a/src/main/java/org/elasticsearch/index/field/data/strings/StringDocFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/strings/StringDocFieldData.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.field.data.strings; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.index.field.data.DocFieldData; /** @@ -30,11 +31,11 @@ public class StringDocFieldData extends DocFieldData { super(fieldData); } - public String getValue() { + public BytesRef getValue() { return fieldData.value(docId); } - public String[] getValues() { + public BytesRef[] getValues() { return fieldData.values(docId); } } diff --git a/src/main/java/org/elasticsearch/index/field/data/strings/StringFieldData.java b/src/main/java/org/elasticsearch/index/field/data/strings/StringFieldData.java index a1684c88d3e..867157beca7 100644 --- a/src/main/java/org/elasticsearch/index/field/data/strings/StringFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/strings/StringFieldData.java @@ -19,7 +19,8 @@ package org.elasticsearch.index.field.data.strings; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.index.field.data.FieldData; import org.elasticsearch.index.field.data.FieldDataType; @@ -33,9 +34,9 @@ import java.util.ArrayList; */ public abstract class StringFieldData extends FieldData { - protected final String[] values; + protected final BytesRef[] values; - protected StringFieldData(String fieldName, String[] values) { + protected StringFieldData(String fieldName, BytesRef[] values) { super(fieldName); this.values = values; } @@ -43,21 +44,21 @@ public abstract class StringFieldData extends FieldData { @Override protected long computeSizeInBytes() { long size = RamUsage.NUM_BYTES_ARRAY_HEADER; - for (String value : values) { + for (BytesRef value : values) { if (value != null) { - size += RamUsage.NUM_BYTES_OBJECT_HEADER + ((value.length() * RamUsage.NUM_BYTES_CHAR) + (3 * RamUsage.NUM_BYTES_INT)); + size += RamUsage.NUM_BYTES_OBJECT_HEADER + (value.length + (2 * RamUsage.NUM_BYTES_INT)); } } return size; } - public String[] values() { + public BytesRef[] values() { return this.values; } - abstract public String value(int docId); + abstract public BytesRef value(int docId); - abstract public String[] values(int docId); + abstract public BytesRef[] values(int docId); @Override public StringDocFieldData docFieldData(int docId) { @@ -65,7 +66,7 @@ public abstract class StringFieldData extends FieldData { } @Override - public String stringValue(int docId) { + public BytesRef stringValue(int docId) { return value(docId); } @@ -86,13 +87,13 @@ public abstract class StringFieldData extends FieldData { } } - public static StringFieldData load(IndexReader reader, String field) throws IOException { + public static StringFieldData load(AtomicReader reader, String field) throws IOException { return FieldDataLoader.load(reader, field, new StringTypeLoader()); } static class StringTypeLoader extends FieldDataLoader.FreqsTypeLoader { - private final ArrayList terms = new ArrayList(); + private final ArrayList terms = new ArrayList(); StringTypeLoader() { super(); @@ -101,18 +102,18 @@ public abstract class StringFieldData extends FieldData { } @Override - public void collectTerm(String term) { + public void 
collectTerm(BytesRef term) { terms.add(term); } @Override public StringFieldData buildSingleValue(String field, int[] ordinals) { - return new SingleValueStringFieldData(field, ordinals, terms.toArray(new String[terms.size()])); + return new SingleValueStringFieldData(field, ordinals, terms.toArray(new BytesRef[terms.size()])); } @Override public StringFieldData buildMultiValue(String field, int[][] ordinals) { - return new MultiValueStringFieldData(field, ordinals, terms.toArray(new String[terms.size()])); + return new MultiValueStringFieldData(field, ordinals, terms.toArray(new BytesRef[terms.size()])); } } } diff --git a/src/main/java/org/elasticsearch/index/field/data/strings/StringFieldDataType.java b/src/main/java/org/elasticsearch/index/field/data/strings/StringFieldDataType.java index 1e46c46b1e2..c5e9f797850 100644 --- a/src/main/java/org/elasticsearch/index/field/data/strings/StringFieldDataType.java +++ b/src/main/java/org/elasticsearch/index/field/data/strings/StringFieldDataType.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.field.data.strings; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.SortField; import org.elasticsearch.ElasticSearchIllegalArgumentException; @@ -45,14 +45,14 @@ public class StringFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.STRING; + public SortField.Type reducedType() { + return SortField.Type.STRING; } }; } @Override - public StringFieldData load(IndexReader reader, String fieldName) throws IOException { + public StringFieldData load(AtomicReader reader, String fieldName) throws IOException { return StringFieldData.load(reader, fieldName); } } diff --git a/src/main/java/org/elasticsearch/index/field/data/strings/StringOrdValFieldDataComparator.java b/src/main/java/org/elasticsearch/index/field/data/strings/StringOrdValFieldDataComparator.java index 1aeb7ff7995..0c9d6dd6d6a 100644 --- a/src/main/java/org/elasticsearch/index/field/data/strings/StringOrdValFieldDataComparator.java +++ b/src/main/java/org/elasticsearch/index/field/data/strings/StringOrdValFieldDataComparator.java @@ -19,8 +19,10 @@ package org.elasticsearch.index.field.data.strings; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.FieldComparator; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.field.data.FieldData; import org.elasticsearch.index.field.data.FieldDataType; @@ -31,28 +33,28 @@ import java.io.IOException; * */ // LUCENE MONITOR: Monitor against FieldComparator#String -public class StringOrdValFieldDataComparator extends FieldComparator { +public class StringOrdValFieldDataComparator extends FieldComparator { private final FieldDataCache fieldDataCache; private final int[] ords; - private final String[] values; + private final BytesRef[] values; private final int[] readerGen; private int currentReaderGen = -1; - private String[] lookup; + private BytesRef[] lookup; private int[] order; private final String field; private int bottomSlot = -1; private int bottomOrd; private boolean bottomSameReader; - private String bottomValue; + private BytesRef bottomValue; public StringOrdValFieldDataComparator(int numHits, String field, int sortPos, boolean reversed, FieldDataCache fieldDataCache) { this.fieldDataCache = fieldDataCache; 
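LUCENE 4 UPGRADE NOTE: the string field data classes above and below hold org.apache.lucene.util.BytesRef values instead of String. A BytesRef is a mutable view over a slice of a byte[]; Lucene's enumerators recycle the instance they return, so a value must be deep-copied before it is retained, and ordering is by unsigned byte comparison (which for UTF-8 agrees with Unicode code point order). A small self-contained sketch (not part of this patch):

import org.apache.lucene.util.BytesRef;

public final class BytesRefDemo {

    public static void main(String[] args) {
        BytesRef term = new BytesRef("apple");

        // Take a private copy before retaining a BytesRef handed out by an enumerator.
        BytesRef stored = BytesRef.deepCopyOf(term);

        System.out.println(stored.utf8ToString()); // apple
        System.out.println(stored.compareTo(new BytesRef("banana")) < 0); // true: unsigned byte order
    }
}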
ords = new int[numHits]; - values = new String[numHits]; + values = new BytesRef[numHits]; readerGen = new int[numHits]; this.field = field; } @@ -63,8 +65,8 @@ public class StringOrdValFieldDataComparator extends FieldComparator { return ords[slot1] - ords[slot2]; } - final String val1 = values[slot1]; - final String val2 = values[slot2]; + final BytesRef val1 = values[slot1]; + final BytesRef val2 = values[slot2]; if (val1 == null) { if (val2 == null) { return 0; @@ -92,7 +94,7 @@ public class StringOrdValFieldDataComparator extends FieldComparator { return cmp; } - final String val2 = lookup[order]; + final BytesRef val2 = lookup[order]; if (bottomValue == null) { if (val2 == null) { return 0; @@ -117,8 +119,8 @@ } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - FieldData cleanFieldData = fieldDataCache.cache(FieldDataType.DefaultTypes.STRING, reader, field); + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + FieldData cleanFieldData = fieldDataCache.cache(FieldDataType.DefaultTypes.STRING, context.reader(), field); if (cleanFieldData instanceof MultiValueStringFieldData) { throw new IOException("Can't sort on string types with more than one value per doc, or more than one token per field"); } @@ -130,6 +132,21 @@ if (bottomSlot != -1) { setBottom(bottomSlot); } + return this; + } + + @Override + public int compareDocToValue(int doc, BytesRef otherVal) throws IOException { + BytesRef val = lookup[order[doc]]; + if (val == null) { + if (otherVal == null) { + return 0; + } + return -1; + } else if (otherVal == null) { + return 1; + } + return val.compareTo(otherVal); } @Override @@ -147,7 +164,7 @@ bottomSameReader = true; readerGen[bottomSlot] = currentReaderGen; } else { - final int index = binarySearch(lookup, bottomValue); + final int index = binarySearch(bottomValue, lookup); if (index < 0) { bottomOrd = -index - 2; bottomSameReader = false; @@ -162,12 +179,37 @@ } } + private static int binarySearch(BytesRef value, BytesRef[] values) { + return binarySearch(value, values, 1, values.length-1); + } + + private static int binarySearch(BytesRef value, BytesRef[] values, int low, int high) { + while (low <= high) { + int mid = (low + high) >>> 1; + BytesRef midVal = values[mid]; + int cmp; + if (midVal != null) { + cmp = midVal.compareTo(value); + } else { + cmp = -1; + } + + if (cmp < 0) + low = mid + 1; + else if (cmp > 0) + high = mid - 1; + else + return mid; + } + return -(low + 1); + } + @Override - public Comparable value(int slot) { + public BytesRef value(int slot) { return values[slot]; } - public String[] getValues() { + public BytesRef[] getValues() { return values; } diff --git a/src/main/java/org/elasticsearch/index/field/data/strings/StringValFieldDataComparator.java b/src/main/java/org/elasticsearch/index/field/data/strings/StringValFieldDataComparator.java index 1f13606949d..c9103a82dd5 100644 --- a/src/main/java/org/elasticsearch/index/field/data/strings/StringValFieldDataComparator.java +++ b/src/main/java/org/elasticsearch/index/field/data/strings/StringValFieldDataComparator.java @@ -19,8 +19,10 @@ package org.elasticsearch.index.field.data.strings; +import org.apache.lucene.index.AtomicReaderContext; import
org.apache.lucene.index.IndexReader; import org.apache.lucene.search.FieldComparator; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.field.data.FieldData; import org.elasticsearch.index.field.data.FieldDataType; @@ -31,7 +33,7 @@ import java.io.IOException; * */ // LUCENE MONITOR: Monitor against FieldComparator#String -public class StringValFieldDataComparator extends FieldComparator { +public class StringValFieldDataComparator extends FieldComparator { private final String fieldName; @@ -39,20 +41,20 @@ public class StringValFieldDataComparator extends FieldComparator { protected FieldData currentFieldData; - private String[] values; + private BytesRef[] values; - private String bottom; + private BytesRef bottom; public StringValFieldDataComparator(int numHits, String fieldName, FieldDataCache fieldDataCache) { this.fieldName = fieldName; this.fieldDataCache = fieldDataCache; - values = new String[numHits]; + values = new BytesRef[numHits]; } @Override public int compare(int slot1, int slot2) { - final String val1 = values[slot1]; - final String val2 = values[slot2]; + final BytesRef val1 = values[slot1]; + final BytesRef val2 = values[slot2]; if (val1 == null) { if (val2 == null) { return 0; @@ -67,7 +69,7 @@ public class StringValFieldDataComparator extends FieldComparator { @Override public int compareBottom(int doc) { - final String val2 = currentFieldData.stringValue(doc); + final BytesRef val2 = currentFieldData.stringValue(doc); if (bottom == null) { if (val2 == null) { return 0; @@ -85,8 +87,23 @@ public class StringValFieldDataComparator extends FieldComparator { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - currentFieldData = fieldDataCache.cache(FieldDataType.DefaultTypes.STRING, reader, fieldName); + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + currentFieldData = fieldDataCache.cache(FieldDataType.DefaultTypes.STRING, context.reader(), fieldName); + return this; + } + + @Override + public int compareDocToValue(int doc, BytesRef val2) throws IOException { + BytesRef val1 = currentFieldData.stringValue(doc); + if (val1 == null) { + if (val2 == null) { + return 0; + } + return -1; + } else if (val2 == null) { + return 1; + } + return currentFieldData.stringValue(doc).compareTo(val2); } @Override @@ -95,7 +112,7 @@ public class StringValFieldDataComparator extends FieldComparator { } @Override - public Comparable value(int slot) { + public BytesRef value(int slot) { return values[slot]; } } diff --git a/src/main/java/org/elasticsearch/index/field/data/support/FieldDataLoader.java b/src/main/java/org/elasticsearch/index/field/data/support/FieldDataLoader.java index 28a80616fdf..414acaaa15a 100644 --- a/src/main/java/org/elasticsearch/index/field/data/support/FieldDataLoader.java +++ b/src/main/java/org/elasticsearch/index/field/data/support/FieldDataLoader.java @@ -19,11 +19,11 @@ package org.elasticsearch.index.field.data.support; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.TermEnum; -import org.apache.lucene.util.StringHelper; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.index.field.data.FieldData; 
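LUCENE 4 UPGRADE NOTE: the string comparators above all follow the same null convention in compare, compareBottom and compareDocToValue: a document with no value sorts before any present value. The rule, extracted as a standalone helper sketch (illustrative only, not part of this patch):

import org.apache.lucene.util.BytesRef;

public final class NullsFirst {

    // null sorts before any real value; two nulls compare equal.
    public static int compare(BytesRef val1, BytesRef val2) {
        if (val1 == null) {
            return val2 == null ? 0 : -1;
        }
        if (val2 == null) {
            return 1;
        }
        return val1.compareTo(val2);
    }
}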
import java.io.IOException; @@ -35,57 +35,44 @@ import java.util.ArrayList; public class FieldDataLoader { @SuppressWarnings({"StringEquality"}) - public static <T extends FieldData> T load(IndexReader reader, String field, TypeLoader<T> loader) throws IOException { + public static <T extends FieldData> T load(AtomicReader reader, String field, TypeLoader<T> loader) throws IOException { loader.init(); - field = StringHelper.intern(field); + // LUCENE 4 UPGRADE: StringHelper? + field = field.intern();//StringHelper.intern(field); ArrayList<int[]> ordinals = new ArrayList<int[]>(); int[] idx = new int[reader.maxDoc()]; ordinals.add(new int[reader.maxDoc()]); int t = 1; // current term number - TermDocs termDocs = reader.termDocs(); - TermEnum termEnum = reader.terms(new Term(field)); - try { - // bulk read (in lucene 4 it won't be needed). - int size = Math.min(128, reader.maxDoc()); - int[] docs = new int[size]; - int[] freqs = new int[size]; - do { - Term term = termEnum.term(); - if (term == null || term.field() != field) break; - loader.collectTerm(term.text()); - termDocs.seek(termEnum); + Terms terms = reader.terms(field); + TermsEnum termsEnum = terms.iterator(null); - int number = termDocs.read(docs, freqs); - while (number > 0) { - for (int i = 0; i < number; i++) { - int doc = docs[i]; - int[] ordinal; - if (idx[doc] >= ordinals.size()) { - ordinal = new int[reader.maxDoc()]; - ordinals.add(ordinal); - } else { - ordinal = ordinals.get(idx[doc]); - } - ordinal[doc] = t; - idx[doc]++; + try { + DocsEnum docsEnum = null; + for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) { + loader.collectTerm(BytesRef.deepCopyOf(term)); + docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0); + for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) { + int[] ordinal; + if (idx[docId] >= ordinals.size()) { + ordinal = new int[reader.maxDoc()]; + ordinals.add(ordinal); + } else { + ordinal = ordinals.get(idx[docId]); } - number = termDocs.read(docs, freqs); + ordinal[docId] = t; + idx[docId]++; } - t++; - } while (termEnum.next()); + t++; + } } catch (RuntimeException e) { if (e.getClass().getName().endsWith("StopFillCacheException")) { // all is well, in case numeric parsers are used. } else { throw e; } - } finally { - termDocs.close(); - termEnum.close(); } if (ordinals.size() == 1) { @@ -103,7 +90,7 @@ public class FieldDataLoader { void init(); - void collectTerm(String term); + void collectTerm(BytesRef term); T buildSingleValue(String fieldName, int[] ordinals);
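LUCENE 4 UPGRADE NOTE: the loader above shows the new enumeration idiom in one place: Terms/TermsEnum/DocsEnum replace TermEnum/TermDocs, terms arrive as recycled BytesRef instances (hence BytesRef.deepCopyOf before collecting), and deletions are honoured by passing the segment's live-docs bitset into TermsEnum.docs. The same pattern as a standalone sketch (assumes an already-open AtomicReader; not part of this patch):

import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

public final class TermsWalk {

    public static void walk(AtomicReader reader, String field) throws IOException {
        Terms terms = reader.terms(field);
        if (terms == null) {
            return; // field is not indexed in this segment
        }
        TermsEnum termsEnum = terms.iterator(null);
        DocsEnum docsEnum = null; // reused across terms
        for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
            // term is owned by the enum; deep-copy it before storing it anywhere
            docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0);
            for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) {
                // process (term, docId); deleted documents are already filtered out
            }
        }
    }
}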
diff --git a/src/main/java/org/elasticsearch/index/field/data/support/NumericFieldDataComparator.java b/src/main/java/org/elasticsearch/index/field/data/support/NumericFieldDataComparator.java index 8857369ac45..f56a3bb10d2 100644 --- a/src/main/java/org/elasticsearch/index/field/data/support/NumericFieldDataComparator.java +++ b/src/main/java/org/elasticsearch/index/field/data/support/NumericFieldDataComparator.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.field.data.support; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.FieldComparator; import org.elasticsearch.index.cache.field.data.FieldDataCache; @@ -30,7 +31,7 @@ /** * */ -public abstract class NumericFieldDataComparator extends FieldComparator { +public abstract class NumericFieldDataComparator<T> extends FieldComparator<T> { private final String fieldName; @@ -46,7 +47,8 @@ public abstract class NumericFieldDataComparator extends FieldComparator { public abstract FieldDataType fieldDataType(); @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - currentFieldData = (NumericFieldData) fieldDataCache.cache(fieldDataType(), reader, fieldName); + public NumericFieldDataComparator<T> setNextReader(AtomicReaderContext context) throws IOException { + currentFieldData = (NumericFieldData) fieldDataCache.cache(fieldDataType(), context.reader(), fieldName); + return this; } } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/field/function/sort/DoubleFieldsFunctionDataComparator.java b/src/main/java/org/elasticsearch/index/field/function/sort/DoubleFieldsFunctionDataComparator.java index ac7376cd371..6a567f9ab3b 100644 --- a/src/main/java/org/elasticsearch/index/field/function/sort/DoubleFieldsFunctionDataComparator.java +++ b/src/main/java/org/elasticsearch/index/field/function/sort/DoubleFieldsFunctionDataComparator.java @@ -52,8 +52,8 @@ public class DoubleFieldsFunctionDataComparator extends FieldComparator { } @Override - public int reducedType() { - return SortField.DOUBLE; + public SortField.Type reducedType() { + return SortField.Type.DOUBLE; } } diff --git a/src/main/java/org/elasticsearch/index/field/function/sort/StringFieldsFunctionDataComparator.java b/src/main/java/org/elasticsearch/index/field/function/sort/StringFieldsFunctionDataComparator.java index aaae60bcf55..ddf9eb81c25 100644 --- a/src/main/java/org/elasticsearch/index/field/function/sort/StringFieldsFunctionDataComparator.java +++ b/src/main/java/org/elasticsearch/index/field/function/sort/StringFieldsFunctionDataComparator.java @@ -51,8 +51,8 @@ public class StringFieldsFunctionDataComparator extends FieldComparator { } @Override - public int reducedType() { - return SortField.STRING; + public SortField.Type reducedType() { + return SortField.Type.STRING; } } diff --git a/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldData.java b/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldData.java index 2b7cc774815..5e5a1bcb8b7 100644 --- a/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldData.java +++ b/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldData.java @@ -20,7 +20,9 @@ package org.elasticsearch.index.mapper.geo; import
gnu.trove.list.array.TDoubleArrayList; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.unit.DistanceUnit; import org.elasticsearch.common.util.concurrent.ThreadLocals; @@ -115,8 +117,8 @@ public abstract class GeoPointFieldData extends FieldData } @Override - public String stringValue(int docId) { - return value(docId).geohash(); + public BytesRef stringValue(int docId) { + return new BytesRef(value(docId).geohash()); } @Override @@ -132,7 +134,7 @@ public abstract class GeoPointFieldData extends FieldData @Override public void forEachValue(StringValueProc proc) { for (int i = 1; i < lat.length; i++) { - proc.onValue(GeoHashUtils.encode(lat[i], lon[i])); + proc.onValue(new BytesRef(GeoHashUtils.encode(lat[i], lon[i]))); } } @@ -164,7 +166,7 @@ public abstract class GeoPointFieldData extends FieldData void onValue(int docId, double lat, double lon); } - public static GeoPointFieldData load(IndexReader reader, String field) throws IOException { + public static GeoPointFieldData load(AtomicReader reader, String field) throws IOException { return FieldDataLoader.load(reader, field, new StringTypeLoader()); } @@ -181,10 +183,12 @@ public abstract class GeoPointFieldData extends FieldData } @Override - public void collectTerm(String term) { - int comma = term.indexOf(','); - lat.add(Double.parseDouble(term.substring(0, comma))); - lon.add(Double.parseDouble(term.substring(comma + 1))); + public void collectTerm(BytesRef term) { + // LUCENE 4 UPGRADE: Not nice. We can't operate on a bytesref... + String location = term.utf8ToString(); + int comma = location.indexOf(','); + lat.add(Double.parseDouble(location.substring(0, comma))); + lon.add(Double.parseDouble(location.substring(comma + 1))); } diff --git a/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldDataType.java b/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldDataType.java index 225017b3148..adc1bbb9cb9 100644 --- a/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldDataType.java +++ b/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldDataType.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.mapper.geo; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.SortField; import org.elasticsearch.index.cache.field.data.FieldDataCache; @@ -44,14 +44,14 @@ public class GeoPointFieldDataType implements FieldDataType { } @Override - public int reducedType() { - return SortField.STRING; + public SortField.Type reducedType() { + return SortField.Type.STRING; } }; } @Override - public GeoPointFieldData load(IndexReader reader, String fieldName) throws IOException { + public GeoPointFieldData load(AtomicReader reader, String fieldName) throws IOException { return GeoPointFieldData.load(reader, fieldName); } } diff --git a/src/main/java/org/elasticsearch/index/search/geo/GeoDistanceDataComparator.java b/src/main/java/org/elasticsearch/index/search/geo/GeoDistanceDataComparator.java index 5afeaa5260a..5898b241114 100644 --- a/src/main/java/org/elasticsearch/index/search/geo/GeoDistanceDataComparator.java +++ b/src/main/java/org/elasticsearch/index/search/geo/GeoDistanceDataComparator.java @@ -77,8 +77,8 @@ public class GeoDistanceDataComparator extends FieldComparator { } @Override - public int reducedType() { - return SortField.DOUBLE; + public SortField.Type reducedType() { + return SortField.Type.DOUBLE; } }
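LUCENE 4 UPGRADE NOTE: every facet collector below makes the same mechanical change: Collector.setNextReader(IndexReader reader, int docBase) is gone in Lucene 4, replaced by setNextReader(AtomicReaderContext context), where the context carries both the per-segment reader (context.reader()) and the doc base (context.docBase). A minimal sketch of the new collector shape (hypothetical collector, not part of this patch):

import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.Scorer;

public class CountingCollector extends Collector {

    private int count;
    private int docBase;

    @Override
    public void setScorer(Scorer scorer) {
        // counting does not need scores
    }

    @Override
    public void collect(int doc) throws IOException {
        count++; // doc is segment-relative; docBase + doc is the index-wide id
    }

    @Override
    public void setNextReader(AtomicReaderContext context) throws IOException {
        docBase = context.docBase; // per-segment state (field data, live docs) comes from context.reader()
    }

    @Override
    public boolean acceptsDocsOutOfOrder() {
        return true;
    }

    public int count() {
        return count;
    }
}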
diff --git a/src/main/java/org/elasticsearch/search/facet/AbstractFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/AbstractFacetCollector.java index 126dd3dcd16..97815a3ec0d 100644 --- a/src/main/java/org/elasticsearch/search/facet/AbstractFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/AbstractFacetCollector.java @@ -20,7 +20,7 @@ package org.elasticsearch.search.facet; import com.google.common.collect.ImmutableList; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Filter; import org.apache.lucene.search.Scorer; import org.elasticsearch.common.lucene.docset.DocSet; @@ -74,14 +74,14 @@ public abstract class AbstractFacetCollector extends FacetCollector { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { + public void setNextReader(AtomicReaderContext context) throws IOException { if (filter != null) { - docSet = DocSets.convert(reader, filter.getDocIdSet(reader)); + docSet = DocSets.convert(context.reader(), filter.getDocIdSet(context, context.reader().getLiveDocs())); } - doSetNextReader(reader, docBase); + doSetNextReader(context); } - protected abstract void doSetNextReader(IndexReader reader, int docBase) throws IOException; + protected abstract void doSetNextReader(AtomicReaderContext context) throws IOException; @Override public void collect(int doc) throws IOException { diff --git a/src/main/java/org/elasticsearch/search/facet/datehistogram/CountDateHistogramFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/datehistogram/CountDateHistogramFacetCollector.java index 0fabdddd8b5..69fb71f5eb3 100644 --- a/src/main/java/org/elasticsearch/search/facet/datehistogram/CountDateHistogramFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/datehistogram/CountDateHistogramFacetCollector.java @@ -20,7 +20,7 @@ package org.elasticsearch.search.facet.datehistogram; import gnu.trove.map.hash.TLongLongHashMap; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.joda.TimeZoneRounding; import org.elasticsearch.index.cache.field.data.FieldDataCache; @@ -81,8 +81,8 @@ public class CountDateHistogramFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueDateHistogramFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueDateHistogramFacetCollector.java index 24e3bc9e7c3..4cba87d96fa 100644 --- a/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueDateHistogramFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueDateHistogramFacetCollector.java @@ -19,7 +19,7 @@ package org.elasticsearch.search.facet.datehistogram; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.elasticsearch.common.CacheRecycler; import
org.elasticsearch.common.joda.TimeZoneRounding; import org.elasticsearch.common.trove.ExtTLongObjectHashMap; @@ -89,9 +89,9 @@ public class ValueDateHistogramFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - keyFieldData = (LongFieldData) fieldDataCache.cache(keyFieldDataType, reader, keyIndexFieldName); - histoProc.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, reader, valueIndexFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + keyFieldData = (LongFieldData) fieldDataCache.cache(keyFieldDataType, context.reader(), keyIndexFieldName); + histoProc.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, context.reader(), valueIndexFieldName); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueScriptDateHistogramFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueScriptDateHistogramFacetCollector.java index 3c38a9c7bf7..21a7f84444e 100644 --- a/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueScriptDateHistogramFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueScriptDateHistogramFacetCollector.java @@ -19,7 +19,7 @@ package org.elasticsearch.search.facet.datehistogram; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.joda.TimeZoneRounding; @@ -94,9 +94,9 @@ public class ValueScriptDateHistogramFacetCollector extends AbstractFacetCollect } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); - valueScript.setNextReader(reader); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); + valueScript.setNextReader(context.reader()); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/filter/FilterFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/filter/FilterFacetCollector.java index 6cba434dcdd..bca6310825c 100644 --- a/src/main/java/org/elasticsearch/search/facet/filter/FilterFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/filter/FilterFacetCollector.java @@ -19,7 +19,7 @@ package org.elasticsearch.search.facet.filter; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.*; import org.elasticsearch.common.lucene.docset.DocSet; import org.elasticsearch.common.lucene.docset.DocSets; @@ -63,8 +63,8 @@ public class FilterFacetCollector extends AbstractFacetCollector implements Opti } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - docSet = DocSets.convert(reader, filter.getDocIdSet(reader)); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + docSet = DocSets.convert(context.reader(), filter.getDocIdSet(context, context.reader().getLiveDocs())); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/geodistance/GeoDistanceFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/geodistance/GeoDistanceFacetCollector.java 
index 9dceb0c43ec..4144f8d1e07 100644 --- a/src/main/java/org/elasticsearch/search/facet/geodistance/GeoDistanceFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/geodistance/GeoDistanceFacetCollector.java @@ -19,7 +19,7 @@ package org.elasticsearch.search.facet.geodistance; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.elasticsearch.common.unit.DistanceUnit; import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.mapper.MapperService; @@ -87,8 +87,8 @@ public class GeoDistanceFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - fieldData = (GeoPointFieldData) fieldDataCache.cache(GeoPointFieldDataType.TYPE, reader, indexFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + fieldData = (GeoPointFieldData) fieldDataCache.cache(GeoPointFieldDataType.TYPE, context.reader(), indexFieldName); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/geodistance/ScriptGeoDistanceFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/geodistance/ScriptGeoDistanceFacetCollector.java index 22f29ec5a82..7625902c62b 100644 --- a/src/main/java/org/elasticsearch/search/facet/geodistance/ScriptGeoDistanceFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/geodistance/ScriptGeoDistanceFacetCollector.java @@ -19,7 +19,7 @@ package org.elasticsearch.search.facet.geodistance; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; import org.elasticsearch.common.unit.DistanceUnit; import org.elasticsearch.index.mapper.geo.GeoPointFieldData; @@ -55,9 +55,9 @@ public class ScriptGeoDistanceFacetCollector extends GeoDistanceFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - super.doSetNextReader(reader, docBase); - script.setNextReader(reader); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + super.doSetNextReader(context); + script.setNextReader(context.reader()); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/geodistance/ValueGeoDistanceFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/geodistance/ValueGeoDistanceFacetCollector.java index c16ac70ae85..45c94ef59a3 100644 --- a/src/main/java/org/elasticsearch/search/facet/geodistance/ValueGeoDistanceFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/geodistance/ValueGeoDistanceFacetCollector.java @@ -19,7 +19,7 @@ package org.elasticsearch.search.facet.geodistance; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.elasticsearch.common.unit.DistanceUnit; import org.elasticsearch.index.field.data.FieldDataType; import org.elasticsearch.index.field.data.NumericFieldData; @@ -54,9 +54,9 @@ public class ValueGeoDistanceFacetCollector extends GeoDistanceFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - super.doSetNextReader(reader, docBase); - ((Aggregator) this.aggregator).valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, reader, indexValueFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + super.doSetNextReader(context); + 
((Aggregator) this.aggregator).valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, context.reader(), indexValueFieldName); } public static class Aggregator implements GeoPointFieldData.ValueInDocProc { diff --git a/src/main/java/org/elasticsearch/search/facet/histogram/bounded/BoundedCountHistogramFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/histogram/bounded/BoundedCountHistogramFacetCollector.java index c6253ae98c4..1b73ad307cd 100644 --- a/src/main/java/org/elasticsearch/search/facet/histogram/bounded/BoundedCountHistogramFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/histogram/bounded/BoundedCountHistogramFacetCollector.java @@ -19,7 +19,7 @@ package org.elasticsearch.search.facet.histogram.bounded; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.field.data.FieldDataType; @@ -85,8 +85,8 @@ public class BoundedCountHistogramFacetCollector extends AbstractFacetCollector } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - fieldData = (NumericFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + fieldData = (NumericFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/histogram/bounded/BoundedValueHistogramFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/histogram/bounded/BoundedValueHistogramFacetCollector.java index 28817cb44d5..d57cc2974f0 100644 --- a/src/main/java/org/elasticsearch/search/facet/histogram/bounded/BoundedValueHistogramFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/histogram/bounded/BoundedValueHistogramFacetCollector.java @@ -19,7 +19,7 @@ package org.elasticsearch.search.facet.histogram.bounded; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.field.data.FieldDataType; @@ -98,9 +98,9 @@ public class BoundedValueHistogramFacetCollector extends AbstractFacetCollector } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - keyFieldData = (NumericFieldData) fieldDataCache.cache(keyFieldDataType, reader, keyIndexFieldName); - histoProc.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, reader, valueIndexFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + keyFieldData = (NumericFieldData) fieldDataCache.cache(keyFieldDataType, context.reader(), keyIndexFieldName); + histoProc.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, context.reader(), valueIndexFieldName); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/histogram/bounded/BoundedValueScriptHistogramFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/histogram/bounded/BoundedValueScriptHistogramFacetCollector.java index 5e28dc9026c..0f0ad6ed907 100644 --- a/src/main/java/org/elasticsearch/search/facet/histogram/bounded/BoundedValueScriptHistogramFacetCollector.java +++ 
b/src/main/java/org/elasticsearch/search/facet/histogram/bounded/BoundedValueScriptHistogramFacetCollector.java @@ -19,7 +19,7 @@ package org.elasticsearch.search.facet.histogram.bounded; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.index.cache.field.data.FieldDataCache; @@ -100,9 +100,9 @@ public class BoundedValueScriptHistogramFacetCollector extends AbstractFacetColl } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - fieldData = (NumericFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); - valueScript.setNextReader(reader); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + fieldData = (NumericFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); + valueScript.setNextReader(context.reader()); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/CountHistogramFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/CountHistogramFacetCollector.java index 972acb9a5ee..f6e8cdbc6d5 100644 --- a/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/CountHistogramFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/CountHistogramFacetCollector.java @@ -20,7 +20,7 @@ package org.elasticsearch.search.facet.histogram.unbounded; import gnu.trove.map.hash.TLongLongHashMap; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.field.data.FieldDataType; @@ -82,8 +82,8 @@ public class CountHistogramFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - fieldData = (NumericFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + fieldData = (NumericFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/FullHistogramFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/FullHistogramFacetCollector.java index e8ddc9405b4..f7407e15c86 100644 --- a/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/FullHistogramFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/FullHistogramFacetCollector.java @@ -19,7 +19,7 @@ package org.elasticsearch.search.facet.histogram.unbounded; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.trove.ExtTLongObjectHashMap; import org.elasticsearch.index.cache.field.data.FieldDataCache; @@ -82,8 +82,8 @@ public class FullHistogramFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - fieldData = (NumericFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + fieldData = (NumericFieldData) 
fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ScriptHistogramFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ScriptHistogramFacetCollector.java index 335864a2c08..e484c4440e8 100644 --- a/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ScriptHistogramFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ScriptHistogramFacetCollector.java @@ -19,7 +19,7 @@ package org.elasticsearch.search.facet.histogram.unbounded; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.trove.ExtTLongObjectHashMap; @@ -91,9 +91,9 @@ public class ScriptHistogramFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - keyScript.setNextReader(reader); - valueScript.setNextReader(reader); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + keyScript.setNextReader(context.reader()); + valueScript.setNextReader(context.reader()); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ValueHistogramFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ValueHistogramFacetCollector.java index d93fc51fcca..8a3a7428c8b 100644 --- a/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ValueHistogramFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ValueHistogramFacetCollector.java @@ -19,7 +19,7 @@ package org.elasticsearch.search.facet.histogram.unbounded; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.trove.ExtTLongObjectHashMap; import org.elasticsearch.index.cache.field.data.FieldDataCache; @@ -91,9 +91,9 @@ public class ValueHistogramFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - keyFieldData = (NumericFieldData) fieldDataCache.cache(keyFieldDataType, reader, keyIndexFieldName); - histoProc.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, reader, valueIndexFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + keyFieldData = (NumericFieldData) fieldDataCache.cache(keyFieldDataType, context.reader(), keyIndexFieldName); + histoProc.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, context.reader(), valueIndexFieldName); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ValueScriptHistogramFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ValueScriptHistogramFacetCollector.java index d889ff3c3d1..eca0817bba5 100644 --- a/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ValueScriptHistogramFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ValueScriptHistogramFacetCollector.java @@ -19,7 +19,7 @@ package org.elasticsearch.search.facet.histogram.unbounded; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import 
org.apache.lucene.search.Scorer;
 import org.elasticsearch.common.CacheRecycler;
 import org.elasticsearch.common.trove.ExtTLongObjectHashMap;
@@ -94,9 +94,9 @@ public class ValueScriptHistogramFacetCollector extends AbstractFacetCollector {
     }
 
     @Override
-    protected void doSetNextReader(IndexReader reader, int docBase) throws IOException {
-        fieldData = (NumericFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName);
-        valueScript.setNextReader(reader);
+    protected void doSetNextReader(AtomicReaderContext context) throws IOException {
+        fieldData = (NumericFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
+        valueScript.setNextReader(context.reader());
     }
 
     @Override
diff --git a/src/main/java/org/elasticsearch/search/facet/query/QueryFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/query/QueryFacetCollector.java
index 2f047b63868..cb1e7a2b845 100644
--- a/src/main/java/org/elasticsearch/search/facet/query/QueryFacetCollector.java
+++ b/src/main/java/org/elasticsearch/search/facet/query/QueryFacetCollector.java
@@ -19,7 +19,8 @@
 package org.elasticsearch.search.facet.query;
 
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.search.*;
 import org.elasticsearch.common.lucene.docset.DocSet;
 import org.elasticsearch.common.lucene.docset.DocSets;
 
@@ -57,8 +58,8 @@ public class QueryFacetCollector extends AbstractFacetCollector implements Optim
     }
 
     @Override
-    protected void doSetNextReader(IndexReader reader, int docBase) throws IOException {
-        docSet = DocSets.convert(reader, filter.getDocIdSet(reader));
+    protected void doSetNextReader(AtomicReaderContext context) throws IOException {
+        docSet = DocSets.convert(context.reader(), filter.getDocIdSet(context, context.reader().getLiveDocs()));
     }
 
     @Override
diff --git a/src/main/java/org/elasticsearch/search/facet/range/KeyValueRangeFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/range/KeyValueRangeFacetCollector.java
index 034f04ade1d..cbe237958ac 100644
--- a/src/main/java/org/elasticsearch/search/facet/range/KeyValueRangeFacetCollector.java
+++ b/src/main/java/org/elasticsearch/search/facet/range/KeyValueRangeFacetCollector.java
@@ -19,7 +19,7 @@
 package org.elasticsearch.search.facet.range;
 
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.AtomicReaderContext;
 import org.elasticsearch.index.cache.field.data.FieldDataCache;
 import org.elasticsearch.index.field.data.FieldDataType;
 import org.elasticsearch.index.field.data.NumericFieldData;
@@ -80,9 +80,9 @@ public class KeyValueRangeFacetCollector extends AbstractFacetCollector {
     }
 
     @Override
-    protected void doSetNextReader(IndexReader reader, int docBase) throws IOException {
-        keyFieldData = (NumericFieldData) fieldDataCache.cache(keyFieldDataType, reader, keyIndexFieldName);
-        rangeProc.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, reader, valueIndexFieldName);
+    protected void doSetNextReader(AtomicReaderContext context) throws IOException {
+        keyFieldData = (NumericFieldData) fieldDataCache.cache(keyFieldDataType, context.reader(), keyIndexFieldName);
+        rangeProc.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, context.reader(), valueIndexFieldName);
     }
 
     @Override
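Every hunk above is the same Lucene 4.0 migration: Collector.setNextReader(IndexReader reader, int docBase) is replaced by setNextReader(AtomicReaderContext), where the context bundles the per-segment reader (context.reader()) and its doc base (context.docBase). Filter.getDocIdSet likewise now takes the segment context plus an acceptDocs bitset, which is why QueryFacetCollector passes context.reader().getLiveDocs() to honor deletions. A minimal sketch of the per-segment contract these facet collectors plug into via doSetNextReader; the class is illustrative, not the actual AbstractFacetCollector:

    import java.io.IOException;

    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.search.Collector;
    import org.apache.lucene.search.Scorer;

    // Illustrative base class showing the Lucene 4.0 Collector contract.
    abstract class PerSegmentCollector extends Collector {

        @Override
        public void setNextReader(AtomicReaderContext context) throws IOException {
            // context replaces the old (IndexReader reader, int docBase) pair:
            // context.reader() is the segment's AtomicReader and
            // context.docBase is its offset within the composite reader.
            doSetNextReader(context);
        }

        // Subclasses rebind per-segment state (field data, scripts) here.
        protected abstract void doSetNextReader(AtomicReaderContext context) throws IOException;

        @Override
        public void setScorer(Scorer scorer) throws IOException {
            // overridden by collectors that feed a scorer into scripts
        }

        @Override
        public void collect(int doc) throws IOException {
            // doc is segment-relative; add context.docBase for a top-level doc id
        }

        @Override
        public boolean acceptsDocsOutOfOrder() {
            return true;
        }
    }

diff --git a/src/main/java/org/elasticsearch/search/facet/range/RangeFacetCollector.java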
b/src/main/java/org/elasticsearch/search/facet/range/RangeFacetCollector.java index 1ce9ca71b17..1291a344dae 100644 --- a/src/main/java/org/elasticsearch/search/facet/range/RangeFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/range/RangeFacetCollector.java @@ -19,7 +19,7 @@ package org.elasticsearch.search.facet.range; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.field.data.FieldDataType; import org.elasticsearch.index.field.data.NumericFieldData; @@ -70,8 +70,8 @@ public class RangeFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - fieldData = (NumericFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + fieldData = (NumericFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/range/ScriptRangeFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/range/ScriptRangeFacetCollector.java index d07fdd11c8a..81bf33e6157 100644 --- a/src/main/java/org/elasticsearch/search/facet/range/ScriptRangeFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/range/ScriptRangeFacetCollector.java @@ -19,7 +19,7 @@ package org.elasticsearch.search.facet.range; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; import org.elasticsearch.script.SearchScript; import org.elasticsearch.search.facet.AbstractFacetCollector; @@ -54,9 +54,9 @@ public class ScriptRangeFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - keyScript.setNextReader(reader); - valueScript.setNextReader(reader); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + keyScript.setNextReader(context.reader()); + valueScript.setNextReader(context.reader()); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/statistical/ScriptStatisticalFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/statistical/ScriptStatisticalFacetCollector.java index 164cee8f648..7708b80ea1d 100644 --- a/src/main/java/org/elasticsearch/search/facet/statistical/ScriptStatisticalFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/statistical/ScriptStatisticalFacetCollector.java @@ -19,7 +19,7 @@ package org.elasticsearch.search.facet.statistical; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; import org.elasticsearch.script.SearchScript; import org.elasticsearch.search.facet.AbstractFacetCollector; @@ -72,8 +72,8 @@ public class ScriptStatisticalFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - script.setNextReader(reader); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + script.setNextReader(context.reader()); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/statistical/StatisticalFacetCollector.java 
b/src/main/java/org/elasticsearch/search/facet/statistical/StatisticalFacetCollector.java
index 8fba4dc9ab7..f2777d13a08 100644
--- a/src/main/java/org/elasticsearch/search/facet/statistical/StatisticalFacetCollector.java
+++ b/src/main/java/org/elasticsearch/search/facet/statistical/StatisticalFacetCollector.java
@@ -19,7 +19,7 @@
 package org.elasticsearch.search.facet.statistical;
 
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.AtomicReaderContext;
 import org.elasticsearch.index.cache.field.data.FieldDataCache;
 import org.elasticsearch.index.field.data.FieldDataType;
 import org.elasticsearch.index.field.data.NumericFieldData;
@@ -70,8 +70,8 @@ public class StatisticalFacetCollector extends AbstractFacetCollector {
     }
 
     @Override
-    protected void doSetNextReader(IndexReader reader, int docBase) throws IOException {
-        fieldData = (NumericFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName);
+    protected void doSetNextReader(AtomicReaderContext context) throws IOException {
+        fieldData = (NumericFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName);
     }
 
     @Override
diff --git a/src/main/java/org/elasticsearch/search/facet/statistical/StatisticalFieldsFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/statistical/StatisticalFieldsFacetCollector.java
index 7a3620a8395..f2b74bdf67e 100644
--- a/src/main/java/org/elasticsearch/search/facet/statistical/StatisticalFieldsFacetCollector.java
+++ b/src/main/java/org/elasticsearch/search/facet/statistical/StatisticalFieldsFacetCollector.java
@@ -19,7 +19,7 @@
 package org.elasticsearch.search.facet.statistical;
 
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.AtomicReaderContext;
 import org.elasticsearch.index.cache.field.data.FieldDataCache;
 import org.elasticsearch.index.field.data.FieldDataType;
 import org.elasticsearch.index.field.data.NumericFieldData;
@@ -73,9 +73,9 @@ public class StatisticalFieldsFacetCollector extends AbstractFacetCollector {
     }
 
     @Override
-    protected void doSetNextReader(IndexReader reader, int docBase) throws IOException {
+    protected void doSetNextReader(AtomicReaderContext context) throws IOException {
         for (int i = 0; i < indexFieldsNames.length; i++) {
-            fieldsData[i] = (NumericFieldData) fieldDataCache.cache(fieldsDataType[i], reader, indexFieldsNames[i]);
+            fieldsData[i] = (NumericFieldData) fieldDataCache.cache(fieldsDataType[i], context.reader(), indexFieldsNames[i]);
         }
     }
 
diff --git a/src/main/java/org/elasticsearch/search/facet/terms/TermsFacet.java b/src/main/java/org/elasticsearch/search/facet/terms/TermsFacet.java
index 59d03dc3cce..b67569be344 100644
--- a/src/main/java/org/elasticsearch/search/facet/terms/TermsFacet.java
+++ b/src/main/java/org/elasticsearch/search/facet/terms/TermsFacet.java
@@ -19,7 +19,9 @@
 package org.elasticsearch.search.facet.terms;
 
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.ElasticSearchIllegalArgumentException;
+import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.search.facet.Facet;
 
 import java.util.Comparator;
@@ -39,9 +41,9 @@ public interface TermsFacet extends Facet, Iterable {
 
     public interface Entry extends Comparable {
 
-        String term();
+        BytesReference term();
 
-        String getTerm();
+        BytesReference getTerm();
 
         Number termAsNumber();
 
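With Lucene 4.0 handling terms as raw bytes (BytesRef), the Entry contract above stops returning String: term() and getTerm() now yield a BytesReference, and the concrete Internal*TermsFacet entries render their primitive terms into a BytesArray on demand. Request-side exclusions follow the same move: TermsFacetProcessor collects them as new BytesRef(parser.text()), and the typed collectors parse them back with utf8ToString(). A sketch of an entry under the new contract, with LongEntry as a made-up name:

    import org.elasticsearch.common.bytes.BytesArray;
    import org.elasticsearch.common.bytes.BytesReference;

    // Illustrative entry shape after the String -> BytesReference change.
    class LongEntry {

        private final long term;
        private final int count;

        LongEntry(long term, int count) {
            this.term = term;
            this.count = count;
        }

        // The term stays a primitive and is only rendered to bytes when asked,
        // mirroring the Internal*TermsFacet hunks in this patch.
        public BytesReference term() {
            return new BytesArray(Long.toString(term));
        }

        public BytesReference getTerm() {
            return term();
        }

        public Number termAsNumber() {
            return term;
        }

        public int count() {
            return count;
        }
    }

The same classes also swap their Streamable calls from writeUTF/readUTF to writeString/readString. A minimal sketch of the resulting pair; FacetHeader is a made-up holder for the fields the hunks show:

    import java.io.IOException;

    import org.elasticsearch.common.io.stream.StreamInput;
    import org.elasticsearch.common.io.stream.StreamOutput;

    class FacetHeader {

        String name;
        byte comparatorId;
        int requiredSize;
        long missing;

        public void readFrom(StreamInput in) throws IOException {
            name = in.readString();       // was in.readUTF()
            comparatorId = in.readByte();
            requiredSize = in.readVInt();
            missing = in.readVLong();
        }

        public void writeTo(StreamOutput out) throws IOException {
            out.writeString(name);        // was out.writeUTF(name)
            out.writeByte(comparatorId);
            out.writeVInt(requiredSize);
            out.writeVLong(missing);
        }
    }

diff --git a/src/main/java/org/elasticsearch/search/facet/terms/TermsFacetProcessor.java b/src/main/java/org/elasticsearch/search/facet/terms/TermsFacetProcessor.java index c613c4d4fff..55811cd46f9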
100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/TermsFacetProcessor.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/TermsFacetProcessor.java @@ -21,6 +21,7 @@ package org.elasticsearch.search.facet.terms; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.regex.Regex; @@ -80,7 +81,7 @@ public class TermsFacetProcessor extends AbstractComponent implements FacetProce int size = 10; String[] fieldsNames = null; - ImmutableSet excluded = ImmutableSet.of(); + ImmutableSet excluded = ImmutableSet.of(); String regex = null; String regexFlags = null; TermsFacet.ComparatorType comparatorType = TermsFacet.ComparatorType.COUNT; @@ -101,9 +102,9 @@ public class TermsFacetProcessor extends AbstractComponent implements FacetProce } } else if (token == XContentParser.Token.START_ARRAY) { if ("exclude".equals(currentFieldName)) { - ImmutableSet.Builder builder = ImmutableSet.builder(); + ImmutableSet.Builder builder = ImmutableSet.builder(); while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) { - builder.add(parser.text()); + builder.add(new BytesRef(parser.text())); } excluded = builder.build(); } else if ("fields".equals(currentFieldName)) { diff --git a/src/main/java/org/elasticsearch/search/facet/terms/bytes/InternalByteTermsFacet.java b/src/main/java/org/elasticsearch/search/facet/terms/bytes/InternalByteTermsFacet.java index f2a0c153688..dad3cbe0f28 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/bytes/InternalByteTermsFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/bytes/InternalByteTermsFacet.java @@ -22,7 +22,10 @@ package org.elasticsearch.search.facet.terms.bytes; import com.google.common.collect.ImmutableList; import gnu.trove.iterator.TByteIntIterator; import gnu.trove.map.hash.TByteIntHashMap; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CacheRecycler; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -71,11 +74,11 @@ public class InternalByteTermsFacet extends InternalTermsFacet { this.count = count; } - public String term() { - return Short.toString(term); + public BytesReference term() { + return new BytesArray(Short.toString(term)); } - public String getTerm() { + public BytesReference getTerm() { return term(); } @@ -279,7 +282,7 @@ public class InternalByteTermsFacet extends InternalTermsFacet { @Override public void readFrom(StreamInput in) throws IOException { - name = in.readUTF(); + name = in.readString(); comparatorType = ComparatorType.fromId(in.readByte()); requiredSize = in.readVInt(); missing = in.readVLong(); @@ -294,7 +297,7 @@ public class InternalByteTermsFacet extends InternalTermsFacet { @Override public void writeTo(StreamOutput out) throws IOException { - out.writeUTF(name); + out.writeString(name); out.writeByte(comparatorType.id()); out.writeVInt(requiredSize); diff --git a/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteFacetCollector.java index 89676d5fe78..92bfd60c4a1 100644 --- 
a/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteFacetCollector.java @@ -24,8 +24,10 @@ import com.google.common.collect.ImmutableSet; import gnu.trove.iterator.TByteIntIterator; import gnu.trove.map.hash.TByteIntHashMap; import gnu.trove.set.hash.TByteHashSet; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; @@ -70,7 +72,7 @@ public class TermsByteFacetCollector extends AbstractFacetCollector { private final SearchScript script; public TermsByteFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, - ImmutableSet excluded, String scriptLang, String script, Map params) { + ImmutableSet excluded, String scriptLang, String script, Map params) { super(facetName); this.fieldDataCache = context.fieldDataCache(); this.size = size; @@ -108,8 +110,8 @@ public class TermsByteFacetCollector extends AbstractFacetCollector { if (allTerms) { try { - for (IndexReader reader : context.searcher().subReaders()) { - ByteFieldData fieldData = (ByteFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) { + ByteFieldData fieldData = (ByteFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName); fieldData.forEachValue(aggregator); } } catch (Exception e) { @@ -126,10 +128,10 @@ public class TermsByteFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - fieldData = (ByteFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + fieldData = (ByteFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); if (script != null) { - script.setNextReader(reader); + script.setNextReader(context.reader()); } } @@ -175,14 +177,14 @@ public class TermsByteFacetCollector extends AbstractFacetCollector { private final TByteHashSet excluded; - public AggregatorValueProc(TByteIntHashMap facets, Set excluded, SearchScript script) { + public AggregatorValueProc(TByteIntHashMap facets, Set excluded, SearchScript script) { super(facets); if (excluded == null || excluded.isEmpty()) { this.excluded = null; } else { this.excluded = new TByteHashSet(excluded.size()); - for (String s : excluded) { - this.excluded.add(Byte.parseByte(s)); + for (BytesRef s : excluded) { + this.excluded.add(Byte.parseByte(s.utf8ToString())); } } this.script = script; diff --git a/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteOrdinalsFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteOrdinalsFacetCollector.java index 8695ae3e279..f4571d8f3c4 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteOrdinalsFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteOrdinalsFacetCollector.java @@ -21,7 +21,8 @@ package org.elasticsearch.search.facet.terms.bytes; 
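A second recurring change: a composite IndexReader no longer exposes subReaders(), so the allTerms warm-up loops walk the leaf contexts via context.searcher().getTopReaderContext().leaves(), and the ordinals collectors size their per-reader aggregator lists from getIndexReader().leaves().size(). A sketch of that traversal; prewarm(...) stands in for the fieldDataCache.cache(fieldDataType, reader, fieldName) call and is not a real API:

    import java.io.IOException;
    import java.util.List;

    import org.apache.lucene.index.AtomicReader;
    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.search.IndexSearcher;

    final class LeafWalker {

        // Visits every segment of a searcher the Lucene 4.0 way.
        static void walk(IndexSearcher searcher) throws IOException {
            // leaves() replaces the removed subReaders() array
            List<AtomicReaderContext> leaves = searcher.getTopReaderContext().leaves();
            for (AtomicReaderContext leaf : leaves) {
                prewarm(leaf.reader()); // one AtomicReader per segment
            }
        }

        private static void prewarm(AtomicReader reader) {
            // stand-in for loading per-segment field data into the cache
        }
    }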
import com.google.common.collect.ImmutableSet; import gnu.trove.set.hash.TByteHashSet; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PriorityQueue; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; @@ -73,7 +74,7 @@ public class TermsByteOrdinalsFacetCollector extends AbstractFacetCollector { private final TByteHashSet excluded; public TermsByteOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, - ImmutableSet excluded) { + ImmutableSet excluded) { super(facetName); this.fieldDataCache = context.fieldDataCache(); this.size = size; @@ -101,8 +102,8 @@ public class TermsByteOrdinalsFacetCollector extends AbstractFacetCollector { this.excluded = null; } else { this.excluded = new TByteHashSet(excluded.size()); - for (String s : excluded) { - this.excluded.add(Byte.parseByte(s)); + for (BytesRef s : excluded) { + this.excluded.add(Byte.parseByte(s.utf8ToString())); } } @@ -113,11 +114,11 @@ public class TermsByteOrdinalsFacetCollector extends AbstractFacetCollector { minCount = 0; } - this.aggregators = new ArrayList(context.searcher().subReaders().length); + this.aggregators = new ArrayList(context.searcher().getIndexReader().leaves().size()); } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + protected void doSetNextReader(AtomicReaderContext context) throws IOException { if (current != null) { missing += current.counts[0]; total += current.total - current.counts[0]; @@ -125,7 +126,7 @@ public class TermsByteOrdinalsFacetCollector extends AbstractFacetCollector { aggregators.add(current); } } - fieldData = (ByteFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + fieldData = (ByteFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); current = new ReaderAggregator(fieldData); } @@ -256,7 +257,7 @@ public class TermsByteOrdinalsFacetCollector extends AbstractFacetCollector { public static class AggregatorPriorityQueue extends PriorityQueue { public AggregatorPriorityQueue(int size) { - initialize(size); + super(size); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/terms/doubles/InternalDoubleTermsFacet.java b/src/main/java/org/elasticsearch/search/facet/terms/doubles/InternalDoubleTermsFacet.java index 85dbbdcd36c..4522b036a27 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/doubles/InternalDoubleTermsFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/doubles/InternalDoubleTermsFacet.java @@ -22,7 +22,10 @@ package org.elasticsearch.search.facet.terms.doubles; import com.google.common.collect.ImmutableList; import gnu.trove.iterator.TDoubleIntIterator; import gnu.trove.map.hash.TDoubleIntHashMap; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CacheRecycler; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -71,11 +74,11 @@ public class InternalDoubleTermsFacet extends InternalTermsFacet { this.count = count; } - public String term() { - return Double.toString(term); + public BytesReference term() { + return new 
BytesArray(Double.toString(term)); } - public String getTerm() { + public BytesReference getTerm() { return term(); } @@ -279,7 +282,7 @@ public class InternalDoubleTermsFacet extends InternalTermsFacet { @Override public void readFrom(StreamInput in) throws IOException { - name = in.readUTF(); + name = in.readString(); comparatorType = ComparatorType.fromId(in.readByte()); requiredSize = in.readVInt(); missing = in.readVLong(); @@ -294,7 +297,7 @@ public class InternalDoubleTermsFacet extends InternalTermsFacet { @Override public void writeTo(StreamOutput out) throws IOException { - out.writeUTF(name); + out.writeString(name); out.writeByte(comparatorType.id()); out.writeVInt(requiredSize); out.writeVLong(missing); diff --git a/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleFacetCollector.java index 8fbb1a3ca82..23968376171 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleFacetCollector.java @@ -24,8 +24,10 @@ import com.google.common.collect.ImmutableSet; import gnu.trove.iterator.TDoubleIntIterator; import gnu.trove.map.hash.TDoubleIntHashMap; import gnu.trove.set.hash.TDoubleHashSet; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; @@ -70,7 +72,7 @@ public class TermsDoubleFacetCollector extends AbstractFacetCollector { private final SearchScript script; public TermsDoubleFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, - ImmutableSet excluded, String scriptLang, String script, Map params) { + ImmutableSet excluded, String scriptLang, String script, Map params) { super(facetName); this.fieldDataCache = context.fieldDataCache(); this.size = size; @@ -107,8 +109,8 @@ public class TermsDoubleFacetCollector extends AbstractFacetCollector { if (allTerms) { try { - for (IndexReader reader : context.searcher().subReaders()) { - DoubleFieldData fieldData = (DoubleFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) { + DoubleFieldData fieldData = (DoubleFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName); fieldData.forEachValue(aggregator); } } catch (Exception e) { @@ -125,10 +127,10 @@ public class TermsDoubleFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - fieldData = (DoubleFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + fieldData = (DoubleFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); if (script != null) { - script.setNextReader(reader); + script.setNextReader(context.reader()); } } @@ -174,15 +176,15 @@ public class TermsDoubleFacetCollector extends AbstractFacetCollector { private final TDoubleHashSet excluded; - public 
AggregatorValueProc(TDoubleIntHashMap facets, Set excluded, SearchScript script) { + public AggregatorValueProc(TDoubleIntHashMap facets, Set excluded, SearchScript script) { super(facets); this.script = script; if (excluded == null || excluded.isEmpty()) { this.excluded = null; } else { this.excluded = new TDoubleHashSet(excluded.size()); - for (String s : excluded) { - this.excluded.add(Double.parseDouble(s)); + for (BytesRef s : excluded) { + this.excluded.add(Double.parseDouble(s.utf8ToString())); } } } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleOrdinalsFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleOrdinalsFacetCollector.java index 0b4e0e7bb13..5e2331bca7b 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleOrdinalsFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleOrdinalsFacetCollector.java @@ -21,7 +21,8 @@ package org.elasticsearch.search.facet.terms.doubles; import com.google.common.collect.ImmutableSet; import gnu.trove.set.hash.TDoubleHashSet; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PriorityQueue; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; @@ -73,7 +74,7 @@ public class TermsDoubleOrdinalsFacetCollector extends AbstractFacetCollector { private final TDoubleHashSet excluded; public TermsDoubleOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, - ImmutableSet excluded) { + ImmutableSet excluded) { super(facetName); this.fieldDataCache = context.fieldDataCache(); this.size = size; @@ -100,8 +101,8 @@ public class TermsDoubleOrdinalsFacetCollector extends AbstractFacetCollector { this.excluded = null; } else { this.excluded = new TDoubleHashSet(excluded.size()); - for (String s : excluded) { - this.excluded.add(Double.parseDouble(s)); + for (BytesRef s : excluded) { + this.excluded.add(Double.parseDouble(s.utf8ToString())); } } @@ -112,11 +113,11 @@ public class TermsDoubleOrdinalsFacetCollector extends AbstractFacetCollector { minCount = 0; } - this.aggregators = new ArrayList(context.searcher().subReaders().length); + this.aggregators = new ArrayList(context.searcher().getIndexReader().leaves().size()); } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + protected void doSetNextReader(AtomicReaderContext context) throws IOException { if (current != null) { missing += current.counts[0]; total += current.total - current.counts[0]; @@ -124,7 +125,7 @@ public class TermsDoubleOrdinalsFacetCollector extends AbstractFacetCollector { aggregators.add(current); } } - fieldData = (DoubleFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + fieldData = (DoubleFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); current = new ReaderAggregator(fieldData); } @@ -255,7 +256,7 @@ public class TermsDoubleOrdinalsFacetCollector extends AbstractFacetCollector { public static class AggregatorPriorityQueue extends PriorityQueue { public AggregatorPriorityQueue(int size) { - initialize(size); + super(size); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/terms/floats/InternalFloatTermsFacet.java 
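The AggregatorPriorityQueue hunks reflect another 4.0 change: org.apache.lucene.util.PriorityQueue now takes its capacity through its constructor, and the protected initialize(int) call the old subclasses used is gone, hence super(size). A minimal sketch; Agg and its ordering are placeholders:

    import org.apache.lucene.util.PriorityQueue;

    // Placeholder element type for the sketch.
    final class Agg {
        long current;
    }

    // Lucene 4.0 PriorityQueue subclass: capacity goes to super(size),
    // ordering is defined by lessThan.
    final class AggQueue extends PriorityQueue<Agg> {

        AggQueue(int size) {
            super(size); // was: initialize(size) in Lucene 3.x
        }

        @Override
        protected boolean lessThan(Agg a, Agg b) {
            return a.current < b.current;
        }
    }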
b/src/main/java/org/elasticsearch/search/facet/terms/floats/InternalFloatTermsFacet.java index a511a29a713..8dac5e38a67 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/floats/InternalFloatTermsFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/floats/InternalFloatTermsFacet.java @@ -22,7 +22,10 @@ package org.elasticsearch.search.facet.terms.floats; import com.google.common.collect.ImmutableList; import gnu.trove.iterator.TFloatIntIterator; import gnu.trove.map.hash.TFloatIntHashMap; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CacheRecycler; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -71,11 +74,11 @@ public class InternalFloatTermsFacet extends InternalTermsFacet { this.count = count; } - public String term() { - return Float.toString(term); + public BytesReference term() { + return new BytesArray(Float.toString(term)); } - public String getTerm() { + public BytesReference getTerm() { return term(); } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatFacetCollector.java index d2fb439241d..19333a3426c 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatFacetCollector.java @@ -24,8 +24,11 @@ import com.google.common.collect.ImmutableSet; import gnu.trove.iterator.TFloatIntIterator; import gnu.trove.map.hash.TFloatIntHashMap; import gnu.trove.set.hash.TFloatHashSet; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; @@ -70,7 +73,7 @@ public class TermsFloatFacetCollector extends AbstractFacetCollector { private final SearchScript script; public TermsFloatFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, - ImmutableSet excluded, String scriptLang, String script, Map params) { + ImmutableSet excluded, String scriptLang, String script, Map params) { super(facetName); this.fieldDataCache = context.fieldDataCache(); this.size = size; @@ -107,8 +110,8 @@ public class TermsFloatFacetCollector extends AbstractFacetCollector { if (allTerms) { try { - for (IndexReader reader : context.searcher().subReaders()) { - FloatFieldData fieldData = (FloatFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) { + FloatFieldData fieldData = (FloatFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName); fieldData.forEachValue(aggregator); } } catch (Exception e) { @@ -125,10 +128,10 @@ public class TermsFloatFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - fieldData = (FloatFieldData) fieldDataCache.cache(fieldDataType, 
reader, indexFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + fieldData = (FloatFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); if (script != null) { - script.setNextReader(reader); + script.setNextReader(context.reader()); } } @@ -174,14 +177,14 @@ public class TermsFloatFacetCollector extends AbstractFacetCollector { private final TFloatHashSet excluded; - public AggregatorValueProc(TFloatIntHashMap facets, Set excluded, SearchScript script) { + public AggregatorValueProc(TFloatIntHashMap facets, Set excluded, SearchScript script) { super(facets); if (excluded == null || excluded.isEmpty()) { this.excluded = null; } else { this.excluded = new TFloatHashSet(excluded.size()); - for (String s : excluded) { - this.excluded.add(Float.parseFloat(s)); + for (BytesRef s : excluded) { + this.excluded.add(Float.parseFloat(s.utf8ToString())); } } this.script = script; diff --git a/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatOrdinalsFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatOrdinalsFacetCollector.java index cb44daa1b9b..e5a717ec2a3 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatOrdinalsFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatOrdinalsFacetCollector.java @@ -21,7 +21,8 @@ package org.elasticsearch.search.facet.terms.floats; import com.google.common.collect.ImmutableSet; import gnu.trove.set.hash.TFloatHashSet; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PriorityQueue; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; @@ -73,7 +74,7 @@ public class TermsFloatOrdinalsFacetCollector extends AbstractFacetCollector { private final TFloatHashSet excluded; public TermsFloatOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, - ImmutableSet excluded) { + ImmutableSet excluded) { super(facetName); this.fieldDataCache = context.fieldDataCache(); this.size = size; @@ -100,8 +101,8 @@ public class TermsFloatOrdinalsFacetCollector extends AbstractFacetCollector { this.excluded = null; } else { this.excluded = new TFloatHashSet(excluded.size()); - for (String s : excluded) { - this.excluded.add(Float.parseFloat(s)); + for (BytesRef s : excluded) { + this.excluded.add(Float.parseFloat(s.utf8ToString())); } } @@ -112,11 +113,11 @@ public class TermsFloatOrdinalsFacetCollector extends AbstractFacetCollector { minCount = 0; } - this.aggregators = new ArrayList(context.searcher().subReaders().length); + this.aggregators = new ArrayList(context.searcher().getIndexReader().leaves().size()); } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + protected void doSetNextReader(AtomicReaderContext context) throws IOException { if (current != null) { missing += current.counts[0]; total += current.total - current.counts[0]; @@ -124,7 +125,7 @@ public class TermsFloatOrdinalsFacetCollector extends AbstractFacetCollector { aggregators.add(current); } } - fieldData = (FloatFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + fieldData = (FloatFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); current = new 
ReaderAggregator(fieldData); } @@ -255,7 +256,7 @@ public class TermsFloatOrdinalsFacetCollector extends AbstractFacetCollector { public static class AggregatorPriorityQueue extends PriorityQueue { public AggregatorPriorityQueue(int size) { - initialize(size); + super(size); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/terms/index/IndexNameFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/index/IndexNameFacetCollector.java index 53cc4a6fc6a..c1cccdddd4b 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/index/IndexNameFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/index/IndexNameFacetCollector.java @@ -20,7 +20,7 @@ package org.elasticsearch.search.facet.terms.index; import com.google.common.collect.Sets; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.elasticsearch.search.facet.AbstractFacetCollector; import org.elasticsearch.search.facet.Facet; import org.elasticsearch.search.facet.terms.TermsFacet; @@ -49,7 +49,7 @@ public class IndexNameFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + protected void doSetNextReader(AtomicReaderContext context) throws IOException { } @Override @@ -59,6 +59,6 @@ public class IndexNameFacetCollector extends AbstractFacetCollector { @Override public Facet facet() { - return new InternalStringTermsFacet(facetName, comparatorType, size, Sets.newHashSet(new InternalStringTermsFacet.StringEntry(indexName, count)), 0, count); + return new InternalStringTermsFacet(facetName, comparatorType, size, Sets.newHashSet(new InternalStringTermsFacet.TermEntry(indexName, count)), 0, count); } } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/ints/InternalIntTermsFacet.java b/src/main/java/org/elasticsearch/search/facet/terms/ints/InternalIntTermsFacet.java index 032153fbb09..b7738b7d193 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/ints/InternalIntTermsFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/ints/InternalIntTermsFacet.java @@ -22,7 +22,10 @@ package org.elasticsearch.search.facet.terms.ints; import com.google.common.collect.ImmutableList; import gnu.trove.iterator.TIntIntIterator; import gnu.trove.map.hash.TIntIntHashMap; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CacheRecycler; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -71,11 +74,11 @@ public class InternalIntTermsFacet extends InternalTermsFacet { this.count = count; } - public String term() { - return Integer.toString(term); + public BytesReference term() { + return new BytesArray(Integer.toString(term)); } - public String getTerm() { + public BytesReference getTerm() { return term(); } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntFacetCollector.java index 8f7530582d2..e06ca78c9e2 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntFacetCollector.java @@ -24,8 +24,11 @@ import com.google.common.collect.ImmutableSet; import 
gnu.trove.iterator.TIntIntIterator; import gnu.trove.map.hash.TIntIntHashMap; import gnu.trove.set.hash.TIntHashSet; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; @@ -70,7 +73,7 @@ public class TermsIntFacetCollector extends AbstractFacetCollector { private final SearchScript script; public TermsIntFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, - ImmutableSet excluded, String scriptLang, String script, Map params) { + ImmutableSet excluded, String scriptLang, String script, Map params) { super(facetName); this.fieldDataCache = context.fieldDataCache(); this.size = size; @@ -107,8 +110,8 @@ public class TermsIntFacetCollector extends AbstractFacetCollector { if (allTerms) { try { - for (IndexReader reader : context.searcher().subReaders()) { - IntFieldData fieldData = (IntFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) { + IntFieldData fieldData = (IntFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName); fieldData.forEachValue(aggregator); } } catch (Exception e) { @@ -125,10 +128,10 @@ public class TermsIntFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - fieldData = (IntFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + fieldData = (IntFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); if (script != null) { - script.setNextReader(reader); + script.setNextReader(context.reader()); } } @@ -174,14 +177,14 @@ public class TermsIntFacetCollector extends AbstractFacetCollector { private final TIntHashSet excluded; - public AggregatorValueProc(TIntIntHashMap facets, Set excluded, SearchScript script) { + public AggregatorValueProc(TIntIntHashMap facets, Set excluded, SearchScript script) { super(facets); if (excluded == null || excluded.isEmpty()) { this.excluded = null; } else { this.excluded = new TIntHashSet(excluded.size()); - for (String s : excluded) { - this.excluded.add(Integer.parseInt(s)); + for (BytesRef s : excluded) { + this.excluded.add(Integer.parseInt(s.utf8ToString())); } } this.script = script; diff --git a/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntOrdinalsFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntOrdinalsFacetCollector.java index 240bc41753d..9ba28768bff 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntOrdinalsFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntOrdinalsFacetCollector.java @@ -21,7 +21,8 @@ package org.elasticsearch.search.facet.terms.ints; import com.google.common.collect.ImmutableSet; import gnu.trove.set.hash.TIntHashSet; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PriorityQueue; import 
org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; @@ -73,7 +74,7 @@ public class TermsIntOrdinalsFacetCollector extends AbstractFacetCollector { private final TIntHashSet excluded; public TermsIntOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, - ImmutableSet excluded) { + ImmutableSet excluded) { super(facetName); this.fieldDataCache = context.fieldDataCache(); this.size = size; @@ -100,8 +101,8 @@ public class TermsIntOrdinalsFacetCollector extends AbstractFacetCollector { this.excluded = null; } else { this.excluded = new TIntHashSet(excluded.size()); - for (String s : excluded) { - this.excluded.add(Integer.parseInt(s)); + for (BytesRef s : excluded) { + this.excluded.add(Integer.parseInt(s.utf8ToString())); } } @@ -112,11 +113,11 @@ public class TermsIntOrdinalsFacetCollector extends AbstractFacetCollector { minCount = 0; } - this.aggregators = new ArrayList(context.searcher().subReaders().length); + this.aggregators = new ArrayList(context.searcher().getIndexReader().leaves().size()); } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + protected void doSetNextReader(AtomicReaderContext context) throws IOException { if (current != null) { missing += current.counts[0]; total += current.total - current.counts[0]; @@ -124,7 +125,7 @@ public class TermsIntOrdinalsFacetCollector extends AbstractFacetCollector { aggregators.add(current); } } - fieldData = (IntFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + fieldData = (IntFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); current = new ReaderAggregator(fieldData); } @@ -255,7 +256,7 @@ public class TermsIntOrdinalsFacetCollector extends AbstractFacetCollector { public static class AggregatorPriorityQueue extends PriorityQueue { public AggregatorPriorityQueue(int size) { - initialize(size); + super(size); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/terms/ip/InternalIpTermsFacet.java b/src/main/java/org/elasticsearch/search/facet/terms/ip/InternalIpTermsFacet.java index 009bcf8b26e..447a4c6c341 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/ip/InternalIpTermsFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/ip/InternalIpTermsFacet.java @@ -22,7 +22,10 @@ package org.elasticsearch.search.facet.terms.ip; import com.google.common.collect.ImmutableList; import gnu.trove.iterator.TLongIntIterator; import gnu.trove.map.hash.TLongIntHashMap; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CacheRecycler; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -72,11 +75,11 @@ public class InternalIpTermsFacet extends InternalTermsFacet { this.count = count; } - public String term() { - return IpFieldMapper.longToIp(term); + public BytesReference term() { + return new BytesArray(IpFieldMapper.longToIp(term)); } - public String getTerm() { + public BytesReference getTerm() { return term(); } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpFacetCollector.java index 
ee03f571f0b..9b2f0370e16 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpFacetCollector.java @@ -22,6 +22,8 @@ package org.elasticsearch.search.facet.terms.ip; import com.google.common.collect.ImmutableList; import gnu.trove.iterator.TLongIntIterator; import gnu.trove.map.hash.TLongIntHashMap; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; import org.elasticsearch.ElasticSearchIllegalArgumentException; @@ -104,8 +106,8 @@ public class TermsIpFacetCollector extends AbstractFacetCollector { if (allTerms) { try { - for (IndexReader reader : context.searcher().subReaders()) { - LongFieldData fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) { + LongFieldData fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName); fieldData.forEachValue(aggregator); } } catch (Exception e) { @@ -122,10 +124,10 @@ public class TermsIpFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); if (script != null) { - script.setNextReader(reader); + script.setNextReader(context.reader()); } } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpOrdinalsFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpOrdinalsFacetCollector.java index 91b625a67e1..9e6a5a594ce 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpOrdinalsFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpOrdinalsFacetCollector.java @@ -21,7 +21,7 @@ package org.elasticsearch.search.facet.terms.ip; import com.google.common.collect.ImmutableSet; import gnu.trove.set.hash.TLongHashSet; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.util.PriorityQueue; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; @@ -112,11 +112,11 @@ public class TermsIpOrdinalsFacetCollector extends AbstractFacetCollector { minCount = 0; } - this.aggregators = new ArrayList(context.searcher().subReaders().length); + this.aggregators = new ArrayList(context.searcher().getIndexReader().leaves().size()); } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + protected void doSetNextReader(AtomicReaderContext context) throws IOException { if (current != null) { missing += current.counts[0]; total += current.total - current.counts[0]; @@ -124,7 +124,7 @@ public class TermsIpOrdinalsFacetCollector extends AbstractFacetCollector { aggregators.add(current); } } - fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); current = new ReaderAggregator(fieldData); } @@ -255,7 +255,7 @@ public 
class TermsIpOrdinalsFacetCollector extends AbstractFacetCollector { public static class AggregatorPriorityQueue extends PriorityQueue { public AggregatorPriorityQueue(int size) { - initialize(size); + super(size); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/terms/longs/InternalLongTermsFacet.java b/src/main/java/org/elasticsearch/search/facet/terms/longs/InternalLongTermsFacet.java index 2a910ce53a1..4a0ebdcfddd 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/longs/InternalLongTermsFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/longs/InternalLongTermsFacet.java @@ -23,6 +23,8 @@ import com.google.common.collect.ImmutableList; import gnu.trove.iterator.TLongIntIterator; import gnu.trove.map.hash.TLongIntHashMap; import org.elasticsearch.common.CacheRecycler; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -71,11 +73,11 @@ public class InternalLongTermsFacet extends InternalTermsFacet { this.count = count; } - public String term() { - return Long.toString(term); + public BytesReference term() { + return new BytesArray(Long.toString(term)); } - public String getTerm() { + public BytesReference getTerm() { return term(); } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongFacetCollector.java index 475059c1ebe..9a505d35b7e 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongFacetCollector.java @@ -24,8 +24,11 @@ import com.google.common.collect.ImmutableSet; import gnu.trove.iterator.TLongIntIterator; import gnu.trove.map.hash.TLongIntHashMap; import gnu.trove.set.hash.TLongHashSet; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; @@ -77,7 +80,7 @@ public class TermsLongFacetCollector extends AbstractFacetCollector { private final SearchScript script; public TermsLongFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, - ImmutableSet excluded, String scriptLang, String script, Map params) { + ImmutableSet excluded, String scriptLang, String script, Map params) { super(facetName); this.fieldDataCache = context.fieldDataCache(); this.size = size; @@ -114,8 +117,8 @@ public class TermsLongFacetCollector extends AbstractFacetCollector { if (allTerms) { try { - for (IndexReader reader : context.searcher().subReaders()) { - LongFieldData fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) { + LongFieldData fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName); fieldData.forEachValue(aggregator); } } catch (Exception e) { @@ -132,10 +135,10 @@ public class TermsLongFacetCollector 
extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); if (script != null) { - script.setNextReader(reader); + script.setNextReader(context.reader()); } } @@ -181,15 +184,15 @@ public class TermsLongFacetCollector extends AbstractFacetCollector { private final TLongHashSet excluded; - public AggregatorValueProc(TLongIntHashMap facets, Set excluded, SearchScript script) { + public AggregatorValueProc(TLongIntHashMap facets, Set excluded, SearchScript script) { super(facets); this.script = script; if (excluded == null || excluded.isEmpty()) { this.excluded = null; } else { this.excluded = new TLongHashSet(excluded.size()); - for (String s : excluded) { - this.excluded.add(Long.parseLong(s)); + for (BytesRef s : excluded) { + this.excluded.add(Long.parseLong(s.utf8ToString())); } } } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongOrdinalsFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongOrdinalsFacetCollector.java index 1de512975a0..4fd7f15b7f9 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongOrdinalsFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongOrdinalsFacetCollector.java @@ -21,7 +21,8 @@ package org.elasticsearch.search.facet.terms.longs; import com.google.common.collect.ImmutableSet; import gnu.trove.set.hash.TLongHashSet; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PriorityQueue; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; @@ -73,7 +74,7 @@ public class TermsLongOrdinalsFacetCollector extends AbstractFacetCollector { private final TLongHashSet excluded; public TermsLongOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, - ImmutableSet excluded) { + ImmutableSet excluded) { super(facetName); this.fieldDataCache = context.fieldDataCache(); this.size = size; @@ -100,8 +101,8 @@ public class TermsLongOrdinalsFacetCollector extends AbstractFacetCollector { this.excluded = null; } else { this.excluded = new TLongHashSet(excluded.size()); - for (String s : excluded) { - this.excluded.add(Long.parseLong(s)); + for (BytesRef s : excluded) { + this.excluded.add(Long.parseLong(s.utf8ToString())); } } @@ -112,11 +113,11 @@ public class TermsLongOrdinalsFacetCollector extends AbstractFacetCollector { minCount = 0; } - this.aggregators = new ArrayList(context.searcher().subReaders().length); + this.aggregators = new ArrayList(context.searcher().getIndexReader().leaves().size()); } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + protected void doSetNextReader(AtomicReaderContext context) throws IOException { if (current != null) { missing += current.counts[0]; total += current.total - current.counts[0]; @@ -124,7 +125,7 @@ public class TermsLongOrdinalsFacetCollector extends AbstractFacetCollector { aggregators.add(current); } } - fieldData = (LongFieldData) 
fieldDataCache.cache(fieldDataType, reader, indexFieldName); + fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); current = new ReaderAggregator(fieldData); } @@ -255,7 +256,7 @@ public class TermsLongOrdinalsFacetCollector extends AbstractFacetCollector { public static class AggregatorPriorityQueue extends PriorityQueue { public AggregatorPriorityQueue(int size) { - initialize(size); + super(size); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/terms/shorts/InternalShortTermsFacet.java b/src/main/java/org/elasticsearch/search/facet/terms/shorts/InternalShortTermsFacet.java index c6e71503e9e..c199d631e28 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/shorts/InternalShortTermsFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/shorts/InternalShortTermsFacet.java @@ -22,7 +22,10 @@ package org.elasticsearch.search.facet.terms.shorts; import com.google.common.collect.ImmutableList; import gnu.trove.iterator.TShortIntIterator; import gnu.trove.map.hash.TShortIntHashMap; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CacheRecycler; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -71,11 +74,11 @@ public class InternalShortTermsFacet extends InternalTermsFacet { this.count = count; } - public String term() { - return Short.toString(term); + public BytesReference term() { + return new BytesArray(Short.toString(term)); } - public String getTerm() { + public BytesReference getTerm() { return term(); } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortFacetCollector.java index b6f4f3bb57c..a362b18a2f5 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortFacetCollector.java @@ -24,8 +24,11 @@ import com.google.common.collect.ImmutableSet; import gnu.trove.iterator.TShortIntIterator; import gnu.trove.map.hash.TShortIntHashMap; import gnu.trove.set.hash.TShortHashSet; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; @@ -70,7 +73,7 @@ public class TermsShortFacetCollector extends AbstractFacetCollector { private final SearchScript script; public TermsShortFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, - ImmutableSet excluded, String scriptLang, String script, Map params) { + ImmutableSet excluded, String scriptLang, String script, Map params) { super(facetName); this.fieldDataCache = context.fieldDataCache(); this.size = size; @@ -107,8 +110,8 @@ public class TermsShortFacetCollector extends AbstractFacetCollector { if (allTerms) { try { - for (IndexReader reader : context.searcher().subReaders()) { - ShortFieldData fieldData = (ShortFieldData) fieldDataCache.cache(fieldDataType, 
reader, indexFieldName); + for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) { + ShortFieldData fieldData = (ShortFieldData) fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName); fieldData.forEachValue(aggregator); } } catch (Exception e) { @@ -125,10 +128,10 @@ public class TermsShortFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - fieldData = (ShortFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + fieldData = (ShortFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); if (script != null) { - script.setNextReader(reader); + script.setNextReader(context.reader()); } } @@ -174,14 +177,14 @@ public class TermsShortFacetCollector extends AbstractFacetCollector { private final TShortHashSet excluded; - public AggregatorValueProc(TShortIntHashMap facets, Set excluded, SearchScript script) { + public AggregatorValueProc(TShortIntHashMap facets, Set excluded, SearchScript script) { super(facets); if (excluded == null || excluded.isEmpty()) { this.excluded = null; } else { this.excluded = new TShortHashSet(excluded.size()); - for (String s : excluded) { - this.excluded.add(Short.parseShort(s)); + for (BytesRef s : excluded) { + this.excluded.add(Short.parseShort(s.utf8ToString())); } } this.script = script; diff --git a/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortOrdinalsFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortOrdinalsFacetCollector.java index 899f97917ba..9b0631c3aa1 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortOrdinalsFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortOrdinalsFacetCollector.java @@ -21,7 +21,8 @@ package org.elasticsearch.search.facet.terms.shorts; import com.google.common.collect.ImmutableSet; import gnu.trove.set.hash.TShortHashSet; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PriorityQueue; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; @@ -73,7 +74,7 @@ public class TermsShortOrdinalsFacetCollector extends AbstractFacetCollector { private final TShortHashSet excluded; public TermsShortOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, - ImmutableSet excluded) { + ImmutableSet excluded) { super(facetName); this.fieldDataCache = context.fieldDataCache(); this.size = size; @@ -100,8 +101,8 @@ public class TermsShortOrdinalsFacetCollector extends AbstractFacetCollector { this.excluded = null; } else { this.excluded = new TShortHashSet(excluded.size()); - for (String s : excluded) { - this.excluded.add(Short.parseShort(s)); + for (BytesRef s : excluded) { + this.excluded.add(Short.parseShort(s.utf8ToString())); } } @@ -112,11 +113,11 @@ public class TermsShortOrdinalsFacetCollector extends AbstractFacetCollector { minCount = 0; } - this.aggregators = new ArrayList(context.searcher().subReaders().length); + this.aggregators = new ArrayList(context.searcher().getIndexReader().leaves().size()); } @Override - protected void doSetNextReader(IndexReader 
reader, int docBase) throws IOException { + protected void doSetNextReader(AtomicReaderContext context) throws IOException { if (current != null) { missing += current.counts[0]; total += current.total - current.counts[0]; @@ -124,7 +125,7 @@ public class TermsShortOrdinalsFacetCollector extends AbstractFacetCollector { aggregators.add(current); } } - fieldData = (ShortFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + fieldData = (ShortFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); current = new ReaderAggregator(fieldData); } @@ -255,7 +256,7 @@ public class TermsShortOrdinalsFacetCollector extends AbstractFacetCollector { public static class AggregatorPriorityQueue extends PriorityQueue { public AggregatorPriorityQueue(int size) { - initialize(size); + super(size); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/terms/strings/FieldsTermsStringFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/strings/FieldsTermsStringFacetCollector.java index 47e140a2644..436d896b9b5 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/strings/FieldsTermsStringFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/strings/FieldsTermsStringFacetCollector.java @@ -23,8 +23,9 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import gnu.trove.iterator.TObjectIntIterator; import gnu.trove.map.hash.TObjectIntHashMap; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.index.cache.field.data.FieldDataCache; @@ -68,7 +69,7 @@ public class FieldsTermsStringFacetCollector extends AbstractFacetCollector { private final SearchScript script; public FieldsTermsStringFacetCollector(String facetName, String[] fieldsNames, int size, InternalStringTermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, - ImmutableSet excluded, Pattern pattern, String scriptLang, String script, Map params) { + ImmutableSet excluded, Pattern pattern, String scriptLang, String script, Map params) { super(facetName); this.fieldDataCache = context.fieldDataCache(); this.size = size; @@ -98,16 +99,16 @@ public class FieldsTermsStringFacetCollector extends AbstractFacetCollector { } if (excluded.isEmpty() && pattern == null && this.script == null) { - aggregator = new StaticAggregatorValueProc(CacheRecycler.popObjectIntMap()); + aggregator = new StaticAggregatorValueProc(CacheRecycler.popObjectIntMap()); } else { - aggregator = new AggregatorValueProc(CacheRecycler.popObjectIntMap(), excluded, pattern, this.script); + aggregator = new AggregatorValueProc(CacheRecycler.popObjectIntMap(), excluded, pattern, this.script); } if (allTerms) { try { for (int i = 0; i < fieldsNames.length; i++) { - for (IndexReader reader : context.searcher().subReaders()) { - FieldData fieldData = fieldDataCache.cache(fieldsDataType[i], reader, indexFieldsNames[i]); + for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) { + FieldData fieldData = fieldDataCache.cache(fieldsDataType[i], readerContext.reader(), indexFieldsNames[i]); fieldData.forEachValue(aggregator); } } @@ -125,12 +126,12 @@ public class FieldsTermsStringFacetCollector extends AbstractFacetCollector { } 
@Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + protected void doSetNextReader(AtomicReaderContext context) throws IOException { for (int i = 0; i < indexFieldsNames.length; i++) { - fieldsData[i] = fieldDataCache.cache(fieldsDataType[i], reader, indexFieldsNames[i]); + fieldsData[i] = fieldDataCache.cache(fieldsDataType[i], context.reader(), indexFieldsNames[i]); } if (script != null) { - script.setNextReader(reader); + script.setNextReader(context.reader()); } } @@ -143,28 +144,28 @@ public class FieldsTermsStringFacetCollector extends AbstractFacetCollector { @Override public Facet facet() { - TObjectIntHashMap facets = aggregator.facets(); + TObjectIntHashMap facets = aggregator.facets(); if (facets.isEmpty()) { CacheRecycler.pushObjectIntMap(facets); - return new InternalStringTermsFacet(facetName, comparatorType, size, ImmutableList.of(), aggregator.missing(), aggregator.total()); + return new InternalStringTermsFacet(facetName, comparatorType, size, ImmutableList.of(), aggregator.missing(), aggregator.total()); } else { if (size < EntryPriorityQueue.LIMIT) { EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator()); - for (TObjectIntIterator it = facets.iterator(); it.hasNext(); ) { + for (TObjectIntIterator it = facets.iterator(); it.hasNext(); ) { it.advance(); - ordered.insertWithOverflow(new InternalStringTermsFacet.StringEntry(it.key(), it.value())); + ordered.insertWithOverflow(new InternalStringTermsFacet.TermEntry(it.key(), it.value())); } - InternalStringTermsFacet.StringEntry[] list = new InternalStringTermsFacet.StringEntry[ordered.size()]; + InternalStringTermsFacet.TermEntry[] list = new InternalStringTermsFacet.TermEntry[ordered.size()]; for (int i = ordered.size() - 1; i >= 0; i--) { - list[i] = ((InternalStringTermsFacet.StringEntry) ordered.pop()); + list[i] = ((InternalStringTermsFacet.TermEntry) ordered.pop()); } CacheRecycler.pushObjectIntMap(facets); return new InternalStringTermsFacet(facetName, comparatorType, size, Arrays.asList(list), aggregator.missing(), aggregator.total()); } else { - BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); - for (TObjectIntIterator it = facets.iterator(); it.hasNext(); ) { + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + for (TObjectIntIterator it = facets.iterator(); it.hasNext(); ) { it.advance(); - ordered.add(new InternalStringTermsFacet.StringEntry(it.key(), it.value())); + ordered.add(new InternalStringTermsFacet.TermEntry(it.key(), it.value())); } CacheRecycler.pushObjectIntMap(facets); return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing(), aggregator.total()); @@ -174,13 +175,13 @@ public class FieldsTermsStringFacetCollector extends AbstractFacetCollector { public static class AggregatorValueProc extends StaticAggregatorValueProc { - private final ImmutableSet excluded; + private final ImmutableSet excluded; private final Matcher matcher; private final SearchScript script; - public AggregatorValueProc(TObjectIntHashMap facets, ImmutableSet excluded, Pattern pattern, SearchScript script) { + public AggregatorValueProc(TObjectIntHashMap facets, ImmutableSet excluded, Pattern pattern, SearchScript script) { super(facets); this.excluded = excluded; this.matcher = pattern != null ? 
pattern.matcher("") : null; @@ -188,11 +189,13 @@ public class FieldsTermsStringFacetCollector extends AbstractFacetCollector { } @Override - public void onValue(int docId, String value) { + public void onValue(int docId, BytesRef value) { if (excluded != null && excluded.contains(value)) { return; } - if (matcher != null && !matcher.reset(value).matches()) { + + // LUCENE 4 UPGRADE: use Lucene's RegexCapabilities + if (matcher != null && !matcher.reset(value.utf8ToString()).matches()) { return; } if (script != null) { @@ -207,7 +210,8 @@ public class FieldsTermsStringFacetCollector extends AbstractFacetCollector { return; } } else { - value = scriptValue.toString(); + // LUCENE 4 UPGRADE: make script return BR? + value = new BytesRef(scriptValue.toString()); } } super.onValue(docId, value); @@ -216,22 +220,23 @@ public class FieldsTermsStringFacetCollector extends AbstractFacetCollector { public static class StaticAggregatorValueProc implements FieldData.StringValueInDocProc, FieldData.StringValueProc { - private final TObjectIntHashMap facets; + // LUCENE 4 UPGRADE: check if hashcode is not too expensive + private final TObjectIntHashMap facets; private int missing; private int total; - public StaticAggregatorValueProc(TObjectIntHashMap facets) { + public StaticAggregatorValueProc(TObjectIntHashMap facets) { this.facets = facets; } @Override - public void onValue(String value) { + public void onValue(BytesRef value) { facets.putIfAbsent(value, 0); } @Override - public void onValue(int docId, String value) { + public void onValue(int docId, BytesRef value) { facets.adjustOrPutValue(value, 1, 1); total++; } @@ -241,7 +246,7 @@ public class FieldsTermsStringFacetCollector extends AbstractFacetCollector { missing++; } - public final TObjectIntHashMap facets() { + public final TObjectIntHashMap facets() { return facets; } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/strings/InternalStringTermsFacet.java b/src/main/java/org/elasticsearch/search/facet/terms/strings/InternalStringTermsFacet.java index f8d168e35f5..a6d217e0b64 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/strings/InternalStringTermsFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/strings/InternalStringTermsFacet.java @@ -22,7 +22,10 @@ package org.elasticsearch.search.facet.terms.strings; import com.google.common.collect.ImmutableList; import gnu.trove.iterator.TObjectIntIterator; import gnu.trove.map.hash.TObjectIntHashMap; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CacheRecycler; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; @@ -61,27 +64,38 @@ public class InternalStringTermsFacet extends InternalTermsFacet { return STREAM_TYPE; } - public static class StringEntry implements Entry { + public static class TermEntry implements Entry { - private String term; + private BytesReference term; private int count; - public StringEntry(String term, int count) { + public TermEntry(String term, int count) { + this.term = new BytesArray(term); + this.count = count; + } + + public TermEntry(BytesRef term, int count) { + this.term = new BytesArray(term); + this.count = count; + } + + public TermEntry(BytesReference term, int count) { this.term = term; this.count = count; } - public String term() { + public BytesReference term() { 
return term; } - public String getTerm() { + public BytesReference getTerm() { return term; } @Override public Number termAsNumber() { - return Double.parseDouble(term); + // LUCENE 4 UPGRADE: better way? + return Double.parseDouble(term.toUtf8()); } @Override @@ -99,7 +113,7 @@ public class InternalStringTermsFacet extends InternalTermsFacet { @Override public int compareTo(Entry o) { - int i = term.compareTo(o.term()); + int i = BytesReference.utf8SortedAsUnicodeSortOrder.compare(this.term, o.term()); if (i == 0) { i = count - o.count(); if (i == 0) { @@ -118,14 +132,14 @@ public class InternalStringTermsFacet extends InternalTermsFacet { long total; - Collection entries = ImmutableList.of(); + Collection entries = ImmutableList.of(); ComparatorType comparatorType; InternalStringTermsFacet() { } - public InternalStringTermsFacet(String name, ComparatorType comparatorType, int requiredSize, Collection entries, long missing, long total) { + public InternalStringTermsFacet(String name, ComparatorType comparatorType, int requiredSize, Collection entries, long missing, long total) { this.name = name; this.comparatorType = comparatorType; this.requiredSize = requiredSize; @@ -155,15 +169,15 @@ public class InternalStringTermsFacet extends InternalTermsFacet { } @Override - public List entries() { + public List entries() { if (!(entries instanceof List)) { entries = ImmutableList.copyOf(entries); } - return (List) entries; + return (List) entries; } @Override - public List getEntries() { + public List getEntries() { return entries(); } @@ -213,22 +227,22 @@ public class InternalStringTermsFacet extends InternalTermsFacet { return facets.get(0); } InternalStringTermsFacet first = (InternalStringTermsFacet) facets.get(0); - TObjectIntHashMap aggregated = CacheRecycler.popObjectIntMap(); + TObjectIntHashMap aggregated = CacheRecycler.popObjectIntMap(); long missing = 0; long total = 0; for (Facet facet : facets) { InternalStringTermsFacet mFacet = (InternalStringTermsFacet) facet; missing += mFacet.missingCount(); total += mFacet.totalCount(); - for (InternalStringTermsFacet.StringEntry entry : mFacet.entries) { + for (TermEntry entry : mFacet.entries) { aggregated.adjustOrPutValue(entry.term(), entry.count(), entry.count()); } } - BoundedTreeSet ordered = new BoundedTreeSet(first.comparatorType.comparator(), first.requiredSize); - for (TObjectIntIterator it = aggregated.iterator(); it.hasNext(); ) { + BoundedTreeSet ordered = new BoundedTreeSet(first.comparatorType.comparator(), first.requiredSize); + for (TObjectIntIterator it = aggregated.iterator(); it.hasNext(); ) { it.advance(); - ordered.add(new StringEntry(it.key(), it.value())); + ordered.add(new TermEntry(it.key(), it.value())); } first.entries = ordered; first.missing = missing; @@ -283,15 +297,15 @@ public class InternalStringTermsFacet extends InternalTermsFacet { total = in.readVLong(); int size = in.readVInt(); - entries = new ArrayList(size); + entries = new ArrayList(size); for (int i = 0; i < size; i++) { - entries.add(new StringEntry(in.readUTF(), in.readVInt())); + entries.add(new TermEntry(in.readBytesReference(), in.readVInt())); } } @Override public void writeTo(StreamOutput out) throws IOException { - out.writeUTF(name); + out.writeString(name); out.writeByte(comparatorType.id()); out.writeVInt(requiredSize); out.writeVLong(missing); @@ -299,7 +313,7 @@ public class InternalStringTermsFacet extends InternalTermsFacet { out.writeVInt(entries.size()); for (Entry entry : entries) { - out.writeUTF(entry.term()); + 
out.writeBytesReference(entry.term()); out.writeVInt(entry.count()); } } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/strings/ScriptTermsStringFieldFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/strings/ScriptTermsStringFieldFacetCollector.java index eeca203135e..bf2dc8e0898 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/strings/ScriptTermsStringFieldFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/strings/ScriptTermsStringFieldFacetCollector.java @@ -23,8 +23,9 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import gnu.trove.iterator.TObjectIntIterator; import gnu.trove.map.hash.TObjectIntHashMap; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.script.SearchScript; @@ -54,15 +55,15 @@ public class ScriptTermsStringFieldFacetCollector extends AbstractFacetCollector private final Matcher matcher; - private final ImmutableSet excluded; + private final ImmutableSet excluded; - private final TObjectIntHashMap facets; + private final TObjectIntHashMap facets; private int missing; private int total; public ScriptTermsStringFieldFacetCollector(String facetName, int size, InternalStringTermsFacet.ComparatorType comparatorType, SearchContext context, - ImmutableSet excluded, Pattern pattern, String scriptLang, String script, Map params) { + ImmutableSet excluded, Pattern pattern, String scriptLang, String script, Map params) { super(facetName); this.size = size; this.comparatorType = comparatorType; @@ -81,8 +82,8 @@ public class ScriptTermsStringFieldFacetCollector extends AbstractFacetCollector } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - script.setNextReader(reader); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + script.setNextReader(context.reader()); } @Override @@ -99,7 +100,8 @@ public class ScriptTermsStringFieldFacetCollector extends AbstractFacetCollector String value = o1.toString(); if (match(value)) { found = true; - facets.adjustOrPutValue(value, 1, 1); + // LUCENE 4 UPGRADE: should be possible to convert directly to BR + facets.adjustOrPutValue(new BytesRef(value), 1, 1); total++; } } @@ -112,7 +114,8 @@ public class ScriptTermsStringFieldFacetCollector extends AbstractFacetCollector String value = o1.toString(); if (match(value)) { found = true; - facets.adjustOrPutValue(value, 1, 1); + // LUCENE 4 UPGRADE: should be possible to convert directly to BR + facets.adjustOrPutValue(new BytesRef(value), 1, 1); total++; } } @@ -122,7 +125,8 @@ public class ScriptTermsStringFieldFacetCollector extends AbstractFacetCollector } else { String value = o.toString(); if (match(value)) { - facets.adjustOrPutValue(value, 1, 1); + // LUCENE 4 UPGRADE: should be possible to convert directly to BR + facets.adjustOrPutValue(new BytesRef(value), 1, 1); total++; } else { missing++; @@ -144,25 +148,25 @@ public class ScriptTermsStringFieldFacetCollector extends AbstractFacetCollector public Facet facet() { if (facets.isEmpty()) { CacheRecycler.pushObjectIntMap(facets); - return new InternalStringTermsFacet(facetName, comparatorType, size, ImmutableList.of(), missing, total); + return new 
InternalStringTermsFacet(facetName, comparatorType, size, ImmutableList.of(), missing, total); } else { if (size < EntryPriorityQueue.LIMIT) { EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator()); - for (TObjectIntIterator it = facets.iterator(); it.hasNext(); ) { + for (TObjectIntIterator it = facets.iterator(); it.hasNext(); ) { it.advance(); - ordered.insertWithOverflow(new InternalStringTermsFacet.StringEntry(it.key(), it.value())); + ordered.insertWithOverflow(new InternalStringTermsFacet.TermEntry(it.key(), it.value())); } - InternalStringTermsFacet.StringEntry[] list = new InternalStringTermsFacet.StringEntry[ordered.size()]; + InternalStringTermsFacet.TermEntry[] list = new InternalStringTermsFacet.TermEntry[ordered.size()]; for (int i = ordered.size() - 1; i >= 0; i--) { - list[i] = ((InternalStringTermsFacet.StringEntry) ordered.pop()); + list[i] = ((InternalStringTermsFacet.TermEntry) ordered.pop()); } CacheRecycler.pushObjectIntMap(facets); return new InternalStringTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing, total); } else { - BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); - for (TObjectIntIterator it = facets.iterator(); it.hasNext(); ) { + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + for (TObjectIntIterator it = facets.iterator(); it.hasNext(); ) { it.advance(); - ordered.add(new InternalStringTermsFacet.StringEntry(it.key(), it.value())); + ordered.add(new InternalStringTermsFacet.TermEntry(it.key(), it.value())); } CacheRecycler.pushObjectIntMap(facets); return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, missing, total); diff --git a/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java index 0caf691f723..6706b0d6cd9 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java @@ -23,8 +23,10 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import gnu.trove.iterator.TObjectIntIterator; import gnu.trove.map.hash.TObjectIntHashMap; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.util.concurrent.ThreadLocals; @@ -80,7 +82,7 @@ public class TermsStringFacetCollector extends AbstractFacetCollector { private final SearchScript script; public TermsStringFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, - ImmutableSet excluded, Pattern pattern, String scriptLang, String script, Map params) { + ImmutableSet excluded, Pattern pattern, String scriptLang, String script, Map params) { super(facetName); this.fieldDataCache = context.fieldDataCache(); this.size = size; @@ -108,15 +110,15 @@ public class TermsStringFacetCollector extends AbstractFacetCollector { } if (excluded.isEmpty() && pattern == null && this.script == null) { - aggregator = new StaticAggregatorValueProc(CacheRecycler.popObjectIntMap()); + aggregator = 
new StaticAggregatorValueProc(CacheRecycler.popObjectIntMap()); } else { - aggregator = new AggregatorValueProc(CacheRecycler.popObjectIntMap(), excluded, pattern, this.script); + aggregator = new AggregatorValueProc(CacheRecycler.popObjectIntMap(), excluded, pattern, this.script); } if (allTerms) { try { - for (IndexReader reader : context.searcher().subReaders()) { - FieldData fieldData = fieldDataCache.cache(fieldDataType, reader, indexFieldName); + for (AtomicReaderContext readerContext : context.searcher().getTopReaderContext().leaves()) { + FieldData fieldData = fieldDataCache.cache(fieldDataType, readerContext.reader(), indexFieldName); fieldData.forEachValue(aggregator); } } catch (Exception e) { @@ -133,10 +135,10 @@ public class TermsStringFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - fieldData = fieldDataCache.cache(fieldDataType, reader, indexFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + fieldData = fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); if (script != null) { - script.setNextReader(reader); + script.setNextReader(context.reader()); } } @@ -147,28 +149,28 @@ public class TermsStringFacetCollector extends AbstractFacetCollector { @Override public Facet facet() { - TObjectIntHashMap facets = aggregator.facets(); + TObjectIntHashMap facets = aggregator.facets(); if (facets.isEmpty()) { CacheRecycler.pushObjectIntMap(facets); - return new InternalStringTermsFacet(facetName, comparatorType, size, ImmutableList.of(), aggregator.missing(), aggregator.total()); + return new InternalStringTermsFacet(facetName, comparatorType, size, ImmutableList.of(), aggregator.missing(), aggregator.total()); } else { if (size < EntryPriorityQueue.LIMIT) { EntryPriorityQueue ordered = new EntryPriorityQueue(size, comparatorType.comparator()); - for (TObjectIntIterator it = facets.iterator(); it.hasNext(); ) { + for (TObjectIntIterator it = facets.iterator(); it.hasNext(); ) { it.advance(); - ordered.insertWithOverflow(new InternalStringTermsFacet.StringEntry(it.key(), it.value())); + ordered.insertWithOverflow(new InternalStringTermsFacet.TermEntry(it.key(), it.value())); } - InternalStringTermsFacet.StringEntry[] list = new InternalStringTermsFacet.StringEntry[ordered.size()]; + InternalStringTermsFacet.TermEntry[] list = new InternalStringTermsFacet.TermEntry[ordered.size()]; for (int i = ordered.size() - 1; i >= 0; i--) { - list[i] = ((InternalStringTermsFacet.StringEntry) ordered.pop()); + list[i] = ((InternalStringTermsFacet.TermEntry) ordered.pop()); } CacheRecycler.pushObjectIntMap(facets); return new InternalStringTermsFacet(facetName, comparatorType, size, Arrays.asList(list), aggregator.missing(), aggregator.total()); } else { - BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); - for (TObjectIntIterator it = facets.iterator(); it.hasNext(); ) { + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + for (TObjectIntIterator it = facets.iterator(); it.hasNext(); ) { it.advance(); - ordered.add(new InternalStringTermsFacet.StringEntry(it.key(), it.value())); + ordered.add(new InternalStringTermsFacet.TermEntry(it.key(), it.value())); } CacheRecycler.pushObjectIntMap(facets); return new InternalStringTermsFacet(facetName, comparatorType, size, ordered, aggregator.missing(), aggregator.total()); @@ -178,13 +180,13 @@ public class TermsStringFacetCollector 
extends AbstractFacetCollector { public static class AggregatorValueProc extends StaticAggregatorValueProc { - private final ImmutableSet excluded; + private final ImmutableSet excluded; private final Matcher matcher; private final SearchScript script; - public AggregatorValueProc(TObjectIntHashMap facets, ImmutableSet excluded, Pattern pattern, SearchScript script) { + public AggregatorValueProc(TObjectIntHashMap facets, ImmutableSet excluded, Pattern pattern, SearchScript script) { super(facets); this.excluded = excluded; this.matcher = pattern != null ? pattern.matcher("") : null; @@ -192,11 +194,12 @@ public class TermsStringFacetCollector extends AbstractFacetCollector { } @Override - public void onValue(int docId, String value) { + public void onValue(int docId, BytesRef value) { if (excluded != null && excluded.contains(value)) { return; } - if (matcher != null && !matcher.reset(value).matches()) { + // LUCENE 4 UPGRADE: use Lucene's RegexCapabilities + if (matcher != null && !matcher.reset(value.utf8ToString()).matches()) { return; } if (script != null) { @@ -211,7 +214,8 @@ public class TermsStringFacetCollector extends AbstractFacetCollector { return; } } else { - value = scriptValue.toString(); + // LUCENE 4 UPGRADE: should be possible to convert directly to BR + value = new BytesRef(scriptValue.toString()); } } super.onValue(docId, value); @@ -220,22 +224,23 @@ public class TermsStringFacetCollector extends AbstractFacetCollector { public static class StaticAggregatorValueProc implements FieldData.StringValueInDocProc, FieldData.StringValueProc { - private final TObjectIntHashMap facets; + // LUCENE 4 UPGRADE: check if hashcode is not too expensive + private final TObjectIntHashMap facets; private int missing = 0; private int total = 0; - public StaticAggregatorValueProc(TObjectIntHashMap facets) { + public StaticAggregatorValueProc(TObjectIntHashMap facets) { this.facets = facets; } @Override - public void onValue(String value) { + public void onValue(BytesRef value) { facets.putIfAbsent(value, 0); } @Override - public void onValue(int docId, String value) { + public void onValue(int docId, BytesRef value) { facets.adjustOrPutValue(value, 1, 1); total++; } @@ -245,7 +250,7 @@ public class TermsStringFacetCollector extends AbstractFacetCollector { missing++; } - public final TObjectIntHashMap facets() { + public final TObjectIntHashMap facets() { return facets; } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringOrdinalsFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringOrdinalsFacetCollector.java index cd08b7f194b..42856f356ea 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringOrdinalsFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringOrdinalsFacetCollector.java @@ -20,7 +20,8 @@ package org.elasticsearch.search.facet.terms.strings; import com.google.common.collect.ImmutableSet; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PriorityQueue; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; @@ -71,12 +72,12 @@ public class TermsStringOrdinalsFacetCollector extends AbstractFacetCollector { long missing; long total; - private final ImmutableSet excluded; + private final ImmutableSet excluded; private final Matcher matcher; public 
TermsStringOrdinalsFacetCollector(String facetName, String fieldName, int size, TermsFacet.ComparatorType comparatorType, boolean allTerms, SearchContext context, - ImmutableSet excluded, Pattern pattern) { + ImmutableSet excluded, Pattern pattern) { super(facetName); this.fieldDataCache = context.fieldDataCache(); this.size = size; @@ -113,11 +114,11 @@ public class TermsStringOrdinalsFacetCollector extends AbstractFacetCollector { minCount = 0; } - this.aggregators = new ArrayList(context.searcher().subReaders().length); + this.aggregators = new ArrayList(context.searcher().getIndexReader().leaves().size()); } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { + protected void doSetNextReader(AtomicReaderContext context) throws IOException { if (current != null) { missing += current.counts[0]; total += current.total - current.counts[0]; @@ -125,7 +126,7 @@ public class TermsStringOrdinalsFacetCollector extends AbstractFacetCollector { aggregators.add(current); } } - fieldData = (StringFieldData) fieldDataCache.cache(fieldDataType, reader, indexFieldName); + fieldData = (StringFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); current = new ReaderAggregator(fieldData); } @@ -160,7 +161,7 @@ public class TermsStringOrdinalsFacetCollector extends AbstractFacetCollector { while (queue.size() > 0) { ReaderAggregator agg = queue.top(); - String value = agg.current; + BytesRef value = agg.current; int count = 0; do { count += agg.counts[agg.position]; @@ -177,16 +178,17 @@ public class TermsStringOrdinalsFacetCollector extends AbstractFacetCollector { if (excluded != null && excluded.contains(value)) { continue; } - if (matcher != null && !matcher.reset(value).matches()) { + // LUCENE 4 UPGRADE: use Lucene's RegexCapabilities + if (matcher != null && !matcher.reset(value.utf8ToString()).matches()) { continue; } - InternalStringTermsFacet.StringEntry entry = new InternalStringTermsFacet.StringEntry(value, count); + InternalStringTermsFacet.TermEntry entry = new InternalStringTermsFacet.TermEntry(value, count); ordered.insertWithOverflow(entry); } } - InternalStringTermsFacet.StringEntry[] list = new InternalStringTermsFacet.StringEntry[ordered.size()]; + InternalStringTermsFacet.TermEntry[] list = new InternalStringTermsFacet.TermEntry[ordered.size()]; for (int i = ordered.size() - 1; i >= 0; i--) { - list[i] = (InternalStringTermsFacet.StringEntry) ordered.pop(); + list[i] = (InternalStringTermsFacet.TermEntry) ordered.pop(); } for (ReaderAggregator aggregator : aggregators) { @@ -196,11 +198,11 @@ public class TermsStringOrdinalsFacetCollector extends AbstractFacetCollector { return new InternalStringTermsFacet(facetName, comparatorType, size, Arrays.asList(list), missing, total); } - BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); + BoundedTreeSet ordered = new BoundedTreeSet(comparatorType.comparator(), size); while (queue.size() > 0) { ReaderAggregator agg = queue.top(); - String value = agg.current; + BytesRef value = agg.current; int count = 0; do { count += agg.counts[agg.position]; @@ -217,10 +219,11 @@ public class TermsStringOrdinalsFacetCollector extends AbstractFacetCollector { if (excluded != null && excluded.contains(value)) { continue; } - if (matcher != null && !matcher.reset(value).matches()) { + // LUCENE 4 UPGRADE: use Lucene's RegexCapabilities + if (matcher != null && !matcher.reset(value.utf8ToString()).matches()) { continue; } - InternalStringTermsFacet.StringEntry 
entry = new InternalStringTermsFacet.StringEntry(value, count); + InternalStringTermsFacet.TermEntry entry = new InternalStringTermsFacet.TermEntry(value, count); ordered.add(entry); } } @@ -235,11 +238,11 @@ public class TermsStringOrdinalsFacetCollector extends AbstractFacetCollector { public static class ReaderAggregator implements FieldData.OrdinalInDocProc { - final String[] values; + final BytesRef[] values; final int[] counts; int position = 0; - String current; + BytesRef current; int total; public ReaderAggregator(StringFieldData fieldData) { @@ -265,7 +268,7 @@ public class TermsStringOrdinalsFacetCollector extends AbstractFacetCollector { public static class AggregatorPriorityQueue extends PriorityQueue { public AggregatorPriorityQueue(int size) { - initialize(size); + super(size); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/terms/support/EntryPriorityQueue.java b/src/main/java/org/elasticsearch/search/facet/terms/support/EntryPriorityQueue.java index 65295118896..e59c6faa96e 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/support/EntryPriorityQueue.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/support/EntryPriorityQueue.java @@ -31,7 +31,7 @@ public class EntryPriorityQueue extends PriorityQueue { private final Comparator comparator; public EntryPriorityQueue(int size, Comparator comparator) { - initialize(size); + super(size); this.comparator = comparator; } diff --git a/src/main/java/org/elasticsearch/search/facet/termsstats/TermsStatsFacet.java b/src/main/java/org/elasticsearch/search/facet/termsstats/TermsStatsFacet.java index fbccaa2a5dc..d2b5d85e25c 100644 --- a/src/main/java/org/elasticsearch/search/facet/termsstats/TermsStatsFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/termsstats/TermsStatsFacet.java @@ -19,7 +19,9 @@ package org.elasticsearch.search.facet.termsstats; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.search.facet.Facet; import java.util.Comparator; @@ -392,9 +394,9 @@ public interface TermsStatsFacet extends Facet, Iterable public interface Entry extends Comparable { - String term(); + BytesReference term(); - String getTerm(); + BytesReference getTerm(); Number termAsNumber(); diff --git a/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/InternalTermsStatsDoubleFacet.java b/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/InternalTermsStatsDoubleFacet.java index 368fdcb63a0..5c2894a8730 100644 --- a/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/InternalTermsStatsDoubleFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/InternalTermsStatsDoubleFacet.java @@ -21,6 +21,8 @@ package org.elasticsearch.search.facet.termsstats.doubles; import com.google.common.collect.ImmutableList; import org.elasticsearch.common.CacheRecycler; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.trove.ExtTDoubleObjectHashMap; @@ -74,12 +76,12 @@ public class InternalTermsStatsDoubleFacet extends InternalTermsStatsFacet { } @Override - public String term() { - return Double.toString(term); + public BytesReference term() { + return new BytesArray(Double.toString(term)); } @Override - public String 
getTerm() { + public BytesReference getTerm() { return term(); } diff --git a/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/TermsStatsDoubleFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/TermsStatsDoubleFacetCollector.java index 91bd2abccbe..07864d4375b 100644 --- a/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/TermsStatsDoubleFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/TermsStatsDoubleFacetCollector.java @@ -21,7 +21,7 @@ package org.elasticsearch.search.facet.termsstats.doubles; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; @@ -113,12 +113,12 @@ public class TermsStatsDoubleFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - keyFieldData = (NumericFieldData) fieldDataCache.cache(keyFieldDataType, reader, keyFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + keyFieldData = (NumericFieldData) fieldDataCache.cache(keyFieldDataType, context.reader(), keyFieldName); if (script != null) { - script.setNextReader(reader); + script.setNextReader(context.reader()); } else { - aggregator.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, reader, valueFieldName); + aggregator.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, context.reader(), valueFieldName); } } diff --git a/src/main/java/org/elasticsearch/search/facet/termsstats/longs/InternalTermsStatsLongFacet.java b/src/main/java/org/elasticsearch/search/facet/termsstats/longs/InternalTermsStatsLongFacet.java index 85b5f8aa9e2..0e77fd785da 100644 --- a/src/main/java/org/elasticsearch/search/facet/termsstats/longs/InternalTermsStatsLongFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/termsstats/longs/InternalTermsStatsLongFacet.java @@ -21,6 +21,8 @@ package org.elasticsearch.search.facet.termsstats.longs; import com.google.common.collect.ImmutableList; import org.elasticsearch.common.CacheRecycler; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.trove.ExtTLongObjectHashMap; @@ -74,12 +76,12 @@ public class InternalTermsStatsLongFacet extends InternalTermsStatsFacet { } @Override - public String term() { - return Long.toString(term); + public BytesReference term() { + return new BytesArray(Long.toString(term)); } @Override - public String getTerm() { + public BytesReference getTerm() { return term(); } diff --git a/src/main/java/org/elasticsearch/search/facet/termsstats/longs/TermsStatsLongFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/termsstats/longs/TermsStatsLongFacetCollector.java index 1fff910d8d5..2998fc0195c 100644 --- a/src/main/java/org/elasticsearch/search/facet/termsstats/longs/TermsStatsLongFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/termsstats/longs/TermsStatsLongFacetCollector.java @@ -21,7 +21,7 @@ package org.elasticsearch.search.facet.termsstats.longs; import 
com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; @@ -114,12 +114,12 @@ public class TermsStatsLongFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - keyFieldData = (NumericFieldData) fieldDataCache.cache(keyFieldDataType, reader, keyFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + keyFieldData = (NumericFieldData) fieldDataCache.cache(keyFieldDataType, context.reader(), keyFieldName); if (script != null) { - script.setNextReader(reader); + script.setNextReader(context.reader()); } else { - aggregator.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, reader, valueFieldName); + aggregator.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, context.reader(), valueFieldName); } } diff --git a/src/main/java/org/elasticsearch/search/facet/termsstats/strings/InternalTermsStatsStringFacet.java b/src/main/java/org/elasticsearch/search/facet/termsstats/strings/InternalTermsStatsStringFacet.java index 942cc879276..8c5150a5860 100644 --- a/src/main/java/org/elasticsearch/search/facet/termsstats/strings/InternalTermsStatsStringFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/termsstats/strings/InternalTermsStatsStringFacet.java @@ -20,7 +20,10 @@ package org.elasticsearch.search.facet.termsstats.strings; import com.google.common.collect.ImmutableList; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CacheRecycler; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.trove.ExtTHashMap; @@ -57,14 +60,18 @@ public class InternalTermsStatsStringFacet extends InternalTermsStatsFacet { public static class StringEntry implements Entry { - String term; + BytesReference term; long count; long totalCount; double total; double min; double max; - public StringEntry(String term, long count, long totalCount, double total, double min, double max) { + public StringEntry(BytesRef term, long count, long totalCount, double total, double min, double max) { + this(new BytesArray(term), count, totalCount, total, min, max); + } + + public StringEntry(BytesReference term, long count, long totalCount, double total, double min, double max) { this.term = term; this.count = count; this.totalCount = totalCount; @@ -74,18 +81,18 @@ public class InternalTermsStatsStringFacet extends InternalTermsStatsFacet { } @Override - public String term() { + public BytesReference term() { return term; } @Override - public String getTerm() { + public BytesReference getTerm() { return term(); } @Override public Number termAsNumber() { - return Double.parseDouble(term); + return Double.parseDouble(term.toUtf8()); } @Override @@ -158,7 +165,7 @@ public class InternalTermsStatsStringFacet extends InternalTermsStatsFacet { @Override public int compareTo(Entry o) { - return term.compareTo(o.term()); + return BytesReference.utf8SortedAsUnicodeSortOrder.compare(this.term, o.term()); } } @@ -250,7 +257,7 @@ public class InternalTermsStatsStringFacet 
extends InternalTermsStatsFacet { return facets.get(0); } int missing = 0; - ExtTHashMap map = CacheRecycler.popHashMap(); + ExtTHashMap map = CacheRecycler.popHashMap(); for (Facet facet : facets) { InternalTermsStatsStringFacet tsFacet = (InternalTermsStatsStringFacet) facet; missing += tsFacet.missing; @@ -346,7 +353,7 @@ public class InternalTermsStatsStringFacet extends InternalTermsStatsFacet { int size = in.readVInt(); entries = new ArrayList(size); for (int i = 0; i < size; i++) { - entries.add(new StringEntry(in.readUTF(), in.readVLong(), in.readVLong(), in.readDouble(), in.readDouble(), in.readDouble())); + entries.add(new StringEntry(in.readBytesReference(), in.readVLong(), in.readVLong(), in.readDouble(), in.readDouble(), in.readDouble())); } } @@ -359,7 +366,7 @@ public class InternalTermsStatsStringFacet extends InternalTermsStatsFacet { out.writeVInt(entries.size()); for (Entry entry : entries) { - out.writeUTF(entry.term()); + out.writeBytesReference(entry.term()); out.writeVLong(entry.count()); out.writeVLong(entry.totalCount()); out.writeDouble(entry.total()); diff --git a/src/main/java/org/elasticsearch/search/facet/termsstats/strings/TermsStatsStringFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/termsstats/strings/TermsStatsStringFacetCollector.java index 4a180ccfb86..82afa173f92 100644 --- a/src/main/java/org/elasticsearch/search/facet/termsstats/strings/TermsStatsStringFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/termsstats/strings/TermsStatsStringFacetCollector.java @@ -21,8 +21,9 @@ package org.elasticsearch.search.facet.termsstats.strings; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.trove.ExtTHashMap; @@ -114,12 +115,12 @@ public class TermsStatsStringFacetCollector extends AbstractFacetCollector { } @Override - protected void doSetNextReader(IndexReader reader, int docBase) throws IOException { - keyFieldData = fieldDataCache.cache(keyFieldDataType, reader, keyFieldName); + protected void doSetNextReader(AtomicReaderContext context) throws IOException { + keyFieldData = fieldDataCache.cache(keyFieldDataType, context.reader(), keyFieldName); if (script != null) { - script.setNextReader(reader); + script.setNextReader(context.reader()); } else { - aggregator.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, reader, valueFieldName); + aggregator.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, context.reader(), valueFieldName); } } @@ -156,7 +157,8 @@ public class TermsStatsStringFacetCollector extends AbstractFacetCollector { public static class Aggregator implements FieldData.StringValueInDocProc { - final ExtTHashMap entries = CacheRecycler.popHashMap(); + // LUCENE 4 UPGRADE: check if hashcode is not too expensive + final ExtTHashMap entries = CacheRecycler.popHashMap(); int missing = 0; @@ -165,7 +167,7 @@ public class TermsStatsStringFacetCollector extends AbstractFacetCollector { ValueAggregator valueAggregator = new ValueAggregator(); @Override - public void onValue(int docId, String value) { + public void onValue(int docId, BytesRef value) { InternalTermsStatsStringFacet.StringEntry stringEntry = 
entries.get(value); if (stringEntry == null) { stringEntry = new InternalTermsStatsStringFacet.StringEntry(value, 0, 0, 0, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY); @@ -207,7 +209,7 @@ public class TermsStatsStringFacetCollector extends AbstractFacetCollector { } @Override - public void onValue(int docId, String value) { + public void onValue(int docId, BytesRef value) { InternalTermsStatsStringFacet.StringEntry stringEntry = entries.get(value); if (stringEntry == null) { stringEntry = new InternalTermsStatsStringFacet.StringEntry(value, 1, 0, 0, Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY); diff --git a/src/test/java/org/elasticsearch/test/unit/index/field/data/strings/StringFieldDataTests.java b/src/test/java/org/elasticsearch/test/unit/index/field/data/strings/StringFieldDataTests.java index c216b4f945f..a82a2cd00c0 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/field/data/strings/StringFieldDataTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/field/data/strings/StringFieldDataTests.java @@ -19,11 +19,10 @@ package org.elasticsearch.test.unit.index.field.data.strings; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.index.field.data.FieldData; import org.elasticsearch.index.field.data.strings.StringFieldData; @@ -63,7 +62,7 @@ public class StringFieldDataTests { indexWriter.addDocument(doc() .add(field("svalue", "aaa")).build()); - IndexReader reader = IndexReader.open(indexWriter, true); + AtomicReader reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(indexWriter, false)); StringFieldData sFieldData = StringFieldData.load(reader, "svalue"); StringFieldData mFieldData = StringFieldData.load(reader, "mvalue"); @@ -76,61 +75,61 @@ public class StringFieldDataTests { // svalue assertThat(sFieldData.hasValue(0), equalTo(true)); - assertThat(sFieldData.value(0), equalTo("zzz")); - assertThat(sFieldData.docFieldData(0).getValue(), equalTo("zzz")); + assertThat(sFieldData.value(0).utf8ToString(), equalTo("zzz")); + assertThat(sFieldData.docFieldData(0).getValue().utf8ToString(), equalTo("zzz")); assertThat(sFieldData.values(0).length, equalTo(1)); assertThat(sFieldData.docFieldData(0).getValues().length, equalTo(1)); - assertThat(sFieldData.values(0)[0], equalTo("zzz")); - assertThat(sFieldData.docFieldData(0).getValues()[0], equalTo("zzz")); + assertThat(sFieldData.values(0)[0].utf8ToString(), equalTo("zzz")); + assertThat(sFieldData.docFieldData(0).getValues()[0].utf8ToString(), equalTo("zzz")); assertThat(sFieldData.hasValue(1), equalTo(true)); - assertThat(sFieldData.value(1), equalTo("xxx")); + assertThat(sFieldData.value(1).utf8ToString(), equalTo("xxx")); assertThat(sFieldData.values(1).length, equalTo(1)); - assertThat(sFieldData.values(1)[0], equalTo("xxx")); + assertThat(sFieldData.values(1)[0].utf8ToString(), equalTo("xxx")); assertThat(sFieldData.hasValue(2), equalTo(false)); assertThat(sFieldData.hasValue(3), equalTo(true)); - assertThat(sFieldData.value(3), equalTo("aaa")); + assertThat(sFieldData.value(3).utf8ToString(), equalTo("aaa")); assertThat(sFieldData.values(3).length, equalTo(1)); - assertThat(sFieldData.values(3)[0], equalTo("aaa")); + assertThat(sFieldData.values(3)[0].utf8ToString(), 
equalTo("aaa")); assertThat(sFieldData.hasValue(4), equalTo(true)); - assertThat(sFieldData.value(4), equalTo("aaa")); + assertThat(sFieldData.value(4).utf8ToString(), equalTo("aaa")); assertThat(sFieldData.values(4).length, equalTo(1)); - assertThat(sFieldData.values(4)[0], equalTo("aaa")); + assertThat(sFieldData.values(4)[0].utf8ToString(), equalTo("aaa")); // check order is correct - final ArrayList values = new ArrayList(); + final ArrayList values = new ArrayList(); sFieldData.forEachValue(new FieldData.StringValueProc() { @Override - public void onValue(String value) { + public void onValue(BytesRef value) { values.add(value); } }); assertThat(values.size(), equalTo(3)); - assertThat(values.get(0), equalTo("aaa")); - assertThat(values.get(1), equalTo("xxx")); - assertThat(values.get(2), equalTo("zzz")); + assertThat(values.get(0).utf8ToString(), equalTo("aaa")); + assertThat(values.get(1).utf8ToString(), equalTo("xxx")); + assertThat(values.get(2).utf8ToString(), equalTo("zzz")); // mvalue assertThat(mFieldData.hasValue(0), equalTo(true)); - assertThat(mFieldData.value(0), equalTo("111")); + assertThat(mFieldData.value(0).utf8ToString(), equalTo("111")); assertThat(mFieldData.values(0).length, equalTo(1)); - assertThat(mFieldData.values(0)[0], equalTo("111")); + assertThat(mFieldData.values(0)[0].utf8ToString(), equalTo("111")); assertThat(mFieldData.hasValue(1), equalTo(true)); - assertThat(mFieldData.value(1), equalTo("222")); + assertThat(mFieldData.value(1).utf8ToString(), equalTo("222")); assertThat(mFieldData.values(1).length, equalTo(2)); - assertThat(mFieldData.values(1)[0], equalTo("222")); - assertThat(mFieldData.values(1)[1], equalTo("333")); + assertThat(mFieldData.values(1)[0].utf8ToString(), equalTo("222")); + assertThat(mFieldData.values(1)[1].utf8ToString(), equalTo("333")); assertThat(mFieldData.hasValue(2), equalTo(true)); - assertThat(mFieldData.value(2), equalTo("333")); + assertThat(mFieldData.value(2).utf8ToString(), equalTo("333")); assertThat(mFieldData.values(2).length, equalTo(2)); - assertThat(mFieldData.values(2)[0], equalTo("333")); - assertThat(mFieldData.values(2)[1], equalTo("444")); + assertThat(mFieldData.values(2)[0].utf8ToString(), equalTo("333")); + assertThat(mFieldData.values(2)[1].utf8ToString(), equalTo("444")); assertThat(mFieldData.hasValue(3), equalTo(false)); @@ -139,16 +138,16 @@ public class StringFieldDataTests { values.clear(); mFieldData.forEachValue(new FieldData.StringValueProc() { @Override - public void onValue(String value) { + public void onValue(BytesRef value) { values.add(value); } }); assertThat(values.size(), equalTo(4)); - assertThat(values.get(0), equalTo("111")); - assertThat(values.get(1), equalTo("222")); - assertThat(values.get(2), equalTo("333")); - assertThat(values.get(3), equalTo("444")); + assertThat(values.get(0).utf8ToString(), equalTo("111")); + assertThat(values.get(1).utf8ToString(), equalTo("222")); + assertThat(values.get(2).utf8ToString(), equalTo("333")); + assertThat(values.get(3).utf8ToString(), equalTo("444")); indexWriter.close(); } From 15c9cd514208b4b71bcb59068500ae6a4bdb578d Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 26 Oct 2012 01:37:06 +0200 Subject: [PATCH 013/146] lucene 4: Field name no longed interned when loading field data cache and return empty field data cache for fields that don't exist. 
--- .../index/field/data/support/FieldDataLoader.java | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/field/data/support/FieldDataLoader.java b/src/main/java/org/elasticsearch/index/field/data/support/FieldDataLoader.java index 414acaaa15a..0d41783c9c5 100644 --- a/src/main/java/org/elasticsearch/index/field/data/support/FieldDataLoader.java +++ b/src/main/java/org/elasticsearch/index/field/data/support/FieldDataLoader.java @@ -38,9 +38,6 @@ public class FieldDataLoader { public static <T extends FieldData> T load(AtomicReader reader, String field, TypeLoader<T> loader) throws IOException { loader.init(); - - // LUCENE 4 UPGRADE: StringHelper? - field = field.intern();//StringHelper.intern(field); ArrayList<int[]> ordinals = new ArrayList<int[]>(); int[] idx = new int[reader.maxDoc()]; ordinals.add(new int[reader.maxDoc()]); @@ -48,8 +45,11 @@ int t = 1; // current term number Terms terms = reader.terms(field); - TermsEnum termsEnum = terms.iterator(null); + if (terms == null) { + return loader.buildSingleValue(field, new int[0]); // Return empty field data if field doesn't exist. + } + TermsEnum termsEnum = terms.iterator(null); try { DocsEnum docsEnum = null; for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.term()) { From ed0374135311d9305acab4a14151e4c5566c1620 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Fri, 26 Oct 2012 02:33:00 +0200 Subject: [PATCH 014/146] lucene 4: hashCode and equals for Text and BytesReference now that we are going to use those more in places like facets, they need to implement equals and hashCode to be used in hashes --- .../bytes/ByteBufferBytesReference.java | 10 ++++ .../common/bytes/BytesArray.java | 29 ++---------- .../common/bytes/BytesReference.java | 47 +++++++++++++++++-- .../bytes/ChannelBufferBytesReference.java | 10 ++++ .../common/bytes/HashedBytesArray.java | 10 ++-- .../elasticsearch/common/text/BytesText.java | 10 ++++ .../common/text/StringAndBytesText.java | 10 ++++ .../elasticsearch/common/text/StringText.java | 12 +++++ 8 files changed, 104 insertions(+), 34 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/bytes/ByteBufferBytesReference.java b/src/main/java/org/elasticsearch/common/bytes/ByteBufferBytesReference.java index 2739be08522..eb403438bad 100644 --- a/src/main/java/org/elasticsearch/common/bytes/ByteBufferBytesReference.java +++ b/src/main/java/org/elasticsearch/common/bytes/ByteBufferBytesReference.java @@ -125,6 +125,16 @@ public class ByteBufferBytesReference implements BytesReference { return buffer.arrayOffset() + buffer.position(); } + @Override + public int hashCode() { + return Helper.bytesHashCode(this); + } + + @Override + public boolean equals(Object obj) { + return Helper.bytesEqual(this, (BytesReference) obj); + } + @Override public String toUtf8() { if (!buffer.hasRemaining()) { diff --git a/src/main/java/org/elasticsearch/common/bytes/BytesArray.java b/src/main/java/org/elasticsearch/common/bytes/BytesArray.java index a274c70e03f..86f6edea530 100644 --- a/src/main/java/org/elasticsearch/common/bytes/BytesArray.java +++ b/src/main/java/org/elasticsearch/common/bytes/BytesArray.java @@ -148,33 +148,12 @@ public class BytesArray implements BytesReference { } @Override - public boolean equals(Object obj) { - return bytesEquals((BytesArray) obj); - } - - public boolean bytesEquals(BytesArray other) { if (length == other.length) { int otherUpto = other.offset; final byte[] otherBytes = other.bytes; final int end = offset
+ length; - for (int upto = offset; upto < end; upto++, otherUpto++) { - if (bytes[upto] != otherBytes[otherUpto]) { - return false; - } - } - return true; - } else { - return false; - } + public int hashCode() { + return Helper.bytesHashCode(this); } @Override - public int hashCode() { - int result = 0; - final int end = offset + length; - for (int i = offset; i < end; i++) { - result = 31 * result + bytes[i]; - } - return result; + public boolean equals(Object obj) { + return Helper.bytesEqual(this, (BytesReference) obj); } } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/common/bytes/BytesReference.java b/src/main/java/org/elasticsearch/common/bytes/BytesReference.java index 9502caa130f..67c4114ad19 100644 --- a/src/main/java/org/elasticsearch/common/bytes/BytesReference.java +++ b/src/main/java/org/elasticsearch/common/bytes/BytesReference.java @@ -31,6 +31,46 @@ import java.util.Comparator; */ public interface BytesReference { + public static class Helper { + + public static boolean bytesEqual(BytesReference a, BytesReference b) { + if (a == b) { + return true; + } + if (a.length() != b.length()) { + return false; + } + if (!a.hasArray()) { + a = a.toBytesArray(); + } + if (!b.hasArray()) { + b = b.toBytesArray(); + } + int bUpTo = b.arrayOffset(); + final byte[] aArray = a.array(); + final byte[] bArray = b.array(); + final int end = a.arrayOffset() + a.length(); + for (int aUpTo = a.arrayOffset(); aUpTo < end; aUpTo++, bUpTo++) { + if (aArray[aUpTo] != bArray[bUpTo]) { + return false; + } + } + return true; + } + + public static int bytesHashCode(BytesReference a) { + if (!a.hasArray()) { + a = a.toBytesArray(); + } + int result = 0; + final int end = a.arrayOffset() + a.length(); + for (int i = a.arrayOffset(); i < end; i++) { + result = 31 * result + a.array()[i]; + } + return result; + } + } + /** * Returns the byte at the specified index. Need to be between 0 and length. 
*/ @@ -103,7 +143,8 @@ public interface BytesReference { public static class UTF8SortedAsUnicodeComparator implements Comparator { // Only singleton - private UTF8SortedAsUnicodeComparator() {} + private UTF8SortedAsUnicodeComparator() { + } public int compare(BytesReference a, BytesReference b) { if (a.hasArray() && b.hasArray()) { @@ -113,7 +154,7 @@ public interface BytesReference { int bUpto = b.arrayOffset(); final int aStop = aUpto + Math.min(a.length(), b.length()); - while(aUpto < aStop) { + while (aUpto < aStop) { int aByte = aBytes[aUpto++] & 0xff; int bByte = bBytes[bUpto++] & 0xff; @@ -132,7 +173,7 @@ public interface BytesReference { int bUpto = 0; final int aStop = aUpto + Math.min(a.length(), b.length()); - while(aUpto < aStop) { + while (aUpto < aStop) { int aByte = aBytes[aUpto++] & 0xff; int bByte = bBytes[bUpto++] & 0xff; diff --git a/src/main/java/org/elasticsearch/common/bytes/ChannelBufferBytesReference.java b/src/main/java/org/elasticsearch/common/bytes/ChannelBufferBytesReference.java index f6fa8a116e6..b029c43330a 100644 --- a/src/main/java/org/elasticsearch/common/bytes/ChannelBufferBytesReference.java +++ b/src/main/java/org/elasticsearch/common/bytes/ChannelBufferBytesReference.java @@ -106,4 +106,14 @@ public class ChannelBufferBytesReference implements BytesReference { public String toUtf8() { return buffer.toString(Charsets.UTF_8); } + + @Override + public int hashCode() { + return Helper.bytesHashCode(this); + } + + @Override + public boolean equals(Object obj) { + return Helper.bytesEqual(this, (BytesReference) obj); + } } diff --git a/src/main/java/org/elasticsearch/common/bytes/HashedBytesArray.java b/src/main/java/org/elasticsearch/common/bytes/HashedBytesArray.java index 2182ed65a2a..eddacc26981 100644 --- a/src/main/java/org/elasticsearch/common/bytes/HashedBytesArray.java +++ b/src/main/java/org/elasticsearch/common/bytes/HashedBytesArray.java @@ -124,14 +124,12 @@ public class HashedBytesArray implements BytesReference { } @Override - public boolean equals(Object o) { - if (this == o) return true; - HashedBytesArray bytesWrap = (HashedBytesArray) o; - return Arrays.equals(bytes, bytesWrap.bytes); + public int hashCode() { + return Helper.bytesHashCode(this); } @Override - public int hashCode() { - return hashCode; + public boolean equals(Object obj) { + return Helper.bytesEqual(this, (BytesReference) obj); } } diff --git a/src/main/java/org/elasticsearch/common/text/BytesText.java b/src/main/java/org/elasticsearch/common/text/BytesText.java index 79180ba254e..6059ad8a186 100644 --- a/src/main/java/org/elasticsearch/common/text/BytesText.java +++ b/src/main/java/org/elasticsearch/common/text/BytesText.java @@ -61,4 +61,14 @@ public class BytesText implements Text { public String toString() { return string(); } + + @Override + public int hashCode() { + return bytes().hashCode(); + } + + @Override + public boolean equals(Object obj) { + return bytes().equals(((Text) obj).bytes()); + } } diff --git a/src/main/java/org/elasticsearch/common/text/StringAndBytesText.java b/src/main/java/org/elasticsearch/common/text/StringAndBytesText.java index 484449a2b20..e825e8ad846 100644 --- a/src/main/java/org/elasticsearch/common/text/StringAndBytesText.java +++ b/src/main/java/org/elasticsearch/common/text/StringAndBytesText.java @@ -87,4 +87,14 @@ public class StringAndBytesText implements Text { public String toString() { return string(); } + + @Override + public int hashCode() { + return bytes().hashCode(); + } + + @Override + public boolean equals(Object obj) { 
+ return bytes().equals(((Text) obj).bytes()); + } } diff --git a/src/main/java/org/elasticsearch/common/text/StringText.java b/src/main/java/org/elasticsearch/common/text/StringText.java index 69bd67b7426..20183863b71 100644 --- a/src/main/java/org/elasticsearch/common/text/StringText.java +++ b/src/main/java/org/elasticsearch/common/text/StringText.java @@ -71,4 +71,16 @@ public class StringText implements Text { public String toString() { return string(); } + + @Override + public int hashCode() { + // we use bytes here so we can be consistent with other text implementations + return bytes().hashCode(); + } + + @Override + public boolean equals(Object obj) { + // we use bytes here so we can be consistent with other text implementations + return bytes().equals(((Text) obj).bytes()); + } } From f572a7bcf78ca688690aff75a0ef4246046f5e96 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Fri, 26 Oct 2012 02:39:13 +0200 Subject: [PATCH 015/146] lucene 4: no close on searcher anymore --- .../org/elasticsearch/search/internal/SearchContext.java | 6 ------ .../test/unit/common/lucene/all/SimpleAllTests.java | 8 -------- 2 files changed, 14 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/internal/SearchContext.java b/src/main/java/org/elasticsearch/search/internal/SearchContext.java index eec9f9d5b60..41b8e4a86d9 100644 --- a/src/main/java/org/elasticsearch/search/internal/SearchContext.java +++ b/src/main/java/org/elasticsearch/search/internal/SearchContext.java @@ -199,12 +199,6 @@ public class SearchContext implements Releasable { scopePhase.clear(); } } - // we should close this searcher, since its a new one we create each time, and we use the IndexReader - try { - searcher.close(); - } catch (Exception e) { - // ignore any exception here - } engineSearcher.release(); return true; } diff --git a/src/test/java/org/elasticsearch/test/unit/common/lucene/all/SimpleAllTests.java b/src/test/java/org/elasticsearch/test/unit/common/lucene/all/SimpleAllTests.java index ff4ade9d346..df0fd614ea4 100644 --- a/src/test/java/org/elasticsearch/test/unit/common/lucene/all/SimpleAllTests.java +++ b/src/test/java/org/elasticsearch/test/unit/common/lucene/all/SimpleAllTests.java @@ -110,8 +110,6 @@ public class SimpleAllTests { assertThat(docs.scoreDocs[0].doc, equalTo(0)); assertThat(docs.scoreDocs[1].doc, equalTo(1)); - searcher.close(); - indexWriter.close(); } @@ -154,8 +152,6 @@ public class SimpleAllTests { assertThat(docs.scoreDocs[0].doc, equalTo(0)); assertThat(docs.scoreDocs[1].doc, equalTo(1)); - searcher.close(); - indexWriter.close(); } @@ -207,8 +203,6 @@ public class SimpleAllTests { assertThat(docs.scoreDocs[0].doc, equalTo(0)); assertThat(docs.scoreDocs[1].doc, equalTo(1)); - searcher.close(); - indexWriter.close(); } @@ -260,8 +254,6 @@ public class SimpleAllTests { assertThat(docs.scoreDocs[0].doc, equalTo(0)); assertThat(docs.scoreDocs[1].doc, equalTo(1)); - searcher.close(); - indexWriter.close(); } } From 097cb2dac7b1578f8a32d85a7367d755920c11db Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Thu, 25 Oct 2012 21:23:47 -0400 Subject: [PATCH 016/146] lucene 4: migrate char filter from CharStream to Reader --- .../index/analysis/CharFilterFactory.java | 4 ++-- .../index/analysis/CustomAnalyzer.java | 4 +--- .../analysis/HtmlStripCharFilterFactory.java | 5 +++-- .../analysis/MappingCharFilterFactory.java | 18 +++++++++--------- .../analysis/IndicesAnalysisService.java | 4 ++-- 5 files changed, 17 insertions(+), 18 deletions(-) diff --git 
a/src/main/java/org/elasticsearch/index/analysis/CharFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/CharFilterFactory.java index 4df5a3c08f2..d3587cfe272 100644 --- a/src/main/java/org/elasticsearch/index/analysis/CharFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/CharFilterFactory.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharStream; +import java.io.Reader; /** * @@ -28,5 +28,5 @@ public interface CharFilterFactory { String name(); - CharStream create(CharStream tokenStream); + Reader create(Reader tokenStream); } diff --git a/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java b/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java index 62c530e41ff..15d6a706006 100644 --- a/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java +++ b/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java @@ -106,11 +106,9 @@ public final class CustomAnalyzer extends Analyzer { private Reader charFilterIfNeeded(Reader reader) { if (charFilters != null && charFilters.length > 0) { - CharStream charStream = CharReader.get(reader); for (CharFilterFactory charFilter : charFilters) { - charStream = charFilter.create(charStream); + reader = charFilter.create(reader); } - reader = charStream; } return reader; } diff --git a/src/main/java/org/elasticsearch/index/analysis/HtmlStripCharFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/HtmlStripCharFilterFactory.java index 4cb3ea41b98..1c6d9f4c378 100644 --- a/src/main/java/org/elasticsearch/index/analysis/HtmlStripCharFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/HtmlStripCharFilterFactory.java @@ -20,7 +20,6 @@ package org.elasticsearch.index.analysis; import com.google.common.collect.ImmutableSet; -import org.apache.lucene.analysis.CharStream; import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; @@ -28,6 +27,8 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.Index; import org.elasticsearch.index.settings.IndexSettings; +import java.io.Reader; + /** * */ @@ -51,7 +52,7 @@ public class HtmlStripCharFilterFactory extends AbstractCharFilterFactory { } @Override - public CharStream create(CharStream tokenStream) { + public Reader create(Reader tokenStream) { return new HTMLStripCharFilter(tokenStream, escapedTags); } } diff --git a/src/main/java/org/elasticsearch/index/analysis/MappingCharFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/MappingCharFilterFactory.java index 848cbd25a55..a74b5b6a796 100644 --- a/src/main/java/org/elasticsearch/index/analysis/MappingCharFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/MappingCharFilterFactory.java @@ -19,9 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharStream; -import org.apache.lucene.analysis.MappingCharFilter; -import org.apache.lucene.analysis.NormalizeCharMap; +import org.apache.lucene.analysis.charfilter.MappingCharFilter; +import org.apache.lucene.analysis.charfilter.NormalizeCharMap; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; @@ -30,6 +29,7 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.index.Index; import 
org.elasticsearch.index.settings.IndexSettings; +import java.io.Reader; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -37,7 +37,7 @@ import java.util.regex.Pattern; @AnalysisSettingsRequired public class MappingCharFilterFactory extends AbstractCharFilterFactory { - private final NormalizeCharMap normMap; + private final NormalizeCharMap.Builder normMapBuilder; @Inject public MappingCharFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { @@ -48,13 +48,13 @@ public class MappingCharFilterFactory extends AbstractCharFilterFactory { throw new ElasticSearchIllegalArgumentException("mapping requires either `mappings` or `mappings_path` to be configured"); } - normMap = new NormalizeCharMap(); - parseRules(rules, normMap); + normMapBuilder = new NormalizeCharMap.Builder(); + parseRules(rules, normMapBuilder); } @Override - public CharStream create(CharStream tokenStream) { - return new MappingCharFilter(normMap, tokenStream); + public Reader create(Reader tokenStream) { + return new MappingCharFilter(normMapBuilder.build(), tokenStream); } // source => target @@ -63,7 +63,7 @@ public class MappingCharFilterFactory extends AbstractCharFilterFactory { /** * parses a list of MappingCharFilter style rules into a normalize char map */ - private void parseRules(List rules, NormalizeCharMap map) { + private void parseRules(List rules, NormalizeCharMap.Builder map) { for (String rule : rules) { Matcher m = rulePattern.matcher(rule); if (!m.find()) diff --git a/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java b/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java index 49d7a2f3d9f..198e5133893 100644 --- a/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java +++ b/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java @@ -658,7 +658,7 @@ public class IndicesAnalysisService extends AbstractComponent { } @Override - public CharStream create(CharStream tokenStream) { + public Reader create(Reader tokenStream) { return new HTMLStripCharFilter(tokenStream); } })); @@ -670,7 +670,7 @@ public class IndicesAnalysisService extends AbstractComponent { } @Override - public CharStream create(CharStream tokenStream) { + public Reader create(Reader tokenStream) { return new HTMLStripCharFilter(tokenStream); } })); From 6fad75df824a087fb304c766ee8dc61ee178f594 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Fri, 26 Oct 2012 00:03:16 -0400 Subject: [PATCH 017/146] lucene 4: remove Pattern tokenizer and filter --- .../pattern/PatternReplaceFilter.java | 85 ---------- .../analysis/pattern/PatternTokenizer.java | 153 ------------------ 2 files changed, 238 deletions(-) delete mode 100644 src/main/java/org/apache/lucene/analysis/pattern/PatternReplaceFilter.java delete mode 100644 src/main/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java diff --git a/src/main/java/org/apache/lucene/analysis/pattern/PatternReplaceFilter.java b/src/main/java/org/apache/lucene/analysis/pattern/PatternReplaceFilter.java deleted file mode 100644 index ef7565215b1..00000000000 --- a/src/main/java/org/apache/lucene/analysis/pattern/PatternReplaceFilter.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.lucene.analysis.pattern; - -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; - -import java.io.IOException; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * A TokenFilter which applies a Pattern to each token in the stream, - * replacing match occurances with the specified replacement string. - *
<p> - * Note: Depending on the input and the pattern used and the input - * TokenStream, this TokenFilter may produce Tokens whose text is the empty - * string. - * </p>
- * - * @see Pattern - */ -public final class PatternReplaceFilter extends TokenFilter { - private final Pattern p; - private final String replacement; - private final boolean all; - private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - private final Matcher m; - - /** - * Constructs an instance to replace either the first, or all occurances - * - * @param in the TokenStream to process - * @param p the patterm to apply to each Token - * @param replacement the "replacement string" to substitute, if null a - * blank string will be used. Note that this is not the literal - * string that will be used, '$' and '\' have special meaning. - * @param all if true, all matches will be replaced otherwise just the first match. - * @see Matcher#quoteReplacement - */ - public PatternReplaceFilter(TokenStream in, - Pattern p, - String replacement, - boolean all) { - super(in); - this.p = p; - this.replacement = (null == replacement) ? "" : replacement; - this.all = all; - this.m = p.matcher(termAtt); - } - - @Override - public boolean incrementToken() throws IOException { - if (!input.incrementToken()) return false; - - m.reset(); - if (m.find()) { - // replaceAll/replaceFirst will reset() this previous find. - String transformed = all ? m.replaceAll(replacement) : m.replaceFirst(replacement); - termAtt.setEmpty().append(transformed); - } - - return true; - } - -} diff --git a/src/main/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java b/src/main/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java deleted file mode 100644 index 09d32506976..00000000000 --- a/src/main/java/org/apache/lucene/analysis/pattern/PatternTokenizer.java +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.lucene.analysis.pattern; - -import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; - -import java.io.IOException; -import java.io.Reader; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -/** - * This tokenizer uses regex pattern matching to construct distinct tokens - * for the input stream. It takes two arguments: "pattern" and "group". - *
<p/> - * <ul> - * <li>"pattern" is the regular expression.</li> - * <li>"group" says which group to extract into tokens.</li> - * </ul> - * <p/> - * group=-1 (the default) is equivalent to "split". In this case, the tokens will - * be equivalent to the output from (without empty tokens): - * {@link String#split(java.lang.String)} - * </p> - * <p/> - * Using group >= 0 selects the matching group as the token. For example, if you have:<br/> - * <pre> - *  pattern = \'([^\']+)\' - *  group = 0 - *  input = aaa 'bbb' 'ccc' - * </pre> - * the output will be two tokens: 'bbb' and 'ccc' (including the ' marks). With the same input - * but using group=1, the output would be: bbb and ccc (no ' marks) - * </p> - * <p/> - * NOTE: This Tokenizer does not output tokens that are of zero length. - * </p>
- * - * @see Pattern - */ -public final class PatternTokenizer extends Tokenizer { - - private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); - - private final StringBuilder str = new StringBuilder(); - private int index; - - private final Pattern pattern; - private final int group; - private final Matcher matcher; - - /** - * creates a new PatternTokenizer returning tokens from group (-1 for split functionality) - */ - public PatternTokenizer(Reader input, Pattern pattern, int group) throws IOException { - super(input); - this.pattern = pattern; - this.group = group; - fillBuffer(str, input); - matcher = pattern.matcher(str); - index = 0; - } - - @Override - public boolean incrementToken() throws IOException { - if (index >= str.length()) return false; - clearAttributes(); - if (group >= 0) { - - // match a specific group - while (matcher.find()) { - index = matcher.start(group); - final int endIndex = matcher.end(group); - if (index == endIndex) continue; - termAtt.setEmpty().append(str, index, endIndex); - offsetAtt.setOffset(correctOffset(index), correctOffset(endIndex)); - return true; - } - - index = Integer.MAX_VALUE; // mark exhausted - return false; - - } else { - - // String.split() functionality - while (matcher.find()) { - if (matcher.start() - index > 0) { - // found a non-zero-length token - termAtt.setEmpty().append(str, index, matcher.start()); - offsetAtt.setOffset(correctOffset(index), correctOffset(matcher.start())); - index = matcher.end(); - return true; - } - - index = matcher.end(); - } - - if (str.length() - index == 0) { - index = Integer.MAX_VALUE; // mark exhausted - return false; - } - - termAtt.setEmpty().append(str, index, str.length()); - offsetAtt.setOffset(correctOffset(index), correctOffset(str.length())); - index = Integer.MAX_VALUE; // mark exhausted - return true; - } - } - - @Override - public void end() throws IOException { - final int ofs = correctOffset(str.length()); - offsetAtt.setOffset(ofs, ofs); - } - - @Override - public void reset(Reader input) throws IOException { - super.reset(input); - fillBuffer(str, input); - matcher.reset(str); - index = 0; - } - - // TODO: we should see if we can make this tokenizer work without reading - // the entire document into RAM, perhaps with Matcher.hitEnd/requireEnd ? 
- final char[] buffer = new char[8192]; - - private void fillBuffer(StringBuilder sb, Reader input) throws IOException { - int len; - sb.setLength(0); - while ((len = input.read(buffer)) > 0) { - sb.append(buffer, 0, len); - } - } -} From 1cc5ee7ad9be4412392b0f7e4de53c1c8b5fdd10 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Fri, 26 Oct 2012 00:07:20 -0400 Subject: [PATCH 018/146] lucene 4: implement createComponents in Analyzers --- .../index/analysis/CustomAnalyzer.java | 39 ++++--------------- .../index/analysis/NamedAnalyzer.java | 29 +++----------- .../index/analysis/NumericAnalyzer.java | 19 ++------- .../index/analysis/NumericTokenizer.java | 15 ++++--- 4 files changed, 24 insertions(+), 78 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java b/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java index 15d6a706006..a43b2185974 100644 --- a/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java +++ b/src/main/java/org/elasticsearch/index/analysis/CustomAnalyzer.java @@ -19,10 +19,10 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.*; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; -import java.io.IOException; import java.io.Reader; /** @@ -71,7 +71,7 @@ public final class CustomAnalyzer extends Analyzer { } @Override - public int getOffsetGap(Fieldable field) { + public int getOffsetGap(String field) { if (offsetGap < 0) { return super.getOffsetGap(field); } @@ -79,29 +79,13 @@ public final class CustomAnalyzer extends Analyzer { } @Override - public final TokenStream tokenStream(String fieldName, Reader reader) { - return buildHolder(reader).tokenStream; - } - - @Override - public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { - Holder holder = (Holder) getPreviousTokenStream(); - if (holder == null) { - holder = buildHolder(charFilterIfNeeded(reader)); - setPreviousTokenStream(holder); - } else { - holder.tokenizer.reset(charFilterIfNeeded(reader)); - } - return holder.tokenStream; - } - - private Holder buildHolder(Reader input) { - Tokenizer tokenizer = tokenizerFactory.create(input); + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { + Tokenizer tokenizer = tokenizerFactory.create(charFilterIfNeeded(reader)); TokenStream tokenStream = tokenizer; for (TokenFilterFactory tokenFilter : tokenFilters) { tokenStream = tokenFilter.create(tokenStream); } - return new Holder(tokenizer, tokenStream); + return new TokenStreamComponents(tokenizer, tokenStream); } private Reader charFilterIfNeeded(Reader reader) { @@ -113,13 +97,4 @@ public final class CustomAnalyzer extends Analyzer { return reader; } - static class Holder { - final Tokenizer tokenizer; - final TokenStream tokenStream; - - private Holder(Tokenizer tokenizer, TokenStream tokenStream) { - this.tokenizer = tokenizer; - this.tokenStream = tokenStream; - } - } } diff --git a/src/main/java/org/elasticsearch/index/analysis/NamedAnalyzer.java b/src/main/java/org/elasticsearch/index/analysis/NamedAnalyzer.java index 823286c244b..70cff9e27a0 100644 --- a/src/main/java/org/elasticsearch/index/analysis/NamedAnalyzer.java +++ b/src/main/java/org/elasticsearch/index/analysis/NamedAnalyzer.java @@ -20,17 +20,15 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.Analyzer; -import 
org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.analysis.AnalyzerWrapper; -import java.io.IOException; import java.io.Reader; /** * Named analyzer is an analyzer wrapper around an actual analyzer ({@link #analyzer} that is associated * with a name ({@link #name()}. */ -public class NamedAnalyzer extends Analyzer { +public class NamedAnalyzer extends AnalyzerWrapper { private final String name; @@ -70,28 +68,13 @@ public class NamedAnalyzer extends Analyzer { } @Override - public final TokenStream tokenStream(String fieldName, Reader reader) { - return analyzer.tokenStream(fieldName, reader); + protected Analyzer getWrappedAnalyzer(String fieldName) { + return this.analyzer; } @Override - public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { - return analyzer.reusableTokenStream(fieldName, reader); - } - - @Override - public int getPositionIncrementGap(String fieldName) { - return analyzer.getPositionIncrementGap(fieldName); - } - - @Override - public int getOffsetGap(Fieldable field) { - return analyzer.getOffsetGap(field); - } - - @Override - public void close() { - analyzer.close(); + protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) { + return components; } @Override diff --git a/src/main/java/org/elasticsearch/index/analysis/NumericAnalyzer.java b/src/main/java/org/elasticsearch/index/analysis/NumericAnalyzer.java index 24db00cfe3f..1b20e95a504 100644 --- a/src/main/java/org/elasticsearch/index/analysis/NumericAnalyzer.java +++ b/src/main/java/org/elasticsearch/index/analysis/NumericAnalyzer.java @@ -31,27 +31,16 @@ import java.io.Reader; public abstract class NumericAnalyzer extends Analyzer { @Override - public final TokenStream tokenStream(String fieldName, Reader reader) { + protected TokenStreamComponents createComponents(String fieldName, Reader reader) { try { - return createNumericTokenizer(reader, new char[32]); + // LUCENE 4 UPGRADE: in reusableTokenStream the buffer size was char[120] + // Not sure if this is intentional or not + return new TokenStreamComponents(createNumericTokenizer(reader, new char[32])); } catch (IOException e) { throw new RuntimeException("Failed to create numeric tokenizer", e); } } - @Override - public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { - Holder holder = (Holder) getPreviousTokenStream(); - if (holder == null) { - char[] buffer = new char[120]; - holder = new Holder(createNumericTokenizer(reader, buffer), buffer); - setPreviousTokenStream(holder); - } else { - holder.tokenizer.reset(reader, holder.buffer); - } - return holder.tokenizer; - } - protected abstract T createNumericTokenizer(Reader reader, char[] buffer) throws IOException; private static final class Holder { diff --git a/src/main/java/org/elasticsearch/index/analysis/NumericTokenizer.java b/src/main/java/org/elasticsearch/index/analysis/NumericTokenizer.java index a3f11037eef..46a1379a47c 100644 --- a/src/main/java/org/elasticsearch/index/analysis/NumericTokenizer.java +++ b/src/main/java/org/elasticsearch/index/analysis/NumericTokenizer.java @@ -35,27 +35,26 @@ public abstract class NumericTokenizer extends Tokenizer { protected final Object extra; protected NumericTokenizer(Reader reader, NumericTokenStream numericTokenStream, Object extra) throws IOException { - super(numericTokenStream); + super(numericTokenStream, reader); this.numericTokenStream = numericTokenStream; 
this.extra = extra; - reset(reader); + reset(); } protected NumericTokenizer(Reader reader, NumericTokenStream numericTokenStream, char[] buffer, Object extra) throws IOException { - super(numericTokenStream); + super(numericTokenStream, reader); this.numericTokenStream = numericTokenStream; this.extra = extra; - reset(reader, buffer); + reset(buffer); } @Override - public void reset(Reader input) throws IOException { + public void reset() throws IOException { char[] buffer = new char[32]; - reset(input, buffer); + reset(buffer); } - public void reset(Reader input, char[] buffer) throws IOException { - super.reset(input); + public void reset(char[] buffer) throws IOException { int len = input.read(buffer); String value = new String(buffer, 0, len); setValue(numericTokenStream, value); From b128b7a7509ee0cda67fdc8e28844cbc55bd1449 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Fri, 26 Oct 2012 00:10:19 -0400 Subject: [PATCH 019/146] lucene 4: use CharArraySet for stem exclusions, stop words and articles and fix analyzer namespaces --- .../ASCIIFoldingTokenFilterFactory.java | 2 +- .../index/analysis/Analysis.java | 29 +++++++++++-------- .../analysis/ArabicAnalyzerProvider.java | 4 +-- .../analysis/ArmenianAnalyzerProvider.java | 4 +-- .../analysis/BasqueAnalyzerProvider.java | 4 +-- .../analysis/BrazilianAnalyzerProvider.java | 4 +-- .../BrazilianStemTokenFilterFactory.java | 13 ++++----- .../analysis/BulgarianAnalyzerProvider.java | 4 +-- .../analysis/CatalanAnalyzerProvider.java | 4 +-- .../index/analysis/CjkAnalyzerProvider.java | 5 ++-- .../index/analysis/CzechAnalyzerProvider.java | 4 +-- .../analysis/DanishAnalyzerProvider.java | 4 +-- .../index/analysis/DutchAnalyzerProvider.java | 4 +-- .../analysis/DutchStemTokenFilterFactory.java | 17 ++++------- .../analysis/ElisionTokenFilterFactory.java | 13 +++------ .../analysis/EnglishAnalyzerProvider.java | 4 +-- .../index/analysis/FieldNameAnalyzer.java | 25 ++++------------ .../analysis/FinnishAnalyzerProvider.java | 4 +-- .../analysis/FrenchAnalyzerProvider.java | 4 +-- .../FrenchStemTokenFilterFactory.java | 15 ++++------ .../analysis/GalicianAnalyzerProvider.java | 4 +-- .../analysis/GermanAnalyzerProvider.java | 4 +-- .../GermanStemTokenFilterFactory.java | 17 ++++------- .../index/analysis/HindiAnalyzerProvider.java | 4 +-- .../analysis/HungarianAnalyzerProvider.java | 4 +-- .../analysis/IndonesianAnalyzerProvider.java | 4 +-- .../analysis/ItalianAnalyzerProvider.java | 4 +-- .../analysis/KeywordAnalyzerProvider.java | 2 +- .../KeywordMarkerTokenFilterFactory.java | 4 +-- .../analysis/KeywordTokenizerFactory.java | 2 +- .../analysis/LatvianAnalyzerProvider.java | 4 +-- .../analysis/LengthTokenFilterFactory.java | 2 +- .../analysis/LetterTokenizerFactory.java | 2 +- .../analysis/LowerCaseTokenFilterFactory.java | 2 +- .../analysis/LowerCaseTokenizerFactory.java | 2 +- .../analysis/NorwegianAnalyzerProvider.java | 4 +-- .../analysis/PatternAnalyzerProvider.java | 6 ++-- .../PorterStemTokenFilterFactory.java | 2 +- .../analysis/PortugueseAnalyzerProvider.java | 4 +-- .../analysis/RomanianAnalyzerProvider.java | 4 +-- .../analysis/RussianAnalyzerProvider.java | 4 +-- .../RussianStemTokenFilterFactory.java | 4 +-- .../analysis/SimpleAnalyzerProvider.java | 2 +- .../analysis/SnowballAnalyzerProvider.java | 9 +++--- .../analysis/SpanishAnalyzerProvider.java | 4 +-- .../analysis/StandardAnalyzerProvider.java | 7 ++--- .../analysis/StandardHtmlStripAnalyzer.java | 4 +-- .../StemmerOverrideTokenFilterFactory.java | 7 +++-- 
.../analysis/StemmerTokenFilterFactory.java | 2 +- .../index/analysis/StopAnalyzerProvider.java | 7 ++--- .../analysis/StopTokenFilterFactory.java | 14 +++++---- .../analysis/SwedishAnalyzerProvider.java | 4 +-- .../analysis/SynonymTokenFilterFactory.java | 4 ++- .../analysis/TurkishAnalyzerProvider.java | 4 +-- .../UAX29URLEmailTokenizerFactory.java | 2 +- .../analysis/WhitespaceAnalyzerProvider.java | 2 +- .../analysis/WhitespaceTokenizerFactory.java | 2 +- .../WordDelimiterTokenFilterFactory.java | 2 +- ...bstractCompoundWordTokenFilterFactory.java | 5 ++-- 59 files changed, 152 insertions(+), 185 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/analysis/ASCIIFoldingTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/ASCIIFoldingTokenFilterFactory.java index 3278a339837..8a9f08dcf47 100644 --- a/src/main/java/org/elasticsearch/index/analysis/ASCIIFoldingTokenFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/ASCIIFoldingTokenFilterFactory.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.ASCIIFoldingFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; diff --git a/src/main/java/org/elasticsearch/index/analysis/Analysis.java b/src/main/java/org/elasticsearch/index/analysis/Analysis.java index 4ccedab918f..16dd6e3053f 100644 --- a/src/main/java/org/elasticsearch/index/analysis/Analysis.java +++ b/src/main/java/org/elasticsearch/index/analysis/Analysis.java @@ -20,10 +20,8 @@ package org.elasticsearch.index.analysis; import com.google.common.base.Charsets; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; -import com.google.common.collect.Iterators; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.ar.ArabicAnalyzer; import org.apache.lucene.analysis.bg.BulgarianAnalyzer; import org.apache.lucene.analysis.br.BrazilianAnalyzer; @@ -51,6 +49,7 @@ import org.apache.lucene.analysis.ro.RomanianAnalyzer; import org.apache.lucene.analysis.ru.RussianAnalyzer; import org.apache.lucene.analysis.sv.SwedishAnalyzer; import org.apache.lucene.analysis.tr.TurkishAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.Version; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.Strings; @@ -78,18 +77,20 @@ public class Analysis { return value != null && "_none_".equals(value); } - public static Set parseStemExclusion(Settings settings, Set defaultStemExclusion) { + public static CharArraySet parseStemExclusion(Settings settings, CharArraySet defaultStemExclusion, Version version) { String value = settings.get("stem_exclusion"); if (value != null) { if ("_none_".equals(value)) { - return ImmutableSet.of(); + return CharArraySet.EMPTY_SET; } else { - return ImmutableSet.copyOf(Strings.commaDelimitedListToSet(value)); + // LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)? 
+ return new CharArraySet(version, Strings.commaDelimitedListToSet(value), false); } } String[] stopWords = settings.getAsArray("stem_exclusion", null); if (stopWords != null) { - return ImmutableSet.copyOf(Iterators.forArray(stopWords)); + // LUCENE 4 UPGRADE: Should be settings.getAsBoolean("stem_exclusion_case", false)? + return new CharArraySet(version, ImmutableList.of(stopWords), false); } else { return defaultStemExclusion; } @@ -125,7 +126,7 @@ public class Analysis { .put("_turkish_", TurkishAnalyzer.getDefaultStopSet()) .immutableMap(); - public static Set parseArticles(Environment env, Settings settings, Version version) { + public static CharArraySet parseArticles(Environment env, Settings settings, Version version) { String value = settings.get("articles"); if (value != null) { if ("_none_".equals(value)) { @@ -146,18 +147,22 @@ public class Analysis { return null; } - public static Set parseStopWords(Environment env, Settings settings, Set defaultStopWords, Version version) { + public static CharArraySet parseStopWords(Environment env, Settings settings, CharArraySet defaultStopWords, Version version) { + return parseStopWords(env, settings, defaultStopWords, version, settings.getAsBoolean("stopwords_case", false)); + } + + public static CharArraySet parseStopWords(Environment env, Settings settings, CharArraySet defaultStopWords, Version version, boolean ignore_case) { String value = settings.get("stopwords"); if (value != null) { if ("_none_".equals(value)) { return CharArraySet.EMPTY_SET; } else { - return new CharArraySet(version, Strings.commaDelimitedListToSet(value), settings.getAsBoolean("stopwords_case", false)); + return new CharArraySet(version, Strings.commaDelimitedListToSet(value), ignore_case); } } String[] stopWords = settings.getAsArray("stopwords", null); if (stopWords != null) { - CharArraySet setStopWords = new CharArraySet(version, stopWords.length, settings.getAsBoolean("stopwords_case", false)); + CharArraySet setStopWords = new CharArraySet(version, stopWords.length, ignore_case); for (String stopWord : stopWords) { if (namedStopWords.containsKey(stopWord)) { setStopWords.addAll(namedStopWords.get(stopWord)); @@ -169,7 +174,7 @@ public class Analysis { } List pathLoadedStopWords = getWordList(env, settings, "stopwords"); if (pathLoadedStopWords != null) { - CharArraySet setStopWords = new CharArraySet(version, pathLoadedStopWords.size(), settings.getAsBoolean("stopwords_case", false)); + CharArraySet setStopWords = new CharArraySet(version, pathLoadedStopWords.size(), ignore_case); for (String stopWord : pathLoadedStopWords) { if (namedStopWords.containsKey(stopWord)) { setStopWords.addAll(namedStopWords.get(stopWord)); diff --git a/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java index 94e4767c094..5da6921d482 100644 --- a/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/ArabicAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.ar.ArabicAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class ArabicAnalyzerProvider extends AbstractIndexAnalyzerProvider exclusions; + 
private final CharArraySet exclusions; @Inject public BrazilianStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); - String[] stemExclusion = settings.getAsArray("stem_exclusion"); - if (stemExclusion.length > 0) { - this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion)); - } else { - this.exclusions = ImmutableSet.of(); - } + this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version); } @Override public TokenStream create(TokenStream tokenStream) { - return new BrazilianStemFilter(tokenStream, exclusions); + return new BrazilianStemFilter(new KeywordMarkerFilter(tokenStream, exclusions)); } } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java index 2acfc857c7d..085a362b062 100644 --- a/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/BulgarianAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.bg.BulgarianAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class BulgarianAnalyzerProvider extends AbstractIndexAnalyzerProvider stopWords = Analysis.parseStopWords(env, settings, CJKAnalyzer.getDefaultStopSet(), version); + CharArraySet stopWords = Analysis.parseStopWords(env, settings, CJKAnalyzer.getDefaultStopSet(), version); analyzer = new CJKAnalyzer(version, stopWords); } diff --git a/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java index b66662b231f..cfdb9146718 100644 --- a/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/CzechAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.cz.CzechAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class CzechAnalyzerProvider extends AbstractIndexAnalyzerProvider exclusions; + private final CharArraySet exclusions; @Inject public DutchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); - String[] stemExclusion = settings.getAsArray("stem_exclusion"); - if (stemExclusion.length > 0) { - this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion)); - } else { - this.exclusions = ImmutableSet.of(); - } + this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version); } @Override public TokenStream create(TokenStream tokenStream) { - return new DutchStemFilter(tokenStream, exclusions); + return new DutchStemFilter(new KeywordMarkerFilter(tokenStream, exclusions)); } } \ No newline at end of file diff --git 
a/src/main/java/org/elasticsearch/index/analysis/ElisionTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/ElisionTokenFilterFactory.java index 1bed5963046..9f472ec8f1c 100644 --- a/src/main/java/org/elasticsearch/index/analysis/ElisionTokenFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/ElisionTokenFilterFactory.java @@ -20,7 +20,8 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.fr.ElisionFilter; +import org.apache.lucene.analysis.util.CharArraySet; +import org.apache.lucene.analysis.util.ElisionFilter; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -28,14 +29,12 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.index.Index; import org.elasticsearch.index.settings.IndexSettings; -import java.util.Set; - /** * */ public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory { - private final Set articles; + private final CharArraySet articles; @Inject public ElisionTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { @@ -45,10 +44,6 @@ public class ElisionTokenFilterFactory extends AbstractTokenFilterFactory { @Override public TokenStream create(TokenStream tokenStream) { - if (articles == null) { - return new ElisionFilter(version, tokenStream); - } else { - return new ElisionFilter(version, tokenStream, articles); - } + return new ElisionFilter(tokenStream, articles); } } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java index 82996d0f666..e2db40e6345 100644 --- a/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/EnglishAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.en.EnglishAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class EnglishAnalyzerProvider extends AbstractIndexAnalyzerProvider analyzers; @@ -51,23 +48,13 @@ public final class FieldNameAnalyzer extends Analyzer { } @Override - public final TokenStream tokenStream(String fieldName, Reader reader) { - return getAnalyzer(fieldName).tokenStream(fieldName, reader); + protected Analyzer getWrappedAnalyzer(String fieldName) { + return getAnalyzer(fieldName); } @Override - public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { - return getAnalyzer(fieldName).reusableTokenStream(fieldName, reader); - } - - @Override - public int getPositionIncrementGap(String fieldName) { - return getAnalyzer(fieldName).getPositionIncrementGap(fieldName); - } - - @Override - public int getOffsetGap(Fieldable field) { - return getAnalyzer(field.name()).getOffsetGap(field); + protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) { + return components; } private Analyzer getAnalyzer(String name) { diff --git 
a/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java index d2d8029e969..098dfc6e884 100644 --- a/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/FinnishAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.fi.FinnishAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class FinnishAnalyzerProvider extends AbstractIndexAnalyzerProvider exclusions; + private final CharArraySet exclusions; @Inject public FrenchStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); - String[] stemExclusion = settings.getAsArray("stem_exclusion"); - if (stemExclusion.length > 0) { - this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion)); - } else { - this.exclusions = ImmutableSet.of(); - } + this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version); } @Override public TokenStream create(TokenStream tokenStream) { - return new FrenchStemFilter(tokenStream, exclusions); + return new FrenchStemFilter(new KeywordMarkerFilter(tokenStream, exclusions)); } } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java index c1c8ab46afa..3c2b463d74d 100644 --- a/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/GalicianAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.gl.GalicianAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class GalicianAnalyzerProvider extends AbstractIndexAnalyzerProvider exclusions; + private final CharArraySet exclusions; @Inject public GermanStemTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); - String[] stemExclusion = settings.getAsArray("stem_exclusion"); - if (stemExclusion.length > 0) { - this.exclusions = ImmutableSet.copyOf(Iterators.forArray(stemExclusion)); - } else { - this.exclusions = ImmutableSet.of(); - } + this.exclusions = Analysis.parseStemExclusion(settings, CharArraySet.EMPTY_SET, version); } @Override public TokenStream create(TokenStream tokenStream) { - return new GermanStemFilter(tokenStream, exclusions); + return new GermanStemFilter(new KeywordMarkerFilter(tokenStream, exclusions)); } } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/analysis/HindiAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/HindiAnalyzerProvider.java index 8185947f986..2df095e6a14 100644 --- 
a/src/main/java/org/elasticsearch/index/analysis/HindiAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/HindiAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.hi.HindiAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class HindiAnalyzerProvider extends AbstractIndexAnalyzerProvider stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version); + CharArraySet stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version); String sPattern = settings.get("pattern", "\\W+" /*PatternAnalyzer.NON_WORD_PATTERN*/); if (sPattern == null) { diff --git a/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java index 7e96ce349af..9a59c651f79 100644 --- a/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/PorterStemTokenFilterFactory.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.PorterStemFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.en.PorterStemFilter; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; diff --git a/src/main/java/org/elasticsearch/index/analysis/PortugueseAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/PortugueseAnalyzerProvider.java index 372d034f7eb..2e64c235c0c 100644 --- a/src/main/java/org/elasticsearch/index/analysis/PortugueseAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/PortugueseAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.pt.PortugueseAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class PortugueseAnalyzerProvider extends AbstractIndexAnalyzerProvider { - private static final ImmutableMap> defaultLanguageStopwords = MapBuilder.>newMapBuilder() + private static final ImmutableMap defaultLanguageStopwords = MapBuilder.newMapBuilder() .put("English", StopAnalyzer.ENGLISH_STOP_WORDS_SET) .put("Dutch", DutchAnalyzer.getDefaultStopSet()) .put("German", GermanAnalyzer.getDefaultStopSet()) @@ -66,8 +67,8 @@ public class SnowballAnalyzerProvider extends AbstractIndexAnalyzerProvider defaultStopwords = defaultLanguageStopwords.containsKey(language) ? defaultLanguageStopwords.get(language) : ImmutableSet.>of(); - Set stopWords = Analysis.parseStopWords(env, settings, defaultStopwords, version); + CharArraySet defaultStopwords = defaultLanguageStopwords.containsKey(language) ? 
defaultLanguageStopwords.get(language) : CharArraySet.EMPTY_SET; + CharArraySet stopWords = Analysis.parseStopWords(env, settings, defaultStopwords, version); analyzer = new SnowballAnalyzer(version, language, stopWords); } diff --git a/src/main/java/org/elasticsearch/index/analysis/SpanishAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/SpanishAnalyzerProvider.java index 2f6c19ee4d5..dce0e329dfa 100644 --- a/src/main/java/org/elasticsearch/index/analysis/SpanishAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/SpanishAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.es.SpanishAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class SpanishAnalyzerProvider extends AbstractIndexAnalyzerProvider stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version); + CharArraySet stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version); int maxTokenLength = settings.getAsInt("max_token_length", StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH); standardAnalyzer = new StandardAnalyzer(version, stopWords); standardAnalyzer.setMaxTokenLength(maxTokenLength); diff --git a/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzer.java b/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzer.java index 2b03fc82999..71ecf6d23cb 100644 --- a/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzer.java +++ b/src/main/java/org/elasticsearch/index/analysis/StandardHtmlStripAnalyzer.java @@ -47,9 +47,9 @@ public class StandardHtmlStripAnalyzer extends StopwordAnalyzerBase { tok = new StopFilter(matchVersion, tok, stopwords); return new TokenStreamComponents(src, tok) { @Override - protected boolean reset(final Reader reader) throws IOException { + protected void setReader(final Reader reader) throws IOException { src.setMaxTokenLength(StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH); - return super.reset(reader); + super.setReader(reader); } }; } diff --git a/src/main/java/org/elasticsearch/index/analysis/StemmerOverrideTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/StemmerOverrideTokenFilterFactory.java index 70ba9e142e1..d21bf73f21e 100644 --- a/src/main/java/org/elasticsearch/index/analysis/StemmerOverrideTokenFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/StemmerOverrideTokenFilterFactory.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilter; +import org.apache.lucene.analysis.util.CharArrayMap; import org.apache.lucene.util.Version; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.Strings; @@ -38,7 +39,7 @@ import java.util.Map; @AnalysisSettingsRequired public class StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactory { - private final Map dictionary; + private final CharArrayMap dictionary; @Inject public StemmerOverrideTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { @@ -48,7 +49,7 @@ public class 
StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactor if (rules == null) { throw new ElasticSearchIllegalArgumentException("stemmer override filter requires either `rules` or `rules_path` to be configured"); } - dictionary = new HashMap(); + dictionary = new CharArrayMap(version, rules.size(), false); parseRules(rules, dictionary, "=>"); } @@ -57,7 +58,7 @@ public class StemmerOverrideTokenFilterFactory extends AbstractTokenFilterFactor return new StemmerOverrideFilter(Version.LUCENE_32, tokenStream, dictionary); } - static void parseRules(List rules, Map rulesMap, String mappingSep) { + static void parseRules(List rules, CharArrayMap rulesMap, String mappingSep) { for (String rule : rules) { String key, override; List mapping = Strings.splitSmart(rule, mappingSep, false); diff --git a/src/main/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactory.java index 7baeb272c9e..668f04c43ce 100644 --- a/src/main/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/StemmerTokenFilterFactory.java @@ -19,7 +19,6 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.PorterStemFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.ar.ArabicStemFilter; import org.apache.lucene.analysis.bg.BulgarianStemFilter; @@ -31,6 +30,7 @@ import org.apache.lucene.analysis.el.GreekStemFilter; import org.apache.lucene.analysis.en.EnglishMinimalStemFilter; import org.apache.lucene.analysis.en.EnglishPossessiveFilter; import org.apache.lucene.analysis.en.KStemFilter; +import org.apache.lucene.analysis.en.PorterStemFilter; import org.apache.lucene.analysis.es.SpanishLightStemFilter; import org.apache.lucene.analysis.fi.FinnishLightStemFilter; import org.apache.lucene.analysis.fr.FrenchLightStemFilter; diff --git a/src/main/java/org/elasticsearch/index/analysis/StopAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/StopAnalyzerProvider.java index 0780301768d..7939c81ba2c 100644 --- a/src/main/java/org/elasticsearch/index/analysis/StopAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/StopAnalyzerProvider.java @@ -19,7 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.StopAnalyzer; +import org.apache.lucene.analysis.core.StopAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -27,8 +28,6 @@ import org.elasticsearch.env.Environment; import org.elasticsearch.index.Index; import org.elasticsearch.index.settings.IndexSettings; -import java.util.Set; - /** * */ @@ -39,7 +38,7 @@ public class StopAnalyzerProvider extends AbstractIndexAnalyzerProvider stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version); + CharArraySet stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version); this.stopAnalyzer = new StopAnalyzer(version, stopWords); } diff --git a/src/main/java/org/elasticsearch/index/analysis/StopTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/StopTokenFilterFactory.java index 15c361568f6..8c8e8929cc8 100644 --- a/src/main/java/org/elasticsearch/index/analysis/StopTokenFilterFactory.java +++ 
b/src/main/java/org/elasticsearch/index/analysis/StopTokenFilterFactory.java @@ -19,9 +19,10 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.StopAnalyzer; -import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.StopAnalyzer; +import org.apache.lucene.analysis.core.StopFilter; +import org.apache.lucene.analysis.util.CharArraySet; import org.apache.lucene.util.Version; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; @@ -37,7 +38,7 @@ import java.util.Set; */ public class StopTokenFilterFactory extends AbstractTokenFilterFactory { - private final Set stopWords; + private final CharArraySet stopWords; private final boolean ignoreCase; @@ -46,14 +47,15 @@ public class StopTokenFilterFactory extends AbstractTokenFilterFactory { @Inject public StopTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { super(index, indexSettings, name, settings); - this.stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version); this.ignoreCase = settings.getAsBoolean("ignore_case", false); - this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", version.onOrAfter(Version.LUCENE_29)); + this.stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, version, ignoreCase); + // LUCENE 4 UPGRADE: LUCENE_29 constant is no longer defined + this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", version.onOrAfter(Version.parseLeniently("LUCENE_29"))); } @Override public TokenStream create(TokenStream tokenStream) { - StopFilter filter = new StopFilter(version, tokenStream, stopWords, ignoreCase); + StopFilter filter = new StopFilter(version, tokenStream, stopWords); filter.setEnablePositionIncrements(enablePositionIncrements); return filter; } diff --git a/src/main/java/org/elasticsearch/index/analysis/SwedishAnalyzerProvider.java b/src/main/java/org/elasticsearch/index/analysis/SwedishAnalyzerProvider.java index 5bdb1462379..8e2b5b741dd 100644 --- a/src/main/java/org/elasticsearch/index/analysis/SwedishAnalyzerProvider.java +++ b/src/main/java/org/elasticsearch/index/analysis/SwedishAnalyzerProvider.java @@ -19,8 +19,8 @@ package org.elasticsearch.index.analysis; -import org.apache.lucene.analysis.CharArraySet; import org.apache.lucene.analysis.sv.SwedishAnalyzer; +import org.apache.lucene.analysis.util.CharArraySet; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.inject.assistedinject.Assisted; import org.elasticsearch.common.settings.Settings; @@ -40,7 +40,7 @@ public class SwedishAnalyzerProvider extends AbstractIndexAnalyzerProvider wordList; + protected final CharArraySet wordList; @Inject public AbstractCompoundWordTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, Environment env, @Assisted String name, @Assisted Settings settings) { From d820bfe11be547ac2778f3a6feb9ba1d6f348876 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 26 Oct 2012 12:23:45 +0200 Subject: [PATCH 020/146] lucene 4: Changed from BytesReference to Text as internal term representation for facet keys. Text now also implements comparable. 
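The ordering contract for the facet keys stays Lucene's byte-wise UTF-8 order: bytes compared as unsigned values, with a strict prefix sorting first. A minimal sketch of that comparison over plain byte arrays (the helper name here is illustrative, not part of the codebase):

    // Sketch: UTF-8 bytes compared as unsigned values; a strict prefix sorts first.
    static int compareUtf8(byte[] a, byte[] b) {
        final int stop = Math.min(a.length, b.length);
        for (int i = 0; i < stop; i++) {
            final int diff = (a[i] & 0xff) - (b[i] & 0xff);
            if (diff != 0) {
                return diff;
            }
        }
        // One is a prefix of the other, or they are equal.
        return a.length - b.length;
    }

With Text comparable, facet entries can sort via term.compareTo(other.term()) rather than reaching for the static comparator that previously lived on BytesReference.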
--- .../common/bytes/BytesReference.java | 51 ---------------- .../elasticsearch/common/text/BytesText.java | 10 +++- .../common/text/StringAndBytesText.java | 9 +++ .../elasticsearch/common/text/StringText.java | 5 ++ .../org/elasticsearch/common/text/Text.java | 2 +- .../text/UTF8SortedAsUnicodeComparator.java | 58 +++++++++++++++++++ .../search/facet/terms/TermsFacet.java | 5 +- .../terms/bytes/InternalByteTermsFacet.java | 11 ++-- .../terms/bytes/TermsByteFacetCollector.java | 1 - .../doubles/InternalDoubleTermsFacet.java | 11 ++-- .../doubles/TermsDoubleFacetCollector.java | 1 - .../terms/floats/InternalFloatTermsFacet.java | 15 +++-- .../floats/TermsFloatFacetCollector.java | 2 - .../terms/ints/InternalIntTermsFacet.java | 15 +++-- .../terms/ints/TermsIntFacetCollector.java | 2 - .../facet/terms/ip/InternalIpTermsFacet.java | 15 +++-- .../facet/terms/ip/TermsIpFacetCollector.java | 2 - .../terms/longs/InternalLongTermsFacet.java | 14 ++--- .../terms/shorts/InternalShortTermsFacet.java | 15 +++-- .../shorts/TermsShortFacetCollector.java | 2 - .../strings/InternalStringTermsFacet.java | 30 +++++----- .../ScriptTermsStringFieldFacetCollector.java | 2 +- .../strings/TermsStringFacetCollector.java | 1 - .../facet/termsstats/TermsStatsFacet.java | 5 +- .../InternalTermsStatsDoubleFacet.java | 14 ++--- .../longs/InternalTermsStatsLongFacet.java | 10 ++-- .../InternalTermsStatsStringFacet.java | 24 ++++---- 27 files changed, 175 insertions(+), 157 deletions(-) create mode 100644 src/main/java/org/elasticsearch/common/text/UTF8SortedAsUnicodeComparator.java diff --git a/src/main/java/org/elasticsearch/common/bytes/BytesReference.java b/src/main/java/org/elasticsearch/common/bytes/BytesReference.java index 67c4114ad19..b794fc3c143 100644 --- a/src/main/java/org/elasticsearch/common/bytes/BytesReference.java +++ b/src/main/java/org/elasticsearch/common/bytes/BytesReference.java @@ -137,55 +137,4 @@ public interface BytesReference { String toUtf8(); - // LUCENE 4 UPGRADE: Used by facets to order. Perhaps make this call implement Comparable. 
- public final static Comparator utf8SortedAsUnicodeSortOrder = new UTF8SortedAsUnicodeComparator(); - - public static class UTF8SortedAsUnicodeComparator implements Comparator { - - // Only singleton - private UTF8SortedAsUnicodeComparator() { - } - - public int compare(BytesReference a, BytesReference b) { - if (a.hasArray() && b.hasArray()) { - final byte[] aBytes = a.array(); - int aUpto = a.arrayOffset(); - final byte[] bBytes = b.array(); - int bUpto = b.arrayOffset(); - - final int aStop = aUpto + Math.min(a.length(), b.length()); - while (aUpto < aStop) { - int aByte = aBytes[aUpto++] & 0xff; - int bByte = bBytes[bUpto++] & 0xff; - - int diff = aByte - bByte; - if (diff != 0) { - return diff; - } - } - - // One is a prefix of the other, or, they are equal: - return a.length() - b.length(); - } else { - final byte[] aBytes = a.toBytes(); - int aUpto = 0; - final byte[] bBytes = b.toBytes(); - int bUpto = 0; - - final int aStop = aUpto + Math.min(a.length(), b.length()); - while (aUpto < aStop) { - int aByte = aBytes[aUpto++] & 0xff; - int bByte = bBytes[bUpto++] & 0xff; - - int diff = aByte - bByte; - if (diff != 0) { - return diff; - } - } - - // One is a prefix of the other, or, they are equal: - return a.length() - b.length(); - } - } - } } diff --git a/src/main/java/org/elasticsearch/common/text/BytesText.java b/src/main/java/org/elasticsearch/common/text/BytesText.java index 6059ad8a186..cf216b37550 100644 --- a/src/main/java/org/elasticsearch/common/text/BytesText.java +++ b/src/main/java/org/elasticsearch/common/text/BytesText.java @@ -22,6 +22,8 @@ package org.elasticsearch.common.text; import com.google.common.base.Charsets; import org.elasticsearch.common.bytes.BytesReference; +import java.util.Comparator; + /** * A {@link BytesReference} representation of the text, will always convert on the fly to a {@link String}. 
*/ @@ -71,4 +73,10 @@ public class BytesText implements Text { public boolean equals(Object obj) { return bytes().equals(((Text) obj).bytes()); } -} + + @Override + public int compareTo(Text text) { + return UTF8SortedAsUnicodeComparator.utf8SortedAsUnicodeSortOrder.compare(bytes(), text.bytes()); + } + +} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/common/text/StringAndBytesText.java b/src/main/java/org/elasticsearch/common/text/StringAndBytesText.java index e825e8ad846..86a8ab8bd24 100644 --- a/src/main/java/org/elasticsearch/common/text/StringAndBytesText.java +++ b/src/main/java/org/elasticsearch/common/text/StringAndBytesText.java @@ -97,4 +97,13 @@ public class StringAndBytesText implements Text { public boolean equals(Object obj) { return bytes().equals(((Text) obj).bytes()); } + + @Override + public int compareTo(Text other) { + if (text == null) { + return UTF8SortedAsUnicodeComparator.utf8SortedAsUnicodeSortOrder.compare(bytes, other.bytes()); + } else { + return text.compareTo(other.string()); + } + } } diff --git a/src/main/java/org/elasticsearch/common/text/StringText.java b/src/main/java/org/elasticsearch/common/text/StringText.java index 20183863b71..01840237911 100644 --- a/src/main/java/org/elasticsearch/common/text/StringText.java +++ b/src/main/java/org/elasticsearch/common/text/StringText.java @@ -83,4 +83,9 @@ public class StringText implements Text { // we use bytes here so we can be consistent with other text implementations return bytes().equals(((Text) obj).bytes()); } + + @Override + public int compareTo(Text text) { + return this.text.compareTo(text.string()); + } } diff --git a/src/main/java/org/elasticsearch/common/text/Text.java b/src/main/java/org/elasticsearch/common/text/Text.java index 7a77e8dd500..03dc18bfb7c 100644 --- a/src/main/java/org/elasticsearch/common/text/Text.java +++ b/src/main/java/org/elasticsearch/common/text/Text.java @@ -26,7 +26,7 @@ import org.elasticsearch.common.bytes.BytesReference; * so we can represent it in a more optimized manner in memory as well as serializing it over the * network as well as converting it to json format. */ -public interface Text { +public interface Text extends Comparable<Text> { /** * Are bytes available without the need to be converted into bytes when calling {@link #bytes()}. diff --git a/src/main/java/org/elasticsearch/common/text/UTF8SortedAsUnicodeComparator.java b/src/main/java/org/elasticsearch/common/text/UTF8SortedAsUnicodeComparator.java new file mode 100644 index 00000000000..e5af06060ed --- /dev/null +++ b/src/main/java/org/elasticsearch/common/text/UTF8SortedAsUnicodeComparator.java @@ -0,0 +1,58 @@ +package org.elasticsearch.common.text; + +import org.elasticsearch.common.bytes.BytesReference; + +import java.util.Comparator; + +// LUCENE 4 UPGRADE: Is this the right way of comparing BytesReference values inside Text instances? 
+// Copied from Lucene's BytesRef comparator +public class UTF8SortedAsUnicodeComparator implements Comparator { + + public final static Comparator utf8SortedAsUnicodeSortOrder = new UTF8SortedAsUnicodeComparator(); + + // Only singleton + private UTF8SortedAsUnicodeComparator() { + } + + public int compare(BytesReference a, BytesReference b) { + if (a.hasArray() && b.hasArray()) { + final byte[] aBytes = a.array(); + int aUpto = a.arrayOffset(); + final byte[] bBytes = b.array(); + int bUpto = b.arrayOffset(); + + final int aStop = aUpto + Math.min(a.length(), b.length()); + while (aUpto < aStop) { + int aByte = aBytes[aUpto++] & 0xff; + int bByte = bBytes[bUpto++] & 0xff; + + int diff = aByte - bByte; + if (diff != 0) { + return diff; + } + } + + // One is a prefix of the other, or, they are equal: + return a.length() - b.length(); + } else { + final byte[] aBytes = a.toBytes(); + int aUpto = 0; + final byte[] bBytes = b.toBytes(); + int bUpto = 0; + + final int aStop = aUpto + Math.min(a.length(), b.length()); + while (aUpto < aStop) { + int aByte = aBytes[aUpto++] & 0xff; + int bByte = bBytes[bUpto++] & 0xff; + + int diff = aByte - bByte; + if (diff != 0) { + return diff; + } + } + + // One is a prefix of the other, or, they are equal: + return a.length() - b.length(); + } + } +} diff --git a/src/main/java/org/elasticsearch/search/facet/terms/TermsFacet.java b/src/main/java/org/elasticsearch/search/facet/terms/TermsFacet.java index b67569be344..07e5d9ced08 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/TermsFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/TermsFacet.java @@ -22,6 +22,7 @@ package org.elasticsearch.search.facet.terms; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.text.Text; import org.elasticsearch.search.facet.Facet; import java.util.Comparator; @@ -41,9 +42,9 @@ public interface TermsFacet extends Facet, Iterable { public interface Entry extends Comparable { - BytesReference term(); + Text term(); - BytesReference getTerm(); + Text getTerm(); Number termAsNumber(); diff --git a/src/main/java/org/elasticsearch/search/facet/terms/bytes/InternalByteTermsFacet.java b/src/main/java/org/elasticsearch/search/facet/terms/bytes/InternalByteTermsFacet.java index dad3cbe0f28..df970366235 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/bytes/InternalByteTermsFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/bytes/InternalByteTermsFacet.java @@ -22,13 +22,12 @@ package org.elasticsearch.search.facet.terms.bytes; import com.google.common.collect.ImmutableList; import gnu.trove.iterator.TByteIntIterator; import gnu.trove.map.hash.TByteIntHashMap; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CacheRecycler; -import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.text.StringText; +import org.elasticsearch.common.text.Text; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilderString; import org.elasticsearch.search.facet.Facet; @@ -74,11 +73,11 @@ public class InternalByteTermsFacet extends InternalTermsFacet { this.count = count; } - public 
BytesReference term() { - return new BytesArray(Short.toString(term)); + public Text term() { + return new StringText(Short.toString(term)); } - public BytesReference getTerm() { + public Text getTerm() { return term(); } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteFacetCollector.java index 92bfd60c4a1..59ff3a083d3 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteFacetCollector.java @@ -24,7 +24,6 @@ import com.google.common.collect.ImmutableSet; import gnu.trove.iterator.TByteIntIterator; import gnu.trove.map.hash.TByteIntHashMap; import gnu.trove.set.hash.TByteHashSet; -import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; import org.apache.lucene.util.BytesRef; diff --git a/src/main/java/org/elasticsearch/search/facet/terms/doubles/InternalDoubleTermsFacet.java b/src/main/java/org/elasticsearch/search/facet/terms/doubles/InternalDoubleTermsFacet.java index 4522b036a27..c7757132d27 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/doubles/InternalDoubleTermsFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/doubles/InternalDoubleTermsFacet.java @@ -22,13 +22,12 @@ package org.elasticsearch.search.facet.terms.doubles; import com.google.common.collect.ImmutableList; import gnu.trove.iterator.TDoubleIntIterator; import gnu.trove.map.hash.TDoubleIntHashMap; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CacheRecycler; -import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.text.StringText; +import org.elasticsearch.common.text.Text; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilderString; import org.elasticsearch.search.facet.Facet; @@ -74,11 +73,11 @@ public class InternalDoubleTermsFacet extends InternalTermsFacet { this.count = count; } - public BytesReference term() { - return new BytesArray(Double.toString(term)); + public Text term() { + return new StringText(Double.toString(term)); } - public BytesReference getTerm() { + public Text getTerm() { return term(); } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleFacetCollector.java index 23968376171..bce37aaa9df 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleFacetCollector.java @@ -24,7 +24,6 @@ import com.google.common.collect.ImmutableSet; import gnu.trove.iterator.TDoubleIntIterator; import gnu.trove.map.hash.TDoubleIntHashMap; import gnu.trove.set.hash.TDoubleHashSet; -import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; import org.apache.lucene.util.BytesRef; diff --git a/src/main/java/org/elasticsearch/search/facet/terms/floats/InternalFloatTermsFacet.java 
b/src/main/java/org/elasticsearch/search/facet/terms/floats/InternalFloatTermsFacet.java index 8dac5e38a67..b16424e7f94 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/floats/InternalFloatTermsFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/floats/InternalFloatTermsFacet.java @@ -22,13 +22,12 @@ package org.elasticsearch.search.facet.terms.floats; import com.google.common.collect.ImmutableList; import gnu.trove.iterator.TFloatIntIterator; import gnu.trove.map.hash.TFloatIntHashMap; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CacheRecycler; -import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.text.StringText; +import org.elasticsearch.common.text.Text; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilderString; import org.elasticsearch.search.facet.Facet; @@ -74,11 +73,11 @@ public class InternalFloatTermsFacet extends InternalTermsFacet { this.count = count; } - public BytesReference term() { - return new BytesArray(Float.toString(term)); + public Text term() { + return new StringText(Float.toString(term)); } - public BytesReference getTerm() { + public Text getTerm() { return term(); } @@ -282,7 +281,7 @@ public class InternalFloatTermsFacet extends InternalTermsFacet { @Override public void readFrom(StreamInput in) throws IOException { - name = in.readUTF(); + name = in.readString(); comparatorType = ComparatorType.fromId(in.readByte()); requiredSize = in.readVInt(); missing = in.readVLong(); @@ -297,7 +296,7 @@ public class InternalFloatTermsFacet extends InternalTermsFacet { @Override public void writeTo(StreamOutput out) throws IOException { - out.writeUTF(name); + out.writeString(name); out.writeByte(comparatorType.id()); out.writeVInt(requiredSize); out.writeVLong(missing); diff --git a/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatFacetCollector.java index 19333a3426c..7c25fd7f13a 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatFacetCollector.java @@ -24,9 +24,7 @@ import com.google.common.collect.ImmutableSet; import gnu.trove.iterator.TFloatIntIterator; import gnu.trove.map.hash.TFloatIntHashMap; import gnu.trove.set.hash.TFloatHashSet; -import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalArgumentException; diff --git a/src/main/java/org/elasticsearch/search/facet/terms/ints/InternalIntTermsFacet.java b/src/main/java/org/elasticsearch/search/facet/terms/ints/InternalIntTermsFacet.java index b7738b7d193..644dc6acf40 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/ints/InternalIntTermsFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/ints/InternalIntTermsFacet.java @@ -22,13 +22,12 @@ package org.elasticsearch.search.facet.terms.ints; import com.google.common.collect.ImmutableList; import gnu.trove.iterator.TIntIntIterator; import 
gnu.trove.map.hash.TIntIntHashMap; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CacheRecycler; -import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.text.StringText; +import org.elasticsearch.common.text.Text; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilderString; import org.elasticsearch.search.facet.Facet; @@ -74,11 +73,11 @@ public class InternalIntTermsFacet extends InternalTermsFacet { this.count = count; } - public BytesReference term() { - return new BytesArray(Integer.toString(term)); + public Text term() { + return new StringText(Integer.toString(term)); } - public BytesReference getTerm() { + public Text getTerm() { return term(); } @@ -279,7 +278,7 @@ public class InternalIntTermsFacet extends InternalTermsFacet { @Override public void readFrom(StreamInput in) throws IOException { - name = in.readUTF(); + name = in.readString(); comparatorType = ComparatorType.fromId(in.readByte()); requiredSize = in.readVInt(); missing = in.readVLong(); @@ -294,7 +293,7 @@ public class InternalIntTermsFacet extends InternalTermsFacet { @Override public void writeTo(StreamOutput out) throws IOException { - out.writeUTF(name); + out.writeString(name); out.writeByte(comparatorType.id()); out.writeVInt(requiredSize); out.writeVLong(missing); diff --git a/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntFacetCollector.java index e06ca78c9e2..be37b6da701 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntFacetCollector.java @@ -24,9 +24,7 @@ import com.google.common.collect.ImmutableSet; import gnu.trove.iterator.TIntIntIterator; import gnu.trove.map.hash.TIntIntHashMap; import gnu.trove.set.hash.TIntHashSet; -import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalArgumentException; diff --git a/src/main/java/org/elasticsearch/search/facet/terms/ip/InternalIpTermsFacet.java b/src/main/java/org/elasticsearch/search/facet/terms/ip/InternalIpTermsFacet.java index 447a4c6c341..7fb5dcb92aa 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/ip/InternalIpTermsFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/ip/InternalIpTermsFacet.java @@ -22,13 +22,12 @@ package org.elasticsearch.search.facet.terms.ip; import com.google.common.collect.ImmutableList; import gnu.trove.iterator.TLongIntIterator; import gnu.trove.map.hash.TLongIntHashMap; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CacheRecycler; -import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.text.StringText; +import org.elasticsearch.common.text.Text; import 
org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilderString; import org.elasticsearch.index.mapper.ip.IpFieldMapper; @@ -75,11 +74,11 @@ public class InternalIpTermsFacet extends InternalTermsFacet { this.count = count; } - public BytesReference term() { - return new BytesArray(IpFieldMapper.longToIp(term)); + public Text term() { + return new StringText(IpFieldMapper.longToIp(term)); } - public BytesReference getTerm() { + public Text getTerm() { return term(); } @@ -284,7 +283,7 @@ public class InternalIpTermsFacet extends InternalTermsFacet { @Override public void readFrom(StreamInput in) throws IOException { - name = in.readUTF(); + name = in.readString(); comparatorType = ComparatorType.fromId(in.readByte()); requiredSize = in.readVInt(); missing = in.readVLong(); @@ -299,7 +298,7 @@ public class InternalIpTermsFacet extends InternalTermsFacet { @Override public void writeTo(StreamOutput out) throws IOException { - out.writeUTF(name); + out.writeString(name); out.writeByte(comparatorType.id()); out.writeVInt(requiredSize); out.writeVLong(missing); diff --git a/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpFacetCollector.java index 9b2f0370e16..9a7f127e97f 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpFacetCollector.java @@ -22,9 +22,7 @@ package org.elasticsearch.search.facet.terms.ip; import com.google.common.collect.ImmutableList; import gnu.trove.iterator.TLongIntIterator; import gnu.trove.map.hash.TLongIntHashMap; -import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.CacheRecycler; diff --git a/src/main/java/org/elasticsearch/search/facet/terms/longs/InternalLongTermsFacet.java b/src/main/java/org/elasticsearch/search/facet/terms/longs/InternalLongTermsFacet.java index 4a0ebdcfddd..f3c8d18a564 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/longs/InternalLongTermsFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/longs/InternalLongTermsFacet.java @@ -23,11 +23,11 @@ import com.google.common.collect.ImmutableList; import gnu.trove.iterator.TLongIntIterator; import gnu.trove.map.hash.TLongIntHashMap; import org.elasticsearch.common.CacheRecycler; -import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.text.StringText; +import org.elasticsearch.common.text.Text; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilderString; import org.elasticsearch.search.facet.Facet; @@ -73,11 +73,11 @@ public class InternalLongTermsFacet extends InternalTermsFacet { this.count = count; } - public BytesReference term() { - return new BytesArray(Long.toString(term)); + public Text term() { + return new StringText(Long.toString(term)); } - public BytesReference getTerm() { + public Text getTerm() { return term(); } @@ -281,7 +281,7 @@ public class InternalLongTermsFacet extends 
InternalTermsFacet { @Override public void readFrom(StreamInput in) throws IOException { - name = in.readUTF(); + name = in.readString(); comparatorType = ComparatorType.fromId(in.readByte()); requiredSize = in.readVInt(); missing = in.readVLong(); @@ -296,7 +296,7 @@ public class InternalLongTermsFacet extends InternalTermsFacet { @Override public void writeTo(StreamOutput out) throws IOException { - out.writeUTF(name); + out.writeString(name); out.writeByte(comparatorType.id()); out.writeVInt(requiredSize); out.writeVLong(missing); diff --git a/src/main/java/org/elasticsearch/search/facet/terms/shorts/InternalShortTermsFacet.java b/src/main/java/org/elasticsearch/search/facet/terms/shorts/InternalShortTermsFacet.java index c199d631e28..e5cd4ace763 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/shorts/InternalShortTermsFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/shorts/InternalShortTermsFacet.java @@ -22,13 +22,12 @@ package org.elasticsearch.search.facet.terms.shorts; import com.google.common.collect.ImmutableList; import gnu.trove.iterator.TShortIntIterator; import gnu.trove.map.hash.TShortIntHashMap; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CacheRecycler; -import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.text.StringText; +import org.elasticsearch.common.text.Text; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilderString; import org.elasticsearch.search.facet.Facet; @@ -74,11 +73,11 @@ public class InternalShortTermsFacet extends InternalTermsFacet { this.count = count; } - public BytesReference term() { - return new BytesArray(Short.toString(term)); + public Text term() { + return new StringText(Short.toString(term)); } - public BytesReference getTerm() { + public Text getTerm() { return term(); } @@ -279,7 +278,7 @@ public class InternalShortTermsFacet extends InternalTermsFacet { @Override public void readFrom(StreamInput in) throws IOException { - name = in.readUTF(); + name = in.readString(); comparatorType = ComparatorType.fromId(in.readByte()); requiredSize = in.readVInt(); missing = in.readVLong(); @@ -294,7 +293,7 @@ public class InternalShortTermsFacet extends InternalTermsFacet { @Override public void writeTo(StreamOutput out) throws IOException { - out.writeUTF(name); + out.writeString(name); out.writeByte(comparatorType.id()); out.writeVInt(requiredSize); out.writeVLong(missing); diff --git a/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortFacetCollector.java index a362b18a2f5..079ec61f15b 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortFacetCollector.java @@ -24,9 +24,7 @@ import com.google.common.collect.ImmutableSet; import gnu.trove.iterator.TShortIntIterator; import gnu.trove.map.hash.TShortIntHashMap; import gnu.trove.set.hash.TShortHashSet; -import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; import 
org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalArgumentException; diff --git a/src/main/java/org/elasticsearch/search/facet/terms/strings/InternalStringTermsFacet.java b/src/main/java/org/elasticsearch/search/facet/terms/strings/InternalStringTermsFacet.java index a6d217e0b64..cf5a19d99cd 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/strings/InternalStringTermsFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/strings/InternalStringTermsFacet.java @@ -25,10 +25,12 @@ import gnu.trove.map.hash.TObjectIntHashMap; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.CacheRecycler; import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.collect.BoundedTreeSet; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.text.BytesText; +import org.elasticsearch.common.text.StringText; +import org.elasticsearch.common.text.Text; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilderString; import org.elasticsearch.search.facet.Facet; @@ -66,36 +68,36 @@ public class InternalStringTermsFacet extends InternalTermsFacet { public static class TermEntry implements Entry { - private BytesReference term; + private Text term; private int count; public TermEntry(String term, int count) { - this.term = new BytesArray(term); + this.term = new StringText(term); this.count = count; } public TermEntry(BytesRef term, int count) { - this.term = new BytesArray(term); + this.term = new BytesText(new BytesArray(term)); this.count = count; } - public TermEntry(BytesReference term, int count) { + public TermEntry(Text term, int count) { this.term = term; this.count = count; } - public BytesReference term() { + public Text term() { return term; } - public BytesReference getTerm() { + public Text getTerm() { return term; } @Override public Number termAsNumber() { // LUCENE 4 UPGRADE: better way? 
- return Double.parseDouble(term.toUtf8()); + return Double.parseDouble(term.string()); } @Override @@ -113,7 +115,7 @@ public class InternalStringTermsFacet extends InternalTermsFacet { @Override public int compareTo(Entry o) { - int i = BytesReference.utf8SortedAsUnicodeSortOrder.compare(this.term, o.term()); + int i = this.term.compareTo(o.term()); if (i == 0) { i = count - o.count(); if (i == 0) { @@ -227,7 +229,7 @@ public class InternalStringTermsFacet extends InternalTermsFacet { return facets.get(0); } InternalStringTermsFacet first = (InternalStringTermsFacet) facets.get(0); - TObjectIntHashMap<BytesReference> aggregated = CacheRecycler.popObjectIntMap(); + TObjectIntHashMap<Text> aggregated = CacheRecycler.popObjectIntMap(); long missing = 0; long total = 0; for (Facet facet : facets) { @@ -240,7 +242,7 @@ } BoundedTreeSet ordered = new BoundedTreeSet(first.comparatorType.comparator(), first.requiredSize); - for (TObjectIntIterator<BytesReference> it = aggregated.iterator(); it.hasNext(); ) { + for (TObjectIntIterator<Text> it = aggregated.iterator(); it.hasNext(); ) { it.advance(); ordered.add(new TermEntry(it.key(), it.value())); } @@ -290,7 +292,7 @@ public class InternalStringTermsFacet extends InternalTermsFacet { @Override public void readFrom(StreamInput in) throws IOException { - name = in.readUTF(); + name = in.readString(); comparatorType = ComparatorType.fromId(in.readByte()); requiredSize = in.readVInt(); missing = in.readVLong(); @@ -299,7 +301,7 @@ public class InternalStringTermsFacet extends InternalTermsFacet { int size = in.readVInt(); entries = new ArrayList(size); for (int i = 0; i < size; i++) { - entries.add(new TermEntry(in.readBytesReference(), in.readVInt())); + entries.add(new TermEntry(in.readText(), in.readVInt())); } } @@ -313,7 +315,7 @@ public class InternalStringTermsFacet extends InternalTermsFacet { out.writeVInt(entries.size()); for (Entry entry : entries) { - out.writeBytesReference(entry.term()); + out.writeText(entry.term()); out.writeVInt(entry.count()); } } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/strings/ScriptTermsStringFieldFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/strings/ScriptTermsStringFieldFacetCollector.java index bf2dc8e0898..48740b442a3 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/strings/ScriptTermsStringFieldFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/strings/ScriptTermsStringFieldFacetCollector.java @@ -135,7 +135,7 @@ public class ScriptTermsStringFieldFacetCollector extends AbstractFacetCollector } private boolean match(String value) { - if (excluded != null && excluded.contains(value)) { + if (excluded != null && excluded.contains(new BytesRef(value))) { return false; } if (matcher != null && !matcher.reset(value).matches()) { diff --git a/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java index 6706b0d6cd9..5c90c1c6d40 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java @@ -23,7 +23,6 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import gnu.trove.iterator.TObjectIntIterator; import gnu.trove.map.hash.TObjectIntHashMap; -import org.apache.lucene.index.AtomicReader; import 
org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; import org.apache.lucene.util.BytesRef; diff --git a/src/main/java/org/elasticsearch/search/facet/termsstats/TermsStatsFacet.java b/src/main/java/org/elasticsearch/search/facet/termsstats/TermsStatsFacet.java index d2b5d85e25c..7eaad2f47e0 100644 --- a/src/main/java/org/elasticsearch/search/facet/termsstats/TermsStatsFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/termsstats/TermsStatsFacet.java @@ -22,6 +22,7 @@ package org.elasticsearch.search.facet.termsstats; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.bytes.BytesReference; +import org.elasticsearch.common.text.Text; import org.elasticsearch.search.facet.Facet; import java.util.Comparator; @@ -394,9 +395,9 @@ public interface TermsStatsFacet extends Facet, Iterable public interface Entry extends Comparable { - BytesReference term(); + Text term(); - BytesReference getTerm(); + Text getTerm(); Number termAsNumber(); diff --git a/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/InternalTermsStatsDoubleFacet.java b/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/InternalTermsStatsDoubleFacet.java index 5c2894a8730..850d588b684 100644 --- a/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/InternalTermsStatsDoubleFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/InternalTermsStatsDoubleFacet.java @@ -21,10 +21,10 @@ package org.elasticsearch.search.facet.termsstats.doubles; import com.google.common.collect.ImmutableList; import org.elasticsearch.common.CacheRecycler; -import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.text.StringText; +import org.elasticsearch.common.text.Text; import org.elasticsearch.common.trove.ExtTDoubleObjectHashMap; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilderString; @@ -76,12 +76,12 @@ public class InternalTermsStatsDoubleFacet extends InternalTermsStatsFacet { } @Override - public BytesReference term() { - return new BytesArray(Double.toString(term)); + public Text term() { + return new StringText(Double.toString(term)); } @Override - public BytesReference getTerm() { + public Text getTerm() { return term(); } @@ -341,7 +341,7 @@ public class InternalTermsStatsDoubleFacet extends InternalTermsStatsFacet { @Override public void readFrom(StreamInput in) throws IOException { - name = in.readUTF(); + name = in.readString(); comparatorType = ComparatorType.fromId(in.readByte()); requiredSize = in.readVInt(); missing = in.readVLong(); @@ -355,7 +355,7 @@ public class InternalTermsStatsDoubleFacet extends InternalTermsStatsFacet { @Override public void writeTo(StreamOutput out) throws IOException { - out.writeUTF(name); + out.writeString(name); out.writeByte(comparatorType.id()); out.writeVInt(requiredSize); out.writeVLong(missing); diff --git a/src/main/java/org/elasticsearch/search/facet/termsstats/longs/InternalTermsStatsLongFacet.java b/src/main/java/org/elasticsearch/search/facet/termsstats/longs/InternalTermsStatsLongFacet.java index 0e77fd785da..48edaaed09f 100644 --- a/src/main/java/org/elasticsearch/search/facet/termsstats/longs/InternalTermsStatsLongFacet.java +++ 
b/src/main/java/org/elasticsearch/search/facet/termsstats/longs/InternalTermsStatsLongFacet.java @@ -21,10 +21,10 @@ package org.elasticsearch.search.facet.termsstats.longs; import com.google.common.collect.ImmutableList; import org.elasticsearch.common.CacheRecycler; -import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.text.StringText; +import org.elasticsearch.common.text.Text; import org.elasticsearch.common.trove.ExtTLongObjectHashMap; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilderString; @@ -76,12 +76,12 @@ public class InternalTermsStatsLongFacet extends InternalTermsStatsFacet { } @Override - public BytesReference term() { - return new BytesArray(Long.toString(term)); + public Text term() { + return new StringText(Long.toString(term)); } @Override - public BytesReference getTerm() { + public Text getTerm() { return term(); } diff --git a/src/main/java/org/elasticsearch/search/facet/termsstats/strings/InternalTermsStatsStringFacet.java b/src/main/java/org/elasticsearch/search/facet/termsstats/strings/InternalTermsStatsStringFacet.java index 8c5150a5860..6ac49a81810 100644 --- a/src/main/java/org/elasticsearch/search/facet/termsstats/strings/InternalTermsStatsStringFacet.java +++ b/src/main/java/org/elasticsearch/search/facet/termsstats/strings/InternalTermsStatsStringFacet.java @@ -26,6 +26,8 @@ import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.text.BytesText; +import org.elasticsearch.common.text.Text; import org.elasticsearch.common.trove.ExtTHashMap; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilderString; @@ -60,7 +62,7 @@ public class InternalTermsStatsStringFacet extends InternalTermsStatsFacet { public static class StringEntry implements Entry { - BytesReference term; + Text term; long count; long totalCount; double total; @@ -68,10 +70,10 @@ public class InternalTermsStatsStringFacet extends InternalTermsStatsFacet { double max; public StringEntry(BytesRef term, long count, long totalCount, double total, double min, double max) { - this(new BytesArray(term), count, totalCount, total, min, max); + this(new BytesText(new BytesArray(term)), count, totalCount, total, min, max); } - public StringEntry(BytesReference term, long count, long totalCount, double total, double min, double max) { + public StringEntry(Text term, long count, long totalCount, double total, double min, double max) { this.term = term; this.count = count; this.totalCount = totalCount; @@ -81,18 +83,18 @@ public class InternalTermsStatsStringFacet extends InternalTermsStatsFacet { } @Override - public BytesReference term() { + public Text term() { return term; } @Override - public BytesReference getTerm() { + public Text getTerm() { return term(); } @Override public Number termAsNumber() { - return Double.parseDouble(term.toUtf8()); + return Double.parseDouble(term.string()); } @Override @@ -164,8 +166,8 @@ public class InternalTermsStatsStringFacet extends InternalTermsStatsFacet { } @Override - public int compareTo(Entry o) { - return 
BytesReference.utf8SortedAsUnicodeSortOrder.compare(this.term, o.term()); + public int compareTo(Entry other) { + return term.compareTo(other.term()); } } @@ -257,7 +259,7 @@ public class InternalTermsStatsStringFacet extends InternalTermsStatsFacet { return facets.get(0); } int missing = 0; - ExtTHashMap<BytesReference, StringEntry> map = CacheRecycler.popHashMap(); + ExtTHashMap<Text, StringEntry> map = CacheRecycler.popHashMap(); for (Facet facet : facets) { InternalTermsStatsStringFacet tsFacet = (InternalTermsStatsStringFacet) facet; missing += tsFacet.missing; @@ -353,7 +355,7 @@ public class InternalTermsStatsStringFacet extends InternalTermsStatsFacet { int size = in.readVInt(); entries = new ArrayList(size); for (int i = 0; i < size; i++) { - entries.add(new StringEntry(in.readBytesReference(), in.readVLong(), in.readVLong(), in.readDouble(), in.readDouble(), in.readDouble())); + entries.add(new StringEntry(in.readText(), in.readVLong(), in.readVLong(), in.readDouble(), in.readDouble(), in.readDouble())); } } @@ -366,7 +368,7 @@ public class InternalTermsStatsStringFacet extends InternalTermsStatsFacet { out.writeVInt(entries.size()); for (Entry entry : entries) { - out.writeBytesReference(entry.term()); + out.writeText(entry.term()); out.writeVLong(entry.count()); out.writeVLong(entry.totalCount()); out.writeDouble(entry.total()); From 48b8d0544fcb2b3cca75373a48816eb50d852f06 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 26 Oct 2012 12:40:14 +0200 Subject: [PATCH 021/146] lucene 4: Moved SearchScript from IndexReader to AtomicReader. This also touches the search/lookup classes --- .../elasticsearch/script/AbstractSearchScript.java | 4 ++-- .../java/org/elasticsearch/script/SearchScript.java | 3 ++- .../script/mvel/MvelScriptEngineService.java | 4 ++-- .../org/elasticsearch/search/lookup/DocLookup.java | 5 +++-- .../org/elasticsearch/search/lookup/FieldLookup.java | 7 ++++--- .../elasticsearch/search/lookup/FieldsLookup.java | 5 +++-- .../elasticsearch/search/lookup/SearchLookup.java | 3 ++- .../elasticsearch/search/lookup/SourceLookup.java | 12 ++++++++---- 8 files changed, 26 insertions(+), 17 deletions(-) diff --git a/src/main/java/org/elasticsearch/script/AbstractSearchScript.java b/src/main/java/org/elasticsearch/script/AbstractSearchScript.java index 46b691ba329..002fcf8eb44 100644 --- a/src/main/java/org/elasticsearch/script/AbstractSearchScript.java +++ b/src/main/java/org/elasticsearch/script/AbstractSearchScript.java @@ -19,7 +19,7 @@ package org.elasticsearch.script; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.search.Scorer; import org.elasticsearch.search.lookup.DocLookup; import org.elasticsearch.search.lookup.FieldsLookup; @@ -83,7 +83,7 @@ public abstract class AbstractSearchScript extends AbstractExecutableScript impl } @Override - public void setNextReader(IndexReader reader) { + public void setNextReader(AtomicReader reader) { lookup.setNextReader(reader); } diff --git a/src/main/java/org/elasticsearch/script/SearchScript.java b/src/main/java/org/elasticsearch/script/SearchScript.java index 2ea16f07fcc..b8fbd9d81f7 100644 --- a/src/main/java/org/elasticsearch/script/SearchScript.java +++ b/src/main/java/org/elasticsearch/script/SearchScript.java @@ -19,6 +19,7 @@ package org.elasticsearch.script; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; @@ -33,7 +34,7 @@ public interface SearchScript extends ExecutableScript { void setScorer(Scorer 
scorer); - void setNextReader(IndexReader reader); + void setNextReader(AtomicReader reader); void setNextDocId(int doc); diff --git a/src/main/java/org/elasticsearch/script/mvel/MvelScriptEngineService.java b/src/main/java/org/elasticsearch/script/mvel/MvelScriptEngineService.java index e3db194c3e2..39e89225472 100644 --- a/src/main/java/org/elasticsearch/script/mvel/MvelScriptEngineService.java +++ b/src/main/java/org/elasticsearch/script/mvel/MvelScriptEngineService.java @@ -19,7 +19,7 @@ package org.elasticsearch.script.mvel; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.search.Scorer; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.component.AbstractComponent; @@ -163,7 +163,7 @@ public class MvelScriptEngineService extends AbstractComponent implements Script } @Override - public void setNextReader(IndexReader reader) { + public void setNextReader(AtomicReader reader) { lookup.setNextReader(reader); } diff --git a/src/main/java/org/elasticsearch/search/lookup/DocLookup.java b/src/main/java/org/elasticsearch/search/lookup/DocLookup.java index f4fe9ddb689..9e72a8d6553 100644 --- a/src/main/java/org/elasticsearch/search/lookup/DocLookup.java +++ b/src/main/java/org/elasticsearch/search/lookup/DocLookup.java @@ -20,6 +20,7 @@ package org.elasticsearch.search.lookup; import com.google.common.collect.Maps; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; import org.elasticsearch.ElasticSearchException; @@ -52,7 +53,7 @@ public class DocLookup implements Map { @Nullable private final String[] types; - private IndexReader reader; + private AtomicReader reader; private Scorer scorer; @@ -72,7 +73,7 @@ public class DocLookup implements Map { return this.fieldDataCache; } - public void setNextReader(IndexReader reader) { + public void setNextReader(AtomicReader reader) { if (this.reader == reader) { // if we are called with the same reader, don't invalidate source return; } diff --git a/src/main/java/org/elasticsearch/search/lookup/FieldLookup.java b/src/main/java/org/elasticsearch/search/lookup/FieldLookup.java index 165497c45f1..eab43acfb8b 100644 --- a/src/main/java/org/elasticsearch/search/lookup/FieldLookup.java +++ b/src/main/java/org/elasticsearch/search/lookup/FieldLookup.java @@ -21,6 +21,7 @@ package org.elasticsearch.search.lookup; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; import org.elasticsearch.index.mapper.FieldMapper; import java.util.ArrayList; @@ -84,7 +85,7 @@ public class FieldLookup { } valueLoaded = true; value = null; - Fieldable field = doc.getFieldable(mapper.names().indexName()); + IndexableField field = doc.getField(mapper.names().indexName()); if (field == null) { return null; } @@ -98,8 +99,8 @@ public class FieldLookup { } valuesLoaded = true; values.clear(); - Fieldable[] fields = doc.getFieldables(mapper.names().indexName()); - for (Fieldable field : fields) { + IndexableField[] fields = doc.getFields(mapper.names().indexName()); + for (IndexableField field : fields) { values.add(mapper.value(field)); } return values; diff --git a/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java b/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java index d3bd8e2ee7b..9562a6dfaef 100644 --- a/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java +++ 
b/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java @@ -20,6 +20,7 @@ package org.elasticsearch.search.lookup; import com.google.common.collect.Maps; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.ElasticSearchParseException; @@ -44,7 +45,7 @@ public class FieldsLookup implements Map { @Nullable private final String[] types; - private IndexReader reader; + private AtomicReader reader; private int docId = -1; @@ -57,7 +58,7 @@ public class FieldsLookup implements Map { this.types = types; } - public void setNextReader(IndexReader reader) { + public void setNextReader(AtomicReader reader) { if (this.reader == reader) { // if we are called with the same reader, don't invalidate source return; } diff --git a/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java b/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java index c5b3d1a4819..566ed5575f5 100644 --- a/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java +++ b/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java @@ -20,6 +20,7 @@ package org.elasticsearch.search.lookup; import com.google.common.collect.ImmutableMap; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; import org.elasticsearch.common.Nullable; @@ -66,7 +67,7 @@ public class SearchLookup { docMap.setScorer(scorer); } - public void setNextReader(IndexReader reader) { + public void setNextReader(AtomicReader reader) { docMap.setNextReader(reader); sourceLookup.setNextReader(reader); fieldsLookup.setNextReader(reader); diff --git a/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java b/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java index 72d8b7a5286..fe313c0b2b3 100644 --- a/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java +++ b/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java @@ -22,7 +22,10 @@ package org.elasticsearch.search.lookup; import com.google.common.collect.ImmutableMap; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchParseException; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.xcontent.XContentHelper; @@ -41,7 +44,7 @@ import java.util.Set; // TODO: If we are processing it in the per hit fetch phase, we can initialize it with a source if it was loaded.. 
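The setNextReader(AtomicReader) signatures introduced by this patch reflect Lucene 4's per-segment model: a top-level reader is now a composite whose segment leaves are visited one at a time, and per-segment state (such as a cached _source) is invalidated on each leaf change. A minimal sketch of that leaf-walking pattern, assuming a Lucene 4.0 classpath (the class and method names below are illustrative, not part of this patch):

    import java.io.IOException;
    import org.apache.lucene.index.AtomicReader;
    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.store.Directory;

    public class LeafVisitor {
        // Visits every segment of a composite reader, the same way Lucene 4
        // feeds collectors and scripts one AtomicReader at a time.
        public static void visitLeaves(Directory dir) throws IOException {
            DirectoryReader top = DirectoryReader.open(dir);
            try {
                for (AtomicReaderContext ctx : top.leaves()) {
                    AtomicReader leaf = ctx.reader();        // one segment
                    System.out.println("segment maxDoc=" + leaf.maxDoc()
                            + " docBase=" + ctx.docBase);    // global doc offset
                }
            } finally {
                top.close();
            }
        }
    }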
public class SourceLookup implements Map { - private IndexReader reader; + private AtomicReader reader; private int docId = -1; @@ -62,11 +65,12 @@ public class SourceLookup implements Map { } try { Document doc = reader.document(docId, SourceFieldSelector.INSTANCE); - Fieldable sourceField = doc.getFieldable(SourceFieldMapper.NAME); + IndexableField sourceField = doc.getField(SourceFieldMapper.NAME); if (sourceField == null) { source = ImmutableMap.of(); } else { - this.source = sourceAsMap(sourceField.getBinaryValue(), sourceField.getBinaryOffset(), sourceField.getBinaryLength()); + BytesRef source = sourceField.binaryValue(); + this.source = sourceAsMap(source.bytes, source.offset, source.length); } } catch (Exception e) { throw new ElasticSearchParseException("failed to parse / load source", e); @@ -82,7 +86,7 @@ public class SourceLookup implements Map { return XContentHelper.convertToMap(bytes, offset, length, false).v2(); } - public void setNextReader(IndexReader reader) { + public void setNextReader(AtomicReader reader) { if (this.reader == reader) { // if we are called with the same reader, don't invalidate source return; } From 65ce3aea57dfbbaed5e595134420e923f3998c19 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 26 Oct 2012 12:52:37 +0200 Subject: [PATCH 022/146] lucene 4: Upgraded the function/sort classes. --- .../DoubleFieldsFunctionDataComparator.java | 22 +++++++++++++------ .../StringFieldsFunctionDataComparator.java | 20 ++++++++++++----- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/field/function/sort/DoubleFieldsFunctionDataComparator.java b/src/main/java/org/elasticsearch/index/field/function/sort/DoubleFieldsFunctionDataComparator.java index 6a567f9ab3b..d3aded6b2fa 100644 --- a/src/main/java/org/elasticsearch/index/field/function/sort/DoubleFieldsFunctionDataComparator.java +++ b/src/main/java/org/elasticsearch/index/field/function/sort/DoubleFieldsFunctionDataComparator.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.field.function.sort; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.SortField; @@ -32,7 +32,7 @@ import java.io.IOException; * */ // LUCENE MONITOR: Monitor against FieldComparator.Double -public class DoubleFieldsFunctionDataComparator extends FieldComparator { +public class DoubleFieldsFunctionDataComparator extends FieldComparator<Double> { public static FieldDataType.ExtendedFieldComparatorSource comparatorSource(SearchScript script) { return new InnerSource(script); @@ -53,7 +53,7 @@ public class DoubleFieldsFunctionDataComparator extends FieldComparator { @Override public SortField.Type reducedType() { - return SortField.DOUBLE; + return SortField.Type.DOUBLE; } } @@ -68,8 +68,9 @@ public class DoubleFieldsFunctionDataComparator extends FieldComparator { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - script.setNextReader(reader); + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + script.setNextReader(context.reader()); + return this; } @Override @@ -103,6 +104,13 @@ public class DoubleFieldsFunctionDataComparator extends FieldComparator { } } + @Override + public int compareDocToValue(int doc, Double val2) throws IOException { + script.setNextDocId(doc); + double val1 = script.runAsDouble(); + return 
Double.compare(val1, val2); + } + @Override public void copy(int slot, int doc) { script.setNextDocId(doc); @@ -115,7 +123,7 @@ public class DoubleFieldsFunctionDataComparator extends FieldComparator { } @Override - public Comparable value(int slot) { - return Double.valueOf(values[slot]); + public Double value(int slot) { + return values[slot]; } } diff --git a/src/main/java/org/elasticsearch/index/field/function/sort/StringFieldsFunctionDataComparator.java b/src/main/java/org/elasticsearch/index/field/function/sort/StringFieldsFunctionDataComparator.java index ddf9eb81c25..20c8534910c 100644 --- a/src/main/java/org/elasticsearch/index/field/function/sort/StringFieldsFunctionDataComparator.java +++ b/src/main/java/org/elasticsearch/index/field/function/sort/StringFieldsFunctionDataComparator.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.field.function.sort; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.Scorer; import org.apache.lucene.search.SortField; @@ -31,7 +31,7 @@ import java.io.IOException; /** * */ -public class StringFieldsFunctionDataComparator extends FieldComparator { +public class StringFieldsFunctionDataComparator extends FieldComparator<String> { public static FieldDataType.ExtendedFieldComparatorSource comparatorSource(SearchScript script) { return new InnerSource(script); @@ -52,7 +52,7 @@ public class StringFieldsFunctionDataComparator extends FieldComparator { @Override public SortField.Type reducedType() { - return SortField.STRING; + return SortField.Type.STRING; } } @@ -68,8 +68,9 @@ public class StringFieldsFunctionDataComparator extends FieldComparator { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - script.setNextReader(reader); + public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { + script.setNextReader(context.reader()); + return this; } @Override @@ -108,6 +109,13 @@ public class StringFieldsFunctionDataComparator extends FieldComparator { return bottom.compareTo(val2); } + @Override + public int compareDocToValue(int doc, String val2) throws IOException { + script.setNextDocId(doc); + String val1 = script.run().toString(); + return val1.compareTo(val2); + } + @Override public void copy(int slot, int doc) { script.setNextDocId(doc); @@ -120,7 +128,7 @@ public class StringFieldsFunctionDataComparator extends FieldComparator { } @Override - public Comparable value(int slot) { + public String value(int slot) { return values[slot]; } } From 4b84078f9128936ba1246e84820ee1c549489861 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Fri, 26 Oct 2012 14:11:11 +0200 Subject: [PATCH 023/146] lucene 4: text comparator should always work on bytes --- .../java/org/elasticsearch/common/text/BytesText.java | 5 +---- .../org/elasticsearch/common/text/StringAndBytesText.java | 8 ++------ .../java/org/elasticsearch/common/text/StringText.java | 2 +- 3 files changed, 4 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/text/BytesText.java b/src/main/java/org/elasticsearch/common/text/BytesText.java index cf216b37550..dc8ea88e219 100644 --- a/src/main/java/org/elasticsearch/common/text/BytesText.java +++ b/src/main/java/org/elasticsearch/common/text/BytesText.java @@ -22,8 +22,6 @@ package org.elasticsearch.common.text; import com.google.common.base.Charsets; import org.elasticsearch.common.bytes.BytesReference; -import 
java.util.Comparator; - /** * A {@link BytesReference} representation of the text, will always convert on the fly to a {@link String}. */ @@ -76,7 +74,6 @@ public class BytesText implements Text { @Override public int compareTo(Text text) { - return UTF8SortedAsUnicodeComparator.utf8SortedAsUnicodeSortOrder.compare(text.bytes(), text.bytes()); + return UTF8SortedAsUnicodeComparator.utf8SortedAsUnicodeSortOrder.compare(bytes(), text.bytes()); } - } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/common/text/StringAndBytesText.java b/src/main/java/org/elasticsearch/common/text/StringAndBytesText.java index 86a8ab8bd24..73e2d196e49 100644 --- a/src/main/java/org/elasticsearch/common/text/StringAndBytesText.java +++ b/src/main/java/org/elasticsearch/common/text/StringAndBytesText.java @@ -99,11 +99,7 @@ public class StringAndBytesText implements Text { } @Override - public int compareTo(Text other) { - if (text == null) { - return UTF8SortedAsUnicodeComparator.utf8SortedAsUnicodeSortOrder.compare(bytes, other.bytes()); - } else { - return text.compareTo(other.string()); - } + public int compareTo(Text text) { + return UTF8SortedAsUnicodeComparator.utf8SortedAsUnicodeSortOrder.compare(bytes(), text.bytes()); } } diff --git a/src/main/java/org/elasticsearch/common/text/StringText.java b/src/main/java/org/elasticsearch/common/text/StringText.java index 01840237911..c478396b35d 100644 --- a/src/main/java/org/elasticsearch/common/text/StringText.java +++ b/src/main/java/org/elasticsearch/common/text/StringText.java @@ -86,6 +86,6 @@ public class StringText implements Text { @Override public int compareTo(Text text) { - return this.text.compareTo(text.string()); + return UTF8SortedAsUnicodeComparator.utf8SortedAsUnicodeSortOrder.compare(bytes(), text.bytes()); } } From 8009b80481cc61a24b2703cd5e7c79a1974f43a7 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Fri, 26 Oct 2012 08:49:26 -0400 Subject: [PATCH 024/146] lucene 4: fix access to segment name due to SegmentInfo refactoring --- .../lucene/index/TrackingConcurrentMergeScheduler.java | 6 +++--- .../apache/lucene/index/TrackingSerialMergeScheduler.java | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/apache/lucene/index/TrackingConcurrentMergeScheduler.java b/src/main/java/org/apache/lucene/index/TrackingConcurrentMergeScheduler.java index 5fde49b8af0..59f60f28df4 100644 --- a/src/main/java/org/apache/lucene/index/TrackingConcurrentMergeScheduler.java +++ b/src/main/java/org/apache/lucene/index/TrackingConcurrentMergeScheduler.java @@ -84,7 +84,7 @@ public class TrackingConcurrentMergeScheduler extends ConcurrentMergeScheduler { currentMergesNumDocs.inc(totalNumDocs); currentMergesSizeInBytes.inc(totalSizeInBytes); if (logger.isTraceEnabled()) { - logger.trace("merge [{}] starting..., merging [{}] segments, [{}] docs, [{}] size, into [{}] estimated_size", merge.info == null ? "_na_" : merge.info.name, merge.segments.size(), totalNumDocs, new ByteSizeValue(totalSizeInBytes), new ByteSizeValue(merge.estimatedMergeBytes)); + logger.trace("merge [{}] starting..., merging [{}] segments, [{}] docs, [{}] size, into [{}] estimated_size", merge.info == null ? 
"_na_" : merge.info.info.name, merge.segments.size(), totalNumDocs, new ByteSizeValue(totalSizeInBytes), new ByteSizeValue(merge.estimatedMergeBytes)); } try { TrackingMergeScheduler.setCurrentMerge(merge); @@ -101,9 +101,9 @@ public class TrackingConcurrentMergeScheduler extends ConcurrentMergeScheduler { totalMergesSizeInBytes.inc(totalSizeInBytes); totalMerges.inc(took); if (took > 20000) { // if more than 20 seconds, DEBUG log it - logger.debug("merge [{}] done, took [{}]", merge.info == null ? "_na_" : merge.info.name, TimeValue.timeValueMillis(took)); + logger.debug("merge [{}] done, took [{}]", merge.info == null ? "_na_" : merge.info.info.name, TimeValue.timeValueMillis(took)); } else if (logger.isTraceEnabled()) { - logger.trace("merge [{}] done, took [{}]", merge.info == null ? "_na_" : merge.info.name, TimeValue.timeValueMillis(took)); + logger.trace("merge [{}] done, took [{}]", merge.info == null ? "_na_" : merge.info.info.name, TimeValue.timeValueMillis(took)); } } } diff --git a/src/main/java/org/apache/lucene/index/TrackingSerialMergeScheduler.java b/src/main/java/org/apache/lucene/index/TrackingSerialMergeScheduler.java index 0e4c426790d..c0026203687 100644 --- a/src/main/java/org/apache/lucene/index/TrackingSerialMergeScheduler.java +++ b/src/main/java/org/apache/lucene/index/TrackingSerialMergeScheduler.java @@ -96,7 +96,7 @@ public class TrackingSerialMergeScheduler extends MergeScheduler { // sadly, segment name is not available since mergeInit is called from merge itself... if (logger.isTraceEnabled()) { - logger.trace("merge [{}] starting..., merging [{}] segments, [{}] docs, [{}] size, into [{}] estimated_size", merge.info == null ? "_na_" : merge.info.name, merge.segments.size(), totalNumDocs, new ByteSizeValue(totalSizeInBytes), new ByteSizeValue(merge.estimatedMergeBytes)); + logger.trace("merge [{}] starting..., merging [{}] segments, [{}] docs, [{}] size, into [{}] estimated_size", merge.info == null ? "_na_" : merge.info.info.name, merge.segments.size(), totalNumDocs, new ByteSizeValue(totalSizeInBytes), new ByteSizeValue(merge.estimatedMergeBytes)); } try { TrackingMergeScheduler.setCurrentMerge(merge); @@ -113,9 +113,9 @@ public class TrackingSerialMergeScheduler extends MergeScheduler { totalMergesSizeInBytes.inc(totalSizeInBytes); totalMerges.inc(took); if (took > 20000) { // if more than 20 seconds, DEBUG log it - logger.debug("merge [{}] done, took [{}]", merge.info == null ? "_na_" : merge.info.name, TimeValue.timeValueMillis(took)); + logger.debug("merge [{}] done, took [{}]", merge.info == null ? "_na_" : merge.info.info.name, TimeValue.timeValueMillis(took)); } else if (logger.isTraceEnabled()) { - logger.trace("merge [{}] done, took [{}]", merge.info == null ? "_na_" : merge.info.name, TimeValue.timeValueMillis(took)); + logger.trace("merge [{}] done, took [{}]", merge.info == null ? 
"_na_" : merge.info.info.name, TimeValue.timeValueMillis(took)); } } } From c3633ab99ff52cf4e7e4449a14001a0c8cb82b74 Mon Sep 17 00:00:00 2001 From: uboness Date: Fri, 26 Oct 2012 15:22:31 +0200 Subject: [PATCH 025/146] lucene 4: changed InternalIndexShard#checkIndex to use the new fixIndex and indexExists apis --- .../index/shard/service/InternalIndexShard.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/shard/service/InternalIndexShard.java b/src/main/java/org/elasticsearch/index/shard/service/InternalIndexShard.java index 6c92f4c9ac8..3464798bf13 100644 --- a/src/main/java/org/elasticsearch/index/shard/service/InternalIndexShard.java +++ b/src/main/java/org/elasticsearch/index/shard/service/InternalIndexShard.java @@ -21,7 +21,7 @@ package org.elasticsearch.index.shard.service; import com.google.common.base.Charsets; import org.apache.lucene.index.CheckIndex; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.search.Filter; import org.apache.lucene.search.FilteredQuery; import org.apache.lucene.search.Query; @@ -808,11 +808,13 @@ public class InternalIndexShard extends AbstractIndexShardComponent implements I } } + //LUCENE 4 UPGRADE: currently passing 'null' codec to fixIndex, when we have proper support for a codec service + // we'll us that to figure out the codec that should be used private void checkIndex(boolean throwException) throws IndexShardException { try { checkIndexTook = 0; long time = System.currentTimeMillis(); - if (!IndexReader.indexExists(store.directory())) { + if (!DirectoryReader.indexExists(store.directory())) { return; } CheckIndex checkIndex = new CheckIndex(store.directory()); @@ -831,7 +833,7 @@ public class InternalIndexShard extends AbstractIndexShardComponent implements I if (logger.isDebugEnabled()) { logger.debug("fixing index, writing new segments file ..."); } - checkIndex.fixIndex(status); + checkIndex.fixIndex(status, null); if (logger.isDebugEnabled()) { logger.debug("index fixed, wrote new segments file \"{}\"", status.segmentsFileName); } From b1eaec6c6af083e7add98435344064e029d9c4a7 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Fri, 26 Oct 2012 09:28:07 -0400 Subject: [PATCH 026/146] lucene 4: change Unicode utils to use BytesRef instead of UTF8Result --- .../org/elasticsearch/common/Unicode.java | 21 ++++++++++--------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/Unicode.java b/src/main/java/org/elasticsearch/common/Unicode.java index cd6c4a9b2d4..92912850d03 100644 --- a/src/main/java/org/elasticsearch/common/Unicode.java +++ b/src/main/java/org/elasticsearch/common/Unicode.java @@ -19,6 +19,7 @@ package org.elasticsearch.common; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.UnicodeUtil; import org.elasticsearch.common.util.concurrent.ThreadLocals; @@ -29,10 +30,10 @@ import java.util.Arrays; */ public class Unicode { - private static ThreadLocal> cachedUtf8Result = new ThreadLocal>() { + private static ThreadLocal> cachedUtf8Result = new ThreadLocal>() { @Override - protected ThreadLocals.CleanableValue initialValue() { - return new ThreadLocals.CleanableValue(new UnicodeUtil.UTF8Result()); + protected ThreadLocals.CleanableValue initialValue() { + return new ThreadLocals.CleanableValue(new BytesRef()); } }; @@ -47,20 +48,20 @@ public class Unicode { if (source == null) { return null; } - UnicodeUtil.UTF8Result result = 
unsafeFromStringAsUtf8(source); - return Arrays.copyOfRange(result.result, 0, result.length); + BytesRef result = unsafeFromStringAsUtf8(source); + return Arrays.copyOfRange(result.bytes, result.offset, result.length); } - public static UnicodeUtil.UTF8Result fromStringAsUtf8(String source) { + public static BytesRef fromStringAsUtf8(String source) { if (source == null) { return null; } - UnicodeUtil.UTF8Result result = new UnicodeUtil.UTF8Result(); + BytesRef result = new BytesRef(); UnicodeUtil.UTF16toUTF8(source, 0, source.length(), result); return result; } - public static void fromStringAsUtf8(String source, UnicodeUtil.UTF8Result result) { + public static void fromStringAsUtf8(String source, BytesRef result) { if (source == null) { result.length = 0; return; @@ -68,11 +69,11 @@ public class Unicode { UnicodeUtil.UTF16toUTF8(source, 0, source.length(), result); } - public static UnicodeUtil.UTF8Result unsafeFromStringAsUtf8(String source) { + public static BytesRef unsafeFromStringAsUtf8(String source) { if (source == null) { return null; } - UnicodeUtil.UTF8Result result = cachedUtf8Result.get().get(); + BytesRef result = cachedUtf8Result.get().get(); UnicodeUtil.UTF16toUTF8(source, 0, source.length(), result); return result; } From 5d47ad4648d600eaf80a443bf011929f21825987 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sun, 28 Oct 2012 09:57:38 +0100 Subject: [PATCH 027/146] lucene 4: upgraded FuzzyQueryParser + Builder to use integer edit distance rather than floats (bw compatible) --- .../elasticsearch/index/query/FuzzyQueryBuilder.java | 11 +++++++++++ .../elasticsearch/index/query/FuzzyQueryParser.java | 9 ++++++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java index 9a7c6e23139..f5360f30292 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java @@ -41,6 +41,9 @@ public class FuzzyQueryBuilder extends BaseQueryBuilder implements BoostableQuer private Integer prefixLength; private Integer maxExpansions; + + //LUCENE 4 UPGRADE we need a testcase for this + documentation + private Boolean transpositions = true; /** * Constructs a new term query. 
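FuzzyQuery.floatToEdits is what keeps this change backwards compatible: it maps a legacy min_similarity float in [0, 1) onto the small integer edit distances that Lucene 4's Levenshtein automata actually support, scaled by the term's code point length. A short sketch of the conversion, assuming a Lucene 4.0 classpath (the class name and the prefix/expansion choices here are illustrative, not taken from this patch):

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.FuzzyQuery;

    public class FuzzyEdits {
        // Converts a legacy similarity float to an integer edit distance,
        // scaled by code point count and capped at 2 edits.
        public static FuzzyQuery fromMinSimilarity(String field, String value,
                                                   float minSimilarity) {
            int edits = FuzzyQuery.floatToEdits(minSimilarity,
                    value.codePointCount(0, value.length()));
            // illustrative choices: no prefix, default expansions,
            // transpositions disabled
            return new FuzzyQuery(new Term(field, value), edits, 0,
                    FuzzyQuery.defaultMaxExpansions, false);
        }
    }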
@@ -81,6 +84,11 @@ public class FuzzyQueryBuilder extends BaseQueryBuilder implements BoostableQuer this.maxExpansions = maxExpansions; return this; } + + public FuzzyQueryBuilder transpositions(boolean transpositions) { + this.transpositions = transpositions; + return this; + } @Override public void doXContent(XContentBuilder builder, Params params) throws IOException { @@ -93,6 +101,9 @@ public class FuzzyQueryBuilder extends BaseQueryBuilder implements BoostableQuer if (boost != -1) { builder.field("boost", boost); } + if (!transpositions) { + builder.field("transpositions", transpositions); + } if (minSimilarity != null) { builder.field("min_similarity", minSimilarity); } diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java b/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java index 56aa6dc4ea6..4da392305f0 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java @@ -60,9 +60,11 @@ public class FuzzyQueryParser implements QueryParser { String value = null; float boost = 1.0f; + //LUCENE 4 UPGRADE we should find a good default here I'd vote for 1.0 -> 1 edit String minSimilarity = "0.5"; int prefixLength = FuzzyQuery.defaultPrefixLength; int maxExpansions = FuzzyQuery.defaultMaxExpansions; + boolean transpositions = true; MultiTermQuery.RewriteMethod rewriteMethod = null; token = parser.nextToken(); if (token == XContentParser.Token.START_OBJECT) { @@ -83,6 +85,8 @@ public class FuzzyQueryParser implements QueryParser { prefixLength = parser.intValue(); } else if ("max_expansions".equals(currentFieldName) || "maxExpansions".equals(currentFieldName)) { maxExpansions = parser.intValue(); + } else if ("transpositions".equals(currentFieldName)) { + transpositions = parser.booleanValue(); } else if ("rewrite".equals(currentFieldName)) { rewriteMethod = QueryParsers.parseRewriteMethod(parser.textOrNull(), null); } else { @@ -109,7 +113,10 @@ public class FuzzyQueryParser implements QueryParser { } } if (query == null) { - query = new FuzzyQuery(new Term(fieldName, value), Float.parseFloat(minSimilarity), prefixLength, maxExpansions); + //LUCENE 4 UPGRADE we need to document that this should now be an int rather than a float + int edits = FuzzyQuery.floatToEdits(Float.parseFloat(minSimilarity), + value.codePointCount(0, value.length())); + query = new FuzzyQuery(new Term(fieldName, value), edits, prefixLength, maxExpansions, transpositions); } if (query instanceof MultiTermQuery) { QueryParsers.setRewriteMethod((MultiTermQuery) query, rewriteMethod); From 479f1784e866329497bdada13713cb1051689ced Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sun, 28 Oct 2012 09:58:11 +0100 Subject: [PATCH 028/146] lucene 4: converted queryparser to lucene classic query parser --- pom.xml | 7 ++++ .../classic}/ExistsFieldQueryExtension.java | 2 +- .../classic}/FieldQueryExtension.java | 2 +- .../classic}/MapperQueryParser.java | 32 +++++++++++-------- .../classic}/MissingFieldQueryExtension.java | 2 +- .../classic}/QueryParserSettings.java | 2 +- .../cache/query/parser/QueryParserCache.java | 2 +- .../parser/none/NoneQueryParserCache.java | 2 +- .../resident/ResidentQueryParserCache.java | 3 +- .../support/AbstractJvmQueryParserCache.java | 2 +- .../index/query/FieldQueryParser.java | 12 +++---- 11 files changed, 41 insertions(+), 27 deletions(-) rename src/main/java/org/apache/lucene/{queryParser => queryparser/classic}/ExistsFieldQueryExtension.java (97%) rename 
src/main/java/org/apache/lucene/{queryParser => queryparser/classic}/FieldQueryExtension.java (95%) rename src/main/java/org/apache/lucene/{queryParser => queryparser/classic}/MapperQueryParser.java (95%) rename src/main/java/org/apache/lucene/{queryParser => queryparser/classic}/MissingFieldQueryExtension.java (98%) rename src/main/java/org/apache/lucene/{queryParser => queryparser/classic}/QueryParserSettings.java (99%) diff --git a/pom.xml b/pom.xml index d35ea38bdef..cf7222bf823 100644 --- a/pom.xml +++ b/pom.xml @@ -85,6 +85,13 @@ <version>${lucene.version}</version> <scope>compile</scope> </dependency>
+ <dependency> + <groupId>org.apache.lucene</groupId> + <artifactId>lucene-queryparser</artifactId> + <version>${lucene.version}</version> + <scope>compile</scope> + </dependency> + diff --git a/src/main/java/org/apache/lucene/queryParser/ExistsFieldQueryExtension.java b/src/main/java/org/apache/lucene/queryparser/classic/ExistsFieldQueryExtension.java similarity index 97% rename from src/main/java/org/apache/lucene/queryParser/ExistsFieldQueryExtension.java rename to src/main/java/org/apache/lucene/queryparser/classic/ExistsFieldQueryExtension.java index 46b87ef0861..a0cec0787c3 100644 --- a/src/main/java/org/apache/lucene/queryParser/ExistsFieldQueryExtension.java +++ b/src/main/java/org/apache/lucene/queryparser/classic/ExistsFieldQueryExtension.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.lucene.queryParser; +package org.apache.lucene.queryparser.classic; import org.apache.lucene.search.DeletionAwareConstantScoreQuery; import org.apache.lucene.search.Filter; diff --git a/src/main/java/org/apache/lucene/queryParser/FieldQueryExtension.java b/src/main/java/org/apache/lucene/queryparser/classic/FieldQueryExtension.java similarity index 95% rename from src/main/java/org/apache/lucene/queryParser/FieldQueryExtension.java rename to src/main/java/org/apache/lucene/queryparser/classic/FieldQueryExtension.java index b34e55b1d21..03a225cb9b6 100644 --- a/src/main/java/org/apache/lucene/queryParser/FieldQueryExtension.java +++ b/src/main/java/org/apache/lucene/queryparser/classic/FieldQueryExtension.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.lucene.queryParser; +package org.apache.lucene.queryparser.classic; import org.apache.lucene.search.Query; import org.elasticsearch.index.query.QueryParseContext; diff --git a/src/main/java/org/apache/lucene/queryParser/MapperQueryParser.java b/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java similarity index 95% rename from src/main/java/org/apache/lucene/queryParser/MapperQueryParser.java rename to src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java index 3c0fde70ff7..703a9d189a9 100644 --- a/src/main/java/org/apache/lucene/queryParser/MapperQueryParser.java +++ b/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java @@ -17,7 +17,7 @@ * under the License. 
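With this move, the classic Lucene query parser no longer ships in lucene-core: it lives in the separate lucene-queryparser artifact declared in the pom above, under the org.apache.lucene.queryparser.classic package. A minimal usage sketch, assuming Lucene 4.0 with lucene-analyzers-common also on the classpath (the field name and analyzer choice are illustrative):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.queryparser.classic.ParseException;
    import org.apache.lucene.queryparser.classic.QueryParser;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.util.Version;

    public class ClassicParserDemo {
        // The same parser that used to live in lucene-core's
        // org.apache.lucene.queryParser package; only the package
        // and artifact changed, not the query syntax.
        public static Query parse(String input) throws ParseException {
            QueryParser parser = new QueryParser(Version.LUCENE_40, "body",
                    new StandardAnalyzer(Version.LUCENE_40));
            parser.setDefaultOperator(QueryParser.Operator.AND);
            return parser.parse(input);
        }
    }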
*/ -package org.apache.lucene.queryParser; +package org.apache.lucene.queryparser.classic; import com.google.common.base.Objects; import com.google.common.collect.ImmutableMap; @@ -25,7 +25,9 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.Term; +import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.*; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.FastStringReader; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.search.Queries; @@ -287,7 +289,7 @@ public class MapperQueryParser extends QueryParser { } @Override - protected Query getRangeQuery(String field, String part1, String part2, boolean inclusive) throws ParseException { + protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException { if ("*".equals(part1)) { part1 = null; } @@ -297,13 +299,13 @@ public class MapperQueryParser extends QueryParser { Collection fields = extractMultiFields(field); if (fields != null) { if (fields.size() == 1) { - return getRangeQuerySingle(fields.iterator().next(), part1, part2, inclusive); + return getRangeQuerySingle(fields.iterator().next(), part1, part2, startInclusive, endInclusive); } if (settings.useDisMax()) { DisjunctionMaxQuery disMaxQuery = new DisjunctionMaxQuery(settings.tieBreaker()); boolean added = false; for (String mField : fields) { - Query q = getRangeQuerySingle(mField, part1, part2, inclusive); + Query q = getRangeQuerySingle(mField, part1, part2, startInclusive, endInclusive); if (q != null) { added = true; applyBoost(mField, q); @@ -317,7 +319,7 @@ public class MapperQueryParser extends QueryParser { } else { List clauses = new ArrayList(); for (String mField : fields) { - Query q = getRangeQuerySingle(mField, part1, part2, inclusive); + Query q = getRangeQuerySingle(mField, part1, part2, startInclusive, endInclusive); if (q != null) { applyBoost(mField, q); clauses.add(new BooleanClause(q, BooleanClause.Occur.SHOULD)); @@ -328,18 +330,18 @@ public class MapperQueryParser extends QueryParser { return getBooleanQuery(clauses, true); } } else { - return getRangeQuerySingle(field, part1, part2, inclusive); + return getRangeQuerySingle(field, part1, part2, startInclusive, endInclusive); } } - private Query getRangeQuerySingle(String field, String part1, String part2, boolean inclusive) { + private Query getRangeQuerySingle(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) { currentMapper = null; MapperService.SmartNameFieldMappers fieldMappers = parseContext.smartFieldMappers(field); if (fieldMappers != null) { currentMapper = fieldMappers.fieldMappers().mapper(); if (currentMapper != null) { try { - Query rangeQuery = currentMapper.rangeQuery(part1, part2, inclusive, inclusive, parseContext); + Query rangeQuery = currentMapper.rangeQuery(part1, part2, startInclusive, startInclusive, parseContext); return wrapSmartNameQuery(rangeQuery, fieldMappers, parseContext); } catch (RuntimeException e) { if (settings.lenient()) { @@ -349,7 +351,7 @@ public class MapperQueryParser extends QueryParser { } } } - return newRangeQuery(field, part1, part2, inclusive); + return newRangeQuery(field, part1, part2, startInclusive, endInclusive); } @Override @@ -410,7 +412,11 @@ public class MapperQueryParser extends QueryParser { @Override 
protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) { - FuzzyQuery query = new FuzzyQuery(term, minimumSimilarity, prefixLength, settings.fuzzyMaxExpansions()); + String text = term.text(); + int numEdits = FuzzyQuery.floatToEdits(minimumSimilarity, + text.codePointCount(0, text.length())); + //LUCENE 4 UPGRADE I disabled transpositions here by default - maybe this needs to be changed + FuzzyQuery query = new FuzzyQuery(term, numEdits, prefixLength, settings.fuzzyMaxExpansions(), false); QueryParsers.setRewriteMethod(query, settings.fuzzyRewriteMethod()); return query; } @@ -503,7 +509,7 @@ public class MapperQueryParser extends QueryParser { // get Analyzer from superclass and tokenize the term TokenStream source; try { - source = getAnalyzer().reusableTokenStream(field, new StringReader(termStr)); + source = getAnalyzer().tokenStream(field, new StringReader(termStr)); } catch (IOException e) { return super.getPrefixQuery(field, termStr); } @@ -631,7 +637,7 @@ public class MapperQueryParser extends QueryParser { if (c == '?' || c == '*') { if (isWithinToken) { try { - TokenStream source = getAnalyzer().reusableTokenStream(field, new FastStringReader(tmp.toString())); + TokenStream source = getAnalyzer().tokenStream(field, new FastStringReader(tmp.toString())); CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class); if (source.incrementToken()) { String term = termAtt.toString(); @@ -660,7 +666,7 @@ public class MapperQueryParser extends QueryParser { } if (isWithinToken) { try { - TokenStream source = getAnalyzer().reusableTokenStream(field, new FastStringReader(tmp.toString())); + TokenStream source = getAnalyzer().tokenStream(field, new FastStringReader(tmp.toString())); CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class); if (source.incrementToken()) { String term = termAtt.toString(); diff --git a/src/main/java/org/apache/lucene/queryParser/MissingFieldQueryExtension.java b/src/main/java/org/apache/lucene/queryparser/classic/MissingFieldQueryExtension.java similarity index 98% rename from src/main/java/org/apache/lucene/queryParser/MissingFieldQueryExtension.java rename to src/main/java/org/apache/lucene/queryparser/classic/MissingFieldQueryExtension.java index 0cc40b4cf0b..f714b476869 100644 --- a/src/main/java/org/apache/lucene/queryParser/MissingFieldQueryExtension.java +++ b/src/main/java/org/apache/lucene/queryparser/classic/MissingFieldQueryExtension.java @@ -17,7 +17,7 @@ * under the License. */ -package org.apache.lucene.queryParser; +package org.apache.lucene.queryparser.classic; import org.apache.lucene.search.DeletionAwareConstantScoreQuery; import org.apache.lucene.search.Filter; diff --git a/src/main/java/org/apache/lucene/queryParser/QueryParserSettings.java b/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java similarity index 99% rename from src/main/java/org/apache/lucene/queryParser/QueryParserSettings.java rename to src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java index 56f88d76753..ba13033db15 100644 --- a/src/main/java/org/apache/lucene/queryParser/QueryParserSettings.java +++ b/src/main/java/org/apache/lucene/queryparser/classic/QueryParserSettings.java @@ -17,7 +17,7 @@ * under the License. 
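The reusableTokenStream() calls replaced above are gone in Lucene 4: Analyzer.tokenStream() now handles reuse internally, and consumers are expected to follow the reset/incrementToken/end/close contract. A short sketch of that contract, assuming a Lucene 4.0 classpath (the helper class name is illustrative):

    import java.io.IOException;
    import java.io.StringReader;
    import java.util.ArrayList;
    import java.util.List;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class AnalyzeUtil {
        // Collects the terms an analyzer produces for one field/value pair.
        public static List<String> analyze(Analyzer analyzer, String field,
                                           String text) throws IOException {
            List<String> terms = new ArrayList<String>();
            TokenStream source = analyzer.tokenStream(field, new StringReader(text));
            CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
            try {
                source.reset();                 // required before consuming
                while (source.incrementToken()) {
                    terms.add(termAtt.toString());
                }
                source.end();
            } finally {
                source.close();
            }
            return terms;
        }
    }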
*/ -package org.apache.lucene.queryParser; +package org.apache.lucene.queryparser.classic; import gnu.trove.map.hash.TObjectFloatHashMap; import org.apache.lucene.analysis.Analyzer; diff --git a/src/main/java/org/elasticsearch/index/cache/query/parser/QueryParserCache.java b/src/main/java/org/elasticsearch/index/cache/query/parser/QueryParserCache.java index 7c11eed3c40..9df1dd1dae7 100644 --- a/src/main/java/org/elasticsearch/index/cache/query/parser/QueryParserCache.java +++ b/src/main/java/org/elasticsearch/index/cache/query/parser/QueryParserCache.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.cache.query.parser; -import org.apache.lucene.queryParser.QueryParserSettings; +import org.apache.lucene.queryparser.classic.QueryParserSettings; import org.apache.lucene.search.Query; import org.elasticsearch.common.component.CloseableComponent; import org.elasticsearch.index.IndexComponent; diff --git a/src/main/java/org/elasticsearch/index/cache/query/parser/none/NoneQueryParserCache.java b/src/main/java/org/elasticsearch/index/cache/query/parser/none/NoneQueryParserCache.java index 805b5473d8b..1e41dc386a0 100644 --- a/src/main/java/org/elasticsearch/index/cache/query/parser/none/NoneQueryParserCache.java +++ b/src/main/java/org/elasticsearch/index/cache/query/parser/none/NoneQueryParserCache.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.cache.query.parser.none; -import org.apache.lucene.queryParser.QueryParserSettings; +import org.apache.lucene.queryparser.classic.QueryParserSettings; import org.apache.lucene.search.Query; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.common.inject.Inject; diff --git a/src/main/java/org/elasticsearch/index/cache/query/parser/resident/ResidentQueryParserCache.java b/src/main/java/org/elasticsearch/index/cache/query/parser/resident/ResidentQueryParserCache.java index 6b55deb5774..d572347f653 100644 --- a/src/main/java/org/elasticsearch/index/cache/query/parser/resident/ResidentQueryParserCache.java +++ b/src/main/java/org/elasticsearch/index/cache/query/parser/resident/ResidentQueryParserCache.java @@ -21,7 +21,8 @@ package org.elasticsearch.index.cache.query.parser.resident; import com.google.common.cache.Cache; import com.google.common.cache.CacheBuilder; -import org.apache.lucene.queryParser.QueryParserSettings; + +import org.apache.lucene.queryparser.classic.QueryParserSettings; import org.apache.lucene.search.Query; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.common.inject.Inject; diff --git a/src/main/java/org/elasticsearch/index/cache/query/parser/support/AbstractJvmQueryParserCache.java b/src/main/java/org/elasticsearch/index/cache/query/parser/support/AbstractJvmQueryParserCache.java index 5f555f308ac..9f3924162d4 100644 --- a/src/main/java/org/elasticsearch/index/cache/query/parser/support/AbstractJvmQueryParserCache.java +++ b/src/main/java/org/elasticsearch/index/cache/query/parser/support/AbstractJvmQueryParserCache.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.cache.query.parser.support; -import org.apache.lucene.queryParser.QueryParserSettings; +import org.apache.lucene.queryparser.classic.QueryParserSettings; import org.apache.lucene.search.Query; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.common.settings.Settings; diff --git a/src/main/java/org/elasticsearch/index/query/FieldQueryParser.java b/src/main/java/org/elasticsearch/index/query/FieldQueryParser.java index 88f86fc5d5e..1dd847e5879 100644 --- 
a/src/main/java/org/elasticsearch/index/query/FieldQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/FieldQueryParser.java @@ -19,9 +19,9 @@ package org.elasticsearch.index.query; -import org.apache.lucene.queryParser.MapperQueryParser; -import org.apache.lucene.queryParser.ParseException; -import org.apache.lucene.queryParser.QueryParserSettings; +import org.apache.lucene.queryparser.classic.MapperQueryParser; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.classic.QueryParserSettings; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; import org.elasticsearch.common.inject.Inject; @@ -108,9 +108,9 @@ public class FieldQueryParser implements QueryParser { } else if ("default_operator".equals(currentFieldName) || "defaultOperator".equals(currentFieldName)) { String op = parser.text(); if ("or".equalsIgnoreCase(op)) { - qpSettings.defaultOperator(org.apache.lucene.queryParser.QueryParser.Operator.OR); + qpSettings.defaultOperator(org.apache.lucene.queryparser.classic.QueryParser.Operator.OR); } else if ("and".equalsIgnoreCase(op)) { - qpSettings.defaultOperator(org.apache.lucene.queryParser.QueryParser.Operator.AND); + qpSettings.defaultOperator(org.apache.lucene.queryparser.classic.QueryParser.Operator.AND); } else { throw new QueryParsingException(parseContext.index(), "Query default operator [" + op + "] is not allowed"); } @@ -152,7 +152,7 @@ public class FieldQueryParser implements QueryParser { } if (qpSettings.escape()) { - qpSettings.queryString(org.apache.lucene.queryParser.QueryParser.escape(qpSettings.queryString())); + qpSettings.queryString(org.apache.lucene.queryparser.classic.QueryParser.escape(qpSettings.queryString())); } qpSettings.queryTypes(parseContext.queryTypes()); From c1a9c802f1bc5233447d15651bf63e4b72b4b03d Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sun, 28 Oct 2012 09:58:35 +0100 Subject: [PATCH 029/146] lucene 4: XContentParser now has bytesOrNull and returns bytesref directly --- .../common/xcontent/XContentParser.java | 6 +++++ .../support/AbstractXContentParser.java | 22 +++++++++++++++++++ 2 files changed, 28 insertions(+) diff --git a/src/main/java/org/elasticsearch/common/xcontent/XContentParser.java b/src/main/java/org/elasticsearch/common/xcontent/XContentParser.java index 939a19dd268..3872eb5e5fa 100644 --- a/src/main/java/org/elasticsearch/common/xcontent/XContentParser.java +++ b/src/main/java/org/elasticsearch/common/xcontent/XContentParser.java @@ -23,6 +23,8 @@ import java.io.Closeable; import java.io.IOException; import java.util.Map; +import org.apache.lucene.util.BytesRef; + /** * */ @@ -128,6 +130,10 @@ public interface XContentParser extends Closeable { String text() throws IOException; String textOrNull() throws IOException; + + BytesRef bytesOrNull(BytesRef spare) throws IOException; + + BytesRef bytes(BytesRef spare) throws IOException; boolean hasTextCharacters(); diff --git a/src/main/java/org/elasticsearch/common/xcontent/support/AbstractXContentParser.java b/src/main/java/org/elasticsearch/common/xcontent/support/AbstractXContentParser.java index 080df1c060f..e6a89bb475f 100644 --- a/src/main/java/org/elasticsearch/common/xcontent/support/AbstractXContentParser.java +++ b/src/main/java/org/elasticsearch/common/xcontent/support/AbstractXContentParser.java @@ -19,6 +19,7 @@ package org.elasticsearch.common.xcontent.support; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Booleans; import 
org.elasticsearch.common.xcontent.XContentParser; @@ -105,6 +106,27 @@ public abstract class AbstractXContentParser implements XContentParser { } return text(); } + + + + @Override + public BytesRef bytesOrNull(BytesRef spare) throws IOException { + if (currentToken() == Token.VALUE_NULL) { + return null; + } + return bytes(spare); + } + + @Override + public BytesRef bytes(BytesRef spare) throws IOException { + // LUCENE 4 UPGRADE: we can possibly make this more efficient for now I just forward to text + if (spare == null) { + return new BytesRef(text()); + } else { + spare.copyChars(text()); + return spare; + } + } @Override public Map map() throws IOException { From ad84186509e7a09e542d965a60b8a78d2e902a07 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sun, 28 Oct 2012 09:59:00 +0100 Subject: [PATCH 030/146] lucene 4: fixed fuzzy like this queryparser/builder --- .../index/query/FuzzyLikeThisFieldQueryParser.java | 2 +- .../org/elasticsearch/index/query/FuzzyLikeThisQueryParser.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryParser.java b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryParser.java index 3856264b00a..34b977303a4 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisFieldQueryParser.java @@ -20,7 +20,7 @@ package org.elasticsearch.index.query; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.search.FuzzyLikeThisQuery; +import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery; import org.apache.lucene.search.Query; import org.elasticsearch.common.Strings; import org.elasticsearch.common.inject.Inject; diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryParser.java b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryParser.java index cdfc49a50f9..0a1f9c02421 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyLikeThisQueryParser.java @@ -21,7 +21,7 @@ package org.elasticsearch.index.query; import com.google.common.collect.Lists; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.search.FuzzyLikeThisQuery; +import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery; import org.apache.lucene.search.Query; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.xcontent.XContentParser; From 5bd8e1b3370a6aba29abe7e50d069581cdc56594 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sun, 28 Oct 2012 09:59:45 +0100 Subject: [PATCH 031/146] lucene 4: fixed MLT query --- .../common/lucene/search/MoreLikeThisQuery.java | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/search/MoreLikeThisQuery.java b/src/main/java/org/elasticsearch/common/lucene/search/MoreLikeThisQuery.java index cae14843d2a..170fdd72e46 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/MoreLikeThisQuery.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/MoreLikeThisQuery.java @@ -22,7 +22,10 @@ package org.elasticsearch.common.lucene.search; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.*; -import org.apache.lucene.search.similar.MoreLikeThis; +import org.apache.lucene.search.similarities.DefaultSimilarity; +import 
org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarities.TFIDFSimilarity; +import org.apache.lucene.queries.mlt.MoreLikeThis; import org.elasticsearch.common.io.FastStringReader; import java.io.IOException; @@ -35,7 +38,7 @@ public class MoreLikeThisQuery extends Query { public static final float DEFAULT_PERCENT_TERMS_TO_MATCH = 0.3f; - private Similarity similarity; + private TFIDFSimilarity similarity; private String likeText; private String[] moreLikeFields; @@ -77,7 +80,8 @@ public class MoreLikeThisQuery extends Query { mlt.setStopWords(stopWords); mlt.setBoost(boostTerms); mlt.setBoostFactor(boostTermsFactor); - BooleanQuery bq = (BooleanQuery) mlt.like(new FastStringReader(likeText)); + //LUCENE 4 UPGRADE this maps the 3.6 behavior (only use the first field) + BooleanQuery bq = (BooleanQuery) mlt.like(new FastStringReader(likeText), moreLikeFields[0]); BooleanClause[] clauses = bq.getClauses(); bq.setMinimumNumberShouldMatch((int) (clauses.length * percentTermsToMatch)); @@ -112,7 +116,10 @@ public class MoreLikeThisQuery extends Query { } public void setSimilarity(Similarity similarity) { - this.similarity = similarity; + if (similarity == null || similarity instanceof TFIDFSimilarity) { + //LUCENE 4 UPGRADE we need TFIDF similarity here so I only set it if it is an instance of it + this.similarity = (TFIDFSimilarity) similarity; + } } public Analyzer getAnalyzer() { From 683be6fc645fe3e917caeb883d3f29c63a6763a2 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Sun, 28 Oct 2012 10:00:01 +0100 Subject: [PATCH 032/146] lucene 4: converted QueryParser/Builders to Lucene 4 --- .../index/query/BoolFilterParser.java | 2 +- .../index/query/BoostingQueryParser.java | 2 +- .../index/query/CustomScoreQueryParser.java | 6 +- .../index/query/FuzzyQueryBuilder.java | 4 +- .../index/query/FuzzyQueryParser.java | 6 +- .../index/query/IdsFilterParser.java | 2 +- .../index/query/IdsQueryParser.java | 2 +- .../index/query/MatchAllQueryParser.java | 4 +- .../index/query/MatchQueryBuilder.java | 13 +++- .../index/query/MatchQueryParser.java | 2 + .../index/query/NestedQueryParser.java | 18 +++-- .../index/query/QueryParseContext.java | 7 +- .../index/query/QueryStringQueryParser.java | 15 ++-- .../index/query/RangeFilterParser.java | 20 ++--- .../index/query/RangeQueryParser.java | 20 ++--- .../index/query/ScriptFilterParser.java | 12 ++- .../index/query/TypeFilterParser.java | 9 ++- .../index/search/MatchQuery.java | 73 +++++++++++-------- .../search/NumericRangeFieldDataFilter.java | 41 ++++++----- .../elasticsearch/index/search/UidFilter.java | 55 ++++++-------- 20 files changed, 174 insertions(+), 139 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/query/BoolFilterParser.java b/src/main/java/org/elasticsearch/index/query/BoolFilterParser.java index 5313001311c..1c2dc1b3ef0 100644 --- a/src/main/java/org/elasticsearch/index/query/BoolFilterParser.java +++ b/src/main/java/org/elasticsearch/index/query/BoolFilterParser.java @@ -21,7 +21,7 @@ package org.elasticsearch.index.query; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.Filter; -import org.apache.lucene.search.FilterClause; +import org.apache.lucene.queries.FilterClause; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.search.XBooleanFilter; import org.elasticsearch.common.xcontent.XContentParser; diff --git a/src/main/java/org/elasticsearch/index/query/BoostingQueryParser.java 
b/src/main/java/org/elasticsearch/index/query/BoostingQueryParser.java index e42f93d4a19..a5a40f50770 100644 --- a/src/main/java/org/elasticsearch/index/query/BoostingQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/BoostingQueryParser.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.query; -import org.apache.lucene.search.BoostingQuery; +import org.apache.lucene.queries.BoostingQuery; import org.apache.lucene.search.Query; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.xcontent.XContentParser; diff --git a/src/main/java/org/elasticsearch/index/query/CustomScoreQueryParser.java b/src/main/java/org/elasticsearch/index/query/CustomScoreQueryParser.java index f792efd6f4a..81d0694579f 100644 --- a/src/main/java/org/elasticsearch/index/query/CustomScoreQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/CustomScoreQueryParser.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.query; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Explanation; import org.apache.lucene.search.Query; @@ -122,8 +123,9 @@ public class CustomScoreQueryParser implements QueryParser { } @Override - public void setNextReader(IndexReader reader) { - script.setNextReader(reader); + public void setNextReader(AtomicReaderContext ctx) { + //LUCENE 4 UPGRADE should this pass on an ARC or just an atomic reader? + script.setNextReader(ctx); } @Override diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java index f5360f30292..6c94e9ea82f 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyQueryBuilder.java @@ -43,7 +43,7 @@ public class FuzzyQueryBuilder extends BaseQueryBuilder implements BoostableQuer private Integer maxExpansions; //LUCENE 4 UPGRADE we need a testcase for this + documentation - private Boolean transpositions = true; + private Boolean transpositions; /** * Constructs a new term query. 
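PATCH 032 changes the fuzzy builder to serialize "transpositions" only when explicitly set, and (in the parser hunk below) flips the parsed default from true to false. The flag matters because with transpositions enabled the automaton counts a swap of two adjacent characters as a single edit (Damerau-Levenshtein), while without it the same swap costs two edits. A small illustrative sketch, assuming a Lucene 4.0 classpath (the field and term are made up):

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.FuzzyQuery;

    public class TranspositionsDemo {
        // "kimchy" vs "kimhcy" is one edit with transpositions enabled,
        // two edits without them, so only the former matches at maxEdits=1.
        public static FuzzyQuery build(boolean transpositions) {
            return new FuzzyQuery(new Term("user", "kimchy"), 1,
                    FuzzyQuery.defaultPrefixLength,
                    FuzzyQuery.defaultMaxExpansions, transpositions);
        }
    }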
@@ -101,7 +101,7 @@ public class FuzzyQueryBuilder extends BaseQueryBuilder implements BoostableQuer if (boost != -1) { builder.field("boost", boost); } - if (!transpositions) { + if (transpositions != null) { builder.field("transpositions", transpositions); } if (minSimilarity != null) { diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java b/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java index 4da392305f0..07749b928d2 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java @@ -64,7 +64,7 @@ public class FuzzyQueryParser implements QueryParser { String minSimilarity = "0.5"; int prefixLength = FuzzyQuery.defaultPrefixLength; int maxExpansions = FuzzyQuery.defaultMaxExpansions; - boolean transpositions = true; + boolean transpositions = false; MultiTermQuery.RewriteMethod rewriteMethod = null; token = parser.nextToken(); if (token == XContentParser.Token.START_OBJECT) { @@ -113,8 +113,8 @@ public class FuzzyQueryParser implements QueryParser { } } if (query == null) { - //LUCENE 4 UPGRADE we need to document that this should now be an int rather than a float - int edits = FuzzyQuery.floatToEdits(Float.parseFloat(minSimilarity), + //LUCENE 4 UPGRADE we need to document that this should now be an int rather than a float + int edits = FuzzyQuery.floatToEdits(Float.parseFloat(minSimilarity), value.codePointCount(0, value.length())); query = new FuzzyQuery(new Term(fieldName, value), edits, prefixLength, maxExpansions, transpositions); } diff --git a/src/main/java/org/elasticsearch/index/query/IdsFilterParser.java b/src/main/java/org/elasticsearch/index/query/IdsFilterParser.java index 59e341e3a3e..a3231f1df5a 100644 --- a/src/main/java/org/elasticsearch/index/query/IdsFilterParser.java +++ b/src/main/java/org/elasticsearch/index/query/IdsFilterParser.java @@ -98,7 +98,7 @@ public class IdsFilterParser implements FilterParser { types = parseContext.mapperService().types(); } - UidFilter filter = new UidFilter(types, ids, parseContext.indexCache().bloomCache()); + UidFilter filter = new UidFilter(types, ids); if (filterName != null) { parseContext.addNamedFilter(filterName, filter); } diff --git a/src/main/java/org/elasticsearch/index/query/IdsQueryParser.java b/src/main/java/org/elasticsearch/index/query/IdsQueryParser.java index 3c4bd5264c4..3230c79e316 100644 --- a/src/main/java/org/elasticsearch/index/query/IdsQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/IdsQueryParser.java @@ -102,7 +102,7 @@ public class IdsQueryParser implements QueryParser { types = parseContext.mapperService().types(); } - UidFilter filter = new UidFilter(types, ids, parseContext.indexCache().bloomCache()); + UidFilter filter = new UidFilter(types, ids); // no need for constant score filter, since we don't cache the filter, and it always takes deletes into account ConstantScoreQuery query = new ConstantScoreQuery(filter); query.setBoost(boost); diff --git a/src/main/java/org/elasticsearch/index/query/MatchAllQueryParser.java b/src/main/java/org/elasticsearch/index/query/MatchAllQueryParser.java index 21621d30c5c..6ce238c51c4 100644 --- a/src/main/java/org/elasticsearch/index/query/MatchAllQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/MatchAllQueryParser.java @@ -71,7 +71,9 @@ public class MatchAllQueryParser implements QueryParser { return Queries.MATCH_ALL_QUERY; } - MatchAllDocsQuery query = new MatchAllDocsQuery(normsField); + //LUCENE 4 
UPGRADE norms field is not supported anymore need to find another way or drop the functionality + //MatchAllDocsQuery query = new MatchAllDocsQuery(normsField); + MatchAllDocsQuery query = new MatchAllDocsQuery(); query.setBoost(boost); return query; } diff --git a/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java b/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java index 44d52dfc935..c20944790cc 100644 --- a/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java +++ b/src/main/java/org/elasticsearch/index/query/MatchQueryBuilder.java @@ -77,6 +77,8 @@ public class MatchQueryBuilder extends BaseQueryBuilder implements BoostableQuer private String fuzzyRewrite = null; private Boolean lenient; + + private Boolean fuzzyTranspositions = null; /** * Constructs a new text query. @@ -163,6 +165,12 @@ public class MatchQueryBuilder extends BaseQueryBuilder implements BoostableQuer this.fuzzyRewrite = fuzzyRewrite; return this; } + + public MatchQueryBuilder fuzzyTranspositions(boolean fuzzyTranspositions) { + //LUCENE 4 UPGRADE add documentation + this.fuzzyTranspositions = fuzzyTranspositions; + return this; + } /** * Sets whether format based failures will be ignored. @@ -211,7 +219,10 @@ public class MatchQueryBuilder extends BaseQueryBuilder implements BoostableQuer if (fuzzyRewrite != null) { builder.field("fuzzy_rewrite", fuzzyRewrite); } - + if (fuzzyTranspositions != null) { + //LUCENE 4 UPGRADE we need to document this & test this + builder.field("fuzzy_transpositions", fuzzyTranspositions); + } if (lenient != null) { builder.field("lenient", lenient); } diff --git a/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java b/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java index 0df86025a94..2c4251fb060 100644 --- a/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/MatchQueryParser.java @@ -122,6 +122,8 @@ public class MatchQueryParser implements QueryParser { matchQuery.setRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null)); } else if ("fuzzy_rewrite".equals(currentFieldName) || "fuzzyRewrite".equals(currentFieldName)) { matchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(parser.textOrNull(), null)); + } else if ("fuzzy_transpositions".equals(fieldName)) { + matchQuery.setTranspositions(parser.booleanValue()); } else if ("lenient".equals(currentFieldName)) { matchQuery.setLenient(parser.booleanValue()); } else { diff --git a/src/main/java/org/elasticsearch/index/query/NestedQueryParser.java b/src/main/java/org/elasticsearch/index/query/NestedQueryParser.java index 98bd960bc46..8fc3e3f56e7 100644 --- a/src/main/java/org/elasticsearch/index/query/NestedQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/NestedQueryParser.java @@ -19,8 +19,15 @@ package org.elasticsearch.index.query; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.*; +import java.io.IOException; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.search.DeletionAwareConstantScoreQuery; +import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.search.Filter; +import org.apache.lucene.search.FilteredQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.util.Bits; import org.elasticsearch.common.Strings; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.xcontent.XContentParser; @@ -30,8 +37,6 @@ import 
org.elasticsearch.index.search.nested.BlockJoinQuery; import org.elasticsearch.index.search.nested.NonNestedDocsFilter; import org.elasticsearch.search.internal.SearchContext; -import java.io.IOException; - public class NestedQueryParser implements QueryParser { public static final String NAME = "nested"; @@ -184,8 +189,9 @@ public class NestedQueryParser implements QueryParser { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - return filter.getDocIdSet(reader); + public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits liveDocs) throws IOException { + //LUCENE 4 UPGRADE just passing on ctx and live docs here + return filter.getDocIdSet(ctx, liveDocs); } } } diff --git a/src/main/java/org/elasticsearch/index/query/QueryParseContext.java b/src/main/java/org/elasticsearch/index/query/QueryParseContext.java index 18cc8eb584f..4f597a90695 100644 --- a/src/main/java/org/elasticsearch/index/query/QueryParseContext.java +++ b/src/main/java/org/elasticsearch/index/query/QueryParseContext.java @@ -21,11 +21,12 @@ package org.elasticsearch.index.query; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; -import org.apache.lucene.queryParser.MapperQueryParser; -import org.apache.lucene.queryParser.QueryParserSettings; + +import org.apache.lucene.queryparser.classic.MapperQueryParser; +import org.apache.lucene.queryparser.classic.QueryParserSettings; import org.apache.lucene.search.Filter; import org.apache.lucene.search.Query; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.similarities.Similarity; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.Index; diff --git a/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java b/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java index 9600090c2a4..04ea25d4d63 100644 --- a/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/QueryStringQueryParser.java @@ -22,9 +22,10 @@ package org.elasticsearch.index.query; import com.google.common.collect.Lists; import gnu.trove.impl.Constants; import gnu.trove.map.hash.TObjectFloatHashMap; -import org.apache.lucene.queryParser.MapperQueryParser; -import org.apache.lucene.queryParser.ParseException; -import org.apache.lucene.queryParser.QueryParserSettings; + +import org.apache.lucene.queryparser.classic.MapperQueryParser; +import org.apache.lucene.queryparser.classic.ParseException; +import org.apache.lucene.queryparser.classic.QueryParserSettings; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; import org.elasticsearch.common.Strings; @@ -130,9 +131,9 @@ public class QueryStringQueryParser implements QueryParser { } else if ("default_operator".equals(currentFieldName) || "defaultOperator".equals(currentFieldName)) { String op = parser.text(); if ("or".equalsIgnoreCase(op)) { - qpSettings.defaultOperator(org.apache.lucene.queryParser.QueryParser.Operator.OR); + qpSettings.defaultOperator(org.apache.lucene.queryparser.classic.QueryParser.Operator.OR); } else if ("and".equalsIgnoreCase(op)) { - qpSettings.defaultOperator(org.apache.lucene.queryParser.QueryParser.Operator.AND); + qpSettings.defaultOperator(org.apache.lucene.queryparser.classic.QueryParser.Operator.AND); } else { throw new QueryParsingException(parseContext.index(), "Query default operator [" + op + "] is not allowed"); } @@ -196,7 +197,7 
@@ public class QueryStringQueryParser implements QueryParser { qpSettings.defaultQuoteAnalyzer(parseContext.mapperService().searchQuoteAnalyzer()); if (qpSettings.escape()) { - qpSettings.queryString(org.apache.lucene.queryParser.QueryParser.escape(qpSettings.queryString())); + qpSettings.queryString(org.apache.lucene.queryparser.classic.QueryParser.escape(qpSettings.queryString())); } qpSettings.queryTypes(parseContext.queryTypes()); @@ -220,7 +221,7 @@ public class QueryStringQueryParser implements QueryParser { } parseContext.indexCache().queryParserCache().put(qpSettings, query); return query; - } catch (ParseException e) { + } catch (org.apache.lucene.queryparser.classic.ParseException e) { throw new QueryParsingException(parseContext.index(), "Failed to parse query [" + qpSettings.queryString() + "]", e); } } diff --git a/src/main/java/org/elasticsearch/index/query/RangeFilterParser.java b/src/main/java/org/elasticsearch/index/query/RangeFilterParser.java index 820f8a9c66f..b24f72fd13f 100644 --- a/src/main/java/org/elasticsearch/index/query/RangeFilterParser.java +++ b/src/main/java/org/elasticsearch/index/query/RangeFilterParser.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.query; import org.apache.lucene.search.Filter; import org.apache.lucene.search.TermRangeFilter; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.cache.filter.support.CacheKeyFilter; @@ -53,8 +54,8 @@ public class RangeFilterParser implements FilterParser { boolean cache = true; CacheKeyFilter.Key cacheKey = null; String fieldName = null; - String from = null; - String to = null; + BytesRef from = null; + BytesRef to = null; boolean includeLower = true; boolean includeUpper = true; @@ -71,24 +72,24 @@ public class RangeFilterParser implements FilterParser { currentFieldName = parser.currentName(); } else { if ("from".equals(currentFieldName)) { - from = parser.textOrNull(); + from = parser.bytesOrNull(from); } else if ("to".equals(currentFieldName)) { - to = parser.textOrNull(); + to = parser.bytesOrNull(to); } else if ("include_lower".equals(currentFieldName) || "includeLower".equals(currentFieldName)) { includeLower = parser.booleanValue(); } else if ("include_upper".equals(currentFieldName) || "includeUpper".equals(currentFieldName)) { includeUpper = parser.booleanValue(); } else if ("gt".equals(currentFieldName)) { - from = parser.textOrNull(); + from = parser.bytesOrNull(from); includeLower = false; } else if ("gte".equals(currentFieldName) || "ge".equals(currentFieldName)) { - from = parser.textOrNull(); + from = parser.bytesOrNull(from); includeLower = true; } else if ("lt".equals(currentFieldName)) { - to = parser.textOrNull(); + to = parser.bytesOrNull(to); includeUpper = false; } else if ("lte".equals(currentFieldName) || "le".equals(currentFieldName)) { - to = parser.textOrNull(); + to = parser.bytesOrNull(to); includeUpper = true; } else { throw new QueryParsingException(parseContext.index(), "[range] filter does not support [" + currentFieldName + "]"); @@ -116,7 +117,8 @@ public class RangeFilterParser implements FilterParser { MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName); if (smartNameFieldMappers != null) { if (smartNameFieldMappers.hasMapper()) { - filter = smartNameFieldMappers.mapper().rangeFilter(from, to, includeLower, includeUpper, parseContext); + //LUCENE 4 UPGRADE range filter should use bytesref 
too? + filter = smartNameFieldMappers.mapper().rangeFilter(from.utf8ToString(), to.utf8ToString(), includeLower, includeUpper, parseContext); } } if (filter == null) { diff --git a/src/main/java/org/elasticsearch/index/query/RangeQueryParser.java b/src/main/java/org/elasticsearch/index/query/RangeQueryParser.java index 69c02f42ab4..b79b3a07ee3 100644 --- a/src/main/java/org/elasticsearch/index/query/RangeQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/RangeQueryParser.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.query; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.index.mapper.MapperService; @@ -59,8 +60,8 @@ public class RangeQueryParser implements QueryParser { throw new QueryParsingException(parseContext.index(), "[range] query malformed, after field missing start object"); } - String from = null; - String to = null; + BytesRef from = null; + BytesRef to = null; boolean includeLower = true; boolean includeUpper = true; float boost = 1.0f; @@ -71,9 +72,9 @@ public class RangeQueryParser implements QueryParser { currentFieldName = parser.currentName(); } else { if ("from".equals(currentFieldName)) { - from = parser.textOrNull(); + from = parser.bytesOrNull(from); } else if ("to".equals(currentFieldName)) { - to = parser.textOrNull(); + to = parser.bytesOrNull(to); } else if ("include_lower".equals(currentFieldName) || "includeLower".equals(currentFieldName)) { includeLower = parser.booleanValue(); } else if ("include_upper".equals(currentFieldName) || "includeUpper".equals(currentFieldName)) { @@ -81,16 +82,16 @@ public class RangeQueryParser implements QueryParser { } else if ("boost".equals(currentFieldName)) { boost = parser.floatValue(); } else if ("gt".equals(currentFieldName)) { - from = parser.textOrNull(); + from = parser.bytesOrNull(from); includeLower = false; } else if ("gte".equals(currentFieldName) || "ge".equals(currentFieldName)) { - from = parser.textOrNull(); + from = parser.bytesOrNull(from); includeLower = true; } else if ("lt".equals(currentFieldName)) { - to = parser.textOrNull(); + to = parser.bytesOrNull(to); includeUpper = false; } else if ("lte".equals(currentFieldName) || "le".equals(currentFieldName)) { - to = parser.textOrNull(); + to = parser.bytesOrNull(to); includeUpper = true; } else { throw new QueryParsingException(parseContext.index(), "[range] query does not support [" + currentFieldName + "]"); @@ -108,7 +109,8 @@ public class RangeQueryParser implements QueryParser { MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName); if (smartNameFieldMappers != null) { if (smartNameFieldMappers.hasMapper()) { - query = smartNameFieldMappers.mapper().rangeQuery(from, to, includeLower, includeUpper, parseContext); + //LUCENE 4 UPGRADE Mapper#rangeQuery should use bytesref as well? 
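A hedged sketch of where that TODO points (not part of the patch; the field name and endpoints are invented, the constructor is Lucene 4.0's): once the mapper's rangeQuery/rangeFilter accept BytesRef, the utf8ToString() bridge can be dropped and the parsed endpoints can feed Lucene's term range query directly.

import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.util.BytesRef;

// Endpoints stay as the UTF-8 bytes the parser produced; no String round-trip.
BytesRef from = new BytesRef("alpha");
BytesRef to = new BytesRef("omega");
TermRangeQuery range = new TermRangeQuery("name", from, to,
        true /* includeLower */, false /* includeUpper */);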
+ query = smartNameFieldMappers.mapper().rangeQuery(from.utf8ToString(), to.utf8ToString(), includeLower, includeUpper, parseContext); } } if (query == null) { diff --git a/src/main/java/org/elasticsearch/index/query/ScriptFilterParser.java b/src/main/java/org/elasticsearch/index/query/ScriptFilterParser.java index d77557d487d..5e42153e019 100644 --- a/src/main/java/org/elasticsearch/index/query/ScriptFilterParser.java +++ b/src/main/java/org/elasticsearch/index/query/ScriptFilterParser.java @@ -20,9 +20,14 @@ package org.elasticsearch.index.query; import com.google.common.collect.Maps; + +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.BitsFilteredDocIdSet; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.search.FilteredDocIdSet; +import org.apache.lucene.util.Bits; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.ElasticSearchIllegalStateException; import org.elasticsearch.common.inject.Inject; @@ -160,9 +165,10 @@ public class ScriptFilterParser implements FilterParser { } @Override - public DocIdSet getDocIdSet(final IndexReader reader) throws IOException { - searchScript.setNextReader(reader); - return new ScriptDocSet(reader, searchScript); + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + searchScript.setNextReader(context.reader()); + // LUCENE 4 UPGRADE: we can simply wrap this here since it is not cacheable and if we are not top level we will get a null passed anyway + return BitsFilteredDocIdSet.wrap(new ScriptDocSet(context.reader(), searchScript), acceptDocs); } static class ScriptDocSet extends GetDocSet { diff --git a/src/main/java/org/elasticsearch/index/query/TypeFilterParser.java b/src/main/java/org/elasticsearch/index/query/TypeFilterParser.java index b4f0a0a4bc4..f1b5c227130 100644 --- a/src/main/java/org/elasticsearch/index/query/TypeFilterParser.java +++ b/src/main/java/org/elasticsearch/index/query/TypeFilterParser.java @@ -19,7 +19,9 @@ package org.elasticsearch.index.query; +import org.apache.lucene.index.Term; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.search.TermFilter; import org.elasticsearch.common.xcontent.XContentParser; @@ -57,14 +59,15 @@ public class TypeFilterParser implements FilterParser { if (token != XContentParser.Token.VALUE_STRING) { throw new QueryParsingException(parseContext.index(), "[type] filter should have a value field, and the type name"); } - String type = parser.text(); + BytesRef type = parser.bytes(null); // move to the next token parser.nextToken(); Filter filter; - DocumentMapper documentMapper = parseContext.mapperService().documentMapper(type); + //LUCENE 4 UPGRADE document mapper should use bytesref aswell? 
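For the type filter just below, a hedged illustration (not from the patch; the literal field name and type value are invented) of why the parsed BytesRef can back the fallback term directly: Lucene 4 Terms carry their text as a BytesRef, so the TermFilter case never needs to materialize a String.

import org.apache.lucene.index.Term;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.lucene.search.TermFilter;

// "_type" stands in for the type field's indexed name.
BytesRef type = new BytesRef("tweet");
TermFilter fallback = new TermFilter(new Term("_type", type));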
+ DocumentMapper documentMapper = parseContext.mapperService().documentMapper(type.utf8ToString()); if (documentMapper == null) { - filter = new TermFilter(TypeFieldMapper.TERM_FACTORY.createTerm(type)); + filter = new TermFilter(new Term(TypeFieldMapper.TERM_FACTORY.field(), type)); } else { filter = documentMapper.typeFilter(); } diff --git a/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/src/main/java/org/elasticsearch/index/search/MatchQuery.java index 55eed4b1787..4134f5fb506 100644 --- a/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -24,10 +24,14 @@ import org.apache.lucene.analysis.CachingTokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; import org.apache.lucene.index.Term; import org.apache.lucene.search.*; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.UnicodeUtil; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.ElasticSearchIllegalStateException; +import org.elasticsearch.ElasticSearchParseException; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.io.FastStringReader; import org.elasticsearch.common.lucene.search.MatchNoDocsQuery; @@ -64,6 +68,9 @@ public class MatchQuery { protected String fuzziness = null; protected int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; protected int maxExpansions = FuzzyQuery.defaultMaxExpansions; + //LUCENE 4 UPGRADE we need a default value for this! + protected boolean transpositions = false; + protected MultiTermQuery.RewriteMethod rewriteMethod; protected MultiTermQuery.RewriteMethod fuzzyRewriteMethod; @@ -101,6 +108,10 @@ public class MatchQuery { public void setMaxExpansions(int maxExpansions) { this.maxExpansions = maxExpansions; } + + public void setTranspositions(boolean transpositions) { + this.transpositions = transpositions; + } public void setRewriteMethod(MultiTermQuery.RewriteMethod rewriteMethod) { this.rewriteMethod = rewriteMethod; @@ -116,13 +127,13 @@ public class MatchQuery { public Query parse(Type type, String fieldName, String text) { FieldMapper mapper = null; - Term fieldTerm; + final String field; MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName); if (smartNameFieldMappers != null && smartNameFieldMappers.hasMapper()) { mapper = smartNameFieldMappers.mapper(); - fieldTerm = mapper.names().indexNameTerm(); + field = mapper.names().indexName(); } else { - fieldTerm = new Term(fieldName); + field = fieldName; } if (mapper != null && mapper.useFieldQueryWithQueryString()) { @@ -169,13 +180,13 @@ public class MatchQuery { } // Logic similar to QueryParser#getFieldQuery - - TokenStream source; + final TokenStream source; try { - source = analyzer.reusableTokenStream(fieldTerm.field(), new FastStringReader(text)); + source = analyzer.tokenStream(field, new FastStringReader(text)); source.reset(); - } catch (IOException e) { - source = analyzer.tokenStream(fieldTerm.field(), new FastStringReader(text)); + } catch(IOException ex) { + //LUCENE 4 UPGRADE not sure what todo here really lucene 3.6 had a tokenStream that didn't throw an exc. 
+ throw new ElasticSearchParseException("failed to process query", ex); } CachingTokenFilter buffer = new CachingTokenFilter(source); CharTermAttribute termAtt = null; @@ -183,12 +194,7 @@ public class MatchQuery { int numTokens = 0; boolean success = false; - try { - buffer.reset(); - success = true; - } catch (IOException e) { - // success==false if we hit an exception - } + buffer.reset(); if (success) { if (buffer.hasAttribute(CharTermAttribute.class)) { termAtt = buffer.getAttribute(CharTermAttribute.class); @@ -233,29 +239,26 @@ public class MatchQuery { return MatchNoDocsQuery.INSTANCE; } else if (type == Type.BOOLEAN) { if (numTokens == 1) { - String term = null; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.toString(); } catch (IOException e) { // safe to ignore, because we know the number of tokens } - Query q = newTermQuery(mapper, fieldTerm.createTerm(term)); + //LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8 + Query q = newTermQuery(mapper, new Term(field, termToByteRef(termAtt, new BytesRef()))); return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext); } BooleanQuery q = new BooleanQuery(positionCount == 1); for (int i = 0; i < numTokens; i++) { - String term = null; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.toString(); } catch (IOException e) { // safe to ignore, because we know the number of tokens } - - Query currentQuery = newTermQuery(mapper, fieldTerm.createTerm(term)); + //LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8 + Query currentQuery = newTermQuery(mapper, new Term(field, termToByteRef(termAtt, new BytesRef()))); q.add(currentQuery, occur); } return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext); @@ -266,12 +269,10 @@ public class MatchQuery { List multiTerms = new ArrayList(); int position = -1; for (int i = 0; i < numTokens; i++) { - String term = null; int positionIncrement = 1; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.toString(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } @@ -288,7 +289,8 @@ public class MatchQuery { multiTerms.clear(); } position += positionIncrement; - multiTerms.add(fieldTerm.createTerm(term)); + //LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8 + multiTerms.add(new Term(field, termToByteRef(termAtt, new BytesRef()))); } if (enablePositionIncrements) { mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position); @@ -303,13 +305,11 @@ public class MatchQuery { for (int i = 0; i < numTokens; i++) { - String term = null; int positionIncrement = 1; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.toString(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } @@ -319,9 +319,10 @@ public class MatchQuery { if (enablePositionIncrements) { position += positionIncrement; - pq.add(fieldTerm.createTerm(term), position); + //LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8 + pq.add(new Term(field, termToByteRef(termAtt, new BytesRef())), position); } else { - pq.add(fieldTerm.createTerm(term)); + pq.add(new Term(field, termToByteRef(termAtt, new BytesRef()))); } } return wrapSmartNameQuery(pq, smartNameFieldMappers, parseContext); @@ -333,12 +334,10 @@ public class MatchQuery { List multiTerms = new ArrayList(); int position = 
-1; for (int i = 0; i < numTokens; i++) { - String term = null; int positionIncrement = 1; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; - term = termAtt.toString(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } @@ -355,7 +354,8 @@ public class MatchQuery { multiTerms.clear(); } position += positionIncrement; - multiTerms.add(fieldTerm.createTerm(term)); + //LUCENE 4 UPGRADE instead of string term we can convert directly from utf-16 to utf-8 + multiTerms.add(new Term(field, termToByteRef(termAtt, new BytesRef()))); } if (enablePositionIncrements) { mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position); @@ -376,7 +376,11 @@ public class MatchQuery { QueryParsers.setRewriteMethod((FuzzyQuery) query, fuzzyRewriteMethod); } } - FuzzyQuery query = new FuzzyQuery(term, Float.parseFloat(fuzziness), fuzzyPrefixLength, maxExpansions); + String text = term.text(); + //LUCENE 4 UPGRADE we need to document that this should now be an int rather than a float + int edits = FuzzyQuery.floatToEdits(Float.parseFloat(fuzziness), + text.codePointCount(0, text.length())); + FuzzyQuery query = new FuzzyQuery(term, edits, fuzzyPrefixLength, maxExpansions, transpositions); QueryParsers.setRewriteMethod(query, rewriteMethod); return query; } @@ -388,4 +392,9 @@ public class MatchQuery { } return new TermQuery(term); } + + private static BytesRef termToByteRef(CharTermAttribute attr, BytesRef ref) { + UnicodeUtil.UTF16toUTF8WithHash(attr.buffer(), 0, attr.length(), ref); + return ref; + } } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/search/NumericRangeFieldDataFilter.java b/src/main/java/org/elasticsearch/index/search/NumericRangeFieldDataFilter.java index 7a4adbb9cb9..60761fc9476 100644 --- a/src/main/java/org/elasticsearch/index/search/NumericRangeFieldDataFilter.java +++ b/src/main/java/org/elasticsearch/index/search/NumericRangeFieldDataFilter.java @@ -19,9 +19,10 @@ package org.elasticsearch.index.search; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.common.lucene.docset.DocSet; import org.elasticsearch.common.lucene.docset.GetDocSet; @@ -43,7 +44,7 @@ import java.io.IOException; * */ public abstract class NumericRangeFieldDataFilter extends Filter { - + // LUCENE 4 UPGRADE: this filter doesn't respect acceptDocs yet! 
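One hedged way to close the acceptDocs gap flagged in that comment, mirroring what this same patch series already does in ScriptFilterParser (the class and method names here are invented):

import java.io.IOException;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.BitsFilteredDocIdSet;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.util.Bits;

abstract class AcceptDocsAwareFilter extends Filter {

    // Build the raw per-segment set exactly as the typed factories below do.
    protected abstract DocIdSet innerDocIdSet(AtomicReaderContext ctx) throws IOException;

    @Override
    public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptDocs) throws IOException {
        // wrap() passes the set through untouched when acceptDocs is null,
        // so this stays correct when the filter is not top-level.
        return BitsFilteredDocIdSet.wrap(innerDocIdSet(ctx), acceptDocs);
    }
}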
final FieldDataCache fieldDataCache; final String field; final T lowerVal; @@ -121,7 +122,7 @@ public abstract class NumericRangeFieldDataFilter extends Filter { public static NumericRangeFieldDataFilter newByteRange(FieldDataCache fieldDataCache, String field, Byte lowerVal, Byte upperVal, boolean includeLower, boolean includeUpper) { return new NumericRangeFieldDataFilter(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException { final byte inclusiveLowerPoint, inclusiveUpperPoint; if (lowerVal != null) { byte i = lowerVal.byteValue(); @@ -143,8 +144,8 @@ public abstract class NumericRangeFieldDataFilter extends Filter { if (inclusiveLowerPoint > inclusiveUpperPoint) return DocSet.EMPTY_DOC_SET; - final ByteFieldData fieldData = (ByteFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.BYTE, reader, field); - return new GetDocSet(reader.maxDoc()) { + final ByteFieldData fieldData = (ByteFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.BYTE, ctx.reader(), field); + return new GetDocSet(ctx.reader().maxDoc()) { @Override public boolean isCacheable() { @@ -181,7 +182,7 @@ public abstract class NumericRangeFieldDataFilter extends Filter { public static NumericRangeFieldDataFilter newShortRange(FieldDataCache fieldDataCache, String field, Short lowerVal, Short upperVal, boolean includeLower, boolean includeUpper) { return new NumericRangeFieldDataFilter(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException { final short inclusiveLowerPoint, inclusiveUpperPoint; if (lowerVal != null) { short i = lowerVal.shortValue(); @@ -203,8 +204,8 @@ public abstract class NumericRangeFieldDataFilter extends Filter { if (inclusiveLowerPoint > inclusiveUpperPoint) return DocSet.EMPTY_DOC_SET; - final ShortFieldData fieldData = (ShortFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.SHORT, reader, field); - return new GetDocSet(reader.maxDoc()) { + final ShortFieldData fieldData = (ShortFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.SHORT, ctx.reader(), field); + return new GetDocSet(ctx.reader().maxDoc()) { @Override public boolean isCacheable() { @@ -240,7 +241,7 @@ public abstract class NumericRangeFieldDataFilter extends Filter { public static NumericRangeFieldDataFilter newIntRange(FieldDataCache fieldDataCache, String field, Integer lowerVal, Integer upperVal, boolean includeLower, boolean includeUpper) { return new NumericRangeFieldDataFilter(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException { final int inclusiveLowerPoint, inclusiveUpperPoint; if (lowerVal != null) { int i = lowerVal.intValue(); @@ -262,8 +263,8 @@ public abstract class NumericRangeFieldDataFilter extends Filter { if (inclusiveLowerPoint > inclusiveUpperPoint) return DocSet.EMPTY_DOC_SET; - final IntFieldData fieldData = (IntFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.INT, reader, field); - return new GetDocSet(reader.maxDoc()) { + final IntFieldData fieldData = (IntFieldData) 
this.fieldDataCache.cache(FieldDataType.DefaultTypes.INT, ctx.reader(), field); + return new GetDocSet(ctx.reader().maxDoc()) { @Override public boolean isCacheable() { @@ -299,7 +300,7 @@ public abstract class NumericRangeFieldDataFilter extends Filter { public static NumericRangeFieldDataFilter newLongRange(FieldDataCache fieldDataCache, String field, Long lowerVal, Long upperVal, boolean includeLower, boolean includeUpper) { return new NumericRangeFieldDataFilter(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException { final long inclusiveLowerPoint, inclusiveUpperPoint; if (lowerVal != null) { long i = lowerVal.longValue(); @@ -321,8 +322,8 @@ public abstract class NumericRangeFieldDataFilter extends Filter { if (inclusiveLowerPoint > inclusiveUpperPoint) return DocSet.EMPTY_DOC_SET; - final LongFieldData fieldData = (LongFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.LONG, reader, field); - return new GetDocSet(reader.maxDoc()) { + final LongFieldData fieldData = (LongFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.LONG, ctx.reader(), field); + return new GetDocSet(ctx.reader().maxDoc()) { @Override public boolean isCacheable() { @@ -358,7 +359,7 @@ public abstract class NumericRangeFieldDataFilter extends Filter { public static NumericRangeFieldDataFilter newFloatRange(FieldDataCache fieldDataCache, String field, Float lowerVal, Float upperVal, boolean includeLower, boolean includeUpper) { return new NumericRangeFieldDataFilter(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException { // we transform the floating point numbers to sortable integers // using NumericUtils to easier find the next bigger/lower value final float inclusiveLowerPoint, inclusiveUpperPoint; @@ -384,8 +385,8 @@ public abstract class NumericRangeFieldDataFilter extends Filter { if (inclusiveLowerPoint > inclusiveUpperPoint) return DocSet.EMPTY_DOC_SET; - final FloatFieldData fieldData = (FloatFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.FLOAT, reader, field); - return new GetDocSet(reader.maxDoc()) { + final FloatFieldData fieldData = (FloatFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.FLOAT, ctx.reader(), field); + return new GetDocSet(ctx.reader().maxDoc()) { @Override public boolean isCacheable() { @@ -421,7 +422,7 @@ public abstract class NumericRangeFieldDataFilter extends Filter { public static NumericRangeFieldDataFilter newDoubleRange(FieldDataCache fieldDataCache, String field, Double lowerVal, Double upperVal, boolean includeLower, boolean includeUpper) { return new NumericRangeFieldDataFilter(fieldDataCache, field, lowerVal, upperVal, includeLower, includeUpper) { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException { // we transform the floating point numbers to sortable integers // using NumericUtils to easier find the next bigger/lower value final double inclusiveLowerPoint, inclusiveUpperPoint; @@ -447,8 +448,8 @@ public abstract class NumericRangeFieldDataFilter extends Filter { if (inclusiveLowerPoint > inclusiveUpperPoint) return 
DocSet.EMPTY_DOC_SET; - final DoubleFieldData fieldData = (DoubleFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.DOUBLE, reader, field); - return new GetDocSet(reader.maxDoc()) { + final DoubleFieldData fieldData = (DoubleFieldData) this.fieldDataCache.cache(FieldDataType.DefaultTypes.DOUBLE, ctx.reader(), field); + return new GetDocSet(ctx.reader().maxDoc()) { @Override public boolean isCacheable() { diff --git a/src/main/java/org/elasticsearch/index/search/UidFilter.java b/src/main/java/org/elasticsearch/index/search/UidFilter.java index 99d320114bb..59665b6fd95 100644 --- a/src/main/java/org/elasticsearch/index/search/UidFilter.java +++ b/src/main/java/org/elasticsearch/index/search/UidFilter.java @@ -19,16 +19,15 @@ package org.elasticsearch.index.search; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; -import org.apache.lucene.util.UnicodeUtil; -import org.elasticsearch.common.Unicode; -import org.elasticsearch.common.bloom.BloomFilter; -import org.elasticsearch.index.cache.bloom.BloomCache; import org.elasticsearch.index.mapper.Uid; import org.elasticsearch.index.mapper.internal.UidFieldMapper; @@ -40,17 +39,12 @@ import java.util.List; public class UidFilter extends Filter { final Term[] uids; - - private final BloomCache bloomCache; - - // LUCENE 4 UPGRADE: We removed the bloom cache, so once we rewrite this filter, do it without - public UidFilter(Collection types, List ids, BloomCache bloomCache) { - this.bloomCache = bloomCache; + public UidFilter(Collection types, List ids) { this.uids = new Term[types.size() * ids.size()]; int i = 0; for (String type : types) { for (String id : ids) { - uids[i++] = UidFieldMapper.TERM_FACTORY.createTerm(Uid.createUid(type, id)); + uids[i++] = new Term(UidFieldMapper.NAME, Uid.createUid(type, id)); } } if (this.uids.length > 1) { @@ -66,33 +60,26 @@ public class UidFilter extends Filter { // - If we have a single id, we can create a SingleIdDocIdSet to save on mem // - We can use sorted int array DocIdSet to reserve memory compared to OpenBitSet in some cases @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - BloomFilter filter = bloomCache.filter(reader, UidFieldMapper.NAME, true); + // LUCENE 4 UPGRADE: this filter does respect acceptDocs maybe we need to change this + public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException { FixedBitSet set = null; - TermDocs td = null; - UnicodeUtil.UTF8Result utf8 = new UnicodeUtil.UTF8Result(); - try { - for (Term uid : uids) { - Unicode.fromStringAsUtf8(uid.text(), utf8); - if (!filter.isPresent(utf8.result, 0, utf8.length)) { - continue; - } - if (td == null) { - td = reader.termDocs(); - } - td.seek(uid); - // no need for batching, its on the UID, there will be only one doc - while (td.next()) { + final AtomicReader reader = ctx.reader(); + final TermsEnum termsEnum = reader.terms(UidFieldMapper.NAME).iterator(null); + DocsEnum docsEnum = null; + for (Term uid : uids) { + if (termsEnum.seekExact(uid.bytes(), false)) { + docsEnum = termsEnum.docs(acceptedDocs, docsEnum, 0); + int doc; + while ((doc = 
docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) { + // no need for batching, its on the UID, there will be only + // one doc if (set == null) { set = new FixedBitSet(reader.maxDoc()); } - set.set(td.doc()); + set.set(doc); } } - } finally { - if (td != null) { - td.close(); - } + } return set; } From 24ef9876243a39ce3ae4ce093f4ed5b34965d52d Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 26 Oct 2012 16:51:45 +0200 Subject: [PATCH 033/146] lucene 4: Upgraded the simple id cache. --- .../elasticsearch/index/cache/id/IdCache.java | 5 +- .../index/cache/id/simple/SimpleIdCache.java | 178 +++++++++--------- .../cache/id/simple/SimpleIdReaderCache.java | 12 +- .../id/simple/SimpleIdReaderTypeCache.java | 7 +- .../search/query/QueryPhase.java | 2 +- 5 files changed, 108 insertions(+), 96 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/cache/id/IdCache.java b/src/main/java/org/elasticsearch/index/cache/id/IdCache.java index f563b6ed152..33bd68bee55 100644 --- a/src/main/java/org/elasticsearch/index/cache/id/IdCache.java +++ b/src/main/java/org/elasticsearch/index/cache/id/IdCache.java @@ -19,10 +19,13 @@ package org.elasticsearch.index.cache.id; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.elasticsearch.common.component.CloseableComponent; import org.elasticsearch.index.IndexComponent; +import java.util.List; + /** * */ @@ -32,7 +35,7 @@ public interface IdCache extends IndexComponent, CloseableComponent, Iterable readers) throws Exception; IdReaderCache reader(IndexReader reader); diff --git a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java index 7e43dd6e6e9..0d954f02c73 100644 --- a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java +++ b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java @@ -20,10 +20,11 @@ package org.elasticsearch.index.cache.id.simple; import gnu.trove.impl.Constants; -import gnu.trove.map.hash.TIntObjectHashMap; import org.apache.lucene.index.*; -import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchException; +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.bytes.HashedBytesArray; import org.elasticsearch.common.collect.MapBuilder; import org.elasticsearch.common.inject.Inject; @@ -39,10 +40,7 @@ import org.elasticsearch.index.mapper.internal.ParentFieldMapper; import org.elasticsearch.index.mapper.internal.UidFieldMapper; import org.elasticsearch.index.settings.IndexSettings; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Iterator; -import java.util.Map; +import java.util.*; import java.util.concurrent.ConcurrentMap; /** @@ -91,20 +89,20 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se @SuppressWarnings({"StringEquality"}) @Override - public void refresh(IndexReader[] readers) throws Exception { + public void refresh(List atomicReaderContexts) throws Exception { // do a quick check for the common case, that all are there - if (refreshNeeded(readers)) { + if (refreshNeeded(atomicReaderContexts)) { synchronized (idReaders) { - if (!refreshNeeded(readers)) { + if (!refreshNeeded(atomicReaderContexts)) { return; } // do the refresh - - Map> builders = new HashMap>(); + Map> builders = new HashMap>(); // first, go 
over and load all the id->doc map for all types - for (IndexReader reader : readers) { + for (AtomicReaderContext context : atomicReaderContexts) { + AtomicReader reader = context.reader(); if (idReaders.containsKey(reader.getCoreCacheKey())) { // no need, continue continue; @@ -113,98 +111,84 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se if (reader instanceof SegmentReader) { ((SegmentReader) reader).addCoreClosedListener(this); } - HashMap readerBuilder = new HashMap(); + Map readerBuilder = new HashMap(); builders.put(reader.getCoreCacheKey(), readerBuilder); - String field = StringHelper.intern(UidFieldMapper.NAME); - TermDocs termDocs = reader.termDocs(); - TermEnum termEnum = reader.terms(new Term(field)); - try { - do { - Term term = termEnum.term(); - if (term == null || term.field() != field) break; - // TODO we can optimize this, since type is the prefix, and we get terms ordered - // so, only need to move to the next type once its different - Uid uid = Uid.createUid(term.text()); - TypeBuilder typeBuilder = readerBuilder.get(uid.type()); - if (typeBuilder == null) { - typeBuilder = new TypeBuilder(reader); - readerBuilder.put(StringHelper.intern(uid.type()), typeBuilder); - } + Terms terms = reader.terms(UidFieldMapper.NAME); + if (terms == null) { // Should not happen + throw new ElasticSearchIllegalArgumentException("Id cache needs _uid field"); + } - HashedBytesArray idAsBytes = checkIfCanReuse(builders, new HashedBytesArray(uid.id())); - termDocs.seek(termEnum); - while (termDocs.next()) { - // when traversing, make sure to ignore deleted docs, so the key->docId will be correct - if (!reader.isDeleted(termDocs.doc())) { - typeBuilder.idToDoc.put(idAsBytes, termDocs.doc()); - typeBuilder.docToId[termDocs.doc()] = idAsBytes; - } - } - } while (termEnum.next()); - } finally { - termDocs.close(); - termEnum.close(); + TermsEnum termsEnum = terms.iterator(null); + DocsEnum docsEnum = null; + for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.term()) { + HashedBytesArray[] typeAndId = splitUidIntoTypeAndId(term); + TypeBuilder typeBuilder = readerBuilder.get(typeAndId[0]); + if (typeBuilder == null) { + typeBuilder = new TypeBuilder(reader); + readerBuilder.put(typeAndId[0], typeBuilder); + } + + HashedBytesArray idAsBytes = checkIfCanReuse(builders, typeAndId[1]); + docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0); + for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) { + typeBuilder.idToDoc.put(idAsBytes, docId); + typeBuilder.docToId[docId] = idAsBytes; + } } } // now, go and load the docId->parentId map - for (IndexReader reader : readers) { + for (AtomicReaderContext context : atomicReaderContexts) { + AtomicReader reader = context.reader(); if (idReaders.containsKey(reader.getCoreCacheKey())) { // no need, continue continue; } - Map readerBuilder = builders.get(reader.getCoreCacheKey()); + Map readerBuilder = builders.get(reader.getCoreCacheKey()); - String field = StringHelper.intern(ParentFieldMapper.NAME); - TermDocs termDocs = reader.termDocs(); - TermEnum termEnum = reader.terms(new Term(field)); - try { - do { - Term term = termEnum.term(); - if (term == null || term.field() != field) break; - // TODO we can optimize this, since type is the prefix, and we get terms ordered - // so, only need to move to the next type once its different - Uid uid = Uid.createUid(term.text()); + Terms terms = reader.terms(ParentFieldMapper.NAME); + if (terms == null) { // 
Should not happen + throw new ElasticSearchIllegalArgumentException("Id cache needs _parent field"); + } - TypeBuilder typeBuilder = readerBuilder.get(uid.type()); - if (typeBuilder == null) { - typeBuilder = new TypeBuilder(reader); - readerBuilder.put(StringHelper.intern(uid.type()), typeBuilder); + TermsEnum termsEnum = terms.iterator(null); + DocsEnum docsEnum = null; + for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.term()) { + HashedBytesArray[] typeAndId = splitUidIntoTypeAndId(term); + + TypeBuilder typeBuilder = readerBuilder.get(typeAndId[0]); + if (typeBuilder == null) { + typeBuilder = new TypeBuilder(reader); + readerBuilder.put(typeAndId[0], typeBuilder); + } + + HashedBytesArray idAsBytes = checkIfCanReuse(builders, typeAndId[1]); + boolean added = false; // optimize for when all the docs are deleted for this id + + docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0); + for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) { + if (!added) { + typeBuilder.parentIdsValues.add(idAsBytes); + added = true; } + typeBuilder.parentIdsOrdinals[docId] = typeBuilder.t; + } - HashedBytesArray idAsBytes = checkIfCanReuse(builders, new HashedBytesArray(uid.id())); - boolean added = false; // optimize for when all the docs are deleted for this id - - termDocs.seek(termEnum); - while (termDocs.next()) { - // ignore deleted docs while we are at it - if (!reader.isDeleted(termDocs.doc())) { - if (!added) { - typeBuilder.parentIdsValues.add(idAsBytes); - added = true; - } - typeBuilder.parentIdsOrdinals[termDocs.doc()] = typeBuilder.t; - } - } - if (added) { - typeBuilder.t++; - } - } while (termEnum.next()); - } finally { - termDocs.close(); - termEnum.close(); + if (added) { + typeBuilder.t++; + } } } // now, build it back - for (Map.Entry> entry : builders.entrySet()) { - MapBuilder types = MapBuilder.newMapBuilder(); - for (Map.Entry typeBuilderEntry : entry.getValue().entrySet()) { + for (Map.Entry> entry : builders.entrySet()) { + MapBuilder types = MapBuilder.newMapBuilder(); + for (Map.Entry typeBuilderEntry : entry.getValue().entrySet()) { types.put(typeBuilderEntry.getKey(), new SimpleIdReaderTypeCache(typeBuilderEntry.getKey(), typeBuilderEntry.getValue().idToDoc, typeBuilderEntry.getValue().docToId, @@ -226,7 +210,7 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se return sizeInBytes; } - private HashedBytesArray checkIfCanReuse(Map> builders, HashedBytesArray idAsBytes) { + private HashedBytesArray checkIfCanReuse(Map> builders, HashedBytesArray idAsBytes) { HashedBytesArray finalIdAsBytes; // go over and see if we can reuse this id for (SimpleIdReaderCache idReaderCache : idReaders.values()) { @@ -235,7 +219,7 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se return finalIdAsBytes; } } - for (Map map : builders.values()) { + for (Map map : builders.values()) { for (TypeBuilder typeBuilder : map.values()) { finalIdAsBytes = typeBuilder.canReuse(idAsBytes); if (finalIdAsBytes != null) { @@ -246,15 +230,37 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se return idAsBytes; } - private boolean refreshNeeded(IndexReader[] readers) { - for (IndexReader reader : readers) { - if (!idReaders.containsKey(reader.getCoreCacheKey())) { + private boolean refreshNeeded(List atomicReaderContexts) { + for (AtomicReaderContext atomicReaderContext : atomicReaderContexts) { + if 
(!idReaders.containsKey(atomicReaderContext.reader().getCoreCacheKey())) { return true; } } return false; } + // LUCENE 4 UPGRADE: This logic should go to Uid class. Uid class should BR based instead of string + private static HashedBytesArray[] splitUidIntoTypeAndId(BytesRef term) { + int loc = -1; + for (int i = term.offset; i < term.length; i++) { + if (term.bytes[i] == 0x23) { // 0x23 is equal to '#' + loc = i; + break; + } + } + + if (loc == -1) { + return null; + } + + byte[] type = new byte[loc - term.offset]; + System.arraycopy(term.bytes, term.offset, type, 0, type.length); + + byte[] id = new byte[term.length - type.length -1]; + System.arraycopy(term.bytes, loc + 1, id, 0, id.length); + return new HashedBytesArray[]{new HashedBytesArray(type), new HashedBytesArray(id)}; + } + static class TypeBuilder { final ExtTObjectIntHasMap idToDoc = new ExtTObjectIntHasMap(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1); final HashedBytesArray[] docToId; diff --git a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdReaderCache.java b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdReaderCache.java index 5d6d5d1abdc..f57f65e4648 100644 --- a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdReaderCache.java +++ b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdReaderCache.java @@ -20,6 +20,8 @@ package org.elasticsearch.index.cache.id.simple; import com.google.common.collect.ImmutableMap; +import org.elasticsearch.common.bytes.BytesArray; +import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.bytes.HashedBytesArray; import org.elasticsearch.index.cache.id.IdReaderCache; import org.elasticsearch.index.cache.id.IdReaderTypeCache; @@ -31,9 +33,9 @@ public class SimpleIdReaderCache implements IdReaderCache { private final Object readerCacheKey; - private final ImmutableMap types; + private final ImmutableMap types; - public SimpleIdReaderCache(Object readerCacheKey, ImmutableMap types) { + public SimpleIdReaderCache(Object readerCacheKey, ImmutableMap types) { this.readerCacheKey = readerCacheKey; this.types = types; } @@ -45,12 +47,12 @@ public class SimpleIdReaderCache implements IdReaderCache { @Override public IdReaderTypeCache type(String type) { - return types.get(type); + return types.get(new BytesArray(type)); } @Override public HashedBytesArray parentIdByDoc(String type, int docId) { - SimpleIdReaderTypeCache typeCache = types.get(type); + SimpleIdReaderTypeCache typeCache = types.get(new BytesArray(type)); if (typeCache != null) { return typeCache.parentIdByDoc(docId); } @@ -59,7 +61,7 @@ public class SimpleIdReaderCache implements IdReaderCache { @Override public int docById(String type, HashedBytesArray id) { - SimpleIdReaderTypeCache typeCache = types.get(type); + SimpleIdReaderTypeCache typeCache = types.get(new BytesArray(type)); if (typeCache != null) { return typeCache.docById(id); } diff --git a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdReaderTypeCache.java b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdReaderTypeCache.java index 083e79f3b51..ac63bc378f0 100644 --- a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdReaderTypeCache.java +++ b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdReaderTypeCache.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.cache.id.simple; import gnu.trove.impl.hash.TObjectHash; import org.elasticsearch.common.RamUsage; +import org.elasticsearch.common.bytes.BytesReference; import 
org.elasticsearch.common.bytes.HashedBytesArray; import org.elasticsearch.common.trove.ExtTObjectIntHasMap; import org.elasticsearch.index.cache.id.IdReaderTypeCache; @@ -30,7 +31,7 @@ import org.elasticsearch.index.cache.id.IdReaderTypeCache; */ public class SimpleIdReaderTypeCache implements IdReaderTypeCache { - private final String type; + private final BytesReference type; private final ExtTObjectIntHasMap idToDoc; @@ -42,7 +43,7 @@ public class SimpleIdReaderTypeCache implements IdReaderTypeCache { private long sizeInBytes = -1; - public SimpleIdReaderTypeCache(String type, ExtTObjectIntHasMap idToDoc, HashedBytesArray[] docIdToId, + public SimpleIdReaderTypeCache(BytesReference type, ExtTObjectIntHasMap idToDoc, HashedBytesArray[] docIdToId, HashedBytesArray[] parentIdsValues, int[] parentIdsOrdinals) { this.type = type; this.idToDoc = idToDoc; @@ -52,7 +53,7 @@ public class SimpleIdReaderTypeCache implements IdReaderTypeCache { this.parentIdsOrdinals = parentIdsOrdinals; } - public String type() { + public BytesReference type() { return this.type; } diff --git a/src/main/java/org/elasticsearch/search/query/QueryPhase.java b/src/main/java/org/elasticsearch/search/query/QueryPhase.java index 9811d7b5884..57b4e0bcf8c 100644 --- a/src/main/java/org/elasticsearch/search/query/QueryPhase.java +++ b/src/main/java/org/elasticsearch/search/query/QueryPhase.java @@ -84,7 +84,7 @@ public class QueryPhase implements SearchPhase { if (searchContext.scopePhases() != null) { // we have scoped queries, refresh the id cache try { - searchContext.idCache().refresh(searchContext.searcher().subReaders()); + searchContext.idCache().refresh(searchContext.searcher().getTopReaderContext().leaves()); } catch (Exception e) { throw new QueryPhaseExecutionException(searchContext, "Failed to refresh id cache for child queries", e); } From 4e5e4869a6d8ec688f50654e614001fb06d1b909 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Fri, 26 Oct 2012 11:00:11 -0400 Subject: [PATCH 034/146] lucene 4: add custom analyzer wrapper that supports overriding of getOffsetGap --- .../analysis/CustomAnalyzerWrapper.java | 85 +++++++++++++++++++ .../index/analysis/NamedAnalyzer.java | 6 +- 2 files changed, 87 insertions(+), 4 deletions(-) create mode 100644 src/main/java/org/apache/lucene/analysis/CustomAnalyzerWrapper.java diff --git a/src/main/java/org/apache/lucene/analysis/CustomAnalyzerWrapper.java b/src/main/java/org/apache/lucene/analysis/CustomAnalyzerWrapper.java new file mode 100644 index 00000000000..7bcdfc3da0a --- /dev/null +++ b/src/main/java/org/apache/lucene/analysis/CustomAnalyzerWrapper.java @@ -0,0 +1,85 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.lucene.analysis; + +import java.io.Reader; + +/** + * Extension to {@link Analyzer} suitable for Analyzers which wrap + * other Analyzers. + *
<p/>
+ * {@link #getWrappedAnalyzer(String)} allows the Analyzer + * to wrap multiple Analyzers which are selected on a per field basis. + *
<p/>
+ * {@link #wrapComponents(String, Analyzer.TokenStreamComponents)} allows the + * TokenStreamComponents of the wrapped Analyzer to then be wrapped + * (such as adding a new {@link TokenFilter} to form new TokenStreamComponents. + */ +public abstract class CustomAnalyzerWrapper extends Analyzer { + + /** + * Creates a new CustomAnalyzerWrapper. Since the {@link Analyzer.ReuseStrategy} of + * the wrapped Analyzers are unknown, {@link Analyzer.PerFieldReuseStrategy} is assumed + */ + protected CustomAnalyzerWrapper() { + super(new PerFieldReuseStrategy()); + } + + /** + * Retrieves the wrapped Analyzer appropriate for analyzing the field with + * the given name + * + * @param fieldName Name of the field which is to be analyzed + * @return Analyzer for the field with the given name. Assumed to be non-null + */ + protected abstract Analyzer getWrappedAnalyzer(String fieldName); + + /** + * Wraps / alters the given TokenStreamComponents, taken from the wrapped + * Analyzer, to form new components. It is through this method that new + * TokenFilters can be added by AnalyzerWrappers. + * + * + * @param fieldName Name of the field which is to be analyzed + * @param components TokenStreamComponents taken from the wrapped Analyzer + * @return Wrapped / altered TokenStreamComponents. + */ + protected abstract TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components); + + @Override + protected final TokenStreamComponents createComponents(String fieldName, Reader aReader) { + return wrapComponents(fieldName, getWrappedAnalyzer(fieldName).createComponents(fieldName, aReader)); + } + + @Override + public int getPositionIncrementGap(String fieldName) { + return getWrappedAnalyzer(fieldName).getPositionIncrementGap(fieldName); + } + + @Override + public int getOffsetGap(String fieldName) { + return getWrappedAnalyzer(fieldName).getOffsetGap(fieldName); + } + + @Override + public final Reader initReader(String fieldName, Reader reader) { + return getWrappedAnalyzer(fieldName).initReader(fieldName, reader); + } +} diff --git a/src/main/java/org/elasticsearch/index/analysis/NamedAnalyzer.java b/src/main/java/org/elasticsearch/index/analysis/NamedAnalyzer.java index 70cff9e27a0..a639a9fafd6 100644 --- a/src/main/java/org/elasticsearch/index/analysis/NamedAnalyzer.java +++ b/src/main/java/org/elasticsearch/index/analysis/NamedAnalyzer.java @@ -20,15 +20,13 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.AnalyzerWrapper; - -import java.io.Reader; +import org.apache.lucene.analysis.CustomAnalyzerWrapper; /** * Named analyzer is an analyzer wrapper around an actual analyzer ({@link #analyzer} that is associated * with a name ({@link #name()}. 
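A hedged usage sketch for the wrapper added above (the subclass, its fields, and the gap value are invented): the point of CustomAnalyzerWrapper over Lucene's stock AnalyzerWrapper is that getOffsetGap(String) stays overridable.

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CustomAnalyzerWrapper;

public final class FixedOffsetGapAnalyzer extends CustomAnalyzerWrapper {

    private final Analyzer delegate;
    private final int offsetGap;

    public FixedOffsetGapAnalyzer(Analyzer delegate, int offsetGap) {
        this.delegate = delegate;
        this.offsetGap = offsetGap;
    }

    @Override
    protected Analyzer getWrappedAnalyzer(String fieldName) {
        return delegate;
    }

    @Override
    protected Analyzer.TokenStreamComponents wrapComponents(String fieldName, Analyzer.TokenStreamComponents components) {
        return components; // pass the delegate's components through unchanged
    }

    @Override
    public int getOffsetGap(String fieldName) {
        return offsetGap;  // the override the stock wrapper did not allow
    }
}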
*/ -public class NamedAnalyzer extends AnalyzerWrapper { +public class NamedAnalyzer extends CustomAnalyzerWrapper { private final String name; From 71c3bd7c6439679f31af4758a70f956d9337b5ac Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 26 Oct 2012 17:11:55 +0200 Subject: [PATCH 035/146] lucene 4: SearchContext#setNextReader accepts an AtomicReaderContext instead of an AtomicReader --- .../sort/DoubleFieldsFunctionDataComparator.java | 2 +- .../sort/StringFieldsFunctionDataComparator.java | 2 +- .../elasticsearch/script/AbstractSearchScript.java | 6 +++--- .../java/org/elasticsearch/script/SearchScript.java | 3 ++- .../script/mvel/MvelScriptEngineService.java | 6 +++--- .../ValueScriptDateHistogramFacetCollector.java | 2 +- .../geodistance/ScriptGeoDistanceFacetCollector.java | 2 +- .../BoundedValueScriptHistogramFacetCollector.java | 2 +- .../unbounded/ScriptHistogramFacetCollector.java | 4 ++-- .../unbounded/ValueScriptHistogramFacetCollector.java | 2 +- .../search/facet/range/ScriptRangeFacetCollector.java | 4 ++-- .../statistical/ScriptStatisticalFacetCollector.java | 2 +- .../facet/terms/bytes/TermsByteFacetCollector.java | 2 +- .../terms/doubles/TermsDoubleFacetCollector.java | 2 +- .../facet/terms/floats/TermsFloatFacetCollector.java | 2 +- .../facet/terms/ints/TermsIntFacetCollector.java | 2 +- .../search/facet/terms/ip/TermsIpFacetCollector.java | 2 +- .../facet/terms/longs/TermsLongFacetCollector.java | 2 +- .../facet/terms/shorts/TermsShortFacetCollector.java | 2 +- .../strings/FieldsTermsStringFacetCollector.java | 2 +- .../strings/ScriptTermsStringFieldFacetCollector.java | 2 +- .../terms/strings/TermsStringFacetCollector.java | 2 +- .../doubles/TermsStatsDoubleFacetCollector.java | 2 +- .../longs/TermsStatsLongFacetCollector.java | 2 +- .../strings/TermsStatsStringFacetCollector.java | 2 +- .../org/elasticsearch/search/lookup/DocLookup.java | 8 ++++---- .../org/elasticsearch/search/lookup/FieldsLookup.java | 8 ++++---- .../org/elasticsearch/search/lookup/SearchLookup.java | 11 +++++------ .../org/elasticsearch/search/lookup/SourceLookup.java | 8 ++++---- 29 files changed, 49 insertions(+), 49 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/field/function/sort/DoubleFieldsFunctionDataComparator.java b/src/main/java/org/elasticsearch/index/field/function/sort/DoubleFieldsFunctionDataComparator.java index d3aded6b2fa..d130be283e3 100644 --- a/src/main/java/org/elasticsearch/index/field/function/sort/DoubleFieldsFunctionDataComparator.java +++ b/src/main/java/org/elasticsearch/index/field/function/sort/DoubleFieldsFunctionDataComparator.java @@ -69,7 +69,7 @@ public class DoubleFieldsFunctionDataComparator extends FieldComparator @Override public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { - script.setNextReader(context.reader()); + script.setNextReader(context); return this; } diff --git a/src/main/java/org/elasticsearch/index/field/function/sort/StringFieldsFunctionDataComparator.java b/src/main/java/org/elasticsearch/index/field/function/sort/StringFieldsFunctionDataComparator.java index 20c8534910c..58a18b6ad4f 100644 --- a/src/main/java/org/elasticsearch/index/field/function/sort/StringFieldsFunctionDataComparator.java +++ b/src/main/java/org/elasticsearch/index/field/function/sort/StringFieldsFunctionDataComparator.java @@ -69,7 +69,7 @@ public class StringFieldsFunctionDataComparator extends FieldComparator @Override public FieldComparator setNextReader(AtomicReaderContext context) throws IOException { - 
script.setNextReader(context.reader()); + script.setNextReader(context); return this; } diff --git a/src/main/java/org/elasticsearch/script/AbstractSearchScript.java b/src/main/java/org/elasticsearch/script/AbstractSearchScript.java index 002fcf8eb44..14548d05fa9 100644 --- a/src/main/java/org/elasticsearch/script/AbstractSearchScript.java +++ b/src/main/java/org/elasticsearch/script/AbstractSearchScript.java @@ -19,7 +19,7 @@ package org.elasticsearch.script; -import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; import org.elasticsearch.search.lookup.DocLookup; import org.elasticsearch.search.lookup.FieldsLookup; @@ -83,8 +83,8 @@ public abstract class AbstractSearchScript extends AbstractExecutableScript impl } @Override - public void setNextReader(AtomicReader reader) { - lookup.setNextReader(reader); + public void setNextReader(AtomicReaderContext context) { + lookup.setNextReader(context); } @Override diff --git a/src/main/java/org/elasticsearch/script/SearchScript.java b/src/main/java/org/elasticsearch/script/SearchScript.java index b8fbd9d81f7..2c35a14857f 100644 --- a/src/main/java/org/elasticsearch/script/SearchScript.java +++ b/src/main/java/org/elasticsearch/script/SearchScript.java @@ -20,6 +20,7 @@ package org.elasticsearch.script; import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; @@ -34,7 +35,7 @@ public interface SearchScript extends ExecutableScript { void setScorer(Scorer scorer); - void setNextReader(AtomicReader reader); + void setNextReader(AtomicReaderContext context); void setNextDocId(int doc); diff --git a/src/main/java/org/elasticsearch/script/mvel/MvelScriptEngineService.java b/src/main/java/org/elasticsearch/script/mvel/MvelScriptEngineService.java index 39e89225472..f73cc379019 100644 --- a/src/main/java/org/elasticsearch/script/mvel/MvelScriptEngineService.java +++ b/src/main/java/org/elasticsearch/script/mvel/MvelScriptEngineService.java @@ -19,7 +19,7 @@ package org.elasticsearch.script.mvel; -import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.component.AbstractComponent; @@ -163,8 +163,8 @@ public class MvelScriptEngineService extends AbstractComponent implements Script } @Override - public void setNextReader(AtomicReader reader) { - lookup.setNextReader(reader); + public void setNextReader(AtomicReaderContext context) { + lookup.setNextReader(context); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueScriptDateHistogramFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueScriptDateHistogramFacetCollector.java index 21a7f84444e..bbb0d481308 100644 --- a/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueScriptDateHistogramFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/datehistogram/ValueScriptDateHistogramFacetCollector.java @@ -96,7 +96,7 @@ public class ValueScriptDateHistogramFacetCollector extends AbstractFacetCollect @Override protected void doSetNextReader(AtomicReaderContext context) throws IOException { fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); - valueScript.setNextReader(context.reader()); + 
valueScript.setNextReader(context); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/geodistance/ScriptGeoDistanceFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/geodistance/ScriptGeoDistanceFacetCollector.java index 7625902c62b..d3251a84509 100644 --- a/src/main/java/org/elasticsearch/search/facet/geodistance/ScriptGeoDistanceFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/geodistance/ScriptGeoDistanceFacetCollector.java @@ -57,7 +57,7 @@ public class ScriptGeoDistanceFacetCollector extends GeoDistanceFacetCollector { @Override protected void doSetNextReader(AtomicReaderContext context) throws IOException { super.doSetNextReader(context); - script.setNextReader(context.reader()); + script.setNextReader(context); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/histogram/bounded/BoundedValueScriptHistogramFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/histogram/bounded/BoundedValueScriptHistogramFacetCollector.java index 0f0ad6ed907..a19f9bab4fc 100644 --- a/src/main/java/org/elasticsearch/search/facet/histogram/bounded/BoundedValueScriptHistogramFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/histogram/bounded/BoundedValueScriptHistogramFacetCollector.java @@ -102,7 +102,7 @@ public class BoundedValueScriptHistogramFacetCollector extends AbstractFacetColl @Override protected void doSetNextReader(AtomicReaderContext context) throws IOException { fieldData = (NumericFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); - valueScript.setNextReader(context.reader()); + valueScript.setNextReader(context); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ScriptHistogramFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ScriptHistogramFacetCollector.java index e484c4440e8..c050fde3e1d 100644 --- a/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ScriptHistogramFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ScriptHistogramFacetCollector.java @@ -92,8 +92,8 @@ public class ScriptHistogramFacetCollector extends AbstractFacetCollector { @Override protected void doSetNextReader(AtomicReaderContext context) throws IOException { - keyScript.setNextReader(context.reader()); - valueScript.setNextReader(context.reader()); + keyScript.setNextReader(context); + valueScript.setNextReader(context); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ValueScriptHistogramFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ValueScriptHistogramFacetCollector.java index eca0817bba5..6a34b417b10 100644 --- a/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ValueScriptHistogramFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/histogram/unbounded/ValueScriptHistogramFacetCollector.java @@ -96,7 +96,7 @@ public class ValueScriptHistogramFacetCollector extends AbstractFacetCollector { @Override protected void doSetNextReader(AtomicReaderContext context) throws IOException { fieldData = (NumericFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); - valueScript.setNextReader(context.reader()); + valueScript.setNextReader(context); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/range/ScriptRangeFacetCollector.java 
b/src/main/java/org/elasticsearch/search/facet/range/ScriptRangeFacetCollector.java index 81bf33e6157..a8902350e46 100644 --- a/src/main/java/org/elasticsearch/search/facet/range/ScriptRangeFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/range/ScriptRangeFacetCollector.java @@ -55,8 +55,8 @@ public class ScriptRangeFacetCollector extends AbstractFacetCollector { @Override protected void doSetNextReader(AtomicReaderContext context) throws IOException { - keyScript.setNextReader(context.reader()); - valueScript.setNextReader(context.reader()); + keyScript.setNextReader(context); + valueScript.setNextReader(context); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/statistical/ScriptStatisticalFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/statistical/ScriptStatisticalFacetCollector.java index 7708b80ea1d..6ae5c4eda49 100644 --- a/src/main/java/org/elasticsearch/search/facet/statistical/ScriptStatisticalFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/statistical/ScriptStatisticalFacetCollector.java @@ -73,7 +73,7 @@ public class ScriptStatisticalFacetCollector extends AbstractFacetCollector { @Override protected void doSetNextReader(AtomicReaderContext context) throws IOException { - script.setNextReader(context.reader()); + script.setNextReader(context); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteFacetCollector.java index 59ff3a083d3..92bc191ee39 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/bytes/TermsByteFacetCollector.java @@ -130,7 +130,7 @@ public class TermsByteFacetCollector extends AbstractFacetCollector { protected void doSetNextReader(AtomicReaderContext context) throws IOException { fieldData = (ByteFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); if (script != null) { - script.setNextReader(context.reader()); + script.setNextReader(context); } } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleFacetCollector.java index bce37aaa9df..cb8ec6efac6 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/doubles/TermsDoubleFacetCollector.java @@ -129,7 +129,7 @@ public class TermsDoubleFacetCollector extends AbstractFacetCollector { protected void doSetNextReader(AtomicReaderContext context) throws IOException { fieldData = (DoubleFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); if (script != null) { - script.setNextReader(context.reader()); + script.setNextReader(context); } } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatFacetCollector.java index 7c25fd7f13a..5aef46adb61 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/floats/TermsFloatFacetCollector.java @@ -129,7 +129,7 @@ public class TermsFloatFacetCollector extends AbstractFacetCollector { protected void doSetNextReader(AtomicReaderContext context) throws IOException { fieldData = 
(FloatFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); if (script != null) { - script.setNextReader(context.reader()); + script.setNextReader(context); } } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntFacetCollector.java index be37b6da701..0cadda36c12 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/ints/TermsIntFacetCollector.java @@ -129,7 +129,7 @@ public class TermsIntFacetCollector extends AbstractFacetCollector { protected void doSetNextReader(AtomicReaderContext context) throws IOException { fieldData = (IntFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); if (script != null) { - script.setNextReader(context.reader()); + script.setNextReader(context); } } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpFacetCollector.java index 9a7f127e97f..ca5c46e6703 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/ip/TermsIpFacetCollector.java @@ -125,7 +125,7 @@ public class TermsIpFacetCollector extends AbstractFacetCollector { protected void doSetNextReader(AtomicReaderContext context) throws IOException { fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); if (script != null) { - script.setNextReader(context.reader()); + script.setNextReader(context); } } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongFacetCollector.java index 9a505d35b7e..7b17f60c606 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/longs/TermsLongFacetCollector.java @@ -138,7 +138,7 @@ public class TermsLongFacetCollector extends AbstractFacetCollector { protected void doSetNextReader(AtomicReaderContext context) throws IOException { fieldData = (LongFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); if (script != null) { - script.setNextReader(context.reader()); + script.setNextReader(context); } } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortFacetCollector.java index 079ec61f15b..247729e079c 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/shorts/TermsShortFacetCollector.java @@ -129,7 +129,7 @@ public class TermsShortFacetCollector extends AbstractFacetCollector { protected void doSetNextReader(AtomicReaderContext context) throws IOException { fieldData = (ShortFieldData) fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); if (script != null) { - script.setNextReader(context.reader()); + script.setNextReader(context); } } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/strings/FieldsTermsStringFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/strings/FieldsTermsStringFacetCollector.java index 436d896b9b5..7a1b09056c5 100644 --- 
a/src/main/java/org/elasticsearch/search/facet/terms/strings/FieldsTermsStringFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/strings/FieldsTermsStringFacetCollector.java @@ -131,7 +131,7 @@ public class FieldsTermsStringFacetCollector extends AbstractFacetCollector { fieldsData[i] = fieldDataCache.cache(fieldsDataType[i], context.reader(), indexFieldsNames[i]); } if (script != null) { - script.setNextReader(context.reader()); + script.setNextReader(context); } } diff --git a/src/main/java/org/elasticsearch/search/facet/terms/strings/ScriptTermsStringFieldFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/strings/ScriptTermsStringFieldFacetCollector.java index 48740b442a3..c0fbbe6d403 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/strings/ScriptTermsStringFieldFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/strings/ScriptTermsStringFieldFacetCollector.java @@ -83,7 +83,7 @@ public class ScriptTermsStringFieldFacetCollector extends AbstractFacetCollector @Override protected void doSetNextReader(AtomicReaderContext context) throws IOException { - script.setNextReader(context.reader()); + script.setNextReader(context); } @Override diff --git a/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java index 5c90c1c6d40..23f3e17b66a 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java @@ -137,7 +137,7 @@ public class TermsStringFacetCollector extends AbstractFacetCollector { protected void doSetNextReader(AtomicReaderContext context) throws IOException { fieldData = fieldDataCache.cache(fieldDataType, context.reader(), indexFieldName); if (script != null) { - script.setNextReader(context.reader()); + script.setNextReader(context); } } diff --git a/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/TermsStatsDoubleFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/TermsStatsDoubleFacetCollector.java index 07864d4375b..637233a57da 100644 --- a/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/TermsStatsDoubleFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/termsstats/doubles/TermsStatsDoubleFacetCollector.java @@ -116,7 +116,7 @@ public class TermsStatsDoubleFacetCollector extends AbstractFacetCollector { protected void doSetNextReader(AtomicReaderContext context) throws IOException { keyFieldData = (NumericFieldData) fieldDataCache.cache(keyFieldDataType, context.reader(), keyFieldName); if (script != null) { - script.setNextReader(context.reader()); + script.setNextReader(context); } else { aggregator.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, context.reader(), valueFieldName); } diff --git a/src/main/java/org/elasticsearch/search/facet/termsstats/longs/TermsStatsLongFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/termsstats/longs/TermsStatsLongFacetCollector.java index 2998fc0195c..c36a89a3441 100644 --- a/src/main/java/org/elasticsearch/search/facet/termsstats/longs/TermsStatsLongFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/termsstats/longs/TermsStatsLongFacetCollector.java @@ -117,7 +117,7 @@ public class TermsStatsLongFacetCollector extends AbstractFacetCollector { 
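
    // Illustrative comment, not part of the patch: the recurring Lucene 4 migration
    // pattern in this commit. Per-segment consumers now receive the AtomicReaderContext
    // and pull the segment reader (and, where needed, the doc id base) from it:
    //
    //     public void setNextReader(AtomicReaderContext context) throws IOException {
    //         AtomicReader reader = context.reader(); // per-segment reader
    //         int docBase = context.docBase;          // offset into the composite reader
    //     }
    //
    // which is why call sites change from script.setNextReader(context.reader())
    // to script.setNextReader(context), letting each consumer take what it needs.
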
protected void doSetNextReader(AtomicReaderContext context) throws IOException { keyFieldData = (NumericFieldData) fieldDataCache.cache(keyFieldDataType, context.reader(), keyFieldName); if (script != null) { - script.setNextReader(context.reader()); + script.setNextReader(context); } else { aggregator.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, context.reader(), valueFieldName); } diff --git a/src/main/java/org/elasticsearch/search/facet/termsstats/strings/TermsStatsStringFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/termsstats/strings/TermsStatsStringFacetCollector.java index 82afa173f92..bdff3fb3714 100644 --- a/src/main/java/org/elasticsearch/search/facet/termsstats/strings/TermsStatsStringFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/termsstats/strings/TermsStatsStringFacetCollector.java @@ -118,7 +118,7 @@ public class TermsStatsStringFacetCollector extends AbstractFacetCollector { protected void doSetNextReader(AtomicReaderContext context) throws IOException { keyFieldData = fieldDataCache.cache(keyFieldDataType, context.reader(), keyFieldName); if (script != null) { - script.setNextReader(context.reader()); + script.setNextReader(context); } else { aggregator.valueFieldData = (NumericFieldData) fieldDataCache.cache(valueFieldDataType, context.reader(), valueFieldName); } diff --git a/src/main/java/org/elasticsearch/search/lookup/DocLookup.java b/src/main/java/org/elasticsearch/search/lookup/DocLookup.java index 9e72a8d6553..3b52c988bb5 100644 --- a/src/main/java/org/elasticsearch/search/lookup/DocLookup.java +++ b/src/main/java/org/elasticsearch/search/lookup/DocLookup.java @@ -21,7 +21,7 @@ package org.elasticsearch.search.lookup; import com.google.common.collect.Maps; import org.apache.lucene.index.AtomicReader; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.ElasticSearchIllegalArgumentException; @@ -73,11 +73,11 @@ public class DocLookup implements Map { return this.fieldDataCache; } - public void setNextReader(AtomicReader reader) { - if (this.reader == reader) { // if we are called with the same reader, don't invalidate source + public void setNextReader(AtomicReaderContext context) { + if (this.reader == context.reader()) { // if we are called with the same reader, don't invalidate source return; } - this.reader = reader; + this.reader = context.reader(); this.docId = -1; localCacheFieldData.clear(); } diff --git a/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java b/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java index 9562a6dfaef..613891bda85 100644 --- a/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java +++ b/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java @@ -21,7 +21,7 @@ package org.elasticsearch.search.lookup; import com.google.common.collect.Maps; import org.apache.lucene.index.AtomicReader; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.ElasticSearchParseException; import org.elasticsearch.common.Nullable; @@ -58,11 +58,11 @@ public class FieldsLookup implements Map { this.types = types; } - public void setNextReader(AtomicReader reader) { - if (this.reader == reader) { // if we are called with the same reader, don't invalidate 
source + public void setNextReader(AtomicReaderContext context) { + if (this.reader == context.reader()) { // if we are called with the same reader, don't invalidate source return; } - this.reader = reader; + this.reader = context.reader(); clearCache(); this.docId = -1; } diff --git a/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java b/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java index 566ed5575f5..efa151b5cd5 100644 --- a/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java +++ b/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java @@ -20,8 +20,7 @@ package org.elasticsearch.search.lookup; import com.google.common.collect.ImmutableMap; -import org.apache.lucene.index.AtomicReader; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Scorer; import org.elasticsearch.common.Nullable; import org.elasticsearch.index.cache.field.data.FieldDataCache; @@ -67,10 +66,10 @@ public class SearchLookup { docMap.setScorer(scorer); } - public void setNextReader(AtomicReader reader) { - docMap.setNextReader(reader); - sourceLookup.setNextReader(reader); - fieldsLookup.setNextReader(reader); + public void setNextReader(AtomicReaderContext context) { + docMap.setNextReader(context); + sourceLookup.setNextReader(context); + fieldsLookup.setNextReader(context); } public void setNextDocId(int docId) { diff --git a/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java b/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java index fe313c0b2b3..a1b30925b64 100644 --- a/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java +++ b/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java @@ -23,7 +23,7 @@ import com.google.common.collect.ImmutableMap; import org.apache.lucene.document.Document; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.AtomicReader; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexableField; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchParseException; @@ -86,11 +86,11 @@ public class SourceLookup implements Map { return XContentHelper.convertToMap(bytes, offset, length, false).v2(); } - public void setNextReader(AtomicReader reader) { - if (this.reader == reader) { // if we are called with the same reader, don't invalidate source + public void setNextReader(AtomicReaderContext context) { + if (this.reader == context.reader()) { // if we are called with the same reader, don't invalidate source return; } - this.reader = reader; + this.reader = context.reader(); this.source = null; this.sourceAsBytes = null; this.docId = -1; From 19ab1d05488935d48f5d0e85b81763da8dfd8fd4 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 26 Oct 2012 19:53:23 +0200 Subject: [PATCH 036/146] lucene 4: upgraded o.e.index.search.child package --- .../common/lucene/search/EmptyScorer.java | 11 ++-- .../common/lucene/search/NoopCollector.java | 4 +- .../elasticsearch/index/cache/id/IdCache.java | 3 +- .../index/cache/id/simple/SimpleIdCache.java | 28 +++++----- .../cache/id/simple/SimpleIdReaderCache.java | 12 ++--- .../id/simple/SimpleIdReaderTypeCache.java | 7 ++- .../index/search/child/ChildCollector.java | 20 ++++--- .../index/search/child/HasChildFilter.java | 24 +++++---- .../index/search/child/HasParentFilter.java | 49 ++++++++++------- .../index/search/child/TopChildrenQuery.java | 54 
++++++++++--------- 10 files changed, 120 insertions(+), 92 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/search/EmptyScorer.java b/src/main/java/org/elasticsearch/common/lucene/search/EmptyScorer.java index 8834c49ce17..4955c829653 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/EmptyScorer.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/EmptyScorer.java @@ -20,7 +20,7 @@ package org.elasticsearch.common.lucene.search; import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.search.Weight; import java.io.IOException; @@ -29,8 +29,8 @@ import java.io.IOException; */ public class EmptyScorer extends Scorer { - public EmptyScorer(Similarity similarity) { - super(similarity); + public EmptyScorer(Weight weight) { + super(weight); } @Override @@ -38,6 +38,11 @@ public class EmptyScorer extends Scorer { return 0; } + @Override + public float freq() throws IOException { + return 0; + } + @Override public int docID() { return NO_MORE_DOCS; diff --git a/src/main/java/org/elasticsearch/common/lucene/search/NoopCollector.java b/src/main/java/org/elasticsearch/common/lucene/search/NoopCollector.java index 15a4f5efcb1..60c52b4f2dd 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/NoopCollector.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/NoopCollector.java @@ -19,6 +19,7 @@ package org.elasticsearch.common.lucene.search; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Collector; import org.apache.lucene.search.Scorer; @@ -41,7 +42,8 @@ public class NoopCollector extends Collector { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { + public void setNextReader(AtomicReaderContext context) throws IOException { + throw new UnsupportedOperationException(); } @Override diff --git a/src/main/java/org/elasticsearch/index/cache/id/IdCache.java b/src/main/java/org/elasticsearch/index/cache/id/IdCache.java index 33bd68bee55..25bebb45b53 100644 --- a/src/main/java/org/elasticsearch/index/cache/id/IdCache.java +++ b/src/main/java/org/elasticsearch/index/cache/id/IdCache.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.cache.id; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.elasticsearch.common.component.CloseableComponent; @@ -37,7 +38,7 @@ public interface IdCache extends IndexComponent, CloseableComponent, Iterable readers) throws Exception; - IdReaderCache reader(IndexReader reader); + IdReaderCache reader(AtomicReader reader); long sizeInBytes(); diff --git a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java index 0d954f02c73..38aeef884f6 100644 --- a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java +++ b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java @@ -24,7 +24,6 @@ import org.apache.lucene.index.*; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.ElasticSearchIllegalArgumentException; -import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.bytes.HashedBytesArray; import org.elasticsearch.common.collect.MapBuilder; import org.elasticsearch.common.inject.Inject; @@ -35,7 +34,6 @@ import 
org.elasticsearch.index.AbstractIndexComponent; import org.elasticsearch.index.Index; import org.elasticsearch.index.cache.id.IdCache; import org.elasticsearch.index.cache.id.IdReaderCache; -import org.elasticsearch.index.mapper.Uid; import org.elasticsearch.index.mapper.internal.ParentFieldMapper; import org.elasticsearch.index.mapper.internal.UidFieldMapper; import org.elasticsearch.index.settings.IndexSettings; @@ -77,7 +75,7 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se } @Override - public IdReaderCache reader(IndexReader reader) { + public IdReaderCache reader(AtomicReader reader) { return idReaders.get(reader.getCoreCacheKey()); } @@ -98,7 +96,7 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se } // do the refresh - Map> builders = new HashMap>(); + Map> builders = new HashMap>(); // first, go over and load all the id->doc map for all types for (AtomicReaderContext context : atomicReaderContexts) { @@ -111,7 +109,7 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se if (reader instanceof SegmentReader) { ((SegmentReader) reader).addCoreClosedListener(this); } - Map readerBuilder = new HashMap(); + Map readerBuilder = new HashMap(); builders.put(reader.getCoreCacheKey(), readerBuilder); @@ -124,10 +122,10 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se DocsEnum docsEnum = null; for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.term()) { HashedBytesArray[] typeAndId = splitUidIntoTypeAndId(term); - TypeBuilder typeBuilder = readerBuilder.get(typeAndId[0]); + TypeBuilder typeBuilder = readerBuilder.get(typeAndId[0].toUtf8()); if (typeBuilder == null) { typeBuilder = new TypeBuilder(reader); - readerBuilder.put(typeAndId[0], typeBuilder); + readerBuilder.put(typeAndId[0].toUtf8(), typeBuilder); } HashedBytesArray idAsBytes = checkIfCanReuse(builders, typeAndId[1]); @@ -148,7 +146,7 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se continue; } - Map readerBuilder = builders.get(reader.getCoreCacheKey()); + Map readerBuilder = builders.get(reader.getCoreCacheKey()); Terms terms = reader.terms(ParentFieldMapper.NAME); if (terms == null) { // Should not happen @@ -160,10 +158,10 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.term()) { HashedBytesArray[] typeAndId = splitUidIntoTypeAndId(term); - TypeBuilder typeBuilder = readerBuilder.get(typeAndId[0]); + TypeBuilder typeBuilder = readerBuilder.get(typeAndId[0].toUtf8()); if (typeBuilder == null) { typeBuilder = new TypeBuilder(reader); - readerBuilder.put(typeAndId[0], typeBuilder); + readerBuilder.put(typeAndId[0].toUtf8(), typeBuilder); } HashedBytesArray idAsBytes = checkIfCanReuse(builders, typeAndId[1]); @@ -186,9 +184,9 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se // now, build it back - for (Map.Entry> entry : builders.entrySet()) { - MapBuilder types = MapBuilder.newMapBuilder(); - for (Map.Entry typeBuilderEntry : entry.getValue().entrySet()) { + for (Map.Entry> entry : builders.entrySet()) { + MapBuilder types = MapBuilder.newMapBuilder(); + for (Map.Entry typeBuilderEntry : entry.getValue().entrySet()) { types.put(typeBuilderEntry.getKey(), new SimpleIdReaderTypeCache(typeBuilderEntry.getKey(), typeBuilderEntry.getValue().idToDoc, typeBuilderEntry.getValue().docToId, @@ -210,7 +208,7 @@ public 
class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se return sizeInBytes; } - private HashedBytesArray checkIfCanReuse(Map> builders, HashedBytesArray idAsBytes) { + private HashedBytesArray checkIfCanReuse(Map> builders, HashedBytesArray idAsBytes) { HashedBytesArray finalIdAsBytes; // go over and see if we can reuse this id for (SimpleIdReaderCache idReaderCache : idReaders.values()) { @@ -219,7 +217,7 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se return finalIdAsBytes; } } - for (Map map : builders.values()) { + for (Map map : builders.values()) { for (TypeBuilder typeBuilder : map.values()) { finalIdAsBytes = typeBuilder.canReuse(idAsBytes); if (finalIdAsBytes != null) { diff --git a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdReaderCache.java b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdReaderCache.java index f57f65e4648..5d6d5d1abdc 100644 --- a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdReaderCache.java +++ b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdReaderCache.java @@ -20,8 +20,6 @@ package org.elasticsearch.index.cache.id.simple; import com.google.common.collect.ImmutableMap; -import org.elasticsearch.common.bytes.BytesArray; -import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.bytes.HashedBytesArray; import org.elasticsearch.index.cache.id.IdReaderCache; import org.elasticsearch.index.cache.id.IdReaderTypeCache; @@ -33,9 +31,9 @@ public class SimpleIdReaderCache implements IdReaderCache { private final Object readerCacheKey; - private final ImmutableMap types; + private final ImmutableMap types; - public SimpleIdReaderCache(Object readerCacheKey, ImmutableMap types) { + public SimpleIdReaderCache(Object readerCacheKey, ImmutableMap types) { this.readerCacheKey = readerCacheKey; this.types = types; } @@ -47,12 +45,12 @@ public class SimpleIdReaderCache implements IdReaderCache { @Override public IdReaderTypeCache type(String type) { - return types.get(new BytesArray(type)); + return types.get(type); } @Override public HashedBytesArray parentIdByDoc(String type, int docId) { - SimpleIdReaderTypeCache typeCache = types.get(new BytesArray(type)); + SimpleIdReaderTypeCache typeCache = types.get(type); if (typeCache != null) { return typeCache.parentIdByDoc(docId); } @@ -61,7 +59,7 @@ public class SimpleIdReaderCache implements IdReaderCache { @Override public int docById(String type, HashedBytesArray id) { - SimpleIdReaderTypeCache typeCache = types.get(new BytesArray(type)); + SimpleIdReaderTypeCache typeCache = types.get(type); if (typeCache != null) { return typeCache.docById(id); } diff --git a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdReaderTypeCache.java b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdReaderTypeCache.java index ac63bc378f0..083e79f3b51 100644 --- a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdReaderTypeCache.java +++ b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdReaderTypeCache.java @@ -21,7 +21,6 @@ package org.elasticsearch.index.cache.id.simple; import gnu.trove.impl.hash.TObjectHash; import org.elasticsearch.common.RamUsage; -import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.bytes.HashedBytesArray; import org.elasticsearch.common.trove.ExtTObjectIntHasMap; import org.elasticsearch.index.cache.id.IdReaderTypeCache; @@ -31,7 +30,7 @@ import 
org.elasticsearch.index.cache.id.IdReaderTypeCache; */ public class SimpleIdReaderTypeCache implements IdReaderTypeCache { - private final BytesReference type; + private final String type; private final ExtTObjectIntHasMap idToDoc; @@ -43,7 +42,7 @@ public class SimpleIdReaderTypeCache implements IdReaderTypeCache { private long sizeInBytes = -1; - public SimpleIdReaderTypeCache(BytesReference type, ExtTObjectIntHasMap idToDoc, HashedBytesArray[] docIdToId, + public SimpleIdReaderTypeCache(String type, ExtTObjectIntHasMap idToDoc, HashedBytesArray[] docIdToId, HashedBytesArray[] parentIdsValues, int[] parentIdsOrdinals) { this.type = type; this.idToDoc = idToDoc; @@ -53,7 +52,7 @@ public class SimpleIdReaderTypeCache implements IdReaderTypeCache { this.parentIdsOrdinals = parentIdsOrdinals; } - public BytesReference type() { + public String type() { return this.type; } diff --git a/src/main/java/org/elasticsearch/index/search/child/ChildCollector.java b/src/main/java/org/elasticsearch/index/search/child/ChildCollector.java index 9a308cb485f..974cd1e2739 100644 --- a/src/main/java/org/elasticsearch/index/search/child/ChildCollector.java +++ b/src/main/java/org/elasticsearch/index/search/child/ChildCollector.java @@ -19,6 +19,8 @@ package org.elasticsearch.index.search.child; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Collector; import org.apache.lucene.search.Scorer; @@ -41,7 +43,7 @@ public class ChildCollector extends Collector { private final SearchContext context; - private final Tuple[] readers; + private final Tuple[] readers; private final Map parentDocs; @@ -53,10 +55,12 @@ public class ChildCollector extends Collector { this.parentDocs = new HashMap(); // create a specific type map lookup for faster lookup operations per doc - this.readers = new Tuple[context.searcher().subReaders().length]; + this.readers = new Tuple[context.searcher().getIndexReader().leaves().size()]; for (int i = 0; i < readers.length; i++) { - IndexReader reader = context.searcher().subReaders()[i]; - readers[i] = new Tuple(reader, context.idCache().reader(reader).type(parentType)); + AtomicReaderContext readerContext = context.searcher().getIndexReader().leaves().get(i); + readers[i] = new Tuple( + readerContext.reader(), context.idCache().reader(readerContext.reader()).type(parentType) + ); } } @@ -75,14 +79,14 @@ public class ChildCollector extends Collector { if (parentId == null) { return; } - for (Tuple tuple : readers) { + for (Tuple tuple : readers) { IndexReader indexReader = tuple.v1(); IdReaderTypeCache idReaderTypeCache = tuple.v2(); if (idReaderTypeCache == null) { // might be if we don't have that doc with that type in this reader continue; } int parentDocId = idReaderTypeCache.docById(parentId); - if (parentDocId != -1 && !indexReader.isDeleted(parentDocId)) { + if (parentDocId != -1) { FixedBitSet docIdSet = parentDocs().get(indexReader.getCoreCacheKey()); if (docIdSet == null) { docIdSet = new FixedBitSet(indexReader.maxDoc()); @@ -95,8 +99,8 @@ public class ChildCollector extends Collector { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - typeCache = context.idCache().reader(reader).type(parentType); + public void setNextReader(AtomicReaderContext readerContext) throws IOException { + typeCache = context.idCache().reader(readerContext.reader()).type(parentType); } @Override diff --git 
a/src/main/java/org/elasticsearch/index/search/child/HasChildFilter.java b/src/main/java/org/elasticsearch/index/search/child/HasChildFilter.java index 81955523835..2833fe70746 100644 --- a/src/main/java/org/elasticsearch/index/search/child/HasChildFilter.java +++ b/src/main/java/org/elasticsearch/index/search/child/HasChildFilter.java @@ -20,11 +20,13 @@ package org.elasticsearch.index.search.child; import gnu.trove.set.hash.THashSet; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Collector; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; import org.apache.lucene.search.Query; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.ElasticSearchIllegalStateException; import org.elasticsearch.common.CacheRecycler; @@ -106,13 +108,15 @@ public abstract class HasChildFilter extends Filter implements ScopePhase.Collec parentDocs = null; } - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { if (parentDocs == null) { throw new ElasticSearchIllegalStateException("has_child filter/query hasn't executed properly"); } + // np need to use acceptDocs, since the parentDocs were collected with a collector, which means those + // collected docs are not deleted // ok to return null - return parentDocs.get(reader.getCoreCacheKey()); + return parentDocs.get(context.reader().getCoreCacheKey()); } } @@ -138,14 +142,14 @@ public abstract class HasChildFilter extends Filter implements ScopePhase.Collec collectedUids = ((UidCollector) collector).collectedUids; } - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { if (collectedUids == null) { throw new ElasticSearchIllegalStateException("has_child filter/query hasn't executed properly"); } - IdReaderTypeCache idReaderTypeCache = searchContext.idCache().reader(reader).type(parentType); + IdReaderTypeCache idReaderTypeCache = searchContext.idCache().reader(context.reader()).type(parentType); if (idReaderTypeCache != null) { - return new ParentDocSet(reader, collectedUids, idReaderTypeCache); + return new ParentDocSet(context.reader(), collectedUids, idReaderTypeCache, acceptDocs); } else { return null; } @@ -163,16 +167,18 @@ public abstract class HasChildFilter extends Filter implements ScopePhase.Collec final IndexReader reader; final THashSet parents; final IdReaderTypeCache typeCache; + final Bits acceptDocs; - ParentDocSet(IndexReader reader, THashSet parents, IdReaderTypeCache typeCache) { + ParentDocSet(IndexReader reader, THashSet parents, IdReaderTypeCache typeCache, Bits acceptDocs) { super(reader.maxDoc()); this.reader = reader; this.parents = parents; this.typeCache = typeCache; + this.acceptDocs = acceptDocs; } public boolean get(int doc) { - return !reader.isDeleted(doc) && parents.contains(typeCache.idByDoc(doc)); + return !acceptDocs.get(doc) && parents.contains(typeCache.idByDoc(doc)); } } @@ -196,8 +202,8 @@ public abstract class HasChildFilter extends Filter implements ScopePhase.Collec } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - typeCache = context.idCache().reader(reader).type(parentType); + public void setNextReader(AtomicReaderContext readerContext) throws IOException { + typeCache = 
context.idCache().reader(readerContext.reader()).type(parentType); } } } diff --git a/src/main/java/org/elasticsearch/index/search/child/HasParentFilter.java b/src/main/java/org/elasticsearch/index/search/child/HasParentFilter.java index 789d174b6ed..57e2822c646 100644 --- a/src/main/java/org/elasticsearch/index/search/child/HasParentFilter.java +++ b/src/main/java/org/elasticsearch/index/search/child/HasParentFilter.java @@ -20,11 +20,14 @@ package org.elasticsearch.index.search.child; import gnu.trove.set.hash.THashSet; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Collector; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; import org.apache.lucene.search.Query; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.ElasticSearchIllegalStateException; import org.elasticsearch.common.CacheRecycler; @@ -104,14 +107,14 @@ public abstract class HasParentFilter extends Filter implements ScopePhase.Colle parents = ((ParentUidsCollector) collector).collectedUids; } - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext readerContext, Bits acceptDocs) throws IOException { if (parents == null) { throw new ElasticSearchIllegalStateException("has_parent filter/query hasn't executed properly"); } - IdReaderTypeCache idReaderTypeCache = context.idCache().reader(reader).type(parentType); + IdReaderTypeCache idReaderTypeCache = context.idCache().reader(readerContext.reader()).type(parentType); if (idReaderTypeCache != null) { - return new ChildrenDocSet(reader, parents, idReaderTypeCache); + return new ChildrenDocSet(readerContext.reader(), parents, idReaderTypeCache, acceptDocs); } else { return null; } @@ -129,16 +132,18 @@ public abstract class HasParentFilter extends Filter implements ScopePhase.Colle final IndexReader reader; final THashSet parents; final IdReaderTypeCache idReaderTypeCache; + final Bits acceptDocs; - ChildrenDocSet(IndexReader reader, THashSet parents, IdReaderTypeCache idReaderTypeCache) { + ChildrenDocSet(IndexReader reader, THashSet parents, IdReaderTypeCache idReaderTypeCache, Bits acceptDocs) { super(reader.maxDoc()); this.reader = reader; this.parents = parents; this.idReaderTypeCache = idReaderTypeCache; + this.acceptDocs = acceptDocs; } public boolean get(int doc) { - return !reader.isDeleted(doc) && parents.contains(idReaderTypeCache.parentIdByDoc(doc)); + return !acceptDocs.get(doc) && parents.contains(idReaderTypeCache.parentIdByDoc(doc)); } } @@ -161,8 +166,9 @@ public abstract class HasParentFilter extends Filter implements ScopePhase.Colle collectedUids.add(typeCache.idByDoc(doc)); } - public void setNextReader(IndexReader reader, int docBase) throws IOException { - typeCache = context.idCache().reader(reader).type(parentType); + @Override + public void setNextReader(AtomicReaderContext readerContext) throws IOException { + typeCache = context.idCache().reader(readerContext.reader()).type(parentType); } } @@ -188,12 +194,12 @@ public abstract class HasParentFilter extends Filter implements ScopePhase.Colle parentDocs = ((ParentDocsCollector) collector).segmentResults; } - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext readerContext, Bits acceptDocs) throws IOException { if (parentDocs == null) { throw new 
ElasticSearchIllegalStateException("has_parent filter/query hasn't executed properly"); } - return new ChildrenDocSet(reader, parentDocs, context, parentType); + return new ChildrenDocSet(readerContext.reader(), parentDocs, context, parentType, acceptDocs); } public void clear() { @@ -203,25 +209,27 @@ public abstract class HasParentFilter extends Filter implements ScopePhase.Colle static class ChildrenDocSet extends GetDocSet { final IdReaderTypeCache currentTypeCache; - final IndexReader currentReader; - final Tuple[] readersToTypeCache; + final AtomicReader currentReader; + final Tuple[] readersToTypeCache; final Map parentDocs; + final Bits acceptDocs; - ChildrenDocSet(IndexReader currentReader, Map parentDocs, - SearchContext context, String parentType) { + ChildrenDocSet(AtomicReader currentReader, Map parentDocs, + SearchContext context, String parentType, Bits acceptDocs) { super(currentReader.maxDoc()); + this.acceptDocs = acceptDocs; this.currentTypeCache = context.idCache().reader(currentReader).type(parentType); this.currentReader = currentReader; this.parentDocs = parentDocs; - this.readersToTypeCache = new Tuple[context.searcher().subReaders().length]; + this.readersToTypeCache = new Tuple[context.searcher().getIndexReader().leaves().size()]; for (int i = 0; i < readersToTypeCache.length; i++) { - IndexReader reader = context.searcher().subReaders()[i]; - readersToTypeCache[i] = new Tuple(reader, context.idCache().reader(reader).type(parentType)); + AtomicReader reader = context.searcher().getIndexReader().leaves().get(i).reader(); + readersToTypeCache[i] = new Tuple(reader, context.idCache().reader(reader).type(parentType)); } } public boolean get(int doc) { - if (currentReader.isDeleted(doc) || doc == -1) { + if (acceptDocs.get(doc) || doc == -1) { return false; } @@ -230,7 +238,7 @@ public abstract class HasParentFilter extends Filter implements ScopePhase.Colle return false; } - for (Tuple readerTypeCacheTuple : readersToTypeCache) { + for (Tuple readerTypeCacheTuple : readersToTypeCache) { int parentDocId = readerTypeCacheTuple.v2().docById(parentId); if (parentDocId == -1) { continue; @@ -254,8 +262,9 @@ public abstract class HasParentFilter extends Filter implements ScopePhase.Colle current.set(doc); } - public void setNextReader(IndexReader reader, int docBase) throws IOException { - segmentResults.put(reader.getCoreCacheKey(), current = new FixedBitSet(reader.maxDoc())); + @Override + public void setNextReader(AtomicReaderContext context) throws IOException { + segmentResults.put(context.reader().getCoreCacheKey(), current = new FixedBitSet(context.reader().maxDoc())); } } } diff --git a/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java b/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java index 47a10f42d3e..7fe64c8a4bb 100644 --- a/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java +++ b/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java @@ -20,9 +20,9 @@ package org.elasticsearch.index.search.child; import gnu.trove.map.hash.TIntObjectHashMap; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; +import org.apache.lucene.index.*; import org.apache.lucene.search.*; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.ElasticSearchIllegalStateException; @@ -124,20 +124,21 @@ public class TopChildrenQuery extends Query implements 
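
    // A note on Bits semantics in Lucene 4 (illustrative comment, not part of the
    // patch): AtomicReader#getLiveDocs() and the acceptDocs passed to
    // Filter#getDocIdSet mark documents that are LIVE, and both may be null when
    // no documents are deleted. A null-safe liveness check therefore looks like:
    //
    //     Bits liveDocs = indexReader.getLiveDocs();
    //     boolean live = liveDocs == null || liveDocs.get(docId);
    //
    // so guards written as !liveDocs.get(doc) or !acceptDocs.get(doc) select deleted
    // documents instead of live ones, and can throw NullPointerException on
    // segments without deletions.
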
ScopePhase.TopDocsPhase { public void processResults(TopDocs topDocs, SearchContext context) { Map> parentDocsPerReader = new HashMap>(); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { - int readerIndex = context.searcher().readerIndex(scoreDoc.doc); - IndexReader subReader = context.searcher().subReaders()[readerIndex]; - int subDoc = scoreDoc.doc - context.searcher().docStarts()[readerIndex]; + int readerIndex = ReaderUtil.subIndex(scoreDoc.doc, context.searcher().getIndexReader().leaves()); + AtomicReaderContext subContext = context.searcher().getIndexReader().leaves().get(readerIndex); + int subDoc = scoreDoc.doc - subContext.docBase; // find the parent id - HashedBytesArray parentId = context.idCache().reader(subReader).parentIdByDoc(parentType, subDoc); + HashedBytesArray parentId = context.idCache().reader(subContext.reader()).parentIdByDoc(parentType, subDoc); if (parentId == null) { // no parent found continue; } // now go over and find the parent doc Id and reader tuple - for (IndexReader indexReader : context.searcher().subReaders()) { + for (AtomicReaderContext atomicReaderContext : context.searcher().getIndexReader().leaves()) { + AtomicReader indexReader = atomicReaderContext.reader(); int parentDocId = context.idCache().reader(indexReader).docById(parentType, parentId); - if (parentDocId != -1 && !indexReader.isDeleted(parentDocId)) { + if (parentDocId != -1 && !indexReader.getLiveDocs().get(parentDocId)) { // we found a match, add it and break TIntObjectHashMap readerParentDocs = parentDocsPerReader.get(indexReader.getCoreCacheKey()); @@ -205,15 +206,15 @@ public class TopChildrenQuery extends Query implements ScopePhase.TopDocsPhase { } @Override - public Weight createWeight(Searcher searcher) throws IOException { + public Weight createWeight(IndexSearcher searcher) throws IOException { if (!properlyInvoked) { throw new ElasticSearchIllegalStateException("top_children query hasn't executed properly"); } if (parentDocs != null) { - return new ParentWeight(searcher, query.weight(searcher)); + return new ParentWeight(searcher, query.createWeight(searcher)); } - return query.weight(searcher); + return query.createWeight(searcher); } public String toString(String field) { @@ -225,11 +226,11 @@ public class TopChildrenQuery extends Query implements ScopePhase.TopDocsPhase { class ParentWeight extends Weight { - final Searcher searcher; + final IndexSearcher searcher; final Weight queryWeight; - public ParentWeight(Searcher searcher, Weight queryWeight) throws IOException { + public ParentWeight(IndexSearcher searcher, Weight queryWeight) throws IOException { this.searcher = searcher; this.queryWeight = queryWeight; } @@ -243,28 +244,28 @@ public class TopChildrenQuery extends Query implements ScopePhase.TopDocsPhase { } @Override - public float sumOfSquaredWeights() throws IOException { - float sum = queryWeight.sumOfSquaredWeights(); + public float getValueForNormalization() throws IOException { + float sum = queryWeight.getValueForNormalization(); sum *= getBoost() * getBoost(); return sum; } @Override - public void normalize(float norm) { - // nothing to do here.... 
+ public void normalize(float norm, float topLevelBoost) { + // Nothing to normalize } @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { - ParentDoc[] readerParentDocs = parentDocs.get(reader.getCoreCacheKey()); + public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { + ParentDoc[] readerParentDocs = parentDocs.get(context.reader().getCoreCacheKey()); if (readerParentDocs != null) { - return new ParentScorer(getSimilarity(searcher), readerParentDocs); + return new ParentScorer(this, readerParentDocs); } - return new EmptyScorer(getSimilarity(searcher)); + return new EmptyScorer(this); } @Override - public Explanation explain(IndexReader reader, int doc) throws IOException { + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { return new Explanation(getBoost(), "not implemented yet..."); } } @@ -275,8 +276,8 @@ public class TopChildrenQuery extends Query implements ScopePhase.TopDocsPhase { private int index = -1; - private ParentScorer(Similarity similarity, ParentDoc[] docs) throws IOException { - super(similarity); + private ParentScorer(ParentWeight weight, ParentDoc[] docs) throws IOException { + super(weight); this.docs = docs; } @@ -315,5 +316,10 @@ public class TopChildrenQuery extends Query implements ScopePhase.TopDocsPhase { } throw new ElasticSearchIllegalStateException("No support for score type [" + scoreType + "]"); } + + @Override + public float freq() throws IOException { + return docs[index].count; // The number of matches in the child doc, which is propagated to parent + } } } From e75301b781f2ee73f0782d9b90b5fa085052ad9a Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Sat, 27 Oct 2012 23:20:43 +0200 Subject: [PATCH 037/146] lucene 4: optimize bytes on XContentParser also, does not seem like we need to reuse bytes buffer, if we need to, we can always add it later --- .../common/xcontent/XContentParser.java | 12 ++++----- .../support/AbstractXContentParser.java | 26 ++++++++----------- .../index/query/RangeFilterParser.java | 12 ++++----- .../index/query/RangeQueryParser.java | 12 ++++----- .../index/query/TypeFilterParser.java | 2 +- 5 files changed, 30 insertions(+), 34 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/xcontent/XContentParser.java b/src/main/java/org/elasticsearch/common/xcontent/XContentParser.java index 3872eb5e5fa..597901676da 100644 --- a/src/main/java/org/elasticsearch/common/xcontent/XContentParser.java +++ b/src/main/java/org/elasticsearch/common/xcontent/XContentParser.java @@ -19,12 +19,12 @@ package org.elasticsearch.common.xcontent; +import org.apache.lucene.util.BytesRef; + import java.io.Closeable; import java.io.IOException; import java.util.Map; -import org.apache.lucene.util.BytesRef; - /** * */ @@ -130,10 +130,10 @@ public interface XContentParser extends Closeable { String text() throws IOException; String textOrNull() throws IOException; - - BytesRef bytesOrNull(BytesRef spare) throws IOException; - - BytesRef bytes(BytesRef spare) throws IOException; + + BytesRef bytesOrNull() throws IOException; + + BytesRef bytes() throws IOException; boolean hasTextCharacters(); diff --git a/src/main/java/org/elasticsearch/common/xcontent/support/AbstractXContentParser.java b/src/main/java/org/elasticsearch/common/xcontent/support/AbstractXContentParser.java index e6a89bb475f..945b6ac617e 100644 --- 
a/src/main/java/org/elasticsearch/common/xcontent/support/AbstractXContentParser.java +++ b/src/main/java/org/elasticsearch/common/xcontent/support/AbstractXContentParser.java @@ -20,6 +20,7 @@ package org.elasticsearch.common.xcontent.support; import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.UnicodeUtil; import org.elasticsearch.common.Booleans; import org.elasticsearch.common.xcontent.XContentParser; @@ -106,26 +107,21 @@ public abstract class AbstractXContentParser implements XContentParser { } return text(); } - - + @Override - public BytesRef bytesOrNull(BytesRef spare) throws IOException { - if (currentToken() == Token.VALUE_NULL) { - return null; - } - return bytes(spare); + public BytesRef bytesOrNull() throws IOException { + if (currentToken() == Token.VALUE_NULL) { + return null; + } + return bytes(); } @Override - public BytesRef bytes(BytesRef spare) throws IOException { - // LUCENE 4 UPGRADE: we can possibly make this more efficient for now I just forward to text - if (spare == null) { - return new BytesRef(text()); - } else { - spare.copyChars(text()); - return spare; - } + public BytesRef bytes() throws IOException { + BytesRef bytes = new BytesRef(); + UnicodeUtil.UTF16toUTF8(textCharacters(), textOffset(), textLength(), bytes); + return bytes; } @Override diff --git a/src/main/java/org/elasticsearch/index/query/RangeFilterParser.java b/src/main/java/org/elasticsearch/index/query/RangeFilterParser.java index b24f72fd13f..f35cb3194a6 100644 --- a/src/main/java/org/elasticsearch/index/query/RangeFilterParser.java +++ b/src/main/java/org/elasticsearch/index/query/RangeFilterParser.java @@ -72,24 +72,24 @@ public class RangeFilterParser implements FilterParser { currentFieldName = parser.currentName(); } else { if ("from".equals(currentFieldName)) { - from = parser.bytesOrNull(from); + from = parser.bytesOrNull(); } else if ("to".equals(currentFieldName)) { - to = parser.bytesOrNull(to); + to = parser.bytesOrNull(); } else if ("include_lower".equals(currentFieldName) || "includeLower".equals(currentFieldName)) { includeLower = parser.booleanValue(); } else if ("include_upper".equals(currentFieldName) || "includeUpper".equals(currentFieldName)) { includeUpper = parser.booleanValue(); } else if ("gt".equals(currentFieldName)) { - from = parser.bytesOrNull(from); + from = parser.bytesOrNull(); includeLower = false; } else if ("gte".equals(currentFieldName) || "ge".equals(currentFieldName)) { - from = parser.bytesOrNull(from); + from = parser.bytesOrNull(); includeLower = true; } else if ("lt".equals(currentFieldName)) { - to = parser.bytesOrNull(to); + to = parser.bytesOrNull(); includeUpper = false; } else if ("lte".equals(currentFieldName) || "le".equals(currentFieldName)) { - to = parser.bytesOrNull(to); + to = parser.bytesOrNull(); includeUpper = true; } else { throw new QueryParsingException(parseContext.index(), "[range] filter does not support [" + currentFieldName + "]"); diff --git a/src/main/java/org/elasticsearch/index/query/RangeQueryParser.java b/src/main/java/org/elasticsearch/index/query/RangeQueryParser.java index b79b3a07ee3..5cbaccf9db4 100644 --- a/src/main/java/org/elasticsearch/index/query/RangeQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/RangeQueryParser.java @@ -72,9 +72,9 @@ public class RangeQueryParser implements QueryParser { currentFieldName = parser.currentName(); } else { if ("from".equals(currentFieldName)) { - from = parser.bytesOrNull(from); + from = parser.bytesOrNull(); } else if 
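
A self-contained sketch of the UnicodeUtil conversion the new bytes() implementation is built on; the target BytesRef is grown as needed and the conversion round-trips losslessly:

    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.UnicodeUtil;

    char[] chars = "térm".toCharArray();
    BytesRef bytes = new BytesRef();
    UnicodeUtil.UTF16toUTF8(chars, 0, chars.length, bytes);
    assert "térm".equals(bytes.utf8ToString());   // lossless round trip
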
("to".equals(currentFieldName)) { - to = parser.bytesOrNull(to); + to = parser.bytesOrNull(); } else if ("include_lower".equals(currentFieldName) || "includeLower".equals(currentFieldName)) { includeLower = parser.booleanValue(); } else if ("include_upper".equals(currentFieldName) || "includeUpper".equals(currentFieldName)) { @@ -82,16 +82,16 @@ public class RangeQueryParser implements QueryParser { } else if ("boost".equals(currentFieldName)) { boost = parser.floatValue(); } else if ("gt".equals(currentFieldName)) { - from = parser.bytesOrNull(from); + from = parser.bytesOrNull(); includeLower = false; } else if ("gte".equals(currentFieldName) || "ge".equals(currentFieldName)) { - from = parser.bytesOrNull(from); + from = parser.bytesOrNull(); includeLower = true; } else if ("lt".equals(currentFieldName)) { - to = parser.bytesOrNull(to); + to = parser.bytesOrNull(); includeUpper = false; } else if ("lte".equals(currentFieldName) || "le".equals(currentFieldName)) { - to = parser.bytesOrNull(to); + to = parser.bytesOrNull(); includeUpper = true; } else { throw new QueryParsingException(parseContext.index(), "[range] query does not support [" + currentFieldName + "]"); diff --git a/src/main/java/org/elasticsearch/index/query/TypeFilterParser.java b/src/main/java/org/elasticsearch/index/query/TypeFilterParser.java index f1b5c227130..754295562e6 100644 --- a/src/main/java/org/elasticsearch/index/query/TypeFilterParser.java +++ b/src/main/java/org/elasticsearch/index/query/TypeFilterParser.java @@ -59,7 +59,7 @@ public class TypeFilterParser implements FilterParser { if (token != XContentParser.Token.VALUE_STRING) { throw new QueryParsingException(parseContext.index(), "[type] filter should have a value field, and the type name"); } - BytesRef type = parser.bytes(null); + BytesRef type = parser.bytes(); // move to the next token parser.nextToken(); From 549900a0824e2c856b7b7cedc1d447ea854aebf2 Mon Sep 17 00:00:00 2001 From: Chris Male Date: Sat, 27 Oct 2012 15:19:27 +1300 Subject: [PATCH 038/146] lucene 4: Converted most Mappers over to FieldType API --- .../lucene/spatial/SpatialStrategy.java | 4 +- .../common/lucene/uid/UidField.java | 2 +- .../index/mapper/DocumentMapper.java | 5 +- .../index/mapper/FieldMapper.java | 19 +- .../mapper/core/AbstractFieldMapper.java | 211 +++++++++++------- .../index/mapper/core/BinaryFieldMapper.java | 32 +-- .../index/mapper/core/BooleanFieldMapper.java | 80 ++++--- .../index/mapper/core/ByteFieldMapper.java | 72 +++--- .../index/mapper/core/DateFieldMapper.java | 72 +++--- .../index/mapper/core/DoubleFieldMapper.java | 70 +++--- .../index/mapper/core/FloatFieldMapper.java | 70 +++--- .../index/mapper/core/IntegerFieldMapper.java | 71 +++--- .../index/mapper/core/LongFieldMapper.java | 70 +++--- .../index/mapper/core/NumberFieldMapper.java | 62 +++-- .../index/mapper/core/ShortFieldMapper.java | 69 +++--- .../index/mapper/core/StringFieldMapper.java | 64 +++--- .../index/mapper/core/TypeParsers.java | 52 +++-- .../index/mapper/geo/GeoShapeFieldMapper.java | 28 ++- .../index/mapper/internal/AllFieldMapper.java | 9 +- .../mapper/internal/BoostFieldMapper.java | 44 ++-- .../index/mapper/internal/IdFieldMapper.java | 75 ++++--- .../mapper/internal/IndexFieldMapper.java | 48 ++-- .../mapper/internal/ParentFieldMapper.java | 30 ++- .../mapper/internal/RoutingFieldMapper.java | 51 +++-- .../mapper/internal/SizeFieldMapper.java | 35 +-- .../mapper/internal/SourceFieldMapper.java | 37 +-- .../index/mapper/internal/TTLFieldMapper.java | 32 +-- 
.../mapper/internal/TimestampFieldMapper.java | 45 ++-- .../mapper/internal/TypeFieldMapper.java | 57 ++--- .../index/mapper/internal/UidFieldMapper.java | 28 ++- .../index/mapper/ip/IpFieldMapper.java | 66 ++++-- .../index/mapper/object/ObjectMapper.java | 11 +- 32 files changed, 973 insertions(+), 648 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/spatial/SpatialStrategy.java b/src/main/java/org/elasticsearch/common/lucene/spatial/SpatialStrategy.java index 05dc835c9b3..e31ed96e03a 100644 --- a/src/main/java/org/elasticsearch/common/lucene/spatial/SpatialStrategy.java +++ b/src/main/java/org/elasticsearch/common/lucene/spatial/SpatialStrategy.java @@ -5,7 +5,6 @@ import com.spatial4j.core.shape.Point; import com.spatial4j.core.shape.Rectangle; import com.spatial4j.core.shape.Shape; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.search.Filter; import org.apache.lucene.search.Query; import org.elasticsearch.common.geo.GeoShapeConstants; @@ -56,12 +55,13 @@ public abstract class SpatialStrategy { * @param shape Shape to convert ints its indexable format * @return Fieldable for indexing the Shape */ - public Fieldable createField(Shape shape) { + public Field createField(Shape shape) { int detailLevel = prefixTree.getLevelForDistance( calcDistanceFromErrPct(shape, distanceErrorPct, GeoShapeConstants.SPATIAL_CONTEXT)); List nodes = prefixTree.getNodes(shape, detailLevel, true); NodeTokenStream tokenStream = nodeTokenStream.get(); tokenStream.setNodes(nodes); + // LUCENE 4 Upgrade: We should pass in the FieldType and use it here return new Field(fieldName.indexName(), tokenStream); } diff --git a/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java b/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java index f0d56957b44..05473665655 100644 --- a/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java +++ b/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java @@ -36,7 +36,7 @@ import java.io.Reader; /** * */ -public class UidField extends AbstractField { +public class UidField extends Field { public static class DocIdAndVersion { public final int docId; diff --git a/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java b/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java index e9886d70de5..99b1cc94818 100644 --- a/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java @@ -24,7 +24,6 @@ import com.google.common.collect.Maps; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.search.Filter; import org.elasticsearch.common.Booleans; import org.elasticsearch.common.Nullable; @@ -114,13 +113,13 @@ public class DocumentMapper implements ToXContent { * Called before a field is added to the document. Return true to include * it in the document. 
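
Picking up the "LUCENE 4 Upgrade" note in SpatialStrategy.createField above, a hedged sketch of what passing an explicit FieldType could look like; the flags chosen here are assumptions, not the strategy's actual settings:

    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.FieldType;
    import org.apache.lucene.index.FieldInfo.IndexOptions;

    FieldType ft = new FieldType();
    ft.setIndexed(true);
    ft.setTokenized(true);
    ft.setOmitNorms(true);
    ft.setIndexOptions(IndexOptions.DOCS_ONLY);   // assumed: positions unneeded for prefix-tree terms
    ft.freeze();
    Field shapeField = new Field("shape", tokenStream, ft);   // TokenStream-backed field
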
*/ - boolean beforeFieldAdded(FieldMapper fieldMapper, Fieldable fieldable, ParseContext parseContent); + boolean beforeFieldAdded(FieldMapper fieldMapper, Field fieldable, ParseContext parseContent); } public static class ParseListenerAdapter implements ParseListener { @Override - public boolean beforeFieldAdded(FieldMapper fieldMapper, Fieldable fieldable, Object parseContext) { + public boolean beforeFieldAdded(FieldMapper fieldMapper, Field fieldable, Object parseContext) { return true; } } diff --git a/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index af1655d0d16..e214b9da591 100644 --- a/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -21,7 +21,6 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.Term; import org.apache.lucene.search.Filter; @@ -120,17 +119,21 @@ public interface FieldMapper { Names names(); - Field.Index index(); + // LUCENE 4 UPGRADE Consider replacing these all with fieldType() and letting consumer pick and choose boolean indexed(); boolean analyzed(); - Field.Store store(); - boolean stored(); - Field.TermVector termVector(); + boolean storeTermVectors(); + + boolean storeTermVectorOffsets(); + + boolean storeTermVectorPositions(); + + boolean storeTermVectorPayloads(); float boost(); @@ -156,19 +159,19 @@ public interface FieldMapper { /** * Returns the value that will be used as a result for search. Can be only of specific types... . */ - Object valueForSearch(Fieldable field); + Object valueForSearch(Field field); /** * Returns the actual value of the field. */ - T value(Fieldable field); + T value(Field field); T valueFromString(String value); /** * Returns the actual value of the field as string. */ - String valueAsString(Fieldable field); + String valueAsString(Field field); /** * Returns the indexed value. 
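
A sketch of the consolidation the "LUCENE 4 UPGRADE" comment above contemplates, replacing the per-flag boolean accessors with a single getter; fieldType() on FieldMapper is hypothetical here:

    FieldType ft = mapper.fieldType();                 // hypothetical accessor
    boolean keywordStyle = ft.indexed() && !ft.tokenized();
    boolean withVectors = ft.storeTermVectors() && ft.storeTermVectorPositions();
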
diff --git a/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java index e63bcbf7806..27bbd04d0d8 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java @@ -21,11 +21,11 @@ package org.elasticsearch.index.mapper.core; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.Term; import org.apache.lucene.search.*; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.lucene.Lucene; @@ -44,33 +44,60 @@ import java.io.IOException; public abstract class AbstractFieldMapper implements FieldMapper, Mapper { public static class Defaults { - public static final Field.Index INDEX = Field.Index.ANALYZED; - public static final Field.Store STORE = Field.Store.NO; - public static final Field.TermVector TERM_VECTOR = Field.TermVector.NO; + public static final FieldType FIELD_TYPE = new FieldType(); + + static { + FIELD_TYPE.setIndexed(true); + FIELD_TYPE.setTokenized(true); + FIELD_TYPE.setStored(false); + FIELD_TYPE.setStoreTermVectors(false); + FIELD_TYPE.setOmitNorms(false); + FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + FIELD_TYPE.freeze(); + } + public static final float BOOST = 1.0f; - public static final boolean OMIT_NORMS = false; - public static final IndexOptions INDEX_OPTIONS = IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; } public abstract static class OpenBuilder extends AbstractFieldMapper.Builder { - protected OpenBuilder(String name) { - super(name); + protected OpenBuilder(String name, FieldType fieldType) { + super(name, fieldType); } @Override - public T index(Field.Index index) { + public T index(boolean index) { return super.index(index); } @Override - public T store(Field.Store store) { + public T store(boolean store) { return super.store(store); } @Override - public T termVector(Field.TermVector termVector) { - return super.termVector(termVector); + protected T storeTermVectors(boolean termVectors) { + return super.storeTermVectors(termVectors); + } + + @Override + protected T storeTermVectorOffsets(boolean termVectorOffsets) { + return super.storeTermVectorOffsets(termVectorOffsets); + } + + @Override + protected T storeTermVectorPositions(boolean termVectorPositions) { + return super.storeTermVectorPositions(termVectorPositions); + } + + @Override + protected T storeTermVectorPayloads(boolean termVectorPayloads) { + return super.storeTermVectorPayloads(termVectorPayloads); + } + + @Override + protected T tokenized(boolean tokenized) { + return super.tokenized(tokenized); } @Override @@ -106,35 +133,55 @@ public abstract class AbstractFieldMapper implements FieldMapper, Mapper { public abstract static class Builder extends Mapper.Builder { - protected Field.Index index = Defaults.INDEX; - protected Field.Store store = Defaults.STORE; - protected Field.TermVector termVector = Defaults.TERM_VECTOR; + protected final FieldType fieldType; protected float boost = Defaults.BOOST; - protected boolean omitNorms = Defaults.OMIT_NORMS; protected boolean omitNormsSet = false; protected 
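
The Defaults.FIELD_TYPE pattern above works because FieldType offers a copy constructor and a freeze() latch. A minimal sketch of the copy-then-customize flow the builders use:

    FieldType copy = new FieldType(Defaults.FIELD_TYPE); // copy constructor yields an unfrozen clone
    copy.setStored(true);                                // each builder customizes its private copy
    copy.freeze();                                       // later setter calls throw IllegalStateException
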
String indexName; protected NamedAnalyzer indexAnalyzer; protected NamedAnalyzer searchAnalyzer; protected Boolean includeInAll; - protected IndexOptions indexOptions = Defaults.INDEX_OPTIONS; protected boolean indexOptionsSet = false; - protected Builder(String name) { + protected Builder(String name, FieldType fieldType) { super(name); + this.fieldType = fieldType; } - protected T index(Field.Index index) { - this.index = index; + protected T index(boolean index) { + this.fieldType.setIndexed(index); return builder; } - protected T store(Field.Store store) { - this.store = store; + protected T store(boolean store) { + this.fieldType.setStored(store); return builder; } - protected T termVector(Field.TermVector termVector) { - this.termVector = termVector; + protected T storeTermVectors(boolean termVectors) { + this.fieldType.setStoreTermVectors(termVectors); + return builder; + } + + protected T storeTermVectorOffsets(boolean termVectorOffsets) { + this.fieldType.setStoreTermVectors(termVectorOffsets); + this.fieldType.setStoreTermVectorOffsets(termVectorOffsets); + return builder; + } + + protected T storeTermVectorPositions(boolean termVectorPositions) { + this.fieldType.setStoreTermVectors(termVectorPositions); + this.fieldType.setStoreTermVectorPositions(termVectorPositions); + return builder; + } + + protected T storeTermVectorPayloads(boolean termVectorPayloads) { + this.fieldType.setStoreTermVectors(termVectorPayloads); + this.fieldType.setStoreTermVectorPayloads(termVectorPayloads); + return builder; + } + + protected T tokenized(boolean tokenized) { + this.fieldType.setTokenized(tokenized); return builder; } @@ -144,13 +191,13 @@ public abstract class AbstractFieldMapper implements FieldMapper, Mapper { } protected T omitNorms(boolean omitNorms) { - this.omitNorms = omitNorms; + this.fieldType.setOmitNorms(omitNorms); this.omitNormsSet = true; return builder; } protected T indexOptions(IndexOptions indexOptions) { - this.indexOptions = indexOptions; + this.fieldType.setIndexOptions(indexOptions); this.indexOptionsSet = true; return builder; } @@ -191,40 +238,28 @@ public abstract class AbstractFieldMapper implements FieldMapper, Mapper { protected final Names names; - protected final Field.Index index; - - protected final Field.Store store; - - protected final Field.TermVector termVector; - protected float boost; - protected final boolean omitNorms; - - protected final FieldInfo.IndexOptions indexOptions; + protected final FieldType fieldType; protected final NamedAnalyzer indexAnalyzer; protected final NamedAnalyzer searchAnalyzer; - protected AbstractFieldMapper(Names names, Field.Index index, Field.Store store, Field.TermVector termVector, - float boost, boolean omitNorms, IndexOptions indexOptions, NamedAnalyzer indexAnalyzer, NamedAnalyzer searchAnalyzer) { + protected AbstractFieldMapper(Names names, float boost, FieldType fieldType, NamedAnalyzer indexAnalyzer, NamedAnalyzer searchAnalyzer) { this.names = names; - this.index = index; - this.store = store; - this.termVector = termVector; this.boost = boost; - this.omitNorms = omitNorms; - this.indexOptions = indexOptions; + this.fieldType = fieldType; + this.fieldType.freeze(); // automatically set to keyword analyzer if its indexed and not analyzed - if (indexAnalyzer == null && !index.isAnalyzed() && index.isIndexed()) { + if (indexAnalyzer == null && !this.fieldType.tokenized() && this.fieldType.indexed()) { this.indexAnalyzer = Lucene.KEYWORD_ANALYZER; } else { this.indexAnalyzer = indexAnalyzer; } // automatically set 
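
Note how each term-vector sub-flag setter above also calls setStoreTermVectors(true): in Lucene 4 the offsets, positions, and payloads flags only take effect when term vectors are stored at all. A sketch of a fully enabled configuration:

    FieldType ft = new FieldType();
    ft.setIndexed(true);
    ft.setStoreTermVectors(true);           // master switch
    ft.setStoreTermVectorPositions(true);   // sub-flags assume the master switch is on
    ft.setStoreTermVectorOffsets(true);
    ft.setStoreTermVectorPayloads(true);
    ft.freeze();
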
to keyword analyzer if its indexed and not analyzed - if (searchAnalyzer == null && !index.isAnalyzed() && index.isIndexed()) { + if (searchAnalyzer == null && !this.fieldType.tokenized() && this.fieldType.indexed()) { this.searchAnalyzer = Lucene.KEYWORD_ANALYZER; } else { this.searchAnalyzer = searchAnalyzer; @@ -241,34 +276,39 @@ public abstract class AbstractFieldMapper implements FieldMapper, Mapper { return this.names; } - @Override - public Field.Index index() { - return this.index; - } - - @Override - public Field.Store store() { - return this.store; - } - @Override public boolean stored() { - return store == Field.Store.YES; + return fieldType.stored(); } @Override public boolean indexed() { - return index != Field.Index.NO; + return fieldType.indexed(); } @Override public boolean analyzed() { - return index == Field.Index.ANALYZED; + return fieldType.tokenized(); } @Override - public Field.TermVector termVector() { - return this.termVector; + public boolean storeTermVectors() { + return fieldType.storeTermVectors(); + } + + @Override + public boolean storeTermVectorOffsets() { + return fieldType.storeTermVectorOffsets(); + } + + @Override + public boolean storeTermVectorPositions() { + return fieldType.storeTermVectorPositions(); + } + + @Override + public boolean storeTermVectorPayloads() { + return fieldType.storeTermVectorPayloads(); } @Override @@ -278,12 +318,12 @@ public abstract class AbstractFieldMapper implements FieldMapper, Mapper { @Override public boolean omitNorms() { - return this.omitNorms; + return fieldType.omitNorms(); } @Override public IndexOptions indexOptions() { - return this.indexOptions; + return fieldType.indexOptions(); } @Override @@ -304,12 +344,10 @@ public abstract class AbstractFieldMapper implements FieldMapper, Mapper { @Override public void parse(ParseContext context) throws IOException { try { - Fieldable field = parseCreateField(context); + Field field = parseCreateField(context); if (field == null) { return; } - field.setOmitNorms(omitNorms); - field.setIndexOptions(indexOptions); if (!customBoost()) { field.setBoost(boost); } @@ -321,7 +359,7 @@ public abstract class AbstractFieldMapper implements FieldMapper, Mapper { } } - protected abstract Fieldable parseCreateField(ParseContext context) throws IOException; + protected abstract Field parseCreateField(ParseContext context) throws IOException; /** * Derived classes can override it to specify that boost value is set by derived classes. @@ -341,7 +379,7 @@ public abstract class AbstractFieldMapper implements FieldMapper, Mapper { } @Override - public Object valueForSearch(Fieldable field) { + public Object valueForSearch(Field field) { return valueAsString(field); } @@ -396,17 +434,18 @@ public abstract class AbstractFieldMapper implements FieldMapper, Mapper { @Override public Query rangeQuery(String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context) { + // LUCENE 4 UPGRADE: Perhaps indexedValue() should return a BytesRef? return new TermRangeQuery(names.indexName(), - lowerTerm == null ? null : indexedValue(lowerTerm), - upperTerm == null ? null : indexedValue(upperTerm), + lowerTerm == null ? null : new BytesRef(indexedValue(lowerTerm)), + upperTerm == null ? 
null : new BytesRef(indexedValue(upperTerm)), includeLower, includeUpper); } @Override public Filter rangeFilter(String lowerTerm, String upperTerm, boolean includeLower, boolean includeUpper, @Nullable QueryParseContext context) { return new TermRangeFilter(names.indexName(), - lowerTerm == null ? null : indexedValue(lowerTerm), - upperTerm == null ? null : indexedValue(upperTerm), + lowerTerm == null ? null : new BytesRef(indexedValue(lowerTerm)), + upperTerm == null ? null : new BytesRef(indexedValue(upperTerm)), includeLower, includeUpper); } @@ -427,14 +466,26 @@ public abstract class AbstractFieldMapper implements FieldMapper, Mapper { return; } AbstractFieldMapper fieldMergeWith = (AbstractFieldMapper) mergeWith; - if (!this.index.equals(fieldMergeWith.index)) { + if (this.indexed() != fieldMergeWith.indexed() || this.analyzed() != fieldMergeWith.analyzed()) { mergeContext.addConflict("mapper [" + names.fullName() + "] has different index values"); } - if (!this.store.equals(fieldMergeWith.store)) { + if (this.stored() != fieldMergeWith.stored()) { mergeContext.addConflict("mapper [" + names.fullName() + "] has different store values"); } - if (!this.termVector.equals(fieldMergeWith.termVector)) { - mergeContext.addConflict("mapper [" + names.fullName() + "] has different term_vector values"); + if (this.analyzed() != fieldMergeWith.analyzed()) { + mergeContext.addConflict("mapper [" + names.fullName() + "] has different tokenize values"); + } + if (this.storeTermVectors() != fieldMergeWith.storeTermVectors()) { + mergeContext.addConflict("mapper [" + names.fullName() + "] has different store_term_vector values"); + } + if (this.storeTermVectorOffsets() != fieldMergeWith.storeTermVectorOffsets()) { + mergeContext.addConflict("mapper [" + names.fullName() + "] has different store_term_vector_offsets values"); + } + if (this.storeTermVectorPositions() != fieldMergeWith.storeTermVectorPositions()) { + mergeContext.addConflict("mapper [" + names.fullName() + "] has different store_term_vector_positions values"); + } + if (this.storeTermVectorPayloads() != fieldMergeWith.storeTermVectorPayloads()) { + mergeContext.addConflict("mapper [" + names.fullName() + "] has different store_term_vector_payloads values"); } if (this.indexAnalyzer == null) { if (fieldMergeWith.indexAnalyzer != null) { @@ -486,6 +537,16 @@ public abstract class AbstractFieldMapper implements FieldMapper, Mapper { } } + protected static String indexTokenizeOptionToString(boolean indexed, boolean tokenized) { + if (!indexed) { + return "no"; + } else if (tokenized) { + return "analyzed"; + } else { + return "not_analyzed"; + } + } + protected void doXContentBody(XContentBuilder builder) throws IOException { builder.field("type", contentType()); if (!names.name().equals(names.indexNameClean())) { diff --git a/src/main/java/org/elasticsearch/index/mapper/core/BinaryFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/BinaryFieldMapper.java index b159364c018..c667c008bda 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/BinaryFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/BinaryFieldMapper.java @@ -20,8 +20,8 @@ package org.elasticsearch.index.mapper.core; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchParseException; import 
org.elasticsearch.common.Base64; import org.elasticsearch.common.Strings; @@ -50,7 +50,12 @@ public class BinaryFieldMapper extends AbstractFieldMapper { public static class Defaults extends AbstractFieldMapper.Defaults { public static final long COMPRESS_THRESHOLD = -1; - public static final Field.Store STORE = Field.Store.YES; + public static final FieldType BINARY_FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE); + + static { + BINARY_FIELD_TYPE.setStored(false); + BINARY_FIELD_TYPE.freeze(); + } } public static class Builder extends AbstractFieldMapper.Builder { @@ -60,8 +65,7 @@ public class BinaryFieldMapper extends AbstractFieldMapper { private long compressThreshold = Defaults.COMPRESS_THRESHOLD; public Builder(String name) { - super(name); - store = Defaults.STORE; + super(name, new FieldType(Defaults.BINARY_FIELD_TYPE)); builder = this; } @@ -82,7 +86,7 @@ public class BinaryFieldMapper extends AbstractFieldMapper { @Override public BinaryFieldMapper build(BuilderContext context) { - return new BinaryFieldMapper(buildNames(context), store, compress, compressThreshold); + return new BinaryFieldMapper(buildNames(context), fieldType, compress, compressThreshold); } } @@ -114,22 +118,22 @@ public class BinaryFieldMapper extends AbstractFieldMapper { private long compressThreshold; - protected BinaryFieldMapper(Names names, Field.Store store, Boolean compress, long compressThreshold) { - super(names, Field.Index.NO, store, Field.TermVector.NO, 1.0f, true, IndexOptions.DOCS_ONLY, null, null); + protected BinaryFieldMapper(Names names, FieldType fieldType, Boolean compress, long compressThreshold) { + super(names, 1.0f, fieldType, null, null); this.compress = compress; this.compressThreshold = compressThreshold; } @Override - public Object valueForSearch(Fieldable field) { + public Object valueForSearch(Field field) { return value(field); } @Override - public byte[] value(Fieldable field) { - byte[] value = field.getBinaryValue(); + public byte[] value(Field field) { + BytesRef value = field.binaryValue(); if (value == null) { - return value; + return null; } try { return CompressorFactory.uncompressIfNeeded(new BytesArray(value)).toBytes(); @@ -149,7 +153,7 @@ public class BinaryFieldMapper extends AbstractFieldMapper { } @Override - public String valueAsString(Fieldable field) { + public String valueAsString(Field field) { return null; } @@ -184,7 +188,7 @@ public class BinaryFieldMapper extends AbstractFieldMapper { if (value == null) { return null; } - return new Field(names.indexName(), value); + return new Field(names.indexName(), value, fieldType); } @Override diff --git a/src/main/java/org/elasticsearch/index/mapper/core/BooleanFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/BooleanFieldMapper.java index fceefaafbc1..405ca11df19 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/BooleanFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/BooleanFieldMapper.java @@ -20,8 +20,7 @@ package org.elasticsearch.index.mapper.core; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.document.FieldType; import org.apache.lucene.search.Filter; import org.elasticsearch.common.Booleans; import org.elasticsearch.common.Strings; @@ -49,7 +48,12 @@ public class BooleanFieldMapper extends AbstractFieldMapper { public static final String CONTENT_TYPE = "boolean"; public static class Defaults extends 
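
One subtlety with the binaryValue() change above: a BytesRef is a slice (bytes, offset, length), so reading value.bytes directly can over- or under-read when the offset is non-zero. A defensive sketch for extracting the exact payload:

    BytesRef ref = field.binaryValue();
    byte[] exact = new byte[ref.length];
    System.arraycopy(ref.bytes, ref.offset, exact, 0, ref.length);   // honor offset and length
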
AbstractFieldMapper.Defaults { - public static final boolean OMIT_NORMS = true; + public static final FieldType BOOLEAN_FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE); + + static { + BOOLEAN_FIELD_TYPE.setOmitNorms(true); + BOOLEAN_FIELD_TYPE.freeze(); + } public static final Boolean NULL_VALUE = null; } @@ -58,8 +62,7 @@ public class BooleanFieldMapper extends AbstractFieldMapper { private Boolean nullValue = Defaults.NULL_VALUE; public Builder(String name) { - super(name); - this.omitNorms = Defaults.OMIT_NORMS; + super(name, new FieldType(Defaults.BOOLEAN_FIELD_TYPE)); this.builder = this; } @@ -69,18 +72,33 @@ public class BooleanFieldMapper extends AbstractFieldMapper { } @Override - public Builder index(Field.Index index) { + public Builder index(boolean index) { return super.index(index); } @Override - public Builder store(Field.Store store) { + public Builder store(boolean store) { return super.store(store); } @Override - public Builder termVector(Field.TermVector termVector) { - return super.termVector(termVector); + protected Builder storeTermVectors(boolean termVectors) { + return super.storeTermVectors(termVectors); + } + + @Override + protected Builder storeTermVectorOffsets(boolean termVectorOffsets) { + return super.storeTermVectorOffsets(termVectorOffsets); + } + + @Override + protected Builder storeTermVectorPositions(boolean termVectorPositions) { + return super.storeTermVectorPositions(termVectorPositions); + } + + @Override + protected Builder storeTermVectorPayloads(boolean termVectorPayloads) { + return super.storeTermVectorPayloads(termVectorPayloads); } @Override @@ -96,8 +114,7 @@ public class BooleanFieldMapper extends AbstractFieldMapper { @Override public BooleanFieldMapper build(BuilderContext context) { - return new BooleanFieldMapper(buildNames(context), index, store, - termVector, boost, omitNorms, indexOptions, nullValue); + return new BooleanFieldMapper(buildNames(context), boost, fieldType, nullValue); } } @@ -119,9 +136,8 @@ public class BooleanFieldMapper extends AbstractFieldMapper { private Boolean nullValue; - protected BooleanFieldMapper(Names names, Field.Index index, Field.Store store, Field.TermVector termVector, - float boost, boolean omitNorms, IndexOptions indexOptions, Boolean nullValue) { - super(names, index, store, termVector, boost, omitNorms, indexOptions, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); + protected BooleanFieldMapper(Names names, float boost, FieldType fieldType, Boolean nullValue) { + super(names, boost, fieldType, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); this.nullValue = nullValue; } @@ -131,7 +147,7 @@ public class BooleanFieldMapper extends AbstractFieldMapper { } @Override - public Boolean value(Fieldable field) { + public Boolean value(Field field) { return field.stringValue().charAt(0) == 'T' ? Boolean.TRUE : Boolean.FALSE; } @@ -141,7 +157,7 @@ public class BooleanFieldMapper extends AbstractFieldMapper { } @Override - public String valueAsString(Fieldable field) { + public String valueAsString(Field field) { return field.stringValue().charAt(0) == 'T' ? 
"true" : "false"; } @@ -184,7 +200,7 @@ public class BooleanFieldMapper extends AbstractFieldMapper { if (value == null) { return null; } - return new Field(names.indexName(), value, store, index, termVector); + return new Field(names.indexName(), value, fieldType); } @Override @@ -195,20 +211,30 @@ public class BooleanFieldMapper extends AbstractFieldMapper { @Override protected void doXContentBody(XContentBuilder builder) throws IOException { super.doXContentBody(builder); - if (index != Defaults.INDEX) { - builder.field("index", index.name().toLowerCase()); + if (indexed() != Defaults.BOOLEAN_FIELD_TYPE.indexed() || + analyzed() != Defaults.BOOLEAN_FIELD_TYPE.tokenized()) { + builder.field("index", indexTokenizeOptionToString(indexed(), analyzed())); } - if (store != Defaults.STORE) { - builder.field("store", store.name().toLowerCase()); + if (stored() != Defaults.BOOLEAN_FIELD_TYPE.stored()) { + builder.field("store", stored()); } - if (termVector != Defaults.TERM_VECTOR) { - builder.field("term_vector", termVector.name().toLowerCase()); + if (storeTermVectors() != Defaults.BOOLEAN_FIELD_TYPE.storeTermVectors()) { + builder.field("store_term_vector", storeTermVectors()); } - if (omitNorms != Defaults.OMIT_NORMS) { - builder.field("omit_norms", omitNorms); + if (storeTermVectorOffsets() != Defaults.BOOLEAN_FIELD_TYPE.storeTermVectorOffsets()) { + builder.field("store_term_vector_offsets", storeTermVectorOffsets()); } - if (indexOptions != Defaults.INDEX_OPTIONS) { - builder.field("index_options", indexOptionToString(indexOptions)); + if (storeTermVectorPositions() != Defaults.BOOLEAN_FIELD_TYPE.storeTermVectorPositions()) { + builder.field("store_term_vector_positions", storeTermVectorPositions()); + } + if (storeTermVectorPayloads() != Defaults.BOOLEAN_FIELD_TYPE.storeTermVectorPayloads()) { + builder.field("store_term_vector_payloads", storeTermVectorPayloads()); + } + if (omitNorms() != Defaults.BOOLEAN_FIELD_TYPE.omitNorms()) { + builder.field("omit_norms", omitNorms()); + } + if (indexOptions() != Defaults.BOOLEAN_FIELD_TYPE.indexOptions()) { + builder.field("index_options", indexOptionToString(indexOptions())); } if (nullValue != null) { builder.field("null_value", nullValue); diff --git a/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java index a6acef2e02f..f665688e208 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java @@ -19,14 +19,15 @@ package org.elasticsearch.index.mapper.core; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.document.FieldType; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; @@ -56,6 +57,11 @@ public class ByteFieldMapper extends NumberFieldMapper { public static final String CONTENT_TYPE = "byte"; public static class Defaults extends NumberFieldMapper.Defaults { + public static final FieldType BYTE_FIELD_TYPE = new 
FieldType(NumberFieldMapper.Defaults.NUMBER_FIELD_TYPE); + + static { + BYTE_FIELD_TYPE.freeze(); + } public static final Byte NULL_VALUE = null; } @@ -64,7 +70,7 @@ public class ByteFieldMapper extends NumberFieldMapper { protected Byte nullValue = Defaults.NULL_VALUE; public Builder(String name) { - super(name); + super(name, new FieldType(Defaults.BYTE_FIELD_TYPE)); builder = this; } @@ -75,8 +81,9 @@ public class ByteFieldMapper extends NumberFieldMapper { @Override public ByteFieldMapper build(BuilderContext context) { + fieldType.setOmitNorms(fieldType.omitNorms() || boost != 1.0f); ByteFieldMapper fieldMapper = new ByteFieldMapper(buildNames(context), - precisionStep, fuzzyFactor, index, store, boost, omitNorms, indexOptions, nullValue, ignoreMalformed(context)); + precisionStep, fuzzyFactor, boost, fieldType, nullValue, ignoreMalformed(context)); fieldMapper.includeInAll(includeInAll); return fieldMapper; } @@ -102,10 +109,9 @@ public class ByteFieldMapper extends NumberFieldMapper { private String nullValueAsString; - protected ByteFieldMapper(Names names, int precisionStep, String fuzzyFactor, Field.Index index, Field.Store store, - float boost, boolean omitNorms, IndexOptions indexOptions, + protected ByteFieldMapper(Names names, int precisionStep, String fuzzyFactor, float boost, FieldType fieldType, Byte nullValue, Explicit ignoreMalformed) { - super(names, precisionStep, fuzzyFactor, index, store, boost, omitNorms, indexOptions, + super(names, precisionStep, fuzzyFactor, boost, fieldType, ignoreMalformed, new NamedAnalyzer("_byte/" + precisionStep, new NumericIntegerAnalyzer(precisionStep)), new NamedAnalyzer("_byte/max", new NumericIntegerAnalyzer(Integer.MAX_VALUE))); this.nullValue = nullValue; @@ -118,12 +124,12 @@ public class ByteFieldMapper extends NumberFieldMapper { } @Override - public Byte value(Fieldable field) { - byte[] value = field.getBinaryValue(); + public Byte value(Field field) { + BytesRef value = field.binaryValue(); if (value == null) { return null; } - return value[0]; + return value.bytes[value.offset]; } @Override @@ -133,7 +139,9 @@ public class ByteFieldMapper extends NumberFieldMapper { @Override public String indexedValue(String value) { - return NumericUtils.intToPrefixCoded(Byte.parseByte(value)); + BytesRef bytesRef = new BytesRef(); + NumericUtils.intToPrefixCoded(Byte.parseByte(value), precisionStep(), bytesRef); + return bytesRef.utf8ToString(); } @Override @@ -216,7 +224,7 @@ public class ByteFieldMapper extends NumberFieldMapper { } @Override - protected Fieldable innerParseCreateField(ParseContext context) throws IOException { + protected Field innerParseCreateField(ParseContext context) throws IOException { byte value; float boost = this.boost; if (context.externalValueSet()) { @@ -282,7 +290,7 @@ public class ByteFieldMapper extends NumberFieldMapper { } } } - CustomByteNumericField field = new CustomByteNumericField(this, value); + CustomByteNumericField field = new CustomByteNumericField(this, value, fieldType); field.setBoost(boost); return field; } @@ -312,20 +320,30 @@ public class ByteFieldMapper extends NumberFieldMapper { @Override protected void doXContentBody(XContentBuilder builder) throws IOException { super.doXContentBody(builder); - if (index != Defaults.INDEX) { - builder.field("index", index.name().toLowerCase()); + if (indexed() != Defaults.BYTE_FIELD_TYPE.indexed() || + analyzed() != Defaults.BYTE_FIELD_TYPE.tokenized()) { + builder.field("index", indexTokenizeOptionToString(indexed(), analyzed())); } - if (store != 
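
The indexedValue() rewrites in these mappers all follow the Lucene 4 numeric-term pattern: encode into a caller-supplied BytesRef, with floats and doubles first mapped through their order-preserving sortable forms. A sketch, where shift 0 encodes the full-precision term:

    import org.apache.lucene.util.BytesRef;
    import org.apache.lucene.util.NumericUtils;

    BytesRef term = new BytesRef();
    NumericUtils.intToPrefixCoded(42, 0, term);                // full-precision int term

    long sortable = NumericUtils.doubleToSortableLong(3.14);   // order-preserving bit pattern
    NumericUtils.longToPrefixCoded(sortable, 0, term);
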
Defaults.STORE) { - builder.field("store", store.name().toLowerCase()); + if (stored() != Defaults.BYTE_FIELD_TYPE.stored()) { + builder.field("store", stored()); } - if (termVector != Defaults.TERM_VECTOR) { - builder.field("term_vector", termVector.name().toLowerCase()); + if (storeTermVectors() != Defaults.BYTE_FIELD_TYPE.storeTermVectors()) { + builder.field("store_term_vector", storeTermVectors()); } - if (omitNorms != Defaults.OMIT_NORMS) { - builder.field("omit_norms", omitNorms); + if (storeTermVectorOffsets() != Defaults.BYTE_FIELD_TYPE.storeTermVectorOffsets()) { + builder.field("store_term_vector_offsets", storeTermVectorOffsets()); } - if (indexOptions != Defaults.INDEX_OPTIONS) { - builder.field("index_options", indexOptionToString(indexOptions)); + if (storeTermVectorPositions() != Defaults.BYTE_FIELD_TYPE.storeTermVectorPositions()) { + builder.field("store_term_vector_positions", storeTermVectorPositions()); + } + if (storeTermVectorPayloads() != Defaults.BYTE_FIELD_TYPE.storeTermVectorPayloads()) { + builder.field("store_term_vector_payloads", storeTermVectorPayloads()); + } + if (omitNorms() != Defaults.BYTE_FIELD_TYPE.omitNorms()) { + builder.field("omit_norms", omitNorms()); + } + if (indexOptions() != Defaults.BYTE_FIELD_TYPE.indexOptions()) { + builder.field("index_options", indexOptionToString(indexOptions())); } if (precisionStep != Defaults.PRECISION_STEP) { builder.field("precision_step", precisionStep); @@ -347,15 +365,15 @@ public class ByteFieldMapper extends NumberFieldMapper { private final NumberFieldMapper mapper; - public CustomByteNumericField(NumberFieldMapper mapper, byte number) { - super(mapper, mapper.stored() ? new byte[]{number} : null); + public CustomByteNumericField(NumberFieldMapper mapper, byte number, FieldType fieldType) { + super(mapper, mapper.stored() ? 
new byte[]{number} : null, fieldType); this.mapper = mapper; this.number = number; } @Override - public TokenStream tokenStreamValue() { - if (isIndexed) { + public TokenStream tokenStream(Analyzer analyzer) { + if (fieldType().indexed()) { return mapper.popCachedStream().setIntValue(number); } return null; diff --git a/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java index 4ea4255548d..06bc083bf77 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java @@ -20,12 +20,12 @@ package org.elasticsearch.index.mapper.core; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.document.FieldType; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; @@ -63,6 +63,12 @@ public class DateFieldMapper extends NumberFieldMapper { public static class Defaults extends NumberFieldMapper.Defaults { public static final FormatDateTimeFormatter DATE_TIME_FORMATTER = Joda.forPattern("dateOptionalTime"); + public static final FieldType DATE_FIELD_TYPE = new FieldType(NumberFieldMapper.Defaults.NUMBER_FIELD_TYPE); + + static { + DATE_FIELD_TYPE.freeze(); + } + public static final String NULL_VALUE = null; public static final TimeUnit TIME_UNIT = TimeUnit.MILLISECONDS; @@ -78,7 +84,7 @@ public class DateFieldMapper extends NumberFieldMapper { protected FormatDateTimeFormatter dateTimeFormatter = Defaults.DATE_TIME_FORMATTER; public Builder(String name) { - super(name); + super(name, new FieldType(Defaults.DATE_FIELD_TYPE)); builder = this; } @@ -103,8 +109,9 @@ public class DateFieldMapper extends NumberFieldMapper { if (context.indexSettings() != null) { parseUpperInclusive = context.indexSettings().getAsBoolean("index.mapping.date.parse_upper_inclusive", Defaults.PARSE_UPPER_INCLUSIVE); } + fieldType.setOmitNorms(fieldType.omitNorms() || boost != 1.0f); DateFieldMapper fieldMapper = new DateFieldMapper(buildNames(context), dateTimeFormatter, - precisionStep, fuzzyFactor, index, store, boost, omitNorms, indexOptions, nullValue, + precisionStep, fuzzyFactor, boost, fieldType, nullValue, timeUnit, parseUpperInclusive, ignoreMalformed(context)); fieldMapper.includeInAll(includeInAll); return fieldMapper; @@ -142,10 +149,9 @@ public class DateFieldMapper extends NumberFieldMapper { protected final TimeUnit timeUnit; protected DateFieldMapper(Names names, FormatDateTimeFormatter dateTimeFormatter, int precisionStep, String fuzzyFactor, - Field.Index index, Field.Store store, - float boost, boolean omitNorms, IndexOptions indexOptions, + float boost, FieldType fieldType, String nullValue, TimeUnit timeUnit, boolean parseUpperInclusive, Explicit ignoreMalformed) { - super(names, precisionStep, fuzzyFactor, index, store, boost, omitNorms, indexOptions, + super(names, precisionStep, fuzzyFactor, boost, fieldType, ignoreMalformed, new NamedAnalyzer("_date/" + precisionStep, new NumericDateAnalyzer(precisionStep, dateTimeFormatter.parser())), new NamedAnalyzer("_date/max", new NumericDateAnalyzer(Integer.MAX_VALUE, 
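
The tokenStream(Analyzer) override above replaces Lucene 3's tokenStreamValue(). A standalone sketch of the same idea inside a Field subclass, without the mappers' per-thread stream cache; the precision step and value are illustrative:

    @Override
    public TokenStream tokenStream(Analyzer analyzer) {
        if (!fieldType().indexed()) {
            return null;                                   // nothing to index
        }
        return new NumericTokenStream(4).setIntValue(42);  // mappers reuse a cached stream instead
    }
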
dateTimeFormatter.parser()))); @@ -174,12 +180,12 @@ public class DateFieldMapper extends NumberFieldMapper { } @Override - public Long value(Fieldable field) { - byte[] value = field.getBinaryValue(); + public Long value(Field field) { + BytesRef value = field.binaryValue(); if (value == null) { return null; } - return Numbers.bytesToLong(value); + return Numbers.bytesToLong(value.bytes); } @Override @@ -188,15 +194,15 @@ public class DateFieldMapper extends NumberFieldMapper { } /** - * Dates should return as a string, delegates to {@link #valueAsString(org.apache.lucene.document.Fieldable)}. + * Dates should return as a string. */ @Override - public Object valueForSearch(Fieldable field) { + public Object valueForSearch(Field field) { return valueAsString(field); } @Override - public String valueAsString(Fieldable field) { + public String valueAsString(Field field) { Long value = value(field); if (value == null) { return null; @@ -206,7 +212,9 @@ public class DateFieldMapper extends NumberFieldMapper { @Override public String indexedValue(String value) { - return NumericUtils.longToPrefixCoded(dateTimeFormatter.parser().parseMillis(value)); + BytesRef bytesRef = new BytesRef(); + NumericUtils.longToPrefixCoded(dateTimeFormatter.parser().parseMillis(value), precisionStep(), bytesRef); + return bytesRef.utf8ToString(); } @Override @@ -297,7 +305,7 @@ public class DateFieldMapper extends NumberFieldMapper { } @Override - protected Fieldable innerParseCreateField(ParseContext context) throws IOException { + protected Field innerParseCreateField(ParseContext context) throws IOException { String dateAsString = null; Long value = null; float boost = this.boost; @@ -343,7 +351,7 @@ public class DateFieldMapper extends NumberFieldMapper { } if (value != null) { - LongFieldMapper.CustomLongNumericField field = new LongFieldMapper.CustomLongNumericField(this, timeUnit.toMillis(value)); + LongFieldMapper.CustomLongNumericField field = new LongFieldMapper.CustomLongNumericField(this, timeUnit.toMillis(value), fieldType); field.setBoost(boost); return field; } @@ -356,7 +364,7 @@ public class DateFieldMapper extends NumberFieldMapper { } value = parseStringValue(dateAsString); - LongFieldMapper.CustomLongNumericField field = new LongFieldMapper.CustomLongNumericField(this, value); + LongFieldMapper.CustomLongNumericField field = new LongFieldMapper.CustomLongNumericField(this, value, fieldType); field.setBoost(boost); return field; } @@ -385,20 +393,30 @@ public class DateFieldMapper extends NumberFieldMapper { @Override protected void doXContentBody(XContentBuilder builder) throws IOException { super.doXContentBody(builder); - if (index != Defaults.INDEX) { - builder.field("index", index.name().toLowerCase()); + if (indexed() != Defaults.DATE_FIELD_TYPE.indexed() || + analyzed() != Defaults.DATE_FIELD_TYPE.tokenized()) { + builder.field("index", indexTokenizeOptionToString(indexed(), analyzed())); } - if (store != Defaults.STORE) { - builder.field("store", store.name().toLowerCase()); + if (stored() != Defaults.DATE_FIELD_TYPE.stored()) { + builder.field("store", stored()); } - if (termVector != Defaults.TERM_VECTOR) { - builder.field("term_vector", termVector.name().toLowerCase()); + if (storeTermVectors() != Defaults.DATE_FIELD_TYPE.storeTermVectors()) { + builder.field("store_term_vector", storeTermVectors()); } - if (omitNorms != Defaults.OMIT_NORMS) { - builder.field("omit_norms", omitNorms); + if (storeTermVectorOffsets() != Defaults.DATE_FIELD_TYPE.storeTermVectorOffsets()) { + 
builder.field("store_term_vector_offsets", storeTermVectorOffsets()); } - if (indexOptions != Defaults.INDEX_OPTIONS) { - builder.field("index_options", indexOptionToString(indexOptions)); + if (storeTermVectorPositions() != Defaults.DATE_FIELD_TYPE.storeTermVectorPositions()) { + builder.field("store_term_vector_positions", storeTermVectorPositions()); + } + if (storeTermVectorPayloads() != Defaults.DATE_FIELD_TYPE.storeTermVectorPayloads()) { + builder.field("store_term_vector_payloads", storeTermVectorPayloads()); + } + if (omitNorms() != Defaults.DATE_FIELD_TYPE.omitNorms()) { + builder.field("omit_norms", omitNorms()); + } + if (indexOptions() != Defaults.DATE_FIELD_TYPE.indexOptions()) { + builder.field("index_options", indexOptionToString(indexOptions())); } if (precisionStep != Defaults.PRECISION_STEP) { builder.field("precision_step", precisionStep); diff --git a/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java index 93f3b963a52..cded1df85b6 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java @@ -21,12 +21,12 @@ package org.elasticsearch.index.mapper.core; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.document.FieldType; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; @@ -56,6 +56,11 @@ public class DoubleFieldMapper extends NumberFieldMapper { public static final String CONTENT_TYPE = "double"; public static class Defaults extends NumberFieldMapper.Defaults { + public static final FieldType DOUBLE_FIELD_TYPE = new FieldType(NumberFieldMapper.Defaults.NUMBER_FIELD_TYPE); + + static { + DOUBLE_FIELD_TYPE.freeze(); + } public static final Double NULL_VALUE = null; } @@ -64,7 +69,7 @@ public class DoubleFieldMapper extends NumberFieldMapper { protected Double nullValue = Defaults.NULL_VALUE; public Builder(String name) { - super(name); + super(name, new FieldType(Defaults.DOUBLE_FIELD_TYPE)); builder = this; } @@ -75,8 +80,9 @@ public class DoubleFieldMapper extends NumberFieldMapper { @Override public DoubleFieldMapper build(BuilderContext context) { + fieldType.setOmitNorms(fieldType.omitNorms() || boost != 1.0f); DoubleFieldMapper fieldMapper = new DoubleFieldMapper(buildNames(context), - precisionStep, fuzzyFactor, index, store, boost, omitNorms, indexOptions, nullValue, + precisionStep, fuzzyFactor, boost, fieldType, nullValue, ignoreMalformed(context)); fieldMapper.includeInAll(includeInAll); return fieldMapper; @@ -105,10 +111,9 @@ public class DoubleFieldMapper extends NumberFieldMapper { private String nullValueAsString; protected DoubleFieldMapper(Names names, int precisionStep, String fuzzyFactor, - Field.Index index, Field.Store store, - float boost, boolean omitNorms, IndexOptions indexOptions, + float boost, FieldType fieldType, Double nullValue, Explicit ignoreMalformed) { - super(names, precisionStep, fuzzyFactor, index, store, boost, omitNorms, indexOptions, + super(names, precisionStep, 
fuzzyFactor, boost, fieldType, ignoreMalformed, new NamedAnalyzer("_double/" + precisionStep, new NumericDoubleAnalyzer(precisionStep)), new NamedAnalyzer("_double/max", new NumericDoubleAnalyzer(Integer.MAX_VALUE))); this.nullValue = nullValue; @@ -121,12 +126,12 @@ public class DoubleFieldMapper extends NumberFieldMapper { } @Override - public Double value(Fieldable field) { - byte[] value = field.getBinaryValue(); + public Double value(Field field) { + BytesRef value = field.binaryValue(); if (value == null) { return null; } - return Numbers.bytesToDouble(value); + return Numbers.bytesToDouble(value.bytes); } @Override @@ -136,7 +141,10 @@ public class DoubleFieldMapper extends NumberFieldMapper { @Override public String indexedValue(String value) { - return NumericUtils.doubleToPrefixCoded(Double.parseDouble(value)); + long longValue = NumericUtils.doubleToSortableLong(Double.parseDouble(value)); + BytesRef bytesRef = new BytesRef(); + NumericUtils.longToPrefixCoded(longValue, precisionStep(), bytesRef); + return bytesRef.utf8ToString(); } @Override @@ -218,7 +226,7 @@ public class DoubleFieldMapper extends NumberFieldMapper { } @Override - protected Fieldable innerParseCreateField(ParseContext context) throws IOException { + protected Field innerParseCreateField(ParseContext context) throws IOException { double value; float boost = this.boost; if (context.externalValueSet()) { @@ -285,7 +293,7 @@ public class DoubleFieldMapper extends NumberFieldMapper { } } - CustomDoubleNumericField field = new CustomDoubleNumericField(this, value); + CustomDoubleNumericField field = new CustomDoubleNumericField(this, value, fieldType); field.setBoost(boost); return field; } @@ -315,20 +323,30 @@ public class DoubleFieldMapper extends NumberFieldMapper { @Override protected void doXContentBody(XContentBuilder builder) throws IOException { super.doXContentBody(builder); - if (index != Defaults.INDEX) { - builder.field("index", index.name().toLowerCase()); + if (indexed() != Defaults.DOUBLE_FIELD_TYPE.indexed() || + analyzed() != Defaults.DOUBLE_FIELD_TYPE.tokenized()) { + builder.field("index", indexTokenizeOptionToString(indexed(), analyzed())); } - if (store != Defaults.STORE) { - builder.field("store", store.name().toLowerCase()); + if (stored() != Defaults.DOUBLE_FIELD_TYPE.stored()) { + builder.field("store", stored()); } - if (termVector != Defaults.TERM_VECTOR) { - builder.field("term_vector", termVector.name().toLowerCase()); + if (storeTermVectors() != Defaults.DOUBLE_FIELD_TYPE.storeTermVectors()) { + builder.field("store_term_vector", storeTermVectors()); } - if (omitNorms != Defaults.OMIT_NORMS) { - builder.field("omit_norms", omitNorms); + if (storeTermVectorOffsets() != Defaults.DOUBLE_FIELD_TYPE.storeTermVectorOffsets()) { + builder.field("store_term_vector_offsets", storeTermVectorOffsets()); } - if (indexOptions != Defaults.INDEX_OPTIONS) { - builder.field("index_options", indexOptionToString(indexOptions)); + if (storeTermVectorPositions() != Defaults.DOUBLE_FIELD_TYPE.storeTermVectorPositions()) { + builder.field("store_term_vector_positions", storeTermVectorPositions()); + } + if (storeTermVectorPayloads() != Defaults.DOUBLE_FIELD_TYPE.storeTermVectorPayloads()) { + builder.field("store_term_vector_payloads", storeTermVectorPayloads()); + } + if (omitNorms() != Defaults.DOUBLE_FIELD_TYPE.omitNorms()) { + builder.field("omit_norms", omitNorms()); + } + if (indexOptions() != Defaults.DOUBLE_FIELD_TYPE.indexOptions()) { + builder.field("index_options", 
indexOptionToString(indexOptions())); } if (precisionStep != Defaults.PRECISION_STEP) { builder.field("precision_step", precisionStep); @@ -350,15 +368,15 @@ public class DoubleFieldMapper extends NumberFieldMapper { private final NumberFieldMapper mapper; - public CustomDoubleNumericField(NumberFieldMapper mapper, double number) { - super(mapper, mapper.stored() ? Numbers.doubleToBytes(number) : null); + public CustomDoubleNumericField(NumberFieldMapper mapper, double number, FieldType fieldType) { + super(mapper, mapper.stored() ? Numbers.doubleToBytes(number) : null, fieldType); this.mapper = mapper; this.number = number; } @Override public TokenStream tokenStreamValue() { - if (isIndexed) { + if (fieldType().indexed()) { return mapper.popCachedStream().setDoubleValue(number); } return null; diff --git a/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java index 71225c9a41e..820c8d894ef 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java @@ -21,12 +21,13 @@ package org.elasticsearch.index.mapper.core; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; @@ -57,6 +58,12 @@ public class FloatFieldMapper extends NumberFieldMapper { public static final String CONTENT_TYPE = "float"; public static class Defaults extends NumberFieldMapper.Defaults { + public static final FieldType FLOAT_FIELD_TYPE = new FieldType(NumberFieldMapper.Defaults.NUMBER_FIELD_TYPE); + + static { + FLOAT_FIELD_TYPE.freeze(); + } + public static final Float NULL_VALUE = null; } @@ -65,7 +72,7 @@ public class FloatFieldMapper extends NumberFieldMapper { protected Float nullValue = Defaults.NULL_VALUE; public Builder(String name) { - super(name); + super(name, new FieldType(Defaults.FLOAT_FIELD_TYPE)); builder = this; } @@ -76,8 +83,9 @@ public class FloatFieldMapper extends NumberFieldMapper { @Override public FloatFieldMapper build(BuilderContext context) { + fieldType.setOmitNorms(fieldType.omitNorms() || boost != 1.0f); FloatFieldMapper fieldMapper = new FloatFieldMapper(buildNames(context), - precisionStep, fuzzyFactor, index, store, boost, omitNorms, indexOptions, nullValue, + precisionStep, fuzzyFactor, boost, fieldType, nullValue, ignoreMalformed(context)); fieldMapper.includeInAll(includeInAll); return fieldMapper; @@ -104,10 +112,9 @@ public class FloatFieldMapper extends NumberFieldMapper { private String nullValueAsString; - protected FloatFieldMapper(Names names, int precisionStep, String fuzzyFactor, Field.Index index, Field.Store store, - float boost, boolean omitNorms, IndexOptions indexOptions, + protected FloatFieldMapper(Names names, int precisionStep, String fuzzyFactor, float boost, FieldType fieldType, Float nullValue, Explicit ignoreMalformed) { - super(names, precisionStep, fuzzyFactor, index, store, boost, omitNorms, indexOptions, + super(names, precisionStep, fuzzyFactor, 
boost, fieldType, ignoreMalformed, new NamedAnalyzer("_float/" + precisionStep, new NumericFloatAnalyzer(precisionStep)), new NamedAnalyzer("_float/max", new NumericFloatAnalyzer(Integer.MAX_VALUE))); this.nullValue = nullValue; @@ -120,12 +127,12 @@ public class FloatFieldMapper extends NumberFieldMapper { } @Override - public Float value(Fieldable field) { - byte[] value = field.getBinaryValue(); + public Float value(Field field) { + BytesRef value = field.binaryValue(); if (value == null) { return null; } - return Numbers.bytesToFloat(value); + return Numbers.bytesToFloat(value.bytes); } @Override @@ -135,7 +142,10 @@ public class FloatFieldMapper extends NumberFieldMapper { @Override public String indexedValue(String value) { - return NumericUtils.floatToPrefixCoded(Float.parseFloat(value)); + int intValue = NumericUtils.floatToSortableInt(Float.parseFloat(value)); + BytesRef bytesRef = new BytesRef(); + NumericUtils.intToPrefixCoded(intValue, precisionStep(), bytesRef); + return bytesRef.utf8ToString(); } @Override @@ -213,7 +223,7 @@ public class FloatFieldMapper extends NumberFieldMapper { } @Override - protected Fieldable innerParseCreateField(ParseContext context) throws IOException { + protected Field innerParseCreateField(ParseContext context) throws IOException { float value; float boost = this.boost; if (context.externalValueSet()) { @@ -280,7 +290,7 @@ public class FloatFieldMapper extends NumberFieldMapper { } } - CustomFloatNumericField field = new CustomFloatNumericField(this, value); + CustomFloatNumericField field = new CustomFloatNumericField(this, value, fieldType); field.setBoost(boost); return field; } @@ -311,20 +321,30 @@ public class FloatFieldMapper extends NumberFieldMapper { @Override protected void doXContentBody(XContentBuilder builder) throws IOException { super.doXContentBody(builder); - if (index != Defaults.INDEX) { - builder.field("index", index.name().toLowerCase()); + if (indexed() != Defaults.FLOAT_FIELD_TYPE.indexed() || + analyzed() != Defaults.FLOAT_FIELD_TYPE.tokenized()) { + builder.field("index", indexTokenizeOptionToString(indexed(), analyzed())); } - if (store != Defaults.STORE) { - builder.field("store", store.name().toLowerCase()); + if (stored() != Defaults.FLOAT_FIELD_TYPE.stored()) { + builder.field("store", stored()); } - if (termVector != Defaults.TERM_VECTOR) { - builder.field("term_vector", termVector.name().toLowerCase()); + if (storeTermVectors() != Defaults.FLOAT_FIELD_TYPE.storeTermVectors()) { + builder.field("store_term_vector", storeTermVectors()); } - if (omitNorms != Defaults.OMIT_NORMS) { - builder.field("omit_norms", omitNorms); + if (storeTermVectorOffsets() != Defaults.FLOAT_FIELD_TYPE.storeTermVectorOffsets()) { + builder.field("store_term_vector_offsets", storeTermVectorOffsets()); } - if (indexOptions != Defaults.INDEX_OPTIONS) { - builder.field("index_options", indexOptionToString(indexOptions)); + if (storeTermVectorPositions() != Defaults.FLOAT_FIELD_TYPE.storeTermVectorPositions()) { + builder.field("store_term_vector_positions", storeTermVectorPositions()); + } + if (storeTermVectorPayloads() != Defaults.FLOAT_FIELD_TYPE.storeTermVectorPayloads()) { + builder.field("store_term_vector_payloads", storeTermVectorPayloads()); + } + if (omitNorms() != Defaults.FLOAT_FIELD_TYPE.omitNorms()) { + builder.field("omit_norms", omitNorms()); + } + if (indexOptions() != Defaults.FLOAT_FIELD_TYPE.indexOptions()) { + builder.field("index_options", indexOptionToString(indexOptions())); } if (precisionStep != 
Defaults.PRECISION_STEP) { builder.field("precision_step", precisionStep); @@ -346,15 +366,15 @@ public class FloatFieldMapper extends NumberFieldMapper { private final NumberFieldMapper mapper; - public CustomFloatNumericField(NumberFieldMapper mapper, float number) { - super(mapper, mapper.stored() ? Numbers.floatToBytes(number) : null); + public CustomFloatNumericField(NumberFieldMapper mapper, float number, FieldType fieldType) { + super(mapper, mapper.stored() ? Numbers.floatToBytes(number) : null, fieldType); this.mapper = mapper; this.number = number; } @Override public TokenStream tokenStreamValue() { - if (isIndexed) { + if (fieldType().indexed()) { return mapper.popCachedStream().setFloatValue(number); } return null; diff --git a/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java index 3ecb8a0f4be..c61355478e3 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java @@ -21,12 +21,12 @@ package org.elasticsearch.index.mapper.core; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.document.FieldType; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; @@ -57,6 +57,12 @@ public class IntegerFieldMapper extends NumberFieldMapper { public static final String CONTENT_TYPE = "integer"; public static class Defaults extends NumberFieldMapper.Defaults { + public static final FieldType INTEGER_FIELD_TYPE = new FieldType(NumberFieldMapper.Defaults.NUMBER_FIELD_TYPE); + + static { + INTEGER_FIELD_TYPE.freeze(); + } + public static final Integer NULL_VALUE = null; } @@ -65,7 +71,7 @@ public class IntegerFieldMapper extends NumberFieldMapper { protected Integer nullValue = Defaults.NULL_VALUE; public Builder(String name) { - super(name); + super(name, new FieldType(Defaults.INTEGER_FIELD_TYPE)); builder = this; } @@ -76,8 +82,9 @@ public class IntegerFieldMapper extends NumberFieldMapper { @Override public IntegerFieldMapper build(BuilderContext context) { + fieldType.setOmitNorms(fieldType.omitNorms() || boost != 1.0f); IntegerFieldMapper fieldMapper = new IntegerFieldMapper(buildNames(context), - precisionStep, fuzzyFactor, index, store, boost, omitNorms, indexOptions, + precisionStep, fuzzyFactor, boost, fieldType, nullValue, ignoreMalformed(context)); fieldMapper.includeInAll(includeInAll); return fieldMapper; @@ -104,10 +111,10 @@ public class IntegerFieldMapper extends NumberFieldMapper { private String nullValueAsString; - protected IntegerFieldMapper(Names names, int precisionStep, String fuzzyFactor, Field.Index index, Field.Store store, - float boost, boolean omitNorms, IndexOptions indexOptions, + protected IntegerFieldMapper(Names names, int precisionStep, String fuzzyFactor, + float boost, FieldType fieldType, Integer nullValue, Explicit ignoreMalformed) { - super(names, precisionStep, fuzzyFactor, index, store, boost, omitNorms, indexOptions, + super(names, precisionStep, fuzzyFactor, boost, fieldType, ignoreMalformed, 
new NamedAnalyzer("_int/" + precisionStep, new NumericIntegerAnalyzer(precisionStep)), new NamedAnalyzer("_int/max", new NumericIntegerAnalyzer(Integer.MAX_VALUE))); this.nullValue = nullValue; @@ -120,12 +127,12 @@ public class IntegerFieldMapper extends NumberFieldMapper { } @Override - public Integer value(Fieldable field) { - byte[] value = field.getBinaryValue(); + public Integer value(Field field) { + BytesRef value = field.binaryValue(); if (value == null) { return null; } - return Numbers.bytesToInt(value); + return Numbers.bytesToInt(value.bytes); } @Override @@ -135,7 +142,9 @@ public class IntegerFieldMapper extends NumberFieldMapper { @Override public String indexedValue(String value) { - return NumericUtils.intToPrefixCoded(Integer.parseInt(value)); + BytesRef bytesRef = new BytesRef(); + NumericUtils.intToPrefixCoded(Integer.parseInt(value), precisionStep(), bytesRef); + return bytesRef.utf8ToString(); } @Override @@ -218,7 +227,7 @@ public class IntegerFieldMapper extends NumberFieldMapper { } @Override - protected Fieldable innerParseCreateField(ParseContext context) throws IOException { + protected Field innerParseCreateField(ParseContext context) throws IOException { int value; float boost = this.boost; if (context.externalValueSet()) { @@ -285,7 +294,7 @@ public class IntegerFieldMapper extends NumberFieldMapper { } } - CustomIntegerNumericField field = new CustomIntegerNumericField(this, value); + CustomIntegerNumericField field = new CustomIntegerNumericField(this, value, fieldType); field.setBoost(boost); return field; } @@ -315,20 +324,30 @@ public class IntegerFieldMapper extends NumberFieldMapper { @Override protected void doXContentBody(XContentBuilder builder) throws IOException { super.doXContentBody(builder); - if (index != Defaults.INDEX) { - builder.field("index", index.name().toLowerCase()); + if (indexed() != Defaults.INTEGER_FIELD_TYPE.indexed() || + analyzed() != Defaults.INTEGER_FIELD_TYPE.tokenized()) { + builder.field("index", indexTokenizeOptionToString(indexed(), analyzed())); } - if (store != Defaults.STORE) { - builder.field("store", store.name().toLowerCase()); + if (stored() != Defaults.INTEGER_FIELD_TYPE.stored()) { + builder.field("store", stored()); } - if (termVector != Defaults.TERM_VECTOR) { - builder.field("term_vector", termVector.name().toLowerCase()); + if (storeTermVectors() != Defaults.INTEGER_FIELD_TYPE.storeTermVectors()) { + builder.field("store_term_vector", storeTermVectors()); } - if (omitNorms != Defaults.OMIT_NORMS) { - builder.field("omit_norms", omitNorms); + if (storeTermVectorOffsets() != Defaults.INTEGER_FIELD_TYPE.storeTermVectorOffsets()) { + builder.field("store_term_vector_offsets", storeTermVectorOffsets()); } - if (indexOptions != Defaults.INDEX_OPTIONS) { - builder.field("index_options", indexOptionToString(indexOptions)); + if (storeTermVectorPositions() != Defaults.INTEGER_FIELD_TYPE.storeTermVectorPositions()) { + builder.field("store_term_vector_positions", storeTermVectorPositions()); + } + if (storeTermVectorPayloads() != Defaults.INTEGER_FIELD_TYPE.storeTermVectorPayloads()) { + builder.field("store_term_vector_payloads", storeTermVectorPayloads()); + } + if (omitNorms() != Defaults.INTEGER_FIELD_TYPE.omitNorms()) { + builder.field("omit_norms", omitNorms()); + } + if (indexOptions() != Defaults.INTEGER_FIELD_TYPE.indexOptions()) { + builder.field("index_options", indexOptionToString(indexOptions())); } if (precisionStep != Defaults.PRECISION_STEP) { builder.field("precision_step", precisionStep); @@ -350,15 
+369,15 @@ public class IntegerFieldMapper extends NumberFieldMapper { private final NumberFieldMapper mapper; - public CustomIntegerNumericField(NumberFieldMapper mapper, int number) { - super(mapper, mapper.stored() ? Numbers.intToBytes(number) : null); + public CustomIntegerNumericField(NumberFieldMapper mapper, int number, FieldType fieldType) { + super(mapper, mapper.stored() ? Numbers.intToBytes(number) : null, fieldType); this.mapper = mapper; this.number = number; } @Override public TokenStream tokenStreamValue() { - if (isIndexed) { + if (fieldType().indexed()) { return mapper.popCachedStream().setIntValue(number); } return null; diff --git a/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java index 40d35b528ad..f3348a05f39 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java @@ -21,12 +21,13 @@ package org.elasticsearch.index.mapper.core; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; @@ -57,6 +58,12 @@ public class LongFieldMapper extends NumberFieldMapper { public static final String CONTENT_TYPE = "long"; public static class Defaults extends NumberFieldMapper.Defaults { + public static final FieldType LONG_FIELD_TYPE = new FieldType(NumberFieldMapper.Defaults.NUMBER_FIELD_TYPE); + + static { + LONG_FIELD_TYPE.freeze(); + } + public static final Long NULL_VALUE = null; } @@ -65,7 +72,7 @@ public class LongFieldMapper extends NumberFieldMapper { protected Long nullValue = Defaults.NULL_VALUE; public Builder(String name) { - super(name); + super(name, new FieldType(Defaults.LONG_FIELD_TYPE)); builder = this; } @@ -76,8 +83,9 @@ public class LongFieldMapper extends NumberFieldMapper { @Override public LongFieldMapper build(BuilderContext context) { + fieldType.setOmitNorms(fieldType.omitNorms() || boost != 1.0f); LongFieldMapper fieldMapper = new LongFieldMapper(buildNames(context), - precisionStep, fuzzyFactor, index, store, boost, omitNorms, indexOptions, nullValue, + precisionStep, fuzzyFactor, boost, fieldType, nullValue, ignoreMalformed(context)); fieldMapper.includeInAll(includeInAll); return fieldMapper; @@ -104,10 +112,10 @@ public class LongFieldMapper extends NumberFieldMapper { private String nullValueAsString; - protected LongFieldMapper(Names names, int precisionStep, String fuzzyFactor, Field.Index index, Field.Store store, - float boost, boolean omitNorms, IndexOptions indexOptions, + protected LongFieldMapper(Names names, int precisionStep, String fuzzyFactor, + float boost, FieldType fieldType, Long nullValue, Explicit ignoreMalformed) { - super(names, precisionStep, fuzzyFactor, index, store, boost, omitNorms, indexOptions, + super(names, precisionStep, fuzzyFactor, boost, fieldType, ignoreMalformed, new NamedAnalyzer("_long/" + precisionStep, new NumericLongAnalyzer(precisionStep)), new NamedAnalyzer("_long/max", new 
NumericLongAnalyzer(Integer.MAX_VALUE))); this.nullValue = nullValue; @@ -120,12 +128,12 @@ public class LongFieldMapper extends NumberFieldMapper { } @Override - public Long value(Fieldable field) { - byte[] value = field.getBinaryValue(); + public Long value(Field field) { + BytesRef value = field.binaryValue(); if (value == null) { return null; } - return Numbers.bytesToLong(value); + return Numbers.bytesToLong(value.bytes); } @Override @@ -135,7 +143,9 @@ public class LongFieldMapper extends NumberFieldMapper { @Override public String indexedValue(String value) { - return NumericUtils.longToPrefixCoded(Long.parseLong(value)); + BytesRef bytesRef = new BytesRef(); + NumericUtils.longToPrefixCoded(Long.parseLong(value), precisionStep(), bytesRef); + return bytesRef.utf8ToString(); } @Override @@ -218,7 +228,7 @@ public class LongFieldMapper extends NumberFieldMapper { } @Override - protected Fieldable innerParseCreateField(ParseContext context) throws IOException { + protected Field innerParseCreateField(ParseContext context) throws IOException { long value; float boost = this.boost; if (context.externalValueSet()) { @@ -284,7 +294,7 @@ public class LongFieldMapper extends NumberFieldMapper { } } } - CustomLongNumericField field = new CustomLongNumericField(this, value); + CustomLongNumericField field = new CustomLongNumericField(this, value, fieldType); field.setBoost(boost); return field; } @@ -314,20 +324,30 @@ public class LongFieldMapper extends NumberFieldMapper { @Override protected void doXContentBody(XContentBuilder builder) throws IOException { super.doXContentBody(builder); - if (index != Defaults.INDEX) { - builder.field("index", index.name().toLowerCase()); + if (indexed() != Defaults.LONG_FIELD_TYPE.indexed() || + analyzed() != Defaults.LONG_FIELD_TYPE.tokenized()) { + builder.field("index", indexTokenizeOptionToString(indexed(), analyzed())); } - if (store != Defaults.STORE) { - builder.field("store", store.name().toLowerCase()); + if (stored() != Defaults.LONG_FIELD_TYPE.stored()) { + builder.field("store", stored()); } - if (termVector != Defaults.TERM_VECTOR) { - builder.field("term_vector", termVector.name().toLowerCase()); + if (storeTermVectors() != Defaults.LONG_FIELD_TYPE.storeTermVectors()) { + builder.field("store_term_vector", storeTermVectors()); } - if (omitNorms != Defaults.OMIT_NORMS) { - builder.field("omit_norms", omitNorms); + if (storeTermVectorOffsets() != Defaults.LONG_FIELD_TYPE.storeTermVectorOffsets()) { + builder.field("store_term_vector_offsets", storeTermVectorOffsets()); } - if (indexOptions != Defaults.INDEX_OPTIONS) { - builder.field("index_options", indexOptionToString(indexOptions)); + if (storeTermVectorPositions() != Defaults.LONG_FIELD_TYPE.storeTermVectorPositions()) { + builder.field("store_term_vector_positions", storeTermVectorPositions()); + } + if (storeTermVectorPayloads() != Defaults.LONG_FIELD_TYPE.storeTermVectorPayloads()) { + builder.field("store_term_vector_payloads", storeTermVectorPayloads()); + } + if (omitNorms() != Defaults.LONG_FIELD_TYPE.omitNorms()) { + builder.field("omit_norms", omitNorms()); + } + if (indexOptions() != Defaults.LONG_FIELD_TYPE.indexOptions()) { + builder.field("index_options", indexOptionToString(indexOptions())); } if (precisionStep != Defaults.PRECISION_STEP) { builder.field("precision_step", precisionStep); @@ -349,15 +369,15 @@ public class LongFieldMapper extends NumberFieldMapper { private final NumberFieldMapper mapper; - public CustomLongNumericField(NumberFieldMapper mapper, long number) { 
- super(mapper, mapper.stored() ? Numbers.longToBytes(number) : null); + public CustomLongNumericField(NumberFieldMapper mapper, long number, FieldType fieldType) { + super(mapper, mapper.stored() ? Numbers.longToBytes(number) : null, fieldType); this.mapper = mapper; this.number = number; } @Override public TokenStream tokenStreamValue() { - if (isIndexed) { + if (fieldType().indexed()) { return mapper.popCachedStream().setLongValue(number); } return null;
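The numeric mapper hunks above all rewrite indexedValue() the same way: in Lucene 4.0, NumericUtils no longer returns a prefix-coded term as a String but writes it into a caller-supplied BytesRef, and float/double values first go through their sortable int/long representations. A minimal sketch of the new call shape, using only the Lucene 4.0 API already imported in these hunks (class name illustrative; shift 0 encodes the full-precision term, where the mappers above pass precisionStep()):

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;

public class PrefixCodedTermsDemo {
    public static void main(String[] args) {
        BytesRef bytes = new BytesRef();

        // Lucene 3.x: String term = NumericUtils.longToPrefixCoded(42L);
        // Lucene 4.0: the encoded term is written into a reusable BytesRef.
        NumericUtils.longToPrefixCoded(42L, 0, bytes);
        System.out.println(bytes); // BytesRef.toString() renders the raw bytes in hex

        // Floats round-trip through their sortable int representation,
        // as in FloatFieldMapper.indexedValue() above.
        NumericUtils.intToPrefixCoded(NumericUtils.floatToSortableInt(3.25f), 0, bytes);
        System.out.println(bytes);
    }
}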
diff --git a/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java index f9403d819ce..3d6909e5df6 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java @@ -20,10 +20,8 @@ package org.elasticsearch.index.mapper.core; import org.apache.lucene.analysis.NumericTokenStream; -import org.apache.lucene.document.AbstractField; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.Filter; import org.apache.lucene.search.Query; @@ -48,9 +46,17 @@ public abstract class NumberFieldMapper extends AbstractFieldM public static class Defaults extends AbstractFieldMapper.Defaults { public static final int PRECISION_STEP = NumericUtils.PRECISION_STEP_DEFAULT; - public static final Field.Index INDEX = Field.Index.NOT_ANALYZED; - public static final boolean OMIT_NORMS = true; - public static final IndexOptions INDEX_OPTIONS = IndexOptions.DOCS_ONLY; + + public static final FieldType NUMBER_FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE); + + static { + NUMBER_FIELD_TYPE.setTokenized(false); + NUMBER_FIELD_TYPE.setOmitNorms(true); + NUMBER_FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY); + NUMBER_FIELD_TYPE.setStoreTermVectors(false); + NUMBER_FIELD_TYPE.freeze(); + } + public static final String FUZZY_FACTOR = null; public static final Explicit IGNORE_MALFORMED = new Explicit(false, false); } @@ -63,15 +69,12 @@ public abstract class NumberFieldMapper extends AbstractFieldM private Boolean ignoreMalformed; - public Builder(String name) { - super(name); - this.index = Defaults.INDEX; - this.omitNorms = Defaults.OMIT_NORMS; - this.indexOptions = Defaults.INDEX_OPTIONS; + public Builder(String name, FieldType fieldType) { + super(name, fieldType); } @Override - public T store(Field.Store store) { + public T store(boolean store) { return super.store(store); } @@ -134,10 +137,10 @@ public abstract class NumberFieldMapper extends AbstractFieldM }; protected NumberFieldMapper(Names names, int precisionStep, @Nullable String fuzzyFactor, - Field.Index index, Field.Store store, - float boost, boolean omitNorms, IndexOptions indexOptions, + float boost, FieldType fieldType, Explicit ignoreMalformed, NamedAnalyzer indexAnalyzer, NamedAnalyzer searchAnalyzer) { - super(names, index, store, Field.TermVector.NO, boost, boost != 1.0f || omitNorms, indexOptions, indexAnalyzer, searchAnalyzer); + // LUCENE 4 UPGRADE: Since we can't do anything before the super call, we have to push the boost check down to subclasses + super(names, boost, fieldType, indexAnalyzer, searchAnalyzer); if (precisionStep <= 0 || precisionStep >= maxPrecisionStep()) { this.precisionStep = Integer.MAX_VALUE; } else { @@ -176,7 +179,7 @@ public abstract class NumberFieldMapper extends AbstractFieldM } @Override - protected Fieldable parseCreateField(ParseContext context) throws IOException { + protected Field parseCreateField(ParseContext context) throws IOException { RuntimeException e; try { return innerParseCreateField(context); @@ -193,7 +196,7 @@ public abstract class NumberFieldMapper extends AbstractFieldM } } - protected abstract Fieldable innerParseCreateField(ParseContext context) throws IOException; + protected abstract Field innerParseCreateField(ParseContext context) throws IOException; /** * Use the field query created here when matching on numbers. @@ -242,12 +245,12 @@ public abstract class NumberFieldMapper extends AbstractFieldM * Override the default behavior (to return the string, and return the actual Number instance). */ @Override - public Object valueForSearch(Fieldable field) { + public Object valueForSearch(Field field) { return value(field); } @Override - public String valueAsString(Fieldable field) { + public String valueAsString(Field field) { Number num = value(field); return num == null ? null : num.toString(); } @@ -283,28 +286,13 @@ public abstract class NumberFieldMapper extends AbstractFieldM } // used so we can use a numeric field in a document that is then parsed twice! - public abstract static class CustomNumericField extends AbstractField { + public abstract static class CustomNumericField extends Field { protected final NumberFieldMapper mapper; - public CustomNumericField(NumberFieldMapper mapper, byte[] value) { + public CustomNumericField(NumberFieldMapper mapper, byte[] value, FieldType fieldType) { + super(mapper.names().indexName(), value, fieldType); this.mapper = mapper; - this.name = mapper.names().indexName(); - fieldsData = value; - - isIndexed = mapper.indexed(); - isTokenized = mapper.indexed(); - indexOptions = FieldInfo.IndexOptions.DOCS_ONLY; - omitNorms = mapper.omitNorms(); - - if (value != null) { - isStored = true; - isBinary = true; - binaryLength = value.length; - binaryOffset = 0; - } - - setStoreTermVector(Field.TermVector.NO); } @Override diff --git a/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java index 237b8b08d25..83281a4bc00 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java @@ -21,12 +21,13 @@ package org.elasticsearch.index.mapper.core; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; @@ -57,6 +58,11 @@ public class ShortFieldMapper extends NumberFieldMapper { public static final String CONTENT_TYPE = "short"; public static class Defaults extends NumberFieldMapper.Defaults { + public static final FieldType SHORT_FIELD_TYPE = new FieldType(NumberFieldMapper.Defaults.NUMBER_FIELD_TYPE); + + static { + SHORT_FIELD_TYPE.freeze(); + } public static final Short NULL_VALUE = null; } @@ -65,7 +71,7 @@ public class ShortFieldMapper extends
NumberFieldMapper { protected Short nullValue = Defaults.NULL_VALUE; public Builder(String name) { - super(name); + super(name, new FieldType(Defaults.SHORT_FIELD_TYPE)); builder = this; } @@ -76,8 +82,9 @@ public class ShortFieldMapper extends NumberFieldMapper { @Override public ShortFieldMapper build(BuilderContext context) { + fieldType.setOmitNorms(fieldType.omitNorms() || boost != 1.0f); ShortFieldMapper fieldMapper = new ShortFieldMapper(buildNames(context), - precisionStep, fuzzyFactor, index, store, boost, omitNorms, indexOptions, nullValue, + precisionStep, fuzzyFactor, boost, fieldType, nullValue, ignoreMalformed(context)); fieldMapper.includeInAll(includeInAll); return fieldMapper; @@ -104,10 +111,10 @@ public class ShortFieldMapper extends NumberFieldMapper { private String nullValueAsString; - protected ShortFieldMapper(Names names, int precisionStep, String fuzzyFactor, Field.Index index, Field.Store store, - float boost, boolean omitNorms, IndexOptions indexOptions, + protected ShortFieldMapper(Names names, int precisionStep, String fuzzyFactor, + float boost, FieldType fieldType, Short nullValue, Explicit ignoreMalformed) { - super(names, precisionStep, fuzzyFactor, index, store, boost, omitNorms, indexOptions, + super(names, precisionStep, fuzzyFactor, boost, fieldType, ignoreMalformed, new NamedAnalyzer("_short/" + precisionStep, new NumericIntegerAnalyzer(precisionStep)), new NamedAnalyzer("_short/max", new NumericIntegerAnalyzer(Integer.MAX_VALUE))); this.nullValue = nullValue; @@ -120,12 +127,12 @@ public class ShortFieldMapper extends NumberFieldMapper { } @Override - public Short value(Fieldable field) { - byte[] value = field.getBinaryValue(); + public Short value(Field field) { + BytesRef value = field.binaryValue(); if (value == null) { return null; } - return Numbers.bytesToShort(value); + return Numbers.bytesToShort(value.bytes); } @Override @@ -135,7 +142,9 @@ public class ShortFieldMapper extends NumberFieldMapper { @Override public String indexedValue(String value) { - return NumericUtils.intToPrefixCoded(Short.parseShort(value)); + BytesRef bytesRef = new BytesRef(); + NumericUtils.intToPrefixCoded(Short.parseShort(value), precisionStep(), bytesRef); + return bytesRef.utf8ToString(); } @Override @@ -218,7 +227,7 @@ public class ShortFieldMapper extends NumberFieldMapper { } @Override - protected Fieldable innerParseCreateField(ParseContext context) throws IOException { + protected Field innerParseCreateField(ParseContext context) throws IOException { short value; float boost = this.boost; if (context.externalValueSet()) { @@ -284,7 +293,7 @@ public class ShortFieldMapper extends NumberFieldMapper { } } } - CustomShortNumericField field = new CustomShortNumericField(this, value); + CustomShortNumericField field = new CustomShortNumericField(this, value, fieldType); field.setBoost(boost); return field; } @@ -314,20 +323,30 @@ public class ShortFieldMapper extends NumberFieldMapper { @Override protected void doXContentBody(XContentBuilder builder) throws IOException { super.doXContentBody(builder); - if (index != Defaults.INDEX) { - builder.field("index", index.name().toLowerCase()); + if (indexed() != Defaults.SHORT_FIELD_TYPE.indexed() || + analyzed() != Defaults.SHORT_FIELD_TYPE.tokenized()) { + builder.field("index", indexTokenizeOptionToString(indexed(), analyzed())); } - if (store != Defaults.STORE) { - builder.field("store", store.name().toLowerCase()); + if (stored() != Defaults.SHORT_FIELD_TYPE.stored()) { + builder.field("store", stored()); } - if 
(termVector != Defaults.TERM_VECTOR) { - builder.field("term_vector", termVector.name().toLowerCase()); + if (storeTermVectors() != Defaults.SHORT_FIELD_TYPE.storeTermVectors()) { + builder.field("store_term_vector", storeTermVectors()); } - if (omitNorms != Defaults.OMIT_NORMS) { - builder.field("omit_norms", omitNorms); + if (storeTermVectorOffsets() != Defaults.SHORT_FIELD_TYPE.storeTermVectorOffsets()) { + builder.field("store_term_vector_offsets", storeTermVectorOffsets()); } - if (indexOptions != Defaults.INDEX_OPTIONS) { - builder.field("index_options", indexOptionToString(indexOptions)); + if (storeTermVectorPositions() != Defaults.SHORT_FIELD_TYPE.storeTermVectorPositions()) { + builder.field("store_term_vector_positions", storeTermVectorPositions()); + } + if (storeTermVectorPayloads() != Defaults.SHORT_FIELD_TYPE.storeTermVectorPayloads()) { + builder.field("store_term_vector_payloads", storeTermVectorPayloads()); + } + if (omitNorms() != Defaults.SHORT_FIELD_TYPE.omitNorms()) { + builder.field("omit_norms", omitNorms()); + } + if (indexOptions() != Defaults.SHORT_FIELD_TYPE.indexOptions()) { + builder.field("index_options", indexOptionToString(indexOptions())); } if (precisionStep != Defaults.PRECISION_STEP) { builder.field("precision_step", precisionStep); @@ -349,15 +368,15 @@ public class ShortFieldMapper extends NumberFieldMapper { private final NumberFieldMapper mapper; - public CustomShortNumericField(NumberFieldMapper mapper, short number) { - super(mapper, mapper.stored() ? Numbers.shortToBytes(number) : null); + public CustomShortNumericField(NumberFieldMapper mapper, short number, FieldType fieldType) { + super(mapper, mapper.stored() ? Numbers.shortToBytes(number) : null, fieldType); this.mapper = mapper; this.number = number; } @Override public TokenStream tokenStreamValue() { - if (isIndexed) { + if (fieldType().indexed()) { return mapper.popCachedStream().setIntValue(number); } return null;
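The StringFieldMapper diff below depends on the same Lucene 4.0 change seen throughout this series: the Field.Index enum is gone, and each of its values maps onto a pair of independent FieldType flags. A hypothetical helper spelling out that mapping (the name applyIndexOption is illustrative; the real translation lives in TypeParsers.parseIndex further down):

import org.apache.lucene.document.FieldType;

final class IndexOptionDemo {
    // "no" / "not_analyzed" / "analyzed" are the mapping-level values;
    // indexed() and tokenized() are the Lucene 4.0 flags that replace Field.Index.
    static void applyIndexOption(FieldType fieldType, String option) {
        if ("no".equals(option)) {
            fieldType.setIndexed(false);       // was Field.Index.NO
        } else if ("not_analyzed".equals(option)) {
            fieldType.setIndexed(true);
            fieldType.setTokenized(false);     // was Field.Index.NOT_ANALYZED
        } else if ("analyzed".equals(option)) {
            fieldType.setIndexed(true);
            fieldType.setTokenized(true);      // was Field.Index.ANALYZED
        } else {
            throw new IllegalArgumentException("unknown index option [" + option + "]");
        }
    }
}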
diff --git a/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java index 636d07e92d8..eb69e9b6757 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java @@ -21,7 +21,7 @@ package org.elasticsearch.index.mapper.core; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.Filter; import org.elasticsearch.common.Strings; @@ -47,6 +47,12 @@ public class StringFieldMapper extends AbstractFieldMapper implements Al public static final String CONTENT_TYPE = "string"; public static class Defaults extends AbstractFieldMapper.Defaults { + public static final FieldType STRING_FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE); + + static { + STRING_FIELD_TYPE.freeze(); + } + // NOTE, when adding defaults here, make sure you add them in the builder public static final String NULL_VALUE = null; public static final int POSITION_OFFSET_GAP = 0; @@ -64,7 +70,7 @@ public class StringFieldMapper extends AbstractFieldMapper implements Al protected int ignoreAbove = Defaults.IGNORE_ABOVE; public Builder(String name) { - super(name); + super(name, new FieldType(Defaults.STRING_FIELD_TYPE)); builder = this; } @@ -113,16 +119,16 @@ public class StringFieldMapper extends AbstractFieldMapper implements Al // if the field is not analyzed, then by default, we should omit norms and have docs only // index options, as probably what the user really wants // if they are set explicitly, we will use those values - if (index == Field.Index.NOT_ANALYZED) { + if (fieldType.indexed() && !fieldType.tokenized()) { if (!omitNormsSet) { - omitNorms = true; + fieldType.setOmitNorms(true); } if (!indexOptionsSet) { - indexOptions = IndexOptions.DOCS_ONLY; + fieldType.setIndexOptions(IndexOptions.DOCS_ONLY); } } StringFieldMapper fieldMapper = new StringFieldMapper(buildNames(context), - index, store, termVector, boost, omitNorms, indexOptions, nullValue, + boost, fieldType, nullValue, indexAnalyzer, searchAnalyzer, searchQuotedAnalyzer, positionOffsetGap, ignoreAbove); fieldMapper.includeInAll(includeInAll); return fieldMapper; @@ -176,18 +182,16 @@ public class StringFieldMapper extends AbstractFieldMapper implements Al private int ignoreAbove; - protected StringFieldMapper(Names names, Field.Index index, Field.Store store, Field.TermVector termVector, - float boost, boolean omitNorms, IndexOptions indexOptions, + protected StringFieldMapper(Names names, float boost, FieldType fieldType, String nullValue, NamedAnalyzer indexAnalyzer, NamedAnalyzer searchAnalyzer) { - this(names, index, store, termVector, boost, omitNorms, indexOptions, nullValue, indexAnalyzer, + this(names, boost, fieldType, nullValue, indexAnalyzer, searchAnalyzer, searchAnalyzer, Defaults.POSITION_OFFSET_GAP, Defaults.IGNORE_ABOVE); } - protected StringFieldMapper(Names names, Field.Index index, Field.Store store, Field.TermVector termVector, - float boost, boolean omitNorms, IndexOptions indexOptions, + protected StringFieldMapper(Names names, float boost, FieldType fieldType, String nullValue, NamedAnalyzer indexAnalyzer, NamedAnalyzer searchAnalyzer, NamedAnalyzer searchQuotedAnalyzer, int positionOffsetGap, int ignoreAbove) { - super(names, index, store, termVector, boost, omitNorms, indexOptions, indexAnalyzer, searchAnalyzer); + super(names, boost, fieldType, indexAnalyzer, searchAnalyzer); this.nullValue = nullValue; this.positionOffsetGap = positionOffsetGap; this.searchQuotedAnalyzer = searchQuotedAnalyzer != null ?
searchQuotedAnalyzer : this.searchAnalyzer; @@ -209,7 +213,7 @@ public class StringFieldMapper extends AbstractFieldMapper implements Al } @Override - public String value(Fieldable field) { + public String value(Field field) { return field.stringValue(); } @@ -219,7 +223,7 @@ public class StringFieldMapper extends AbstractFieldMapper implements Al } @Override - public String valueAsString(Fieldable field) { + public String valueAsString(Field field) { return value(field); } @@ -291,7 +295,7 @@ public class StringFieldMapper extends AbstractFieldMapper implements Al context.ignoredValue(names.indexName(), value); return null; } - Field field = new Field(names.indexName(), false, value, store, index, termVector); + Field field = new Field(names.indexName(), value, fieldType); field.setBoost(boost); return field; } @@ -317,20 +321,30 @@ public class StringFieldMapper extends AbstractFieldMapper implements Al @Override protected void doXContentBody(XContentBuilder builder) throws IOException { super.doXContentBody(builder); - if (index != Defaults.INDEX) { - builder.field("index", index.name().toLowerCase()); + if (indexed() != Defaults.STRING_FIELD_TYPE.indexed() || + analyzed() != Defaults.STRING_FIELD_TYPE.tokenized()) { + builder.field("index", indexTokenizeOptionToString(indexed(), analyzed())); } - if (store != Defaults.STORE) { - builder.field("store", store.name().toLowerCase()); + if (stored() != Defaults.STRING_FIELD_TYPE.stored()) { + builder.field("store", stored()); } - if (termVector != Defaults.TERM_VECTOR) { - builder.field("term_vector", termVector.name().toLowerCase()); + if (storeTermVectors() != Defaults.STRING_FIELD_TYPE.storeTermVectors()) { + builder.field("store_term_vector", storeTermVectors()); } - if (omitNorms != Defaults.OMIT_NORMS) { - builder.field("omit_norms", omitNorms); + if (storeTermVectorOffsets() != Defaults.STRING_FIELD_TYPE.storeTermVectorOffsets()) { + builder.field("store_term_vector_offsets", storeTermVectorOffsets()); } - if (indexOptions != Defaults.INDEX_OPTIONS) { - builder.field("index_options", indexOptionToString(indexOptions)); + if (storeTermVectorPositions() != Defaults.STRING_FIELD_TYPE.storeTermVectorPositions()) { + builder.field("store_term_vector_positions", storeTermVectorPositions()); + } + if (storeTermVectorPayloads() != Defaults.STRING_FIELD_TYPE.storeTermVectorPayloads()) { + builder.field("store_term_vector_payloads", storeTermVectorPayloads()); + } + if (omitNorms() != Defaults.STRING_FIELD_TYPE.omitNorms()) { + builder.field("omit_norms", omitNorms()); + } + if (indexOptions() != Defaults.STRING_FIELD_TYPE.indexOptions()) { + builder.field("index_options", indexOptionToString(indexOptions())); } if (nullValue != null) { builder.field("null_value", nullValue); diff --git a/src/main/java/org/elasticsearch/index/mapper/core/TypeParsers.java b/src/main/java/org/elasticsearch/index/mapper/core/TypeParsers.java index 2bcdf14eec1..1ffd78d532c 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/TypeParsers.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/TypeParsers.java @@ -19,8 +19,6 @@ package org.elasticsearch.index.mapper.core; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.Field.Index; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.elasticsearch.ElasticSearchParseException; import org.elasticsearch.common.Strings; @@ -68,11 +66,21 @@ public class TypeParsers { } else if (propName.equals("store")) { builder.store(parseStore(name, propNode.toString())); } else 
if (propName.equals("index")) { - builder.index(parseIndex(name, propNode.toString())); + parseIndex(name, propNode.toString(), builder); + } else if (propName.equals("tokenized")) { + builder.tokenized(nodeBooleanValue(propNode)); } else if (propName.equals("term_vector")) { - builder.termVector(parseTermVector(name, propNode.toString())); + parseTermVector(name, propNode.toString(), builder); } else if (propName.equals("boost")) { builder.boost(nodeFloatValue(propNode)); + } else if (propName.equals("store_term_vectors")) { + builder.storeTermVectors(nodeBooleanValue(propNode)); + } else if (propName.equals("store_term_vector_offsets")) { + builder.storeTermVectorOffsets(nodeBooleanValue(propNode)); + } else if (propName.equals("store_term_vector_positions")) { + builder.storeTermVectorPositions(nodeBooleanValue(propNode)); + } else if (propName.equals("store_term_vector_payloads")) { + builder.storeTermVectorPayloads(nodeBooleanValue(propNode)); } else if (propName.equals("omit_norms")) { builder.omitNorms(nodeBooleanValue(propNode)); } else if (propName.equals("omit_term_freq_and_positions")) { @@ -122,48 +130,46 @@ public class TypeParsers { return Joda.forPattern(node.toString()); } - public static Field.TermVector parseTermVector(String fieldName, String termVector) throws MapperParsingException { + public static void parseTermVector(String fieldName, String termVector, AbstractFieldMapper.Builder builder) throws MapperParsingException { termVector = Strings.toUnderscoreCase(termVector); if ("no".equals(termVector)) { - return Field.TermVector.NO; + builder.storeTermVectors(false); } else if ("yes".equals(termVector)) { - return Field.TermVector.YES; + builder.storeTermVectors(true); } else if ("with_offsets".equals(termVector)) { - return Field.TermVector.WITH_OFFSETS; + builder.storeTermVectorOffsets(true); } else if ("with_positions".equals(termVector)) { - return Field.TermVector.WITH_POSITIONS; + builder.storeTermVectorPositions(true); } else if ("with_positions_offsets".equals(termVector)) { - return Field.TermVector.WITH_POSITIONS_OFFSETS; + builder.storeTermVectorPositions(true); + builder.storeTermVectorOffsets(true); } else { throw new MapperParsingException("Wrong value for termVector [" + termVector + "] for field [" + fieldName + "]"); } } - public static Field.Index parseIndex(String fieldName, String index) throws MapperParsingException { + public static void parseIndex(String fieldName, String index, AbstractFieldMapper.Builder builder) throws MapperParsingException { index = Strings.toUnderscoreCase(index); if ("no".equals(index)) { - return Field.Index.NO; + builder.index(false); } else if ("not_analyzed".equals(index)) { - return Field.Index.NOT_ANALYZED; + builder.index(true); + builder.tokenized(false); } else if ("analyzed".equals(index)) { - return Field.Index.ANALYZED; + builder.index(true); + builder.tokenized(true); } else { throw new MapperParsingException("Wrong value for index [" + index + "] for field [" + fieldName + "]"); } } - public static Field.Store parseStore(String fieldName, String store) throws MapperParsingException { + public static boolean parseStore(String fieldName, String store) throws MapperParsingException { if ("no".equals(store)) { - return Field.Store.NO; + return false; } else if ("yes".equals(store)) { - return Field.Store.YES; + return true; } else { - boolean value = nodeBooleanValue(store); - if (value) { - return Field.Store.YES; - } else { - return Field.Store.NO; - } + return nodeBooleanValue(store); } } diff --git 
diff --git a/src/main/java/org/elasticsearch/index/mapper/geo/GeoShapeFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/geo/GeoShapeFieldMapper.java index a9782c6c2d3..880420a130b 100644 --- a/src/main/java/org/elasticsearch/index/mapper/geo/GeoShapeFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/geo/GeoShapeFieldMapper.java @@ -1,7 +1,7 @@ package org.elasticsearch.index.mapper.geo; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.FieldInfo; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.Strings; @@ -55,6 +55,18 @@ public class GeoShapeFieldMapper extends AbstractFieldMapper { public static final int GEOHASH_LEVELS = GeohashPrefixTree.getMaxLevelsPossible(); public static final int QUADTREE_LEVELS = QuadPrefixTree.DEFAULT_MAX_LEVELS; public static final double DISTANCE_ERROR_PCT = 0.025d; + + public static final FieldType GEO_SHAPE_FIELD_TYPE = new FieldType(); + + static { + GEO_SHAPE_FIELD_TYPE.setIndexed(true); + GEO_SHAPE_FIELD_TYPE.setTokenized(false); + GEO_SHAPE_FIELD_TYPE.setStored(false); + GEO_SHAPE_FIELD_TYPE.setStoreTermVectors(false); + GEO_SHAPE_FIELD_TYPE.setOmitNorms(true); + GEO_SHAPE_FIELD_TYPE.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY); + GEO_SHAPE_FIELD_TYPE.freeze(); + } } public static class Builder extends AbstractFieldMapper.Builder { @@ -66,7 +78,7 @@ public class GeoShapeFieldMapper extends AbstractFieldMapper { private SpatialPrefixTree prefixTree; public Builder(String name) { - super(name); + super(name, new FieldType(Defaults.GEO_SHAPE_FIELD_TYPE)); } public Builder tree(String tree) { @@ -96,7 +108,7 @@ public class GeoShapeFieldMapper extends AbstractFieldMapper { throw new ElasticSearchIllegalArgumentException("Unknown prefix tree type [" + tree + "]"); } - return new GeoShapeFieldMapper(buildNames(context), prefixTree, distanceErrorPct); + return new GeoShapeFieldMapper(buildNames(context), prefixTree, distanceErrorPct, fieldType); } } @@ -123,13 +135,13 @@ public class GeoShapeFieldMapper extends AbstractFieldMapper { private final SpatialStrategy spatialStrategy; - public GeoShapeFieldMapper(FieldMapper.Names names, SpatialPrefixTree prefixTree, double distanceErrorPct) { - super(names, Field.Index.NOT_ANALYZED, Field.Store.NO, Field.TermVector.NO, 1, true, FieldInfo.IndexOptions.DOCS_ONLY, null, null); + public GeoShapeFieldMapper(FieldMapper.Names names, SpatialPrefixTree prefixTree, double distanceErrorPct, FieldType fieldType) { + super(names, 1, fieldType, null, null); this.spatialStrategy = new TermQueryPrefixTreeStrategy(names, prefixTree, distanceErrorPct); } @Override - protected Fieldable parseCreateField(ParseContext context) throws IOException { + protected Field parseCreateField(ParseContext context) throws IOException { return spatialStrategy.createField(GeoJSONShapeParser.parse(context.parser())); } @@ -162,7 +174,7 @@ public class GeoShapeFieldMapper extends AbstractFieldMapper { } @Override - public String value(Fieldable field) { + public String value(Field field) { throw new UnsupportedOperationException("GeoShape fields cannot be converted to String values"); } @@ -172,7 +184,7 @@ public class GeoShapeFieldMapper extends AbstractFieldMapper { } @Override - public String valueAsString(Fieldable field) { + public String valueAsString(Field field) { throw new UnsupportedOperationException("GeoShape fields cannot be converted to String values"); }
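GEO_SHAPE_FIELD_TYPE above is the fullest example of the pattern every Defaults class in this series now follows: configure one shared FieldType, freeze() it so the template can never be mutated, and hand each Builder its own copy. A small sketch of why the copy matters, assuming only the Lucene 4.0 document API:

import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo.IndexOptions;

public class FieldTypeTemplateDemo {
    // Shared template, analogous to the *_FIELD_TYPE constants above.
    static final FieldType TEMPLATE = new FieldType();

    static {
        TEMPLATE.setIndexed(true);
        TEMPLATE.setTokenized(false);
        TEMPLATE.setOmitNorms(true);
        TEMPLATE.setIndexOptions(IndexOptions.DOCS_ONLY);
        TEMPLATE.freeze(); // any further setter on TEMPLATE now throws IllegalStateException
    }

    public static void main(String[] args) {
        // The copy constructor duplicates the settings but not the frozen flag,
        // so each mapper can still specialize its own instance.
        FieldType perField = new FieldType(TEMPLATE);
        perField.setStored(true);      // fine: the copy is mutable
        // TEMPLATE.setStored(true);   // would throw IllegalStateException
        System.out.println(perField);
    }
}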
diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java index 85c88c7525b..9c9dd11858d 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java @@ -21,7 +21,6 @@ package org.elasticsearch.index.mapper.internal; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; @@ -195,7 +194,7 @@ public class AllFieldMapper extends AbstractFieldMapper implements Interna } @Override - protected Fieldable parseCreateField(ParseContext context) throws IOException { + protected Field parseCreateField(ParseContext context) throws IOException { if (!enabled) { return null; } @@ -229,7 +228,7 @@ public class AllFieldMapper extends AbstractFieldMapper implements Interna } @Override - public Void value(Fieldable field) { + public Void value(Field field) { return null; } @@ -239,12 +238,12 @@ public class AllFieldMapper extends AbstractFieldMapper implements Interna } @Override - public String valueAsString(Fieldable field) { + public String valueAsString(Field field) { return null; } @Override - public Object valueForSearch(Fieldable field) { + public Object valueForSearch(Field field) { return null; } diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java index 97ef611857f..21a0c7fdcf8 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java @@ -20,12 +20,12 @@ package org.elasticsearch.index.mapper.internal; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.document.FieldType; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.NumericRangeQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Numbers; @@ -59,8 +59,14 @@ public class BoostFieldMapper extends NumberFieldMapper implements Intern public static class Defaults extends NumberFieldMapper.Defaults { public static final String NAME = "_boost"; public static final Float NULL_VALUE = null; - public static final Field.Index INDEX = Field.Index.NO; - public static final Field.Store STORE = Field.Store.NO; + + public static final FieldType BOOST_FIELD_TYPE = new FieldType(NumberFieldMapper.Defaults.NUMBER_FIELD_TYPE); + + static { + BOOST_FIELD_TYPE.setIndexed(false); + BOOST_FIELD_TYPE.setStored(false); + BOOST_FIELD_TYPE.freeze(); + } } public static class Builder extends NumberFieldMapper.Builder { @@ -68,10 +73,8 @@ public class BoostFieldMapper extends NumberFieldMapper implements Intern protected Float nullValue = Defaults.NULL_VALUE; public Builder(String name) { - super(name); + super(name, new FieldType(Defaults.BOOST_FIELD_TYPE)); builder = this; - index = Defaults.INDEX; - store = Defaults.STORE; } public Builder nullValue(float nullValue) { @@ -82,7 +85,7 @@ public class BoostFieldMapper extends
NumberFieldMapper implements Intern @Override public BoostFieldMapper build(BuilderContext context) { return new BoostFieldMapper(name, buildIndexName(context), - precisionStep, index, store, boost, omitNorms, indexOptions, nullValue); + precisionStep, boost, fieldType, nullValue); } } @@ -110,14 +113,12 @@ public class BoostFieldMapper extends NumberFieldMapper implements Intern } protected BoostFieldMapper(String name, String indexName) { - this(name, indexName, Defaults.PRECISION_STEP, Defaults.INDEX, Defaults.STORE, - Defaults.BOOST, Defaults.OMIT_NORMS, Defaults.INDEX_OPTIONS, Defaults.NULL_VALUE); + this(name, indexName, Defaults.PRECISION_STEP, Defaults.BOOST, new FieldType(Defaults.BOOST_FIELD_TYPE), Defaults.NULL_VALUE); } - protected BoostFieldMapper(String name, String indexName, int precisionStep, Field.Index index, Field.Store store, - float boost, boolean omitNorms, IndexOptions indexOptions, - Float nullValue) { - super(new Names(name, indexName, indexName, name), precisionStep, null, index, store, boost, omitNorms, indexOptions, + protected BoostFieldMapper(String name, String indexName, int precisionStep, + float boost, FieldType fieldType, Float nullValue) { + super(new Names(name, indexName, indexName, name), precisionStep, null, boost, fieldType, Defaults.IGNORE_MALFORMED, new NamedAnalyzer("_float/" + precisionStep, new NumericFloatAnalyzer(precisionStep)), new NamedAnalyzer("_float/max", new NumericFloatAnalyzer(Integer.MAX_VALUE))); this.nullValue = nullValue; @@ -129,12 +130,12 @@ public class BoostFieldMapper extends NumberFieldMapper implements Intern } @Override - public Float value(Fieldable field) { - byte[] value = field.getBinaryValue(); + public Float value(Field field) { + BytesRef value = field.binaryValue(); if (value == null) { return null; } - return Numbers.bytesToFloat(value); + return Numbers.bytesToFloat(value.bytes); } @Override @@ -144,7 +145,10 @@ public class BoostFieldMapper extends NumberFieldMapper implements Intern @Override public String indexedValue(String value) { - return NumericUtils.floatToPrefixCoded(Float.parseFloat(value)); + int intValue = NumericUtils.floatToSortableInt(Float.parseFloat(value)); + BytesRef bytesRef = new BytesRef(); + NumericUtils.intToPrefixCoded(intValue, precisionStep(), bytesRef); + return bytesRef.utf8ToString(); } @Override @@ -230,13 +234,13 @@ public class BoostFieldMapper extends NumberFieldMapper implements Intern } @Override - protected Fieldable innerParseCreateField(ParseContext context) throws IOException { + protected Field innerParseCreateField(ParseContext context) throws IOException { final float value = parseFloatValue(context); if (Float.isNaN(value)) { return null; } context.doc().setBoost(value); - return new FloatFieldMapper.CustomFloatNumericField(this, value); + return new FloatFieldMapper.CustomFloatNumericField(this, value, fieldType); } private float parseFloatValue(ParseContext context) throws IOException { diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java index d20b47dc267..58008e8314d 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java @@ -23,8 +23,9 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterables; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import 
org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.index.Term; import org.apache.lucene.search.*; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Strings; @@ -56,10 +57,17 @@ public class IdFieldMapper extends AbstractFieldMapper implements Intern public static class Defaults extends AbstractFieldMapper.Defaults { public static final String NAME = IdFieldMapper.NAME; public static final String INDEX_NAME = IdFieldMapper.NAME; - public static final Field.Index INDEX = Field.Index.NO; - public static final Field.Store STORE = Field.Store.NO; - public static final boolean OMIT_NORMS = true; - public static final IndexOptions INDEX_OPTIONS = IndexOptions.DOCS_ONLY; + + public static final FieldType ID_FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE); + + static { + ID_FIELD_TYPE.setIndexed(false); + ID_FIELD_TYPE.setStored(false); + ID_FIELD_TYPE.setOmitNorms(true); + ID_FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY); + ID_FIELD_TYPE.freeze(); + } + public static final String PATH = null; } @@ -68,12 +76,8 @@ public class IdFieldMapper extends AbstractFieldMapper implements Intern private String path = Defaults.PATH; public Builder() { - super(Defaults.NAME); + super(Defaults.NAME, new FieldType(Defaults.ID_FIELD_TYPE)); indexName = Defaults.INDEX_NAME; - store = Defaults.STORE; - index = Defaults.INDEX; - omitNorms = Defaults.OMIT_NORMS; - indexOptions = Defaults.INDEX_OPTIONS; } public Builder path(String path) { @@ -83,7 +87,7 @@ public class IdFieldMapper extends AbstractFieldMapper implements Intern @Override public IdFieldMapper build(BuilderContext context) { - return new IdFieldMapper(name, indexName, index, store, termVector, boost, omitNorms, indexOptions, path); + return new IdFieldMapper(name, indexName, boost, fieldType, path); } } @@ -106,21 +110,19 @@ public class IdFieldMapper extends AbstractFieldMapper implements Intern private final String path; public IdFieldMapper() { - this(Defaults.NAME, Defaults.INDEX_NAME, Defaults.INDEX); + this(Defaults.NAME, Defaults.INDEX_NAME, new FieldType(Defaults.ID_FIELD_TYPE)); } - public IdFieldMapper(Field.Index index) { - this(Defaults.NAME, Defaults.INDEX_NAME, index); + public IdFieldMapper(FieldType fieldType) { + this(Defaults.NAME, Defaults.INDEX_NAME, fieldType); } - protected IdFieldMapper(String name, String indexName, Field.Index index) { - this(name, indexName, index, Defaults.STORE, Defaults.TERM_VECTOR, Defaults.BOOST, - Defaults.OMIT_NORMS, Defaults.INDEX_OPTIONS, Defaults.PATH); + protected IdFieldMapper(String name, String indexName, FieldType fieldType) { + this(name, indexName, Defaults.BOOST, fieldType, Defaults.PATH); } - protected IdFieldMapper(String name, String indexName, Field.Index index, Field.Store store, Field.TermVector termVector, - float boost, boolean omitNorms, IndexOptions indexOptions, String path) { - super(new Names(name, indexName, indexName, name), index, store, termVector, boost, omitNorms, indexOptions, Lucene.KEYWORD_ANALYZER, + protected IdFieldMapper(String name, String indexName, float boost, FieldType fieldType, String path) { + super(new Names(name, indexName, indexName, name), boost, fieldType, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); this.path = path; } @@ -130,12 +132,12 @@ public class IdFieldMapper extends AbstractFieldMapper implements Intern } public String value(Document document) { - Fieldable field = 
document.getFieldable(names.indexName()); + Field field = (Field) document.getField(names.indexName()); return field == null ? null : value(field); } @Override - public String value(Fieldable field) { + public String value(Field field) { return field.stringValue(); } @@ -145,7 +147,7 @@ public class IdFieldMapper extends AbstractFieldMapper implements Intern } @Override - public String valueAsString(Fieldable field) { + public String valueAsString(Field field) { return value(field); } @@ -184,14 +186,14 @@ public class IdFieldMapper extends AbstractFieldMapper implements Intern } Collection queryTypes = context.queryTypes(); if (queryTypes.size() == 1) { - PrefixQuery prefixQuery = new PrefixQuery(UidFieldMapper.TERM_FACTORY.createTerm(Uid.createUid(Iterables.getFirst(queryTypes, null), value))); + PrefixQuery prefixQuery = new PrefixQuery(new Term(UidFieldMapper.NAME, Uid.createUid(Iterables.getFirst(queryTypes, null), value))); if (method != null) { prefixQuery.setRewriteMethod(method); } return prefixQuery; } BooleanQuery query = new BooleanQuery(); for (String queryType : queryTypes) { - PrefixQuery prefixQuery = new PrefixQuery(UidFieldMapper.TERM_FACTORY.createTerm(Uid.createUid(queryType, value))); + PrefixQuery prefixQuery = new PrefixQuery(new Term(UidFieldMapper.NAME, Uid.createUid(queryType, value))); if (method != null) { prefixQuery.setRewriteMethod(method); } @@ -207,11 +209,11 @@ public class IdFieldMapper extends AbstractFieldMapper implements Intern } Collection queryTypes = context.queryTypes(); if (queryTypes.size() == 1) { - return new PrefixFilter(UidFieldMapper.TERM_FACTORY.createTerm(Uid.createUid(Iterables.getFirst(queryTypes, null), value))); + return new PrefixFilter(new Term(UidFieldMapper.NAME, Uid.createUid(Iterables.getFirst(queryTypes, null), value))); } XBooleanFilter filter = new XBooleanFilter(); for (String queryType : queryTypes) { - filter.addShould(new PrefixFilter(UidFieldMapper.TERM_FACTORY.createTerm(Uid.createUid(queryType, value)))); + filter.addShould(new PrefixFilter(new Term(UidFieldMapper.NAME, Uid.createUid(queryType, value)))); } return filter; } @@ -256,16 +258,16 @@ public class IdFieldMapper extends AbstractFieldMapper implements Intern throw new MapperParsingException("Provided id [" + context.id() + "] does not match the content one [" + id + "]"); } context.id(id); - if (index == Field.Index.NO && store == Field.Store.NO) { + if (!fieldType.indexed() && !fieldType.stored()) { return null; } - return new Field(names.indexName(), false, context.id(), store, index, termVector); + return new Field(names.indexName(), context.id(), fieldType); } else { // we are in the pre/post parse phase - if (index == Field.Index.NO && store == Field.Store.NO) { + if (!fieldType.indexed() && !fieldType.stored()) { return null; } - return new Field(names.indexName(), false, context.id(), store, index, termVector); + return new Field(names.indexName(), context.id(), fieldType); } }
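The parseCreateField hunk above also shows the new field construction: the old six-argument Field constructor (name, intern flag, value, store, index, termVector) collapses into Field(String, String, FieldType), with every indexing decision carried by the FieldType. A minimal sketch of that construction (the ad-hoc FieldType is a stand-in for ID_FIELD_TYPE above):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;

public class FieldConstructionDemo {
    public static void main(String[] args) {
        FieldType idType = new FieldType(); // ad-hoc stand-in for ID_FIELD_TYPE above
        idType.setIndexed(true);
        idType.setTokenized(false);
        idType.setStored(true);
        idType.freeze();

        // Lucene 3.x: new Field("_id", false, "1", Field.Store.YES, Field.Index.NOT_ANALYZED, Field.TermVector.NO)
        Document doc = new Document();
        doc.add(new Field("_id", "1", idType));
        System.out.println(doc.getField("_id"));
    }
}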
store.name().toLowerCase()); + if (fieldType.stored() != Defaults.ID_FIELD_TYPE.stored()) { + builder.field("store", fieldType.stored()); } - if (index != Defaults.INDEX) { - builder.field("index", index.name().toLowerCase()); + if (fieldType.indexed() != Defaults.ID_FIELD_TYPE.indexed()) { + builder.field("index", fieldType.indexed()); } if (path != Defaults.PATH) { builder.field("path", path); diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/IndexFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/IndexFieldMapper.java index f7765276a33..6b752125316 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/IndexFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/IndexFieldMapper.java @@ -21,7 +21,7 @@ package org.elasticsearch.index.mapper.internal; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.Term; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.elasticsearch.common.Strings; @@ -48,10 +48,18 @@ public class IndexFieldMapper extends AbstractFieldMapper implements Int public static class Defaults extends AbstractFieldMapper.Defaults { public static final String NAME = IndexFieldMapper.NAME; public static final String INDEX_NAME = IndexFieldMapper.NAME; - public static final Field.Index INDEX = Field.Index.NOT_ANALYZED; - public static final Field.Store STORE = Field.Store.NO; - public static final boolean OMIT_NORMS = true; - public static final IndexOptions INDEX_OPTIONS = IndexOptions.DOCS_ONLY; + + public static final FieldType INDEX_FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE); + + static { + INDEX_FIELD_TYPE.setIndexed(true); + INDEX_FIELD_TYPE.setTokenized(false); + INDEX_FIELD_TYPE.setStored(false); + INDEX_FIELD_TYPE.setOmitNorms(true); + INDEX_FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY); + INDEX_FIELD_TYPE.freeze(); + } + public static final boolean ENABLED = false; } @@ -60,12 +68,8 @@ public class IndexFieldMapper extends AbstractFieldMapper implements Int private boolean enabled = Defaults.ENABLED; public Builder() { - super(Defaults.NAME); + super(Defaults.NAME, new FieldType(Defaults.INDEX_FIELD_TYPE)); indexName = Defaults.INDEX_NAME; - index = Defaults.INDEX; - store = Defaults.STORE; - omitNorms = Defaults.OMIT_NORMS; - indexOptions = Defaults.INDEX_OPTIONS; } public Builder enabled(boolean enabled) { @@ -75,7 +79,7 @@ public class IndexFieldMapper extends AbstractFieldMapper implements Int @Override public IndexFieldMapper build(BuilderContext context) { - return new IndexFieldMapper(name, indexName, store, termVector, boost, omitNorms, indexOptions, enabled); + return new IndexFieldMapper(name, indexName, boost, fieldType, enabled); } } @@ -103,13 +107,11 @@ public class IndexFieldMapper extends AbstractFieldMapper implements Int } protected IndexFieldMapper(String name, String indexName) { - this(name, indexName, Defaults.STORE, Defaults.TERM_VECTOR, Defaults.BOOST, - Defaults.OMIT_NORMS, Defaults.INDEX_OPTIONS, Defaults.ENABLED); + this(name, indexName, Defaults.BOOST, new FieldType(Defaults.INDEX_FIELD_TYPE), Defaults.ENABLED); } - public IndexFieldMapper(String name, String indexName, Field.Store store, Field.TermVector termVector, - float boost, boolean omitNorms, IndexOptions indexOptions, boolean enabled) { - super(new Names(name, indexName, indexName, name), Defaults.INDEX, store, 
termVector, boost, omitNorms, indexOptions, Lucene.KEYWORD_ANALYZER, + public IndexFieldMapper(String name, String indexName, float boost, FieldType fieldType, boolean enabled) { + super(new Names(name, indexName, indexName, name), boost, fieldType, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); this.enabled = enabled; } @@ -119,12 +121,12 @@ public class IndexFieldMapper extends AbstractFieldMapper implements Int } public String value(Document document) { - Fieldable field = document.getFieldable(names.indexName()); + Field field = (Field) document.getField(names.indexName()); return field == null ? null : value(field); } @Override - public String value(Fieldable field) { + public String value(Field field) { return field.stringValue(); } @@ -134,7 +136,7 @@ public class IndexFieldMapper extends AbstractFieldMapper implements Int } @Override - public String valueAsString(Fieldable field) { + public String valueAsString(Field field) { return value(field); } @@ -176,7 +178,7 @@ public class IndexFieldMapper extends AbstractFieldMapper implements Int if (!enabled) { return null; } - return new Field(names.indexName(), context.index(), store, index); + return new Field(names.indexName(), context.index(), fieldType); } @Override @@ -187,12 +189,12 @@ public class IndexFieldMapper extends AbstractFieldMapper implements Int @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { // if all defaults, no need to write it at all - if (store == Defaults.STORE && enabled == Defaults.ENABLED) { + if (stored() == Defaults.INDEX_FIELD_TYPE.stored() && enabled == Defaults.ENABLED) { return builder; } builder.startObject(CONTENT_TYPE); - if (store != Defaults.STORE) { - builder.field("store", store.name().toLowerCase()); + if (stored() != Defaults.INDEX_FIELD_TYPE.stored()) { + builder.field("store", stored()); } if (enabled != Defaults.ENABLED) { builder.field("enabled", enabled); diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/ParentFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/ParentFieldMapper.java index e846a6e47ff..8ddac5397bd 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/ParentFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/ParentFieldMapper.java @@ -20,7 +20,7 @@ package org.elasticsearch.index.mapper.internal; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.Term; import org.apache.lucene.search.ConstantScoreQuery; @@ -49,9 +49,17 @@ public class ParentFieldMapper extends AbstractFieldMapper implements Inter public static class Defaults extends AbstractFieldMapper.Defaults { public static final String NAME = ParentFieldMapper.NAME; - public static final Field.Index INDEX = Field.Index.NOT_ANALYZED; - public static final boolean OMIT_NORMS = true; - public static final IndexOptions INDEX_OPTIONS = IndexOptions.DOCS_ONLY; + + public static final FieldType PARENT_FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE); + + static { + PARENT_FIELD_TYPE.setIndexed(true); + PARENT_FIELD_TYPE.setTokenized(false); + PARENT_FIELD_TYPE.setStored(true); + PARENT_FIELD_TYPE.setOmitNorms(true); + PARENT_FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY); + PARENT_FIELD_TYPE.freeze(); + } } public static class Builder extends Mapper.Builder { @@ -97,8 +105,8 @@ public class 
ParentFieldMapper extends AbstractFieldMapper implements Inter private final String type; protected ParentFieldMapper(String name, String indexName, String type) { - super(new Names(name, indexName, indexName, name), Defaults.INDEX, Field.Store.YES, Defaults.TERM_VECTOR, Defaults.BOOST, - Defaults.OMIT_NORMS, Defaults.INDEX_OPTIONS, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); + super(new Names(name, indexName, indexName, name), Defaults.BOOST, new FieldType(Defaults.PARENT_FIELD_TYPE), + Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); this.type = type; } @@ -130,7 +138,7 @@ public class ParentFieldMapper extends AbstractFieldMapper implements Inter // we are in the parsing of _parent phase String parentId = context.parser().text(); context.sourceToParse().parent(parentId); - return new Field(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), store, index); + return new Field(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType); } // otherwise, we are running it post processing of the xcontent String parsedParentId = context.doc().get(Defaults.NAME); @@ -141,7 +149,7 @@ public class ParentFieldMapper extends AbstractFieldMapper implements Inter throw new MapperParsingException("No parent id provided, not within the document, and not externally"); } // we did not add it in the parsing phase, add it now - return new Field(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), store, index); + return new Field(names.indexName(), Uid.createUid(context.stringBuilder(), type, parentId), fieldType); } else if (parentId != null && !parsedParentId.equals(Uid.createUid(context.stringBuilder(), type, parentId))) { throw new MapperParsingException("Parent id mismatch, document value is [" + Uid.createUid(parsedParentId).id() + "], while external value is [" + parentId + "]"); } @@ -151,7 +159,7 @@ public class ParentFieldMapper extends AbstractFieldMapper implements Inter } @Override - public Uid value(Fieldable field) { + public Uid value(Field field) { return Uid.createUid(field.stringValue()); } @@ -161,12 +169,12 @@ public class ParentFieldMapper extends AbstractFieldMapper implements Inter } @Override - public String valueAsString(Fieldable field) { + public String valueAsString(Field field) { return field.stringValue(); } @Override - public Object valueForSearch(Fieldable field) { + public Object valueForSearch(Field field) { String fieldValue = field.stringValue(); if (fieldValue == null) { return null; diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/RoutingFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/RoutingFieldMapper.java index 8435027ac10..dabfdf34fbd 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/RoutingFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/RoutingFieldMapper.java @@ -21,7 +21,7 @@ package org.elasticsearch.index.mapper.internal; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.elasticsearch.common.Strings; import org.elasticsearch.common.lucene.Lucene; @@ -47,10 +47,18 @@ public class RoutingFieldMapper extends AbstractFieldMapper implements I public static class Defaults extends AbstractFieldMapper.Defaults { public static final String NAME = "_routing"; - public static final Field.Index INDEX = 
Field.Index.NOT_ANALYZED; - public static final Field.Store STORE = Field.Store.YES; - public static final boolean OMIT_NORMS = true; - public static final IndexOptions INDEX_OPTIONS = IndexOptions.DOCS_ONLY; + + public static final FieldType ROUTING_FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE); + + static { + ROUTING_FIELD_TYPE.setIndexed(true); + ROUTING_FIELD_TYPE.setTokenized(false); + ROUTING_FIELD_TYPE.setStored(true); + ROUTING_FIELD_TYPE.setOmitNorms(true); + ROUTING_FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY); + ROUTING_FIELD_TYPE.freeze(); + } + public static final boolean REQUIRED = false; public static final String PATH = null; } @@ -62,9 +70,7 @@ public class RoutingFieldMapper extends AbstractFieldMapper implements I private String path = Defaults.PATH; public Builder() { - super(Defaults.NAME); - store = Defaults.STORE; - index = Defaults.INDEX; + super(Defaults.NAME, new FieldType(Defaults.ROUTING_FIELD_TYPE)); } public Builder required(boolean required) { @@ -79,7 +85,7 @@ public class RoutingFieldMapper extends AbstractFieldMapper implements I @Override public RoutingFieldMapper build(BuilderContext context) { - return new RoutingFieldMapper(store, index, required, path); + return new RoutingFieldMapper(fieldType, required, path); } } @@ -107,11 +113,11 @@ public class RoutingFieldMapper extends AbstractFieldMapper implements I private final String path; public RoutingFieldMapper() { - this(Defaults.STORE, Defaults.INDEX, Defaults.REQUIRED, Defaults.PATH); + this(new FieldType(Defaults.ROUTING_FIELD_TYPE), Defaults.REQUIRED, Defaults.PATH); } - protected RoutingFieldMapper(Field.Store store, Field.Index index, boolean required, String path) { - super(new Names(Defaults.NAME, Defaults.NAME, Defaults.NAME, Defaults.NAME), index, store, Defaults.TERM_VECTOR, 1.0f, Defaults.OMIT_NORMS, Defaults.INDEX_OPTIONS, Lucene.KEYWORD_ANALYZER, + protected RoutingFieldMapper(FieldType fieldType, boolean required, String path) { + super(new Names(Defaults.NAME, Defaults.NAME, Defaults.NAME, Defaults.NAME), 1.0f, fieldType, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); this.required = required; this.path = path; @@ -130,12 +136,12 @@ public class RoutingFieldMapper extends AbstractFieldMapper implements I } public String value(Document document) { - Fieldable field = document.getFieldable(names.indexName()); + Field field = (Field) document.getField(names.indexName()); return field == null ? null : value(field); } @Override - public String value(Fieldable field) { + public String value(Field field) { return field.stringValue(); } @@ -145,7 +151,7 @@ public class RoutingFieldMapper extends AbstractFieldMapper implements I } @Override - public String valueAsString(Fieldable field) { + public String valueAsString(Field field) { return value(field); } @@ -160,7 +166,7 @@ public class RoutingFieldMapper extends AbstractFieldMapper implements I if (path != null && routing != null) { // we have a path, check if we can validate we have the same routing value as the one in the doc... 
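// (path names another field in the same document: if that field has already
//  been parsed, its value is compared against the routing value supplied
//  externally, so a mismatch can be rejected at parse time)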
String value = null; - Fieldable field = context.doc().getFieldable(path); + Field field = (Field) context.doc().getField(path); if (field != null) { value = field.stringValue(); if (value == null) { @@ -209,7 +215,7 @@ public class RoutingFieldMapper extends AbstractFieldMapper implements I context.ignoredValue(names.indexName(), routing); return null; } - return new Field(names.indexName(), routing, store, index); + return new Field(names.indexName(), routing, fieldType); } } return null; @@ -224,15 +230,16 @@ public class RoutingFieldMapper extends AbstractFieldMapper implements I @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { // if all are defaults, no sense to write it at all - if (index == Defaults.INDEX && store == Defaults.STORE && required == Defaults.REQUIRED && path == Defaults.PATH) { + if (indexed() == Defaults.ROUTING_FIELD_TYPE.indexed() && + stored() == Defaults.ROUTING_FIELD_TYPE.stored() && required == Defaults.REQUIRED && path == Defaults.PATH) { return builder; } builder.startObject(CONTENT_TYPE); - if (index != Defaults.INDEX) { - builder.field("index", index.name().toLowerCase()); + if (indexed() != Defaults.ROUTING_FIELD_TYPE.indexed()) { + builder.field("index", indexed()); } - if (store != Defaults.STORE) { - builder.field("store", store.name().toLowerCase()); + if (stored() != Defaults.ROUTING_FIELD_TYPE.stored()) { + builder.field("store", stored()); } if (required != Defaults.REQUIRED) { builder.field("required", required); diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/SizeFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/SizeFieldMapper.java index de1fbb8a87f..b782af60542 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/SizeFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/SizeFieldMapper.java @@ -20,7 +20,7 @@ package org.elasticsearch.index.mapper.internal; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.FieldType; import org.elasticsearch.common.Strings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.index.mapper.*; @@ -40,16 +40,23 @@ public class SizeFieldMapper extends IntegerFieldMapper implements RootMapper { public static class Defaults extends IntegerFieldMapper.Defaults { public static final String NAME = CONTENT_TYPE; public static final boolean ENABLED = false; + + public static final FieldType SIZE_FIELD_TYPE = new FieldType(IntegerFieldMapper.Defaults.INTEGER_FIELD_TYPE); + + static { + SIZE_FIELD_TYPE.freeze(); + } } public static class Builder extends Mapper.Builder { protected boolean enabled = Defaults.ENABLED; - protected Field.Store store = Defaults.STORE; + protected final FieldType fieldType; public Builder() { super(Defaults.NAME); + fieldType = new FieldType(Defaults.SIZE_FIELD_TYPE); builder = this; } @@ -58,14 +65,14 @@ public class SizeFieldMapper extends IntegerFieldMapper implements RootMapper { return builder; } - public Builder store(Field.Store store) { - this.store = store; + public Builder store(boolean store) { + this.fieldType.setStored(store); return builder; } @Override public SizeFieldMapper build(BuilderContext context) { - return new SizeFieldMapper(enabled, store); + return new SizeFieldMapper(enabled, fieldType); } } @@ -89,12 +96,12 @@ public class SizeFieldMapper extends IntegerFieldMapper implements RootMapper { private final boolean enabled; public 
SizeFieldMapper() { - this(Defaults.ENABLED, Defaults.STORE); + this(Defaults.ENABLED, new FieldType(Defaults.SIZE_FIELD_TYPE)); } - public SizeFieldMapper(boolean enabled, Field.Store store) { - super(new Names(Defaults.NAME), Defaults.PRECISION_STEP, Defaults.FUZZY_FACTOR, Defaults.INDEX, store, - Defaults.BOOST, Defaults.OMIT_NORMS, Defaults.INDEX_OPTIONS, Defaults.NULL_VALUE, + public SizeFieldMapper(boolean enabled, FieldType fieldType) { + super(new Names(Defaults.NAME), Defaults.PRECISION_STEP, Defaults.FUZZY_FACTOR, + Defaults.BOOST, fieldType, Defaults.NULL_VALUE, Defaults.IGNORE_MALFORMED); this.enabled = enabled; } @@ -133,28 +140,28 @@ public class SizeFieldMapper extends IntegerFieldMapper implements RootMapper { } @Override - protected Fieldable innerParseCreateField(ParseContext context) throws IOException { + protected Field innerParseCreateField(ParseContext context) throws IOException { if (!enabled) { return null; } if (context.flyweight()) { return null; } - return new CustomIntegerNumericField(this, context.source().length()); + return new CustomIntegerNumericField(this, context.source().length(), fieldType); } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { // all are defaults, no need to write it at all - if (enabled == Defaults.ENABLED && store == Defaults.STORE) { + if (enabled == Defaults.ENABLED && stored() == Defaults.SIZE_FIELD_TYPE.stored()) { return builder; } builder.startObject(contentType()); if (enabled != Defaults.ENABLED) { builder.field("enabled", enabled); } - if (store != Defaults.STORE) { - builder.field("store", store.name().toLowerCase()); + if (stored() != Defaults.SIZE_FIELD_TYPE.stored()) { + builder.field("store", stored()); } builder.endObject(); return builder; diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldMapper.java index b818f99b285..85f03491a53 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldMapper.java @@ -22,7 +22,8 @@ package org.elasticsearch.index.mapper.internal; import com.google.common.base.Objects; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.StoredField; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.elasticsearch.ElasticSearchParseException; import org.elasticsearch.common.Strings; @@ -67,10 +68,17 @@ public class SourceFieldMapper extends AbstractFieldMapper implements In public static final boolean ENABLED = true; public static final long COMPRESS_THRESHOLD = -1; public static final String FORMAT = null; // default format is to use the one provided - public static final Field.Index INDEX = Field.Index.NO; - public static final Field.Store STORE = Field.Store.YES; - public static final boolean OMIT_NORMS = true; - public static final IndexOptions INDEX_OPTIONS = IndexOptions.DOCS_ONLY; + + public static final FieldType SOURCE_FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE); + + static { + SOURCE_FIELD_TYPE.setIndexed(false); + SOURCE_FIELD_TYPE.setStored(true); + SOURCE_FIELD_TYPE.setOmitNorms(true); + SOURCE_FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY); + SOURCE_FIELD_TYPE.freeze(); + } + public static final String[] INCLUDES = 
Strings.EMPTY_ARRAY; public static final String[] EXCLUDES = Strings.EMPTY_ARRAY; } @@ -190,8 +198,7 @@ public class SourceFieldMapper extends AbstractFieldMapper implements In } protected SourceFieldMapper(String name, boolean enabled, String format, Boolean compress, long compressThreshold, String[] includes, String[] excludes) { - super(new Names(name, name, name, name), Defaults.INDEX, Defaults.STORE, Defaults.TERM_VECTOR, Defaults.BOOST, - Defaults.OMIT_NORMS, Defaults.INDEX_OPTIONS, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); + super(new Names(name, name, name, name), Defaults.BOOST, new FieldType(Defaults.SOURCE_FIELD_TYPE), Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); this.enabled = enabled; this.compress = compress; this.compressThreshold = compressThreshold; @@ -237,7 +244,7 @@ public class SourceFieldMapper extends AbstractFieldMapper implements In if (!enabled) { return null; } - if (store == Field.Store.NO) { + if (!stored()) { return null; } if (context.flyweight()) { @@ -335,21 +342,21 @@ public class SourceFieldMapper extends AbstractFieldMapper implements In } } assert source.hasArray(); - return new Field(names().indexName(), source.array(), source.arrayOffset(), source.length()); + return new StoredField(names().indexName(), source.array(), source.arrayOffset(), source.length()); } public byte[] value(Document document) { - Fieldable field = document.getFieldable(names.indexName()); + Field field = (Field) document.getField(names.indexName()); return field == null ? null : value(field); } - public byte[] nativeValue(Fieldable field) { - return field.getBinaryValue(); + public byte[] nativeValue(Field field) { + return field.binaryValue().bytes; } @Override - public byte[] value(Fieldable field) { - byte[] value = field.getBinaryValue(); + public byte[] value(Field field) { + byte[] value = field.binaryValue().bytes; if (value == null) { return value; } @@ -366,7 +373,7 @@ public class SourceFieldMapper extends AbstractFieldMapper implements In } @Override - public String valueAsString(Fieldable field) { + public String valueAsString(Field field) { throw new UnsupportedOperationException(); } diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/TTLFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/TTLFieldMapper.java index 86bdc43775f..1f647fd2769 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/TTLFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/TTLFieldMapper.java @@ -20,7 +20,7 @@ package org.elasticsearch.index.mapper.internal; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.FieldType; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Strings; import org.elasticsearch.common.unit.TimeValue; @@ -47,8 +47,16 @@ public class TTLFieldMapper extends LongFieldMapper implements InternalMapper, R public static class Defaults extends LongFieldMapper.Defaults { public static final String NAME = TTLFieldMapper.CONTENT_TYPE; - public static final Field.Store STORE = Field.Store.YES; - public static final Field.Index INDEX = Field.Index.NOT_ANALYZED; + + public static final FieldType TTL_FIELD_TYPE = new FieldType(LongFieldMapper.Defaults.LONG_FIELD_TYPE); + + static { + TTL_FIELD_TYPE.setStored(true); + TTL_FIELD_TYPE.setIndexed(true); + TTL_FIELD_TYPE.setTokenized(false); + TTL_FIELD_TYPE.freeze(); + } + public static final boolean ENABLED = false; public static final long DEFAULT = -1; } 
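The hunks above all follow one mechanical recipe: the per-field Field.Index / Field.Store / omitNorms / IndexOptions settings of Lucene 3.x collapse into a single Lucene 4.0 FieldType that is configured once, frozen, and then copied per mapper instance. A minimal sketch of that recipe, assuming the Lucene 4.0 field API (the class and method names below are illustrative only and appear nowhere in these patches):

import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo.IndexOptions;

public class FieldTypeSketch {

    // Shared template: configure every flag once, then freeze so any later
    // mutation throws IllegalStateException instead of silently leaking into
    // other mappers holding the same reference.
    public static final FieldType TEMPLATE = new FieldType();

    static {
        TEMPLATE.setIndexed(true);                         // was Field.Index.NOT_ANALYZED
        TEMPLATE.setTokenized(false);                      //   (indexed but not analyzed)
        TEMPLATE.setStored(true);                          // was Field.Store.YES
        TEMPLATE.setOmitNorms(true);                       // was the omitNorms boolean
        TEMPLATE.setIndexOptions(IndexOptions.DOCS_ONLY);  // was the indexOptions enum
        TEMPLATE.freeze();
    }

    // Per-mapper overrides work on an unfrozen copy, which is why the
    // constructors above take new FieldType(Defaults.X_FIELD_TYPE) rather
    // than the frozen default itself.
    public static FieldType copyWithStore(boolean stored) {
        FieldType copy = new FieldType(TEMPLATE);  // copy constructor: not frozen
        copy.setStored(stored);
        return copy;
    }

    public static Field create(String name, String value) {
        // Lucene 3.x equivalent:
        //   new Field(name, value, Field.Store.YES, Field.Index.NOT_ANALYZED)
        return new Field(name, value, TEMPLATE);
    }
}

The same copy-then-freeze step is visible in SIZE_FIELD_TYPE and IP_FIELD_TYPE above, which clone their numeric parents' templates before freezing their own copies.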
@@ -59,9 +67,7 @@ public class TTLFieldMapper extends LongFieldMapper implements InternalMapper, R private long defaultTTL = Defaults.DEFAULT; public Builder() { - super(Defaults.NAME); - store = Defaults.STORE; - index = Defaults.INDEX; + super(Defaults.NAME, new FieldType(Defaults.TTL_FIELD_TYPE)); } public Builder enabled(boolean enabled) { @@ -76,7 +82,7 @@ public class TTLFieldMapper extends LongFieldMapper implements InternalMapper, R @Override public TTLFieldMapper build(BuilderContext context) { - return new TTLFieldMapper(store, index, enabled, defaultTTL, ignoreMalformed(context)); + return new TTLFieldMapper(fieldType, enabled, defaultTTL, ignoreMalformed(context)); } } @@ -105,12 +111,12 @@ public class TTLFieldMapper extends LongFieldMapper implements InternalMapper, R private long defaultTTL; public TTLFieldMapper() { - this(Defaults.STORE, Defaults.INDEX, Defaults.ENABLED, Defaults.DEFAULT, Defaults.IGNORE_MALFORMED); + this(new FieldType(Defaults.TTL_FIELD_TYPE), Defaults.ENABLED, Defaults.DEFAULT, Defaults.IGNORE_MALFORMED); } - protected TTLFieldMapper(Field.Store store, Field.Index index, boolean enabled, long defaultTTL, Explicit ignoreMalformed) { + protected TTLFieldMapper(FieldType fieldType, boolean enabled, long defaultTTL, Explicit ignoreMalformed) { super(new Names(Defaults.NAME, Defaults.NAME, Defaults.NAME, Defaults.NAME), Defaults.PRECISION_STEP, - Defaults.FUZZY_FACTOR, index, store, Defaults.BOOST, Defaults.OMIT_NORMS, Defaults.INDEX_OPTIONS, + Defaults.FUZZY_FACTOR, Defaults.BOOST, fieldType, Defaults.NULL_VALUE, ignoreMalformed); this.enabled = enabled; this.defaultTTL = defaultTTL; @@ -126,7 +132,7 @@ public class TTLFieldMapper extends LongFieldMapper implements InternalMapper, R // Overrides valueForSearch to display live value of remaining ttl @Override - public Object valueForSearch(Fieldable field) { + public Object valueForSearch(Field field) { long now; SearchContext searchContext = SearchContext.current(); if (searchContext != null) { @@ -178,7 +184,7 @@ public class TTLFieldMapper extends LongFieldMapper implements InternalMapper, R } @Override - protected Fieldable innerParseCreateField(ParseContext context) throws IOException, AlreadyExpiredException { + protected Field innerParseCreateField(ParseContext context) throws IOException, AlreadyExpiredException { if (enabled) { long ttl = context.sourceToParse().ttl(); if (ttl <= 0 && defaultTTL > 0) { // no ttl provided so we use the default value @@ -194,7 +200,7 @@ public class TTLFieldMapper extends LongFieldMapper implements InternalMapper, R throw new AlreadyExpiredException(context.index(), context.type(), context.id(), timestamp, ttl, now); } // the expiration timestamp (timestamp + ttl) is set as field - return new CustomLongNumericField(this, expire); + return new CustomLongNumericField(this, expire, fieldType); } } return null; diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/TimestampFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/TimestampFieldMapper.java index 9c6ce847168..7cfd91ad77f 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/TimestampFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/TimestampFieldMapper.java @@ -20,7 +20,7 @@ package org.elasticsearch.index.mapper.internal; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.FieldType; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Strings; 
import org.elasticsearch.common.joda.FormatDateTimeFormatter; @@ -50,8 +50,16 @@ public class TimestampFieldMapper extends DateFieldMapper implements InternalMap public static class Defaults extends DateFieldMapper.Defaults { public static final String NAME = "_timestamp"; - public static final Field.Store STORE = Field.Store.NO; - public static final Field.Index INDEX = Field.Index.NOT_ANALYZED; + + public static final FieldType TIMESTAMP_FIELD_TYPE = new FieldType(DateFieldMapper.Defaults.DATE_FIELD_TYPE); + + static { + TIMESTAMP_FIELD_TYPE.setStored(false); + TIMESTAMP_FIELD_TYPE.setIndexed(true); + TIMESTAMP_FIELD_TYPE.setTokenized(false); + TIMESTAMP_FIELD_TYPE.freeze(); + } + public static final boolean ENABLED = false; public static final String PATH = null; public static final FormatDateTimeFormatter DATE_TIME_FORMATTER = Joda.forPattern(DEFAULT_DATE_TIME_FORMAT); @@ -64,9 +72,7 @@ public class TimestampFieldMapper extends DateFieldMapper implements InternalMap private FormatDateTimeFormatter dateTimeFormatter = Defaults.DATE_TIME_FORMATTER; public Builder() { - super(Defaults.NAME); - store = Defaults.STORE; - index = Defaults.INDEX; + super(Defaults.NAME, new FieldType(Defaults.TIMESTAMP_FIELD_TYPE)); } public Builder enabled(boolean enabled) { @@ -90,7 +96,7 @@ public class TimestampFieldMapper extends DateFieldMapper implements InternalMap if (context.indexSettings() != null) { parseUpperInclusive = context.indexSettings().getAsBoolean("index.mapping.date.parse_upper_inclusive", Defaults.PARSE_UPPER_INCLUSIVE); } - return new TimestampFieldMapper(store, index, enabled, path, dateTimeFormatter, parseUpperInclusive, ignoreMalformed(context)); + return new TimestampFieldMapper(fieldType, enabled, path, dateTimeFormatter, parseUpperInclusive, ignoreMalformed(context)); } } @@ -120,13 +126,13 @@ public class TimestampFieldMapper extends DateFieldMapper implements InternalMap private final String path; public TimestampFieldMapper() { - this(Defaults.STORE, Defaults.INDEX, Defaults.ENABLED, Defaults.PATH, Defaults.DATE_TIME_FORMATTER, Defaults.PARSE_UPPER_INCLUSIVE, Defaults.IGNORE_MALFORMED); + this(new FieldType(Defaults.TIMESTAMP_FIELD_TYPE), Defaults.ENABLED, Defaults.PATH, Defaults.DATE_TIME_FORMATTER, Defaults.PARSE_UPPER_INCLUSIVE, Defaults.IGNORE_MALFORMED); } - protected TimestampFieldMapper(Field.Store store, Field.Index index, boolean enabled, String path, + protected TimestampFieldMapper(FieldType fieldType, boolean enabled, String path, FormatDateTimeFormatter dateTimeFormatter, boolean parseUpperInclusive, Explicit ignoreMalformed) { super(new Names(Defaults.NAME, Defaults.NAME, Defaults.NAME, Defaults.NAME), dateTimeFormatter, - Defaults.PRECISION_STEP, Defaults.FUZZY_FACTOR, index, store, Defaults.BOOST, Defaults.OMIT_NORMS, Defaults.INDEX_OPTIONS, + Defaults.PRECISION_STEP, Defaults.FUZZY_FACTOR, Defaults.BOOST, fieldType, Defaults.NULL_VALUE, TimeUnit.MILLISECONDS /*always milliseconds*/, parseUpperInclusive, ignoreMalformed); this.enabled = enabled; @@ -149,12 +155,12 @@ public class TimestampFieldMapper extends DateFieldMapper implements InternalMap * Override the default behavior to return a timestamp */ @Override - public Object valueForSearch(Fieldable field) { + public Object valueForSearch(Field field) { return value(field); } @Override - public String valueAsString(Fieldable field) { + public String valueAsString(Field field) { Long value = value(field); if (value == null) { return null; @@ -186,14 +192,14 @@ public class TimestampFieldMapper extends 
DateFieldMapper implements InternalMap } @Override - protected Fieldable innerParseCreateField(ParseContext context) throws IOException { + protected Field innerParseCreateField(ParseContext context) throws IOException { if (enabled) { long timestamp = context.sourceToParse().timestamp(); if (!indexed() && !stored()) { context.ignoredValue(names.indexName(), String.valueOf(timestamp)); return null; } - return new LongFieldMapper.CustomLongNumericField(this, timestamp); + return new LongFieldMapper.CustomLongNumericField(this, timestamp, fieldType); } return null; } @@ -206,16 +212,17 @@ public class TimestampFieldMapper extends DateFieldMapper implements InternalMap @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { // if all are defaults, no sense to write it at all - if (index == Defaults.INDEX && store == Defaults.STORE && enabled == Defaults.ENABLED && path == Defaults.PATH + if (indexed() == Defaults.TIMESTAMP_FIELD_TYPE.indexed() && + stored() == Defaults.TIMESTAMP_FIELD_TYPE.stored() && enabled == Defaults.ENABLED && path == Defaults.PATH && dateTimeFormatter.format().equals(Defaults.DATE_TIME_FORMATTER.format())) { return builder; } builder.startObject(CONTENT_TYPE); - if (index != Defaults.INDEX) { - builder.field("index", index.name().toLowerCase()); + if (indexed() != Defaults.TIMESTAMP_FIELD_TYPE.indexed()) { + builder.field("index", indexed()); } - if (store != Defaults.STORE) { - builder.field("store", store.name().toLowerCase()); + if (stored() != Defaults.TIMESTAMP_FIELD_TYPE.stored()) { + builder.field("store", stored()); } if (enabled != Defaults.ENABLED) { builder.field("enabled", enabled); diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/TypeFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/TypeFieldMapper.java index 59d232bfec8..55c5ae5916d 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/TypeFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/TypeFieldMapper.java @@ -21,7 +21,7 @@ package org.elasticsearch.index.mapper.internal; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.Term; import org.apache.lucene.search.DeletionAwareConstantScoreQuery; @@ -56,26 +56,29 @@ public class TypeFieldMapper extends AbstractFieldMapper implements Inte public static class Defaults extends AbstractFieldMapper.Defaults { public static final String NAME = TypeFieldMapper.NAME; public static final String INDEX_NAME = TypeFieldMapper.NAME; - public static final Field.Index INDEX = Field.Index.NOT_ANALYZED; - public static final Field.Store STORE = Field.Store.NO; - public static final boolean OMIT_NORMS = true; - public static final IndexOptions INDEX_OPTIONS = IndexOptions.DOCS_ONLY; + + public static final FieldType TYPE_FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE); + + static { + TYPE_FIELD_TYPE.setIndexed(true); + TYPE_FIELD_TYPE.setTokenized(false); + TYPE_FIELD_TYPE.setStored(false); + TYPE_FIELD_TYPE.setOmitNorms(true); + TYPE_FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY); + TYPE_FIELD_TYPE.freeze(); + } } public static class Builder extends AbstractFieldMapper.Builder { public Builder() { - super(Defaults.NAME); + super(Defaults.NAME, new FieldType(Defaults.TYPE_FIELD_TYPE)); indexName = 
Defaults.INDEX_NAME; - index = Defaults.INDEX; - store = Defaults.STORE; - omitNorms = Defaults.OMIT_NORMS; - indexOptions = Defaults.INDEX_OPTIONS; } @Override public TypeFieldMapper build(BuilderContext context) { - return new TypeFieldMapper(name, indexName, index, store, termVector, boost, omitNorms, indexOptions); + return new TypeFieldMapper(name, indexName, boost, fieldType); } } @@ -94,23 +97,21 @@ public class TypeFieldMapper extends AbstractFieldMapper implements Inte } protected TypeFieldMapper(String name, String indexName) { - this(name, indexName, Defaults.INDEX, Defaults.STORE, Defaults.TERM_VECTOR, Defaults.BOOST, - Defaults.OMIT_NORMS, Defaults.INDEX_OPTIONS); + this(name, indexName, Defaults.BOOST, new FieldType(Defaults.TYPE_FIELD_TYPE)); } - public TypeFieldMapper(String name, String indexName, Field.Index index, Field.Store store, Field.TermVector termVector, - float boost, boolean omitNorms, IndexOptions indexOptions) { - super(new Names(name, indexName, indexName, name), index, store, termVector, boost, omitNorms, indexOptions, Lucene.KEYWORD_ANALYZER, + public TypeFieldMapper(String name, String indexName, float boost, FieldType fieldType) { + super(new Names(name, indexName, indexName, name), boost, fieldType, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); } public String value(Document document) { - Fieldable field = document.getFieldable(names.indexName()); + Field field = (Field) document.getField(names.indexName()); return field == null ? null : value(field); } @Override - public String value(Fieldable field) { + public String value(Field field) { return field.stringValue(); } @@ -120,7 +121,7 @@ public class TypeFieldMapper extends AbstractFieldMapper implements Inte } @Override - public String valueAsString(Fieldable field) { + public String valueAsString(Field field) { return value(field); } @@ -135,8 +136,8 @@ public class TypeFieldMapper extends AbstractFieldMapper implements Inte @Override public Filter fieldFilter(String value, @Nullable QueryParseContext context) { - if (index == Field.Index.NO) { - return new PrefixFilter(UidFieldMapper.TERM_FACTORY.createTerm(Uid.typePrefix(value))); + if (!indexed()) { + return new PrefixFilter(new Term(UidFieldMapper.NAME, Uid.typePrefix(value))); } return new TermFilter(names().createIndexNameTerm(value)); } @@ -176,10 +177,10 @@ public class TypeFieldMapper extends AbstractFieldMapper implements Inte @Override protected Field parseCreateField(ParseContext context) throws IOException { - if (index == Field.Index.NO && store == Field.Store.NO) { + if (!indexed() && !stored()) { return null; } - return new Field(names.indexName(), false, context.type(), store, index, termVector); + return new Field(names.indexName(), context.type(), fieldType); } @Override @@ -190,15 +191,15 @@ public class TypeFieldMapper extends AbstractFieldMapper implements Inte @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { // if all are defaults, no sense to write it at all - if (store == Defaults.STORE && index == Defaults.INDEX) { + if (stored() == Defaults.TYPE_FIELD_TYPE.stored() && indexed() == Defaults.TYPE_FIELD_TYPE.indexed()) { return builder; } builder.startObject(CONTENT_TYPE); - if (store != Defaults.STORE) { - builder.field("store", store.name().toLowerCase()); + if (stored() != Defaults.TYPE_FIELD_TYPE.stored()) { + builder.field("store", stored()); } - if (index != Defaults.INDEX) { - builder.field("index", index.name().toLowerCase()); + if (indexed() != 
Defaults.TYPE_FIELD_TYPE.indexed()) { + builder.field("index", indexed()); } builder.endObject(); return builder; diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java index fca40d673ae..c821eb51b57 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java @@ -20,7 +20,7 @@ package org.elasticsearch.index.mapper.internal; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.Term; import org.elasticsearch.common.lucene.Lucene; @@ -47,9 +47,17 @@ public class UidFieldMapper extends AbstractFieldMapper implements Internal public static class Defaults extends AbstractFieldMapper.Defaults { public static final String NAME = UidFieldMapper.NAME; - public static final Field.Index INDEX = Field.Index.NOT_ANALYZED; - public static final boolean OMIT_NORMS = true; - public static final FieldInfo.IndexOptions INDEX_OPTIONS = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; // we store payload (otherwise, we really need just docs) + + public static final FieldType UID_FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE); + + static { + UID_FIELD_TYPE.setIndexed(true); + UID_FIELD_TYPE.setTokenized(false); + UID_FIELD_TYPE.setStored(true); + UID_FIELD_TYPE.setOmitNorms(true); + UID_FIELD_TYPE.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); // we store payload (otherwise, we really need just docs) + UID_FIELD_TYPE.freeze(); + } } public static class Builder extends Mapper.Builder { @@ -90,8 +98,8 @@ public class UidFieldMapper extends AbstractFieldMapper implements Internal } protected UidFieldMapper(String name, String indexName) { - super(new Names(name, indexName, indexName, name), Defaults.INDEX, Field.Store.YES, Defaults.TERM_VECTOR, Defaults.BOOST, - Defaults.OMIT_NORMS, Defaults.INDEX_OPTIONS, Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); + super(new Names(name, indexName, indexName, name), Defaults.BOOST, new FieldType(Defaults.UID_FIELD_TYPE), + Lucene.KEYWORD_ANALYZER, Lucene.KEYWORD_ANALYZER); } @Override @@ -115,7 +123,7 @@ public class UidFieldMapper extends AbstractFieldMapper implements Internal // since we did not have the uid in the pre phase, we did not add it automatically to the nested docs // as they were created we need to make sure we add it to all the nested docs... if (context.docs().size() > 1) { - UidField uidField = (UidField) context.rootDoc().getFieldable(UidFieldMapper.NAME); + UidField uidField = (UidField) context.rootDoc().getField(UidFieldMapper.NAME); assert uidField != null; // we need to go over the docs and add it... 
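// (nested documents are indexed and deleted as one block keyed by _uid,
//  so every nested doc must carry the same _uid value as its root doc)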
for (int i = 1; i < context.docs().size(); i++) { @@ -141,7 +149,7 @@ public class UidFieldMapper extends AbstractFieldMapper implements Internal } @Override - protected Fieldable parseCreateField(ParseContext context) throws IOException { + protected Field parseCreateField(ParseContext context) throws IOException { context.uid(Uid.createUid(context.stringBuilder(), context.type(), context.id())); // so, caching uid stream and field is fine // since we don't do any mapping parsing without immediate indexing @@ -152,7 +160,7 @@ public class UidFieldMapper extends AbstractFieldMapper implements Internal } @Override - public Uid value(Fieldable field) { + public Uid value(Field field) { return Uid.createUid(field.stringValue()); } @@ -162,7 +170,7 @@ public class UidFieldMapper extends AbstractFieldMapper implements Internal } @Override - public String valueAsString(Fieldable field) { + public String valueAsString(Field field) { return field.stringValue(); } diff --git a/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java index 5cf1ebdacc4..7ec2d05aa1e 100644 --- a/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java @@ -21,9 +21,11 @@ package org.elasticsearch.index.mapper.ip; import org.apache.lucene.analysis.NumericTokenStream; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.*; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.Explicit; @@ -86,6 +88,12 @@ public class IpFieldMapper extends NumberFieldMapper { public static class Defaults extends NumberFieldMapper.Defaults { public static final String NULL_VALUE = null; + + public static final FieldType IP_FIELD_TYPE = new FieldType(NumberFieldMapper.Defaults.NUMBER_FIELD_TYPE); + + static { + IP_FIELD_TYPE.freeze(); + } } public static class Builder extends NumberFieldMapper.Builder { @@ -93,7 +101,7 @@ public class IpFieldMapper extends NumberFieldMapper { protected String nullValue = Defaults.NULL_VALUE; public Builder(String name) { - super(name); + super(name, new FieldType(Defaults.IP_FIELD_TYPE)); builder = this; } @@ -104,8 +112,9 @@ public class IpFieldMapper extends NumberFieldMapper { @Override public IpFieldMapper build(BuilderContext context) { + fieldType.setOmitNorms(fieldType.omitNorms() || boost != 1.0f); IpFieldMapper fieldMapper = new IpFieldMapper(buildNames(context), - precisionStep, index, store, boost, omitNorms, indexOptions, nullValue, ignoreMalformed(context)); + precisionStep, boost, fieldType, nullValue, ignoreMalformed(context)); fieldMapper.includeInAll(includeInAll); return fieldMapper; } @@ -130,10 +139,9 @@ public class IpFieldMapper extends NumberFieldMapper { private String nullValue; protected IpFieldMapper(Names names, int precisionStep, - Field.Index index, Field.Store store, - float boost, boolean omitNorms, IndexOptions indexOptions, + float boost, FieldType fieldType, String nullValue, Explicit ignoreMalformed) { - super(names, precisionStep, null, index, store, boost, omitNorms, indexOptions, + super(names, precisionStep, null, boost, fieldType, ignoreMalformed, new NamedAnalyzer("_ip/" + precisionStep, new 
NumericIpAnalyzer(precisionStep)), new NamedAnalyzer("_ip/max", new NumericIpAnalyzer(Integer.MAX_VALUE))); this.nullValue = nullValue; @@ -145,8 +153,8 @@ public class IpFieldMapper extends NumberFieldMapper { } @Override - public Long value(Fieldable field) { - byte[] value = field.getBinaryValue(); + public Long value(Field field) { + byte[] value = field.binaryValue().bytes; if (value == null) { return null; } @@ -159,15 +167,15 @@ public class IpFieldMapper extends NumberFieldMapper { } /** - * IPs should return as a string, delegates to {@link #valueAsString(org.apache.lucene.document.Fieldable)}. + * IPs should return as a string. */ @Override - public Object valueForSearch(Fieldable field) { + public Object valueForSearch(Field field) { return valueAsString(field); } @Override - public String valueAsString(Fieldable field) { + public String valueAsString(Field field) { Long value = value(field); if (value == null) { return null; @@ -177,7 +185,9 @@ public class IpFieldMapper extends NumberFieldMapper { @Override public String indexedValue(String value) { - return NumericUtils.longToPrefixCoded(ipToLong(value)); + BytesRef bytesRef = new BytesRef(); + NumericUtils.longToPrefixCoded(ipToLong(value), precisionStep(), bytesRef); + return bytesRef.utf8ToString(); } @Override @@ -241,7 +251,7 @@ public class IpFieldMapper extends NumberFieldMapper { } @Override - protected Fieldable innerParseCreateField(ParseContext context) throws IOException { + protected Field innerParseCreateField(ParseContext context) throws IOException { String ipAsString; if (context.externalValueSet()) { ipAsString = (String) context.externalValue(); @@ -264,7 +274,7 @@ public class IpFieldMapper extends NumberFieldMapper { } final long value = ipToLong(ipAsString); - return new LongFieldMapper.CustomLongNumericField(this, value); + return new LongFieldMapper.CustomLongNumericField(this, value, fieldType); } @Override @@ -291,20 +301,30 @@ public class IpFieldMapper extends NumberFieldMapper { @Override protected void doXContentBody(XContentBuilder builder) throws IOException { super.doXContentBody(builder); - if (index != Defaults.INDEX) { - builder.field("index", index.name().toLowerCase()); + if (indexed() != Defaults.IP_FIELD_TYPE.indexed() || + analyzed() != Defaults.IP_FIELD_TYPE.tokenized()) { + builder.field("index", indexTokenizeOptionToString(indexed(), analyzed())); } - if (store != Defaults.STORE) { - builder.field("store", store.name().toLowerCase()); + if (stored() != Defaults.IP_FIELD_TYPE.stored()) { + builder.field("store", stored()); } - if (termVector != Defaults.TERM_VECTOR) { - builder.field("term_vector", termVector.name().toLowerCase()); + if (storeTermVectors() != Defaults.IP_FIELD_TYPE.storeTermVectors()) { + builder.field("store_term_vector", storeTermVectors()); } - if (omitNorms != Defaults.OMIT_NORMS) { - builder.field("omit_norms", omitNorms); + if (storeTermVectorOffsets() != Defaults.IP_FIELD_TYPE.storeTermVectorOffsets()) { + builder.field("store_term_vector_offsets", storeTermVectorOffsets()); } - if (indexOptions != Defaults.INDEX_OPTIONS) { - builder.field("index_options", indexOptionToString(indexOptions)); + if (storeTermVectorPositions() != Defaults.IP_FIELD_TYPE.storeTermVectorPositions()) { + builder.field("store_term_vector_positions", storeTermVectorPositions()); + } + if (storeTermVectorPayloads() != Defaults.IP_FIELD_TYPE.storeTermVectorPayloads()) { + builder.field("store_term_vector_payloads", storeTermVectorPayloads()); + } + if (omitNorms() != 
Defaults.IP_FIELD_TYPE.omitNorms()) { + builder.field("omit_norms", omitNorms()); + } + if (indexOptions() != Defaults.IP_FIELD_TYPE.indexOptions()) { + builder.field("index_options", indexOptionToString(indexOptions())); } if (precisionStep != Defaults.PRECISION_STEP) { builder.field("precision_step", precisionStep); diff --git a/src/main/java/org/elasticsearch/index/mapper/object/ObjectMapper.java b/src/main/java/org/elasticsearch/index/mapper/object/ObjectMapper.java index b44353dda87..c64acbcdd86 100644 --- a/src/main/java/org/elasticsearch/index/mapper/object/ObjectMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/object/ObjectMapper.java @@ -22,7 +22,8 @@ package org.elasticsearch.index.mapper.object; import com.google.common.collect.ImmutableMap; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; import org.apache.lucene.search.Filter; import org.elasticsearch.ElasticSearchIllegalStateException; import org.elasticsearch.common.Strings; @@ -304,7 +305,7 @@ public class ObjectMapper implements Mapper, AllFieldMapper.IncludeInAll { this.mappers = copyOf(mappers); } this.nestedTypePath = "__" + fullPath; - this.nestedTypeFilter = new TermFilter(TypeFieldMapper.TERM_FACTORY.createTerm(nestedTypePath)); + this.nestedTypeFilter = new TermFilter(new Term(TypeFieldMapper.NAME, nestedTypePath)); } @Override @@ -412,7 +413,7 @@ public class ObjectMapper implements Mapper, AllFieldMapper.IncludeInAll { if (nested.isNested()) { Document nestedDoc = new Document(); // pre add the uid field if possible (id was already provided) - Fieldable uidField = context.doc().getFieldable(UidFieldMapper.NAME); + Field uidField = (Field) context.doc().getField(UidFieldMapper.NAME); if (uidField != null) { // we don't need to add it as a full uid field in nested docs, since we don't need versioning // we also rely on this for UidField#loadVersion @@ -465,7 +466,7 @@ public class ObjectMapper implements Mapper, AllFieldMapper.IncludeInAll { if (nested.isNested()) { Document nestedDoc = context.switchDoc(restoreDoc); if (nested.isIncludeInParent()) { - for (Fieldable field : nestedDoc.getFields()) { + for (IndexableField field : nestedDoc.getFields()) { if (field.name().equals(UidFieldMapper.NAME) || field.name().equals(TypeFieldMapper.NAME)) { continue; } else { @@ -476,7 +477,7 @@ public class ObjectMapper implements Mapper, AllFieldMapper.IncludeInAll { if (nested.isIncludeInRoot()) { // don't add it twice, if its included in parent, and we are handling the master doc... 
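// (include_in_parent already copied these fields one level up; when the
//  parent doc is itself the root doc, copying again for include_in_root
//  would index every nested field twice)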
if (!(nested.isIncludeInParent() && context.doc() == context.rootDoc())) { - for (Fieldable field : nestedDoc.getFields()) { + for (IndexableField field : nestedDoc.getFields()) { if (field.name().equals(UidFieldMapper.NAME) || field.name().equals(TypeFieldMapper.NAME)) { continue; } else { From f444ed4dff95fe49d2a11f8eac287e48c563ce9c Mon Sep 17 00:00:00 2001 From: Chris Male Date: Sun, 28 Oct 2012 13:04:53 +1300 Subject: [PATCH 039/146] lucene 4: Converted remaining Mappers to FieldType API --- .../common/lucene/all/AllField.java | 11 ++- .../index/mapper/DocumentMapper.java | 5 +- .../index/mapper/FieldMapper.java | 2 +- .../mapper/core/AbstractFieldMapper.java | 10 +-- .../index/mapper/geo/GeoPointFieldMapper.java | 34 +++++---- .../index/mapper/internal/AllFieldMapper.java | 69 ++++++++++--------- .../index/mapper/ip/IpFieldMapper.java | 2 - 7 files changed, 73 insertions(+), 60 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/all/AllField.java b/src/main/java/org/elasticsearch/common/lucene/all/AllField.java index 8e31243dcae..66d39ef57fe 100644 --- a/src/main/java/org/elasticsearch/common/lucene/all/AllField.java +++ b/src/main/java/org/elasticsearch/common/lucene/all/AllField.java @@ -21,8 +21,8 @@ package org.elasticsearch.common.lucene.all; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.document.AbstractField; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; import org.elasticsearch.ElasticSearchException; import java.io.IOException; @@ -31,22 +31,21 @@ import java.io.Reader; /** * */ -public class AllField extends AbstractField { +public class AllField extends Field { private final AllEntries allEntries; private final Analyzer analyzer; - public AllField(String name, Field.Store store, Field.TermVector termVector, AllEntries allEntries, Analyzer analyzer) { - super(name, store, Field.Index.ANALYZED, termVector); - + public AllField(String name, AllEntries allEntries, Analyzer analyzer, FieldType fieldType) { + super(name, fieldType); this.allEntries = allEntries; this.analyzer = analyzer; } @Override public String stringValue() { - if (isStored()) { + if (fieldType().stored()) { return allEntries.buildText(); } return null; diff --git a/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java b/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java index 99b1cc94818..0935ad41926 100644 --- a/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java @@ -24,6 +24,7 @@ import com.google.common.collect.Maps; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; import org.apache.lucene.search.Filter; import org.elasticsearch.common.Booleans; import org.elasticsearch.common.Nullable; @@ -154,7 +155,9 @@ public class DocumentMapper implements ToXContent { if (indexSettings != null) { String idIndexed = indexSettings.get("index.mapping._id.indexed"); if (idIndexed != null && Booleans.parseBoolean(idIndexed, false)) { - idFieldMapper = new IdFieldMapper(Field.Index.NOT_ANALYZED); + FieldType fieldType = new FieldType(IdFieldMapper.Defaults.ID_FIELD_TYPE); + fieldType.setTokenized(false); + idFieldMapper = new IdFieldMapper(fieldType); } } this.rootMappers.put(IdFieldMapper.class, idFieldMapper); diff --git 
a/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index e214b9da591..01afed9e167 100644 --- a/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -113,7 +113,7 @@ public interface FieldMapper { * Creates a new index term based on the provided value. */ public Term createIndexNameTerm(String value) { - return indexNameTermFactory.createTerm(value); + return new Term(indexName, value); } } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java index 27bbd04d0d8..b6d2de6e227 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java @@ -76,27 +76,27 @@ public abstract class AbstractFieldMapper implements FieldMapper, Mapper { } @Override - protected T storeTermVectors(boolean termVectors) { + public T storeTermVectors(boolean termVectors) { return super.storeTermVectors(termVectors); } @Override - protected T storeTermVectorOffsets(boolean termVectorOffsets) { + public T storeTermVectorOffsets(boolean termVectorOffsets) { return super.storeTermVectorOffsets(termVectorOffsets); } @Override - protected T storeTermVectorPositions(boolean termVectorPositions) { + public T storeTermVectorPositions(boolean termVectorPositions) { return super.storeTermVectorPositions(termVectorPositions); } @Override - protected T storeTermVectorPayloads(boolean termVectorPayloads) { + public T storeTermVectorPayloads(boolean termVectorPayloads) { return super.storeTermVectorPayloads(termVectorPayloads); } @Override - protected T tokenized(boolean tokenized) { + public T tokenized(boolean tokenized) { return super.tokenized(tokenized); } diff --git a/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java index 5287f9ce75f..a38a5ff3ffc 100644 --- a/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/geo/GeoPointFieldMapper.java @@ -19,7 +19,7 @@ package org.elasticsearch.index.mapper.geo; -import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.Strings; @@ -73,7 +73,7 @@ public class GeoPointFieldMapper implements Mapper, ArrayValueMapperParser { public static class Defaults { public static final ContentPath.Type PATH_TYPE = ContentPath.Type.FULL; - public static final Field.Store STORE = Field.Store.NO; + public static final boolean STORE = false; public static final boolean ENABLE_LATLON = false; public static final boolean ENABLE_GEOHASH = false; public static final int PRECISION = GeoHashUtils.PRECISION; @@ -81,6 +81,16 @@ public class GeoPointFieldMapper implements Mapper, ArrayValueMapperParser { public static final boolean NORMALIZE_LON = true; public static final boolean VALIDATE_LAT = true; public static final boolean VALIDATE_LON = true; + + public static final FieldType GEO_STRING_FIELD_TYPE = new FieldType(StringFieldMapper.Defaults.STRING_FIELD_TYPE); + + static { + GEO_STRING_FIELD_TYPE.setIndexed(true); + GEO_STRING_FIELD_TYPE.setTokenized(false); + 
GEO_STRING_FIELD_TYPE.setOmitNorms(true); + GEO_STRING_FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY); + GEO_STRING_FIELD_TYPE.freeze(); + } } public static class Builder extends Mapper.Builder { @@ -95,7 +105,7 @@ public class GeoPointFieldMapper implements Mapper, ArrayValueMapperParser { private int precision = Defaults.PRECISION; - private Field.Store store = Defaults.STORE; + private boolean store = Defaults.STORE; boolean validateLat = Defaults.VALIDATE_LAT; boolean validateLon = Defaults.VALIDATE_LON; @@ -132,7 +142,7 @@ public class GeoPointFieldMapper implements Mapper, ArrayValueMapperParser { return this; } - public Builder store(Field.Store store) { + public Builder store(boolean store) { this.store = store; return this; } @@ -143,7 +153,7 @@ public class GeoPointFieldMapper implements Mapper, ArrayValueMapperParser { context.path().pathType(pathType); GeoStringFieldMapper geoStringMapper = new GeoStringFieldMapper.Builder(name) - .index(Field.Index.NOT_ANALYZED).omitNorms(true).indexOptions(IndexOptions.DOCS_ONLY).includeInAll(false).store(store).build(context); + .includeInAll(false).store(store).build(context); DoubleFieldMapper latMapper = null; @@ -162,7 +172,7 @@ public class GeoPointFieldMapper implements Mapper, ArrayValueMapperParser { } StringFieldMapper geohashMapper = null; if (enableGeoHash) { - geohashMapper = stringField(Names.GEOHASH).index(Field.Index.NOT_ANALYZED).includeInAll(false).omitNorms(true).indexOptions(IndexOptions.DOCS_ONLY).build(context); + geohashMapper = stringField(Names.GEOHASH).index(true).tokenized(false).includeInAll(false).omitNorms(true).indexOptions(IndexOptions.DOCS_ONLY).build(context); } context.path().remove(); @@ -493,8 +503,8 @@ public class GeoPointFieldMapper implements Mapper, ArrayValueMapperParser { if (enableGeoHash != Defaults.ENABLE_GEOHASH) { builder.field("geohash", enableGeoHash); } - if (geoStringMapper.store() != Defaults.STORE) { - builder.field("store", geoStringMapper.store().name().toLowerCase()); + if (geoStringMapper.stored() != Defaults.STORE) { + builder.field("store", geoStringMapper.stored()); } if (precision != Defaults.PRECISION) { builder.field("geohash_precision", precision); @@ -534,7 +544,7 @@ public class GeoPointFieldMapper implements Mapper, ArrayValueMapperParser { protected String nullValue = Defaults.NULL_VALUE; public Builder(String name) { - super(name); + super(name, new FieldType(GeoPointFieldMapper.Defaults.GEO_STRING_FIELD_TYPE)); builder = this; } @@ -552,7 +562,7 @@ public class GeoPointFieldMapper implements Mapper, ArrayValueMapperParser { @Override public GeoStringFieldMapper build(BuilderContext context) { GeoStringFieldMapper fieldMapper = new GeoStringFieldMapper(buildNames(context), - index, store, termVector, boost, omitNorms, indexOptions, nullValue, + boost, fieldType, nullValue, indexAnalyzer, searchAnalyzer); fieldMapper.includeInAll(includeInAll); return fieldMapper; @@ -561,8 +571,8 @@ public class GeoPointFieldMapper implements Mapper, ArrayValueMapperParser { GeoPointFieldMapper geoMapper; - public GeoStringFieldMapper(Names names, Field.Index index, Field.Store store, Field.TermVector termVector, float boost, boolean omitNorms, IndexOptions indexOptions, String nullValue, NamedAnalyzer indexAnalyzer, NamedAnalyzer searchAnalyzer) { - super(names, index, store, termVector, boost, omitNorms, indexOptions, nullValue, indexAnalyzer, searchAnalyzer); + public GeoStringFieldMapper(Names names, float boost, FieldType fieldType, String nullValue, NamedAnalyzer indexAnalyzer, 
NamedAnalyzer searchAnalyzer) { + super(names, boost, fieldType, nullValue, indexAnalyzer, searchAnalyzer); } @Override diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java index 9c9dd11858d..aa0ebe287f2 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/AllFieldMapper.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.mapper.internal; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.index.Term; import org.apache.lucene.search.Query; @@ -62,6 +63,14 @@ public class AllFieldMapper extends AbstractFieldMapper implements Interna public static final String NAME = AllFieldMapper.NAME; public static final String INDEX_NAME = AllFieldMapper.NAME; public static final boolean ENABLED = true; + + public static final FieldType ALL_FIELD_TYPE = new FieldType(); + + static { + ALL_FIELD_TYPE.setIndexed(true); + ALL_FIELD_TYPE.setTokenized(true); + ALL_FIELD_TYPE.freeze(); + } } public static class Builder extends AbstractFieldMapper.Builder { @@ -72,7 +81,7 @@ public class AllFieldMapper extends AbstractFieldMapper implements Interna boolean autoBoost = false; public Builder() { - super(Defaults.NAME); + super(Defaults.NAME, new FieldType(Defaults.ALL_FIELD_TYPE)); builder = this; indexName = Defaults.INDEX_NAME; } @@ -82,29 +91,13 @@ public class AllFieldMapper extends AbstractFieldMapper implements Interna return this; } - @Override - public Builder store(Field.Store store) { - return super.store(store); - } - - @Override - public Builder termVector(Field.TermVector termVector) { - return super.termVector(termVector); - } - - @Override - protected Builder indexAnalyzer(NamedAnalyzer indexAnalyzer) { - return super.indexAnalyzer(indexAnalyzer); - } - - @Override - protected Builder searchAnalyzer(NamedAnalyzer searchAnalyzer) { - return super.searchAnalyzer(searchAnalyzer); - } - @Override public AllFieldMapper build(BuilderContext context) { - return new AllFieldMapper(name, store, termVector, omitNorms, indexOptions, + // In case the mapping overrides these + fieldType.setIndexed(true); + fieldType.setTokenized(true); + + return new AllFieldMapper(name, fieldType, indexAnalyzer, searchAnalyzer, enabled, autoBoost); } } @@ -137,13 +130,12 @@ public class AllFieldMapper extends AbstractFieldMapper implements Interna private volatile boolean autoBoost; public AllFieldMapper() { - this(Defaults.NAME, Defaults.STORE, Defaults.TERM_VECTOR, Defaults.OMIT_NORMS, Defaults.INDEX_OPTIONS, null, null, Defaults.ENABLED, false); + this(Defaults.NAME, new FieldType(Defaults.ALL_FIELD_TYPE), null, null, Defaults.ENABLED, false); } - protected AllFieldMapper(String name, Field.Store store, Field.TermVector termVector, boolean omitNorms, IndexOptions indexOptions, + protected AllFieldMapper(String name, FieldType fieldType, NamedAnalyzer indexAnalyzer, NamedAnalyzer searchAnalyzer, boolean enabled, boolean autoBoost) { - super(new Names(name, name, name, name), Field.Index.ANALYZED, store, termVector, 1.0f, omitNorms, indexOptions, indexAnalyzer, - searchAnalyzer); + super(new Names(name, name, name, name), 1.0f, fieldType, indexAnalyzer, searchAnalyzer); this.enabled = enabled; this.autoBoost = autoBoost; @@ -158,7 +150,7 @@ public class 
AllFieldMapper extends AbstractFieldMapper implements Interna if (!autoBoost) { return new TermQuery(term); } - if (indexOptions == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { + if (fieldType.indexOptions() == IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) { return new AllTermQuery(term); } return new TermQuery(term); @@ -209,7 +201,7 @@ public class AllFieldMapper extends AbstractFieldMapper implements Interna } Analyzer analyzer = findAnalyzer(context); - return new AllField(names.indexName(), store, termVector, context.allEntries(), analyzer); + return new AllField(names.indexName(), context.allEntries(), analyzer, fieldType); } private Analyzer findAnalyzer(ParseContext context) { @@ -255,7 +247,9 @@ public class AllFieldMapper extends AbstractFieldMapper implements Interna @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { // if all are defaults, no need to write it at all - if (enabled == Defaults.ENABLED && store == Defaults.STORE && termVector == Defaults.TERM_VECTOR && indexAnalyzer == null && searchAnalyzer == null) { + if (enabled == Defaults.ENABLED && stored() == Defaults.ALL_FIELD_TYPE.stored() && + storeTermVectors() == Defaults.ALL_FIELD_TYPE.storeTermVectors() && + indexAnalyzer == null && searchAnalyzer == null) { return builder; } builder.startObject(CONTENT_TYPE); @@ -265,11 +259,20 @@ public class AllFieldMapper extends AbstractFieldMapper implements Interna if (autoBoost != false) { builder.field("auto_boost", autoBoost); } - if (store != Defaults.STORE) { - builder.field("store", store.name().toLowerCase()); + if (stored() != Defaults.ALL_FIELD_TYPE.stored()) { + builder.field("store", stored()); } - if (termVector != Defaults.TERM_VECTOR) { - builder.field("term_vector", termVector.name().toLowerCase()); + if (storeTermVectors() != Defaults.ALL_FIELD_TYPE.storeTermVectors()) { + builder.field("store_term_vector", storeTermVectors()); + } + if (storeTermVectorOffsets() != Defaults.ALL_FIELD_TYPE.storeTermVectorOffsets()) { + builder.field("store_term_vector_offsets", storeTermVectorOffsets()); + } + if (storeTermVectorPositions() != Defaults.ALL_FIELD_TYPE.storeTermVectorPositions()) { + builder.field("store_term_vector_positions", storeTermVectorPositions()); + } + if (storeTermVectorPayloads() != Defaults.ALL_FIELD_TYPE.storeTermVectorPayloads()) { + builder.field("store_term_vector_payloads", storeTermVectorPayloads()); } if (indexAnalyzer != null && searchAnalyzer != null && indexAnalyzer.name().equals(searchAnalyzer.name()) && !indexAnalyzer.name().startsWith("_")) { // same analyzers, output it once diff --git a/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java index 7ec2d05aa1e..bcd48ef38d3 100644 --- a/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java @@ -22,8 +22,6 @@ package org.elasticsearch.index.mapper.ip; import org.apache.lucene.analysis.NumericTokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.*; import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; From a49078dfc1673883b87d180f1be7a4e9d73d8df7 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Sat, 27 Oct 2012 20:34:32 -0400 Subject: [PATCH 040/146] lucene 4: replace 
UnicodeUtil.UTF8Result with BytesRef --- .../common/compress/CompressedString.java | 6 +-- .../http/netty/NettyHttpChannel.java | 12 ++--- .../rest/AbstractRestResponse.java | 10 ++++ .../elasticsearch/rest/BytesRestResponse.java | 5 ++ .../org/elasticsearch/rest/RestResponse.java | 6 +++ .../rest/StringRestResponse.java | 11 +++-- .../elasticsearch/rest/Utf8RestResponse.java | 46 +++++++++++-------- .../rest/XContentRestResponse.java | 39 ++++++++++++---- 8 files changed, 92 insertions(+), 43 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/compress/CompressedString.java b/src/main/java/org/elasticsearch/common/compress/CompressedString.java index 0307fdf72d8..54427b4f191 100644 --- a/src/main/java/org/elasticsearch/common/compress/CompressedString.java +++ b/src/main/java/org/elasticsearch/common/compress/CompressedString.java @@ -19,7 +19,7 @@ package org.elasticsearch.common.compress; -import org.apache.lucene.util.UnicodeUtil; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Unicode; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; @@ -79,8 +79,8 @@ public class CompressedString implements Streamable { } public CompressedString(String str) throws IOException { - UnicodeUtil.UTF8Result result = Unicode.unsafeFromStringAsUtf8(str); - this.bytes = CompressorFactory.defaultCompressor().compress(result.result, 0, result.length); + BytesRef result = Unicode.unsafeFromStringAsUtf8(str); + this.bytes = CompressorFactory.defaultCompressor().compress(result.bytes, result.offset, result.length); } public byte[] compressed() { diff --git a/src/main/java/org/elasticsearch/http/netty/NettyHttpChannel.java b/src/main/java/org/elasticsearch/http/netty/NettyHttpChannel.java index 4257f46070e..5e3508a84fa 100644 --- a/src/main/java/org/elasticsearch/http/netty/NettyHttpChannel.java +++ b/src/main/java/org/elasticsearch/http/netty/NettyHttpChannel.java @@ -102,15 +102,15 @@ public class NettyHttpChannel implements HttpChannel { releaseContentListener = new NettyTransport.CacheFutureListener((CachedStreamOutput.Entry) builder.payload()); buf = builder.bytes().toChannelBuffer(); } else if (response.contentThreadSafe()) { - buf = ChannelBuffers.wrappedBuffer(response.content(), 0, response.contentLength()); + buf = ChannelBuffers.wrappedBuffer(response.content(), response.contentOffset(), response.contentLength()); } else { - buf = ChannelBuffers.copiedBuffer(response.content(), 0, response.contentLength()); + buf = ChannelBuffers.copiedBuffer(response.content(), response.contentOffset(), response.contentLength()); } } else { if (response.contentThreadSafe()) { - buf = ChannelBuffers.wrappedBuffer(response.content(), 0, response.contentLength()); + buf = ChannelBuffers.wrappedBuffer(response.content(), response.contentOffset(), response.contentLength()); } else { - buf = ChannelBuffers.copiedBuffer(response.content(), 0, response.contentLength()); + buf = ChannelBuffers.copiedBuffer(response.content(), response.contentOffset(), response.contentLength()); } } } catch (IOException e) { @@ -119,11 +119,11 @@ public class NettyHttpChannel implements HttpChannel { if (response.prefixContent() != null || response.suffixContent() != null) { ChannelBuffer prefixBuf = ChannelBuffers.EMPTY_BUFFER; if (response.prefixContent() != null) { - prefixBuf = ChannelBuffers.copiedBuffer(response.prefixContent(), 0, response.prefixContentLength()); + prefixBuf = ChannelBuffers.copiedBuffer(response.prefixContent(), 
response.prefixContentOffset(), response.prefixContentLength()); } ChannelBuffer suffixBuf = ChannelBuffers.EMPTY_BUFFER; if (response.suffixContent() != null) { - suffixBuf = ChannelBuffers.copiedBuffer(response.suffixContent(), 0, response.suffixContentLength()); + suffixBuf = ChannelBuffers.copiedBuffer(response.suffixContent(), response.suffixContentOffset(), response.suffixContentLength()); } buf = ChannelBuffers.wrappedBuffer(prefixBuf, buf, suffixBuf); } diff --git a/src/main/java/org/elasticsearch/rest/AbstractRestResponse.java b/src/main/java/org/elasticsearch/rest/AbstractRestResponse.java index a2b2de29fe4..72592b255ce 100644 --- a/src/main/java/org/elasticsearch/rest/AbstractRestResponse.java +++ b/src/main/java/org/elasticsearch/rest/AbstractRestResponse.java @@ -34,6 +34,11 @@ public abstract class AbstractRestResponse implements RestResponse { return -1; } + @Override + public int prefixContentOffset() { + return 0; + } + @Override public byte[] suffixContent() { return null; @@ -43,4 +48,9 @@ public abstract class AbstractRestResponse implements RestResponse { public int suffixContentLength() { return -1; } + + @Override + public int suffixContentOffset() { + return 0; + } } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/rest/BytesRestResponse.java b/src/main/java/org/elasticsearch/rest/BytesRestResponse.java index a1dcd0e84a2..57151725e3f 100644 --- a/src/main/java/org/elasticsearch/rest/BytesRestResponse.java +++ b/src/main/java/org/elasticsearch/rest/BytesRestResponse.java @@ -52,6 +52,11 @@ public class BytesRestResponse extends AbstractRestResponse { return bytes.length; } + @Override + public int contentOffset() throws IOException { + return 0; + } + @Override public RestStatus status() { return RestStatus.OK; diff --git a/src/main/java/org/elasticsearch/rest/RestResponse.java b/src/main/java/org/elasticsearch/rest/RestResponse.java index a92011e44dc..15bba829321 100644 --- a/src/main/java/org/elasticsearch/rest/RestResponse.java +++ b/src/main/java/org/elasticsearch/rest/RestResponse.java @@ -44,13 +44,19 @@ public interface RestResponse { */ int contentLength() throws IOException; + int contentOffset() throws IOException; + byte[] prefixContent(); int prefixContentLength(); + int prefixContentOffset(); + byte[] suffixContent(); int suffixContentLength(); + int suffixContentOffset(); + RestStatus status(); } diff --git a/src/main/java/org/elasticsearch/rest/StringRestResponse.java b/src/main/java/org/elasticsearch/rest/StringRestResponse.java index 7f2a2454783..f4280b3f2be 100644 --- a/src/main/java/org/elasticsearch/rest/StringRestResponse.java +++ b/src/main/java/org/elasticsearch/rest/StringRestResponse.java @@ -19,6 +19,7 @@ package org.elasticsearch.rest; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.UnicodeUtil; import org.elasticsearch.common.util.concurrent.ThreadLocals; @@ -27,10 +28,10 @@ import org.elasticsearch.common.util.concurrent.ThreadLocals; */ public class StringRestResponse extends Utf8RestResponse { - private static ThreadLocal> cache = new ThreadLocal>() { + private static ThreadLocal> cache = new ThreadLocal>() { @Override - protected ThreadLocals.CleanableValue initialValue() { - return new ThreadLocals.CleanableValue(new UnicodeUtil.UTF8Result()); + protected ThreadLocals.CleanableValue initialValue() { + return new ThreadLocals.CleanableValue(new BytesRef()); } }; @@ -42,8 +43,8 @@ public class StringRestResponse extends Utf8RestResponse { super(status, convert(content)); } - private static 
UnicodeUtil.UTF8Result convert(String content) { - UnicodeUtil.UTF8Result result = cache.get().get(); + private static BytesRef convert(String content) { + BytesRef result = cache.get().get(); UnicodeUtil.UTF16toUTF8(content, 0, content.length(), result); return result; } diff --git a/src/main/java/org/elasticsearch/rest/Utf8RestResponse.java b/src/main/java/org/elasticsearch/rest/Utf8RestResponse.java index 5471cb645a7..43634277155 100644 --- a/src/main/java/org/elasticsearch/rest/Utf8RestResponse.java +++ b/src/main/java/org/elasticsearch/rest/Utf8RestResponse.java @@ -19,10 +19,10 @@ package org.elasticsearch.rest; -import org.apache.lucene.util.UnicodeUtil; +import org.apache.lucene.util.BytesRef; /** - * An http response that is built on top of {@link org.apache.lucene.util.UnicodeUtil.UTF8Result}. + * An http response that is built on top of {@link org.apache.lucene.util.BytesRef}. *

*

Note, this class assumes that the utf8 result is not thread safe. * @@ -30,33 +30,26 @@ import org.apache.lucene.util.UnicodeUtil; */ public class Utf8RestResponse extends AbstractRestResponse implements RestResponse { - public static final UnicodeUtil.UTF8Result EMPTY; - - static { - UnicodeUtil.UTF8Result temp = new UnicodeUtil.UTF8Result(); - temp.result = new byte[0]; - temp.length = 0; - EMPTY = temp; - } + public static final BytesRef EMPTY = new BytesRef(); private final RestStatus status; - private final UnicodeUtil.UTF8Result utf8Result; + private final BytesRef utf8Result; - private final UnicodeUtil.UTF8Result prefixUtf8Result; + private final BytesRef prefixUtf8Result; - private final UnicodeUtil.UTF8Result suffixUtf8Result; + private final BytesRef suffixUtf8Result; public Utf8RestResponse(RestStatus status) { this(status, EMPTY); } - public Utf8RestResponse(RestStatus status, UnicodeUtil.UTF8Result utf8Result) { + public Utf8RestResponse(RestStatus status, BytesRef utf8Result) { this(status, utf8Result, null, null); } - public Utf8RestResponse(RestStatus status, UnicodeUtil.UTF8Result utf8Result, - UnicodeUtil.UTF8Result prefixUtf8Result, UnicodeUtil.UTF8Result suffixUtf8Result) { + public Utf8RestResponse(RestStatus status, BytesRef utf8Result, + BytesRef prefixUtf8Result, BytesRef suffixUtf8Result) { this.status = status; this.utf8Result = utf8Result; this.prefixUtf8Result = prefixUtf8Result; @@ -75,7 +68,7 @@ public class Utf8RestResponse extends AbstractRestResponse implements RestRespon @Override public byte[] content() { - return utf8Result.result; + return utf8Result.bytes; } @Override @@ -83,6 +76,11 @@ public class Utf8RestResponse extends AbstractRestResponse implements RestRespon return utf8Result.length; } + @Override + public int contentOffset() { + return utf8Result.offset; + } + @Override public RestStatus status() { return status; @@ -90,7 +88,7 @@ public class Utf8RestResponse extends AbstractRestResponse implements RestRespon @Override public byte[] prefixContent() { - return prefixUtf8Result != null ? prefixUtf8Result.result : null; + return prefixUtf8Result != null ? prefixUtf8Result.bytes : null; } @Override @@ -98,13 +96,23 @@ public class Utf8RestResponse extends AbstractRestResponse implements RestRespon return prefixUtf8Result != null ? prefixUtf8Result.length : 0; } + @Override + public int prefixContentOffset() { + return prefixUtf8Result != null ? prefixUtf8Result.offset : 0; + } + @Override public byte[] suffixContent() { - return suffixUtf8Result != null ? suffixUtf8Result.result : null; + return suffixUtf8Result != null ? suffixUtf8Result.bytes : null; } @Override public int suffixContentLength() { return suffixUtf8Result != null ? suffixUtf8Result.length : 0; } + + @Override + public int suffixContentOffset() { + return suffixUtf8Result != null ? 
suffixUtf8Result.offset : 0; + } } \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/rest/XContentRestResponse.java b/src/main/java/org/elasticsearch/rest/XContentRestResponse.java index e34110440aa..ac9a6cd2b86 100644 --- a/src/main/java/org/elasticsearch/rest/XContentRestResponse.java +++ b/src/main/java/org/elasticsearch/rest/XContentRestResponse.java @@ -19,6 +19,7 @@ package org.elasticsearch.rest; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.UnicodeUtil; import org.elasticsearch.common.util.concurrent.ThreadLocals; import org.elasticsearch.common.xcontent.XContentBuilder; @@ -33,20 +34,20 @@ public class XContentRestResponse extends AbstractRestResponse { private static final byte[] END_JSONP; static { - UnicodeUtil.UTF8Result U_END_JSONP = new UnicodeUtil.UTF8Result(); + BytesRef U_END_JSONP = new BytesRef(); UnicodeUtil.UTF16toUTF8(");", 0, ");".length(), U_END_JSONP); END_JSONP = new byte[U_END_JSONP.length]; - System.arraycopy(U_END_JSONP.result, 0, END_JSONP, 0, U_END_JSONP.length); + System.arraycopy(U_END_JSONP.bytes, U_END_JSONP.offset, END_JSONP, 0, U_END_JSONP.length); } - private static ThreadLocal> prefixCache = new ThreadLocal>() { + private static ThreadLocal> prefixCache = new ThreadLocal>() { @Override - protected ThreadLocals.CleanableValue initialValue() { - return new ThreadLocals.CleanableValue(new UnicodeUtil.UTF8Result()); + protected ThreadLocals.CleanableValue initialValue() { + return new ThreadLocals.CleanableValue(new BytesRef()); } }; - private final UnicodeUtil.UTF8Result prefixUtf8Result; + private final BytesRef prefixUtf8Result; private final RestStatus status; @@ -82,6 +83,11 @@ public class XContentRestResponse extends AbstractRestResponse { return builder.bytes().length(); } + @Override + public int contentOffset() throws IOException { + return 0; + } + @Override public RestStatus status() { return this.status; @@ -90,7 +96,7 @@ public class XContentRestResponse extends AbstractRestResponse { @Override public byte[] prefixContent() { if (prefixUtf8Result != null) { - return prefixUtf8Result.result; + return prefixUtf8Result.bytes; } return null; } @@ -103,6 +109,14 @@ public class XContentRestResponse extends AbstractRestResponse { return 0; } + @Override + public int prefixContentOffset() { + if (prefixUtf8Result != null) { + return prefixUtf8Result.offset; + } + return 0; + } + @Override public byte[] suffixContent() { if (prefixUtf8Result != null) { @@ -119,14 +133,19 @@ public class XContentRestResponse extends AbstractRestResponse { return 0; } - private static UnicodeUtil.UTF8Result startJsonp(RestRequest request) { + @Override + public int suffixContentOffset() { + return 0; + } + + private static BytesRef startJsonp(RestRequest request) { String callback = request.param("callback"); if (callback == null) { return null; } - UnicodeUtil.UTF8Result result = prefixCache.get().get(); + BytesRef result = prefixCache.get().get(); UnicodeUtil.UTF16toUTF8(callback, 0, callback.length(), result); - result.result[result.length] = '('; + result.bytes[result.length] = '('; result.length++; return result; } From cdf1fc8981c99ed8af58a33bd2bba63fe192c26d Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Sun, 28 Oct 2012 17:04:10 +0100 Subject: [PATCH 041/146] lucene 4: upgraded o.e.index.search.nested package. Also fixed issue with liveDocs in child package. 
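For context on the liveDocs part: Lucene 4 inverted the deleted-document check. IndexReader.isDeleted(doc) is gone, and AtomicReader.getLiveDocs() returns a Bits where a set bit means the document is live; a null Bits means the segment has no deletions at all. That is why conditions such as !acceptDocs.get(doc) become acceptDocs.get(doc) in the filters below. A minimal sketch of the new idiom (illustrative only; reader stands for any AtomicReader):

    Bits liveDocs = reader.getLiveDocs();
    for (int doc = 0; doc < reader.maxDoc(); doc++) {
        // a null liveDocs means nothing is deleted in this segment
        if (liveDocs != null && !liveDocs.get(doc)) {
            continue; // deleted document, skip it
        }
        // ... process the live document ...
    }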
--- .../index/search/child/HasChildFilter.java | 2 +- .../index/search/child/HasParentFilter.java | 4 +- .../index/search/child/TopChildrenQuery.java | 2 +- .../index/search/nested/BlockJoinQuery.java | 201 +++++++++++------- .../search/nested/IncludeNestedDocsQuery.java | 43 ++-- .../nested/NestedChildrenCollector.java | 17 +- .../index/search/nested/NestedDocsFilter.java | 6 +- .../search/nested/NonNestedDocsFilter.java | 10 +- 8 files changed, 165 insertions(+), 120 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/search/child/HasChildFilter.java b/src/main/java/org/elasticsearch/index/search/child/HasChildFilter.java index 2833fe70746..9f33e2d9614 100644 --- a/src/main/java/org/elasticsearch/index/search/child/HasChildFilter.java +++ b/src/main/java/org/elasticsearch/index/search/child/HasChildFilter.java @@ -178,7 +178,7 @@ public abstract class HasChildFilter extends Filter implements ScopePhase.Collec } public boolean get(int doc) { - return !acceptDocs.get(doc) && parents.contains(typeCache.idByDoc(doc)); + return acceptDocs.get(doc) && parents.contains(typeCache.idByDoc(doc)); } } diff --git a/src/main/java/org/elasticsearch/index/search/child/HasParentFilter.java b/src/main/java/org/elasticsearch/index/search/child/HasParentFilter.java index 57e2822c646..8cb758748c5 100644 --- a/src/main/java/org/elasticsearch/index/search/child/HasParentFilter.java +++ b/src/main/java/org/elasticsearch/index/search/child/HasParentFilter.java @@ -143,7 +143,7 @@ public abstract class HasParentFilter extends Filter implements ScopePhase.Colle } public boolean get(int doc) { - return !acceptDocs.get(doc) && parents.contains(idReaderTypeCache.parentIdByDoc(doc)); + return acceptDocs.get(doc) && parents.contains(idReaderTypeCache.parentIdByDoc(doc)); } } @@ -229,7 +229,7 @@ public abstract class HasParentFilter extends Filter implements ScopePhase.Colle } public boolean get(int doc) { - if (acceptDocs.get(doc) || doc == -1) { + if (!acceptDocs.get(doc) || doc == -1) { return false; } diff --git a/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java b/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java index 7fe64c8a4bb..1b9634f4a51 100644 --- a/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java +++ b/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java @@ -138,7 +138,7 @@ public class TopChildrenQuery extends Query implements ScopePhase.TopDocsPhase { for (AtomicReaderContext atomicReaderContext : context.searcher().getIndexReader().leaves()) { AtomicReader indexReader = atomicReaderContext.reader(); int parentDocId = context.idCache().reader(indexReader).docById(parentType, parentId); - if (parentDocId != -1 && !indexReader.getLiveDocs().get(parentDocId)) { + if (parentDocId != -1 && indexReader.getLiveDocs().get(parentDocId)) { // we found a match, add it and break TIntObjectHashMap readerParentDocs = parentDocsPerReader.get(indexReader.getCoreCacheKey()); diff --git a/src/main/java/org/elasticsearch/index/search/nested/BlockJoinQuery.java b/src/main/java/org/elasticsearch/index/search/nested/BlockJoinQuery.java index d06e35883ba..579086993c2 100644 --- a/src/main/java/org/elasticsearch/index/search/nested/BlockJoinQuery.java +++ b/src/main/java/org/elasticsearch/index/search/nested/BlockJoinQuery.java @@ -19,15 +19,20 @@ package org.elasticsearch.index.search.nested; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; 
import org.apache.lucene.search.*; import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.common.lucene.docset.FixedBitDocSet; import org.elasticsearch.common.lucene.search.NoopCollector; import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.Locale; import java.util.Set; /** @@ -75,8 +80,6 @@ public class BlockJoinQuery extends Query { public static enum ScoreMode {None, Avg, Max, Total} - ; - private final Filter parentsFilter; private final Query childQuery; @@ -112,7 +115,7 @@ public class BlockJoinQuery extends Query { } @Override - public Weight createWeight(Searcher searcher) throws IOException { + public Weight createWeight(IndexSearcher searcher) throws IOException { return new BlockJoinWeight(this, childQuery.createWeight(searcher), parentsFilter, scoreMode, childCollector); } @@ -138,24 +141,19 @@ public class BlockJoinQuery extends Query { } @Override - public float getValue() { - return childWeight.getValue(); + public float getValueForNormalization() throws IOException { + return childWeight.getValueForNormalization() * joinQuery.getBoost() * joinQuery.getBoost(); } @Override - public float sumOfSquaredWeights() throws IOException { - return childWeight.sumOfSquaredWeights() * joinQuery.getBoost() * joinQuery.getBoost(); + public void normalize(float norm, float topLevelBoost) { + childWeight.normalize(norm, topLevelBoost * joinQuery.getBoost()); } @Override - public void normalize(float norm) { - childWeight.normalize(norm * joinQuery.getBoost()); - } - - @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { + public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { // Pass scoreDocsInOrder true, topScorer false to our sub: - final Scorer childScorer = childWeight.scorer(reader, true, false); + final Scorer childScorer = childWeight.scorer(context, true, false, null); if (childScorer == null) { // No matches @@ -168,7 +166,7 @@ public class BlockJoinQuery extends Query { return null; } - DocIdSet parents = parentsFilter.getDocIdSet(reader); + DocIdSet parents = parentsFilter.getDocIdSet(context, null); // TODO NESTED: We have random access in ES, not sure I understand what can be gained?
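// For reference, the Lucene 4 Filter contract: getDocIdSet(AtomicReaderContext, Bits) receives
// per-segment acceptDocs, and passing null (as above) requests all matches, leaving deleted
// docs to the consumer. A minimal sketch of a filter that does honor acceptDocs, assuming a
// precomputed per-segment FixedBitSet (illustrative only, not part of this patch):
//
// public class PrecomputedFilter extends Filter {
//     private final FixedBitSet bits; // assumed sized to the segment's maxDoc
//
//     public PrecomputedFilter(FixedBitSet bits) {
//         this.bits = bits;
//     }
//
//     @Override
//     public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) {
//         // FixedBitSet is itself a DocIdSet; BitsFilteredDocIdSet.wrap applies
//         // acceptDocs lazily and returns the set unchanged when acceptDocs is null.
//         return BitsFilteredDocIdSet.wrap(bits, acceptDocs);
//     }
// }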
// TODO: once we do random-access filters we can // generalize this: @@ -186,18 +184,22 @@ public class BlockJoinQuery extends Query { // CHANGE: if (childCollector != null) { - childCollector.setNextReader(reader, 0); + childCollector.setNextReader(context); childCollector.setScorer(childScorer); } - return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode, childCollector); + return new BlockJoinScorer(this, childScorer, (FixedBitSet) parents, firstChildDoc, scoreMode, childCollector, acceptDocs); } @Override - public Explanation explain(IndexReader reader, int doc) throws IOException { - // TODO - throw new UnsupportedOperationException(getClass().getName() + - " cannot explain match on parent document"); + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { + BlockJoinScorer scorer = (BlockJoinScorer) scorer(context, true, false, context.reader().getLiveDocs()); + if (scorer != null) { + if (scorer.advance(doc) == doc) { + return scorer.explain(context.docBase); + } + } + return new ComplexExplanation(false, 0.0f, "Not a match"); } @Override @@ -210,21 +212,25 @@ public class BlockJoinQuery extends Query { private final Scorer childScorer; private final FixedBitSet parentBits; private final ScoreMode scoreMode; + private final Bits acceptDocs; // LUCENE 4 UPGRADE: Why not make the parentBits already be filtered by acceptDocs? private final Collector childCollector; private int parentDoc = -1; + private int prevParentDoc; private float parentScore; + private float parentFreq; private int nextChildDoc; private int[] pendingChildDocs = new int[5]; private float[] pendingChildScores; private int childDocUpto; - public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode, Collector childCollector) { + public BlockJoinScorer(Weight weight, Scorer childScorer, FixedBitSet parentBits, int firstChildDoc, ScoreMode scoreMode, Collector childCollector, Bits acceptDocs) { super(weight); //System.out.println("Q.init firstChildDoc=" + firstChildDoc); this.parentBits = parentBits; this.childScorer = childScorer; this.scoreMode = scoreMode; + this.acceptDocs = acceptDocs; this.childCollector = childCollector; if (scoreMode != ScoreMode.None) { pendingChildScores = new float[5]; @@ -233,11 +239,8 @@ public class BlockJoinQuery extends Query { } @Override - public void visitSubScorers(Query parent, BooleanClause.Occur relationship, - ScorerVisitor visitor) { - super.visitSubScorers(parent, relationship, visitor); - //childScorer.visitSubScorers(weight.getQuery(), BooleanClause.Occur.MUST, visitor); - childScorer.visitScorers(visitor); + public Collection getChildren() { + return Collections.singleton(new ChildScorer(childScorer, "BLOCK_JOIN")); } int getChildCount() { @@ -271,64 +274,88 @@ public class BlockJoinQuery extends Query { public int nextDoc() throws IOException { //System.out.println("Q.nextDoc() nextChildDoc=" + nextChildDoc); - if (nextChildDoc == NO_MORE_DOCS) { - //System.out.println(" end"); - return parentDoc = NO_MORE_DOCS; - } + // Loop until we hit a parentDoc that's accepted + while (true) { + if (nextChildDoc == NO_MORE_DOCS) { + //System.out.println(" end"); + return parentDoc = NO_MORE_DOCS; + } - // Gather all children sharing the same parent as nextChildDoc - parentDoc = parentBits.nextSetBit(nextChildDoc); - //System.out.println(" parentDoc=" + parentDoc); - assert parentDoc != -1; + // Gather all children sharing the same parent as + // 
nextChildDoc - float totalScore = 0; - float maxScore = Float.NEGATIVE_INFINITY; + parentDoc = parentBits.nextSetBit(nextChildDoc); - childDocUpto = 0; - do { - //System.out.println(" c=" + nextChildDoc); - if (pendingChildDocs.length == childDocUpto) { - pendingChildDocs = ArrayUtil.grow(pendingChildDocs); - if (scoreMode != ScoreMode.None) { + //System.out.println(" parentDoc=" + parentDoc); + assert parentDoc != -1; + + //System.out.println(" nextChildDoc=" + nextChildDoc); + if (acceptDocs != null && !acceptDocs.get(parentDoc)) { + // Parent doc not accepted; skip child docs until + // we hit a new parent doc: + do { + nextChildDoc = childScorer.nextDoc(); + } while (nextChildDoc < parentDoc); + continue; + } + + float totalScore = 0; + float totalFreq = 0; + float maxScore = Float.NEGATIVE_INFINITY; + float maxFreq = 0; + + childDocUpto = 0; + do { + + //System.out.println(" c=" + nextChildDoc); + if (pendingChildDocs.length == childDocUpto) { + pendingChildDocs = ArrayUtil.grow(pendingChildDocs); + } + if (scoreMode != ScoreMode.None && pendingChildScores.length == childDocUpto) { pendingChildScores = ArrayUtil.grow(pendingChildScores); } - } - pendingChildDocs[childDocUpto] = nextChildDoc; - if (scoreMode != ScoreMode.None) { - // TODO: specialize this into dedicated classes per-scoreMode - final float childScore = childScorer.score(); - pendingChildScores[childDocUpto] = childScore; - maxScore = Math.max(childScore, maxScore); - totalScore += childScore; + pendingChildDocs[childDocUpto] = nextChildDoc; + if (scoreMode != ScoreMode.None) { + // TODO: specialize this into dedicated classes per-scoreMode + final float childScore = childScorer.score(); + final float childFreq = childScorer.freq(); + pendingChildScores[childDocUpto] = childScore; + maxScore = Math.max(childScore, maxScore); + maxFreq = Math.max(childFreq, maxFreq); + totalScore += childScore; + totalFreq += childFreq; + } + + // CHANGE: + childCollector.collect(nextChildDoc); + + childDocUpto++; + nextChildDoc = childScorer.nextDoc(); + } while (nextChildDoc < parentDoc); + + // Parent & child docs are supposed to be orthogonal: + assert nextChildDoc != parentDoc; + + switch(scoreMode) { + case Avg: + parentScore = totalScore / childDocUpto; + parentFreq = totalFreq / childDocUpto; + break; + case Max: + parentScore = maxScore; + parentFreq = maxFreq; + break; + case Total: + parentScore = totalScore; + parentFreq = totalFreq; + break; + case None: + break; } - // CHANGE: - childCollector.collect(nextChildDoc); - - childDocUpto++; - nextChildDoc = childScorer.nextDoc(); - } while (nextChildDoc < parentDoc); - //System.out.println(" nextChildDoc=" + nextChildDoc); - - // Parent & child docs are supposed to be orthogonal: - assert nextChildDoc != parentDoc; - - switch (scoreMode) { - case Avg: - parentScore = totalScore / childDocUpto; - break; - case Max: - parentScore = maxScore; - break; - case Total: - parentScore = totalScore; - break; - case None: - break; + //System.out.println(" return parentDoc=" + parentDoc); + return parentDoc; } - - //System.out.println(" return parentDoc=" + parentDoc); - return parentDoc; } @Override @@ -341,6 +368,11 @@ public class BlockJoinQuery extends Query { return parentScore; } + @Override + public float freq() throws IOException { + return parentFreq; + } + @Override public int advance(int parentTarget) throws IOException { @@ -359,7 +391,7 @@ public class BlockJoinQuery extends Query { return nextDoc(); } - final int prevParentDoc = parentBits.prevSetBit(parentTarget - 1); + 
prevParentDoc = parentBits.prevSetBit(parentTarget - 1); //System.out.println(" rolled back to prevParentDoc=" + prevParentDoc + " vs parentDoc=" + parentDoc); assert prevParentDoc >= parentDoc; @@ -377,6 +409,15 @@ public class BlockJoinQuery extends Query { //System.out.println(" return nextParentDoc=" + nd); return nd; } + + public Explanation explain(int docBase) throws IOException { + int start = docBase + prevParentDoc + 1; // +1 b/c prevParentDoc is previous parent doc + int end = docBase + parentDoc - 1; // -1 b/c parentDoc is parent doc + return new ComplexExplanation( + true, score(), String.format(Locale.ROOT, "Score based on child doc range from %d to %d", start, end) + ); + } + } @Override @@ -427,8 +468,8 @@ public class BlockJoinQuery extends Query { } @Override - public Object clone() { - return new BlockJoinQuery((Query) origChildQuery.clone(), + public Query clone() { + return new BlockJoinQuery(origChildQuery.clone(), parentsFilter, scoreMode).setCollector(childCollector); } diff --git a/src/main/java/org/elasticsearch/index/search/nested/IncludeNestedDocsQuery.java b/src/main/java/org/elasticsearch/index/search/nested/IncludeNestedDocsQuery.java index f94c58648bf..ce7d7951e88 100644 --- a/src/main/java/org/elasticsearch/index/search/nested/IncludeNestedDocsQuery.java +++ b/src/main/java/org/elasticsearch/index/search/nested/IncludeNestedDocsQuery.java @@ -1,12 +1,15 @@ package org.elasticsearch.index.search.nested; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.*; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.common.lucene.docset.FixedBitDocSet; import java.io.IOException; +import java.util.Collection; import java.util.Set; /** @@ -48,7 +51,7 @@ public class IncludeNestedDocsQuery extends Query { } @Override - public Weight createWeight(Searcher searcher) throws IOException { + public Weight createWeight(IndexSearcher searcher) throws IOException { return new IncludeNestedDocsWeight(parentQuery, parentQuery.createWeight(searcher), parentFilter); } @@ -70,30 +73,25 @@ public class IncludeNestedDocsQuery extends Query { } @Override - public float getValue() { - return parentWeight.getValue(); + public void normalize(float norm, float topLevelBoost) { + parentWeight.normalize(norm, topLevelBoost); } @Override - public float sumOfSquaredWeights() throws IOException { - return parentWeight.sumOfSquaredWeights() * parentQuery.getBoost() * parentQuery.getBoost(); + public float getValueForNormalization() throws IOException { + return parentWeight.getValueForNormalization(); // this query is never boosted so just delegate... 
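// Background on this Lucene 4 API change: getValue()/sumOfSquaredWeights() were replaced by
// the getValueForNormalization()/normalize(norm, topLevelBoost) handshake. Roughly what
// IndexSearcher.createNormalizedWeight drives in Lucene 4.0 (a sketch, not code from this patch):
//
// Weight weight = query.createWeight(searcher);
// float v = weight.getValueForNormalization();
// float norm = searcher.getSimilarity().queryNorm(v);
// if (Float.isInfinite(norm) || Float.isNaN(norm)) {
//     norm = 1.0f;
// }
// weight.normalize(norm, 1.0f); // the top-level boost starts at 1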
} @Override - public void normalize(float norm) { - parentWeight.normalize(norm * parentQuery.getBoost()); - } - - @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { - final Scorer parentScorer = parentWeight.scorer(reader, true, false); + public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { + final Scorer parentScorer = parentWeight.scorer(context, true, false, acceptDocs); // no matches if (parentScorer == null) { return null; } - DocIdSet parents = parentsFilter.getDocIdSet(reader); + DocIdSet parents = parentsFilter.getDocIdSet(context, acceptDocs); if (parents == null) { // No matches return null; @@ -114,8 +112,8 @@ public class IncludeNestedDocsQuery extends Query { } @Override - public Explanation explain(IndexReader reader, int doc) throws IOException { - return null; + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { + return null; //Query is used internally and not by users, so explain can be empty } @Override @@ -154,12 +152,10 @@ public class IncludeNestedDocsQuery extends Query { } @Override - public void visitSubScorers(Query parent, BooleanClause.Occur relationship, ScorerVisitor visitor) { - super.visitSubScorers(parent, relationship, visitor); - parentScorer.visitScorers(visitor); + public Collection getChildren() { + return parentScorer.getChildren(); } - @Override public int nextDoc() throws IOException { if (currentParentPointer == NO_MORE_DOCS) { return (currentDoc = NO_MORE_DOCS); @@ -187,7 +183,6 @@ public class IncludeNestedDocsQuery extends Query { return currentDoc; } - @Override public int advance(int target) throws IOException { if (target == NO_MORE_DOCS) { return (currentDoc = NO_MORE_DOCS); @@ -224,6 +219,10 @@ public class IncludeNestedDocsQuery extends Query { return parentScorer.score(); } + public float freq() throws IOException { + return parentScorer.freq(); + } + public int docID() { return currentDoc; } @@ -269,8 +268,8 @@ public class IncludeNestedDocsQuery extends Query { } @Override - public Object clone() { - Query clonedQuery = (Query) origParentQuery.clone(); + public Query clone() { + Query clonedQuery = origParentQuery.clone(); return new IncludeNestedDocsQuery(clonedQuery, this); } } diff --git a/src/main/java/org/elasticsearch/index/search/nested/NestedChildrenCollector.java b/src/main/java/org/elasticsearch/index/search/nested/NestedChildrenCollector.java index 99752f6e51c..1c7c8988514 100644 --- a/src/main/java/org/elasticsearch/index/search/nested/NestedChildrenCollector.java +++ b/src/main/java/org/elasticsearch/index/search/nested/NestedChildrenCollector.java @@ -19,6 +19,8 @@ package org.elasticsearch.index.search.nested; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; @@ -47,8 +49,6 @@ public class NestedChildrenCollector extends FacetCollector { private FixedBitSet parentDocs; - private IndexReader currentReader; - public NestedChildrenCollector(FacetCollector collector, Filter parentFilter, Filter childFilter) { this.collector = collector; this.parentFilter = parentFilter; @@ -72,11 +72,12 @@ public class NestedChildrenCollector extends FacetCollector { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - 
collector.setNextReader(reader, docBase); - currentReader = reader; - childDocs = DocSets.convert(reader, childFilter.getDocIdSet(reader)); - DocIdSet docIdSet = parentFilter.getDocIdSet(reader); + public void setNextReader(AtomicReaderContext context) throws IOException { + collector.setNextReader(context); + // Can use null as acceptedDocs here, since only live doc ids are being pushed to collect method. + DocIdSet docIdSet = parentFilter.getDocIdSet(context, null); + // In ES if parent is deleted, then also the children are deleted. Therefore acceptedDocs can also be null here. + childDocs = DocSets.convert(context.reader(), childFilter.getDocIdSet(context, null)); if (docIdSet == null) { parentDocs = null; } else if (docIdSet instanceof FixedBitDocSet) { @@ -98,7 +99,7 @@ public class NestedChildrenCollector extends FacetCollector { } int prevParentDoc = parentDocs.prevSetBit(parentDoc - 1); for (int i = (parentDoc - 1); i > prevParentDoc; i--) { - if (!currentReader.isDeleted(i) && childDocs.get(i)) { + if (childDocs.get(i)) { collector.collect(i); } } diff --git a/src/main/java/org/elasticsearch/index/search/nested/NestedDocsFilter.java b/src/main/java/org/elasticsearch/index/search/nested/NestedDocsFilter.java index 872f04abb80..5940b40974d 100644 --- a/src/main/java/org/elasticsearch/index/search/nested/NestedDocsFilter.java +++ b/src/main/java/org/elasticsearch/index/search/nested/NestedDocsFilter.java @@ -19,11 +19,13 @@ package org.elasticsearch.index.search.nested; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; import org.apache.lucene.search.PrefixFilter; +import org.apache.lucene.util.Bits; import org.elasticsearch.index.mapper.internal.TypeFieldMapper; import java.io.IOException; @@ -41,8 +43,8 @@ public class NestedDocsFilter extends Filter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - return filter.getDocIdSet(reader); + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + return filter.getDocIdSet(context, acceptDocs); } @Override diff --git a/src/main/java/org/elasticsearch/index/search/nested/NonNestedDocsFilter.java b/src/main/java/org/elasticsearch/index/search/nested/NonNestedDocsFilter.java index ac833b743fc..7df2f3e8795 100644 --- a/src/main/java/org/elasticsearch/index/search/nested/NonNestedDocsFilter.java +++ b/src/main/java/org/elasticsearch/index/search/nested/NonNestedDocsFilter.java @@ -19,11 +19,13 @@ package org.elasticsearch.index.search.nested; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; import org.apache.lucene.search.PrefixFilter; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.index.mapper.internal.TypeFieldMapper; @@ -42,14 +44,14 @@ public class NonNestedDocsFilter extends Filter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - DocIdSet docSet = filter.getDocIdSet(reader); + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + DocIdSet docSet = filter.getDocIdSet(context, acceptDocs); if (docSet == null || docSet == DocIdSet.EMPTY_DOCIDSET) { // will almost never happen, and we need a FixedBitSet 
for the parent filter in // BlockJoinQuery, we cache it anyhow... - docSet = new FixedBitSet(reader.maxDoc()); + docSet = new FixedBitSet(context.reader().maxDoc()); } - ((FixedBitSet) docSet).flip(0, reader.maxDoc()); + ((FixedBitSet) docSet).flip(0, context.reader().maxDoc()); return docSet; } From 7b8ab2d6851918bbbb5ba1bca1e19e23353ab087 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Sun, 28 Oct 2012 23:52:57 +0000 Subject: [PATCH 042/146] lucene 4: cleanup unused class --- .../elasticsearch/index/analysis/NumericAnalyzer.java | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/analysis/NumericAnalyzer.java b/src/main/java/org/elasticsearch/index/analysis/NumericAnalyzer.java index 1b20e95a504..61c5cb689b8 100644 --- a/src/main/java/org/elasticsearch/index/analysis/NumericAnalyzer.java +++ b/src/main/java/org/elasticsearch/index/analysis/NumericAnalyzer.java @@ -20,7 +20,6 @@ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; import java.io.IOException; import java.io.Reader; @@ -42,14 +41,4 @@ public abstract class NumericAnalyzer extends Analyz } protected abstract T createNumericTokenizer(Reader reader, char[] buffer) throws IOException; - - private static final class Holder { - final NumericTokenizer tokenizer; - final char[] buffer; - - private Holder(NumericTokenizer tokenizer, char[] buffer) { - this.tokenizer = tokenizer; - this.buffer = buffer; - } - } } From d4e4b5d9f4cb1b5725fdf25cef226c365f1c6f0e Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Mon, 29 Oct 2012 11:18:29 +0100 Subject: [PATCH 043/146] lucene 4: read commit user data from directory without a reader --- .../index/engine/robin/RobinEngine.java | 20 +++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java b/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java index 449ac47798d..311b55c1682 100644 --- a/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java +++ b/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java @@ -23,6 +23,9 @@ import com.google.common.collect.Lists; import org.apache.lucene.index.*; import org.apache.lucene.search.*; import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.util.IOUtils; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.ElasticSearchIllegalStateException; import org.elasticsearch.cluster.metadata.IndexMetaData; @@ -245,8 +248,8 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { try { // commit on a just opened writer will commit even if there are no changes done to it // we rely on that for the commit data translog id key - if (IndexReader.indexExists(store.directory())) { - Map commitUserData = IndexReader.getCommitUserData(store.directory()); + if (DirectoryReader.indexExists(store.directory())) { + Map commitUserData = getCommitUserData(store.directory()); if (commitUserData.containsKey(Translog.TRANSLOG_ID_KEY)) { translogIdGenerator.set(Long.parseLong(commitUserData.get(Translog.TRANSLOG_ID_KEY))); } else { @@ -861,7 +864,7 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { indexWriter.commit(MapBuilder.newMapBuilder().put(Translog.TRANSLOG_ID_KEY, Long.toString(translogId)).map()); if (flush.force()) { // 
if we force, we might not have committed, we need to check that it's the same id - Map commitUserData = IndexReader.getCommitUserData(store.directory()); + Map commitUserData = getCommitUserData(store.directory()); long committedTranslogId = Long.parseLong(commitUserData.get(Translog.TRANSLOG_ID_KEY)); if (committedTranslogId != translogId) { // we did not commit anything, revert to the old translog @@ -1325,7 +1328,7 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { logger.warn("shard is locked, releasing lock"); IndexWriter.unlock(store.directory()); } - boolean create = !IndexReader.indexExists(store.directory()); + boolean create = !DirectoryReader.indexExists(store.directory()); IndexWriterConfig config = new IndexWriterConfig(Lucene.VERSION, analysisService.defaultIndexAnalyzer()); config.setOpenMode(create ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.APPEND); config.setIndexDeletionPolicy(deletionPolicy); @@ -1531,4 +1534,13 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { return searcher; } } + + /** + * Reads the latest commit and loads the user data + */ + private static final Map getCommitUserData(final Directory directory) throws IOException { + final SegmentInfos sis = new SegmentInfos(); + sis.read(directory); + return sis.getUserData(); + } } From 0c1778a033e43d14b85923199d427aea245b48a8 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Mon, 29 Oct 2012 11:45:59 +0100 Subject: [PATCH 044/146] lucene 4: don't restrict ram buffer to 2GB this lucene restriction was removed with DWPT --- .../org/elasticsearch/index/engine/robin/RobinEngine.java | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java b/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java index 311b55c1682..b5ea6660538 100644 --- a/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java +++ b/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java @@ -188,12 +188,7 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { ByteSizeValue preValue = this.indexingBufferSize; rwl.readLock().lock(); try { - // LUCENE MONITOR - If this restriction is removed from Lucene, remove it from here - if (indexingBufferSize.mbFrac() > 2048.0) { - this.indexingBufferSize = new ByteSizeValue(2048, ByteSizeUnit.MB); - } else { - this.indexingBufferSize = indexingBufferSize; - } + this.indexingBufferSize = indexingBufferSize; IndexWriter indexWriter = this.indexWriter; if (indexWriter != null) { indexWriter.getConfig().setRAMBufferSizeMB(this.indexingBufferSize.mbFrac()); From 77cbe0a26ba04d3eb0750048eed2f31bed8481a2 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Mon, 29 Oct 2012 11:47:14 +0100 Subject: [PATCH 045/146] lucene 4: s/getFieldable/getField --- src/main/java/org/elasticsearch/index/engine/Engine.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/engine/Engine.java b/src/main/java/org/elasticsearch/index/engine/Engine.java index e08d2cb4f0c..02c59758c12 100644 --- a/src/main/java/org/elasticsearch/index/engine/Engine.java +++ b/src/main/java/org/elasticsearch/index/engine/Engine.java @@ -475,7 +475,7 @@ public interface Engine extends IndexShardComponent, CloseableComponent { } public UidField uidField() { - return (UidField) 
From 77cbe0a26ba04d3eb0750048eed2f31bed8481a2 Mon Sep 17 00:00:00 2001
From: Simon Willnauer
Date: Mon, 29 Oct 2012 11:47:14 +0100
Subject: [PATCH 045/146] lucene 4: s/getFieldable/getField
---
 src/main/java/org/elasticsearch/index/engine/Engine.java | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/main/java/org/elasticsearch/index/engine/Engine.java b/src/main/java/org/elasticsearch/index/engine/Engine.java
index e08d2cb4f0c..02c59758c12 100644
--- a/src/main/java/org/elasticsearch/index/engine/Engine.java
+++ b/src/main/java/org/elasticsearch/index/engine/Engine.java
@@ -475,7 +475,7 @@ public interface Engine extends IndexShardComponent, CloseableComponent {
         }

         public UidField uidField() {
-            return (UidField) doc.rootDoc().getFieldable(UidFieldMapper.NAME);
+            return (UidField) doc.rootDoc().getField(UidFieldMapper.NAME);
         }

@@ -603,7 +603,7 @@ public interface Engine extends IndexShardComponent, CloseableComponent {
         }

         public UidField uidField() {
-            return (UidField) doc.rootDoc().getFieldable(UidFieldMapper.NAME);
+            return (UidField) doc.rootDoc().getField(UidFieldMapper.NAME);
         }

         public Index startTime(long startTime) {

From d8d74982923a23e3c807077b6f269a7d698de40e Mon Sep 17 00:00:00 2001
From: Martijn van Groningen
Date: Mon, 29 Oct 2012 14:11:58 +0100
Subject: [PATCH 046/146] lucene 4: Moved from FieldSelectors to FieldVisitors.
---
 .../lucene/document/BaseFieldVisitor.java     | 14 +++
 .../document/MultipleFieldsVisitor.java       | 85 +++++++++++++++++
 .../lucene/document/ResetFieldSelector.java   | 29 ------
 .../lucene/document/SingleFieldSelector.java  | 53 -----------
 .../lucene/document/SingleFieldVisitor.java   | 89 +++++++++++++++++
 .../index/get/ShardGetService.java            | 45 +++++----
 .../mapper/internal/SourceFieldMapper.java    |  6 +-
 .../mapper/internal/SourceFieldSelector.java  | 52 ----------
 .../mapper/internal/SourceFieldVisitor.java   | 74 +++++++++++++++
 ...tor.java => AllButSourceFieldVisitor.java} | 23 ++---
 ...tor.java => FieldMappersFieldVisitor.java} | 27 +-----
 .../selector/UidAndRoutingFieldSelector.java  | 60 ------------
 .../selector/UidAndRoutingFieldVisitor.java   | 86 +++++++++++++++++
 .../selector/UidAndSourceFieldSelector.java   | 60 ------------
 .../selector/UidAndSourceFieldVisitor.java    | 80 ++++++++++++++++
 ...ieldSelector.java => UidFieldVisitor.java} | 41 ++++++--
 .../index/percolator/PercolatorService.java   | 24 +++--
 .../indices/ttl/IndicesTTLService.java        | 23 ++---
 .../search/fetch/FetchPhase.java              | 95 ++++++++++---------
 .../search/fetch/FetchSubPhase.java           | 16 +++-
 .../search/highlight/HighlightPhase.java      | 23 ++---
 .../search/lookup/FieldsLookup.java           | 12 ++-
 .../search/lookup/SourceLookup.java           | 17 ++--
 23 files changed, 614 insertions(+), 420 deletions(-)
 create mode 100644 src/main/java/org/elasticsearch/common/lucene/document/BaseFieldVisitor.java
 create mode 100644 src/main/java/org/elasticsearch/common/lucene/document/MultipleFieldsVisitor.java
 delete mode 100644 src/main/java/org/elasticsearch/common/lucene/document/ResetFieldSelector.java
 delete mode 100644 src/main/java/org/elasticsearch/common/lucene/document/SingleFieldSelector.java
 create mode 100644 src/main/java/org/elasticsearch/common/lucene/document/SingleFieldVisitor.java
 delete mode 100644 src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldSelector.java
 create mode 100644 src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldVisitor.java
 rename src/main/java/org/elasticsearch/index/mapper/selector/{AllButSourceFieldSelector.java => AllButSourceFieldVisitor.java} (67%)
 rename src/main/java/org/elasticsearch/index/mapper/selector/{FieldMappersFieldSelector.java => FieldMappersFieldVisitor.java} (63%)
 delete mode 100644 src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldSelector.java
 create mode 100644 src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldVisitor.java
 delete mode 100644 src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldSelector.java
 create mode 100644 src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldVisitor.java
 rename src/main/java/org/elasticsearch/index/mapper/selector/{UidFieldSelector.java => UidFieldVisitor.java} (50%)

diff --git a/src/main/java/org/elasticsearch/common/lucene/document/BaseFieldVisitor.java b/src/main/java/org/elasticsearch/common/lucene/document/BaseFieldVisitor.java
new file mode 100644
index 00000000000..49b54460398
--- /dev/null
+++ b/src/main/java/org/elasticsearch/common/lucene/document/BaseFieldVisitor.java
@@ -0,0 +1,14 @@
+package org.elasticsearch.common.lucene.document;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.StoredFieldVisitor;
+
+public abstract class BaseFieldVisitor extends StoredFieldVisitor {
+
+    // LUCENE 4 UPGRADE: Some field visitors need to be cleared before they can be reused. Maybe there is a better way.
+    public abstract void reset();
+
+    // LUCENE 4 UPGRADE: Added for now to make everything work. We want to depend on Document as little as possible.
+    public abstract Document createDocument();
+
+}
diff --git a/src/main/java/org/elasticsearch/common/lucene/document/MultipleFieldsVisitor.java b/src/main/java/org/elasticsearch/common/lucene/document/MultipleFieldsVisitor.java
new file mode 100644
index 00000000000..8120f7c6ad4
--- /dev/null
+++ b/src/main/java/org/elasticsearch/common/lucene/document/MultipleFieldsVisitor.java
@@ -0,0 +1,85 @@
+package org.elasticsearch.common.lucene.document;
+
+import org.apache.lucene.document.*;
+import org.apache.lucene.index.FieldInfo;
+
+import java.io.IOException;
+import java.util.HashSet;
+import java.util.Set;
+
+/**
+ *
+ */
+public class MultipleFieldsVisitor extends BaseFieldVisitor {
+
+    protected Document doc = new Document();
+    protected final Set fieldsToAdd;
+
+    /** Load only fields named in the provided Set. */
+    public MultipleFieldsVisitor(Set fieldsToAdd) {
+        this.fieldsToAdd = fieldsToAdd;
+    }
+
+    /** Load only the fields named in the provided array. */
+    public MultipleFieldsVisitor(String... fields) {
+        fieldsToAdd = new HashSet(fields.length);
+        for (String field : fields) {
+            fieldsToAdd.add(field);
+        }
+    }
+
+    /** Load all stored fields. */
+    public MultipleFieldsVisitor() {
+        this.fieldsToAdd = null;
+    }
+
+    @Override
+    public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
+        doc.add(new StoredField(fieldInfo.name, value));
+    }
+
+    @Override
+    public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+        final FieldType ft = new FieldType(TextField.TYPE_STORED);
+        ft.setStoreTermVectors(fieldInfo.hasVectors());
+        ft.setIndexed(fieldInfo.isIndexed());
+        ft.setOmitNorms(fieldInfo.omitsNorms());
+        ft.setIndexOptions(fieldInfo.getIndexOptions());
+        doc.add(new Field(fieldInfo.name, value, ft));
+    }
+
+    @Override
+    public void intField(FieldInfo fieldInfo, int value) {
+        doc.add(new StoredField(fieldInfo.name, value));
+    }
+
+    @Override
+    public void longField(FieldInfo fieldInfo, long value) {
+        doc.add(new StoredField(fieldInfo.name, value));
+    }
+
+    @Override
+    public void floatField(FieldInfo fieldInfo, float value) {
+        doc.add(new StoredField(fieldInfo.name, value));
+    }
+
+    @Override
+    public void doubleField(FieldInfo fieldInfo, double value) {
+        doc.add(new StoredField(fieldInfo.name, value));
+    }
+
+    @Override
+    public Status needsField(FieldInfo fieldInfo) throws IOException {
+        return fieldsToAdd == null || fieldsToAdd.contains(fieldInfo.name) ? Status.YES : Status.NO;
+    }
+
+    @Override
+    public void reset() {
+        // start a fresh document so the visitor can be reused; leaving it null would break the next visit
+        doc = new Document();
+    }
+
+    @Override
+    public Document createDocument() {
+        return doc;
+    }
+}
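
[editor's note] The visitors above invert Lucene 3's pull-style FieldSelector contract: in Lucene 4 the stored-fields reader walks a document and asks the StoredFieldVisitor, per field, whether to load it (Status.YES), skip it (Status.NO), or abort the scan (Status.STOP). A usage sketch under those assumptions, where `reader` is an open IndexReader and `docId` a valid document id:

    MultipleFieldsVisitor visitor = new MultipleFieldsVisitor("title", "body");
    reader.document(docId, visitor);         // the reader pushes each stored field into the visitor
    Document doc = visitor.createDocument(); // re-materialize the accepted fields as a Document
    visitor.reset();                         // clear the accumulated state before reuse
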
diff --git a/src/main/java/org/elasticsearch/common/lucene/document/ResetFieldSelector.java b/src/main/java/org/elasticsearch/common/lucene/document/ResetFieldSelector.java
deleted file mode 100644
index f447107ce8d..00000000000
--- a/src/main/java/org/elasticsearch/common/lucene/document/ResetFieldSelector.java
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.common.lucene.document;
-
-import org.apache.lucene.document.FieldSelector;
-
-/**
- */
-public interface ResetFieldSelector extends FieldSelector {
-
-    void reset();
-}
diff --git a/src/main/java/org/elasticsearch/common/lucene/document/SingleFieldSelector.java b/src/main/java/org/elasticsearch/common/lucene/document/SingleFieldSelector.java
deleted file mode 100644
index cbc9d7a2313..00000000000
--- a/src/main/java/org/elasticsearch/common/lucene/document/SingleFieldSelector.java
+++ /dev/null
@@ -1,53 +0,0 @@
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.common.lucene.document;
-
-import org.apache.lucene.document.FieldSelectorResult;
-
-/**
- *
- */
-public class SingleFieldSelector implements ResetFieldSelector {
-
-    private String name;
-
-    public SingleFieldSelector() {
-    }
-
-    public SingleFieldSelector(String name) {
-        this.name = name;
-    }
-
-    public void name(String name) {
-        this.name = name;
-    }
-
-    @Override
-    public FieldSelectorResult accept(String fieldName) {
-        if (name.equals(fieldName)) {
-            return FieldSelectorResult.LOAD;
-        }
-        return FieldSelectorResult.NO_LOAD;
-    }
-
-    @Override
-    public void reset() {
-    }
-}
diff --git a/src/main/java/org/elasticsearch/common/lucene/document/SingleFieldVisitor.java b/src/main/java/org/elasticsearch/common/lucene/document/SingleFieldVisitor.java
new file mode 100644
index 00000000000..f4a003d685e
--- /dev/null
+++ b/src/main/java/org/elasticsearch/common/lucene/document/SingleFieldVisitor.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.common.lucene.document;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.FieldInfo;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ *
+ */
+public class SingleFieldVisitor extends BaseFieldVisitor {
+
+    private String name;
+    private List values;
+
+    public SingleFieldVisitor() {
+    }
+
+    public SingleFieldVisitor(String name) {
+        this.name = name;
+    }
+
+    public void name(String name) {
+        this.name = name;
+    }
+
+    @Override
+    public Document createDocument() {
+        Document document = new Document();
+        for (String value : values) {
+            document.add(new StoredField(name, value));
+        }
+        return document;
+    }
+
+    public String value() {
+        return values.get(0);
+    }
+
+    public List values() {
+        return values;
+    }
+
+    @Override
+    public Status needsField(FieldInfo fieldInfo) throws IOException {
+        if (name.equals(fieldInfo.name)) {
+            return Status.YES;
+        }
+
+        return values != null ? Status.STOP : Status.NO;
+    }
+
+    @Override
+    public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+        if (fieldInfo.name.equals(name)) {
+            if (values == null) {
+                values = new ArrayList();
+            }
+            values.add(value);
+        }
+    }
+
+    @Override
+    public void reset() {
+        values = null;
+    }
+}
diff --git a/src/main/java/org/elasticsearch/index/get/ShardGetService.java b/src/main/java/org/elasticsearch/index/get/ShardGetService.java
index 85f0c6bed5a..5deee8bc38a 100644
--- a/src/main/java/org/elasticsearch/index/get/ShardGetService.java
+++ b/src/main/java/org/elasticsearch/index/get/ShardGetService.java
@@ -20,11 +20,13 @@
 package org.elasticsearch.index.get;

 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.Term;
 import org.elasticsearch.ElasticSearchException;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.inject.Inject;
-import org.elasticsearch.common.lucene.document.ResetFieldSelector;
+import org.elasticsearch.common.lucene.document.BaseFieldVisitor;
 import org.elasticsearch.common.lucene.uid.UidField;
 import org.elasticsearch.common.metrics.CounterMetric;
 import org.elasticsearch.common.metrics.MeanMetric;
@@ -33,7 +35,7 @@ import org.elasticsearch.index.cache.IndexCache;
 import org.elasticsearch.index.engine.Engine;
 import org.elasticsearch.index.mapper.*;
 import org.elasticsearch.index.mapper.internal.*;
-import org.elasticsearch.index.mapper.selector.FieldMappersFieldSelector;
+import org.elasticsearch.index.mapper.selector.FieldMappersFieldVisitor;
 import org.elasticsearch.index.settings.IndexSettings;
 import org.elasticsearch.index.shard.AbstractIndexShardComponent;
 import org.elasticsearch.index.shard.ShardId;
@@ -140,7 +142,7 @@ public class ShardGetService extends AbstractIndexShardComponent {
         Engine.GetResult get = null;
         if (type == null || type.equals("_all")) {
             for (String typeX : mapperService.types()) {
-                get = indexShard.get(new Engine.Get(realtime, UidFieldMapper.TERM_FACTORY.createTerm(Uid.createUid(typeX, id))).loadSource(loadSource));
+                get = indexShard.get(new Engine.Get(realtime, new Term(UidFieldMapper.NAME, Uid.createUid(typeX, id))).loadSource(loadSource));
                 if (get.exists()) {
                     type = typeX;
                     break;
@@ -156,7 +158,7 @@ public class ShardGetService extends AbstractIndexShardComponent {
                 return new GetResult(shardId.index().name(), type, id, -1, false, null, null);
             }
         } else {
-            get = indexShard.get(new Engine.Get(realtime, UidFieldMapper.TERM_FACTORY.createTerm(Uid.createUid(type, id))).loadSource(loadSource));
+            get = indexShard.get(new Engine.Get(realtime, new Term(UidFieldMapper.NAME, Uid.createUid(type, id))).loadSource(loadSource));
             if (!get.exists()) {
                 get.release();
                 return new GetResult(shardId.index().name(), type, id, -1, false, null, null);
@@ -277,19 +279,21 @@ public class ShardGetService extends AbstractIndexShardComponent {
         Map fields = null;
         byte[] source = null;
         UidField.DocIdAndVersion docIdAndVersion = get.docIdAndVersion();
-        ResetFieldSelector fieldSelector = buildFieldSelectors(docMapper, gFields);
-        if (fieldSelector != null) {
-            fieldSelector.reset();
+        // LUCENE 4 UPGRADE: optimize when only a single field needs to be loaded
+        BaseFieldVisitor fieldVisitor = buildFieldSelectors(docMapper, gFields);
+        if (fieldVisitor != null) {
+            fieldVisitor.reset();
             Document doc;
             try {
-                doc = docIdAndVersion.reader.document(docIdAndVersion.docId, fieldSelector);
+                docIdAndVersion.reader.reader().document(docIdAndVersion.docId, fieldVisitor);
+                doc = fieldVisitor.createDocument();
             } catch (IOException e) {
                 throw new ElasticSearchException("Failed to get type [" + type + "] and id [" + id + "]", e);
             }
             source = extractSource(doc, docMapper);

             for (Object oField : doc.getFields()) {
-                Fieldable field = (Fieldable) oField;
+                Field field = (Field) oField;
                 String name = field.name();
                 Object value = null;
                 FieldMappers fieldMappers = docMapper.mappers().indexName(field.name());
@@ -301,8 +305,8 @@ public class ShardGetService extends AbstractIndexShardComponent {
                     }
                 }
                 if (value == null) {
-                    if (field.isBinary()) {
-                        value = new BytesArray(field.getBinaryValue(), field.getBinaryOffset(), field.getBinaryLength());
+                    if (field.binaryValue() != null) {
+                        value = new BytesArray(field.binaryValue());
                     } else {
                         value = field.stringValue();
                     }
@@ -371,7 +375,7 @@ public class ShardGetService extends AbstractIndexShardComponent {
         return new GetResult(shardId.index().name(), type, id, get.version(), get.exists(), source == null ? null : new BytesArray(source), fields);
     }

-    private static ResetFieldSelector buildFieldSelectors(DocumentMapper docMapper, String... fields) {
+    private static BaseFieldVisitor buildFieldSelectors(DocumentMapper docMapper, String... fields) {
         if (fields == null) {
             return docMapper.sourceMapper().fieldSelector();
         }
@@ -381,25 +385,26 @@ public class ShardGetService extends AbstractIndexShardComponent {
             return null;
         }

-        FieldMappersFieldSelector fieldSelector = null;
+        FieldMappersFieldVisitor fieldVisitor = null;
         for (String fieldName : fields) {
             FieldMappers x = docMapper.mappers().smartName(fieldName);
             if (x != null && x.mapper().stored()) {
-                if (fieldSelector == null) {
-                    fieldSelector = new FieldMappersFieldSelector();
+                if (fieldVisitor == null) {
+                    fieldVisitor = new FieldMappersFieldVisitor();
                 }
-                fieldSelector.add(x);
+                fieldVisitor.add(x);
             }
         }
-        return fieldSelector;
+        return fieldVisitor;
     }

     private static byte[] extractSource(Document doc, DocumentMapper documentMapper) {
         byte[] source = null;
-        Fieldable sourceField = doc.getFieldable(documentMapper.sourceMapper().names().indexName());
+        IndexableField sourceField = doc.getField(documentMapper.sourceMapper().names().indexName());
         if (sourceField != null) {
-            source = documentMapper.sourceMapper().nativeValue(sourceField);
+            // LUCENE 4 UPGRADE: Field instead of IndexableField?
+            source = documentMapper.sourceMapper().nativeValue((Field) sourceField);
             doc.removeField(documentMapper.sourceMapper().names().indexName());
         }
         return source;
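
[editor's note] The "optimize when only a single field needs to be loaded" TODO above can exploit Status.STOP: once the one wanted field has been consumed, needsField can abort the stored-fields scan instead of skipping every remaining field one by one. A minimal sketch, assuming only the Lucene 4.0 StoredFieldVisitor API (the class and field names are illustrative, not part of this patch):

    import java.io.IOException;
    import org.apache.lucene.index.FieldInfo;
    import org.apache.lucene.index.StoredFieldVisitor;

    final class OneStringFieldVisitor extends StoredFieldVisitor {
        private final String name;
        private String value; // string fields only, for brevity

        OneStringFieldVisitor(String name) {
            this.name = name;
        }

        @Override
        public Status needsField(FieldInfo fieldInfo) throws IOException {
            if (name.equals(fieldInfo.name)) {
                return Status.YES;
            }
            return value != null ? Status.STOP : Status.NO; // stop scanning once the field was seen
        }

        @Override
        public void stringField(FieldInfo fieldInfo, String v) throws IOException {
            value = v;
        }

        String value() {
            return value;
        }
    }
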
diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldMapper.java
index 85f03491a53..e1c1d714cd1 100644
--- a/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldMapper.java
+++ b/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldMapper.java
@@ -36,7 +36,7 @@ import org.elasticsearch.common.compress.CompressorFactory;
 import org.elasticsearch.common.io.stream.CachedStreamOutput;
 import org.elasticsearch.common.io.stream.StreamOutput;
 import org.elasticsearch.common.lucene.Lucene;
-import org.elasticsearch.common.lucene.document.ResetFieldSelector;
+import org.elasticsearch.common.lucene.document.BaseFieldVisitor;
 import org.elasticsearch.common.unit.ByteSizeValue;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.common.xcontent.XContentFactory;
@@ -212,8 +212,8 @@ public class SourceFieldMapper extends AbstractFieldMapper implements InternalMapper {
         return this.enabled;
     }

-    public ResetFieldSelector fieldSelector() {
-        return SourceFieldSelector.INSTANCE;
+    public BaseFieldVisitor fieldSelector() {
+        return SourceFieldVisitor.INSTANCE;
     }

     @Override
diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldSelector.java b/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldSelector.java
deleted file mode 100644
index 3d689a456b5..00000000000
--- a/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldSelector.java
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.mapper.internal;
-
-import org.apache.lucene.document.FieldSelectorResult;
-import org.elasticsearch.common.lucene.document.ResetFieldSelector;
-
-/**
- * An optimized field selector that loads just the uid.
- */
-public class SourceFieldSelector implements ResetFieldSelector {
-
-    public static final SourceFieldSelector INSTANCE = new SourceFieldSelector();
-
-    private SourceFieldSelector() {
-
-    }
-
-    @Override
-    public FieldSelectorResult accept(String fieldName) {
-        if (SourceFieldMapper.NAME.equals(fieldName)) {
-            return FieldSelectorResult.LOAD_AND_BREAK;
-        }
-        return FieldSelectorResult.NO_LOAD;
-    }
-
-    @Override
-    public void reset() {
-    }
-
-    @Override
-    public String toString() {
-        return "source";
-    }
-}
\ No newline at end of file
diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldVisitor.java b/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldVisitor.java
new file mode 100644
index 00000000000..cb8b06e7764
--- /dev/null
+++ b/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldVisitor.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.mapper.internal;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.lucene.document.BaseFieldVisitor;
+
+import java.io.IOException;
+
+/**
+ * An optimized field visitor that loads just the _source
+ */
+public class SourceFieldVisitor extends BaseFieldVisitor {
+
+    public static final SourceFieldVisitor INSTANCE = new SourceFieldVisitor();
+    private static ThreadLocal loadingContext = new ThreadLocal();
+
+    private SourceFieldVisitor() {
+    }
+
+    @Override
+    public Status needsField(FieldInfo fieldInfo) throws IOException {
+        if (SourceFieldMapper.NAME.equals(fieldInfo.name)) {
+            return Status.YES;
+        }
+        return loadingContext.get() != null ? Status.STOP : Status.NO;
+    }
+
+    @Override
+    public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
+        loadingContext.set(new BytesRef(value));
+    }
+
+    @Override
+    public void reset() {
+        loadingContext.remove();
+    }
+
+    @Override
+    public Document createDocument() {
+        Document document = new Document();
+        // keep the raw bytes; the stored _source may be compressed and must not be decoded as a string
+        document.add(new StoredField(SourceFieldMapper.NAME, loadingContext.get()));
+        return document;
+    }
+
+    public BytesRef source() {
+        return loadingContext.get();
+    }
+
+    @Override
+    public String toString() {
+        return "source";
+    }
+}
\ No newline at end of file
diff --git a/src/main/java/org/elasticsearch/index/mapper/selector/AllButSourceFieldSelector.java b/src/main/java/org/elasticsearch/index/mapper/selector/AllButSourceFieldVisitor.java
similarity index 67%
rename from src/main/java/org/elasticsearch/index/mapper/selector/AllButSourceFieldSelector.java
rename to src/main/java/org/elasticsearch/index/mapper/selector/AllButSourceFieldVisitor.java
index f5943008108..2a9103cf1d4 100644
--- a/src/main/java/org/elasticsearch/index/mapper/selector/AllButSourceFieldSelector.java
+++ b/src/main/java/org/elasticsearch/index/mapper/selector/AllButSourceFieldVisitor.java
@@ -19,27 +19,24 @@

 package org.elasticsearch.index.mapper.selector;

-import org.apache.lucene.document.FieldSelectorResult;
-import org.elasticsearch.common.lucene.document.ResetFieldSelector;
+import org.apache.lucene.index.FieldInfo;
+import org.elasticsearch.common.lucene.document.MultipleFieldsVisitor;
 import org.elasticsearch.index.mapper.internal.SourceFieldMapper;

+import java.io.IOException;
+
 /**
  * A field selector that loads all fields except the source field.
  */
-public class AllButSourceFieldSelector implements ResetFieldSelector {
-
-    public static final AllButSourceFieldSelector INSTANCE = new AllButSourceFieldSelector();
+// LUCENE 4 UPGRADE: change into singleton
+public class AllButSourceFieldVisitor extends MultipleFieldsVisitor {

     @Override
-    public FieldSelectorResult accept(String fieldName) {
-        if (SourceFieldMapper.NAME.equals(fieldName)) {
-            return FieldSelectorResult.NO_LOAD;
+    public Status needsField(FieldInfo fieldInfo) throws IOException {
+        if (SourceFieldMapper.NAME.equals(fieldInfo.name)) {
+            return Status.NO;
         }
-        return FieldSelectorResult.LOAD;
-    }
-
-    @Override
-    public void reset() {
+        return super.needsField(fieldInfo);
     }

     @Override
diff --git a/src/main/java/org/elasticsearch/index/mapper/selector/FieldMappersFieldSelector.java b/src/main/java/org/elasticsearch/index/mapper/selector/FieldMappersFieldVisitor.java
similarity index 63%
rename from src/main/java/org/elasticsearch/index/mapper/selector/FieldMappersFieldSelector.java
rename to src/main/java/org/elasticsearch/index/mapper/selector/FieldMappersFieldVisitor.java
index 862f48f77af..3661167fec2 100644
--- a/src/main/java/org/elasticsearch/index/mapper/selector/FieldMappersFieldSelector.java
+++ b/src/main/java/org/elasticsearch/index/mapper/selector/FieldMappersFieldVisitor.java
@@ -19,44 +19,27 @@

 package org.elasticsearch.index.mapper.selector;

-import org.apache.lucene.document.FieldSelectorResult;
-import org.elasticsearch.common.lucene.document.ResetFieldSelector;
+import org.elasticsearch.common.lucene.document.MultipleFieldsVisitor;
 import org.elasticsearch.index.mapper.FieldMapper;
 import org.elasticsearch.index.mapper.FieldMappers;

 import java.util.HashSet;

 /**
  *
  */
-public class FieldMappersFieldSelector implements ResetFieldSelector {
-
-    private final HashSet names = new HashSet();
+public class FieldMappersFieldVisitor extends MultipleFieldsVisitor {
+
+    public FieldMappersFieldVisitor() {
+        // start from an empty, mutable set; the no-arg super() would mean "load all stored fields"
+        super(new HashSet());
+    }

     public void add(String fieldName) {
-        names.add(fieldName);
+        fieldsToAdd.add(fieldName);
     }

     public void add(FieldMappers fieldMappers) {
         for (FieldMapper fieldMapper : fieldMappers) {
-            names.add(fieldMapper.names().indexName());
+            fieldsToAdd.add(fieldMapper.names().indexName());
         }
     }

-    @Override
-    public FieldSelectorResult accept(String fieldName) {
-        if (names.contains(fieldName)) {
-            return FieldSelectorResult.LOAD;
-        }
-        return FieldSelectorResult.NO_LOAD;
-    }
-
-    @Override
-    public void reset() {
-    }
-
     @Override
     public String toString() {
-        return "fields(" + names + ")";
+        return "fields(" + fieldsToAdd + ")";
     }
 }
diff --git a/src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldSelector.java b/src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldSelector.java
deleted file mode 100644
index 80886bdb9a8..00000000000
--- a/src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldSelector.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.mapper.selector;
-
-import org.apache.lucene.document.FieldSelectorResult;
-import org.elasticsearch.common.lucene.document.ResetFieldSelector;
-import org.elasticsearch.index.mapper.internal.RoutingFieldMapper;
-import org.elasticsearch.index.mapper.internal.UidFieldMapper;
-
-/**
- * An optimized field selector that loads just the uid and the routing.
- */
-public class UidAndRoutingFieldSelector implements ResetFieldSelector {
-
-    private int match = 0;
-
-    @Override
-    public FieldSelectorResult accept(String fieldName) {
-        if (UidFieldMapper.NAME.equals(fieldName)) {
-            if (++match == 2) {
-                return FieldSelectorResult.LOAD_AND_BREAK;
-            }
-            return FieldSelectorResult.LOAD;
-        }
-        if (RoutingFieldMapper.NAME.equals(fieldName)) {
-            if (++match == 2) {
-                return FieldSelectorResult.LOAD_AND_BREAK;
-            }
-            return FieldSelectorResult.LOAD;
-        }
-        return FieldSelectorResult.NO_LOAD;
-    }
-
-    @Override
-    public void reset() {
-        match = 0;
-    }
-
-    @Override
-    public String toString() {
-        return "uid_and_routing";
-    }
-}
diff --git a/src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldVisitor.java b/src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldVisitor.java
new file mode 100644
index 00000000000..7ff8e349fbf
--- /dev/null
+++ b/src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldVisitor.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.mapper.selector;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.FieldInfo;
+import org.elasticsearch.common.lucene.document.BaseFieldVisitor;
+import org.elasticsearch.index.mapper.internal.RoutingFieldMapper;
+import org.elasticsearch.index.mapper.internal.UidFieldMapper;
+
+import java.io.IOException;
+
+/**
+ * An optimized field visitor that loads just the uid and the routing.
+ */
+// LUCENE 4 UPGRADE: change into singleton
+public class UidAndRoutingFieldVisitor extends BaseFieldVisitor {
+
+    private String uid;
+    private String routing;
+
+    @Override
+    public Document createDocument() {
+        Document document = new Document();
+        document.add(new StoredField(UidFieldMapper.NAME, uid));
+        document.add(new StoredField(RoutingFieldMapper.NAME, routing));
+        return document;
+    }
+
+    @Override
+    public Status needsField(FieldInfo fieldInfo) throws IOException {
+        if (RoutingFieldMapper.NAME.equals(fieldInfo.name)) {
+            return Status.YES;
+        } else if (UidFieldMapper.NAME.equals(fieldInfo.name)) {
+            return Status.YES;
+        }
+
+        return uid != null && routing != null ? Status.STOP : Status.NO;
+    }
+
+    @Override
+    public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+        if (RoutingFieldMapper.NAME.equals(fieldInfo.name)) {
+            routing = value;
+        } else if (UidFieldMapper.NAME.equals(fieldInfo.name)) {
+            uid = value;
+        }
+    }
+
+    @Override
+    public void reset() {
+        uid = null;
+        routing = null;
+    }
+
+    public String uid() {
+        return uid;
+    }
+
+    public String routing() {
+        return routing;
+    }
+
+    @Override
+    public String toString() {
+        return "uid_and_routing";
+    }
+}
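
[editor's note] A usage sketch for UidAndRoutingFieldVisitor, mirroring how the TTL purge collector further below consumes it (`reader` and `docId` are assumed to be in scope):

    UidAndRoutingFieldVisitor fieldVisitor = new UidAndRoutingFieldVisitor();
    reader.document(docId, fieldVisitor);        // pushes _uid and _routing into the visitor
    Uid uid = Uid.createUid(fieldVisitor.uid()); // split the "type#id" uid back into type and id
    String routing = fieldVisitor.routing();     // stays null when the document carries no routing
    fieldVisitor.reset();                        // clear state before visiting the next document

Note that needsField only returns Status.STOP once both values are present, so for documents without a stored _routing the scan simply runs to the end of the stored fields.
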
diff --git a/src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldSelector.java b/src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldSelector.java
deleted file mode 100644
index 53a42eb8b86..00000000000
--- a/src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldSelector.java
+++ /dev/null
@@ -1,60 +0,0 @@
-/*
- * Licensed to ElasticSearch and Shay Banon under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. ElasticSearch licenses this
- * file to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- *    http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.elasticsearch.index.mapper.selector;
-
-import org.apache.lucene.document.FieldSelectorResult;
-import org.elasticsearch.common.lucene.document.ResetFieldSelector;
-import org.elasticsearch.index.mapper.internal.SourceFieldMapper;
-import org.elasticsearch.index.mapper.internal.UidFieldMapper;
-
-/**
- * An optimized field selector that loads just the uid and the source.
- */
-public class UidAndSourceFieldSelector implements ResetFieldSelector {
-
-    private int match = 0;
-
-    @Override
-    public FieldSelectorResult accept(String fieldName) {
-        if (UidFieldMapper.NAME.equals(fieldName)) {
-            if (++match == 2) {
-                return FieldSelectorResult.LOAD_AND_BREAK;
-            }
-            return FieldSelectorResult.LOAD;
-        }
-        if (SourceFieldMapper.NAME.equals(fieldName)) {
-            if (++match == 2) {
-                return FieldSelectorResult.LOAD_AND_BREAK;
-            }
-            return FieldSelectorResult.LOAD;
-        }
-        return FieldSelectorResult.NO_LOAD;
-    }
-
-    @Override
-    public void reset() {
-        match = 0;
-    }
-
-    @Override
-    public String toString() {
-        return "uid_and_source";
-    }
-}
diff --git a/src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldVisitor.java b/src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldVisitor.java
new file mode 100644
index 00000000000..9be0fcb64b6
--- /dev/null
+++ b/src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldVisitor.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. ElasticSearch licenses this
+ * file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *    http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.elasticsearch.index.mapper.selector;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.FieldInfo;
+import org.apache.lucene.util.BytesRef;
+import org.elasticsearch.common.lucene.document.BaseFieldVisitor;
+import org.elasticsearch.index.mapper.internal.SourceFieldMapper;
+import org.elasticsearch.index.mapper.internal.UidFieldMapper;
+
+import java.io.IOException;
+
+/**
+ * An optimized field visitor that loads just the uid and the source.
+ */
+// LUCENE 4 UPGRADE: change into singleton
+public class UidAndSourceFieldVisitor extends BaseFieldVisitor {
+
+    private String uid;
+    private BytesRef source;
+
+    @Override
+    public Document createDocument() {
+        Document document = new Document();
+        document.add(new StoredField(UidFieldMapper.NAME, uid));
+        document.add(new StoredField(SourceFieldMapper.NAME, source));
+        return document;
+    }
+
+    @Override
+    public Status needsField(FieldInfo fieldInfo) throws IOException {
+        if (SourceFieldMapper.NAME.equals(fieldInfo.name)) {
+            return Status.YES;
+        } else if (UidFieldMapper.NAME.equals(fieldInfo.name)) {
+            return Status.YES;
+        }
+
+        return uid != null && source != null ? Status.STOP : Status.NO;
+    }
+
+    @Override
+    public void reset() {
+        uid = null;
+        source = null;
+    }
+
+    @Override
+    public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException {
+        source = new BytesRef(value);
+    }
+
+    @Override
+    public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+        uid = value;
+    }
+
+    @Override
+    public String toString() {
+        return "uid_and_source";
+    }
+}
diff --git a/src/main/java/org/elasticsearch/index/mapper/selector/UidFieldSelector.java b/src/main/java/org/elasticsearch/index/mapper/selector/UidFieldVisitor.java
similarity index 50%
rename from src/main/java/org/elasticsearch/index/mapper/selector/UidFieldSelector.java
rename to src/main/java/org/elasticsearch/index/mapper/selector/UidFieldVisitor.java
index 8da00802873..5307e62c8ef 100644
--- a/src/main/java/org/elasticsearch/index/mapper/selector/UidFieldSelector.java
+++ b/src/main/java/org/elasticsearch/index/mapper/selector/UidFieldVisitor.java
@@ -19,31 +19,52 @@

 package org.elasticsearch.index.mapper.selector;

-import org.apache.lucene.document.FieldSelectorResult;
-import org.elasticsearch.common.lucene.document.ResetFieldSelector;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.index.FieldInfo;
+import org.elasticsearch.common.lucene.document.BaseFieldVisitor;
 import org.elasticsearch.index.mapper.internal.UidFieldMapper;

+import java.io.IOException;
+
 /**
  * An optimized field selector that loads just the uid.
  */
-public class UidFieldSelector implements ResetFieldSelector {
+public class UidFieldVisitor extends BaseFieldVisitor {

-    public static final UidFieldSelector INSTANCE = new UidFieldSelector();
-
-    private UidFieldSelector() {
+    public static final UidFieldVisitor INSTANCE = new UidFieldVisitor();
+    private static ThreadLocal loadingContext = new ThreadLocal();

+    private UidFieldVisitor() {
     }

     @Override
-    public FieldSelectorResult accept(String fieldName) {
-        if (UidFieldMapper.NAME.equals(fieldName)) {
-            return FieldSelectorResult.LOAD_AND_BREAK;
+    public void stringField(FieldInfo fieldInfo, String value) throws IOException {
+        loadingContext.set(value);
+    }
+
+    @Override
+    public Status needsField(FieldInfo fieldInfo) throws IOException {
+        if (UidFieldMapper.NAME.equals(fieldInfo.name)) {
+            return Status.YES;
         }
-        return FieldSelectorResult.NO_LOAD;
+        return loadingContext.get() != null ? Status.STOP : Status.NO;
     }

     @Override
     public void reset() {
+        loadingContext.remove();
+    }
+
+    @Override
+    public Document createDocument() {
+        Document document = new Document();
+        document.add(new StoredField(UidFieldMapper.NAME, loadingContext.get()));
+        return document;
+    }
+
+    public String uid() {
+        return loadingContext.get();
     }

     @Override
diff --git a/src/main/java/org/elasticsearch/index/percolator/PercolatorService.java b/src/main/java/org/elasticsearch/index/percolator/PercolatorService.java
index 19968eb3886..3317893cf74 100644
--- a/src/main/java/org/elasticsearch/index/percolator/PercolatorService.java
+++ b/src/main/java/org/elasticsearch/index/percolator/PercolatorService.java
@@ -21,9 +21,11 @@ package org.elasticsearch.index.percolator;

 import com.google.common.collect.Maps;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.search.*;
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.lucene.search.TermFilter;
@@ -36,7 +38,7 @@ import org.elasticsearch.index.mapper.Uid;
 import org.elasticsearch.index.mapper.internal.SourceFieldMapper;
 import org.elasticsearch.index.mapper.internal.TypeFieldMapper;
 import org.elasticsearch.index.mapper.internal.UidFieldMapper;
-import org.elasticsearch.index.mapper.selector.UidAndSourceFieldSelector;
+import org.elasticsearch.index.mapper.selector.UidAndSourceFieldVisitor;
 import org.elasticsearch.index.service.IndexService;
 import org.elasticsearch.index.settings.IndexSettings;
 import org.elasticsearch.index.shard.IndexShardState;
@@ -135,7 +137,7 @@ public class PercolatorService extends AbstractIndexComponent {
     }

     private Filter indexQueriesFilter(String indexName) {
-        return percolatorIndexService().cache().filter().cache(new TermFilter(TypeFieldMapper.TERM_FACTORY.createTerm(indexName)));
+        return percolatorIndexService().cache().filter().cache(new TermFilter(new Term(TypeFieldMapper.NAME, indexName)));
     }

     private boolean percolatorAllocated() {
@@ -157,7 +159,7 @@ public class PercolatorService extends AbstractIndexComponent {

     class QueriesLoaderCollector extends Collector {

-        private IndexReader reader;
+        private AtomicReader reader;

         private Map queries = Maps.newHashMap();

@@ -172,19 +174,21 @@ public class PercolatorService extends AbstractIndexComponent {
         @Override
         public void collect(int doc) throws IOException {
             // the _source is the query
-            Document document = reader.document(doc, new UidAndSourceFieldSelector());
+            UidAndSourceFieldVisitor fieldVisitor = new UidAndSourceFieldVisitor();
+            reader.document(doc, fieldVisitor);
+            Document document = fieldVisitor.createDocument();
             String id = Uid.createUid(document.get(UidFieldMapper.NAME)).id();
             try {
-                Fieldable sourceField = document.getFieldable(SourceFieldMapper.NAME);
-                queries.put(id, percolator.parseQuery(id, new BytesArray(sourceField.getBinaryValue(), sourceField.getBinaryOffset(), sourceField.getBinaryLength())));
+                BytesRef sourceVal = document.getBinaryValue(SourceFieldMapper.NAME);
+                queries.put(id, percolator.parseQuery(id, new BytesArray(sourceVal.bytes, sourceVal.offset, sourceVal.length)));
             } catch (Exception e) {
                 logger.warn("failed to add query [{}]", e, id);
             }
         }

         @Override
-        public void setNextReader(IndexReader reader, int docBase) throws IOException {
-            this.reader = reader;
+        public void setNextReader(AtomicReaderContext context) throws IOException {
+            this.reader = context.reader();
         }

         @Override
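
[editor's note] QueriesLoaderCollector above, and ExpiredDocsCollector just below, go through the same Lucene 4 Collector migration: setNextReader(IndexReader, int docBase) becomes setNextReader(AtomicReaderContext), and collect(int) receives segment-relative doc ids with no docBase arithmetic. A minimal sketch of the new contract, assuming only the Lucene 4.0 API (the class and field names are illustrative):

    import java.io.IOException;
    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.search.Collector;
    import org.apache.lucene.search.Scorer;

    public class SegmentAwareCollector extends Collector {
        private AtomicReaderContext context;

        @Override
        public void setScorer(Scorer scorer) throws IOException {
            // scores are not needed when we only load stored fields
        }

        @Override
        public void collect(int doc) throws IOException {
            // doc is relative to context.reader(); e.g. context.reader().document(doc, someVisitor)
        }

        @Override
        public void setNextReader(AtomicReaderContext context) throws IOException {
            this.context = context; // called once per segment, replacing (IndexReader, docBase)
        }

        @Override
        public boolean acceptsDocsOutOfOrder() {
            return true;
        }
    }
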
diff --git a/src/main/java/org/elasticsearch/indices/ttl/IndicesTTLService.java b/src/main/java/org/elasticsearch/indices/ttl/IndicesTTLService.java
index e68af6e95a6..0539d1eea79 100644
--- a/src/main/java/org/elasticsearch/indices/ttl/IndicesTTLService.java
+++ b/src/main/java/org/elasticsearch/indices/ttl/IndicesTTLService.java
@@ -19,8 +19,8 @@

 package org.elasticsearch.indices.ttl;

-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.NumericRangeQuery;
 import org.apache.lucene.search.Query;
@@ -44,16 +44,16 @@ import org.elasticsearch.index.engine.Engine;
 import org.elasticsearch.index.mapper.FieldMapper;
 import org.elasticsearch.index.mapper.FieldMappers;
 import org.elasticsearch.index.mapper.Uid;
-import org.elasticsearch.index.mapper.internal.RoutingFieldMapper;
 import org.elasticsearch.index.mapper.internal.TTLFieldMapper;
 import org.elasticsearch.index.mapper.internal.UidFieldMapper;
-import org.elasticsearch.index.mapper.selector.UidAndRoutingFieldSelector;
+import org.elasticsearch.index.mapper.selector.UidAndRoutingFieldVisitor;
 import org.elasticsearch.index.service.IndexService;
 import org.elasticsearch.index.shard.IndexShardState;
 import org.elasticsearch.index.shard.service.IndexShard;
 import org.elasticsearch.indices.IndicesService;
 import org.elasticsearch.node.settings.NodeSettingsService;

+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;

@@ -220,7 +220,7 @@ public class IndicesTTLService extends AbstractLifecycleComponent
     private class ExpiredDocsCollector extends Collector {
-        private IndexReader reader;
+        private AtomicReaderContext context;
         private List docsToPurge = new ArrayList();

         public ExpiredDocsCollector() {
@@ -235,17 +235,18 @@ public class IndicesTTLService extends AbstractLifecycleComponent

         public List getDocsToPurge() {
diff --git a/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java b/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java
index bae3fff0a37..2d7abdb29a8 100644
--- a/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java
+++ b/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java
@@ -22,12 +22,14 @@ package org.elasticsearch.search.fetch;

 import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Lists;
 import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Fieldable;
-import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.ReaderUtil;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.bytes.BytesArray;
 import org.elasticsearch.common.inject.Inject;
-import org.elasticsearch.common.lucene.document.ResetFieldSelector;
+import org.elasticsearch.common.lucene.document.BaseFieldVisitor;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.mapper.DocumentMapper;
 import org.elasticsearch.index.mapper.FieldMapper;
@@ -35,10 +37,10 @@ import org.elasticsearch.index.mapper.FieldMappers;
 import org.elasticsearch.index.mapper.Uid;
 import org.elasticsearch.index.mapper.internal.SourceFieldMapper;
 import org.elasticsearch.index.mapper.internal.UidFieldMapper;
-import org.elasticsearch.index.mapper.selector.AllButSourceFieldSelector;
-import org.elasticsearch.index.mapper.selector.FieldMappersFieldSelector;
-import org.elasticsearch.index.mapper.selector.UidAndSourceFieldSelector;
-import org.elasticsearch.index.mapper.selector.UidFieldSelector;
+import org.elasticsearch.index.mapper.selector.AllButSourceFieldVisitor;
+import org.elasticsearch.index.mapper.selector.FieldMappersFieldVisitor;
+import org.elasticsearch.index.mapper.selector.UidAndSourceFieldVisitor;
+import org.elasticsearch.index.mapper.selector.UidFieldVisitor;
 import org.elasticsearch.indices.TypeMissingException;
 import org.elasticsearch.search.SearchHitField;
 import org.elasticsearch.search.SearchParseElement;
@@ -88,28 +90,28 @@ public class FetchPhase implements SearchPhase {
     }

     public void execute(SearchContext context) {
-        ResetFieldSelector fieldSelector;
+        BaseFieldVisitor fieldVisitor;
         List extractFieldNames = null;
         boolean sourceRequested = false;
         if (!context.hasFieldNames()) {
             if (context.hasPartialFields()) {
                 // partial fields need the source, so fetch it, but don't return it
-                fieldSelector = new UidAndSourceFieldSelector();
+                fieldVisitor = new UidAndSourceFieldVisitor();
                 sourceRequested = false;
             } else if (context.hasScriptFields()) {
                 // we ask for script fields, and no field names, don't load the source
-                fieldSelector = UidFieldSelector.INSTANCE;
+                fieldVisitor = UidFieldVisitor.INSTANCE;
                 sourceRequested = false;
             } else {
-                fieldSelector = new UidAndSourceFieldSelector();
+                fieldVisitor = new UidAndSourceFieldVisitor();
                 sourceRequested = true;
             }
         } else if (context.fieldNames().isEmpty()) {
-            fieldSelector = UidFieldSelector.INSTANCE;
+            fieldVisitor = UidFieldVisitor.INSTANCE;
             sourceRequested = false;
         } else {
             boolean loadAllStored = false;
-            FieldMappersFieldSelector fieldSelectorMapper = null;
+            FieldMappersFieldVisitor fieldVisitorMapper = null;
             for (String fieldName : context.fieldNames()) {
                 if (fieldName.equals("*")) {
                     loadAllStored = true;
@@ -121,10 +123,10 @@ public class FetchPhase implements SearchPhase {
                 }
                 FieldMappers x = context.smartNameFieldMappers(fieldName);
                 if (x != null && x.mapper().stored()) {
-                    if (fieldSelectorMapper == null) {
-                        fieldSelectorMapper = new FieldMappersFieldSelector();
+                    if (fieldVisitorMapper == null) {
+                        fieldVisitorMapper = new FieldMappersFieldVisitor();
                     }
-                    fieldSelectorMapper.add(x);
+                    fieldVisitorMapper.add(x);
                 } else {
                     if (extractFieldNames == null) {
                         extractFieldNames = Lists.newArrayList();
@@ -135,29 +137,29 @@ public class FetchPhase implements SearchPhase {

             if (loadAllStored) {
                 if (sourceRequested || extractFieldNames != null) {
-                    fieldSelector = null; // load everything, including _source
+                    fieldVisitor = null; // load everything, including _source
                 } else {
-                    fieldSelector = AllButSourceFieldSelector.INSTANCE;
+                    fieldVisitor = new AllButSourceFieldVisitor();
                 }
-            } else if (fieldSelectorMapper != null) {
+            } else if (fieldVisitorMapper != null) {
                 // we are asking specific stored fields, just add the UID and be done
-                fieldSelectorMapper.add(UidFieldMapper.NAME);
+                fieldVisitorMapper.add(UidFieldMapper.NAME);
                 if (extractFieldNames != null || sourceRequested) {
-                    fieldSelectorMapper.add(SourceFieldMapper.NAME);
+                    fieldVisitorMapper.add(SourceFieldMapper.NAME);
                 }
-                fieldSelector = fieldSelectorMapper;
+                fieldVisitor = fieldVisitorMapper;
             } else if (extractFieldNames != null || sourceRequested) {
-                fieldSelector = new UidAndSourceFieldSelector();
+                fieldVisitor = new UidAndSourceFieldVisitor();
             } else {
-                fieldSelector = UidFieldSelector.INSTANCE;
+                fieldVisitor = UidFieldVisitor.INSTANCE;
             }
         }

         InternalSearchHit[] hits = new InternalSearchHit[context.docIdsToLoadSize()];
         for (int index = 0; index < context.docIdsToLoadSize(); index++) {
             int docId = context.docIdsToLoad()[context.docIdsToLoadFrom() + index];
-            Document doc = loadDocument(context, fieldSelector, docId);
-            Uid uid = extractUid(context, doc, fieldSelector);
+            Document doc = loadDocument(context, fieldVisitor, docId);
+            Uid uid = extractUid(context, doc, fieldVisitor);

             DocumentMapper documentMapper = context.mapperService().documentMapper(uid.type());
@@ -173,7 +175,7 @@ public class FetchPhase implements SearchPhase {
             hits[index] = searchHit;

             for (Object oField : doc.getFields()) {
-                Fieldable field = (Fieldable) oField;
+                IndexableField field = (IndexableField) oField;
                 String name = field.name();

                 // ignore UID, we handled it above
@@ -192,12 +194,13 @@ public class FetchPhase implements SearchPhase {
                     FieldMapper mapper = fieldMappers.mapper();
                     if (mapper != null) {
                         name = mapper.names().fullName();
-                        value = mapper.valueForSearch(field);
+                        // LUCENE 4 UPGRADE: do we really need to use Field instead of IndexableField?
+                        value = mapper.valueForSearch((Field) field);
                     }
                 }
                 if (value == null) {
-                    if (field.isBinary()) {
-                        value = new BytesArray(field.getBinaryValue(), field.getBinaryOffset(), field.getBinaryLength());
+                    if (field.binaryValue() != null) {
+                        value = new BytesArray(field.binaryValue());
                     } else {
                         value = field.stringValue();
                     }
@@ -215,12 +218,12 @@ public class FetchPhase implements SearchPhase {
                     hitField.values().add(value);
                 }

-                int readerIndex = context.searcher().readerIndex(docId);
-                IndexReader subReader = context.searcher().subReaders()[readerIndex];
-                int subDoc = docId - context.searcher().docStarts()[readerIndex];
+                int readerIndex = ReaderUtil.subIndex(docId, context.searcher().getIndexReader().leaves());
+                AtomicReaderContext subReaderContext = context.searcher().getIndexReader().leaves().get(readerIndex);
+                int subDoc = docId - subReaderContext.docBase;

                 // go over and extract fields that are not mapped / stored
-                context.lookup().setNextReader(subReader);
+                context.lookup().setNextReader(subReaderContext);
                 context.lookup().setNextDocId(subDoc);
                 if (source != null) {
                     context.lookup().source().setNextSource(new BytesArray(source));
@@ -246,7 +249,7 @@ public class FetchPhase implements SearchPhase {
             for (FetchSubPhase fetchSubPhase : fetchSubPhases) {
                 FetchSubPhase.HitContext hitContext = new FetchSubPhase.HitContext();
                 if (fetchSubPhase.hitExecutionNeeded(context)) {
-                    hitContext.reset(searchHit, subReader, subDoc, context.searcher().getIndexReader(), docId, doc);
+                    hitContext.reset(searchHit, subReaderContext, subDoc, context.searcher().getIndexReader(), docId, doc);
                     fetchSubPhase.hitExecute(context, hitContext);
                 }
             }
@@ -262,30 +265,36 @@ public class FetchPhase implements SearchPhase {
     }

     private byte[] extractSource(Document doc, DocumentMapper documentMapper) {
-        Fieldable sourceField = doc.getFieldable(SourceFieldMapper.NAME);
+        IndexableField sourceField = doc.getField(SourceFieldMapper.NAME);
         if (sourceField != null) {
-            return documentMapper.sourceMapper().nativeValue(sourceField);
+            // LUCENE 4 UPGRADE: I think all sourceFields are of type Field
+            return documentMapper.sourceMapper().nativeValue((Field) sourceField);
         }
         return null;
     }

-    private Uid extractUid(SearchContext context, Document doc, @Nullable ResetFieldSelector fieldSelector) {
+    private Uid extractUid(SearchContext context, Document doc, @Nullable BaseFieldVisitor fieldVisitor) {
         String sUid = doc.get(UidFieldMapper.NAME);
         if (sUid != null) {
             return Uid.createUid(sUid);
         }
         // no type, nothing to do (should not really happen)
         List fieldNames = new ArrayList();
-        for (Fieldable field : doc.getFields()) {
+        for (IndexableField field : doc.getFields()) {
             fieldNames.add(field.name());
         }
-        throw new FetchPhaseExecutionException(context, "Failed to load uid from the index, missing internal _uid field, current fields in the doc [" + fieldNames + "], selector [" + fieldSelector + "]");
+        throw new FetchPhaseExecutionException(context, "Failed to load uid from the index, missing internal _uid field, current fields in the doc [" + fieldNames + "], visitor [" + fieldVisitor + "]");
     }

-    private Document loadDocument(SearchContext context, @Nullable ResetFieldSelector fieldSelector, int docId) {
+    private Document loadDocument(SearchContext context, @Nullable BaseFieldVisitor fieldVisitor, int docId) {
         try {
-            if (fieldSelector != null) fieldSelector.reset();
-            return context.searcher().doc(docId, fieldSelector);
+            if (fieldVisitor == null) {
+                // no visitor means load everything, including _source
+                return context.searcher().doc(docId);
+            }
+            fieldVisitor.reset();
+            context.searcher().doc(docId, fieldVisitor);
+            return fieldVisitor.createDocument();
         } catch (IOException e) {
             throw new FetchPhaseExecutionException(context, "Failed to fetch doc id [" + docId + "]", e);
         }
diff --git a/src/main/java/org/elasticsearch/search/fetch/FetchSubPhase.java b/src/main/java/org/elasticsearch/search/fetch/FetchSubPhase.java
index d47da76e698..92951f90fcf 100644
--- a/src/main/java/org/elasticsearch/search/fetch/FetchSubPhase.java
+++ b/src/main/java/org/elasticsearch/search/fetch/FetchSubPhase.java
@@ -21,6 +21,8 @@ package org.elasticsearch.search.fetch;

 import com.google.common.collect.Maps;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.index.AtomicReader;
+import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.index.IndexReader;
 import org.elasticsearch.ElasticSearchException;
 import org.elasticsearch.search.SearchParseElement;
@@ -38,14 +40,14 @@ public interface FetchSubPhase {
         private InternalSearchHit hit;
         private IndexReader topLevelReader;
         private int topLevelDocId;
-        private IndexReader reader;
+        private AtomicReaderContext readerContext;
         private int docId;
         private Document doc;
         private Map cache;

-        public void reset(InternalSearchHit hit, IndexReader reader, int docId, IndexReader topLevelReader, int topLevelDocId, Document doc) {
+        public void reset(InternalSearchHit hit, AtomicReaderContext context, int docId, IndexReader topLevelReader, int topLevelDocId, Document doc) {
             this.hit = hit;
-            this.reader = reader;
+            this.readerContext = context;
             this.docId = docId;
             this.topLevelReader = topLevelReader;
             this.topLevelDocId = topLevelDocId;
@@ -56,8 +58,12 @@ public interface FetchSubPhase {
             return hit;
         }

-        public IndexReader reader() {
-            return reader;
+        public AtomicReader reader() {
+            return readerContext.reader();
+        }
+
+        public AtomicReaderContext readerContext() {
+            return readerContext;
         }

         public int docId() {
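
[editor's note] The FetchPhase hunk above swaps the searcher's precomputed subReaders/docStarts arrays for Lucene 4's leaf model: ReaderUtil.subIndex binary-searches the leaves for the segment that holds a top-level doc id, and the per-segment id falls out of the leaf's docBase. A minimal sketch, assuming only the Lucene 4.0 API (`searcher` and `docId` are illustrative):

    import java.util.List;
    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.index.ReaderUtil;

    List<AtomicReaderContext> leaves = searcher.getIndexReader().leaves();
    int readerIndex = ReaderUtil.subIndex(docId, leaves);  // which segment holds docId
    AtomicReaderContext leaf = leaves.get(readerIndex);
    int segmentDocId = docId - leaf.docBase;               // doc id within that segment
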
diff --git a/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java b/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java
index 9be81f2ae13..dc378de73ed 100644
--- a/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java
+++ b/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java
@@ -23,9 +23,6 @@ import com.google.common.collect.ImmutableMap;
 import com.google.common.collect.Maps;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.highlight.*;
 import org.apache.lucene.search.highlight.Formatter;
@@ -34,7 +31,7 @@ import org.elasticsearch.ElasticSearchException;
 import org.elasticsearch.common.component.AbstractComponent;
 import org.elasticsearch.common.inject.Inject;
 import org.elasticsearch.common.io.FastStringReader;
-import org.elasticsearch.common.lucene.document.SingleFieldSelector;
+import org.elasticsearch.common.lucene.document.SingleFieldVisitor;
 import org.elasticsearch.common.lucene.search.vectorhighlight.SimpleBoundaryScanner2;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.common.text.StringText;
@@ -129,9 +126,9 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
             if (field.highlighterType() == null) {
                 // if we can do highlighting using Term Vectors, use FastVectorHighlighter, otherwise, use the
                 // slower plain highlighter
-                useFastVectorHighlighter = mapper.termVector() == Field.TermVector.WITH_POSITIONS_OFFSETS;
+                useFastVectorHighlighter = mapper.storeTermVectors() && mapper.storeTermVectorOffsets() && mapper.storeTermVectorPositions();
             } else if (field.highlighterType().equals("fast-vector-highlighter") || field.highlighterType().equals("fvh")) {
-                if (mapper.termVector() != Field.TermVector.WITH_POSITIONS_OFFSETS) {
+                if (!(mapper.storeTermVectors() && mapper.storeTermVectorOffsets() && mapper.storeTermVectorPositions())) {
                     throw new FetchPhaseExecutionException(context, "the field [" + field.field() + "] should be indexed with term vector with position offsets to be used with fast vector highlighter");
                 }
                 useFastVectorHighlighter = true;
@@ -170,19 +167,15 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
             List textsToHighlight;
             if (mapper.stored()) {
                 try {
-                    Document doc = hitContext.reader().document(hitContext.docId(), new SingleFieldSelector(mapper.names().indexName()));
-                    textsToHighlight = new ArrayList(doc.getFields().size());
-                    for (Fieldable docField : doc.getFields()) {
-                        if (docField.stringValue() != null) {
-                            textsToHighlight.add(docField.stringValue());
-                        }
-                    }
+                    SingleFieldVisitor fieldVisitor = new SingleFieldVisitor(mapper.names().indexName());
+                    hitContext.reader().document(hitContext.docId(), fieldVisitor);
+                    textsToHighlight = (List) fieldVisitor.values();
                 } catch (Exception e) {
                     throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e);
                 }
             } else {
                 SearchLookup lookup = context.lookup();
-                lookup.setNextReader(hitContext.reader());
+                lookup.setNextReader(hitContext.readerContext());
                 lookup.setNextDocId(hitContext.docId());
                 textsToHighlight = lookup.source().extractRawValues(mapper.names().sourcePath());
             }
@@ -193,7 +186,7 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
             try {
                 for (Object textToHighlight : textsToHighlight) {
                     String text = textToHighlight.toString();
                     Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers().indexAnalyzer();
                     TokenStream tokenStream = analyzer.reusableTokenStream(mapper.names().indexName(), new FastStringReader(text));
                     TextFragment[] bestTextFragments = entry.highlighter.getBestTextFragments(tokenStream, text, false, numberOfFragments);
                     for (TextFragment bestTextFragment : bestTextFragments) {
diff --git a/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java b/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java
index 613891bda85..543c7e6b8dc 100644
--- a/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java
+++ b/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java
@@ -25,7 +25,7 @@ import org.apache.lucene.index.AtomicReaderContext;
 import org.elasticsearch.ElasticSearchIllegalArgumentException;
 import org.elasticsearch.ElasticSearchParseException;
 import org.elasticsearch.common.Nullable;
-import org.elasticsearch.common.lucene.document.SingleFieldSelector;
+import org.elasticsearch.common.lucene.document.SingleFieldVisitor;
 import org.elasticsearch.index.mapper.FieldMapper;
 import org.elasticsearch.index.mapper.MapperService;
@@ -51,7 +51,7 @@ public class FieldsLookup implements Map {

     private final Map cachedFieldData = Maps.newHashMap();

-    private final SingleFieldSelector fieldSelector = new SingleFieldSelector();
+    private final SingleFieldVisitor fieldVisitor = new SingleFieldVisitor();

     FieldsLookup(MapperService mapperService, @Nullable String[] types) {
         this.mapperService = mapperService;
@@ -152,11 +152,15 @@ public class FieldsLookup implements Map {
             cachedFieldData.put(name, data);
         }
         if (data.doc() == null) {
-            fieldSelector.name(data.mapper().names().indexName());
+            fieldVisitor.name(data.mapper().names().indexName());
             try {
-                data.doc(reader.document(docId, fieldSelector));
+                reader.document(docId, fieldVisitor);
+                // LUCENE 4 UPGRADE: only a single field is loaded; we should not need a full Document here
+                data.doc(fieldVisitor.createDocument());
             } catch (IOException e) {
                 throw new ElasticSearchParseException("failed to load field [" + name + "]", e);
+            } finally {
+                fieldVisitor.reset();
             }
         }
         return data;
diff --git a/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java b/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java
index a1b30925b64..1b10ce779c8 100644
--- a/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java
+++ b/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java
@@ -20,18 +20,14 @@
 package org.elasticsearch.search.lookup;

 import com.google.common.collect.ImmutableMap;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.AtomicReader;
 import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.ElasticSearchParseException;
 import org.elasticsearch.common.bytes.BytesReference;
 import org.elasticsearch.common.xcontent.XContentHelper;
 import org.elasticsearch.common.xcontent.support.XContentMapValues;
-import org.elasticsearch.index.mapper.internal.SourceFieldMapper;
-import org.elasticsearch.index.mapper.internal.SourceFieldSelector;
+import org.elasticsearch.index.mapper.internal.SourceFieldVisitor;

 import java.util.Collection;
 import java.util.List;
@@ -64,16 +60,17 @@ public class SourceLookup implements Map {
             return source;
         }
         try {
-            Document doc = reader.document(docId, SourceFieldSelector.INSTANCE);
-            IndexableField sourceField = doc.getField(SourceFieldMapper.NAME);
-            if (sourceField == null) {
-                source = ImmutableMap.of();
+            reader.document(docId, SourceFieldVisitor.INSTANCE);
+            BytesRef source = SourceFieldVisitor.INSTANCE.source();
+            if (source == null) {
+
this.source = ImmutableMap.of(); } else { - BytesRef source = sourceField.binaryValue(); this.source = sourceAsMap(source.bytes, source.offset, source.length); } } catch (Exception e) { throw new ElasticSearchParseException("failed to parse / load source", e); + } finally { + SourceFieldVisitor.INSTANCE.reset(); } return this.source; } From b928e7490413a191fbe64c5288a4fc98fd42e100 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 29 Oct 2012 15:11:49 +0100 Subject: [PATCH 047/146] lucene 4: Moved from FieldSelectors to FieldVisitors. Removed BaseFieldVisitor#reset and changed SourceFieldVisitor and UidFieldVisitor from singleton to prototype. --- .../lucene/document/BaseFieldVisitor.java | 3 --- .../document/MultipleFieldsVisitor.java | 5 ----- .../lucene/document/SingleFieldVisitor.java | 5 ----- .../mapper/internal/SourceFieldMapper.java | 2 +- .../mapper/internal/SourceFieldVisitor.java | 19 +++++-------------- .../selector/AllButSourceFieldVisitor.java | 1 - .../selector/UidAndRoutingFieldVisitor.java | 7 ------- .../selector/UidAndSourceFieldVisitor.java | 7 ------- .../mapper/selector/UidFieldVisitor.java | 18 ++++++------------ .../search/fetch/FetchPhase.java | 10 ++++------ .../search/lookup/SourceLookup.java | 7 +++---- 11 files changed, 19 insertions(+), 65 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/document/BaseFieldVisitor.java b/src/main/java/org/elasticsearch/common/lucene/document/BaseFieldVisitor.java index 49b54460398..a86ce265632 100644 --- a/src/main/java/org/elasticsearch/common/lucene/document/BaseFieldVisitor.java +++ b/src/main/java/org/elasticsearch/common/lucene/document/BaseFieldVisitor.java @@ -5,9 +5,6 @@ import org.apache.lucene.index.StoredFieldVisitor; public abstract class BaseFieldVisitor extends StoredFieldVisitor { - // LUCENE 4 UPGRADE: Some field visitors need to be cleared before they can be reused. Maybe a better way. - public abstract void reset(); - // LUCENE 4 UPGRADE: Added for now to make everything work. We want to make use of Document as little as possible. public abstract Document createDocument(); diff --git a/src/main/java/org/elasticsearch/common/lucene/document/MultipleFieldsVisitor.java b/src/main/java/org/elasticsearch/common/lucene/document/MultipleFieldsVisitor.java index 8120f7c6ad4..21fd6f5006a 100644 --- a/src/main/java/org/elasticsearch/common/lucene/document/MultipleFieldsVisitor.java +++ b/src/main/java/org/elasticsearch/common/lucene/document/MultipleFieldsVisitor.java @@ -73,11 +73,6 @@ public class MultipleFieldsVisitor extends BaseFieldVisitor { return fieldsToAdd == null || fieldsToAdd.contains(fieldInfo.name) ?
Status.YES : Status.NO; } - @Override - public void reset() { - doc = null; - } - @Override public Document createDocument() { return doc; diff --git a/src/main/java/org/elasticsearch/common/lucene/document/SingleFieldVisitor.java b/src/main/java/org/elasticsearch/common/lucene/document/SingleFieldVisitor.java index f4a003d685e..09ff642c8b6 100644 --- a/src/main/java/org/elasticsearch/common/lucene/document/SingleFieldVisitor.java +++ b/src/main/java/org/elasticsearch/common/lucene/document/SingleFieldVisitor.java @@ -81,9 +81,4 @@ public class SingleFieldVisitor extends BaseFieldVisitor { values.add(value); } } - - @Override - public void reset() { - values = null; - } } diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldMapper.java index e1c1d714cd1..f27d5fb75b6 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldMapper.java @@ -213,7 +213,7 @@ public class SourceFieldMapper extends AbstractFieldMapper implements In } public BaseFieldVisitor fieldSelector() { - return SourceFieldVisitor.INSTANCE; + return new SourceFieldVisitor(); } @Override diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldVisitor.java b/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldVisitor.java index cb8b06e7764..844ef9d5853 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldVisitor.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldVisitor.java @@ -32,39 +32,30 @@ import java.io.IOException; */ public class SourceFieldVisitor extends BaseFieldVisitor { - public static final SourceFieldVisitor INSTANCE = new SourceFieldVisitor(); - private static ThreadLocal loadingContext = new ThreadLocal(); - - private SourceFieldVisitor() { - } + private BytesRef source; @Override public Status needsField(FieldInfo fieldInfo) throws IOException { if (SourceFieldMapper.NAME.equals(fieldInfo.name)) { return Status.YES; } - return loadingContext.get() != null ? Status.STOP : Status.NO; + return source != null ? Status.STOP : Status.NO; } @Override public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException { - loadingContext.set(new BytesRef(value)); - } - - @Override - public void reset() { - loadingContext.remove(); + source = new BytesRef(value); } @Override public Document createDocument() { Document document = new Document(); - document.add(new StoredField("_source", loadingContext.get().utf8ToString())); + document.add(new StoredField("_source", source)); return document; } public BytesRef source() { - return loadingContext.get(); + return source; } @Override diff --git a/src/main/java/org/elasticsearch/index/mapper/selector/AllButSourceFieldVisitor.java b/src/main/java/org/elasticsearch/index/mapper/selector/AllButSourceFieldVisitor.java index 2a9103cf1d4..c586c7b4094 100644 --- a/src/main/java/org/elasticsearch/index/mapper/selector/AllButSourceFieldVisitor.java +++ b/src/main/java/org/elasticsearch/index/mapper/selector/AllButSourceFieldVisitor.java @@ -28,7 +28,6 @@ import java.io.IOException; /** * A field selector that loads all fields except the source field. 
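* Despite the legacy "selector" naming this is now a StoredFieldVisitor: it extends
* MultipleFieldsVisitor and presumably just answers Status.NO when needsField() is asked
* about the _source field, deferring every other field to the superclass.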
*/ -// LUCENE 4 UPGRADE: change into singleton public class AllButSourceFieldVisitor extends MultipleFieldsVisitor { @Override diff --git a/src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldVisitor.java b/src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldVisitor.java index 7ff8e349fbf..26af9b3963d 100644 --- a/src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldVisitor.java +++ b/src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldVisitor.java @@ -31,7 +31,6 @@ import java.io.IOException; /** * An optimized field selector that loads just the uid and the routing. */ -// LUCENE 4 UPGRADE: change into singleton public class UidAndRoutingFieldVisitor extends BaseFieldVisitor { private String uid; @@ -65,12 +64,6 @@ public class UidAndRoutingFieldVisitor extends BaseFieldVisitor { } } - @Override - public void reset() { - uid = null; - routing = null; - } - public String uid() { return uid; } diff --git a/src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldVisitor.java b/src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldVisitor.java index 9be0fcb64b6..fc9e8008d26 100644 --- a/src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldVisitor.java +++ b/src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldVisitor.java @@ -32,7 +32,6 @@ import java.io.IOException; /** * An optimized field selector that loads just the uid and the source. */ -// LUCENE 4 UPGRADE: change into singleton public class UidAndSourceFieldVisitor extends BaseFieldVisitor { private String uid; @@ -57,12 +56,6 @@ public class UidAndSourceFieldVisitor extends BaseFieldVisitor { return uid != null && source != null ? Status.STOP : Status.NO; } - @Override - public void reset() { - uid = null; - source = null; - } - @Override public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException { source = new BytesRef(value); diff --git a/src/main/java/org/elasticsearch/index/mapper/selector/UidFieldVisitor.java b/src/main/java/org/elasticsearch/index/mapper/selector/UidFieldVisitor.java index 5307e62c8ef..10c533fc904 100644 --- a/src/main/java/org/elasticsearch/index/mapper/selector/UidFieldVisitor.java +++ b/src/main/java/org/elasticsearch/index/mapper/selector/UidFieldVisitor.java @@ -32,15 +32,14 @@ import java.io.IOException; */ public class UidFieldVisitor extends BaseFieldVisitor { - public static final UidFieldVisitor INSTANCE = new UidFieldVisitor(); - private static ThreadLocal loadingContext = new ThreadLocal(); + private String uid; - private UidFieldVisitor() { + public UidFieldVisitor() { } @Override public void stringField(FieldInfo fieldInfo, String value) throws IOException { - loadingContext.set(value); + uid = value; } @Override @@ -48,23 +47,18 @@ public class UidFieldVisitor extends BaseFieldVisitor { if (UidFieldMapper.NAME.equals(fieldInfo.name)) { return Status.YES; } - return loadingContext.get() != null ? Status.STOP : Status.NO; - } - - @Override - public void reset() { - loadingContext.remove(); + return uid != null ? 
Status.STOP : Status.NO; } @Override public Document createDocument() { Document document = new Document(); - document.add(new StoredField("_uid", loadingContext.get())); + document.add(new StoredField("_uid", uid)); return document; } public String uid() { - return loadingContext.get(); + return uid; } @Override diff --git a/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java b/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java index 2d7abdb29a8..e0393866ed4 100644 --- a/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java +++ b/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java @@ -100,14 +100,14 @@ public class FetchPhase implements SearchPhase { sourceRequested = false; } else if (context.hasScriptFields()) { // we ask for script fields, and no field names, don't load the source - fieldVisitor = UidFieldVisitor.INSTANCE; + fieldVisitor = new UidFieldVisitor(); sourceRequested = false; } else { fieldVisitor = new UidAndSourceFieldVisitor(); sourceRequested = true; } } else if (context.fieldNames().isEmpty()) { - fieldVisitor = UidFieldVisitor.INSTANCE; + fieldVisitor = new UidFieldVisitor(); sourceRequested = false; } else { boolean loadAllStored = false; @@ -151,7 +151,7 @@ public class FetchPhase implements SearchPhase { } else if (extractFieldNames != null || sourceRequested) { fieldVisitor = new UidAndSourceFieldVisitor(); } else { - fieldVisitor = UidFieldVisitor.INSTANCE; + fieldVisitor = new UidFieldVisitor(); } } @@ -288,9 +288,7 @@ public class FetchPhase implements SearchPhase { private Document loadDocument(SearchContext context, @Nullable BaseFieldVisitor fieldVisitor, int docId) { try { - if (fieldVisitor != null) { - fieldVisitor.reset(); - } else { + if (fieldVisitor == null) { return context.searcher().doc(docId); } context.searcher().doc(docId, fieldVisitor); diff --git a/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java b/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java index 1b10ce779c8..a1cb2d7350a 100644 --- a/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java +++ b/src/main/java/org/elasticsearch/search/lookup/SourceLookup.java @@ -60,8 +60,9 @@ public class SourceLookup implements Map { return source; } try { - reader.document(docId, SourceFieldVisitor.INSTANCE); - BytesRef source = SourceFieldVisitor.INSTANCE.source(); + SourceFieldVisitor sourceFieldVisitor = new SourceFieldVisitor(); + reader.document(docId, sourceFieldVisitor); + BytesRef source = sourceFieldVisitor.source(); if (source == null) { this.source = ImmutableMap.of(); } else { @@ -69,8 +70,6 @@ public class SourceLookup implements Map { } } catch (Exception e) { throw new ElasticSearchParseException("failed to parse / load source", e); - } finally { - SourceFieldVisitor.INSTANCE.reset(); } return this.source; } From 42a1d25064edd6db5815a3d172891145ef9ecb7f Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 29 Oct 2012 15:27:26 +0100 Subject: [PATCH 048/146] lucene 4: Fixed last compile errors in HighlightPhase --- .../org/elasticsearch/search/highlight/HighlightPhase.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java b/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java index dc378de73ed..509d2441399 100644 --- a/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java +++ b/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java @@ -186,8 +186,8 @@ public class 
HighlightPhase extends AbstractComponent implements FetchSubPhase { try { for (Object textToHighlight : textsToHighlight) { String text = textToHighlight.toString(); - Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers().indexAnalyzer() - TokenStream tokenStream = analyzer.reusableTokenStream(mapper.names().indexName(), new FastStringReader(text)); + Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers().indexAnalyzer(); + TokenStream tokenStream = analyzer.tokenStream(mapper.names().indexName(), new FastStringReader(text)); TextFragment[] bestTextFragments = entry.highlighter.getBestTextFragments(tokenStream, text, false, numberOfFragments); for (TextFragment bestTextFragment : bestTextFragments) { if (bestTextFragment != null && bestTextFragment.getScore() > 0) { From d531fa7a46ffb2de08caee09d5e1a358da6d630c Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Mon, 29 Oct 2012 15:42:29 +0100 Subject: [PATCH 049/146] lucene 4: Fixed compile error in FieldLookup --- .../java/org/elasticsearch/search/lookup/FieldLookup.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/lookup/FieldLookup.java b/src/main/java/org/elasticsearch/search/lookup/FieldLookup.java index eab43acfb8b..52b33eaa619 100644 --- a/src/main/java/org/elasticsearch/search/lookup/FieldLookup.java +++ b/src/main/java/org/elasticsearch/search/lookup/FieldLookup.java @@ -20,7 +20,7 @@ package org.elasticsearch.search.lookup; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexableField; import org.elasticsearch.index.mapper.FieldMapper; @@ -89,7 +89,7 @@ public class FieldLookup { if (field == null) { return null; } - value = mapper.value(field); + value = mapper.value((Field) field); return value; } @@ -101,7 +101,7 @@ public class FieldLookup { values.clear(); IndexableField[] fields = doc.getFields(mapper.names().indexName()); for (IndexableField field : fields) { - values.add(mapper.value(field)); + values.add(mapper.value((Field) field)); } return values; } From 595acd695e43bc3de3afe8c537866066ba9e5ff6 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Mon, 29 Oct 2012 13:09:21 +0100 Subject: [PATCH 050/146] lucene 4: s/reusableTokenStream/tokenStream --- .../action/admin/indices/analyze/TransportAnalyzeAction.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java b/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java index 0bf769cc8e6..8de2c356c2c 100644 --- a/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java +++ b/src/main/java/org/elasticsearch/action/admin/indices/analyze/TransportAnalyzeAction.java @@ -198,7 +198,7 @@ public class TransportAnalyzeAction extends TransportSingleCustomOperationAction<AnalyzeRequest, AnalyzeResponse> { List<AnalyzeResponse.AnalyzeToken> tokens = Lists.newArrayList(); TokenStream stream = null; try { - stream = analyzer.reusableTokenStream(field, new FastStringReader(request.text())); + stream = analyzer.tokenStream(field, new FastStringReader(request.text())); stream.reset(); CharTermAttribute term = stream.addAttribute(CharTermAttribute.class); PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class); From 22c14c73546b915a735d1c9eb80e36808847c1db Mon Sep 17 00:00:00 2001 From:
Simon Willnauer Date: Mon, 29 Oct 2012 17:42:18 +0100 Subject: [PATCH 051/146] lucene 4: lucene package cleanups --- .../lucene/search/PublicTermsFilter.java | 116 ----------- .../search/ShardFieldDocSortedHitQueue.java | 26 +-- .../apache/lucene/search/XTermsFilter.java | 195 ++++++++++++------ .../common/lucene/DocumentBuilder.java | 90 -------- .../common/lucene/FieldBuilder.java | 65 ------ .../common/lucene/IndexCommitDelegate.java | 10 - .../elasticsearch/common/lucene/Lucene.java | 14 +- .../common/lucene/MinimumScoreCollector.java | 5 +- .../common/lucene/MultiCollector.java | 8 +- .../common/lucene/all/AllTermQuery.java | 82 +++++--- .../common/lucene/all/AllTokenStream.java | 9 +- .../lucene/docset/DocIdSetCollector.java | 7 +- 12 files changed, 210 insertions(+), 417 deletions(-) delete mode 100644 src/main/java/org/apache/lucene/search/PublicTermsFilter.java delete mode 100644 src/main/java/org/elasticsearch/common/lucene/DocumentBuilder.java delete mode 100644 src/main/java/org/elasticsearch/common/lucene/FieldBuilder.java diff --git a/src/main/java/org/apache/lucene/search/PublicTermsFilter.java b/src/main/java/org/apache/lucene/search/PublicTermsFilter.java deleted file mode 100644 index c811de5f021..00000000000 --- a/src/main/java/org/apache/lucene/search/PublicTermsFilter.java +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.apache.lucene.search; - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.util.FixedBitSet; -import org.elasticsearch.common.lucene.Lucene; - -import java.io.IOException; -import java.util.Iterator; -import java.util.Set; -import java.util.TreeSet; - -/** - * - */ -// LUCENE MONITOR: Against TermsFilter -public class PublicTermsFilter extends Filter { - - Set terms = new TreeSet(); - - /** - * Adds a term to the list of acceptable terms - * - * @param term - */ - public void addTerm(Term term) { - terms.add(term); - } - - public Set getTerms() { - return terms; - } - - @Override - public boolean equals(Object obj) { - if (this == obj) - return true; - if ((obj == null) || (obj.getClass() != this.getClass())) - return false; - PublicTermsFilter test = (PublicTermsFilter) obj; - return (terms == test.terms || - (terms != null && terms.equals(test.terms))); - } - - @Override - public int hashCode() { - int hash = 9; - for (Iterator iter = terms.iterator(); iter.hasNext(); ) { - Term term = iter.next(); - hash = 31 * hash + term.hashCode(); - } - return hash; - } - - @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - FixedBitSet result = null; - TermDocs td = reader.termDocs(); - try { - // batch read, in Lucene 4.0 its no longer needed - int[] docs = new int[Lucene.BATCH_ENUM_DOCS]; - int[] freqs = new int[Lucene.BATCH_ENUM_DOCS]; - for (Term term : terms) { - td.seek(term); - int number = td.read(docs, freqs); - if (number > 0) { - if (result == null) { - result = new FixedBitSet(reader.maxDoc()); - } - while (number > 0) { - for (int i = 0; i < number; i++) { - result.set(docs[i]); - } - number = td.read(docs, freqs); - } - } - } - } finally { - td.close(); - } - return result; - } - - @Override - public String toString() { - StringBuilder builder = new StringBuilder(); - for (Term term : terms) { - if (builder.length() > 0) { - builder.append(' '); - } - builder.append(term); - } - return builder.toString(); - } - -} diff --git a/src/main/java/org/apache/lucene/search/ShardFieldDocSortedHitQueue.java b/src/main/java/org/apache/lucene/search/ShardFieldDocSortedHitQueue.java index 1aa325f20bb..6544b2427ae 100644 --- a/src/main/java/org/apache/lucene/search/ShardFieldDocSortedHitQueue.java +++ b/src/main/java/org/apache/lucene/search/ShardFieldDocSortedHitQueue.java @@ -48,7 +48,7 @@ public class ShardFieldDocSortedHitQueue extends PriorityQueue { * @param size The number of hits to retain. Must be greater than zero. */ public ShardFieldDocSortedHitQueue(SortField[] fields, int size) { - initialize(size); + super(size); setFields(fields); } @@ -83,26 +83,6 @@ public class ShardFieldDocSortedHitQueue extends PriorityQueue { return fields; } - - /** - * Returns an array of collators, possibly null. The collators - * correspond to any SortFields which were given a specific locale. - * - * @param fields Array of sort fields. - * @return Array, possibly null. - */ - private Collator[] hasCollators(final SortField[] fields) { - if (fields == null) return null; - Collator[] ret = new Collator[fields.length]; - for (int i = 0; i < fields.length; ++i) { - Locale locale = fields[i].getLocale(); - if (locale != null) - ret[i] = Collator.getInstance(locale); - } - return ret; - } - - /** * Returns whether a is less relevant than b. 
* @@ -116,8 +96,8 @@ public class ShardFieldDocSortedHitQueue extends PriorityQueue { final int n = fields.length; int c = 0; for (int i = 0; i < n && c == 0; ++i) { - final int type = fields[i].getType(); - if (type == SortField.STRING) { + final SortField.Type type = fields[i].getType(); + if (type == SortField.Type.STRING) { final String s1 = (String) docA.fields[i]; final String s2 = (String) docB.fields[i]; // null values need to be sorted first, because of how FieldCache.getStringIndex() diff --git a/src/main/java/org/apache/lucene/search/XTermsFilter.java b/src/main/java/org/apache/lucene/search/XTermsFilter.java index ccfcddc7332..a2f2a8515fd 100644 --- a/src/main/java/org/apache/lucene/search/XTermsFilter.java +++ b/src/main/java/org/apache/lucene/search/XTermsFilter.java @@ -19,91 +19,170 @@ package org.apache.lucene.search; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.Fields; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.queries.TermsFilter; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.FixedBitSet; -import org.elasticsearch.common.lucene.Lucene; import java.io.IOException; import java.util.Arrays; +import java.util.Collection; /** * Similar to {@link TermsFilter} but stores the terms in an array for better memory usage * when cached, and also uses bulk read */ -// LUCENE MONITOR: Against TermsFilter +// LUCENE MONITOR: Against TermsFilter - this is now identical to TermsFilter once 4.1 is released public class XTermsFilter extends Filter { - private final Term[] terms; - - public XTermsFilter(Term term) { - this.terms = new Term[]{term}; + private final Term[] filterTerms; + private final boolean[] resetTermsEnum;// true if the enum must be reset when building the bitset + private final int length; + + /** + * Creates a new {@link XTermsFilter} from the given collection. The collection + * can contain duplicate terms and multiple fields. + */ + public XTermsFilter(Collection terms) { + this(terms.toArray(new Term[terms.size()])); + } + + /** + * Creates a new {@link XTermsFilter} from the given array. The array can + * contain duplicate terms and multiple fields. + */ + public XTermsFilter(Term... 
terms) { + if (terms == null || terms.length == 0) { + throw new IllegalArgumentException("TermsFilter requires at least one term"); + } + Arrays.sort(terms); + this.filterTerms = new Term[terms.length]; + this.resetTermsEnum = new boolean[terms.length]; + int index = 0; + for (int i = 0; i < terms.length; i++) { + Term currentTerm = terms[i]; + boolean fieldChanged = true; + if (index > 0) { + // deduplicate + if (filterTerms[index-1].field().equals(currentTerm.field())) { + fieldChanged = false; + if (filterTerms[index-1].bytes().bytesEquals(currentTerm.bytes())){ + continue; + } + } + } + this.filterTerms[index] = currentTerm; + this.resetTermsEnum[index] = index == 0 || fieldChanged; // mark index 0 so we have a clear path in the iteration + + index++; + } + length = index; } - public XTermsFilter(Term[] terms) { - Arrays.sort(terms); - this.terms = terms; + + @Override + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + AtomicReader reader = context.reader(); + FixedBitSet result = null; // lazy init if needed - no need to create a big bitset ahead of time + Fields fields = reader.fields(); + if (fields == null) { + return result; + } + final BytesRef br = new BytesRef(); + Terms terms = null; + TermsEnum termsEnum = null; + DocsEnum docs = null; + assert resetTermsEnum[0]; + for (int i = 0; i < length; i++) { + Term term = this.filterTerms[i]; + if (resetTermsEnum[i]) { + terms = fields.terms(term.field()); + if (terms == null) { + i = skipToNextField(i+1, length); // skip to the next field since this field is not indexed + continue; + } + } + if ((termsEnum = terms.iterator(termsEnum)) != null) { + br.copyBytes(term.bytes()); + assert termsEnum != null; + if (termsEnum.seekExact(br,true)) { + docs = termsEnum.docs(acceptDocs, docs, 0); + if (result == null) { + if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + result = new FixedBitSet(reader.maxDoc()); + // lazy init but don't do it in the hot loop since we could read many docs + result.set(docs.docID()); + } + } + while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + result.set(docs.docID()); + } + } + } + } + return result; } - public Term[] getTerms() { - return terms; + private final int skipToNextField(int index, int length) { + for (int i = index; i < length; i++) { + if (resetTermsEnum[i]) { + return i-1; + } + } + return length; } + @Override public boolean equals(Object obj) { - if (this == obj) - return true; - if ((obj == null) || (obj.getClass() != this.getClass())) - return false; - XTermsFilter test = (XTermsFilter) obj; - return Arrays.equals(terms, test.terms); + if (this == obj) { + return true; + } + if ((obj == null) || (obj.getClass() != this.getClass())) { + return false; + } + XTermsFilter test = (XTermsFilter) obj; + if (filterTerms != test.filterTerms) { + if (length == test.length) { + for (int i = 0; i < length; i++) { + // can not be null! 
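+ // (safe to compare slot by slot: the constructor rejects null/empty input and has
+ // already sorted and deduplicated the terms, so every slot below length is non-null)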
+ if (!filterTerms[i].equals(test.filterTerms[i])) { + return false; + } + } + } else { + return false; + } + } + return true; + } @Override public int hashCode() { - return Arrays.hashCode(terms); + int hash = 9; + for (int i = 0; i < length; i++) { + hash = 31 * hash + filterTerms[i].hashCode(); + } + return hash; } - - @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - FixedBitSet result = null; - TermDocs td = reader.termDocs(); - try { - // batch read, in Lucene 4.0 its no longer needed - int[] docs = new int[Lucene.BATCH_ENUM_DOCS]; - int[] freqs = new int[Lucene.BATCH_ENUM_DOCS]; - for (Term term : terms) { - td.seek(term); - int number = td.read(docs, freqs); - if (number > 0) { - if (result == null) { - result = new FixedBitSet(reader.maxDoc()); - } - while (number > 0) { - for (int i = 0; i < number; i++) { - result.set(docs[i]); - } - number = td.read(docs, freqs); - } - } - } - } finally { - td.close(); - } - return result; - } - + @Override public String toString() { - StringBuilder builder = new StringBuilder(); - for (Term term : terms) { - if (builder.length() > 0) { - builder.append(' '); - } - builder.append(term); + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < length; i++) { + if (builder.length() > 0) { + builder.append(' '); } - return builder.toString(); + builder.append(filterTerms[i]); + } + return builder.toString(); } - } diff --git a/src/main/java/org/elasticsearch/common/lucene/DocumentBuilder.java b/src/main/java/org/elasticsearch/common/lucene/DocumentBuilder.java deleted file mode 100644 index a2902de6c8d..00000000000 --- a/src/main/java/org/elasticsearch/common/lucene/DocumentBuilder.java +++ /dev/null @@ -1,90 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.common.lucene; - -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; -import org.elasticsearch.common.lucene.uid.UidField; - -/** - * - */ -public class DocumentBuilder { - - public static final Document EMPTY = new Document(); - - public static DocumentBuilder doc() { - return new DocumentBuilder(); - } - - public static Fieldable uidField(String value) { - return uidField(value, 0); - } - - public static Fieldable uidField(String value, long version) { - return new UidField("_uid", value, version); - } - - public static FieldBuilder field(String name, String value) { - return field(name, value, Field.Store.YES, Field.Index.ANALYZED); - } - - public static FieldBuilder field(String name, String value, Field.Store store, Field.Index index) { - return new FieldBuilder(name, value, store, index); - } - - public static FieldBuilder field(String name, String value, Field.Store store, Field.Index index, Field.TermVector termVector) { - return new FieldBuilder(name, value, store, index, termVector); - } - - public static FieldBuilder field(String name, byte[] value, Field.Store store) { - return new FieldBuilder(name, value, store); - } - - public static FieldBuilder field(String name, byte[] value, int offset, int length, Field.Store store) { - return new FieldBuilder(name, value, offset, length, store); - } - - private final Document document; - - private DocumentBuilder() { - this.document = new Document(); - } - - public DocumentBuilder boost(float boost) { - document.setBoost(boost); - return this; - } - - public DocumentBuilder add(Fieldable field) { - document.add(field); - return this; - } - - public DocumentBuilder add(FieldBuilder fieldBuilder) { - document.add(fieldBuilder.build()); - return this; - } - - public Document build() { - return document; - } -} diff --git a/src/main/java/org/elasticsearch/common/lucene/FieldBuilder.java b/src/main/java/org/elasticsearch/common/lucene/FieldBuilder.java deleted file mode 100644 index cce9bbcd364..00000000000 --- a/src/main/java/org/elasticsearch/common/lucene/FieldBuilder.java +++ /dev/null @@ -1,65 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.common.lucene; - -import org.apache.lucene.document.Field; - -/** - * - */ -public class FieldBuilder { - - private final Field field; - - FieldBuilder(String name, String value, Field.Store store, Field.Index index) { - field = new Field(name, value, store, index); - } - - FieldBuilder(String name, String value, Field.Store store, Field.Index index, Field.TermVector termVector) { - field = new Field(name, value, store, index, termVector); - } - - FieldBuilder(String name, byte[] value, Field.Store store) { - field = new Field(name, value, store); - } - - FieldBuilder(String name, byte[] value, int offset, int length, Field.Store store) { - field = new Field(name, value, offset, length, store); - } - - public FieldBuilder boost(float boost) { - field.setBoost(boost); - return this; - } - - public FieldBuilder omitNorms(boolean omitNorms) { - field.setOmitNorms(omitNorms); - return this; - } - - public FieldBuilder omitTermFreqAndPositions(boolean omitTermFreqAndPositions) { - field.setOmitTermFreqAndPositions(omitTermFreqAndPositions); - return this; - } - - public Field build() { - return field; - } -} diff --git a/src/main/java/org/elasticsearch/common/lucene/IndexCommitDelegate.java b/src/main/java/org/elasticsearch/common/lucene/IndexCommitDelegate.java index c0b5b2098d4..a3a5813f5c2 100644 --- a/src/main/java/org/elasticsearch/common/lucene/IndexCommitDelegate.java +++ b/src/main/java/org/elasticsearch/common/lucene/IndexCommitDelegate.java @@ -86,21 +86,11 @@ public abstract class IndexCommitDelegate extends IndexCommit { return delegate.hashCode(); } - @Override - public long getVersion() { - return delegate.getVersion(); - } - @Override public long getGeneration() { return delegate.getGeneration(); } - @Override - public long getTimestamp() throws IOException { - return delegate.getTimestamp(); - } - @Override public Map getUserData() throws IOException { return delegate.getUserData(); diff --git a/src/main/java/org/elasticsearch/common/lucene/Lucene.java b/src/main/java/org/elasticsearch/common/lucene/Lucene.java index b275145fc93..000a147acdf 100644 --- a/src/main/java/org/elasticsearch/common/lucene/Lucene.java +++ b/src/main/java/org/elasticsearch/common/lucene/Lucene.java @@ -92,18 +92,6 @@ public class Lucene { return countCollector.getTotalHits(); } - public static int docId(IndexReader reader, Term term) throws IOException { - TermDocs termDocs = reader.termDocs(term); - try { - if (termDocs.next()) { - return termDocs.doc(); - } - return NO_DOC; - } finally { - termDocs.close(); - } - } - /** * Closes the index writer, returning false if it failed to close. 
*/ @@ -352,7 +340,7 @@ public class Lucene { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { + public void setNextReader(AtomicReaderContext context) throws IOException { } @Override diff --git a/src/main/java/org/elasticsearch/common/lucene/MinimumScoreCollector.java b/src/main/java/org/elasticsearch/common/lucene/MinimumScoreCollector.java index e97d4bd7f19..31069453b46 100644 --- a/src/main/java/org/elasticsearch/common/lucene/MinimumScoreCollector.java +++ b/src/main/java/org/elasticsearch/common/lucene/MinimumScoreCollector.java @@ -19,6 +19,7 @@ package org.elasticsearch.common.lucene; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Collector; import org.apache.lucene.search.ScoreCachingWrappingScorer; @@ -59,8 +60,8 @@ public class MinimumScoreCollector extends Collector { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - collector.setNextReader(reader, docBase); + public void setNextReader(AtomicReaderContext context) throws IOException { + collector.setNextReader(context); } @Override diff --git a/src/main/java/org/elasticsearch/common/lucene/MultiCollector.java b/src/main/java/org/elasticsearch/common/lucene/MultiCollector.java index ab44e50627e..d20884fa6ef 100644 --- a/src/main/java/org/elasticsearch/common/lucene/MultiCollector.java +++ b/src/main/java/org/elasticsearch/common/lucene/MultiCollector.java @@ -19,7 +19,7 @@ package org.elasticsearch.common.lucene; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Collector; import org.apache.lucene.search.ScoreCachingWrappingScorer; import org.apache.lucene.search.Scorer; @@ -61,10 +61,10 @@ public class MultiCollector extends Collector { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - collector.setNextReader(reader, docBase); + public void setNextReader(AtomicReaderContext context) throws IOException { + collector.setNextReader(context); for (Collector collector : collectors) { - collector.setNextReader(reader, docBase); + collector.setNextReader(context); } } diff --git a/src/main/java/org/elasticsearch/common/lucene/all/AllTermQuery.java b/src/main/java/org/elasticsearch/common/lucene/all/AllTermQuery.java index 791b655d903..378dad3a9b3 100644 --- a/src/main/java/org/elasticsearch/common/lucene/all/AllTermQuery.java +++ b/src/main/java/org/elasticsearch/common/lucene/all/AllTermQuery.java @@ -19,14 +19,21 @@ package org.elasticsearch.common.lucene.all; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermPositions; import org.apache.lucene.search.*; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarities.Similarity.SloppySimScorer; import org.apache.lucene.search.spans.SpanScorer; import org.apache.lucene.search.spans.SpanTermQuery; import org.apache.lucene.search.spans.SpanWeight; import org.apache.lucene.search.spans.TermSpans; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; import java.io.IOException; @@ -51,32 +58,35 @@ public class AllTermQuery extends SpanTermQuery { } @Override - 
public Weight createWeight(Searcher searcher) throws IOException { + public Weight createWeight(IndexSearcher searcher) throws IOException { return new AllTermWeight(this, searcher); } protected class AllTermWeight extends SpanWeight { - public AllTermWeight(AllTermQuery query, Searcher searcher) throws IOException { + public AllTermWeight(AllTermQuery query, IndexSearcher searcher) throws IOException { super(query, searcher); } @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, - boolean topScorer) throws IOException { - return new AllTermSpanScorer((TermSpans) query.getSpans(reader), this, similarity, reader.norms(query.getField())); + public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, + boolean topScorer, Bits acceptDocs) throws IOException { + if (this.stats == null) { + return null; + } + AtomicReader reader = context.reader(); + SloppySimScorer sloppySimScorer = similarity.sloppySimScorer(stats, context); + return new AllTermSpanScorer((TermSpans) query.getSpans(context, acceptDocs, termContexts), this, sloppySimScorer); } protected class AllTermSpanScorer extends SpanScorer { - // TODO: is this the best way to allocate this? - protected byte[] payload = new byte[4]; - protected TermPositions positions; + protected DocsAndPositionsEnum positions; protected float payloadScore; protected int payloadsSeen; - public AllTermSpanScorer(TermSpans spans, Weight weight, Similarity similarity, byte[] norms) throws IOException { - super(spans, weight, similarity, norms); - positions = spans.getPositions(); + public AllTermSpanScorer(TermSpans spans, Weight weight, Similarity.SloppySimScorer docScorer) throws IOException { + super(spans, weight, docScorer); + positions = spans.getPostings(); } @Override @@ -88,12 +98,11 @@ public class AllTermQuery extends SpanTermQuery { freq = 0.0f; payloadScore = 0; payloadsSeen = 0; - Similarity similarity1 = getSimilarity(); while (more && doc == spans.doc()) { int matchLength = spans.end() - spans.start(); - freq += similarity1.sloppyFreq(matchLength); - processPayload(similarity1); + freq += docScorer.computeSlopFactor(matchLength); + processPayload(); more = spans.next();// this moves positions to the next match in this // document @@ -101,10 +110,10 @@ public class AllTermQuery extends SpanTermQuery { return more || (freq != 0); } - protected void processPayload(Similarity similarity) throws IOException { - if (positions.isPayloadAvailable()) { - payload = positions.getPayload(payload, 0); - payloadScore += decodeFloat(payload); + protected void processPayload() throws IOException { + final BytesRef payload; + if ((payload = positions.getPayload()) != null) { + payloadScore += decodeFloat(payload.bytes, payload.offset); payloadsSeen++; } else { @@ -141,27 +150,40 @@ public class AllTermQuery extends SpanTermQuery { return payloadsSeen > 0 ? 
(payloadScore / payloadsSeen) : 1; } - @Override - protected Explanation explain(final int doc) throws IOException { + } + + @Override + public Explanation explain(AtomicReaderContext context, int doc) throws IOException{ + AllTermSpanScorer scorer = (AllTermSpanScorer) scorer(context, true, false, context.reader().getLiveDocs()); + if (scorer != null) { + int newDoc = scorer.advance(doc); + if (newDoc == doc) { + float freq = scorer.freq(); + SloppySimScorer docScorer = similarity.sloppySimScorer(stats, context); + ComplexExplanation inner = new ComplexExplanation(); + inner.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); + Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq)); + inner.addDetail(scoreExplanation); + inner.setValue(scoreExplanation.getValue()); + inner.setMatch(true); ComplexExplanation result = new ComplexExplanation(); - Explanation nonPayloadExpl = super.explain(doc); - result.addDetail(nonPayloadExpl); - // QUESTION: Is there a way to avoid this skipTo call? We need to know - // whether to load the payload or not + result.addDetail(inner); Explanation payloadBoost = new Explanation(); result.addDetail(payloadBoost); - - float payloadScore = getPayloadScore(); + final float payloadScore = scorer.getPayloadScore(); payloadBoost.setValue(payloadScore); // GSI: I suppose we could toString the payload, but I don't think that // would be a good idea payloadBoost.setDescription("allPayload(...)"); - result.setValue(nonPayloadExpl.getValue() * payloadScore); + result.setValue(inner.getValue() * payloadScore); result.setDescription("btq, product of:"); - result.setMatch(nonPayloadExpl.getValue() == 0 ? Boolean.FALSE : Boolean.TRUE); // LUCENE-1303 return result; + } } - + + return new ComplexExplanation(false, 0.0f, "no matching term"); + + } } diff --git a/src/main/java/org/elasticsearch/common/lucene/all/AllTokenStream.java b/src/main/java/org/elasticsearch/common/lucene/all/AllTokenStream.java index 34bb8925c0d..04e5e77da9d 100644 --- a/src/main/java/org/elasticsearch/common/lucene/all/AllTokenStream.java +++ b/src/main/java/org/elasticsearch/common/lucene/all/AllTokenStream.java @@ -23,7 +23,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.index.Payload; +import org.apache.lucene.util.BytesRef; import java.io.IOException; @@ -35,8 +35,10 @@ import static org.apache.lucene.analysis.payloads.PayloadHelper.encodeFloat; public final class AllTokenStream extends TokenFilter { public static TokenStream allTokenStream(String allFieldName, AllEntries allEntries, Analyzer analyzer) throws IOException { - return new AllTokenStream(analyzer.reusableTokenStream(allFieldName, allEntries), allEntries); + return new AllTokenStream(analyzer.tokenStream(allFieldName, allEntries), allEntries); } + + private final BytesRef payloadSpare = new BytesRef(new byte[4]); private final AllEntries allEntries; @@ -60,7 +62,8 @@ public final class AllTokenStream extends TokenFilter { if (allEntries.current() != null) { float boost = allEntries.current().boost(); if (boost != 1.0f) { - payloadAttribute.setPayload(new Payload(encodeFloat(boost))); + encodeFloat(boost, payloadSpare.bytes, payloadSpare.offset); + payloadAttribute.setPayload(payloadSpare); } else { payloadAttribute.setPayload(null); } diff --git 
a/src/main/java/org/elasticsearch/common/lucene/docset/DocIdSetCollector.java b/src/main/java/org/elasticsearch/common/lucene/docset/DocIdSetCollector.java index dbe5c77490b..c8d8158dd42 100644 --- a/src/main/java/org/elasticsearch/common/lucene/docset/DocIdSetCollector.java +++ b/src/main/java/org/elasticsearch/common/lucene/docset/DocIdSetCollector.java @@ -19,6 +19,7 @@ package org.elasticsearch.common.lucene.docset; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Collector; import org.apache.lucene.search.Scorer; @@ -58,9 +59,9 @@ public class DocIdSetCollector extends Collector { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - base = docBase; - collector.setNextReader(reader, docBase); + public void setNextReader(AtomicReaderContext ctx) throws IOException { + base = ctx.docBase; + collector.setNextReader(ctx); } @Override From fcc4fe263eb56c2ecc6dd1ac88f5f64f828ef3f5 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 30 Oct 2012 11:33:57 +0100 Subject: [PATCH 052/146] lucene 4: Upgraded PercolatorExecutor --- .../index/cache/id/simple/SimpleIdCache.java | 22 ------------ .../org/elasticsearch/index/mapper/Uid.java | 34 +++++++++++++++++++ .../index/percolator/PercolatorExecutor.java | 32 ++++++++++------- 3 files changed, 54 insertions(+), 34 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java index 38aeef884f6..adc54eef75c 100644 --- a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java +++ b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java @@ -237,28 +237,6 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se return false; } - // LUCENE 4 UPGRADE: This logic should go to Uid class. 
Uid class should BR based instead of string - private static HashedBytesArray[] splitUidIntoTypeAndId(BytesRef term) { - int loc = -1; - for (int i = term.offset; i < term.length; i++) { - if (term.bytes[i] == 0x23) { // 0x23 is equal to '#' - loc = i; - break; - } - } - - if (loc == -1) { - return null; - } - - byte[] type = new byte[loc - term.offset]; - System.arraycopy(term.bytes, term.offset, type, 0, type.length); - - byte[] id = new byte[term.length - type.length -1]; - System.arraycopy(term.bytes, loc + 1, id, 0, id.length); - return new HashedBytesArray[]{new HashedBytesArray(type), new HashedBytesArray(id)}; - } - static class TypeBuilder { final ExtTObjectIntHasMap idToDoc = new ExtTObjectIntHasMap(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1); final HashedBytesArray[] docToId; diff --git a/src/main/java/org/elasticsearch/index/mapper/Uid.java b/src/main/java/org/elasticsearch/index/mapper/Uid.java index ced66f91256..46dc1197656 100644 --- a/src/main/java/org/elasticsearch/index/mapper/Uid.java +++ b/src/main/java/org/elasticsearch/index/mapper/Uid.java @@ -19,6 +19,9 @@ package org.elasticsearch.index.mapper; +import org.apache.lucene.util.BytesRef; +import org.elasticsearch.common.bytes.HashedBytesArray; + /** * */ @@ -77,6 +80,14 @@ public final class Uid { return uid.substring(delimiterIndex + 1); } + public static HashedBytesArray idFromUid(BytesRef uid) { + return splitUidIntoTypeAndId(uid)[1]; + } + + public static HashedBytesArray typeFromUid(BytesRef uid) { + return splitUidIntoTypeAndId(uid)[0]; + } + public static String typeFromUid(String uid) { int delimiterIndex = uid.indexOf(DELIMITER); // type is not allowed to have # in it..., ids can return uid.substring(0, delimiterIndex); @@ -94,4 +105,27 @@ public final class Uid { public static String createUid(StringBuilder sb, String type, String id) { return sb.append(type).append(DELIMITER).append(id).toString(); } + + // LUCENE 4 UPGRADE: HashedBytesArray or BytesRef as return type? 
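+ // (the trade-off behind that question: a BytesRef avoids a copy but merely aliases
+ // bytes that a TermsEnum will reuse on its next call, while HashedBytesArray owns its
+ // copy and caches the hash code, which suits use as a map key in the id cache)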
+ private static HashedBytesArray[] splitUidIntoTypeAndId(BytesRef uid) { + int loc = -1; + for (int i = uid.offset; i < uid.length; i++) { + if (uid.bytes[i] == 0x23) { // 0x23 is equal to '#' + loc = i; + break; + } + } + + if (loc == -1) { + return null; + } + + byte[] type = new byte[loc - uid.offset]; + System.arraycopy(uid.bytes, uid.offset, type, 0, type.length); + + byte[] id = new byte[uid.length - type.length -1]; + System.arraycopy(uid.bytes, loc + 1, id, 0, id.length); + return new HashedBytesArray[]{new HashedBytesArray(type), new HashedBytesArray(id)}; + } + } diff --git a/src/main/java/org/elasticsearch/index/percolator/PercolatorExecutor.java b/src/main/java/org/elasticsearch/index/percolator/PercolatorExecutor.java index 7043c3a0cb1..5445bc1fd2c 100644 --- a/src/main/java/org/elasticsearch/index/percolator/PercolatorExecutor.java +++ b/src/main/java/org/elasticsearch/index/percolator/PercolatorExecutor.java @@ -20,13 +20,14 @@ package org.elasticsearch.index.percolator; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.memory.CustomMemoryIndex; import org.apache.lucene.search.Collector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Preconditions; @@ -292,22 +293,29 @@ public class PercolatorExecutor extends AbstractIndexComponent { final CustomMemoryIndex memoryIndex = new CustomMemoryIndex(); // TODO: This means percolation does not support nested docs... 
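// (only request.doc().rootDoc() is fed into the single-document memory index below; the
// extra Lucene documents generated for nested objects are never indexed, so a query on a
// nested field has nothing to match at percolation time)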
- for (Fieldable field : request.doc().rootDoc().getFields()) { - if (!field.isIndexed()) { + for (IndexableField field : request.doc().rootDoc().getFields()) { + if (!field.fieldType().indexed()) { continue; } // no need to index the UID field if (field.name().equals(UidFieldMapper.NAME)) { continue; } - TokenStream tokenStream = field.tokenStreamValue(); + TokenStream tokenStream; + try { + tokenStream = field.tokenStream( + mapperService.documentMapper(request.doc().type()).mappers().smartNameFieldMapper(field.name()).indexAnalyzer() + ); + } catch (IOException e) { + throw new ElasticSearchException("Failed to create token stream", e); + } if (tokenStream != null) { - memoryIndex.addField(field.name(), tokenStream, field.getBoost()); + memoryIndex.addField(field.name(), tokenStream, field.boost()); } else { Reader reader = field.readerValue(); if (reader != null) { try { - memoryIndex.addField(field.name(), request.doc().analyzer().reusableTokenStream(field.name(), reader), field.getBoost() * request.doc().rootDoc().getBoost()); + memoryIndex.addField(field.name(), request.doc().analyzer().tokenStream(field.name(), reader), field.boost() /** request.doc().rootDoc().getBoost()*/); } catch (IOException e) { throw new MapperParsingException("Failed to analyze field [" + field.name() + "]", e); } @@ -315,7 +323,7 @@ public class PercolatorExecutor extends AbstractIndexComponent { String value = field.stringValue(); if (value != null) { try { - memoryIndex.addField(field.name(), request.doc().analyzer().reusableTokenStream(field.name(), new FastStringReader(value)), field.getBoost() * request.doc().rootDoc().getBoost()); + memoryIndex.addField(field.name(), request.doc().analyzer().tokenStream(field.name(), new FastStringReader(value)), field.boost() /** request.doc().rootDoc().getBoost()*/); } catch (IOException e) { throw new MapperParsingException("Failed to analyze field [" + field.name() + "]", e); } @@ -398,11 +406,11 @@ public class PercolatorExecutor extends AbstractIndexComponent { @Override public void collect(int doc) throws IOException { - String uid = fieldData.stringValue(doc); + BytesRef uid = fieldData.stringValue(doc); if (uid == null) { return; } - String id = Uid.idFromUid(uid); + String id = Uid.idFromUid(uid).toUtf8(); Query query = queries.get(id); if (query == null) { // log??? 
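The machinery being ported here is the classic MemoryIndex percolation recipe: index the one candidate document into a throwaway in-memory index, then run every registered query against it and report the ids of the queries that score. A self-contained sketch of that recipe against plain Lucene 4.0 (the field name, text, and query are made up for illustration, and the sketch uses the stock MemoryIndex rather than the CustomMemoryIndex above):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.index.memory.MemoryIndex;
    import org.apache.lucene.search.TermQuery;
    import org.apache.lucene.util.Version;

    public class PercolateSketch {
        public static void main(String[] args) {
            // one in-memory "index" holding exactly the document being percolated
            MemoryIndex memoryIndex = new MemoryIndex();
            StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
            memoryIndex.addField("body", "the quick brown fox", analyzer);

            // each registered percolator query is tried against that single document;
            // a score greater than zero means the query matches it
            float score = memoryIndex.search(new TermQuery(new Term("body", "fox")));
            System.out.println("matches: " + (score > 0.0f));
        }
    }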
@@ -421,9 +429,9 @@ public class PercolatorExecutor extends AbstractIndexComponent {
             }
 
             @Override
-            public void setNextReader(IndexReader reader, int docBase) throws IOException {
+            public void setNextReader(AtomicReaderContext context) throws IOException {
                 // we use the UID because id might not be indexed
-                fieldData = percolatorIndex.cache().fieldData().cache(FieldDataType.DefaultTypes.STRING, reader, UidFieldMapper.NAME);
+                fieldData = percolatorIndex.cache().fieldData().cache(FieldDataType.DefaultTypes.STRING, context.reader(), UidFieldMapper.NAME);
             }
 
             @Override

From 3269e0c88e99d2c7b998295bcbe3537daf5fa326 Mon Sep 17 00:00:00 2001
From: Martijn van Groningen
Date: Tue, 30 Oct 2012 11:36:02 +0100
Subject: [PATCH 053/146] lucene 4: Fixed compile error

---
 .../elasticsearch/index/cache/id/simple/SimpleIdCache.java | 5 +++--
 src/main/java/org/elasticsearch/index/mapper/Uid.java      | 2 +-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java
index adc54eef75c..5b85ed7649c 100644
--- a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java
+++ b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java
@@ -34,6 +34,7 @@ import org.elasticsearch.index.AbstractIndexComponent;
 import org.elasticsearch.index.Index;
 import org.elasticsearch.index.cache.id.IdCache;
 import org.elasticsearch.index.cache.id.IdReaderCache;
+import org.elasticsearch.index.mapper.Uid;
 import org.elasticsearch.index.mapper.internal.ParentFieldMapper;
 import org.elasticsearch.index.mapper.internal.UidFieldMapper;
 import org.elasticsearch.index.settings.IndexSettings;
@@ -121,7 +122,7 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se
                 TermsEnum termsEnum = terms.iterator(null);
                 DocsEnum docsEnum = null;
                 for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
-                    HashedBytesArray[] typeAndId = splitUidIntoTypeAndId(term);
+                    HashedBytesArray[] typeAndId = Uid.splitUidIntoTypeAndId(term);
                     TypeBuilder typeBuilder = readerBuilder.get(typeAndId[0].toUtf8());
                     if (typeBuilder == null) {
                         typeBuilder = new TypeBuilder(reader);
@@ -156,7 +157,7 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se
                 TermsEnum termsEnum = terms.iterator(null);
                 DocsEnum docsEnum = null;
                 for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
-                    HashedBytesArray[] typeAndId = splitUidIntoTypeAndId(term);
+                    HashedBytesArray[] typeAndId = Uid.splitUidIntoTypeAndId(term);
                     TypeBuilder typeBuilder = readerBuilder.get(typeAndId[0].toUtf8());
                     if (typeBuilder == null) {

diff --git a/src/main/java/org/elasticsearch/index/mapper/Uid.java b/src/main/java/org/elasticsearch/index/mapper/Uid.java
index 46dc1197656..cad6b47e2cf 100644
--- a/src/main/java/org/elasticsearch/index/mapper/Uid.java
+++ b/src/main/java/org/elasticsearch/index/mapper/Uid.java
@@ -107,7 +107,7 @@ public final class Uid {
     }
 
     // LUCENE 4 UPGRADE: HashedBytesArray or BytesRef as return type?
- private static HashedBytesArray[] splitUidIntoTypeAndId(BytesRef uid) { + public static HashedBytesArray[] splitUidIntoTypeAndId(BytesRef uid) { int loc = -1; for (int i = uid.offset; i < uid.length; i++) { if (uid.bytes[i] == 0x23) { // 0x23 is equal to '#' From fd2cf776d8b802afb280759f1927a403e38e068e Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Tue, 30 Oct 2012 11:32:17 -0400 Subject: [PATCH 054/146] lucene4: action package cleanup --- .../action/explain/TransportExplainAction.java | 4 ++-- .../action/mlt/TransportMoreLikeThisAction.java | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/elasticsearch/action/explain/TransportExplainAction.java b/src/main/java/org/elasticsearch/action/explain/TransportExplainAction.java index ff99e651479..09cf71a7825 100644 --- a/src/main/java/org/elasticsearch/action/explain/TransportExplainAction.java +++ b/src/main/java/org/elasticsearch/action/explain/TransportExplainAction.java @@ -86,7 +86,7 @@ public class TransportExplainAction extends TransportShardSingleOperationAction< protected ExplainResponse shardOperation(ExplainRequest request, int shardId) throws ElasticSearchException { IndexService indexService = indicesService.indexService(request.index()); IndexShard indexShard = indexService.shardSafe(shardId); - Term uidTerm = UidFieldMapper.TERM_FACTORY.createTerm(Uid.createUid(request.type(), request.id())); + Term uidTerm = new Term(UidFieldMapper.NAME, Uid.createUid(request.type(), request.id())); Engine.GetResult result = indexShard.get(new Engine.Get(false, uidTerm)); if (!result.exists()) { return new ExplainResponse(false); @@ -104,7 +104,7 @@ public class TransportExplainAction extends TransportShardSingleOperationAction< try { context.parsedQuery(parseQuery(request, indexService)); context.preProcess(); - int topLevelDocId = result.docIdAndVersion().docId + result.docIdAndVersion().docStart; + int topLevelDocId = result.docIdAndVersion().docId + result.docIdAndVersion().reader.docBase; Explanation explanation = context.searcher().explain(context.query(), topLevelDocId); if (request.fields() != null) { diff --git a/src/main/java/org/elasticsearch/action/mlt/TransportMoreLikeThisAction.java b/src/main/java/org/elasticsearch/action/mlt/TransportMoreLikeThisAction.java index 3376c3f1d2a..97f254cc6a1 100644 --- a/src/main/java/org/elasticsearch/action/mlt/TransportMoreLikeThisAction.java +++ b/src/main/java/org/elasticsearch/action/mlt/TransportMoreLikeThisAction.java @@ -19,7 +19,7 @@ package org.elasticsearch.action.mlt; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.document.Field; import org.apache.lucene.index.Term; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.action.ActionListener; @@ -263,7 +263,7 @@ public class TransportMoreLikeThisAction extends TransportAction Date: Tue, 30 Oct 2012 20:44:09 +0100 Subject: [PATCH 055/146] lucene 4: Upgraded o.e.search.dfs package. 
(Distributed idf)
---
 .../common/io/stream/StreamInput.java         |  9 +++
 .../common/io/stream/StreamOutput.java        | 11 ++++
 .../elasticsearch/search/SearchService.java   |  4 +-
 .../controller/SearchPhaseController.java     | 20 ++++--
 .../search/dfs/AggregatedDfs.java             | 30 ++++++---
 .../search/dfs/CachedDfSource.java            | 64 ++++++++++++-------
 .../elasticsearch/search/dfs/DfsPhase.java    | 18 +++++-
 .../search/dfs/DfsSearchResult.java           | 44 +++++++------
 8 files changed, 136 insertions(+), 64 deletions(-)

diff --git a/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java b/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java
index 27173ff7181..b98c7332309 100644
--- a/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java
+++ b/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java
@@ -19,6 +19,7 @@ package org.elasticsearch.common.io.stream;
 
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.Version;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.Strings;
@@ -84,6 +85,14 @@ public abstract class StreamInput extends InputStream {
         return new BytesArray(bytes, 0, length);
     }
 
+    public BytesRef readBytesRef() throws IOException {
+        int length = readVInt();
+        byte[] bytes = new byte[length];
+        readBytes(bytes, 0, length);
+        return new BytesRef(bytes, 0, length);
+    }
+
     public void readFully(byte[] b) throws IOException {
         readBytes(b, 0, b.length);
     }

diff --git a/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java b/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java
index fa98ee8cde5..26285983d0e 100644
--- a/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java
+++ b/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java
@@ -19,6 +19,7 @@ package org.elasticsearch.common.io.stream;
 
+import org.apache.lucene.util.BytesRef;
 import org.elasticsearch.Version;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.common.bytes.BytesReference;
@@ -106,6 +107,16 @@ public abstract class StreamOutput extends OutputStream {
         bytes.writeTo(this);
     }
 
+    public void writeBytesRef(BytesRef bytes) throws IOException {
+        if (bytes == null) {
+            writeVInt(0);
+            return;
+        }
+        writeVInt(bytes.length);
+        write(bytes.bytes, bytes.offset, bytes.length);
+    }
+
     public final void writeShort(short v) throws IOException {
         writeByte((byte) (v >> 8));
         writeByte((byte) v);
     }

diff --git a/src/main/java/org/elasticsearch/search/SearchService.java b/src/main/java/org/elasticsearch/search/SearchService.java
index 4ef419f07c9..25cba579d6b 100644
--- a/src/main/java/org/elasticsearch/search/SearchService.java
+++ b/src/main/java/org/elasticsearch/search/SearchService.java
@@ -282,7 +282,7 @@ public class SearchService extends AbstractLifecycleComponent {
         SearchContext context = findContext(request.id());
         contextProcessing(context);
         try {
-            context.searcher().dfSource(new CachedDfSource(request.dfs(), context.similarityService().defaultSearchSimilarity()));
+            context.searcher().dfSource(new CachedDfSource(context.searcher().getIndexReader(), request.dfs(), context.similarityService().defaultSearchSimilarity()));
         } catch (IOException e) {
             freeContext(context);
             cleanContext(context);
@@ -348,7 +348,7 @@ public class SearchService extends AbstractLifecycleComponent {
         SearchContext context = findContext(request.id());
         contextProcessing(context);
         try {
-            context.searcher().dfSource(new CachedDfSource(request.dfs(),
context.similarityService().defaultSearchSimilarity())); + context.searcher().dfSource(new CachedDfSource(context.searcher().getIndexReader(), request.dfs(), context.similarityService().defaultSearchSimilarity())); } catch (IOException e) { freeContext(context); cleanContext(context); diff --git a/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java b/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java index 910bbffe599..eb105c34b3a 100644 --- a/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java +++ b/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java @@ -24,6 +24,7 @@ import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Ordering; import gnu.trove.impl.Constants; +import gnu.trove.map.TMap; import gnu.trove.map.hash.TObjectIntHashMap; import org.apache.lucene.index.Term; import org.apache.lucene.search.*; @@ -32,6 +33,7 @@ import org.elasticsearch.common.Nullable; import org.elasticsearch.common.component.AbstractComponent; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.Settings; +import org.elasticsearch.common.trove.ExtTHashMap; import org.elasticsearch.common.trove.ExtTIntArrayList; import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.search.dfs.AggregatedDfs; @@ -86,11 +88,17 @@ public class SearchPhaseController extends AbstractComponent { } public AggregatedDfs aggregateDfs(Iterable results) { - TObjectIntHashMap dfMap = new TObjectIntHashMap(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR, -1); + TMap dfMap = new ExtTHashMap(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR); long aggMaxDoc = 0; for (DfsSearchResult result : results) { - for (int i = 0; i < result.freqs().length; i++) { - dfMap.adjustOrPutValue(result.terms()[i], result.freqs()[i], result.freqs()[i]); + for (int i = 0; i < result.termStatistics().length; i++) { + TermStatistics existing = dfMap.get(result.terms()[i]); + if (existing != null) { + dfMap.put(result.terms()[i], new TermStatistics(existing.term(), existing.docFreq() + result.termStatistics()[i].docFreq(), existing.totalTermFreq() + result.termStatistics()[i].totalTermFreq())); + } else { + dfMap.put(result.terms()[i], result.termStatistics()[i]); + } + } aggMaxDoc += result.maxDoc(); } @@ -173,7 +181,7 @@ public class SearchPhaseController extends AbstractComponent { if (fDoc.fields[i] != null) { allValuesAreNull = false; if (fDoc.fields[i] instanceof String) { - fieldDocs.fields[i] = new SortField(fieldDocs.fields[i].getField(), SortField.STRING, fieldDocs.fields[i].getReverse()); + fieldDocs.fields[i] = new SortField(fieldDocs.fields[i].getField(), SortField.Type.STRING, fieldDocs.fields[i].getReverse()); } resolvedField = true; break; @@ -185,7 +193,7 @@ public class SearchPhaseController extends AbstractComponent { } if (!resolvedField && allValuesAreNull && fieldDocs.fields[i].getField() != null) { // we did not manage to resolve a field (and its not score or doc, which have no field), and all the fields are null (which can only happen for STRING), make it a STRING - fieldDocs.fields[i] = new SortField(fieldDocs.fields[i].getField(), SortField.STRING, fieldDocs.fields[i].getReverse()); + fieldDocs.fields[i] = new SortField(fieldDocs.fields[i].getField(), SortField.Type.STRING, fieldDocs.fields[i].getReverse()); } } queue = new ShardFieldDocSortedHitQueue(fieldDocs.fields, queueSize); @@ -270,7 +278,7 @@ public 
class SearchPhaseController extends AbstractComponent { sorted = true; TopFieldDocs fieldDocs = (TopFieldDocs) querySearchResult.queryResult().topDocs(); for (int i = 0; i < fieldDocs.fields.length; i++) { - if (fieldDocs.fields[i].getType() == SortField.SCORE) { + if (fieldDocs.fields[i].getType() == SortField.Type.SCORE) { sortScoreIndex = i; } } diff --git a/src/main/java/org/elasticsearch/search/dfs/AggregatedDfs.java b/src/main/java/org/elasticsearch/search/dfs/AggregatedDfs.java index eea53d483b3..1f0e8c348c5 100644 --- a/src/main/java/org/elasticsearch/search/dfs/AggregatedDfs.java +++ b/src/main/java/org/elasticsearch/search/dfs/AggregatedDfs.java @@ -21,21 +21,25 @@ package org.elasticsearch.search.dfs; import gnu.trove.impl.Constants; import gnu.trove.iterator.TObjectIntIterator; +import gnu.trove.map.TMap; import gnu.trove.map.hash.TObjectIntHashMap; import org.apache.lucene.index.Term; +import org.apache.lucene.search.TermStatistics; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.common.io.stream.Streamable; +import org.elasticsearch.common.trove.ExtTHashMap; import org.elasticsearch.common.trove.ExtTObjectIntHasMap; import java.io.IOException; +import java.util.Map; /** * */ public class AggregatedDfs implements Streamable { - private TObjectIntHashMap dfMap; + private TMap dfMap; private long maxDoc; @@ -43,12 +47,12 @@ public class AggregatedDfs implements Streamable { } - public AggregatedDfs(TObjectIntHashMap dfMap, long maxDoc) { + public AggregatedDfs(TMap dfMap, long maxDoc) { this.dfMap = dfMap; this.maxDoc = maxDoc; } - public TObjectIntHashMap dfMap() { + public TMap dfMap() { return dfMap; } @@ -65,9 +69,11 @@ public class AggregatedDfs implements Streamable { @Override public void readFrom(StreamInput in) throws IOException { int size = in.readVInt(); - dfMap = new ExtTObjectIntHasMap(size, Constants.DEFAULT_LOAD_FACTOR, -1); + dfMap = new ExtTHashMap(size, Constants.DEFAULT_LOAD_FACTOR); for (int i = 0; i < size; i++) { - dfMap.put(new Term(in.readUTF(), in.readUTF()), in.readVInt()); + Term term = new Term(in.readString(), in.readBytesRef()); + TermStatistics stats = new TermStatistics(in.readBytesRef(), in.readVLong(), in.readVLong()); + dfMap.put(term, stats); } maxDoc = in.readVLong(); } @@ -76,12 +82,16 @@ public class AggregatedDfs implements Streamable { public void writeTo(final StreamOutput out) throws IOException { out.writeVInt(dfMap.size()); - for (TObjectIntIterator it = dfMap.iterator(); it.hasNext(); ) { - it.advance(); - out.writeUTF(it.key().field()); - out.writeUTF(it.key().text()); - out.writeVInt(it.value()); + for (Map.Entry termTermStatisticsEntry : dfMap.entrySet()) { + Term term = termTermStatisticsEntry.getKey(); + out.writeString(term.field()); + out.writeBytesRef(term.bytes()); + TermStatistics stats = termTermStatisticsEntry.getValue(); + out.writeBytesRef(stats.term()); + out.writeVLong(stats.docFreq()); + out.writeVLong(stats.totalTermFreq()); } + out.writeVLong(maxDoc); } } diff --git a/src/main/java/org/elasticsearch/search/dfs/CachedDfSource.java b/src/main/java/org/elasticsearch/search/dfs/CachedDfSource.java index d2a50b97d16..1ecc2ac0eff 100644 --- a/src/main/java/org/elasticsearch/search/dfs/CachedDfSource.java +++ b/src/main/java/org/elasticsearch/search/dfs/CachedDfSource.java @@ -20,22 +20,26 @@ package org.elasticsearch.search.dfs; import org.apache.lucene.document.Document; -import org.apache.lucene.document.FieldSelector; -import 
org.apache.lucene.index.Term; +import org.apache.lucene.index.*; import org.apache.lucene.search.*; +import org.apache.lucene.search.similarities.Similarity; +import org.elasticsearch.ElasticSearchIllegalArgumentException; +import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; +import java.util.List; /** * */ -public class CachedDfSource extends Searcher { +public class CachedDfSource extends IndexSearcher { private final AggregatedDfs dfs; private final int maxDoc; - public CachedDfSource(AggregatedDfs dfs, Similarity similarity) throws IOException { + public CachedDfSource(IndexReader reader, AggregatedDfs dfs, Similarity similarity) throws IOException { + super(reader); this.dfs = dfs; setSimilarity(similarity); if (dfs.maxDoc() > Integer.MAX_VALUE) { @@ -45,21 +49,19 @@ public class CachedDfSource extends Searcher { } } - public int docFreq(Term term) { - int df = dfs.dfMap().get(term); - if (df == -1) { - return 1; -// throw new IllegalArgumentException("df for term " + term + " not available"); + + @Override + public TermStatistics termStatistics(Term term, TermContext context) throws IOException { + TermStatistics termStatistics = dfs.dfMap().get(term); + if (termStatistics == null) { + throw new ElasticSearchIllegalArgumentException("Not distributed term statistics for term: " + term); } - return df; + return termStatistics; } - public int[] docFreqs(Term[] terms) { - int[] result = new int[terms.length]; - for (int i = 0; i < terms.length; i++) { - result[i] = docFreq(terms[i]); - } - return result; + @Override + public CollectionStatistics collectionStatistics(String field) throws IOException { + throw new UnsupportedOperationException(); } public int maxDoc() { @@ -74,15 +76,11 @@ public class CachedDfSource extends Searcher { return query; } - public void close() { - throw new UnsupportedOperationException(); - } - public Document doc(int i) { throw new UnsupportedOperationException(); } - public Document doc(int i, FieldSelector fieldSelector) { + public void doc(int docID, StoredFieldVisitor fieldVisitor) throws IOException { throw new UnsupportedOperationException(); } @@ -90,15 +88,33 @@ public class CachedDfSource extends Searcher { throw new UnsupportedOperationException(); } - public void search(Weight weight, Filter filter, Collector results) { + @Override + protected void search(List leaves, Weight weight, Collector collector) throws IOException { throw new UnsupportedOperationException(); } - public TopDocs search(Weight weight, Filter filter, int n) { + @Override + protected TopDocs search(Weight weight, ScoreDoc after, int nDocs) throws IOException { throw new UnsupportedOperationException(); } - public TopFieldDocs search(Weight weight, Filter filter, int n, Sort sort) { + @Override + protected TopDocs search(List leaves, Weight weight, ScoreDoc after, int nDocs) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + protected TopFieldDocs search(Weight weight, int nDocs, Sort sort, boolean doDocScores, boolean doMaxScore) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + protected TopFieldDocs search(Weight weight, FieldDoc after, int nDocs, Sort sort, boolean fillFields, boolean doDocScores, boolean doMaxScore) throws IOException { + throw new UnsupportedOperationException(); + } + + @Override + protected TopFieldDocs search(List leaves, Weight weight, FieldDoc after, int nDocs, Sort sort, boolean fillFields, boolean doDocScores, boolean doMaxScore) throws IOException 
{ throw new UnsupportedOperationException(); } diff --git a/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java b/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java index b0378653b98..f4394387060 100644 --- a/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java +++ b/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java @@ -21,7 +21,10 @@ package org.elasticsearch.search.dfs; import com.google.common.collect.ImmutableMap; import gnu.trove.set.hash.THashSet; +import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.search.TermStatistics; import org.elasticsearch.common.util.concurrent.ThreadLocals; import org.elasticsearch.search.SearchParseElement; import org.elasticsearch.search.SearchPhase; @@ -60,12 +63,21 @@ public class DfsPhase implements SearchPhase { termsSet.clear(); context.query().extractTerms(termsSet); Term[] terms = termsSet.toArray(new Term[termsSet.size()]); - int[] freqs = context.searcher().docFreqs(terms); + TermStatistics[] termStatistics = new TermStatistics[terms.length]; + IndexReaderContext indexReaderContext = context.searcher().getTopReaderContext(); + for (int i = 0; i < terms.length; i++) { + // LUCENE 4 UPGRADE: cache TermContext? + TermContext termContext = TermContext.build(indexReaderContext, terms[i], false); + termStatistics[i] = context.searcher().termStatistics(terms[i], termContext); + } - context.dfsResult().termsAndFreqs(terms, freqs); + // TODO: LUCENE 4 UPGRADE - add collection stats for each unique field, for distributed scoring +// context.searcher().collectionStatistics() + + context.dfsResult().termsAndFreqs(terms, termStatistics); context.dfsResult().maxDoc(context.searcher().getIndexReader().maxDoc()); } catch (Exception e) { - throw new DfsPhaseExecutionException(context, "", e); + throw new DfsPhaseExecutionException(context, "Exception during dfs phase", e); } } } diff --git a/src/main/java/org/elasticsearch/search/dfs/DfsSearchResult.java b/src/main/java/org/elasticsearch/search/dfs/DfsSearchResult.java index 777cb8da3ef..75de8e72065 100644 --- a/src/main/java/org/elasticsearch/search/dfs/DfsSearchResult.java +++ b/src/main/java/org/elasticsearch/search/dfs/DfsSearchResult.java @@ -20,6 +20,8 @@ package org.elasticsearch.search.dfs; import org.apache.lucene.index.Term; +import org.apache.lucene.search.TermStatistics; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import org.elasticsearch.search.SearchPhaseResult; @@ -33,13 +35,13 @@ import java.io.IOException; */ public class DfsSearchResult extends TransportResponse implements SearchPhaseResult { - private static Term[] EMPTY_TERMS = new Term[0]; - private static int[] EMPTY_FREQS = new int[0]; + private static final Term[] EMPTY_TERMS = new Term[0]; + private static final TermStatistics[] EMPTY_TERM_STATS = new TermStatistics[0]; private SearchShardTarget shardTarget; private long id; private Term[] terms; - private int[] freqs; + private TermStatistics[] termStatistics; private int maxDoc; public DfsSearchResult() { @@ -73,9 +75,9 @@ public class DfsSearchResult extends TransportResponse implements SearchPhaseRes return maxDoc; } - public DfsSearchResult termsAndFreqs(Term[] terms, int[] freqs) { + public DfsSearchResult termsAndFreqs(Term[] terms, TermStatistics[] termStatistics) { this.terms = terms; - this.freqs = freqs; + this.termStatistics = termStatistics; 
return this; } @@ -83,8 +85,8 @@ public class DfsSearchResult extends TransportResponse implements SearchPhaseRes return terms; } - public int[] freqs() { - return freqs; + public TermStatistics[] termStatistics() { + return termStatistics; } public static DfsSearchResult readDfsSearchResult(StreamInput in) throws IOException, ClassNotFoundException { @@ -104,16 +106,19 @@ public class DfsSearchResult extends TransportResponse implements SearchPhaseRes } else { terms = new Term[termsSize]; for (int i = 0; i < terms.length; i++) { - terms[i] = new Term(in.readUTF(), in.readUTF()); + terms[i] = new Term(in.readString(), in.readBytesRef()); } } - int freqsSize = in.readVInt(); - if (freqsSize == 0) { - freqs = EMPTY_FREQS; + int termsStatsSize = in.readVInt(); + if (termsStatsSize == 0) { + termStatistics = EMPTY_TERM_STATS; } else { - freqs = new int[freqsSize]; - for (int i = 0; i < freqs.length; i++) { - freqs[i] = in.readVInt(); + termStatistics = new TermStatistics[termsStatsSize]; + for (int i = 0; i < termStatistics.length; i++) { + BytesRef term = terms[i].bytes(); + long docFreq = in.readVLong(); + long totalTermFreq = in.readVLong(); + termStatistics[i] = new TermStatistics(term, docFreq, totalTermFreq); } } maxDoc = in.readVInt(); @@ -126,12 +131,13 @@ public class DfsSearchResult extends TransportResponse implements SearchPhaseRes // shardTarget.writeTo(out); out.writeVInt(terms.length); for (Term term : terms) { - out.writeUTF(term.field()); - out.writeUTF(term.text()); + out.writeString(term.field()); + out.writeBytesRef(term.bytes()); } - out.writeVInt(freqs.length); - for (int freq : freqs) { - out.writeVInt(freq); + out.writeVInt(termStatistics.length); + for (TermStatistics termStatistic : termStatistics) { + out.writeVLong(termStatistic.docFreq()); + out.writeVLong(termStatistic.totalTermFreq()); } out.writeVInt(maxDoc); } From 083df0a86cfbf06bcf5a93e2f14a5e66e7e424a0 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 30 Oct 2012 22:36:43 +0100 Subject: [PATCH 056/146] lucene 4: Upgraded o.e.search.dfs package. 
#2 --- .../controller/SearchPhaseController.java | 25 ++++++++-- .../search/dfs/AggregatedDfs.java | 46 +++++++++++++------ .../search/dfs/CachedDfSource.java | 19 ++++---- .../elasticsearch/search/dfs/DfsPhase.java | 19 ++++++-- .../search/dfs/DfsSearchResult.java | 32 ++++++++++++- 5 files changed, 109 insertions(+), 32 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java b/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java index eb105c34b3a..b9e2ffabbc1 100644 --- a/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java +++ b/src/main/java/org/elasticsearch/search/controller/SearchPhaseController.java @@ -88,21 +88,36 @@ public class SearchPhaseController extends AbstractComponent { } public AggregatedDfs aggregateDfs(Iterable results) { - TMap dfMap = new ExtTHashMap(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR); + TMap termStatistics = new ExtTHashMap(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR); + TMap fieldStatistics = new ExtTHashMap(Constants.DEFAULT_CAPACITY, Constants.DEFAULT_LOAD_FACTOR); long aggMaxDoc = 0; for (DfsSearchResult result : results) { for (int i = 0; i < result.termStatistics().length; i++) { - TermStatistics existing = dfMap.get(result.terms()[i]); + TermStatistics existing = termStatistics.get(result.terms()[i]); if (existing != null) { - dfMap.put(result.terms()[i], new TermStatistics(existing.term(), existing.docFreq() + result.termStatistics()[i].docFreq(), existing.totalTermFreq() + result.termStatistics()[i].totalTermFreq())); + termStatistics.put(result.terms()[i], new TermStatistics(existing.term(), existing.docFreq() + result.termStatistics()[i].docFreq(), existing.totalTermFreq() + result.termStatistics()[i].totalTermFreq())); } else { - dfMap.put(result.terms()[i], result.termStatistics()[i]); + termStatistics.put(result.terms()[i], result.termStatistics()[i]); } } + for (Map.Entry entry : result.fieldStatistics().entrySet()) { + CollectionStatistics existing = fieldStatistics.get(entry.getKey()); + if (existing != null) { + CollectionStatistics merged = new CollectionStatistics( + entry.getKey(), existing.maxDoc() + entry.getValue().maxDoc(), + existing.docCount() + entry.getValue().docCount(), + existing.sumTotalTermFreq() + entry.getValue().sumTotalTermFreq(), + existing.sumDocFreq() + entry.getValue().sumDocFreq() + ); + fieldStatistics.put(entry.getKey(), merged); + } else { + fieldStatistics.put(entry.getKey(), entry.getValue()); + } + } aggMaxDoc += result.maxDoc(); } - return new AggregatedDfs(dfMap, aggMaxDoc); + return new AggregatedDfs(termStatistics, fieldStatistics, aggMaxDoc); } public ShardDoc[] sortDocs(Collection results1) { diff --git a/src/main/java/org/elasticsearch/search/dfs/AggregatedDfs.java b/src/main/java/org/elasticsearch/search/dfs/AggregatedDfs.java index 1f0e8c348c5..6b750e153e6 100644 --- a/src/main/java/org/elasticsearch/search/dfs/AggregatedDfs.java +++ b/src/main/java/org/elasticsearch/search/dfs/AggregatedDfs.java @@ -20,16 +20,14 @@ package org.elasticsearch.search.dfs; import gnu.trove.impl.Constants; -import gnu.trove.iterator.TObjectIntIterator; import gnu.trove.map.TMap; -import gnu.trove.map.hash.TObjectIntHashMap; import org.apache.lucene.index.Term; +import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.TermStatistics; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; import 
org.elasticsearch.common.io.stream.Streamable; import org.elasticsearch.common.trove.ExtTHashMap; -import org.elasticsearch.common.trove.ExtTObjectIntHasMap; import java.io.IOException; import java.util.Map; @@ -39,21 +37,26 @@ import java.util.Map; */ public class AggregatedDfs implements Streamable { - private TMap dfMap; - + private TMap termStatistics; + private TMap fieldStatistics; private long maxDoc; private AggregatedDfs() { } - public AggregatedDfs(TMap dfMap, long maxDoc) { - this.dfMap = dfMap; + public AggregatedDfs(TMap termStatistics, TMap fieldStatistics, long maxDoc) { + this.termStatistics = termStatistics; + this.fieldStatistics = fieldStatistics; this.maxDoc = maxDoc; } - public TMap dfMap() { - return dfMap; + public TMap termStatistics() { + return termStatistics; + } + + public TMap fieldStatistics() { + return fieldStatistics; } public long maxDoc() { @@ -69,20 +72,26 @@ public class AggregatedDfs implements Streamable { @Override public void readFrom(StreamInput in) throws IOException { int size = in.readVInt(); - dfMap = new ExtTHashMap(size, Constants.DEFAULT_LOAD_FACTOR); + termStatistics = new ExtTHashMap(size, Constants.DEFAULT_LOAD_FACTOR); for (int i = 0; i < size; i++) { Term term = new Term(in.readString(), in.readBytesRef()); TermStatistics stats = new TermStatistics(in.readBytesRef(), in.readVLong(), in.readVLong()); - dfMap.put(term, stats); + termStatistics.put(term, stats); + } + size = in.readVInt(); + fieldStatistics = new ExtTHashMap(size, Constants.DEFAULT_LOAD_FACTOR); + for (int i = 0; i < size; i++) { + String field = in.readString(); + CollectionStatistics stats = new CollectionStatistics(field, in.readVLong(), in.readVLong(), in.readVLong(), in.readVLong()); + fieldStatistics.put(field, stats); } maxDoc = in.readVLong(); } @Override public void writeTo(final StreamOutput out) throws IOException { - out.writeVInt(dfMap.size()); - - for (Map.Entry termTermStatisticsEntry : dfMap.entrySet()) { + out.writeVInt(termStatistics.size()); + for (Map.Entry termTermStatisticsEntry : termStatistics.entrySet()) { Term term = termTermStatisticsEntry.getKey(); out.writeString(term.field()); out.writeBytesRef(term.bytes()); @@ -92,6 +101,15 @@ public class AggregatedDfs implements Streamable { out.writeVLong(stats.totalTermFreq()); } + out.writeVInt(fieldStatistics.size()); + for (Map.Entry entry : fieldStatistics.entrySet()) { + out.writeString(entry.getKey()); + out.writeVLong(entry.getValue().maxDoc()); + out.writeVLong(entry.getValue().docCount()); + out.writeVLong(entry.getValue().sumTotalTermFreq()); + out.writeVLong(entry.getValue().sumDocFreq()); + } + out.writeVLong(maxDoc); } } diff --git a/src/main/java/org/elasticsearch/search/dfs/CachedDfSource.java b/src/main/java/org/elasticsearch/search/dfs/CachedDfSource.java index 1ecc2ac0eff..e28ff78268c 100644 --- a/src/main/java/org/elasticsearch/search/dfs/CachedDfSource.java +++ b/src/main/java/org/elasticsearch/search/dfs/CachedDfSource.java @@ -24,7 +24,6 @@ import org.apache.lucene.index.*; import org.apache.lucene.search.*; import org.apache.lucene.search.similarities.Similarity; import org.elasticsearch.ElasticSearchIllegalArgumentException; -import org.elasticsearch.search.internal.SearchContext; import java.io.IOException; import java.util.List; @@ -34,25 +33,25 @@ import java.util.List; */ public class CachedDfSource extends IndexSearcher { - private final AggregatedDfs dfs; + private final AggregatedDfs aggregatedDfs; private final int maxDoc; - public CachedDfSource(IndexReader reader, 
AggregatedDfs dfs, Similarity similarity) throws IOException { + public CachedDfSource(IndexReader reader, AggregatedDfs aggregatedDfs, Similarity similarity) throws IOException { super(reader); - this.dfs = dfs; + this.aggregatedDfs = aggregatedDfs; setSimilarity(similarity); - if (dfs.maxDoc() > Integer.MAX_VALUE) { + if (aggregatedDfs.maxDoc() > Integer.MAX_VALUE) { maxDoc = Integer.MAX_VALUE; } else { - maxDoc = (int) dfs.maxDoc(); + maxDoc = (int) aggregatedDfs.maxDoc(); } } @Override public TermStatistics termStatistics(Term term, TermContext context) throws IOException { - TermStatistics termStatistics = dfs.dfMap().get(term); + TermStatistics termStatistics = aggregatedDfs.termStatistics().get(term); if (termStatistics == null) { throw new ElasticSearchIllegalArgumentException("Not distributed term statistics for term: " + term); } @@ -61,7 +60,11 @@ public class CachedDfSource extends IndexSearcher { @Override public CollectionStatistics collectionStatistics(String field) throws IOException { - throw new UnsupportedOperationException(); + CollectionStatistics collectionStatistics = aggregatedDfs.fieldStatistics().get(field); + if (collectionStatistics == null) { + throw new ElasticSearchIllegalArgumentException("Not distributed collection statistics for field: " + field); + } + return collectionStatistics; } public int maxDoc() { diff --git a/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java b/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java index f4394387060..7fd8d20f594 100644 --- a/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java +++ b/src/main/java/org/elasticsearch/search/dfs/DfsPhase.java @@ -20,17 +20,23 @@ package org.elasticsearch.search.dfs; import com.google.common.collect.ImmutableMap; +import gnu.trove.map.TMap; import gnu.trove.set.hash.THashSet; import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.Term; import org.apache.lucene.index.TermContext; +import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.TermStatistics; +import org.elasticsearch.common.trove.ExtTHashMap; import org.elasticsearch.common.util.concurrent.ThreadLocals; import org.elasticsearch.search.SearchParseElement; import org.elasticsearch.search.SearchPhase; import org.elasticsearch.search.internal.SearchContext; +import java.util.HashSet; +import java.util.List; import java.util.Map; +import java.util.Set; /** * @@ -71,11 +77,16 @@ public class DfsPhase implements SearchPhase { termStatistics[i] = context.searcher().termStatistics(terms[i], termContext); } - // TODO: LUCENE 4 UPGRADE - add collection stats for each unique field, for distributed scoring -// context.searcher().collectionStatistics() + TMap fieldStatistics = new ExtTHashMap(); + for (Term term : terms) { + if (!fieldStatistics.containsKey(term.field())) { + fieldStatistics.put(term.field(), context.searcher().collectionStatistics(term.field())); + } + } - context.dfsResult().termsAndFreqs(terms, termStatistics); - context.dfsResult().maxDoc(context.searcher().getIndexReader().maxDoc()); + context.dfsResult().termsStatistics(terms, termStatistics) + .fieldStatistics(fieldStatistics) + .maxDoc(context.searcher().getIndexReader().maxDoc()); } catch (Exception e) { throw new DfsPhaseExecutionException(context, "Exception during dfs phase", e); } diff --git a/src/main/java/org/elasticsearch/search/dfs/DfsSearchResult.java b/src/main/java/org/elasticsearch/search/dfs/DfsSearchResult.java index 75de8e72065..680e99b2a6a 100644 --- 
a/src/main/java/org/elasticsearch/search/dfs/DfsSearchResult.java +++ b/src/main/java/org/elasticsearch/search/dfs/DfsSearchResult.java @@ -19,16 +19,20 @@ package org.elasticsearch.search.dfs; +import gnu.trove.map.TMap; import org.apache.lucene.index.Term; +import org.apache.lucene.search.CollectionStatistics; import org.apache.lucene.search.TermStatistics; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.trove.ExtTHashMap; import org.elasticsearch.search.SearchPhaseResult; import org.elasticsearch.search.SearchShardTarget; import org.elasticsearch.transport.TransportResponse; import java.io.IOException; +import java.util.Map; /** * @@ -42,6 +46,7 @@ public class DfsSearchResult extends TransportResponse implements SearchPhaseRes private long id; private Term[] terms; private TermStatistics[] termStatistics; + private TMap fieldStatistics = new ExtTHashMap(); private int maxDoc; public DfsSearchResult() { @@ -75,12 +80,17 @@ public class DfsSearchResult extends TransportResponse implements SearchPhaseRes return maxDoc; } - public DfsSearchResult termsAndFreqs(Term[] terms, TermStatistics[] termStatistics) { + public DfsSearchResult termsStatistics(Term[] terms, TermStatistics[] termStatistics) { this.terms = terms; this.termStatistics = termStatistics; return this; } + public DfsSearchResult fieldStatistics(TMap fieldStatistics) { + this.fieldStatistics = fieldStatistics; + return this; + } + public Term[] terms() { return terms; } @@ -89,6 +99,10 @@ public class DfsSearchResult extends TransportResponse implements SearchPhaseRes return termStatistics; } + public TMap fieldStatistics() { + return fieldStatistics; + } + public static DfsSearchResult readDfsSearchResult(StreamInput in) throws IOException, ClassNotFoundException { DfsSearchResult result = new DfsSearchResult(); result.readFrom(in); @@ -121,6 +135,13 @@ public class DfsSearchResult extends TransportResponse implements SearchPhaseRes termStatistics[i] = new TermStatistics(term, docFreq, totalTermFreq); } } + int numFieldStatistics = in.readVInt(); + for (int i = 0; i < numFieldStatistics; i++) { + String field = in.readString(); + CollectionStatistics stats = new CollectionStatistics(field, in.readVLong(), in.readVLong(), in.readVLong(), in.readVLong()); + fieldStatistics.put(field, stats); + } + maxDoc = in.readVInt(); } @@ -139,6 +160,15 @@ public class DfsSearchResult extends TransportResponse implements SearchPhaseRes out.writeVLong(termStatistic.docFreq()); out.writeVLong(termStatistic.totalTermFreq()); } + out.writeVInt(fieldStatistics.size()); + for (Map.Entry entry : fieldStatistics.entrySet()) { + out.writeString(entry.getKey()); + out.writeVLong(entry.getValue().maxDoc()); + out.writeVLong(entry.getValue().docCount()); + out.writeVLong(entry.getValue().sumTotalTermFreq()); + out.writeVLong(entry.getValue().sumDocFreq()); + } out.writeVInt(maxDoc); } + } From 0354825914ba79d7e3ec500aa9eab0532e43850f Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 30 Oct 2012 22:39:59 +0100 Subject: [PATCH 057/146] lucene 4: Fixed compile error --- src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java b/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java index 543c7e6b8dc..9175dd1e69c 100644 --- 
a/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java +++ b/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java @@ -159,8 +159,6 @@ public class FieldsLookup implements Map { data.doc(fieldVisitor.createDocument()); } catch (IOException e) { throw new ElasticSearchParseException("failed to load field [" + name + "]", e); - } finally { - fieldVisitor.reset(); } } return data; From b3e59d58e450726258c725ed728c4aa1b8a6054a Mon Sep 17 00:00:00 2001 From: Chris Male Date: Wed, 31 Oct 2012 11:13:10 +1300 Subject: [PATCH 058/146] lucene 4: Fixed TermFactory usage in MapperService --- .../elasticsearch/common/lucene/search/XBooleanFilter.java | 2 +- .../java/org/elasticsearch/index/mapper/MapperService.java | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java b/src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java index 04f469cd860..b6ff7069215 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java @@ -20,10 +20,10 @@ package org.elasticsearch.common.lucene.search; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.queries.FilterClause; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; -import org.apache.lucene.search.FilterClause; import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.common.lucene.docset.DocSet; import org.elasticsearch.common.lucene.docset.DocSets; diff --git a/src/main/java/org/elasticsearch/index/mapper/MapperService.java b/src/main/java/org/elasticsearch/index/mapper/MapperService.java index 75ebec71a8d..0e76465f4b3 100644 --- a/src/main/java/org/elasticsearch/index/mapper/MapperService.java +++ b/src/main/java/org/elasticsearch/index/mapper/MapperService.java @@ -26,11 +26,10 @@ import com.google.common.collect.Sets; import com.google.common.collect.UnmodifiableIterator; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.document.Fieldable; import org.apache.lucene.index.Term; +import org.apache.lucene.queries.FilterClause; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.Filter; -import org.apache.lucene.search.FilterClause; import org.apache.lucene.search.XTermsFilter; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.collect.MapBuilder; @@ -436,7 +435,7 @@ public class MapperService extends AbstractIndexComponent implements Iterable Date: Tue, 30 Oct 2012 23:17:57 +0100 Subject: [PATCH 059/146] lucene 4: Upgraded ContentIndexSearcher --- .../search/internal/ContextIndexSearcher.java | 24 ++++--------------- .../search/query/QueryPhase.java | 3 ++- 2 files changed, 6 insertions(+), 21 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java b/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java index ab5f2c46271..b6e7564b7ea 100644 --- a/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java +++ b/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java @@ -22,6 +22,7 @@ package org.elasticsearch.search.internal; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import com.google.common.collect.Maps; +import org.apache.lucene.index.AtomicReaderContext; import 
org.apache.lucene.index.IndexReader; import org.apache.lucene.search.*; import org.elasticsearch.common.lucene.MinimumScoreCollector; @@ -132,25 +133,8 @@ public class ContextIndexSearcher extends IndexSearcher { return super.createNormalizedWeight(query); } - // override from the Searcher to allow to control if scores will be tracked or not - // LUCENE MONITOR - We override the logic here to apply our own flags for track scores @Override - public TopFieldDocs search(Weight weight, Filter filter, int nDocs, - Sort sort, boolean fillFields) throws IOException { - int limit = reader.maxDoc(); - if (limit == 0) { - limit = 1; - } - nDocs = Math.min(nDocs, limit); - - TopFieldCollector collector = TopFieldCollector.create(sort, nDocs, - fillFields, searchContext.trackScores(), searchContext.trackScores(), !weight.scoresDocsOutOfOrder()); - search(weight, filter, collector); - return (TopFieldDocs) collector.topDocs(); - } - - @Override - public void search(Weight weight, Filter filter, Collector collector) throws IOException { + public void search(Query query, Filter filter, Collector collector) throws IOException { if (searchContext.parsedFilter() != null && Scopes.MAIN.equals(processingScope)) { // this will only get applied to the actual search collector and not // to any scoped collectors, also, it will only be applied to the main collector @@ -186,12 +170,12 @@ public class ContextIndexSearcher extends IndexSearcher { // we only compute the doc id set once since within a context, we execute the same query always... if (searchContext.timeoutInMillis() != -1) { try { - super.search(weight, combinedFilter, collector); + super.search(query, combinedFilter, collector); } catch (TimeLimitingCollector.TimeExceededException e) { searchContext.queryResult().searchTimedOut(true); } } else { - super.search(weight, combinedFilter, collector); + super.search(query, combinedFilter, collector); } } diff --git a/src/main/java/org/elasticsearch/search/query/QueryPhase.java b/src/main/java/org/elasticsearch/search/query/QueryPhase.java index 57b4e0bcf8c..1ff009b4882 100644 --- a/src/main/java/org/elasticsearch/search/query/QueryPhase.java +++ b/src/main/java/org/elasticsearch/search/query/QueryPhase.java @@ -173,7 +173,8 @@ public class QueryPhase implements SearchPhase { } else if (searchContext.searchType() == SearchType.SCAN) { topDocs = searchContext.scanContext().execute(searchContext); } else if (searchContext.sort() != null) { - topDocs = searchContext.searcher().search(query, null, numDocs, searchContext.sort()); + topDocs = searchContext.searcher().search(query, null, numDocs, searchContext.sort(), + searchContext.trackScores(), searchContext.trackScores()); } else { topDocs = searchContext.searcher().search(query, numDocs); } From 9f45b683d672353a1c352b489c615a5d4276f354 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Tue, 30 Oct 2012 23:21:44 +0100 Subject: [PATCH 060/146] lucene 4: Fixed TERM_FACTORY usage in VersionFetchSubPhase class. 
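In Lucene 4 the per-field term factories are gone; a Term is constructed directly from a field name and either a String or a BytesRef. A minimal sketch of the replacement pattern used below (the "_uid" value shown is a made-up example):

    import org.apache.lucene.index.Term;
    import org.apache.lucene.util.BytesRef;

    class UidTermSketch {
        // Both forms build the same _uid term that previously came from TERM_FACTORY:
        static Term fromString() {
            return new Term("_uid", "my_type#my_id");
        }
        static Term fromBytes() {
            return new Term("_uid", new BytesRef("my_type#my_id"));
        }
    }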
--- .../search/fetch/version/VersionFetchSubPhase.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/elasticsearch/search/fetch/version/VersionFetchSubPhase.java b/src/main/java/org/elasticsearch/search/fetch/version/VersionFetchSubPhase.java index f5342822739..ed208fdf08b 100644 --- a/src/main/java/org/elasticsearch/search/fetch/version/VersionFetchSubPhase.java +++ b/src/main/java/org/elasticsearch/search/fetch/version/VersionFetchSubPhase.java @@ -20,6 +20,7 @@ package org.elasticsearch.search.fetch.version; import com.google.common.collect.ImmutableMap; +import org.apache.lucene.index.Term; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.common.lucene.uid.UidField; import org.elasticsearch.index.mapper.internal.UidFieldMapper; @@ -59,7 +60,7 @@ public class VersionFetchSubPhase implements FetchSubPhase { // it might make sense to cache the TermDocs on a shared fetch context and just skip here) // it is going to mean we work on the high level multi reader and not the lower level reader as is // the case below... - long version = UidField.loadVersion(hitContext.reader(), UidFieldMapper.TERM_FACTORY.createTerm(hitContext.doc().get(UidFieldMapper.NAME))); + long version = UidField.loadVersion(hitContext.readerContext(), new Term(UidFieldMapper.NAME, hitContext.doc().get(UidFieldMapper.NAME))); if (version < 0) { version = -1; } From 724fadd2cd507e55a0dece9d755e2b5675c2fe9a Mon Sep 17 00:00:00 2001 From: Chris Male Date: Wed, 31 Oct 2012 11:22:54 +1300 Subject: [PATCH 061/146] lucene 4: Converted Analyzers in MapperService --- .../index/mapper/MapperService.java | 158 ++---------------- 1 file changed, 17 insertions(+), 141 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/mapper/MapperService.java b/src/main/java/org/elasticsearch/index/mapper/MapperService.java index 0e76465f4b3..07de483594a 100644 --- a/src/main/java/org/elasticsearch/index/mapper/MapperService.java +++ b/src/main/java/org/elasticsearch/index/mapper/MapperService.java @@ -25,7 +25,7 @@ import com.google.common.collect.Iterators; import com.google.common.collect.Sets; import com.google.common.collect.UnmodifiableIterator; import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.AnalyzerWrapper; import org.apache.lucene.index.Term; import org.apache.lucene.queries.FilterClause; import org.apache.lucene.search.BooleanClause; @@ -864,7 +864,7 @@ public class MapperService extends AbstractIndexComponent implements Iterable Date: Tue, 30 Oct 2012 23:23:44 +0100 Subject: [PATCH 062/146] lucene 4: Fixed import issue. 
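FilterClause moved out of the core search package in Lucene 4. A minimal sketch of the new import and usage (the wrapped match-all filter is an arbitrary placeholder):

    import org.apache.lucene.queries.FilterClause;  // was org.apache.lucene.search.FilterClause in 3.x
    import org.apache.lucene.search.BooleanClause;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.search.QueryWrapperFilter;

    class FilterClauseSketch {
        static FilterClause mustClause() {
            Filter filter = new QueryWrapperFilter(new MatchAllDocsQuery());
            return new FilterClause(filter, BooleanClause.Occur.MUST);
        }
    }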
--- .../org/elasticsearch/index/aliases/IndexAliasesService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/elasticsearch/index/aliases/IndexAliasesService.java b/src/main/java/org/elasticsearch/index/aliases/IndexAliasesService.java index 31aa54aa470..f1fadc90d08 100644 --- a/src/main/java/org/elasticsearch/index/aliases/IndexAliasesService.java +++ b/src/main/java/org/elasticsearch/index/aliases/IndexAliasesService.java @@ -21,9 +21,9 @@ package org.elasticsearch.index.aliases; import com.google.common.collect.ImmutableMap; import com.google.common.collect.UnmodifiableIterator; +import org.apache.lucene.queries.FilterClause; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.Filter; -import org.apache.lucene.search.FilterClause; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.compress.CompressedString; import org.elasticsearch.common.inject.Inject; From 639b1323b8796cd7b2aea5f4c21ef5785f3773ed Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Tue, 30 Oct 2012 18:46:50 -0400 Subject: [PATCH 063/146] lucene4: upgrade CustomMemoryIndex to Lucene 4 --- .../index/memory/CustomMemoryIndex.java | 1136 ++++++++--------- 1 file changed, 532 insertions(+), 604 deletions(-) diff --git a/src/main/java/org/apache/lucene/index/memory/CustomMemoryIndex.java b/src/main/java/org/apache/lucene/index/memory/CustomMemoryIndex.java index 01617403751..bd0c368bbd9 100644 --- a/src/main/java/org/apache/lucene/index/memory/CustomMemoryIndex.java +++ b/src/main/java/org/apache/lucene/index/memory/CustomMemoryIndex.java @@ -1,6 +1,6 @@ package org.apache.lucene.index.memory; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,99 +17,108 @@ package org.apache.lucene.index.memory; * limitations under the License. */ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; +import java.util.Comparator; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.NoSuchElementException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.index.*; -import org.apache.lucene.search.*; -import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.Norm; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.FieldInvertState; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.OrdTermState; +import org.apache.lucene.index.StoredFieldVisitor; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.index.memory.MemoryIndexNormDocValues.SingleValueSource; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.store.RAMDirectory; // for javadocs import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.Constants; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.Constants; // for javadocs import org.elasticsearch.common.io.FastStringReader; -import java.io.IOException; -import java.io.Serializable; -import java.util.*; - /** - * High-performance single-document main memory Apache Lucene fulltext search index. - *
+ * High-performance single-document main memory Apache Lucene fulltext search index. + * *

Overview

- *

+ * * This class is a replacement/substitute for a large subset of * {@link RAMDirectory} functionality. It is designed to - * enable maximum efficiency for on-the-fly matchmaking combining structured and - * fuzzy fulltext search in realtime streaming applications such as Nux XQuery based XML - * message queues, publish-subscribe systems for Blogs/newsfeeds, text chat, data acquisition and - * distribution systems, application level routers, firewalls, classifiers, etc. - * Rather than targeting fulltext search of infrequent queries over huge persistent - * data archives (historic search), this class targets fulltext search of huge - * numbers of queries over comparatively small transient realtime data (prospective - * search). - * For example as in - *

+ * enable maximum efficiency for on-the-fly matchmaking combining structured and 
+ * fuzzy fulltext search in realtime streaming applications such as Nux XQuery based XML 
+ * message queues, publish-subscribe systems for Blogs/newsfeeds, text chat, data acquisition and 
+ * distribution systems, application level routers, firewalls, classifiers, etc. 
+ * Rather than targeting fulltext search of infrequent queries over huge persistent 
+ * data archives (historic search), this class targets fulltext search of huge 
+ * numbers of queries over comparatively small transient realtime data (prospective 
+ * search). 
+ * For example as in 
+ * 
  * float score = search(String text, Query query)
  * 
- *

+ *

* Each instance can hold at most one Lucene "document", with a document containing * zero or more "fields", each field having a name and a fulltext value. The - * fulltext value is tokenized (split and transformed) into zero or more index terms + * fulltext value is tokenized (split and transformed) into zero or more index terms * (aka words) on addField(), according to the policy implemented by an * Analyzer. For example, Lucene analyzers can split on whitespace, normalize to lower case * for case insensitivity, ignore common terms with little discriminatory value such as "he", "in", "and" (stop * words), reduce the terms to their natural linguistic root form such as "fishing" - * being reduced to "fish" (stemming), resolve synonyms/inflexions/thesauri + * being reduced to "fish" (stemming), resolve synonyms/inflexions/thesauri * (upon indexing and/or querying), etc. For details, see * Lucene Analyzer Intro. - *

- * Arbitrary Lucene queries can be run against this class - see Lucene Query Syntax - * as well as + * Arbitrary Lucene queries can be run against this class - see + * Lucene Query Syntax + * as well as Query Parser Rules. - * Note that a Lucene query selects on the field names and associated (indexed) - * tokenized terms, not on the original fulltext(s) - the latter are not stored + * Note that a Lucene query selects on the field names and associated (indexed) + * tokenized terms, not on the original fulltext(s) - the latter are not stored * but rather thrown away immediately after tokenization. - *

+ *

* For some interesting background information on search technology, see Bob Wyman's - * Prospective Search, + * Prospective Search, * Jim Gray's * * A Call to Arms - Custom subscriptions, and Tim Bray's - * On Search, the Series. - *

- *

- * <h4>Example Usage</h4>
- *
- * <pre>
- * Analyzer analyzer = PatternAnalyzer.DEFAULT_ANALYZER;
- * //Analyzer analyzer = new SimpleAnalyzer();
+ *
+ *
+ * <h4>Example Usage</h4>
+ * 
+ * <pre>
+ * Analyzer analyzer = new SimpleAnalyzer(version);
  * MemoryIndex index = new MemoryIndex();
  * index.addField("content", "Readings about Salmons and other select Alaska fishing Manuals", analyzer);
  * index.addField("author", "Tales of James", analyzer);
- * QueryParser parser = new QueryParser("content", analyzer);
+ * QueryParser parser = new QueryParser(version, "content", analyzer);
  * float score = index.search(parser.parse("+author:james +salmon~ +fish* manual~"));
  * if (score > 0.0f) {
  *     System.out.println("it's a match");
@@ -118,11 +127,11 @@ import java.util.*;
  * }
  * System.out.println("indexData=" + index.toString());
  * 
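Spelled out as self-contained Lucene 4.0 code, the example above looks roughly like this; Version.LUCENE_40 and the import paths are assumptions, not part of the original javadoc:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.util.Version;

class ExampleUsage {
    public static void main(String[] args) throws Exception {
        Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_40);
        MemoryIndex index = new MemoryIndex();
        index.addField("content", "Readings about Salmons and other select Alaska fishing Manuals", analyzer);
        index.addField("author", "Tales of James", analyzer);
        QueryParser parser = new QueryParser(Version.LUCENE_40, "content", analyzer);
        float score = index.search(parser.parse("+author:james +salmon~ +fish* manual~"));
        if (score > 0.0f) {
            System.out.println("it's a match");
        } else {
            System.out.println("no match found");
        }
        System.out.println("indexData=" + index.toString());
    }
}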
- *

- *

- * <h4>Example XQuery Usage</h4>
- *
- * <pre>
+ *
+ *
+ * <h4>Example XQuery Usage</h4>
+ * 
+ * <pre>
  * (: An XQuery that finds all books authored by James that have something to do with "salmon fishing manuals", sorted by relevance :)
  * declare namespace lucene = "java:nux.xom.pool.FullTextUtil";
  * declare variable $query := "+salmon~ +fish* manual~"; (: any arbitrary Lucene query can go here :)
@@ -132,38 +141,38 @@ import java.util.*;
  * order by $score descending
  * return $book
  * 
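The lucene:match() call in the XQuery above delegates to a Java helper; the real FullTextUtil lives in Nux, outside this codebase, but the shape of such a helper is roughly the following sketch (names, field and analyzer choice are assumptions):

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.util.Version;

class FullTextMatchSketch {
    private static final Analyzer ANALYZER = new SimpleAnalyzer(Version.LUCENE_40);

    // Returns a relevance score in [0.0 .. 1.0]; 0.0 means "no match".
    static float match(String text, String queryString) throws ParseException {
        MemoryIndex index = new MemoryIndex();
        index.addField("content", text, ANALYZER);
        QueryParser parser = new QueryParser(Version.LUCENE_40, "content", ANALYZER);
        return index.search(parser.parse(queryString));
    }
}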
- *

- *

+ *
+ *
 * <h4>No thread safety guarantees</h4>
- *
+ *
 * An instance can be queried multiple times with the same or different queries,
 * but an instance is not thread-safe. If desired use idioms such as:
- * <pre>
+ * <pre>
  * MemoryIndex index = ...
  * synchronized (index) {
  *    // read and/or write index (i.e. add fields and/or query)
  * }
  * 
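One alternative to the synchronized idiom above is plain thread confinement, sketched here purely for illustration; nothing in this patch requires it:

import org.apache.lucene.index.memory.MemoryIndex;

class PerThreadIndex {
    // Each thread gets its own instance, so no locking is needed.
    private static final ThreadLocal<MemoryIndex> INDEX = new ThreadLocal<MemoryIndex>() {
        @Override
        protected MemoryIndex initialValue() {
            return new MemoryIndex();
        }
    };

    static MemoryIndex get() {
        return INDEX.get();
    }
}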
- *

- *

+ *
+ *
 * <h4>Performance Notes</h4>
- *
- * Internally there's a new data structure geared towards efficient indexing
- * and searching, plus the necessary support code to seamlessly plug into the Lucene
+ * 
+ * Internally there's a new data structure geared towards efficient indexing 
+ * and searching, plus the necessary support code to seamlessly plug into the Lucene 
 * framework.
- *

- * This class performs very well for very small texts (e.g. 10 chars)
- * as well as for large texts (e.g. 10 MB) and everything in between.
+ * 
+ * This class performs very well for very small texts (e.g. 10 chars) 
+ * as well as for large texts (e.g. 10 MB) and everything in between. 
 * Typically, it is about 10-100 times faster than RAMDirectory.
- * Note that RAMDirectory has particularly
+ * Note that RAMDirectory has particularly 
 * large efficiency overheads for small to medium sized texts, both in time and space.
- * Indexing a field with N tokens takes O(N) in the best case, and O(N logN) in the worst
+ * Indexing a field with N tokens takes O(N) in the best case, and O(N logN) in the worst 
 * case. Memory consumption is probably larger than for RAMDirectory.
- *
- * Example throughput of many simple term queries over a single MemoryIndex:
- * ~500000 queries/sec on a MacBook Pro, jdk 1.5.0_06, server VM.
+ * 
+ * Example throughput of many simple term queries over a single MemoryIndex: 
+ * ~500000 queries/sec on a MacBook Pro, jdk 1.5.0_06, server VM. 
 * As always, your mileage may vary.
- *

+ *
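For context on the throughput claim, a rough probe in the same spirit can be written as follows; the corpus and query are made up, and absolute numbers depend entirely on hardware, JVM and query mix:

import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.Version;

class ThroughputProbe {
    public static void main(String[] args) {
        MemoryIndex index = new MemoryIndex();
        index.addField("content", "the quick brown fox jumped over the lazy dog", new SimpleAnalyzer(Version.LUCENE_40));
        TermQuery query = new TermQuery(new Term("content", "fox"));
        int n = 1000000;
        long start = System.nanoTime();
        for (int i = 0; i < n; i++) {
            index.search(query); // same single-doc index queried repeatedly
        }
        double seconds = (System.nanoTime() - start) / 1e9;
        System.out.printf("%d queries in %.2fs (%.0f queries/sec)%n", n, seconds, n / seconds);
    }
}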

* If you're curious about * the whereabouts of bottlenecks, run java 1.5 with the non-perturbing '-server * -agentlib:hprof=cpu=samples,depth=10' flags, then study the trace log and @@ -171,49 +180,40 @@ import java.util.*; * target="_blank" * href="http://java.sun.com/developer/technicalArticles/Programming/HPROF.html"> * hprof tracing ). + * */ // LUCENE MONITOR - Support adding same field several times // -- Added pos to Info // -- Use current info of existing field -public class CustomMemoryIndex implements Serializable { +public class CustomMemoryIndex { - /** - * info for each field: Map - */ - private final HashMap fields = new HashMap(); + /** info for each field: Map */ + private final HashMap fields = new HashMap(); - /** - * fields sorted ascending by fieldName; lazily computed on demand - */ - private transient Map.Entry[] sortedFields; + /** fields sorted ascending by fieldName; lazily computed on demand */ + private transient Map.Entry[] sortedFields; - /** - * pos: positions[3*i], startOffset: positions[3*i +1], endOffset: positions[3*i +2] - */ + /** pos: positions[3*i], startOffset: positions[3*i +1], endOffset: positions[3*i +2] */ private final int stride; - /** - * Could be made configurable; See {@link Document#setBoost(float)} - */ + /** Could be made configurable; */ private static final float docBoost = 1.0f; - private static final long serialVersionUID = 2782195016849084649L; - private static final boolean DEBUG = false; - private final FieldInfos fieldInfos; + private HashMap fieldInfos = new HashMap(); /** * Sorts term entries into ascending order; also works for * Arrays.binarySearch() and Arrays.sort() */ private static final Comparator termComparator = new Comparator() { - @SuppressWarnings("unchecked") + @SuppressWarnings({"unchecked","rawtypes"}) public int compare(Object o1, Object o2) { - if (o1 instanceof Map.Entry) o1 = ((Map.Entry) o1).getKey(); - if (o2 instanceof Map.Entry) o2 = ((Map.Entry) o2).getKey(); + if (o1 instanceof Map.Entry) o1 = ((Map.Entry) o1).getKey(); + if (o2 instanceof Map.Entry) o2 = ((Map.Entry) o2).getKey(); if (o1 == o2) return 0; - return ((String) o1).compareTo((String) o2); + return ((Comparable) o1).compareTo((Comparable) o2); } }; @@ -228,29 +228,29 @@ public class CustomMemoryIndex implements Serializable { * Constructs an empty instance that can optionally store the start and end * character offset of each token term in the text. This can be useful for * highlighting of hit locations with the Lucene highlighter package. - * Private until the highlighter package matures, so that this can actually + * Protected until the highlighter package matures, so that this can actually * be meaningfully integrated. * - * @param storeOffsets whether or not to store the start and end character offset of - * each token term in the text + * @param storeOffsets + * whether or not to store the start and end character offset of + * each token term in the text */ - private CustomMemoryIndex(boolean storeOffsets) { + protected CustomMemoryIndex(boolean storeOffsets) { this.stride = storeOffsets ? 
3 : 1; - fieldInfos = new FieldInfos(); } /** * Convenience method; Tokenizes the given field text and adds the resulting * terms to the index; Equivalent to adding an indexed non-keyword Lucene - * {@link org.apache.lucene.document.Field} that is - * {@link org.apache.lucene.document.Field.Index#ANALYZED tokenized}, - * {@link org.apache.lucene.document.Field.Store#NO not stored}, - * {@link org.apache.lucene.document.Field.TermVector#WITH_POSITIONS termVectorStored with positions} (or - * {@link org.apache.lucene.document.Field.TermVector#WITH_POSITIONS termVectorStored with positions and offsets}), + * {@link org.apache.lucene.document.Field} that is tokenized, not stored, + * termVectorStored with positions (or termVectorStored with positions and offsets), * - * @param fieldName a name to be associated with the text - * @param text the text to tokenize and index. - * @param analyzer the analyzer to use for tokenization + * @param fieldName + * a name to be associated with the text + * @param text + * the text to tokenize and index. + * @param analyzer + * the analyzer to use for tokenization */ public void addField(String fieldName, String text, Analyzer analyzer) { if (fieldName == null) @@ -262,7 +262,7 @@ public class CustomMemoryIndex implements Serializable { TokenStream stream; try { - stream = analyzer.reusableTokenStream(fieldName, new FastStringReader(text)); + stream = analyzer.tokenStream(fieldName, new FastStringReader(text)); } catch (IOException ex) { throw new RuntimeException(ex); } @@ -277,7 +277,8 @@ public class CustomMemoryIndex implements Serializable { * {@link #addField(String, TokenStream)}, perhaps wrapped into another * {@link org.apache.lucene.analysis.TokenFilter}, as desired. * - * @param keywords the keywords to generate tokens for + * @param keywords + * the keywords to generate tokens for * @return the corresponding token stream */ public TokenStream keywordTokenStream(final Collection keywords) { @@ -302,7 +303,7 @@ public class CustomMemoryIndex implements Serializable { String term = obj.toString(); clearAttributes(); termAtt.setEmpty().append(term); - offsetAtt.setOffset(start, start + termAtt.length()); + offsetAtt.setOffset(start, start+termAtt.length()); start += term.length() + 1; // separate words by 1 (blank) character return true; } @@ -312,8 +313,10 @@ public class CustomMemoryIndex implements Serializable { /** * Equivalent to addField(fieldName, stream, 1.0f). * - * @param fieldName a name to be associated with the text - * @param stream the token stream to retrieve tokens from + * @param fieldName + * a name to be associated with the text + * @param stream + * the token stream to retrieve tokens from */ public void addField(String fieldName, TokenStream stream) { addField(fieldName, stream, 1.0f); @@ -324,11 +327,14 @@ public class CustomMemoryIndex implements Serializable { * Equivalent to adding a tokenized, indexed, termVectorStored, unstored, * Lucene {@link org.apache.lucene.document.Field}. * Finally closes the token stream. Note that untokenized keywords can be added with this method via - * {@link #keywordTokenStream(Collection)}, the Lucene contrib KeywordTokenizer or similar utilities. + * {@link #keywordTokenStream(Collection)}, the Lucene KeywordTokenizer or similar utilities. * - * @param fieldName a name to be associated with the text - * @param stream the token stream to retrieve tokens from. 
- * @param boost the boost factor for hits for this field + * @param fieldName + * a name to be associated with the text + * @param stream + * the token stream to retrieve tokens from. + * @param boost + * the boost factor for hits for this field * @see org.apache.lucene.document.Field#setBoost(float) */ public void addField(String fieldName, TokenStream stream, float boost) { @@ -340,12 +346,16 @@ public class CustomMemoryIndex implements Serializable { if (boost <= 0.0f) throw new IllegalArgumentException("boost factor must be greater than 0.0"); - HashMap terms = new HashMap(); + HashMap terms = new HashMap(); int numTokens = 0; int numOverlapTokens = 0; int pos = -1; - fieldInfos.add(fieldName, true, true); + if (!fieldInfos.containsKey(fieldName)) { + fieldInfos.put(fieldName, + new FieldInfo(fieldName, true, fieldInfos.size(), false, false, false, IndexOptions.DOCS_AND_FREQS_AND_POSITIONS, null, null, null)); + } + // CHANGE if (fields.get(fieldName) != null) { @@ -355,16 +365,16 @@ public class CustomMemoryIndex implements Serializable { numOverlapTokens = info.numOverlapTokens; pos = info.pos; } else { - terms = new HashMap(); + terms = new HashMap(); } - - CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); + TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); PositionIncrementAttribute posIncrAttribute = stream.addAttribute(PositionIncrementAttribute.class); OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class); + BytesRef ref = termAtt.getBytesRef(); stream.reset(); while (stream.incrementToken()) { - String term = termAtt.toString(); - if (term.length() == 0) continue; // nothing to do + termAtt.fillBytesRef(); + if (ref.length == 0) continue; // nothing to do // if (DEBUG) System.err.println("token='" + term + "'"); numTokens++; final int posIncr = posIncrAttribute.getPositionIncrement(); @@ -372,10 +382,10 @@ public class CustomMemoryIndex implements Serializable { numOverlapTokens++; pos += posIncr; - ArrayIntList positions = terms.get(term); + ArrayIntList positions = terms.get(ref); if (positions == null) { // term not seen before positions = new ArrayIntList(stride); - terms.put(term, positions); + terms.put(BytesRef.deepCopyOf(ref), positions); } if (stride == 1) { positions.add(pos); @@ -419,16 +429,18 @@ public class CustomMemoryIndex implements Serializable { * Convenience method that efficiently returns the relevance score by * matching this index against the given Lucene query expression. * - * @param query an arbitrary Lucene query to run against this index + * @param query + * an arbitrary Lucene query to run against this index * @return the relevance score of the matchmaking; A number in the range * [0.0 .. 1.0], with 0.0 indicating no match. The higher the number * the better the match. 
+ * */ public float search(Query query) { if (query == null) throw new IllegalArgumentException("query must not be null"); - Searcher searcher = createSearcher(); + IndexSearcher searcher = createSearcher(); try { final float[] scores = new float[1]; // inits to 0.0f (no match) searcher.search(query, new Collector() { @@ -440,7 +452,7 @@ public class CustomMemoryIndex implements Serializable { } @Override - public void setScorer(Scorer scorer) throws IOException { + public void setScorer(Scorer scorer) { this.scorer = scorer; } @@ -450,8 +462,7 @@ public class CustomMemoryIndex implements Serializable { } @Override - public void setNextReader(IndexReader reader, int docBase) { - } + public void setNextReader(AtomicReaderContext context) { } }); float score = scores[0]; return score; @@ -498,9 +509,9 @@ public class CustomMemoryIndex implements Serializable { int len = info.terms.size(); size += VM.sizeOfHashMap(len); - Iterator> iter2 = info.terms.entrySet().iterator(); + Iterator> iter2 = info.terms.entrySet().iterator(); while (--len >= 0) { // for each term - Map.Entry e = iter2.next(); + Map.Entry e = iter2.next(); size += VM.sizeOfObject(PTR + 3 * INT); // assumes substring() memory overlay // size += STR + 2 * ((String) e.getKey()).length(); ArrayIntList positions = e.getValue(); @@ -514,23 +525,19 @@ public class CustomMemoryIndex implements Serializable { return positions.size() / stride; } - /** - * sorts into ascending order (on demand), reusing memory along the way - */ + /** sorts into ascending order (on demand), reusing memory along the way */ private void sortFields() { if (sortedFields == null) sortedFields = sort(fields); } - /** - * returns a view of the given map's entries, sorted ascending by key - */ - private static Map.Entry[] sort(HashMap map) { + /** returns a view of the given map's entries, sorted ascending by key */ + private static Map.Entry[] sort(HashMap map) { int size = map.size(); @SuppressWarnings("unchecked") - Map.Entry[] entries = new Map.Entry[size]; + Map.Entry[] entries = new Map.Entry[size]; - Iterator> iter = map.entrySet().iterator(); - for (int i = 0; i < size; i++) { + Iterator> iter = map.entrySet().iterator(); + for (int i=0; i < size; i++) { entries[i] = iter.next(); } @@ -551,8 +558,8 @@ public class CustomMemoryIndex implements Serializable { int sumPositions = 0; int sumTerms = 0; - for (int i = 0; i < sortedFields.length; i++) { - Map.Entry entry = sortedFields[i]; + for (int i=0; i < sortedFields.length; i++) { + Map.Entry entry = sortedFields[i]; String fieldName = entry.getKey(); Info info = entry.getValue(); info.sortTerms(); @@ -560,15 +567,15 @@ public class CustomMemoryIndex implements Serializable { int numChars = 0; int numPositions = 0; - for (int j = 0; j < info.sortedTerms.length; j++) { - Map.Entry e = info.sortedTerms[j]; - String term = e.getKey(); + for (int j=0; j < info.sortedTerms.length; j++) { + Map.Entry e = info.sortedTerms[j]; + BytesRef term = e.getKey(); ArrayIntList positions = e.getValue(); result.append("\t'" + term + "':" + numPositions(positions) + ":"); result.append(positions.toString(stride)); // ignore offsets result.append("\n"); numPositions += numPositions(positions); - numChars += term.length(); + numChars += term.length; } result.append("\tterms=" + info.sortedTerms.length); @@ -587,58 +594,49 @@ public class CustomMemoryIndex implements Serializable { return result.toString(); } - - /////////////////////////////////////////////////////////////////////////////// - // Nested classes: - 
/////////////////////////////////////////////////////////////////////////////// - /** * Index data structure for a field; Contains the tokenized term texts and * their positions. */ - private static final class Info implements Serializable { + private static final class Info { /** * Term strings and their positions for this field: Map */ - private final HashMap terms; + private final HashMap terms; - /** - * Terms sorted ascending by term text; computed on demand - */ - private transient Map.Entry[] sortedTerms; + /** Terms sorted ascending by term text; computed on demand */ + private transient Map.Entry[] sortedTerms; - /** - * Number of added tokens for this field - */ + /** Number of added tokens for this field */ private final int numTokens; - /** - * Number of overlapping tokens for this field - */ + /** Number of overlapping tokens for this field */ private final int numOverlapTokens; - /** - * Boost factor for hits for this field - */ + /** Boost factor for hits for this field */ private final float boost; private final int pos; - /** - * Term for this field's fieldName, lazily computed on demand - */ - public transient Term template; + private final long sumTotalTermFreq; - private static final long serialVersionUID = 2882195016849084649L; - - public Info(HashMap terms, int numTokens, int numOverlapTokens, float boost, int pos) { + public Info(HashMap terms, int numTokens, int numOverlapTokens, float boost, int pos) { this.terms = terms; this.numTokens = numTokens; this.numOverlapTokens = numOverlapTokens; this.boost = boost; this.pos = pos; + long sum = 0; + for(Map.Entry ent : terms.entrySet()) { + sum += ent.getValue().size(); + } + sumTotalTermFreq = sum; + } + + public long getSumTotalTermFreq() { + return sumTotalTermFreq; } /** @@ -653,20 +651,6 @@ public class CustomMemoryIndex implements Serializable { if (sortedTerms == null) sortedTerms = sort(terms); } - /** - * note that the frequency can be calculated as numPosition(getPositions(x)) - */ - public ArrayIntList getPositions(String term) { - return terms.get(term); - } - - /** - * note that the frequency can be calculated as numPosition(getPositions(x)) - */ - public ArrayIntList getPositions(int pos) { - return sortedTerms[pos].getValue(); - } - public float getBoost() { return boost; } @@ -677,12 +661,11 @@ public class CustomMemoryIndex implements Serializable { /////////////////////////////////////////////////////////////////////////////// // Nested classes: /////////////////////////////////////////////////////////////////////////////// - /** * Efficient resizable auto-expanding list holding int elements; * implemented with arrays. 
*/ - private static final class ArrayIntList implements Serializable { + private static final class ArrayIntList { private int[] elements; private int size = 0; @@ -705,8 +688,8 @@ public class CustomMemoryIndex implements Serializable { public void add(int pos, int start, int end) { if (size + 3 > elements.length) ensureCapacity(size + 3); elements[size] = pos; - elements[size + 1] = start; - elements[size + 2] = end; + elements[size+1] = start; + elements[size+2] = end; size += 3; } @@ -719,16 +702,6 @@ public class CustomMemoryIndex implements Serializable { return size; } - public int[] toArray(int stride) { - int[] arr = new int[size() / stride]; - if (stride == 1) { - System.arraycopy(elements, 0, arr, 0, size); // fast path - } else { - for (int i = 0, j = 0; j < size; i++, j += stride) arr[i] = elements[j]; - } - return arr; - } - private void ensureCapacity(int minCapacity) { int newCapacity = Math.max(minCapacity, (elements.length * 3) / 2 + 1); int[] newElements = new int[newCapacity]; @@ -741,17 +714,15 @@ public class CustomMemoryIndex implements Serializable { + ", size: " + size); } - /** - * returns the first few positions (without offsets); debug only - */ + /** returns the first few positions (without offsets); debug only */ public String toString(int stride) { int s = size() / stride; int len = Math.min(10, s); // avoid printing huge lists - StringBuilder buf = new StringBuilder(4 * len); + StringBuilder buf = new StringBuilder(4*len); buf.append("["); for (int i = 0; i < len; i++) { - buf.append(get(i * stride)); - if (i < len - 1) buf.append(", "); + buf.append(get(i*stride)); + if (i < len-1) buf.append(", "); } if (len != s) buf.append(", ..."); // and some more... buf.append("]"); @@ -763,15 +734,14 @@ public class CustomMemoryIndex implements Serializable { /////////////////////////////////////////////////////////////////////////////// // Nested classes: /////////////////////////////////////////////////////////////////////////////// - private static final Term MATCH_ALL_TERM = new Term(""); /** * Search support for Lucene framework integration; implements all methods * required by the Lucene IndexReader contracts. */ - final class MemoryIndexReader extends IndexReader { + final class MemoryIndexReader extends AtomicReader { - private Searcher searcher; // needed to find searcher.getSimilarity() + private IndexSearcher searcher; // needed to find searcher.getSimilarity() private MemoryIndexReader() { super(); // avoid as much superclass baggage as possible @@ -786,390 +756,343 @@ public class CustomMemoryIndex implements Serializable { } @Override - public int docFreq(Term term) { - Info info = getInfo(term.field()); - int freq = 0; - if (info != null) freq = info.getPositions(term.text()) != null ? 
1 : 0; - if (DEBUG) System.err.println("MemoryIndexReader.docFreq: " + term + ", freq:" + freq); - return freq; + public Bits getLiveDocs() { + return null; } @Override - public TermEnum terms() { - if (DEBUG) System.err.println("MemoryIndexReader.terms()"); - return terms(MATCH_ALL_TERM); + public FieldInfos getFieldInfos() { + return new FieldInfos(fieldInfos.values().toArray(new FieldInfo[fieldInfos.size()])); + } + + private class MemoryFields extends Fields { + @Override + public Iterator iterator() { + return new Iterator() { + int upto = -1; + + @Override + public String next() { + upto++; + if (upto >= sortedFields.length) { + throw new NoSuchElementException(); + } + return sortedFields[upto].getKey(); + } + + @Override + public boolean hasNext() { + return upto+1 < sortedFields.length; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + + @Override + public Terms terms(final String field) { + int i = Arrays.binarySearch(sortedFields, field, termComparator); + if (i < 0) { + return null; + } else { + final Info info = getInfo(i); + info.sortTerms(); + + return new Terms() { + @Override + public TermsEnum iterator(TermsEnum reuse) { + return new MemoryTermsEnum(info); + } + + @Override + public Comparator getComparator() { + return BytesRef.getUTF8SortedAsUnicodeComparator(); + } + + @Override + public long size() { + return info.sortedTerms.length; + } + + @Override + public long getSumTotalTermFreq() { + return info.getSumTotalTermFreq(); + } + + @Override + public long getSumDocFreq() { + // each term has df=1 + return info.sortedTerms.length; + } + + @Override + public int getDocCount() { + return info.sortedTerms.length > 0 ? 1 : 0; + } + + @Override + public boolean hasOffsets() { + return stride == 3; + } + + @Override + public boolean hasPositions() { + return true; + } + + @Override + public boolean hasPayloads() { + return false; + } + }; + } + } + + @Override + public int size() { + return sortedFields.length; + } } @Override - public TermEnum terms(Term term) { - if (DEBUG) System.err.println("MemoryIndexReader.terms: " + term); - - int i; // index into info.sortedTerms - int j; // index into sortedFields - + public Fields fields() { sortFields(); - if (sortedFields.length == 1 && sortedFields[0].getKey() == term.field()) { - j = 0; // fast path - } else { - j = Arrays.binarySearch(sortedFields, term.field(), termComparator); - } - - if (j < 0) { // not found; choose successor - j = -j - 1; - i = 0; - if (j < sortedFields.length) getInfo(j).sortTerms(); - } else { // found - Info info = getInfo(j); - info.sortTerms(); - i = Arrays.binarySearch(info.sortedTerms, term.text(), termComparator); - if (i < 0) { // not found; choose successor - i = -i - 1; - if (i >= info.sortedTerms.length) { // move to next successor - j++; - i = 0; - if (j < sortedFields.length) getInfo(j).sortTerms(); - } - } - } - final int ix = i; - final int jx = j; - - return new TermEnum() { - - private int srtTermsIdx = ix; // index into info.sortedTerms - private int srtFldsIdx = jx; // index into sortedFields - - @Override - public boolean next() { - if (DEBUG) System.err.println("TermEnum.next"); - if (srtFldsIdx >= sortedFields.length) return false; - Info info = getInfo(srtFldsIdx); - if (++srtTermsIdx < info.sortedTerms.length) return true; - - // move to successor - srtFldsIdx++; - srtTermsIdx = 0; - if (srtFldsIdx >= sortedFields.length) return false; - getInfo(srtFldsIdx).sortTerms(); - return true; - } - - @Override - public Term term() { - 
if (DEBUG) System.err.println("TermEnum.term: " + srtTermsIdx); - if (srtFldsIdx >= sortedFields.length) return null; - Info info = getInfo(srtFldsIdx); - if (srtTermsIdx >= info.sortedTerms.length) return null; -// if (DEBUG) System.err.println("TermEnum.term: " + i + ", " + info.sortedTerms[i].getKey()); - return createTerm(info, srtFldsIdx, info.sortedTerms[srtTermsIdx].getKey()); - } - - @Override - public int docFreq() { - if (DEBUG) System.err.println("TermEnum.docFreq"); - if (srtFldsIdx >= sortedFields.length) return 0; - Info info = getInfo(srtFldsIdx); - if (srtTermsIdx >= info.sortedTerms.length) return 0; - return numPositions(info.getPositions(srtTermsIdx)); - } - - @Override - public void close() { - if (DEBUG) System.err.println("TermEnum.close"); - } - - /** Returns a new Term object, minimizing String.intern() overheads. */ - private Term createTerm(Info info, int pos, String text) { - // Assertion: sortFields has already been called before - Term template = info.template; - if (template == null) { // not yet cached? - String fieldName = sortedFields[pos].getKey(); - template = new Term(fieldName); - info.template = template; - } - - return template.createTerm(text); - } - - }; + return new MemoryFields(); } - @Override - public TermPositions termPositions() { - if (DEBUG) System.err.println("MemoryIndexReader.termPositions"); + private class MemoryTermsEnum extends TermsEnum { + private final Info info; + private final BytesRef br = new BytesRef(); + int termUpto = -1; - return new TermPositions() { + public MemoryTermsEnum(Info info) { + this.info = info; + info.sortTerms(); + } - private boolean hasNext; - private int cursor = 0; - private ArrayIntList current; - private Term term; - - public void seek(Term term) { - this.term = term; - if (DEBUG) System.err.println(".seek: " + term); - if (term == null) { - hasNext = true; // term==null means match all docs - } else { - Info info = getInfo(term.field()); - current = info == null ? null : info.getPositions(term.text()); - hasNext = (current != null); - cursor = 0; - } - } - - public void seek(TermEnum termEnum) { - if (DEBUG) System.err.println(".seekEnum"); - seek(termEnum.term()); - } - - public int doc() { - if (DEBUG) System.err.println(".doc"); - return 0; - } - - public int freq() { - int freq = current != null ? numPositions(current) : (term == null ? 1 : 0); - if (DEBUG) System.err.println(".freq: " + freq); - return freq; - } - - public boolean next() { - if (DEBUG) System.err.println(".next: " + current + ", oldHasNext=" + hasNext); - boolean next = hasNext; - hasNext = false; - return next; - } - - public int read(int[] docs, int[] freqs) { - if (DEBUG) System.err.println(".read: " + docs.length); - if (!hasNext) return 0; - hasNext = false; - docs[0] = 0; - freqs[0] = freq(); - return 1; - } - - public boolean skipTo(int target) { - if (DEBUG) System.err.println(".skipTo: " + target); - return next(); - } - - public void close() { - if (DEBUG) System.err.println(".close"); - } - - public int nextPosition() { // implements TermPositions - int pos = current.get(cursor); - cursor += stride; - if (DEBUG) System.err.println(".nextPosition: " + pos); - return pos; - } - - /** - * Not implemented. - * @throws UnsupportedOperationException - */ - public int getPayloadLength() { - throw new UnsupportedOperationException(); - } - - /** - * Not implemented. 
- * @throws UnsupportedOperationException - */ - public byte[] getPayload(byte[] data, int offset) throws IOException { - throw new UnsupportedOperationException(); - } - - public boolean isPayloadAvailable() { - // unsuported + @Override + public boolean seekExact(BytesRef text, boolean useCache) { + termUpto = Arrays.binarySearch(info.sortedTerms, text, termComparator); + if (termUpto >= 0) { + br.copyBytes(info.sortedTerms[termUpto].getKey()); + return true; + } else { return false; } - - }; - } - - @Override - public TermDocs termDocs() { - if (DEBUG) System.err.println("MemoryIndexReader.termDocs"); - return termPositions(); - } - - @Override - public TermFreqVector[] getTermFreqVectors(int docNumber) { - if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVectors"); - TermFreqVector[] vectors = new TermFreqVector[fields.size()]; -// if (vectors.length == 0) return null; - Iterator iter = fields.keySet().iterator(); - for (int i = 0; i < vectors.length; i++) { - vectors[i] = getTermFreqVector(docNumber, iter.next()); } - return vectors; + + @Override + public SeekStatus seekCeil(BytesRef text, boolean useCache) { + termUpto = Arrays.binarySearch(info.sortedTerms, text, termComparator); + if (termUpto < 0) { // not found; choose successor + termUpto = -termUpto -1; + if (termUpto >= info.sortedTerms.length) { + return SeekStatus.END; + } else { + br.copyBytes(info.sortedTerms[termUpto].getKey()); + return SeekStatus.NOT_FOUND; + } + } else { + br.copyBytes(info.sortedTerms[termUpto].getKey()); + return SeekStatus.FOUND; + } + } + + @Override + public void seekExact(long ord) { + assert ord < info.sortedTerms.length; + termUpto = (int) ord; + } + + @Override + public BytesRef next() { + termUpto++; + if (termUpto >= info.sortedTerms.length) { + return null; + } else { + br.copyBytes(info.sortedTerms[termUpto].getKey()); + return br; + } + } + + @Override + public BytesRef term() { + return br; + } + + @Override + public long ord() { + return termUpto; + } + + @Override + public int docFreq() { + return 1; + } + + @Override + public long totalTermFreq() { + return info.sortedTerms[termUpto].getValue().size(); + } + + @Override + public DocsEnum docs(Bits liveDocs, DocsEnum reuse, int flags) { + if (reuse == null || !(reuse instanceof MemoryDocsEnum)) { + reuse = new MemoryDocsEnum(); + } + return ((MemoryDocsEnum) reuse).reset(liveDocs, info.sortedTerms[termUpto].getValue()); + } + + @Override + public DocsAndPositionsEnum docsAndPositions(Bits liveDocs, DocsAndPositionsEnum reuse, int flags) { + if (reuse == null || !(reuse instanceof MemoryDocsAndPositionsEnum)) { + reuse = new MemoryDocsAndPositionsEnum(); + } + return ((MemoryDocsAndPositionsEnum) reuse).reset(liveDocs, info.sortedTerms[termUpto].getValue()); + } + + @Override + public Comparator getComparator() { + return BytesRef.getUTF8SortedAsUnicodeComparator(); + } + + @Override + public void seekExact(BytesRef term, TermState state) throws IOException { + assert state != null; + this.seekExact(((OrdTermState)state).ord); + } + + @Override + public TermState termState() throws IOException { + OrdTermState ts = new OrdTermState(); + ts.ord = termUpto; + return ts; + } } - @Override - public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException { - if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVectors"); + private class MemoryDocsEnum extends DocsEnum { + private ArrayIntList positions; + private boolean hasNext; + private Bits liveDocs; + private int doc = -1; - // if 
(vectors.length == 0) return null; - for (final String fieldName : fields.keySet()) { - getTermFreqVector(docNumber, fieldName, mapper); + public DocsEnum reset(Bits liveDocs, ArrayIntList positions) { + this.liveDocs = liveDocs; + this.positions = positions; + hasNext = true; + doc = -1; + return this; + } + + @Override + public int docID() { + return doc; + } + + @Override + public int nextDoc() { + if (hasNext && (liveDocs == null || liveDocs.get(0))) { + hasNext = false; + return doc = 0; + } else { + return doc = NO_MORE_DOCS; + } + } + + @Override + public int advance(int target) { + return nextDoc(); + } + + @Override + public int freq() throws IOException { + return positions.size(); + } + } + + private class MemoryDocsAndPositionsEnum extends DocsAndPositionsEnum { + private ArrayIntList positions; + private int posUpto; + private boolean hasNext; + private Bits liveDocs; + private int doc = -1; + + public DocsAndPositionsEnum reset(Bits liveDocs, ArrayIntList positions) { + this.liveDocs = liveDocs; + this.positions = positions; + posUpto = 0; + hasNext = true; + doc = -1; + return this; + } + + @Override + public int docID() { + return doc; + } + + @Override + public int nextDoc() { + if (hasNext && (liveDocs == null || liveDocs.get(0))) { + hasNext = false; + return doc = 0; + } else { + return doc = NO_MORE_DOCS; + } + } + + @Override + public int advance(int target) { + return nextDoc(); + } + + @Override + public int freq() throws IOException { + return positions.size() / stride; + } + + @Override + public int nextPosition() { + return positions.get(posUpto++ * stride); + } + + @Override + public int startOffset() { + return stride == 1 ? -1 : positions.get((posUpto - 1) * stride + 1); + } + + @Override + public int endOffset() { + return stride == 1 ? -1 : positions.get((posUpto - 1) * stride + 2); + } + + @Override + public BytesRef getPayload() { + return null; } } @Override - public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException { - if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVector"); - final Info info = getInfo(field); - if (info == null) { - return; + public Fields getTermVectors(int docID) { + if (docID == 0) { + return fields(); + } else { + return null; } - info.sortTerms(); - mapper.setExpectations(field, info.sortedTerms.length, stride != 1, true); - for (int i = info.sortedTerms.length; --i >= 0; ) { - - ArrayIntList positions = info.sortedTerms[i].getValue(); - int size = positions.size(); - org.apache.lucene.index.TermVectorOffsetInfo[] offsets = - new org.apache.lucene.index.TermVectorOffsetInfo[size / stride]; - - for (int k = 0, j = 1; j < size; k++, j += stride) { - int start = positions.get(j); - int end = positions.get(j + 1); - offsets[k] = new org.apache.lucene.index.TermVectorOffsetInfo(start, end); - } - mapper.map(info.sortedTerms[i].getKey(), - numPositions(info.sortedTerms[i].getValue()), - offsets, (info.sortedTerms[i].getValue()).toArray(stride)); - } - } - - @Override - public TermFreqVector getTermFreqVector(int docNumber, final String fieldName) { - if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVector"); - final Info info = getInfo(fieldName); - if (info == null) return null; // TODO: or return empty vector impl??? 
- info.sortTerms(); - - return new TermPositionVector() { - - private final Map.Entry[] sortedTerms = info.sortedTerms; - - public String getField() { - return fieldName; - } - - public int size() { - return sortedTerms.length; - } - - public String[] getTerms() { - String[] terms = new String[sortedTerms.length]; - for (int i = sortedTerms.length; --i >= 0; ) { - terms[i] = sortedTerms[i].getKey(); - } - return terms; - } - - public int[] getTermFrequencies() { - int[] freqs = new int[sortedTerms.length]; - for (int i = sortedTerms.length; --i >= 0; ) { - freqs[i] = numPositions(sortedTerms[i].getValue()); - } - return freqs; - } - - public int indexOf(String term) { - int i = Arrays.binarySearch(sortedTerms, term, termComparator); - return i >= 0 ? i : -1; - } - - public int[] indexesOf(String[] terms, int start, int len) { - int[] indexes = new int[len]; - for (int i = 0; i < len; i++) { - indexes[i] = indexOf(terms[start++]); - } - return indexes; - } - - // lucene >= 1.4.3 - public int[] getTermPositions(int index) { - return sortedTerms[index].getValue().toArray(stride); - } - - // lucene >= 1.9 (remove this method for lucene-1.4.3) - public org.apache.lucene.index.TermVectorOffsetInfo[] getOffsets(int index) { - if (stride == 1) return null; // no offsets stored - - ArrayIntList positions = sortedTerms[index].getValue(); - int size = positions.size(); - org.apache.lucene.index.TermVectorOffsetInfo[] offsets = - new org.apache.lucene.index.TermVectorOffsetInfo[size / stride]; - - for (int i = 0, j = 1; j < size; i++, j += stride) { - int start = positions.get(j); - int end = positions.get(j + 1); - offsets[i] = new org.apache.lucene.index.TermVectorOffsetInfo(start, end); - } - return offsets; - } - - }; } private Similarity getSimilarity() { if (searcher != null) return searcher.getSimilarity(); - return Similarity.getDefault(); + return IndexSearcher.getDefaultSimilarity(); } - private void setSearcher(Searcher searcher) { + private void setSearcher(IndexSearcher searcher) { this.searcher = searcher; } - /** - * performance hack: cache norms to avoid repeated expensive calculations - */ - private byte[] cachedNorms; - private String cachedFieldName; - private Similarity cachedSimilarity; - - @Override - public byte[] norms(String fieldName) { - byte[] norms = cachedNorms; - Similarity sim = getSimilarity(); - if (fieldName != cachedFieldName || sim != cachedSimilarity) { // not cached? - Info info = getInfo(fieldName); - int numTokens = info != null ? info.numTokens : 0; - int numOverlapTokens = info != null ? info.numOverlapTokens : 0; - float boost = info != null ? 
info.getBoost() : 1.0f; - FieldInvertState invertState = new FieldInvertState(0, numTokens, numOverlapTokens, 0, boost); - float n = sim.computeNorm(fieldName, invertState); - byte norm = sim.encodeNormValue(n); - norms = new byte[]{norm}; - - // cache it for future reuse - cachedNorms = norms; - cachedFieldName = fieldName; - cachedSimilarity = sim; - if (DEBUG) - System.err.println("MemoryIndexReader.norms: " + fieldName + ":" + n + ":" + norm + ":" + numTokens); - } - return norms; - } - - @Override - public void norms(String fieldName, byte[] bytes, int offset) { - if (DEBUG) System.err.println("MemoryIndexReader.norms*: " + fieldName); - byte[] norms = norms(fieldName); - System.arraycopy(norms, 0, bytes, offset, norms.length); - } - - @Override - protected void doSetNorm(int doc, String fieldName, byte value) { - throw new UnsupportedOperationException(); - } - @Override public int numDocs() { if (DEBUG) System.err.println("MemoryIndexReader.numDocs"); @@ -1182,17 +1105,10 @@ public class CustomMemoryIndex implements Serializable { return 1; } - //When we convert to JDK 1.5 make this Set @Override - public Document document(int n, FieldSelector fieldSelector) throws IOException { + public void document(int docID, StoredFieldVisitor visitor) { if (DEBUG) System.err.println("MemoryIndexReader.document"); - return new Document(); // there are no stored fields - } - - @Override - public boolean isDeleted(int n) { - if (DEBUG) System.err.println("MemoryIndexReader.isDeleted"); - return false; + // no-op: there are no stored fields } @Override @@ -1201,33 +1117,45 @@ public class CustomMemoryIndex implements Serializable { return false; } - @Override - protected void doDelete(int docNum) { - throw new UnsupportedOperationException(); - } - - @Override - protected void doUndeleteAll() { - throw new UnsupportedOperationException(); - } - - @Override - protected void doCommit(Map commitUserData) { - if (DEBUG) System.err.println("MemoryIndexReader.doCommit"); - } - @Override protected void doClose() { if (DEBUG) System.err.println("MemoryIndexReader.doClose"); } @Override - public FieldInfos getFieldInfos() { - return fieldInfos; + public DocValues docValues(String field) { + return null; + } + + /** performance hack: cache norms to avoid repeated expensive calculations */ + private DocValues cachedNormValues; + private String cachedFieldName; + private Similarity cachedSimilarity; + + @Override + public DocValues normValues(String field) { + DocValues norms = cachedNormValues; + Similarity sim = getSimilarity(); + if (!field.equals(cachedFieldName) || sim != cachedSimilarity) { // not cached? + Info info = getInfo(field); + int numTokens = info != null ? info.numTokens : 0; + int numOverlapTokens = info != null ? info.numOverlapTokens : 0; + float boost = info != null ? 
info.getBoost() : 1.0f; + FieldInvertState invertState = new FieldInvertState(field, 0, numTokens, numOverlapTokens, 0, boost); + Norm norm = new Norm(); + sim.computeNorm(invertState, norm); + SingleValueSource singleByteSource = new SingleValueSource(norm); + norms = new MemoryIndexNormDocValues(singleByteSource); + // cache it for future reuse + cachedNormValues = norms; + cachedFieldName = field; + cachedSimilarity = sim; + if (DEBUG) System.err.println("MemoryIndexReader.norms: " + field + ":" + norm + ":" + numTokens); + } + return norms; } } - /////////////////////////////////////////////////////////////////////////////// // Nested classes: /////////////////////////////////////////////////////////////////////////////// From e8092fe290dd95579b6236d40ce5dbce7cfe4292 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Tue, 30 Oct 2012 21:12:19 -0400 Subject: [PATCH 064/146] lucene4: org.apache.lucene.search.vectorhighlight package cleanup --- src/main/java/org/apache/lucene/search/XTermsFilter.java | 5 ++++- .../search/vectorhighlight/AbstractFragmentsBuilder.java | 6 ++---- .../vectorhighlight/XScoreOrderFragmentsBuilder.java | 8 ++++---- 3 files changed, 10 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/apache/lucene/search/XTermsFilter.java b/src/main/java/org/apache/lucene/search/XTermsFilter.java index a2f2a8515fd..6873ebcd207 100644 --- a/src/main/java/org/apache/lucene/search/XTermsFilter.java +++ b/src/main/java/org/apache/lucene/search/XTermsFilter.java @@ -86,7 +86,10 @@ public class XTermsFilter extends Filter { length = index; } - + public Term[] getTerms() { + return filterTerms; + } + @Override public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { AtomicReader reader = context.reader(); diff --git a/src/main/java/org/apache/lucene/search/vectorhighlight/AbstractFragmentsBuilder.java b/src/main/java/org/apache/lucene/search/vectorhighlight/AbstractFragmentsBuilder.java index 2535655f9d8..d89a02f579a 100644 --- a/src/main/java/org/apache/lucene/search/vectorhighlight/AbstractFragmentsBuilder.java +++ b/src/main/java/org/apache/lucene/search/vectorhighlight/AbstractFragmentsBuilder.java @@ -144,7 +144,7 @@ public abstract class AbstractFragmentsBuilder extends BaseFragmentsBuilder { } } if (!toffsList.isEmpty()) { - subInfos.add(new FieldFragList.WeightedFragInfo.SubInfo(subInfo.text, toffsList, subInfo.getSeqnum())); + subInfos.add(new FieldFragList.WeightedFragInfo.SubInfo(subInfo.getText(), toffsList, subInfo.getSeqnum())); } if (subInfo.getTermsOffsets().isEmpty()) { @@ -175,9 +175,7 @@ public abstract class AbstractFragmentsBuilder extends BaseFragmentsBuilder { private final static List EMPTY = Collections.emptyList(); private WeightedFragInfo(int startOffset, int endOffset, float totalBoost, List subInfos) { - super(startOffset, endOffset, EMPTY); - this.subInfos = subInfos; - this.totalBoost = totalBoost; + super(startOffset, endOffset, subInfos, totalBoost); } } diff --git a/src/main/java/org/apache/lucene/search/vectorhighlight/XScoreOrderFragmentsBuilder.java b/src/main/java/org/apache/lucene/search/vectorhighlight/XScoreOrderFragmentsBuilder.java index 5e01d3112ca..9a3a33c472e 100644 --- a/src/main/java/org/apache/lucene/search/vectorhighlight/XScoreOrderFragmentsBuilder.java +++ b/src/main/java/org/apache/lucene/search/vectorhighlight/XScoreOrderFragmentsBuilder.java @@ -65,12 +65,12 @@ public class XScoreOrderFragmentsBuilder extends AbstractFragmentsBuilder { public static class ScoreComparator implements 
Comparator { public int compare(WeightedFragInfo o1, WeightedFragInfo o2) { - if (o1.totalBoost > o2.totalBoost) return -1; - else if (o1.totalBoost < o2.totalBoost) return 1; + if (o1.getTotalBoost() > o2.getTotalBoost()) return -1; + else if (o1.getTotalBoost() < o2.getTotalBoost()) return 1; // if same score then check startOffset else { - if (o1.startOffset < o2.startOffset) return -1; - else if (o1.startOffset > o2.startOffset) return 1; + if (o1.getStartOffset() < o2.getStartOffset()) return -1; + else if (o1.getStartOffset() > o2.getStartOffset()) return 1; } return 0; } From be424c45645b2c25c61b5e6ca0f137abe50bec16 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Tue, 30 Oct 2012 21:42:58 -0400 Subject: [PATCH 065/146] lucene4: fixed SwitchDirectory and CompressedDirectory (except fileLength method) --- .../common/compress/CompressedDirectory.java | 44 ++++--------------- .../common/compress/CompressedIndexInput.java | 2 +- .../common/lucene/store/SwitchDirectory.java | 27 +++--------- 3 files changed, 16 insertions(+), 57 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/compress/CompressedDirectory.java b/src/main/java/org/elasticsearch/common/compress/CompressedDirectory.java index 81e96e6f703..3f62ebfdb88 100644 --- a/src/main/java/org/elasticsearch/common/compress/CompressedDirectory.java +++ b/src/main/java/org/elasticsearch/common/compress/CompressedDirectory.java @@ -1,5 +1,6 @@ package org.elasticsearch.common.compress; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import org.apache.lucene.store.*; import org.elasticsearch.index.store.support.ForceSyncDirectory; @@ -60,16 +61,6 @@ public class CompressedDirectory extends Directory implements ForceSyncDirectory return dir.fileExists(name); } - @Override - public long fileModified(String name) throws IOException { - return dir.fileModified(name); - } - - @Override - public void touchFile(String name) throws IOException { - dir.touchFile(name); - } - @Override public void deleteFile(String name) throws IOException { dir.deleteFile(name); @@ -97,24 +88,19 @@ public class CompressedDirectory extends Directory implements ForceSyncDirectory dir.sync(names); } - @Override - public void sync(String name) throws IOException { - dir.sync(name); - } - @Override public void forceSync(String name) throws IOException { if (dir instanceof ForceSyncDirectory) { ((ForceSyncDirectory) dir).forceSync(name); } else { - dir.sync(name); + dir.sync(ImmutableList.of(name)); } } @Override - public IndexInput openInput(String name) throws IOException { + public IndexInput openInput(String name, IOContext context) throws IOException { if (decompressExtensions.contains(getExtension(name))) { - IndexInput in = dir.openInput(name); + IndexInput in = dir.openInput(name, context); Compressor compressor1 = CompressorFactory.compressor(in); if (compressor1 != null) { return compressor1.indexInput(in); @@ -122,29 +108,15 @@ public class CompressedDirectory extends Directory implements ForceSyncDirectory return in; } } - return dir.openInput(name); + return dir.openInput(name, context); } @Override - public IndexInput openInput(String name, int bufferSize) throws IOException { - if (decompressExtensions.contains(getExtension(name))) { - IndexInput in = dir.openInput(name, bufferSize); - Compressor compressor1 = CompressorFactory.compressor(in); - if (compressor1 != null) { - return compressor1.indexInput(in); - } else { - return in; - } - } - return dir.openInput(name, bufferSize); - } - - 
@Override - public IndexOutput createOutput(String name) throws IOException { + public IndexOutput createOutput(String name, IOContext context) throws IOException { if (compress && compressExtensions.contains(getExtension(name))) { - return compressor.indexOutput(dir.createOutput(name)); + return compressor.indexOutput(dir.createOutput(name, context)); } - return dir.createOutput(name); + return dir.createOutput(name, context); } // can't override this one, we need to open the correct compression diff --git a/src/main/java/org/elasticsearch/common/compress/CompressedIndexInput.java b/src/main/java/org/elasticsearch/common/compress/CompressedIndexInput.java index c127f8dd612..153edae03b3 100644 --- a/src/main/java/org/elasticsearch/common/compress/CompressedIndexInput.java +++ b/src/main/java/org/elasticsearch/common/compress/CompressedIndexInput.java @@ -203,7 +203,7 @@ public abstract class CompressedIndexInput extends protected abstract int uncompress(IndexInput in, byte[] out) throws IOException; @Override - public Object clone() { + public IndexInput clone() { // we clone and we need to make sure we keep the same positions! CompressedIndexInput cloned = (CompressedIndexInput) super.clone(); cloned.uncompressed = new byte[uncompressedLength]; diff --git a/src/main/java/org/elasticsearch/common/lucene/store/SwitchDirectory.java b/src/main/java/org/elasticsearch/common/lucene/store/SwitchDirectory.java index 8252ba13200..1d453802aa7 100644 --- a/src/main/java/org/elasticsearch/common/lucene/store/SwitchDirectory.java +++ b/src/main/java/org/elasticsearch/common/lucene/store/SwitchDirectory.java @@ -19,8 +19,10 @@ package org.elasticsearch.common.lucene.store; +import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.elasticsearch.index.store.support.ForceSyncDirectory; @@ -123,16 +125,6 @@ public class SwitchDirectory extends Directory implements ForceSyncDirectory { return getDirectory(name).fileExists(name); } - @Override - public long fileModified(String name) throws IOException { - return getDirectory(name).fileModified(name); - } - - @Override - public void touchFile(String name) throws IOException { - getDirectory(name).touchFile(name); - } - @Override public void deleteFile(String name) throws IOException { getDirectory(name).deleteFile(name); @@ -144,8 +136,8 @@ public class SwitchDirectory extends Directory implements ForceSyncDirectory { } @Override - public IndexOutput createOutput(String name) throws IOException { - return getDirectory(name).createOutput(name); + public IndexOutput createOutput(String name, IOContext context) throws IOException { + return getDirectory(name).createOutput(name, context); } @Override @@ -163,23 +155,18 @@ public class SwitchDirectory extends Directory implements ForceSyncDirectory { secondaryDir.sync(secondaryNames); } - @Override - public void sync(String name) throws IOException { - getDirectory(name).sync(name); - } - @Override public void forceSync(String name) throws IOException { Directory dir = getDirectory(name); if (dir instanceof ForceSyncDirectory) { ((ForceSyncDirectory) dir).forceSync(name); } else { - dir.sync(name); + dir.sync(ImmutableList.of(name)); } } @Override - public IndexInput openInput(String name) throws IOException { - return getDirectory(name).openInput(name); + public IndexInput openInput(String name, 
IOContext context) throws IOException { + return getDirectory(name).openInput(name, context); } } From 27481800bc6fe4fea9ca13896e30ed650d621a29 Mon Sep 17 00:00:00 2001 From: Chris Male Date: Wed, 31 Oct 2012 12:01:11 +1300 Subject: [PATCH 066/146] lucene 4: Upgraded FieldMapper.fuzzyQuery to use new FuzzyQuery API --- .../lucene/queryparser/classic/MapperQueryParser.java | 3 ++- .../org/elasticsearch/index/mapper/FieldMapper.java | 4 ++-- .../index/mapper/core/AbstractFieldMapper.java | 10 ++++++---- .../index/mapper/core/ByteFieldMapper.java | 4 ++-- .../index/mapper/core/DateFieldMapper.java | 4 ++-- .../index/mapper/core/DoubleFieldMapper.java | 4 ++-- .../index/mapper/core/FloatFieldMapper.java | 4 ++-- .../index/mapper/core/IntegerFieldMapper.java | 4 ++-- .../index/mapper/core/LongFieldMapper.java | 4 ++-- .../index/mapper/core/NumberFieldMapper.java | 4 ++-- .../index/mapper/core/ShortFieldMapper.java | 4 ++-- .../index/mapper/internal/BoostFieldMapper.java | 4 ++-- .../elasticsearch/index/mapper/ip/IpFieldMapper.java | 8 +++++--- .../elasticsearch/index/query/FuzzyQueryParser.java | 2 +- .../org/elasticsearch/index/search/MatchQuery.java | 2 +- 15 files changed, 35 insertions(+), 30 deletions(-) diff --git a/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java b/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java index 703a9d189a9..a213519e2cd 100644 --- a/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java +++ b/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java @@ -397,7 +397,8 @@ public class MapperQueryParser extends QueryParser { currentMapper = fieldMappers.fieldMappers().mapper(); if (currentMapper != null) { try { - Query fuzzyQuery = currentMapper.fuzzyQuery(termStr, minSimilarity, fuzzyPrefixLength, settings.fuzzyMaxExpansions()); + //LUCENE 4 UPGRADE I disabled transpositions here by default - maybe this needs to be changed + Query fuzzyQuery = currentMapper.fuzzyQuery(termStr, minSimilarity, fuzzyPrefixLength, settings.fuzzyMaxExpansions(), false); return wrapSmartNameQuery(fuzzyQuery, fieldMappers, parseContext); } catch (RuntimeException e) { if (settings.lenient()) { diff --git a/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java index 01afed9e167..613f2f0c22b 100644 --- a/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/FieldMapper.java @@ -189,9 +189,9 @@ public interface FieldMapper { */ Query fieldQuery(String value, @Nullable QueryParseContext context); - Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions); + Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions); - Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions); + Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions, boolean transpositions); Query prefixQuery(String value, @Nullable MultiTermQuery.RewriteMethod method, @Nullable QueryParseContext context); diff --git a/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java index b6d2de6e227..9b224b3ffdf 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/AbstractFieldMapper.java @@ -409,13 +409,15 @@ 
public abstract class AbstractFieldMapper implements FieldMapper, Mapper { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions) { - return new FuzzyQuery(names().createIndexNameTerm(indexedValue(value)), Float.parseFloat(minSim), prefixLength, maxExpansions); + public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { + int edits = FuzzyQuery.floatToEdits(Float.parseFloat(minSim), value.codePointCount(0, value.length())); + return new FuzzyQuery(names.createIndexNameTerm(indexedValue(value)), edits, prefixLength, maxExpansions, transpositions); } @Override - public Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions) { - return new FuzzyQuery(names().createIndexNameTerm(value), (float) minSim, prefixLength, maxExpansions); + public Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions, boolean transpositions) { + int edits = FuzzyQuery.floatToEdits((float) minSim, value.codePointCount(0, value.length())); + return new FuzzyQuery(names.createIndexNameTerm(indexedValue(value)), edits, prefixLength, maxExpansions, transpositions); } @Override diff --git a/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java index f665688e208..9f32901e525 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java @@ -145,7 +145,7 @@ public class ByteFieldMapper extends NumberFieldMapper { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions) { + public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { byte iValue = Byte.parseByte(value); byte iSim; try { @@ -160,7 +160,7 @@ public class ByteFieldMapper extends NumberFieldMapper { } @Override - public Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions) { + public Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions, boolean transpositions) { byte iValue = Byte.parseByte(value); byte iSim = (byte) (minSim * dFuzzyFactor); return NumericRangeQuery.newIntRange(names.indexName(), precisionStep, diff --git a/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java index 06bc083bf77..7447d7e4be9 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java @@ -218,7 +218,7 @@ public class DateFieldMapper extends NumberFieldMapper { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions) { + public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { long iValue = dateMathParser.parse(value, System.currentTimeMillis()); long iSim; try { @@ -234,7 +234,7 @@ public class DateFieldMapper extends NumberFieldMapper { } @Override - public Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions) { + public Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions, boolean transpositions) { long iValue = dateMathParser.parse(value, System.currentTimeMillis()); long iSim = (long) (minSim * dFuzzyFactor); return 
NumericRangeQuery.newLongRange(names.indexName(), precisionStep, diff --git a/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java index cded1df85b6..e26ba4697e9 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java @@ -148,7 +148,7 @@ public class DoubleFieldMapper extends NumberFieldMapper { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions) { + public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { double iValue = Double.parseDouble(value); double iSim = Double.parseDouble(minSim); return NumericRangeQuery.newDoubleRange(names.indexName(), precisionStep, @@ -158,7 +158,7 @@ public class DoubleFieldMapper extends NumberFieldMapper { } @Override - public Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions) { + public Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions, boolean transpositions) { double iValue = Double.parseDouble(value); double iSim = minSim * dFuzzyFactor; return NumericRangeQuery.newDoubleRange(names.indexName(), precisionStep, diff --git a/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java index 820c8d894ef..6ce1e8b3fa2 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java @@ -149,7 +149,7 @@ public class FloatFieldMapper extends NumberFieldMapper { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions) { + public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { float iValue = Float.parseFloat(value); float iSim = Float.parseFloat(minSim); return NumericRangeQuery.newFloatRange(names.indexName(), precisionStep, @@ -159,7 +159,7 @@ public class FloatFieldMapper extends NumberFieldMapper { } @Override - public Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions) { + public Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions, boolean transpositions) { float iValue = Float.parseFloat(value); float iSim = (float) (minSim * dFuzzyFactor); return NumericRangeQuery.newFloatRange(names.indexName(), precisionStep, diff --git a/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java index c61355478e3..d28e57b3ba4 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java @@ -148,7 +148,7 @@ public class IntegerFieldMapper extends NumberFieldMapper { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions) { + public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { int iValue = Integer.parseInt(value); int iSim; try { @@ -163,7 +163,7 @@ public class IntegerFieldMapper extends NumberFieldMapper { } @Override - public Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions) { + public Query fuzzyQuery(String value, 
double minSim, int prefixLength, int maxExpansions, boolean transpositions) { int iValue = Integer.parseInt(value); int iSim = (int) (minSim * dFuzzyFactor); return NumericRangeQuery.newIntRange(names.indexName(), precisionStep, diff --git a/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java index f3348a05f39..49c32f288ae 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java @@ -149,7 +149,7 @@ public class LongFieldMapper extends NumberFieldMapper { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions) { + public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { long iValue = Long.parseLong(value); long iSim; try { @@ -164,7 +164,7 @@ public class LongFieldMapper extends NumberFieldMapper { } @Override - public Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions) { + public Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions, boolean transpositions) { long iValue = Long.parseLong(value); long iSim = (long) (minSim * dFuzzyFactor); return NumericRangeQuery.newLongRange(names.indexName(), precisionStep, diff --git a/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java index 3d6909e5df6..05571ad2ea0 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java @@ -216,10 +216,10 @@ public abstract class NumberFieldMapper extends AbstractFieldM } @Override - public abstract Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions); + public abstract Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions); @Override - public abstract Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions); + public abstract Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions, boolean transpositions); /** * Numeric field level filter are basically range queries with same value and included. 
That's the recommended diff --git a/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java index 83281a4bc00..f281f6383fb 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java @@ -148,7 +148,7 @@ public class ShortFieldMapper extends NumberFieldMapper { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions) { + public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { short iValue = Short.parseShort(value); short iSim; try { @@ -163,7 +163,7 @@ public class ShortFieldMapper extends NumberFieldMapper { } @Override - public Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions) { + public Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions, boolean transpositions) { short iValue = Short.parseShort(value); short iSim = (short) (minSim * dFuzzyFactor); return NumericRangeQuery.newIntRange(names.indexName(), precisionStep, diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java index 21a0c7fdcf8..f2c99d8c1aa 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java @@ -152,7 +152,7 @@ public class BoostFieldMapper extends NumberFieldMapper implements Intern } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions) { + public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { float iValue = Float.parseFloat(value); float iSim = Float.parseFloat(minSim); return NumericRangeQuery.newFloatRange(names.indexName(), precisionStep, @@ -162,7 +162,7 @@ public class BoostFieldMapper extends NumberFieldMapper implements Intern } @Override - public Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions) { + public Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions, boolean transpositions) { float iValue = Float.parseFloat(value); float iSim = (float) (minSim * dFuzzyFactor); return NumericRangeQuery.newFloatRange(names.indexName(), precisionStep, diff --git a/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java index bcd48ef38d3..6114801e3e1 100644 --- a/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java @@ -189,7 +189,7 @@ public class IpFieldMapper extends NumberFieldMapper { } @Override - public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions) { + public Query fuzzyQuery(String value, String minSim, int prefixLength, int maxExpansions, boolean transpositions) { long iValue = ipToLong(value); long iSim; try { @@ -208,8 +208,10 @@ public class IpFieldMapper extends NumberFieldMapper { } @Override - public Query fuzzyQuery(String value, double minSim, int prefixLength, int maxExpansions) { - return new FuzzyQuery(names().createIndexNameTerm(value), (float) minSim, prefixLength, maxExpansions); + public Query fuzzyQuery(String value, double minSim, int prefixLength, 
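// LUCENE 4 UPGRADE: FuzzyQuery is now edit-distance based rather than
// similarity based. Illustrative conversion (termLength stands for the
// term's code point count):
//   Lucene 3.x: new FuzzyQuery(term, (float) minSim, prefixLength, maxExpansions)
//   Lucene 4.0: new FuzzyQuery(term, FuzzyQuery.floatToEdits((float) minSim, termLength),
//                              prefixLength, maxExpansions, transpositions)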
int maxExpansions, boolean transpositions) { + // Lucene 4 Upgrade: It's surprising this uses FuzzyQuery instead of NumericRangeQuery + int edits = FuzzyQuery.floatToEdits((float) minSim, value.codePointCount(0, value.length())); + return new FuzzyQuery(names.createIndexNameTerm(indexedValue(value)), edits, prefixLength, maxExpansions, transpositions); } @Override diff --git a/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java b/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java index 07749b928d2..7bb9b072b72 100644 --- a/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/FuzzyQueryParser.java @@ -109,7 +109,7 @@ public class FuzzyQueryParser implements QueryParser { MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName); if (smartNameFieldMappers != null) { if (smartNameFieldMappers.hasMapper()) { - query = smartNameFieldMappers.mapper().fuzzyQuery(value, minSimilarity, prefixLength, maxExpansions); + query = smartNameFieldMappers.mapper().fuzzyQuery(value, minSimilarity, prefixLength, maxExpansions, transpositions); } } if (query == null) { diff --git a/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/src/main/java/org/elasticsearch/index/search/MatchQuery.java index 4134f5fb506..74c1726c356 100644 --- a/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -371,7 +371,7 @@ public class MatchQuery { private Query newTermQuery(@Nullable FieldMapper mapper, Term term) { if (fuzziness != null) { if (mapper != null) { - Query query = mapper.fuzzyQuery(term.text(), fuzziness, fuzzyPrefixLength, maxExpansions); + Query query = mapper.fuzzyQuery(term.text(), fuzziness, fuzzyPrefixLength, maxExpansions, transpositions); if (query instanceof FuzzyQuery) { QueryParsers.setRewriteMethod((FuzzyQuery) query, fuzzyRewriteMethod); } From 5cd9da45659e300956fb643bb076d217aba99fa4 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Tue, 30 Oct 2012 22:03:34 -0400 Subject: [PATCH 067/146] lucene4: fixed TransportNodesListShardStoreMetaData --- .../indices/store/TransportNodesListShardStoreMetaData.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/elasticsearch/indices/store/TransportNodesListShardStoreMetaData.java b/src/main/java/org/elasticsearch/indices/store/TransportNodesListShardStoreMetaData.java index f8671e1d84a..7c7f57843f2 100644 --- a/src/main/java/org/elasticsearch/indices/store/TransportNodesListShardStoreMetaData.java +++ b/src/main/java/org/elasticsearch/indices/store/TransportNodesListShardStoreMetaData.java @@ -196,7 +196,7 @@ public class TransportNodesListShardStoreMetaData extends TransportNodesOperatio if (Store.isChecksum(file.getName())) { continue; } - files.put(file.getName(), new StoreFileMetaData(file.getName(), file.length(), file.lastModified(), checksums.get(file.getName()))); + files.put(file.getName(), new StoreFileMetaData(file.getName(), file.length(), checksums.get(file.getName()))); } } From 25d03a6a7df1ffd4ae7b36ff7e56ee7f08091451 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Tue, 30 Oct 2012 22:04:11 -0400 Subject: [PATCH 068/146] lucene4: upgraded ScoreDocQueue --- .../java/org/elasticsearch/search/controller/ScoreDocQueue.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/elasticsearch/search/controller/ScoreDocQueue.java 
b/src/main/java/org/elasticsearch/search/controller/ScoreDocQueue.java index a9cbcb7c21e..881ef6a6cd3 100644 --- a/src/main/java/org/elasticsearch/search/controller/ScoreDocQueue.java +++ b/src/main/java/org/elasticsearch/search/controller/ScoreDocQueue.java @@ -29,7 +29,7 @@ import org.apache.lucene.util.PriorityQueue; public class ScoreDocQueue extends PriorityQueue { public ScoreDocQueue(int size) { - initialize(size); + super(size); } protected final boolean lessThan(ShardScoreDoc hitA, ShardScoreDoc hitB) { From 93906903b6f0f813414e865ebece49ce2bb4e71c Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Tue, 30 Oct 2012 22:37:43 -0400 Subject: [PATCH 069/146] lucene4: switched setNextReader from IndexReader to AtomicReaderContext --- .../lucene/search/FilteredCollector.java | 8 +++--- .../search/function/BoostScoreFunction.java | 4 +-- .../function/FiltersFunctionScoreQuery.java | 28 ++++++++++--------- .../search/function/FunctionScoreQuery.java | 22 ++++++++------- .../lucene/search/function/ScoreFunction.java | 4 +-- .../index/query/ScriptFilterParser.java | 2 +- .../script/ScriptFieldsFetchSubPhase.java | 2 +- 7 files changed, 37 insertions(+), 33 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/search/FilteredCollector.java b/src/main/java/org/elasticsearch/common/lucene/search/FilteredCollector.java index 37a7fdf8563..38ce581c5a7 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/FilteredCollector.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/FilteredCollector.java @@ -19,7 +19,7 @@ package org.elasticsearch.common.lucene.search; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Collector; import org.apache.lucene.search.Filter; import org.apache.lucene.search.Scorer; @@ -57,9 +57,9 @@ public class FilteredCollector extends Collector { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - collector.setNextReader(reader, docBase); - docSet = DocSets.convert(reader, filter.getDocIdSet(reader)); + public void setNextReader(AtomicReaderContext context) throws IOException { + collector.setNextReader(context); + docSet = DocSets.convert(context.reader(), filter.getDocIdSet(context)); } @Override diff --git a/src/main/java/org/elasticsearch/common/lucene/search/function/BoostScoreFunction.java b/src/main/java/org/elasticsearch/common/lucene/search/function/BoostScoreFunction.java index 99b2bcbdbed..c6416338b75 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/function/BoostScoreFunction.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/function/BoostScoreFunction.java @@ -19,7 +19,7 @@ package org.elasticsearch.common.lucene.search.function; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Explanation; /** @@ -39,7 +39,7 @@ public class BoostScoreFunction implements ScoreFunction { } @Override - public void setNextReader(IndexReader reader) { + public void setNextReader(AtomicReaderContext context) { // nothing to do here... 
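// setNextReader now receives the per-segment AtomicReaderContext; functions
// that need segment state can use context.reader() and context.docBase, but
// this boost function is segment-independent, hence the no-op.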
} diff --git a/src/main/java/org/elasticsearch/common/lucene/search/function/FiltersFunctionScoreQuery.java b/src/main/java/org/elasticsearch/common/lucene/search/function/FiltersFunctionScoreQuery.java index 5dd306df6af..890c2c05bb2 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/function/FiltersFunctionScoreQuery.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/function/FiltersFunctionScoreQuery.java @@ -19,9 +19,11 @@ package org.elasticsearch.common.lucene.search.function; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.*; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; import org.elasticsearch.common.lucene.docset.DocSet; import org.elasticsearch.common.lucene.docset.DocSets; @@ -106,15 +108,15 @@ public class FiltersFunctionScoreQuery extends Query { } @Override - public Weight createWeight(Searcher searcher) throws IOException { + public Weight createWeight(IndexSearcher searcher) throws IOException { return new CustomBoostFactorWeight(searcher); } class CustomBoostFactorWeight extends Weight { - Searcher searcher; + IndexSearcher searcher; Weight subQueryWeight; - public CustomBoostFactorWeight(Searcher searcher) throws IOException { + public CustomBoostFactorWeight(IndexSearcher searcher) throws IOException { this.searcher = searcher; this.subQueryWeight = subQuery.weight(searcher); } @@ -141,31 +143,31 @@ public class FiltersFunctionScoreQuery extends Query { } @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { - Scorer subQueryScorer = subQueryWeight.scorer(reader, scoreDocsInOrder, false); + public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { + Scorer subQueryScorer = subQueryWeight.scorer(context, scoreDocsInOrder, false, acceptDocs); if (subQueryScorer == null) { return null; } for (int i = 0; i < filterFunctions.length; i++) { FilterFunction filterFunction = filterFunctions[i]; - filterFunction.function.setNextReader(reader); - docSets[i] = DocSets.convert(reader, filterFunction.filter.getDocIdSet(reader)); + filterFunction.function.setNextReader(context); + docSets[i] = DocSets.convert(context.reader(), filterFunction.filter.getDocIdSet(context, acceptDocs)); } return new CustomBoostFactorScorer(getSimilarity(searcher), this, subQueryScorer, scoreMode, filterFunctions, maxBoost, docSets); } @Override - public Explanation explain(IndexReader reader, int doc) throws IOException { - Explanation subQueryExpl = subQueryWeight.explain(reader, doc); + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { + Explanation subQueryExpl = subQueryWeight.explain(context, doc); if (!subQueryExpl.isMatch()) { return subQueryExpl; } if (scoreMode == ScoreMode.First) { for (FilterFunction filterFunction : filterFunctions) { - DocSet docSet = DocSets.convert(reader, filterFunction.filter.getDocIdSet(reader)); + DocSet docSet = DocSets.convert(context.reader(), filterFunction.filter.getDocIdSet(context)); if (docSet.get(doc)) { - filterFunction.function.setNextReader(reader); + filterFunction.function.setNextReader(context); Explanation functionExplanation = filterFunction.function.explainFactor(doc); float sc = getValue() * subQueryExpl.getValue() * functionExplanation.getValue(); Explanation filterExplanation = new 
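// ScoreMode.First: the explanation, like the scorer, considers only the first
// filter whose doc set matches this document.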
ComplexExplanation(true, sc, "custom score, product of:"); @@ -189,9 +191,9 @@ public class FiltersFunctionScoreQuery extends Query { float min = Float.POSITIVE_INFINITY; ArrayList filtersExplanations = new ArrayList(); for (FilterFunction filterFunction : filterFunctions) { - DocSet docSet = DocSets.convert(reader, filterFunction.filter.getDocIdSet(reader)); + DocSet docSet = DocSets.convert(context.reader(), filterFunction.filter.getDocIdSet(context)); if (docSet.get(doc)) { - filterFunction.function.setNextReader(reader); + filterFunction.function.setNextReader(context); Explanation functionExplanation = filterFunction.function.explainFactor(doc); float factor = functionExplanation.getValue(); count++; diff --git a/src/main/java/org/elasticsearch/common/lucene/search/function/FunctionScoreQuery.java b/src/main/java/org/elasticsearch/common/lucene/search/function/FunctionScoreQuery.java index 02b59404899..9297fb0def6 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/function/FunctionScoreQuery.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/function/FunctionScoreQuery.java @@ -19,9 +19,11 @@ package org.elasticsearch.common.lucene.search.function; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.*; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; import java.io.IOException; @@ -63,17 +65,17 @@ public class FunctionScoreQuery extends Query { } @Override - public Weight createWeight(Searcher searcher) throws IOException { + public Weight createWeight(IndexSearcher searcher) throws IOException { return new CustomBoostFactorWeight(searcher); } class CustomBoostFactorWeight extends Weight { - Searcher searcher; + IndexSearcher searcher; Weight subQueryWeight; - public CustomBoostFactorWeight(Searcher searcher) throws IOException { + public CustomBoostFactorWeight(IndexSearcher searcher) throws IOException { this.searcher = searcher; - this.subQueryWeight = subQuery.weight(searcher); + this.subQueryWeight = subQuery.createWeight(searcher); } public Query getQuery() { @@ -98,23 +100,23 @@ public class FunctionScoreQuery extends Query { } @Override - public Scorer scorer(IndexReader reader, boolean scoreDocsInOrder, boolean topScorer) throws IOException { - Scorer subQueryScorer = subQueryWeight.scorer(reader, scoreDocsInOrder, false); + public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { + Scorer subQueryScorer = subQueryWeight.scorer(context, scoreDocsInOrder, false, acceptDocs); if (subQueryScorer == null) { return null; } - function.setNextReader(reader); + function.setNextReader(context); return new CustomBoostFactorScorer(getSimilarity(searcher), this, subQueryScorer, function); } @Override - public Explanation explain(IndexReader reader, int doc) throws IOException { - Explanation subQueryExpl = subQueryWeight.explain(reader, doc); + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { + Explanation subQueryExpl = subQueryWeight.explain(context, doc); if (!subQueryExpl.isMatch()) { return subQueryExpl; } - function.setNextReader(reader); + function.setNextReader(context); Explanation functionExplanation = function.explainScore(doc, subQueryExpl); float sc = getValue() * functionExplanation.getValue(); Explanation res = new ComplexExplanation(true, sc, "custom score, product of:"); diff --git 
a/src/main/java/org/elasticsearch/common/lucene/search/function/ScoreFunction.java b/src/main/java/org/elasticsearch/common/lucene/search/function/ScoreFunction.java index b54f140fbfa..d5d9f70b875 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/function/ScoreFunction.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/function/ScoreFunction.java @@ -19,7 +19,7 @@ package org.elasticsearch.common.lucene.search.function; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.Explanation; /** @@ -27,7 +27,7 @@ import org.apache.lucene.search.Explanation; */ public interface ScoreFunction { - void setNextReader(IndexReader reader); + void setNextReader(AtomicReaderContext context); float score(int docId, float subQueryScore); diff --git a/src/main/java/org/elasticsearch/index/query/ScriptFilterParser.java b/src/main/java/org/elasticsearch/index/query/ScriptFilterParser.java index 5e42153e019..d6007f5539d 100644 --- a/src/main/java/org/elasticsearch/index/query/ScriptFilterParser.java +++ b/src/main/java/org/elasticsearch/index/query/ScriptFilterParser.java @@ -166,7 +166,7 @@ public class ScriptFilterParser implements FilterParser { @Override public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { - searchScript.setNextReader(context.reader()); + searchScript.setNextReader(context); // LUCENE 4 UPGRADE: we can simply wrap this here since it is not cacheable and if we are not top level we will get a null passed anyway return BitsFilteredDocIdSet.wrap(new ScriptDocSet(context.reader(), searchScript), acceptDocs); } diff --git a/src/main/java/org/elasticsearch/search/fetch/script/ScriptFieldsFetchSubPhase.java b/src/main/java/org/elasticsearch/search/fetch/script/ScriptFieldsFetchSubPhase.java index ee48b32575a..77ddf0b04e5 100644 --- a/src/main/java/org/elasticsearch/search/fetch/script/ScriptFieldsFetchSubPhase.java +++ b/src/main/java/org/elasticsearch/search/fetch/script/ScriptFieldsFetchSubPhase.java @@ -67,7 +67,7 @@ public class ScriptFieldsFetchSubPhase implements FetchSubPhase { @Override public void hitExecute(SearchContext context, HitContext hitContext) throws ElasticSearchException { for (ScriptFieldsContext.ScriptField scriptField : context.scriptFields().fields()) { - scriptField.script().setNextReader(hitContext.reader()); + scriptField.script().setNextReader(hitContext.readerContext()); scriptField.script().setNextDocId(hitContext.docId()); Object value; From f57efcf6c868c107edde6bb087dab2280acd86d3 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Tue, 30 Oct 2012 23:19:38 -0400 Subject: [PATCH 070/146] lucene4: finish org.elasticsearch.common.compress cleanup --- .../org/elasticsearch/common/compress/CompressedDirectory.java | 3 ++- .../common/compress/lzf/LZFCompressedIndexInput.java | 2 +- .../common/compress/snappy/SnappyCompressedIndexInput.java | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/compress/CompressedDirectory.java b/src/main/java/org/elasticsearch/common/compress/CompressedDirectory.java index 3f62ebfdb88..e5a6b2d0cd5 100644 --- a/src/main/java/org/elasticsearch/common/compress/CompressedDirectory.java +++ b/src/main/java/org/elasticsearch/common/compress/CompressedDirectory.java @@ -73,7 +73,8 @@ public class CompressedDirectory extends Directory implements ForceSyncDirectory @Override public long fileLength(String name) throws IOException { if (actualLength && 
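// LUCENE 4 UPGRADE note: Directory.openInput now requires an IOContext;
// READONCE (used below) describes a short-lived, one-shot sequential read.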
decompressExtensions.contains(getExtension(name))) { - IndexInput in = openInput(name); + // LUCENE 4 UPGRADE: Is this the right IOContext? + IndexInput in = openInput(name, IOContext.READONCE); try { return in.length(); } catch (Exception e) { diff --git a/src/main/java/org/elasticsearch/common/compress/lzf/LZFCompressedIndexInput.java b/src/main/java/org/elasticsearch/common/compress/lzf/LZFCompressedIndexInput.java index 95b12604653..58d79415fd2 100644 --- a/src/main/java/org/elasticsearch/common/compress/lzf/LZFCompressedIndexInput.java +++ b/src/main/java/org/elasticsearch/common/compress/lzf/LZFCompressedIndexInput.java @@ -65,7 +65,7 @@ public class LZFCompressedIndexInput extends CompressedIndexInput Date: Tue, 30 Oct 2012 23:38:30 -0400 Subject: [PATCH 071/146] lucene4: fixed index.merge.policy --- .../index/merge/policy/LogByteSizeMergePolicyProvider.java | 7 ++----- .../index/merge/policy/LogDocMergePolicyProvider.java | 7 ++----- .../index/merge/policy/TieredMergePolicyProvider.java | 2 +- 3 files changed, 5 insertions(+), 11 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/merge/policy/LogByteSizeMergePolicyProvider.java b/src/main/java/org/elasticsearch/index/merge/policy/LogByteSizeMergePolicyProvider.java index fcede657031..384448fd9cc 100644 --- a/src/main/java/org/elasticsearch/index/merge/policy/LogByteSizeMergePolicyProvider.java +++ b/src/main/java/org/elasticsearch/index/merge/policy/LogByteSizeMergePolicyProvider.java @@ -19,10 +19,7 @@ package org.elasticsearch.index.merge.policy; -import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.LogByteSizeMergePolicy; -import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.index.*; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.Preconditions; @@ -220,7 +217,7 @@ public class LogByteSizeMergePolicyProvider extends AbstractIndexShardComponent } @Override - public MergeSpecification findForcedMerges(SegmentInfos infos, int maxSegmentCount, Map segmentsToMerge) throws IOException { + public MergeSpecification findForcedMerges(SegmentInfos infos, int maxSegmentCount, Map segmentsToMerge) throws IOException { if (enableMerge.get() == Boolean.FALSE) { return null; } diff --git a/src/main/java/org/elasticsearch/index/merge/policy/LogDocMergePolicyProvider.java b/src/main/java/org/elasticsearch/index/merge/policy/LogDocMergePolicyProvider.java index 4467e20a173..41a2b2810f8 100644 --- a/src/main/java/org/elasticsearch/index/merge/policy/LogDocMergePolicyProvider.java +++ b/src/main/java/org/elasticsearch/index/merge/policy/LogDocMergePolicyProvider.java @@ -19,10 +19,7 @@ package org.elasticsearch.index.merge.policy; -import org.apache.lucene.index.CorruptIndexException; -import org.apache.lucene.index.LogDocMergePolicy; -import org.apache.lucene.index.SegmentInfo; -import org.apache.lucene.index.SegmentInfos; +import org.apache.lucene.index.*; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.Preconditions; @@ -204,7 +201,7 @@ public class LogDocMergePolicyProvider extends AbstractIndexShardComponent imple } @Override - public MergeSpecification findForcedMerges(SegmentInfos infos, int maxSegmentCount, Map segmentsToMerge) throws IOException { + public MergeSpecification findForcedMerges(SegmentInfos infos, int maxSegmentCount, Map segmentsToMerge) 
throws IOException { if (enableMerge.get() == Boolean.FALSE) { return null; } diff --git a/src/main/java/org/elasticsearch/index/merge/policy/TieredMergePolicyProvider.java b/src/main/java/org/elasticsearch/index/merge/policy/TieredMergePolicyProvider.java index 8db69d3d94c..fb8eee2854e 100644 --- a/src/main/java/org/elasticsearch/index/merge/policy/TieredMergePolicyProvider.java +++ b/src/main/java/org/elasticsearch/index/merge/policy/TieredMergePolicyProvider.java @@ -262,7 +262,7 @@ public class TieredMergePolicyProvider extends AbstractIndexShardComponent imple } @Override - public MergeSpecification findForcedMerges(SegmentInfos infos, int maxSegmentCount, Map segmentsToMerge) throws IOException { + public MergeSpecification findForcedMerges(SegmentInfos infos, int maxSegmentCount, Map segmentsToMerge) throws IOException { if (enableMerge.get() == Boolean.FALSE) { return null; } From 6bbe37f876cf8d7b178755f13058ea4d8354cbab Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Wed, 31 Oct 2012 05:52:42 -0400 Subject: [PATCH 072/146] lucene4: fixed integration tests that got broken by switch from String to Text in Facet terms --- .../integration/nested/SimpleNestedTests.java | 10 +- .../child/SimpleChildQuerySearchTests.java | 4 +- .../search/facet/SimpleFacetsTests.java | 172 +++++++++--------- 3 files changed, 93 insertions(+), 93 deletions(-) diff --git a/src/test/java/org/elasticsearch/test/integration/nested/SimpleNestedTests.java b/src/test/java/org/elasticsearch/test/integration/nested/SimpleNestedTests.java index 1c839145c4f..429e86d4057 100644 --- a/src/test/java/org/elasticsearch/test/integration/nested/SimpleNestedTests.java +++ b/src/test/java/org/elasticsearch/test/integration/nested/SimpleNestedTests.java @@ -424,16 +424,16 @@ public class SimpleNestedTests extends AbstractNodesTests { TermsStatsFacet termsStatsFacet = searchResponse.facets().facet("facet1"); assertThat(termsStatsFacet.entries().size(), equalTo(4)); - assertThat(termsStatsFacet.entries().get(0).term(), equalTo("blue")); + assertThat(termsStatsFacet.entries().get(0).term().string(), equalTo("blue")); assertThat(termsStatsFacet.entries().get(0).count(), equalTo(3l)); assertThat(termsStatsFacet.entries().get(0).total(), equalTo(8d)); - assertThat(termsStatsFacet.entries().get(1).term(), equalTo("yellow")); + assertThat(termsStatsFacet.entries().get(1).term().string(), equalTo("yellow")); assertThat(termsStatsFacet.entries().get(1).count(), equalTo(2l)); assertThat(termsStatsFacet.entries().get(1).total(), equalTo(13d)); - assertThat(termsStatsFacet.entries().get(2).term(), equalTo("green")); + assertThat(termsStatsFacet.entries().get(2).term().string(), equalTo("green")); assertThat(termsStatsFacet.entries().get(2).count(), equalTo(2l)); assertThat(termsStatsFacet.entries().get(2).total(), equalTo(14d)); - assertThat(termsStatsFacet.entries().get(3).term(), equalTo("red")); + assertThat(termsStatsFacet.entries().get(3).term().string(), equalTo("red")); assertThat(termsStatsFacet.entries().get(3).count(), equalTo(1l)); assertThat(termsStatsFacet.entries().get(3).total(), equalTo(12d)); @@ -448,7 +448,7 @@ public class SimpleNestedTests extends AbstractNodesTests { termsStatsFacet = searchResponse.facets().facet("facet1"); assertThat(termsStatsFacet.entries().size(), equalTo(1)); - assertThat(termsStatsFacet.entries().get(0).term(), equalTo("blue")); + assertThat(termsStatsFacet.entries().get(0).term().string(), equalTo("blue")); assertThat(termsStatsFacet.entries().get(0).count(), equalTo(3l)); 
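// lucene4: facet terms are now Text rather than String, hence the
// term().string() accessor in the assertions throughout these tests.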
assertThat(termsStatsFacet.entries().get(0).total(), equalTo(8d)); } diff --git a/src/test/java/org/elasticsearch/test/integration/search/child/SimpleChildQuerySearchTests.java b/src/test/java/org/elasticsearch/test/integration/search/child/SimpleChildQuerySearchTests.java index 06aced5298b..8073940d603 100644 --- a/src/test/java/org/elasticsearch/test/integration/search/child/SimpleChildQuerySearchTests.java +++ b/src/test/java/org/elasticsearch/test/integration/search/child/SimpleChildQuerySearchTests.java @@ -564,9 +564,9 @@ public class SimpleChildQuerySearchTests extends AbstractNodesTests { assertThat(searchResponse.facets().facets().size(), equalTo(1)); TermsFacet termsFacet = searchResponse.facets().facet("facet1"); assertThat(termsFacet.entries().size(), equalTo(2)); - assertThat(termsFacet.entries().get(0).term(), equalTo("red")); + assertThat(termsFacet.entries().get(0).term().string(), equalTo("red")); assertThat(termsFacet.entries().get(0).count(), equalTo(2)); - assertThat(termsFacet.entries().get(1).term(), equalTo("yellow")); + assertThat(termsFacet.entries().get(1).term().string(), equalTo("yellow")); assertThat(termsFacet.entries().get(1).count(), equalTo(1)); } diff --git a/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java b/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java index f831512d2cc..c1456d84ed8 100644 --- a/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java +++ b/src/test/java/org/elasticsearch/test/integration/search/facet/SimpleFacetsTests.java @@ -134,9 +134,9 @@ public class SimpleFacetsTests extends AbstractNodesTests { TermsFacet facet = searchResponse.facets().facet("facet1"); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), anyOf(equalTo("green"), equalTo("blue"))); + assertThat(facet.entries().get(0).term().string(), anyOf(equalTo("green"), equalTo("blue"))); assertThat(facet.entries().get(0).count(), equalTo(1)); - assertThat(facet.entries().get(1).term(), anyOf(equalTo("green"), equalTo("blue"))); + assertThat(facet.entries().get(1).term().string(), anyOf(equalTo("green"), equalTo("blue"))); assertThat(facet.entries().get(1).count(), equalTo(1)); } } @@ -174,9 +174,9 @@ public class SimpleFacetsTests extends AbstractNodesTests { TermsFacet facet = searchResponse.facets().facet("facet1"); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), anyOf(equalTo("green"), equalTo("blue"))); + assertThat(facet.entries().get(0).term().string(), anyOf(equalTo("green"), equalTo("blue"))); assertThat(facet.entries().get(0).count(), equalTo(1)); - assertThat(facet.entries().get(1).term(), anyOf(equalTo("green"), equalTo("blue"))); + assertThat(facet.entries().get(1).term().string(), anyOf(equalTo("green"), equalTo("blue"))); assertThat(facet.entries().get(1).count(), equalTo(1)); searchResponse = client.prepareSearch() @@ -189,9 +189,9 @@ public class SimpleFacetsTests extends AbstractNodesTests { facet = searchResponse.facets().facet("facet1"); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), anyOf(equalTo("green"), equalTo("blue"))); + assertThat(facet.entries().get(0).term().string(), anyOf(equalTo("green"), equalTo("blue"))); assertThat(facet.entries().get(0).count(), equalTo(1)); - 
assertThat(facet.entries().get(1).term(), anyOf(equalTo("green"), equalTo("blue"))); + assertThat(facet.entries().get(1).term().string(), anyOf(equalTo("green"), equalTo("blue"))); assertThat(facet.entries().get(1).count(), equalTo(1)); } } @@ -237,7 +237,7 @@ public class SimpleFacetsTests extends AbstractNodesTests { TermsFacet facet = searchResponse.facets().facet("facet1"); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(1)); - assertThat(facet.entries().get(0).term(), equalTo("111")); + assertThat(facet.entries().get(0).term().string(), equalTo("111")); assertThat(facet.entries().get(0).count(), equalTo(2)); searchResponse = client.prepareSearch() @@ -253,7 +253,7 @@ public class SimpleFacetsTests extends AbstractNodesTests { facet = searchResponse.facets().facet("facet1"); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(1)); - assertThat(facet.entries().get(0).term(), equalTo("111")); + assertThat(facet.entries().get(0).term().string(), equalTo("111")); assertThat(facet.entries().get(0).count(), equalTo(2)); } } @@ -296,9 +296,9 @@ public class SimpleFacetsTests extends AbstractNodesTests { TermsFacet facet = searchResponse.facets().facet("facet1"); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("test1")); + assertThat(facet.entries().get(0).term().string(), equalTo("test1")); assertThat(facet.entries().get(0).count(), equalTo(2)); - assertThat(facet.entries().get(1).term(), equalTo("test2")); + assertThat(facet.entries().get(1).term().string(), equalTo("test2")); assertThat(facet.entries().get(1).count(), equalTo(1)); } @@ -460,13 +460,13 @@ public class SimpleFacetsTests extends AbstractNodesTests { assertThat(facet.getTotalCount(), equalTo(2l)); assertThat(facet.getOtherCount(), equalTo(0l)); assertThat(facet.entries().size(), equalTo(1)); - assertThat(facet.entries().get(0).term(), equalTo("111")); + assertThat(facet.entries().get(0).term().string(), equalTo("111")); assertThat(facet.entries().get(0).count(), equalTo(2)); facet = searchResponse.facets().facet("facet2"); assertThat(facet.name(), equalTo("facet2")); assertThat(facet.entries().size(), equalTo(3)); - assertThat(facet.entries().get(0).term(), equalTo("yyy")); + assertThat(facet.entries().get(0).term().string(), equalTo("yyy")); assertThat(facet.entries().get(0).count(), equalTo(2)); // Numeric @@ -482,27 +482,27 @@ public class SimpleFacetsTests extends AbstractNodesTests { assertThat(facet, instanceOf(InternalLongTermsFacet.class)); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(1)); - assertThat(facet.entries().get(0).term(), equalTo("111")); + assertThat(facet.entries().get(0).term().string(), equalTo("111")); assertThat(facet.entries().get(0).count(), equalTo(2)); facet = searchResponse.facets().facet("facet2"); assertThat(facet, instanceOf(InternalLongTermsFacet.class)); assertThat(facet.name(), equalTo("facet2")); assertThat(facet.entries().size(), equalTo(3)); - assertThat(facet.entries().get(0).term(), equalTo("2000")); + assertThat(facet.entries().get(0).term().string(), equalTo("2000")); assertThat(facet.entries().get(0).count(), equalTo(2)); - assertThat(facet.entries().get(1).term(), anyOf(equalTo("1000"), equalTo("3000"))); + assertThat(facet.entries().get(1).term().string(), anyOf(equalTo("1000"), equalTo("3000"))); assertThat(facet.entries().get(1).count(), equalTo(1)); - 
assertThat(facet.entries().get(2).term(), anyOf(equalTo("1000"), equalTo("3000"))); + assertThat(facet.entries().get(2).term().string(), anyOf(equalTo("1000"), equalTo("3000"))); assertThat(facet.entries().get(2).count(), equalTo(1)); facet = searchResponse.facets().facet("facet3"); assertThat(facet, instanceOf(InternalLongTermsFacet.class)); assertThat(facet.name(), equalTo("facet3")); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("2000")); + assertThat(facet.entries().get(0).term().string(), equalTo("2000")); assertThat(facet.entries().get(0).count(), equalTo(2)); - assertThat(facet.entries().get(1).term(), equalTo("1000")); + assertThat(facet.entries().get(1).term().string(), equalTo("1000")); assertThat(facet.entries().get(1).count(), equalTo(1)); searchResponse = client.prepareSearch() @@ -515,18 +515,18 @@ public class SimpleFacetsTests extends AbstractNodesTests { assertThat(facet, instanceOf(InternalDoubleTermsFacet.class)); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(1)); - assertThat(facet.entries().get(0).term(), equalTo("111.1")); + assertThat(facet.entries().get(0).term().string(), equalTo("111.1")); assertThat(facet.entries().get(0).count(), equalTo(2)); facet = searchResponse.facets().facet("facet2"); assertThat(facet, instanceOf(InternalDoubleTermsFacet.class)); assertThat(facet.name(), equalTo("facet2")); assertThat(facet.entries().size(), equalTo(3)); - assertThat(facet.entries().get(0).term(), equalTo("2000.1")); + assertThat(facet.entries().get(0).term().string(), equalTo("2000.1")); assertThat(facet.entries().get(0).count(), equalTo(2)); - assertThat(facet.entries().get(1).term(), anyOf(equalTo("1000.1"), equalTo("3000.1"))); + assertThat(facet.entries().get(1).term().string(), anyOf(equalTo("1000.1"), equalTo("3000.1"))); assertThat(facet.entries().get(1).count(), equalTo(1)); - assertThat(facet.entries().get(2).term(), anyOf(equalTo("1000.1"), equalTo("3000.1"))); + assertThat(facet.entries().get(2).term().string(), anyOf(equalTo("1000.1"), equalTo("3000.1"))); assertThat(facet.entries().get(2).count(), equalTo(1)); searchResponse = client.prepareSearch() @@ -538,7 +538,7 @@ public class SimpleFacetsTests extends AbstractNodesTests { assertThat(facet, instanceOf(InternalByteTermsFacet.class)); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(1)); - assertThat(facet.entries().get(0).term(), equalTo("111")); + assertThat(facet.entries().get(0).term().string(), equalTo("111")); assertThat(facet.entries().get(0).count(), equalTo(2)); searchResponse = client.prepareSearch() @@ -550,7 +550,7 @@ public class SimpleFacetsTests extends AbstractNodesTests { assertThat(facet, instanceOf(InternalIntTermsFacet.class)); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(1)); - assertThat(facet.entries().get(0).term(), equalTo("111")); + assertThat(facet.entries().get(0).term().string(), equalTo("111")); assertThat(facet.entries().get(0).count(), equalTo(2)); searchResponse = client.prepareSearch() @@ -562,7 +562,7 @@ public class SimpleFacetsTests extends AbstractNodesTests { assertThat(facet, instanceOf(InternalShortTermsFacet.class)); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(1)); - assertThat(facet.entries().get(0).term(), equalTo("111")); + assertThat(facet.entries().get(0).term().string(), equalTo("111")); assertThat(facet.entries().get(0).count(), 
equalTo(2)); // Test Facet Filter @@ -575,7 +575,7 @@ public class SimpleFacetsTests extends AbstractNodesTests { facet = searchResponse.facets().facet("facet1"); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(1)); - assertThat(facet.entries().get(0).term(), equalTo("111")); + assertThat(facet.entries().get(0).term().string(), equalTo("111")); assertThat(facet.entries().get(0).count(), equalTo(1)); // now with global @@ -587,7 +587,7 @@ public class SimpleFacetsTests extends AbstractNodesTests { facet = searchResponse.facets().facet("facet1"); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(1)); - assertThat(facet.entries().get(0).term(), equalTo("111")); + assertThat(facet.entries().get(0).term().string(), equalTo("111")); assertThat(facet.entries().get(0).count(), equalTo(1)); // Test Facet Filter (with a type) @@ -600,7 +600,7 @@ public class SimpleFacetsTests extends AbstractNodesTests { facet = searchResponse.facets().facet("facet1"); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(1)); - assertThat(facet.entries().get(0).term(), equalTo("111")); + assertThat(facet.entries().get(0).term().string(), equalTo("111")); assertThat(facet.entries().get(0).count(), equalTo(1)); searchResponse = client.prepareSearch() @@ -611,11 +611,11 @@ public class SimpleFacetsTests extends AbstractNodesTests { facet = searchResponse.facets().facet("facet1"); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(3)); - assertThat(facet.entries().get(0).term(), equalTo("yyy")); + assertThat(facet.entries().get(0).term().string(), equalTo("yyy")); assertThat(facet.entries().get(0).count(), equalTo(2)); - assertThat(facet.entries().get(1).term(), anyOf(equalTo("xxx"), equalTo("zzz"))); + assertThat(facet.entries().get(1).term().string(), anyOf(equalTo("xxx"), equalTo("zzz"))); assertThat(facet.entries().get(1).count(), equalTo(1)); - assertThat(facet.entries().get(2).term(), anyOf(equalTo("xxx"), equalTo("zzz"))); + assertThat(facet.entries().get(2).term().string(), anyOf(equalTo("xxx"), equalTo("zzz"))); assertThat(facet.entries().get(2).count(), equalTo(1)); // Bounded Size @@ -628,9 +628,9 @@ public class SimpleFacetsTests extends AbstractNodesTests { facet = searchResponse.facets().facet("facet1"); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("yyy")); + assertThat(facet.entries().get(0).term().string(), equalTo("yyy")); assertThat(facet.entries().get(0).count(), equalTo(2)); - assertThat(facet.entries().get(1).term(), anyOf(equalTo("xxx"), equalTo("zzz"))); + assertThat(facet.entries().get(1).term().string(), anyOf(equalTo("xxx"), equalTo("zzz"))); assertThat(facet.entries().get(1).count(), equalTo(1)); // Test Exclude @@ -643,9 +643,9 @@ public class SimpleFacetsTests extends AbstractNodesTests { facet = searchResponse.facets().facet("facet1"); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), anyOf(equalTo("xxx"), equalTo("zzz"))); + assertThat(facet.entries().get(0).term().string(), anyOf(equalTo("xxx"), equalTo("zzz"))); assertThat(facet.entries().get(0).count(), equalTo(1)); - assertThat(facet.entries().get(1).term(), anyOf(equalTo("xxx"), equalTo("zzz"))); + assertThat(facet.entries().get(1).term().string(), anyOf(equalTo("xxx"), equalTo("zzz"))); 
assertThat(facet.entries().get(1).count(), equalTo(1)); // Test Order @@ -658,11 +658,11 @@ public class SimpleFacetsTests extends AbstractNodesTests { facet = searchResponse.facets().facet("facet1"); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(3)); - assertThat(facet.entries().get(0).term(), equalTo("xxx")); + assertThat(facet.entries().get(0).term().string(), equalTo("xxx")); assertThat(facet.entries().get(0).count(), equalTo(1)); - assertThat(facet.entries().get(1).term(), equalTo("yyy")); + assertThat(facet.entries().get(1).term().string(), equalTo("yyy")); assertThat(facet.entries().get(1).count(), equalTo(2)); - assertThat(facet.entries().get(2).term(), equalTo("zzz")); + assertThat(facet.entries().get(2).term().string(), equalTo("zzz")); assertThat(facet.entries().get(2).count(), equalTo(1)); searchResponse = client.prepareSearch() @@ -673,11 +673,11 @@ public class SimpleFacetsTests extends AbstractNodesTests { facet = searchResponse.facets().facet("facet1"); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(3)); - assertThat(facet.entries().get(2).term(), equalTo("xxx")); + assertThat(facet.entries().get(2).term().string(), equalTo("xxx")); assertThat(facet.entries().get(2).count(), equalTo(1)); - assertThat(facet.entries().get(1).term(), equalTo("yyy")); + assertThat(facet.entries().get(1).term().string(), equalTo("yyy")); assertThat(facet.entries().get(1).count(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("zzz")); + assertThat(facet.entries().get(0).term().string(), equalTo("zzz")); assertThat(facet.entries().get(0).count(), equalTo(1)); // Script @@ -690,11 +690,11 @@ public class SimpleFacetsTests extends AbstractNodesTests { facet = searchResponse.facets().facet("facet1"); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(3)); - assertThat(facet.entries().get(0).term(), equalTo("xxxa")); + assertThat(facet.entries().get(0).term().string(), equalTo("xxxa")); assertThat(facet.entries().get(0).count(), equalTo(1)); - assertThat(facet.entries().get(1).term(), equalTo("yyya")); + assertThat(facet.entries().get(1).term().string(), equalTo("yyya")); assertThat(facet.entries().get(1).count(), equalTo(2)); - assertThat(facet.entries().get(2).term(), equalTo("zzza")); + assertThat(facet.entries().get(2).term().string(), equalTo("zzza")); assertThat(facet.entries().get(2).count(), equalTo(1)); searchResponse = client.prepareSearch() @@ -705,9 +705,9 @@ public class SimpleFacetsTests extends AbstractNodesTests { facet = searchResponse.facets().facet("facet1"); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("yyy")); + assertThat(facet.entries().get(0).term().string(), equalTo("yyy")); assertThat(facet.entries().get(0).count(), equalTo(2)); - assertThat(facet.entries().get(1).term(), equalTo("zzz")); + assertThat(facet.entries().get(1).term().string(), equalTo("zzz")); assertThat(facet.entries().get(1).count(), equalTo(1)); // Fields Facets @@ -720,13 +720,13 @@ public class SimpleFacetsTests extends AbstractNodesTests { facet = searchResponse.facets().facet("facet1"); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(4)); - assertThat(facet.entries().get(0).term(), anyOf(equalTo("111"), equalTo("yyy"))); + assertThat(facet.entries().get(0).term().string(), anyOf(equalTo("111"), equalTo("yyy"))); 
assertThat(facet.entries().get(0).count(), equalTo(2)); - assertThat(facet.entries().get(1).term(), anyOf(equalTo("111"), equalTo("yyy"))); + assertThat(facet.entries().get(1).term().string(), anyOf(equalTo("111"), equalTo("yyy"))); assertThat(facet.entries().get(1).count(), equalTo(2)); - assertThat(facet.entries().get(2).term(), anyOf(equalTo("zzz"), equalTo("xxx"))); + assertThat(facet.entries().get(2).term().string(), anyOf(equalTo("zzz"), equalTo("xxx"))); assertThat(facet.entries().get(2).count(), equalTo(1)); - assertThat(facet.entries().get(3).term(), anyOf(equalTo("zzz"), equalTo("xxx"))); + assertThat(facet.entries().get(3).term().string(), anyOf(equalTo("zzz"), equalTo("xxx"))); assertThat(facet.entries().get(3).count(), equalTo(1)); searchResponse = client.prepareSearch() @@ -737,11 +737,11 @@ public class SimpleFacetsTests extends AbstractNodesTests { facet = searchResponse.facets().facet("facet1"); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(3)); - assertThat(facet.entries().get(0).term(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz"))); + assertThat(facet.entries().get(0).term().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz"))); assertThat(facet.entries().get(0).count(), equalTo(0)); - assertThat(facet.entries().get(1).term(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz"))); + assertThat(facet.entries().get(1).term().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz"))); assertThat(facet.entries().get(1).count(), equalTo(0)); - assertThat(facet.entries().get(2).term(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz"))); + assertThat(facet.entries().get(2).term().string(), anyOf(equalTo("xxx"), equalTo("yyy"), equalTo("zzz"))); assertThat(facet.entries().get(2).count(), equalTo(0)); // Script Field @@ -755,13 +755,13 @@ public class SimpleFacetsTests extends AbstractNodesTests { facet = searchResponse.facets().facet("facet1"); assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(1)); - assertThat(facet.entries().get(0).term(), equalTo("111")); + assertThat(facet.entries().get(0).term().string(), equalTo("111")); assertThat(facet.entries().get(0).count(), equalTo(2)); facet = searchResponse.facets().facet("facet2"); assertThat(facet.name(), equalTo("facet2")); assertThat(facet.entries().size(), equalTo(3)); - assertThat(facet.entries().get(0).term(), equalTo("yyy")); + assertThat(facet.entries().get(0).term().string(), equalTo("yyy")); assertThat(facet.entries().get(0).count(), equalTo(2)); } } @@ -807,7 +807,7 @@ public class SimpleFacetsTests extends AbstractNodesTests { assertThat(facet.name(), equalTo("facet1")); assertThat(facet.entries().size(), equalTo(3)); for (int j = 0; j < 3; j++) { - assertThat(facet.entries().get(j).term(), anyOf(equalTo("foo"), equalTo("bar"), equalTo("baz"))); + assertThat(facet.entries().get(j).term().string(), anyOf(equalTo("foo"), equalTo("bar"), equalTo("baz"))); assertThat(facet.entries().get(j).count(), equalTo(10)); } } @@ -1602,13 +1602,13 @@ public class SimpleFacetsTests extends AbstractNodesTests { TermsStatsFacet facet = searchResponse.facets().facet("stats1"); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("xxx")); + assertThat(facet.entries().get(0).term().string(), equalTo("xxx")); assertThat(facet.entries().get(0).count(), equalTo(2l)); assertThat(facet.entries().get(0).totalCount(), equalTo(2l)); assertThat(facet.entries().get(0).min(), closeTo(100d, 
0.00001d)); assertThat(facet.entries().get(0).max(), closeTo(200d, 0.00001d)); assertThat(facet.entries().get(0).total(), closeTo(300d, 0.00001d)); - assertThat(facet.entries().get(1).term(), equalTo("yyy")); + assertThat(facet.entries().get(1).term().string(), equalTo("yyy")); assertThat(facet.entries().get(1).count(), equalTo(1l)); assertThat(facet.entries().get(1).totalCount(), equalTo(1l)); assertThat(facet.entries().get(1).min(), closeTo(500d, 0.00001d)); @@ -1617,12 +1617,12 @@ public class SimpleFacetsTests extends AbstractNodesTests { facet = searchResponse.facets().facet("stats2"); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("xxx")); + assertThat(facet.entries().get(0).term().string(), equalTo("xxx")); assertThat(facet.entries().get(0).count(), equalTo(2l)); assertThat(facet.entries().get(0).min(), closeTo(1d, 0.00001d)); assertThat(facet.entries().get(0).max(), closeTo(3d, 0.00001d)); assertThat(facet.entries().get(0).total(), closeTo(8d, 0.00001d)); - assertThat(facet.entries().get(1).term(), equalTo("yyy")); + assertThat(facet.entries().get(1).term().string(), equalTo("yyy")); assertThat(facet.entries().get(1).count(), equalTo(1l)); assertThat(facet.entries().get(1).min(), closeTo(5d, 0.00001d)); assertThat(facet.entries().get(1).max(), closeTo(6d, 0.00001d)); @@ -1630,100 +1630,100 @@ public class SimpleFacetsTests extends AbstractNodesTests { facet = searchResponse.facets().facet("stats3"); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("xxx")); + assertThat(facet.entries().get(0).term().string(), equalTo("xxx")); assertThat(facet.entries().get(0).count(), equalTo(2l)); assertThat(facet.entries().get(0).total(), closeTo(300d, 0.00001d)); - assertThat(facet.entries().get(1).term(), equalTo("yyy")); + assertThat(facet.entries().get(1).term().string(), equalTo("yyy")); assertThat(facet.entries().get(1).count(), equalTo(1l)); assertThat(facet.entries().get(1).total(), closeTo(500d, 0.00001d)); facet = searchResponse.facets().facet("stats4"); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("xxx")); + assertThat(facet.entries().get(0).term().string(), equalTo("xxx")); assertThat(facet.entries().get(0).count(), equalTo(2l)); assertThat(facet.entries().get(0).total(), closeTo(8d, 0.00001d)); - assertThat(facet.entries().get(1).term(), equalTo("yyy")); + assertThat(facet.entries().get(1).term().string(), equalTo("yyy")); assertThat(facet.entries().get(1).count(), equalTo(1l)); assertThat(facet.entries().get(1).total(), closeTo(11d, 0.00001d)); facet = searchResponse.facets().facet("stats5"); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("yyy")); + assertThat(facet.entries().get(0).term().string(), equalTo("yyy")); assertThat(facet.entries().get(0).count(), equalTo(1l)); assertThat(facet.entries().get(0).total(), closeTo(500d, 0.00001d)); - assertThat(facet.entries().get(1).term(), equalTo("xxx")); + assertThat(facet.entries().get(1).term().string(), equalTo("xxx")); assertThat(facet.entries().get(1).count(), equalTo(2l)); assertThat(facet.entries().get(1).total(), closeTo(300d, 0.00001d)); facet = searchResponse.facets().facet("stats6"); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("yyy")); + assertThat(facet.entries().get(0).term().string(), equalTo("yyy")); assertThat(facet.entries().get(0).count(), equalTo(1l)); 
assertThat(facet.entries().get(0).total(), closeTo(11d, 0.00001d)); - assertThat(facet.entries().get(1).term(), equalTo("xxx")); + assertThat(facet.entries().get(1).term().string(), equalTo("xxx")); assertThat(facet.entries().get(1).count(), equalTo(2l)); assertThat(facet.entries().get(1).total(), closeTo(8d, 0.00001d)); facet = searchResponse.facets().facet("stats7"); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("xxx")); + assertThat(facet.entries().get(0).term().string(), equalTo("xxx")); assertThat(facet.entries().get(0).count(), equalTo(2l)); assertThat(facet.entries().get(0).total(), closeTo(300d, 0.00001d)); - assertThat(facet.entries().get(1).term(), equalTo("yyy")); + assertThat(facet.entries().get(1).term().string(), equalTo("yyy")); assertThat(facet.entries().get(1).count(), equalTo(1l)); assertThat(facet.entries().get(1).total(), closeTo(500d, 0.00001d)); facet = searchResponse.facets().facet("stats8"); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("xxx")); + assertThat(facet.entries().get(0).term().string(), equalTo("xxx")); assertThat(facet.entries().get(0).count(), equalTo(2l)); assertThat(facet.entries().get(0).total(), closeTo(8d, 0.00001d)); - assertThat(facet.entries().get(1).term(), equalTo("yyy")); + assertThat(facet.entries().get(1).term().string(), equalTo("yyy")); assertThat(facet.entries().get(1).count(), equalTo(1l)); assertThat(facet.entries().get(1).total(), closeTo(11d, 0.00001d)); facet = searchResponse.facets().facet("stats9"); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("xxx")); + assertThat(facet.entries().get(0).term().string(), equalTo("xxx")); assertThat(facet.entries().get(0).count(), equalTo(2l)); assertThat(facet.entries().get(0).total(), closeTo(300d, 0.00001d)); - assertThat(facet.entries().get(1).term(), equalTo("yyy")); + assertThat(facet.entries().get(1).term().string(), equalTo("yyy")); assertThat(facet.entries().get(1).count(), equalTo(1l)); assertThat(facet.entries().get(1).total(), closeTo(500d, 0.00001d)); facet = searchResponse.facets().facet("stats10"); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("xxx")); + assertThat(facet.entries().get(0).term().string(), equalTo("xxx")); assertThat(facet.entries().get(0).count(), equalTo(2l)); assertThat(facet.entries().get(0).total(), closeTo(8d, 0.00001d)); - assertThat(facet.entries().get(1).term(), equalTo("yyy")); + assertThat(facet.entries().get(1).term().string(), equalTo("yyy")); assertThat(facet.entries().get(1).count(), equalTo(1l)); assertThat(facet.entries().get(1).total(), closeTo(11d, 0.00001d)); facet = searchResponse.facets().facet("stats11"); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("yyy")); + assertThat(facet.entries().get(0).term().string(), equalTo("yyy")); assertThat(facet.entries().get(0).count(), equalTo(1l)); assertThat(facet.entries().get(0).total(), closeTo(500d, 0.00001d)); - assertThat(facet.entries().get(1).term(), equalTo("xxx")); + assertThat(facet.entries().get(1).term().string(), equalTo("xxx")); assertThat(facet.entries().get(1).count(), equalTo(2l)); assertThat(facet.entries().get(1).total(), closeTo(300d, 0.00001d)); facet = searchResponse.facets().facet("stats12"); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("yyy")); + 
assertThat(facet.entries().get(0).term().string(), equalTo("yyy")); assertThat(facet.entries().get(0).count(), equalTo(1l)); assertThat(facet.entries().get(0).total(), closeTo(11d, 0.00001d)); - assertThat(facet.entries().get(1).term(), equalTo("xxx")); + assertThat(facet.entries().get(1).term().string(), equalTo("xxx")); assertThat(facet.entries().get(1).count(), equalTo(2l)); assertThat(facet.entries().get(1).total(), closeTo(8d, 0.00001d)); facet = searchResponse.facets().facet("stats13"); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("xxx")); + assertThat(facet.entries().get(0).term().string(), equalTo("xxx")); assertThat(facet.entries().get(0).count(), equalTo(2l)); assertThat(facet.entries().get(0).total(), closeTo(600d, 0.00001d)); - assertThat(facet.entries().get(1).term(), equalTo("yyy")); + assertThat(facet.entries().get(1).term().string(), equalTo("yyy")); assertThat(facet.entries().get(1).count(), equalTo(1l)); assertThat(facet.entries().get(1).total(), closeTo(1000d, 0.00001d)); } @@ -1776,12 +1776,12 @@ public class SimpleFacetsTests extends AbstractNodesTests { TermsStatsFacet facet = searchResponse.facets().facet("stats1"); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("100")); + assertThat(facet.entries().get(0).term().string(), equalTo("100")); assertThat(facet.entries().get(0).count(), equalTo(2l)); assertThat(facet.entries().get(0).min(), closeTo(100d, 0.00001d)); assertThat(facet.entries().get(0).max(), closeTo(200d, 0.00001d)); assertThat(facet.entries().get(0).total(), closeTo(300d, 0.00001d)); - assertThat(facet.entries().get(1).term(), equalTo("200")); + assertThat(facet.entries().get(1).term().string(), equalTo("200")); assertThat(facet.entries().get(1).count(), equalTo(1l)); assertThat(facet.entries().get(1).min(), closeTo(500d, 0.00001d)); assertThat(facet.entries().get(1).max(), closeTo(500d, 0.00001d)); @@ -1789,12 +1789,12 @@ public class SimpleFacetsTests extends AbstractNodesTests { facet = searchResponse.facets().facet("stats2"); assertThat(facet.entries().size(), equalTo(2)); - assertThat(facet.entries().get(0).term(), equalTo("100.1")); + assertThat(facet.entries().get(0).term().string(), equalTo("100.1")); assertThat(facet.entries().get(0).count(), equalTo(2l)); assertThat(facet.entries().get(0).min(), closeTo(100d, 0.00001d)); assertThat(facet.entries().get(0).max(), closeTo(200d, 0.00001d)); assertThat(facet.entries().get(0).total(), closeTo(300d, 0.00001d)); - assertThat(facet.entries().get(1).term(), equalTo("200.2")); + assertThat(facet.entries().get(1).term().string(), equalTo("200.2")); assertThat(facet.entries().get(1).count(), equalTo(1l)); assertThat(facet.entries().get(1).min(), closeTo(500d, 0.00001d)); assertThat(facet.entries().get(1).max(), closeTo(500d, 0.00001d)); From da551e884706cb772148479923390bf8e1bab475 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Wed, 31 Oct 2012 11:35:10 +0100 Subject: [PATCH 073/146] lucene 4: Upgraded o.e.common.lucene.search.function package. 
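This moves FiltersFunctionScoreQuery and FunctionScoreQuery to the Lucene 4.0 Weight/Scorer contract: getValue()/sumOfSquaredWeights() are replaced by getValueForNormalization(), normalize() now also receives the top-level boost, scorer() and explain() operate per segment against an AtomicReaderContext with live docs passed in as a Bits instance, and a Scorer is constructed from its Weight (and must implement freq()) rather than from a Similarity. In outline, the new contract looks like this (a minimal sketch around a hypothetical DelegatingWeight, not code from this patch):

    import java.io.IOException;

    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.search.Explanation;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.Scorer;
    import org.apache.lucene.search.Weight;
    import org.apache.lucene.util.Bits;

    // Hypothetical sketch of the Lucene 4.0 Weight contract; it simply
    // delegates to the weight of a wrapped sub-query.
    class DelegatingWeight extends Weight {

        private final Query query;           // the outer query being scored
        private final Weight subQueryWeight; // created eagerly in createWeight()

        DelegatingWeight(Query query, Weight subQueryWeight) {
            this.query = query;
            this.subQueryWeight = subQueryWeight;
        }

        @Override
        public Query getQuery() {
            return query;
        }

        // Replaces sumOfSquaredWeights(): the query boost is folded into the
        // normalization value instead of being exposed via getValue().
        @Override
        public float getValueForNormalization() throws IOException {
            float sum = subQueryWeight.getValueForNormalization();
            return sum * query.getBoost() * query.getBoost();
        }

        // Replaces normalize(float): the top-level boost is threaded down the
        // query tree explicitly instead of being multiplied into the norm.
        @Override
        public void normalize(float norm, float topLevelBoost) {
            subQueryWeight.normalize(norm, topLevelBoost * query.getBoost());
        }

        // Per-segment scoring: deleted docs arrive as the acceptDocs Bits and
        // are no longer filtered implicitly by the reader.
        @Override
        public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
                             boolean topScorer, Bits acceptDocs) throws IOException {
            return subQueryWeight.scorer(context, scoreDocsInOrder, topScorer, acceptDocs);
        }

        // explain() is per segment as well.
        @Override
        public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
            return subQueryWeight.explain(context, doc);
        }
    }

Note that the sub-query Weight is now created once in createWeight(IndexSearcher) and handed to the outer Weight's constructor, instead of being resolved from the IndexSearcher inside the Weight itself.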
--- .../function/FiltersFunctionScoreQuery.java | 67 ++++++++++--------- .../search/function/FunctionScoreQuery.java | 49 +++++++------- 2 files changed, 59 insertions(+), 57 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/search/function/FiltersFunctionScoreQuery.java b/src/main/java/org/elasticsearch/common/lucene/search/function/FiltersFunctionScoreQuery.java index 890c2c05bb2..1a35f561ace 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/function/FiltersFunctionScoreQuery.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/function/FiltersFunctionScoreQuery.java @@ -75,13 +75,11 @@ public class FiltersFunctionScoreQuery extends Query { final FilterFunction[] filterFunctions; final ScoreMode scoreMode; final float maxBoost; - DocSet[] docSets; public FiltersFunctionScoreQuery(Query subQuery, ScoreMode scoreMode, FilterFunction[] filterFunctions, float maxBoost) { this.subQuery = subQuery; this.scoreMode = scoreMode; this.filterFunctions = filterFunctions; - this.docSets = new DocSet[filterFunctions.length]; this.maxBoost = maxBoost; } @@ -109,42 +107,39 @@ public class FiltersFunctionScoreQuery extends Query { @Override public Weight createWeight(IndexSearcher searcher) throws IOException { - return new CustomBoostFactorWeight(searcher); + Weight subQueryWeight = subQuery.createWeight(searcher); + return new CustomBoostFactorWeight(subQueryWeight, filterFunctions.length); } class CustomBoostFactorWeight extends Weight { - IndexSearcher searcher; - Weight subQueryWeight; - public CustomBoostFactorWeight(IndexSearcher searcher) throws IOException { - this.searcher = searcher; - this.subQueryWeight = subQuery.weight(searcher); + final Weight subQueryWeight; + final DocSet[] docSets; + + public CustomBoostFactorWeight(Weight subQueryWeight, int filterFunctionLength) throws IOException { + this.subQueryWeight = subQueryWeight; + this.docSets = new DocSet[filterFunctionLength]; } public Query getQuery() { return FiltersFunctionScoreQuery.this; } - public float getValue() { - return getBoost(); - } - @Override - public float sumOfSquaredWeights() throws IOException { - float sum = subQueryWeight.sumOfSquaredWeights(); + public float getValueForNormalization() throws IOException { + float sum = subQueryWeight.getValueForNormalization(); sum *= getBoost() * getBoost(); return sum; } @Override - public void normalize(float norm) { - norm *= getBoost(); - subQueryWeight.normalize(norm); + public void normalize(float norm, float topLevelBoost) { + subQueryWeight.normalize(norm, topLevelBoost * getBoost()); } @Override public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { - Scorer subQueryScorer = subQueryWeight.scorer(context, scoreDocsInOrder, false, acceptDocs); + Scorer subQueryScorer = subQueryWeight.scorer(context, scoreDocsInOrder, topScorer, acceptDocs); if (subQueryScorer == null) { return null; } @@ -153,7 +148,7 @@ public class FiltersFunctionScoreQuery extends Query { filterFunction.function.setNextReader(context); docSets[i] = DocSets.convert(context.reader(), filterFunction.filter.getDocIdSet(context, acceptDocs)); } - return new CustomBoostFactorScorer(getSimilarity(searcher), this, subQueryScorer, scoreMode, filterFunctions, maxBoost, docSets); + return new CustomBoostFactorScorer(this, subQueryScorer, scoreMode, filterFunctions, maxBoost, docSets); } @Override @@ -165,15 +160,15 @@ public class FiltersFunctionScoreQuery extends Query { if (scoreMode == 
ScoreMode.First) { for (FilterFunction filterFunction : filterFunctions) { - DocSet docSet = DocSets.convert(context.reader(), filterFunction.filter.getDocIdSet(context)); + DocSet docSet = DocSets.convert(context.reader(), filterFunction.filter.getDocIdSet(context, context.reader().getLiveDocs())); if (docSet.get(doc)) { filterFunction.function.setNextReader(context); Explanation functionExplanation = filterFunction.function.explainFactor(doc); - float sc = getValue() * subQueryExpl.getValue() * functionExplanation.getValue(); + float sc = getBoost() * subQueryExpl.getValue() * functionExplanation.getValue(); Explanation filterExplanation = new ComplexExplanation(true, sc, "custom score, product of:"); filterExplanation.addDetail(new Explanation(1.0f, "match filter: " + filterFunction.filter.toString())); filterExplanation.addDetail(functionExplanation); - filterExplanation.addDetail(new Explanation(getValue(), "queryBoost")); + filterExplanation.addDetail(new Explanation(getBoost(), "queryBoost")); // top level score = subquery.score * filter.score (this already has the query boost) float topLevelScore = subQueryExpl.getValue() * sc; @@ -191,7 +186,7 @@ public class FiltersFunctionScoreQuery extends Query { float min = Float.POSITIVE_INFINITY; ArrayList filtersExplanations = new ArrayList(); for (FilterFunction filterFunction : filterFunctions) { - DocSet docSet = DocSets.convert(context.reader(), filterFunction.filter.getDocIdSet(context)); + DocSet docSet = DocSets.convert(context.reader(), filterFunction.filter.getDocIdSet(context, context.reader().getLiveDocs())); if (docSet.get(doc)) { filterFunction.function.setNextReader(context); Explanation functionExplanation = filterFunction.function.explainFactor(doc); @@ -204,7 +199,7 @@ public class FiltersFunctionScoreQuery extends Query { Explanation res = new ComplexExplanation(true, factor, "custom score, product of:"); res.addDetail(new Explanation(1.0f, "match filter: " + filterFunction.filter.toString())); res.addDetail(functionExplanation); - res.addDetail(new Explanation(getValue(), "queryBoost")); + res.addDetail(new Explanation(getBoost(), "queryBoost")); filtersExplanations.add(res); } } @@ -231,7 +226,7 @@ public class FiltersFunctionScoreQuery extends Query { if (factor > maxBoost) { factor = maxBoost; } - float sc = factor * subQueryExpl.getValue() * getValue(); + float sc = factor * subQueryExpl.getValue() * getBoost(); Explanation res = new ComplexExplanation(true, sc, "custom score, score mode [" + scoreMode.toString().toLowerCase() + "]"); res.addDetail(subQueryExpl); for (Explanation explanation : filtersExplanations) { @@ -241,27 +236,28 @@ public class FiltersFunctionScoreQuery extends Query { } } - float sc = getValue() * subQueryExpl.getValue(); + float sc = getBoost() * subQueryExpl.getValue(); Explanation res = new ComplexExplanation(true, sc, "custom score, no filter match, product of:"); res.addDetail(subQueryExpl); - res.addDetail(new Explanation(getValue(), "queryBoost")); + res.addDetail(new Explanation(getBoost(), "queryBoost")); return res; } } static class CustomBoostFactorScorer extends Scorer { - private final float subQueryWeight; + + private final float subQueryBoost; private final Scorer scorer; private final FilterFunction[] filterFunctions; private final ScoreMode scoreMode; private final float maxBoost; private final DocSet[] docSets; - private CustomBoostFactorScorer(Similarity similarity, CustomBoostFactorWeight w, Scorer scorer, - ScoreMode scoreMode, FilterFunction[] filterFunctions, float 
maxBoost, DocSet[] docSets) throws IOException { - super(similarity); - this.subQueryWeight = w.getValue(); + private CustomBoostFactorScorer(CustomBoostFactorWeight w, Scorer scorer, ScoreMode scoreMode, + FilterFunction[] filterFunctions, float maxBoost, DocSet[] docSets) throws IOException { + super(w); + this.subQueryBoost = w.getQuery().getBoost(); this.scorer = scorer; this.scoreMode = scoreMode; this.filterFunctions = filterFunctions; @@ -341,7 +337,12 @@ public class FiltersFunctionScoreQuery extends Query { factor = maxBoost; } float score = scorer.score(); - return subQueryWeight * score * factor; + return subQueryBoost * score * factor; + } + + @Override + public float freq() throws IOException { + return scorer.freq(); } } diff --git a/src/main/java/org/elasticsearch/common/lucene/search/function/FunctionScoreQuery.java b/src/main/java/org/elasticsearch/common/lucene/search/function/FunctionScoreQuery.java index 9297fb0def6..7bb11567ba0 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/function/FunctionScoreQuery.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/function/FunctionScoreQuery.java @@ -66,47 +66,42 @@ public class FunctionScoreQuery extends Query { @Override public Weight createWeight(IndexSearcher searcher) throws IOException { - return new CustomBoostFactorWeight(searcher); + Weight subQueryWeight = subQuery.createWeight(searcher); + return new CustomBoostFactorWeight(subQueryWeight); } class CustomBoostFactorWeight extends Weight { - IndexSearcher searcher; - Weight subQueryWeight; - public CustomBoostFactorWeight(IndexSearcher searcher) throws IOException { - this.searcher = searcher; - this.subQueryWeight = subQuery.createWeight(searcher); + final Weight subQueryWeight; + + public CustomBoostFactorWeight(Weight subQueryWeight) throws IOException { + this.subQueryWeight = subQueryWeight; } public Query getQuery() { return FunctionScoreQuery.this; } - public float getValue() { - return getBoost(); - } - @Override - public float sumOfSquaredWeights() throws IOException { - float sum = subQueryWeight.sumOfSquaredWeights(); + public float getValueForNormalization() throws IOException { + float sum = subQueryWeight.getValueForNormalization(); sum *= getBoost() * getBoost(); return sum; } @Override - public void normalize(float norm) { - norm *= getBoost(); - subQueryWeight.normalize(norm); + public void normalize(float norm, float topLevelBoost) { + subQueryWeight.normalize(norm, topLevelBoost * getBoost()); } @Override public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { - Scorer subQueryScorer = subQueryWeight.scorer(context, scoreDocsInOrder, false, acceptDocs); + Scorer subQueryScorer = subQueryWeight.scorer(context, scoreDocsInOrder, topScorer, acceptDocs); if (subQueryScorer == null) { return null; } function.setNextReader(context); - return new CustomBoostFactorScorer(getSimilarity(searcher), this, subQueryScorer, function); + return new CustomBoostFactorScorer(this, subQueryScorer, function); } @Override @@ -118,23 +113,24 @@ public class FunctionScoreQuery extends Query { function.setNextReader(context); Explanation functionExplanation = function.explainScore(doc, subQueryExpl); - float sc = getValue() * functionExplanation.getValue(); + float sc = getBoost() * functionExplanation.getValue(); Explanation res = new ComplexExplanation(true, sc, "custom score, product of:"); res.addDetail(functionExplanation); - res.addDetail(new 
Explanation(getValue(), "queryBoost")); + res.addDetail(new Explanation(getBoost(), "queryBoost")); return res; } } static class CustomBoostFactorScorer extends Scorer { - private final float subQueryWeight; + + private final float subQueryBoost; private final Scorer scorer; private final ScoreFunction function; - private CustomBoostFactorScorer(Similarity similarity, CustomBoostFactorWeight w, Scorer scorer, ScoreFunction function) throws IOException { - super(similarity); - this.subQueryWeight = w.getValue(); + private CustomBoostFactorScorer(CustomBoostFactorWeight w, Scorer scorer, ScoreFunction function) throws IOException { + super(w); + this.subQueryBoost = w.getQuery().getBoost(); this.scorer = scorer; this.function = function; } @@ -156,7 +152,12 @@ public class FunctionScoreQuery extends Query { @Override public float score() throws IOException { - return subQueryWeight * function.score(scorer.docID(), scorer.score()); + return subQueryBoost * function.score(scorer.docID(), scorer.score()); + } + + @Override + public float freq() throws IOException { + return scorer.freq(); } } From ddc3eb34153b373b9dc43a156b2904776af6038c Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Wed, 31 Oct 2012 13:35:06 +0100 Subject: [PATCH 074/146] lucene 4: Upgraded MultiPhrasePrefixQuery. --- .../lucene/search/MultiPhrasePrefixQuery.java | 39 +++++++++++-------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java b/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java index 315b47afdc4..c867d25dccf 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java @@ -19,11 +19,11 @@ package org.elasticsearch.common.lucene.search; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.*; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.Query; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.StringHelper; import org.apache.lucene.util.ToStringUtils; import java.io.IOException; @@ -152,23 +152,30 @@ public class MultiPhrasePrefixQuery extends Query { } private void getPrefixTerms(List terms, final Term prefix, final IndexReader reader) throws IOException { - TermEnum enumerator = reader.terms(prefix); - try { - do { - Term term = enumerator.term(); - if (term != null - && term.text().startsWith(prefix.text()) - && term.field().equals(field)) { - terms.add(term); - } else { + TermsEnum termsEnum = null; + List leaves = reader.leaves(); + for (AtomicReaderContext leaf : leaves) { + Terms _terms = leaf.reader().terms(field); + if (_terms == null) { + continue; + } + + termsEnum = _terms.iterator(termsEnum); + TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(prefix.bytes()); + if (TermsEnum.SeekStatus.END == seekStatus) { + continue; + } + + for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) { + if (StringHelper.startsWith(term, prefix.bytes())) { break; } + + terms.add(new Term(field, BytesRef.deepCopyOf(term))); if (terms.size() >= maxExpansions) { - break; + return; } - } while (enumerator.next()); - } finally { - enumerator.close(); + } } } From e75c732bdd29f6ffe1e104318ec4bff1cf56ea96 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Wed, 31 Oct 2012 13:51:47 +0100 
Subject: [PATCH 075/146] lucene 4: Upgraded MatchNoDocsQuery. --- .../lucene/search/MatchNoDocsQuery.java | 35 +++++-------------- .../lucene/search/MultiPhrasePrefixQuery.java | 9 +++-- 2 files changed, 14 insertions(+), 30 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/search/MatchNoDocsQuery.java b/src/main/java/org/elasticsearch/common/lucene/search/MatchNoDocsQuery.java index f2b8d6c66b6..81a7e5845e3 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/MatchNoDocsQuery.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/MatchNoDocsQuery.java @@ -19,9 +19,11 @@ package org.elasticsearch.common.lucene.search; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; import org.apache.lucene.search.*; +import org.apache.lucene.util.Bits; import java.io.IOException; import java.util.Set; @@ -43,20 +45,6 @@ public final class MatchNoDocsQuery extends Query { * Weight implementation that matches no documents. */ private class MatchNoDocsWeight extends Weight { - /** - * The similarity implementation. - */ - private final Similarity similarity; - - - /** - * Creates a new weight that matches nothing. - * - * @param searcher the search to match for - */ - public MatchNoDocsWeight(final Searcher searcher) { - this.similarity = searcher.getSimilarity(); - } @Override public String toString() { @@ -69,36 +57,29 @@ public final class MatchNoDocsQuery extends Query { } @Override - public float getValue() { + public float getValueForNormalization() throws IOException { return 0; } @Override - public float sumOfSquaredWeights() { - return 0; + public void normalize(float norm, float topLevelBoost) { } @Override - public void normalize(final float queryNorm) { - } - - @Override - public Scorer scorer(final IndexReader reader, - final boolean scoreDocsInOrder, - final boolean topScorer) throws IOException { + public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException { return null; } @Override - public Explanation explain(final IndexReader reader, + public Explanation explain(final AtomicReaderContext context, final int doc) { return new ComplexExplanation(false, 0, "MatchNoDocs matches nothing"); } } @Override - public Weight createWeight(final Searcher searcher) { - return new MatchNoDocsWeight(searcher); + public Weight createWeight(IndexSearcher searcher) throws IOException { + return new MatchNoDocsWeight(); } @Override diff --git a/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java b/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java index c867d25dccf..aae37ec8086 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/MultiPhrasePrefixQuery.java @@ -19,6 +19,7 @@ package org.elasticsearch.common.lucene.search; +import gnu.trove.set.hash.THashSet; import org.apache.lucene.index.*; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.Query; @@ -137,7 +138,7 @@ public class MultiPhrasePrefixQuery extends Query { } Term[] suffixTerms = termArrays.get(sizeMinus1); int position = positions.get(sizeMinus1); - List terms = new ArrayList(); + Set terms = new THashSet(); for (Term term : suffixTerms) { getPrefixTerms(terms, term, reader); if (terms.size() > maxExpansions) { @@ -151,7 +152,9 @@ public class 
MultiPhrasePrefixQuery extends Query { return query.rewrite(reader); } - private void getPrefixTerms(List terms, final Term prefix, final IndexReader reader) throws IOException { + private void getPrefixTerms(Set terms, final Term prefix, final IndexReader reader) throws IOException { + // SlowCompositeReaderWrapper could be used... but this would merge all terms from each segment into one terms + // instance, which is very expensive. Therefore I think it is better to iterate over each leaf individually. TermsEnum termsEnum = null; List leaves = reader.leaves(); for (AtomicReaderContext leaf : leaves) { @@ -167,7 +170,7 @@ public class MultiPhrasePrefixQuery extends Query { } for (BytesRef term = termsEnum.term(); term != null; term = termsEnum.next()) { - if (StringHelper.startsWith(term, prefix.bytes())) { + if (!StringHelper.startsWith(term, prefix.bytes())) { break; } From 1a46179c4e7bbfdd1350bda6a685168ef40c6681 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Wed, 31 Oct 2012 13:58:07 +0100 Subject: [PATCH 076/146] lucene 4: Upgraded AndFilter, FilteredCollector, LimitFilter, MatchAllDocsFilter and MatchNoDocsFilter. --- .../org/elasticsearch/common/lucene/search/AndFilter.java | 8 +++++--- .../common/lucene/search/FilteredCollector.java | 2 +- .../elasticsearch/common/lucene/search/LimitFilter.java | 6 ++++-- .../common/lucene/search/MatchAllDocsFilter.java | 6 ++++-- .../common/lucene/search/MatchNoDocsFilter.java | 4 +++- 5 files changed, 17 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/search/AndFilter.java b/src/main/java/org/elasticsearch/common/lucene/search/AndFilter.java index 704643baea3..8c8abfa3dde 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/AndFilter.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/AndFilter.java @@ -20,9 +20,11 @@ package org.elasticsearch.common.lucene.search; import com.google.common.collect.Lists; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.elasticsearch.common.lucene.docset.AndDocIdSet; import org.elasticsearch.common.lucene.docset.AndDocSet; import org.elasticsearch.common.lucene.docset.DocSet; @@ -46,14 +48,14 @@ public class AndFilter extends Filter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { if (filters.size() == 1) { - return filters.get(0).getDocIdSet(reader); + return filters.get(0).getDocIdSet(context, acceptDocs); } List sets = Lists.newArrayListWithExpectedSize(filters.size()); boolean allAreDocSet = true; for (Filter filter : filters) { - DocIdSet set = filter.getDocIdSet(reader); + DocIdSet set = filter.getDocIdSet(context, acceptDocs); if (set == null) { // none matching for this filter, we AND, so return EMPTY return DocSet.EMPTY_DOC_SET; } diff --git a/src/main/java/org/elasticsearch/common/lucene/search/FilteredCollector.java b/src/main/java/org/elasticsearch/common/lucene/search/FilteredCollector.java index 38ce581c5a7..e5d8d12f7d6 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/FilteredCollector.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/FilteredCollector.java @@ -59,7 +59,7 @@ public class FilteredCollector extends Collector { @Override public void setNextReader(AtomicReaderContext 
context) throws IOException { collector.setNextReader(context); - docSet = DocSets.convert(context.reader(), filter.getDocIdSet(context)); + docSet = DocSets.convert(context.reader(), filter.getDocIdSet(context, null)); } @Override diff --git a/src/main/java/org/elasticsearch/common/lucene/search/LimitFilter.java b/src/main/java/org/elasticsearch/common/lucene/search/LimitFilter.java index ece97578047..f0fc3bb02a7 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/LimitFilter.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/LimitFilter.java @@ -19,8 +19,10 @@ package org.elasticsearch.common.lucene.search; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.DocIdSet; +import org.apache.lucene.util.Bits; import org.elasticsearch.common.lucene.docset.GetDocSet; import java.io.IOException; @@ -39,11 +41,11 @@ public class LimitFilter extends NoCacheFilter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { if (counter > limit) { return null; } - return new LimitDocSet(reader.maxDoc(), limit); + return new LimitDocSet(context.reader().maxDoc(), limit); } public class LimitDocSet extends GetDocSet { diff --git a/src/main/java/org/elasticsearch/common/lucene/search/MatchAllDocsFilter.java b/src/main/java/org/elasticsearch/common/lucene/search/MatchAllDocsFilter.java index fd382a83380..0f597e280eb 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/MatchAllDocsFilter.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/MatchAllDocsFilter.java @@ -19,9 +19,11 @@ package org.elasticsearch.common.lucene.search; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.elasticsearch.common.lucene.docset.AllDocSet; import java.io.IOException; @@ -34,8 +36,8 @@ import java.io.IOException; public class MatchAllDocsFilter extends Filter { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - return new AllDocSet(reader.maxDoc()); + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + return new AllDocSet(context.reader().maxDoc()); } @Override diff --git a/src/main/java/org/elasticsearch/common/lucene/search/MatchNoDocsFilter.java b/src/main/java/org/elasticsearch/common/lucene/search/MatchNoDocsFilter.java index 8c80431c973..b380752bc16 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/MatchNoDocsFilter.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/MatchNoDocsFilter.java @@ -19,9 +19,11 @@ package org.elasticsearch.common.lucene.search; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import java.io.IOException; @@ -31,7 +33,7 @@ import java.io.IOException; public class MatchNoDocsFilter extends Filter { @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { return null; } From 5d7ef8f58578931cad9490300cc918d951b8306e Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Wed, 31 Oct 2012 08:48:00 
-0400 Subject: [PATCH 077/146] lucene4: fixed SortParseElement --- .../elasticsearch/search/sort/SortParseElement.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/sort/SortParseElement.java b/src/main/java/org/elasticsearch/search/sort/SortParseElement.java index eb72e40c023..d9a1b27622d 100644 --- a/src/main/java/org/elasticsearch/search/sort/SortParseElement.java +++ b/src/main/java/org/elasticsearch/search/sort/SortParseElement.java @@ -37,10 +37,10 @@ import java.util.List; */ public class SortParseElement implements SearchParseElement { - private static final SortField SORT_SCORE = new SortField(null, SortField.SCORE); - private static final SortField SORT_SCORE_REVERSE = new SortField(null, SortField.SCORE, true); - private static final SortField SORT_DOC = new SortField(null, SortField.DOC); - private static final SortField SORT_DOC_REVERSE = new SortField(null, SortField.DOC, true); + private static final SortField SORT_SCORE = new SortField(null, SortField.Type.SCORE); + private static final SortField SORT_SCORE_REVERSE = new SortField(null, SortField.Type.SCORE, true); + private static final SortField SORT_DOC = new SortField(null, SortField.Type.DOC); + private static final SortField SORT_DOC_REVERSE = new SortField(null, SortField.Type.DOC, true); public static final String SCORE_FIELD_NAME = "_score"; public static final String DOC_FIELD_NAME = "_doc"; @@ -82,7 +82,7 @@ public class SortParseElement implements SearchParseElement { sort = true; } else { SortField sortField = sortFields.get(0); - if (sortField.getType() == SortField.SCORE && !sortField.getReverse()) { + if (sortField.getType() == SortField.Type.SCORE && !sortField.getReverse()) { sort = false; } else { sort = true; From 6b4e483f558e7cbc1c5b6118161cefabfbcbb9ae Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Wed, 31 Oct 2012 10:13:12 -0400 Subject: [PATCH 078/146] lucene4: fixed unit.index.mapper, unit.index.query and unit.index.store tests (with the exception of document boost and similarity issues) --- .../mapper/all/SimpleAllMapperTests.java | 12 +-- .../mapper/compound/CompoundTypesTests.java | 6 +- .../mapper/date/SimpleDateMappingTests.java | 8 +- .../GenericStoreDynamicTemplateTests.java | 10 +- .../PathMatchDynamicTemplateTests.java | 14 +-- .../simple/SimpleDynamicTemplatesTests.java | 62 +++++------ .../geo/GeohashMappingGeoPointTests.java | 12 +-- .../LatLonAndGeohashMappingGeoPointTests.java | 12 +-- .../geo/LatLonMappingGeoPointTests.java | 101 +++++++++--------- .../mapper/index/IndexTypeMapperTests.java | 6 +- .../mapper/multifield/MultiFieldTests.java | 49 +++++---- .../merge/JavaMultiFieldMergeTests.java | 10 +- .../mapper/nested/NestedMappingTests.java | 6 +- .../mapper/numeric/SimpleNumericTests.java | 6 +- .../mapper/simple/SimpleMapperTests.java | 2 +- .../index/mapper/size/SizeMappingTests.java | 12 +-- .../source/CompressSourceMappingTests.java | 14 ++- .../source/DefaultSourceMappingTests.java | 7 +- .../string/SimpleStringMappingTests.java | 18 ++-- .../timestamp/TimestampMappingTests.java | 16 +-- .../index/mapper/ttl/TTLMappingTests.java | 16 +-- .../query/SimpleIndexQueryParserTests.java | 54 ++++++---- .../memory/SimpleByteBufferStoreTests.java | 11 +- 23 files changed, 237 insertions(+), 227 deletions(-) diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/all/SimpleAllMapperTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/all/SimpleAllMapperTests.java index 33e1730ddb9..36b8a414882 
100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/all/SimpleAllMapperTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/all/SimpleAllMapperTests.java @@ -50,7 +50,7 @@ public class SimpleAllMapperTests { DocumentMapper docMapper = MapperTests.newParser().parse(mapping); byte[] json = copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/all/test1.json"); Document doc = docMapper.parse(new BytesArray(json)).rootDoc(); - AllField field = (AllField) doc.getFieldable("_all"); + AllField field = (AllField) doc.getField("_all"); AllEntries allEntries = ((AllTokenStream) field.tokenStreamValue()).allEntries(); assertThat(allEntries.fields().size(), equalTo(3)); assertThat(allEntries.fields().contains("address.last.location"), equalTo(true)); @@ -66,7 +66,7 @@ public class SimpleAllMapperTests { DocumentMapper docMapper = MapperTests.newParser().parse(mapping); byte[] json = copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/all/test1.json"); Document doc = docMapper.parse(new BytesArray(json)).rootDoc(); - AllField field = (AllField) doc.getFieldable("_all"); + AllField field = (AllField) doc.getField("_all"); AllEntries allEntries = ((AllTokenStream) field.tokenStreamValue()).allEntries(); assertThat(allEntries.fields().size(), equalTo(3)); assertThat(allEntries.fields().contains("address.last.location"), equalTo(true)); @@ -82,7 +82,7 @@ public class SimpleAllMapperTests { DocumentMapper docMapper = MapperTests.newParser().parse(mapping); byte[] json = copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/all/test1.json"); Document doc = docMapper.parse(new BytesArray(json)).rootDoc(); - AllField field = (AllField) doc.getFieldable("_all"); + AllField field = (AllField) doc.getField("_all"); AllEntries allEntries = ((AllTokenStream) field.tokenStreamValue()).allEntries(); assertThat(allEntries.fields().size(), equalTo(3)); assertThat(allEntries.fields().contains("address.last.location"), equalTo(true)); @@ -105,7 +105,7 @@ public class SimpleAllMapperTests { byte[] json = copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/all/test1.json"); Document doc = builtDocMapper.parse(new BytesArray(json)).rootDoc(); - AllField field = (AllField) doc.getFieldable("_all"); + AllField field = (AllField) doc.getField("_all"); AllEntries allEntries = ((AllTokenStream) field.tokenStreamValue()).allEntries(); assertThat(allEntries.fields().size(), equalTo(3)); assertThat(allEntries.fields().contains("address.last.location"), equalTo(true)); @@ -119,7 +119,7 @@ public class SimpleAllMapperTests { DocumentMapper docMapper = MapperTests.newParser().parse(mapping); byte[] json = copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/all/test1.json"); Document doc = docMapper.parse(new BytesArray(json)).rootDoc(); - AllField field = (AllField) doc.getFieldable("_all"); + AllField field = (AllField) doc.getField("_all"); AllEntries allEntries = ((AllTokenStream) field.tokenStreamValue()).allEntries(); assertThat(allEntries.fields().size(), equalTo(2)); assertThat(allEntries.fields().contains("name.last"), equalTo(true)); @@ -140,7 +140,7 @@ public class SimpleAllMapperTests { byte[] json = copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/all/test1.json"); Document doc = builtDocMapper.parse(new BytesArray(json)).rootDoc(); - AllField field = (AllField) doc.getFieldable("_all"); + AllField field = (AllField) doc.getField("_all"); AllEntries allEntries = ((AllTokenStream) 
field.tokenStreamValue()).allEntries(); assertThat(allEntries.fields().size(), equalTo(2)); assertThat(allEntries.fields().contains("name.last"), equalTo(true)); diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/compound/CompoundTypesTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/compound/CompoundTypesTests.java index f9cf7687e00..072e612bdcc 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/compound/CompoundTypesTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/compound/CompoundTypesTests.java @@ -49,7 +49,7 @@ public class CompoundTypesTests { .bytes()); assertThat(doc.rootDoc().get("field1"), equalTo("value1")); - assertThat((double) doc.rootDoc().getFieldable("field1").getBoost(), closeTo(1.0d, 0.000001d)); + assertThat((double) doc.rootDoc().getField("field1").boost(), closeTo(1.0d, 0.000001d)); assertThat(doc.rootDoc().get("field2"), equalTo("value2")); doc = defaultMapper.parse("type", "1", XContentFactory.jsonBuilder() @@ -59,7 +59,7 @@ public class CompoundTypesTests { .bytes()); assertThat(doc.rootDoc().get("field1"), equalTo("value1")); - assertThat((double) doc.rootDoc().getFieldable("field1").getBoost(), closeTo(2.0d, 0.000001d)); + assertThat((double) doc.rootDoc().getField("field1").boost(), closeTo(2.0d, 0.000001d)); assertThat(doc.rootDoc().get("field2"), equalTo("value2")); doc = defaultMapper.parse("type", "1", XContentFactory.jsonBuilder() @@ -69,7 +69,7 @@ public class CompoundTypesTests { .bytes()); assertThat(doc.rootDoc().get("field1"), equalTo("value1")); - assertThat((double) doc.rootDoc().getFieldable("field1").getBoost(), closeTo(1.0d, 0.000001d)); + assertThat((double) doc.rootDoc().getField("field1").boost(), closeTo(1.0d, 0.000001d)); assertThat(doc.rootDoc().get("field2"), equalTo("value2")); } } \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/date/SimpleDateMappingTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/date/SimpleDateMappingTests.java index 0ec0c025db7..4da6a1c0847 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/date/SimpleDateMappingTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/date/SimpleDateMappingTests.java @@ -78,7 +78,7 @@ public class SimpleDateMappingTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("date_field").tokenStreamValue(), notNullValue()); + assertThat(doc.rootDoc().getField("date_field").tokenStream(defaultMapper.indexAnalyzer()), notNullValue()); } @Test @@ -119,8 +119,8 @@ public class SimpleDateMappingTests { .field("field2", "2010-01-01") .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("field1"), nullValue()); - assertThat(doc.rootDoc().getFieldable("field2"), notNullValue()); + assertThat(doc.rootDoc().getField("field1"), nullValue()); + assertThat(doc.rootDoc().getField("field2"), notNullValue()); try { defaultMapper.parse("type", "1", XContentFactory.jsonBuilder() @@ -151,7 +151,7 @@ public class SimpleDateMappingTests { .field("field3", "a") .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("field3"), nullValue()); + assertThat(doc.rootDoc().getField("field3"), nullValue()); // This should still throw an exception, since field2 is specifically set to ignore_malformed=false try { diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/dynamictemplate/genericstore/GenericStoreDynamicTemplateTests.java 
b/src/test/java/org/elasticsearch/test/unit/index/mapper/dynamictemplate/genericstore/GenericStoreDynamicTemplateTests.java index f115c161bec..cdfbfa9502f 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/dynamictemplate/genericstore/GenericStoreDynamicTemplateTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/dynamictemplate/genericstore/GenericStoreDynamicTemplateTests.java @@ -20,7 +20,7 @@ package org.elasticsearch.test.unit.index.mapper.dynamictemplate.genericstore; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.FieldMappers; @@ -44,18 +44,18 @@ public class GenericStoreDynamicTemplateTests { byte[] json = copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/dynamictemplate/genericstore/test-data.json"); Document doc = docMapper.parse(new BytesArray(json)).rootDoc(); - Fieldable f = doc.getFieldable("name"); + IndexableField f = doc.getField("name"); assertThat(f.name(), equalTo("name")); assertThat(f.stringValue(), equalTo("some name")); - assertThat(f.isStored(), equalTo(true)); + assertThat(f.fieldType().stored(), equalTo(true)); FieldMappers fieldMappers = docMapper.mappers().fullName("name"); assertThat(fieldMappers.mappers().size(), equalTo(1)); assertThat(fieldMappers.mapper().stored(), equalTo(true)); - f = doc.getFieldable("age"); + f = doc.getField("age"); assertThat(f.name(), equalTo("age")); - assertThat(f.isStored(), equalTo(true)); + assertThat(f.fieldType().stored(), equalTo(true)); fieldMappers = docMapper.mappers().fullName("age"); assertThat(fieldMappers.mappers().size(), equalTo(1)); diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/dynamictemplate/pathmatch/PathMatchDynamicTemplateTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/dynamictemplate/pathmatch/PathMatchDynamicTemplateTests.java index b4583809ec2..72574f87f3d 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/dynamictemplate/pathmatch/PathMatchDynamicTemplateTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/dynamictemplate/pathmatch/PathMatchDynamicTemplateTests.java @@ -20,7 +20,7 @@ package org.elasticsearch.test.unit.index.mapper.dynamictemplate.pathmatch; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.index.field.data.strings.StringFieldDataType; import org.elasticsearch.index.mapper.DocumentMapper; @@ -46,26 +46,26 @@ public class PathMatchDynamicTemplateTests { byte[] json = copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/dynamictemplate/pathmatch/test-data.json"); Document doc = docMapper.parse(new BytesArray(json)).rootDoc(); - Fieldable f = doc.getFieldable("name"); + IndexableField f = doc.getField("name"); assertThat(f.name(), equalTo("name")); assertThat(f.stringValue(), equalTo("top_level")); - assertThat(f.isStored(), equalTo(false)); + assertThat(f.fieldType().stored(), equalTo(false)); FieldMappers fieldMappers = docMapper.mappers().fullName("name"); assertThat(fieldMappers.mappers().size(), equalTo(1)); assertThat(fieldMappers.mapper().stored(), equalTo(false)); - f = doc.getFieldable("obj1.name"); + f = doc.getField("obj1.name"); 
assertThat(f.name(), equalTo("obj1.name")); - assertThat(f.isStored(), equalTo(true)); + assertThat(f.fieldType().stored(), equalTo(true)); fieldMappers = docMapper.mappers().fullName("obj1.name"); assertThat(fieldMappers.mappers().size(), equalTo(1)); assertThat(fieldMappers.mapper().stored(), equalTo(true)); - f = doc.getFieldable("obj1.obj2.name"); + f = doc.getField("obj1.obj2.name"); assertThat(f.name(), equalTo("obj1.obj2.name")); - assertThat(f.isStored(), equalTo(false)); + assertThat(f.fieldType().stored(), equalTo(false)); fieldMappers = docMapper.mappers().fullName("obj1.obj2.name"); assertThat(fieldMappers.mappers().size(), equalTo(1)); diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/dynamictemplate/simple/SimpleDynamicTemplatesTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/dynamictemplate/simple/SimpleDynamicTemplatesTests.java index d9a0f63e718..b360b905d47 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/dynamictemplate/simple/SimpleDynamicTemplatesTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/dynamictemplate/simple/SimpleDynamicTemplatesTests.java @@ -20,7 +20,7 @@ package org.elasticsearch.test.unit.index.mapper.dynamictemplate.simple; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.FieldMappers; @@ -44,47 +44,47 @@ public class SimpleDynamicTemplatesTests { byte[] json = copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/dynamictemplate/simple/test-data.json"); Document doc = docMapper.parse(new BytesArray(json)).rootDoc(); - Fieldable f = doc.getFieldable("name"); + IndexableField f = doc.getField("name"); assertThat(f.name(), equalTo("name")); assertThat(f.stringValue(), equalTo("some name")); - assertThat(f.isIndexed(), equalTo(true)); - assertThat(f.isTokenized(), equalTo(false)); + assertThat(f.fieldType().indexed(), equalTo(true)); + assertThat(f.fieldType().tokenized(), equalTo(false)); FieldMappers fieldMappers = docMapper.mappers().fullName("name"); assertThat(fieldMappers.mappers().size(), equalTo(1)); - f = doc.getFieldable("multi1"); + f = doc.getField("multi1"); assertThat(f.name(), equalTo("multi1")); assertThat(f.stringValue(), equalTo("multi 1")); - assertThat(f.isIndexed(), equalTo(true)); - assertThat(f.isTokenized(), equalTo(true)); + assertThat(f.fieldType().indexed(), equalTo(true)); + assertThat(f.fieldType().tokenized(), equalTo(true)); fieldMappers = docMapper.mappers().fullName("multi1"); assertThat(fieldMappers.mappers().size(), equalTo(1)); - f = doc.getFieldable("multi1.org"); + f = doc.getField("multi1.org"); assertThat(f.name(), equalTo("multi1.org")); assertThat(f.stringValue(), equalTo("multi 1")); - assertThat(f.isIndexed(), equalTo(true)); - assertThat(f.isTokenized(), equalTo(false)); + assertThat(f.fieldType().indexed(), equalTo(true)); + assertThat(f.fieldType().tokenized(), equalTo(false)); fieldMappers = docMapper.mappers().fullName("multi1.org"); assertThat(fieldMappers.mappers().size(), equalTo(1)); - f = doc.getFieldable("multi2"); + f = doc.getField("multi2"); assertThat(f.name(), equalTo("multi2")); assertThat(f.stringValue(), equalTo("multi 2")); - assertThat(f.isIndexed(), equalTo(true)); - assertThat(f.isTokenized(), equalTo(true)); + assertThat(f.fieldType().indexed(), equalTo(true)); + 
assertThat(f.fieldType().tokenized(), equalTo(true)); fieldMappers = docMapper.mappers().fullName("multi2"); assertThat(fieldMappers.mappers().size(), equalTo(1)); - f = doc.getFieldable("multi2.org"); + f = doc.getField("multi2.org"); assertThat(f.name(), equalTo("multi2.org")); assertThat(f.stringValue(), equalTo("multi 2")); - assertThat(f.isIndexed(), equalTo(true)); - assertThat(f.isTokenized(), equalTo(false)); + assertThat(f.fieldType().indexed(), equalTo(true)); + assertThat(f.fieldType().tokenized(), equalTo(false)); fieldMappers = docMapper.mappers().fullName("multi2.org"); assertThat(fieldMappers.mappers().size(), equalTo(1)); @@ -100,47 +100,47 @@ public class SimpleDynamicTemplatesTests { byte[] json = copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/dynamictemplate/simple/test-data.json"); Document doc = docMapper.parse(new BytesArray(json)).rootDoc(); - Fieldable f = doc.getFieldable("name"); + IndexableField f = doc.getField("name"); assertThat(f.name(), equalTo("name")); assertThat(f.stringValue(), equalTo("some name")); - assertThat(f.isIndexed(), equalTo(true)); - assertThat(f.isTokenized(), equalTo(false)); + assertThat(f.fieldType().indexed(), equalTo(true)); + assertThat(f.fieldType().tokenized(), equalTo(false)); FieldMappers fieldMappers = docMapper.mappers().fullName("name"); assertThat(fieldMappers.mappers().size(), equalTo(1)); - f = doc.getFieldable("multi1"); + f = doc.getField("multi1"); assertThat(f.name(), equalTo("multi1")); assertThat(f.stringValue(), equalTo("multi 1")); - assertThat(f.isIndexed(), equalTo(true)); - assertThat(f.isTokenized(), equalTo(true)); + assertThat(f.fieldType().indexed(), equalTo(true)); + assertThat(f.fieldType().tokenized(), equalTo(true)); fieldMappers = docMapper.mappers().fullName("multi1"); assertThat(fieldMappers.mappers().size(), equalTo(1)); - f = doc.getFieldable("multi1.org"); + f = doc.getField("multi1.org"); assertThat(f.name(), equalTo("multi1.org")); assertThat(f.stringValue(), equalTo("multi 1")); - assertThat(f.isIndexed(), equalTo(true)); - assertThat(f.isTokenized(), equalTo(false)); + assertThat(f.fieldType().indexed(), equalTo(true)); + assertThat(f.fieldType().tokenized(), equalTo(false)); fieldMappers = docMapper.mappers().fullName("multi1.org"); assertThat(fieldMappers.mappers().size(), equalTo(1)); - f = doc.getFieldable("multi2"); + f = doc.getField("multi2"); assertThat(f.name(), equalTo("multi2")); assertThat(f.stringValue(), equalTo("multi 2")); - assertThat(f.isIndexed(), equalTo(true)); - assertThat(f.isTokenized(), equalTo(true)); + assertThat(f.fieldType().indexed(), equalTo(true)); + assertThat(f.fieldType().tokenized(), equalTo(true)); fieldMappers = docMapper.mappers().fullName("multi2"); assertThat(fieldMappers.mappers().size(), equalTo(1)); - f = doc.getFieldable("multi2.org"); + f = doc.getField("multi2.org"); assertThat(f.name(), equalTo("multi2.org")); assertThat(f.stringValue(), equalTo("multi 2")); - assertThat(f.isIndexed(), equalTo(true)); - assertThat(f.isTokenized(), equalTo(false)); + assertThat(f.fieldType().indexed(), equalTo(true)); + assertThat(f.fieldType().tokenized(), equalTo(false)); fieldMappers = docMapper.mappers().fullName("multi2.org"); assertThat(fieldMappers.mappers().size(), equalTo(1)); diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/geo/GeohashMappingGeoPointTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/geo/GeohashMappingGeoPointTests.java index 64feebec0b1..cde370494ee 100644 --- 
a/src/test/java/org/elasticsearch/test/unit/index/mapper/geo/GeohashMappingGeoPointTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/geo/GeohashMappingGeoPointTests.java @@ -48,8 +48,8 @@ public class GeohashMappingGeoPointTests { .endObject() .bytes()); - MatcherAssert.assertThat(doc.rootDoc().getFieldable("point.lat"), nullValue()); - MatcherAssert.assertThat(doc.rootDoc().getFieldable("point.lon"), nullValue()); + MatcherAssert.assertThat(doc.rootDoc().getField("point.lat"), nullValue()); + MatcherAssert.assertThat(doc.rootDoc().getField("point.lon"), nullValue()); MatcherAssert.assertThat(doc.rootDoc().get("point"), equalTo("1.2,1.3")); } @@ -67,8 +67,8 @@ public class GeohashMappingGeoPointTests { .endObject() .bytes()); - MatcherAssert.assertThat(doc.rootDoc().getFieldable("point.lat"), nullValue()); - MatcherAssert.assertThat(doc.rootDoc().getFieldable("point.lon"), nullValue()); + MatcherAssert.assertThat(doc.rootDoc().getField("point.lat"), nullValue()); + MatcherAssert.assertThat(doc.rootDoc().getField("point.lon"), nullValue()); MatcherAssert.assertThat(doc.rootDoc().get("point"), equalTo("1.2,1.3")); } @@ -86,8 +86,8 @@ public class GeohashMappingGeoPointTests { .endObject() .bytes()); - MatcherAssert.assertThat(doc.rootDoc().getFieldable("point.lat"), nullValue()); - MatcherAssert.assertThat(doc.rootDoc().getFieldable("point.lon"), nullValue()); + MatcherAssert.assertThat(doc.rootDoc().getField("point.lat"), nullValue()); + MatcherAssert.assertThat(doc.rootDoc().getField("point.lon"), nullValue()); MatcherAssert.assertThat(doc.rootDoc().get("point.geohash"), equalTo(GeoHashUtils.encode(1.2, 1.3))); MatcherAssert.assertThat(doc.rootDoc().get("point"), notNullValue()); } diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/geo/LatLonAndGeohashMappingGeoPointTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/geo/LatLonAndGeohashMappingGeoPointTests.java index 66418c25780..84fd2a3605b 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/geo/LatLonAndGeohashMappingGeoPointTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/geo/LatLonAndGeohashMappingGeoPointTests.java @@ -49,8 +49,8 @@ public class LatLonAndGeohashMappingGeoPointTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("point.lat"), notNullValue()); - assertThat(doc.rootDoc().getFieldable("point.lon"), notNullValue()); + assertThat(doc.rootDoc().getField("point.lat"), notNullValue()); + assertThat(doc.rootDoc().getField("point.lon"), notNullValue()); assertThat(doc.rootDoc().get("point.geohash"), equalTo(GeoHashUtils.encode(1.2, 1.3))); } @@ -68,8 +68,8 @@ public class LatLonAndGeohashMappingGeoPointTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("point.lat"), notNullValue()); - assertThat(doc.rootDoc().getFieldable("point.lon"), notNullValue()); + assertThat(doc.rootDoc().getField("point.lat"), notNullValue()); + assertThat(doc.rootDoc().getField("point.lon"), notNullValue()); assertThat(doc.rootDoc().get("point.geohash"), equalTo(GeoHashUtils.encode(1.2, 1.3))); } @@ -87,8 +87,8 @@ public class LatLonAndGeohashMappingGeoPointTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("point.lat"), notNullValue()); - assertThat(doc.rootDoc().getFieldable("point.lon"), notNullValue()); + assertThat(doc.rootDoc().getField("point.lat"), notNullValue()); + assertThat(doc.rootDoc().getField("point.lon"), notNullValue()); assertThat(doc.rootDoc().get("point.geohash"), 
equalTo(GeoHashUtils.encode(1.2, 1.3))); } } \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/geo/LatLonMappingGeoPointTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/geo/LatLonMappingGeoPointTests.java index 0e926803326..17ea9f5eb21 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/geo/LatLonMappingGeoPointTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/geo/LatLonMappingGeoPointTests.java @@ -19,6 +19,7 @@ package org.elasticsearch.test.unit.index.mapper.geo; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.Numbers; import org.elasticsearch.common.xcontent.XContentFactory; @@ -185,11 +186,11 @@ public class LatLonMappingGeoPointTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("point.lat"), notNullValue()); - assertThat(doc.rootDoc().getFieldable("point.lat").getBinaryValue(), nullValue()); - assertThat(doc.rootDoc().getFieldable("point.lon"), notNullValue()); - assertThat(doc.rootDoc().getFieldable("point.lon").getBinaryValue(), nullValue()); - assertThat(doc.rootDoc().getFieldable("point.geohash"), nullValue()); + assertThat(doc.rootDoc().getField("point.lat"), notNullValue()); + assertThat(doc.rootDoc().getField("point.lat").binaryValue(), nullValue()); + assertThat(doc.rootDoc().getField("point.lon"), notNullValue()); + assertThat(doc.rootDoc().getField("point.lon").binaryValue(), nullValue()); + assertThat(doc.rootDoc().getField("point.geohash"), nullValue()); assertThat(doc.rootDoc().get("point"), equalTo("1.2,1.3")); } @@ -207,11 +208,11 @@ public class LatLonMappingGeoPointTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("point.lat"), notNullValue()); - assertThat(doc.rootDoc().getFieldable("point.lat").getBinaryValue(), equalTo(Numbers.doubleToBytes(1.2))); - assertThat(doc.rootDoc().getFieldable("point.lon"), notNullValue()); - assertThat(doc.rootDoc().getFieldable("point.lon").getBinaryValue(), equalTo(Numbers.doubleToBytes(1.3))); - assertThat(doc.rootDoc().getFieldable("point.geohash"), nullValue()); + assertThat(doc.rootDoc().getField("point.lat"), notNullValue()); + assertThat(BytesRef.deepCopyOf(doc.rootDoc().getField("point.lat").binaryValue()).bytes, equalTo(Numbers.doubleToBytes(1.2))); + assertThat(doc.rootDoc().getField("point.lon"), notNullValue()); + assertThat(BytesRef.deepCopyOf(doc.rootDoc().getField("point.lon").binaryValue()).bytes, equalTo(Numbers.doubleToBytes(1.3))); + assertThat(doc.rootDoc().getField("point.geohash"), nullValue()); assertThat(doc.rootDoc().get("point"), equalTo("1.2,1.3")); } @@ -232,14 +233,14 @@ public class LatLonMappingGeoPointTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldables("point.lat").length, equalTo(2)); - assertThat(doc.rootDoc().getFieldables("point.lon").length, equalTo(2)); - assertThat(doc.rootDoc().getFieldables("point.lat")[0].getBinaryValue(), equalTo(Numbers.doubleToBytes(1.2))); - assertThat(doc.rootDoc().getFieldables("point.lon")[0].getBinaryValue(), equalTo(Numbers.doubleToBytes(1.3))); - assertThat(doc.rootDoc().getFieldables("point")[0].stringValue(), equalTo("1.2,1.3")); - assertThat(doc.rootDoc().getFieldables("point.lat")[1].getBinaryValue(), equalTo(Numbers.doubleToBytes(1.4))); - assertThat(doc.rootDoc().getFieldables("point.lon")[1].getBinaryValue(), equalTo(Numbers.doubleToBytes(1.5))); - 
assertThat(doc.rootDoc().getFieldables("point")[1].stringValue(), equalTo("1.4,1.5")); + assertThat(doc.rootDoc().getFields("point.lat").length, equalTo(2)); + assertThat(doc.rootDoc().getFields("point.lon").length, equalTo(2)); + assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lat")[0].binaryValue()).bytes, equalTo(Numbers.doubleToBytes(1.2))); + assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lon")[0].binaryValue()).bytes, equalTo(Numbers.doubleToBytes(1.3))); + assertThat(doc.rootDoc().getFields("point")[0].stringValue(), equalTo("1.2,1.3")); + assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lat")[1].binaryValue()).bytes, equalTo(Numbers.doubleToBytes(1.4))); + assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lon")[1].binaryValue()).bytes, equalTo(Numbers.doubleToBytes(1.5))); + assertThat(doc.rootDoc().getFields("point")[1].stringValue(), equalTo("1.4,1.5")); } @Test @@ -256,8 +257,8 @@ public class LatLonMappingGeoPointTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("point.lat"), notNullValue()); - assertThat(doc.rootDoc().getFieldable("point.lon"), notNullValue()); + assertThat(doc.rootDoc().getField("point.lat"), notNullValue()); + assertThat(doc.rootDoc().getField("point.lon"), notNullValue()); assertThat(doc.rootDoc().get("point"), equalTo("1.2,1.3")); } @@ -275,10 +276,10 @@ public class LatLonMappingGeoPointTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("point.lat"), notNullValue()); - assertThat(doc.rootDoc().getFieldable("point.lat").getBinaryValue(), equalTo(Numbers.doubleToBytes(1.2))); - assertThat(doc.rootDoc().getFieldable("point.lon"), notNullValue()); - assertThat(doc.rootDoc().getFieldable("point.lon").getBinaryValue(), equalTo(Numbers.doubleToBytes(1.3))); + assertThat(doc.rootDoc().getField("point.lat"), notNullValue()); + assertThat(BytesRef.deepCopyOf(doc.rootDoc().getField("point.lat").binaryValue()).bytes, equalTo(Numbers.doubleToBytes(1.2))); + assertThat(doc.rootDoc().getField("point.lon"), notNullValue()); + assertThat(BytesRef.deepCopyOf(doc.rootDoc().getField("point.lon").binaryValue()).bytes, equalTo(Numbers.doubleToBytes(1.3))); assertThat(doc.rootDoc().get("point"), equalTo("1.2,1.3")); } @@ -299,14 +300,14 @@ public class LatLonMappingGeoPointTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldables("point.lat").length, equalTo(2)); - assertThat(doc.rootDoc().getFieldables("point.lon").length, equalTo(2)); - assertThat(doc.rootDoc().getFieldables("point.lat")[0].getBinaryValue(), equalTo(Numbers.doubleToBytes(1.2))); - assertThat(doc.rootDoc().getFieldables("point.lon")[0].getBinaryValue(), equalTo(Numbers.doubleToBytes(1.3))); - assertThat(doc.rootDoc().getFieldables("point")[0].stringValue(), equalTo("1.2,1.3")); - assertThat(doc.rootDoc().getFieldables("point.lat")[1].getBinaryValue(), equalTo(Numbers.doubleToBytes(1.4))); - assertThat(doc.rootDoc().getFieldables("point.lon")[1].getBinaryValue(), equalTo(Numbers.doubleToBytes(1.5))); - assertThat(doc.rootDoc().getFieldables("point")[1].stringValue(), equalTo("1.4,1.5")); + assertThat(doc.rootDoc().getFields("point.lat").length, equalTo(2)); + assertThat(doc.rootDoc().getFields("point.lon").length, equalTo(2)); + assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lat")[0].binaryValue()).bytes, equalTo(Numbers.doubleToBytes(1.2))); + assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lon")[0].binaryValue()).bytes, equalTo(Numbers.doubleToBytes(1.3))); + 
assertThat(doc.rootDoc().getFields("point")[0].stringValue(), equalTo("1.2,1.3")); + assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lat")[1].binaryValue()).bytes, equalTo(Numbers.doubleToBytes(1.4))); + assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lon")[1].binaryValue()).bytes, equalTo(Numbers.doubleToBytes(1.5))); + assertThat(doc.rootDoc().getFields("point")[1].stringValue(), equalTo("1.4,1.5")); } @Test @@ -323,8 +324,8 @@ public class LatLonMappingGeoPointTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("point.lat"), notNullValue()); - assertThat(doc.rootDoc().getFieldable("point.lon"), notNullValue()); + assertThat(doc.rootDoc().getField("point.lat"), notNullValue()); + assertThat(doc.rootDoc().getField("point.lon"), notNullValue()); assertThat(doc.rootDoc().get("point"), notNullValue()); } @@ -342,10 +343,10 @@ public class LatLonMappingGeoPointTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("point.lat"), notNullValue()); - assertThat(doc.rootDoc().getFieldable("point.lat").getBinaryValue(), nullValue()); - assertThat(doc.rootDoc().getFieldable("point.lon"), notNullValue()); - assertThat(doc.rootDoc().getFieldable("point.lon").getBinaryValue(), nullValue()); + assertThat(doc.rootDoc().getField("point.lat"), notNullValue()); + assertThat(doc.rootDoc().getField("point.lat").binaryValue(), nullValue()); + assertThat(doc.rootDoc().getField("point.lon"), notNullValue()); + assertThat(doc.rootDoc().getField("point.lon").binaryValue(), nullValue()); assertThat(doc.rootDoc().get("point"), equalTo("1.2,1.3")); } @@ -363,10 +364,10 @@ public class LatLonMappingGeoPointTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("point.lat"), notNullValue()); - assertThat(doc.rootDoc().getFieldable("point.lat").getBinaryValue(), equalTo(Numbers.doubleToBytes(1.2))); - assertThat(doc.rootDoc().getFieldable("point.lon"), notNullValue()); - assertThat(doc.rootDoc().getFieldable("point.lon").getBinaryValue(), equalTo(Numbers.doubleToBytes(1.3))); + assertThat(doc.rootDoc().getField("point.lat"), notNullValue()); + assertThat(BytesRef.deepCopyOf(doc.rootDoc().getField("point.lat").binaryValue()).bytes, equalTo(Numbers.doubleToBytes(1.2))); + assertThat(doc.rootDoc().getField("point.lon"), notNullValue()); + assertThat(BytesRef.deepCopyOf(doc.rootDoc().getField("point.lon").binaryValue()).bytes, equalTo(Numbers.doubleToBytes(1.3))); assertThat(doc.rootDoc().get("point"), equalTo("1.2,1.3")); } @@ -387,13 +388,13 @@ public class LatLonMappingGeoPointTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldables("point.lat").length, equalTo(2)); - assertThat(doc.rootDoc().getFieldables("point.lon").length, equalTo(2)); - assertThat(doc.rootDoc().getFieldables("point.lat")[0].getBinaryValue(), equalTo(Numbers.doubleToBytes(1.2))); - assertThat(doc.rootDoc().getFieldables("point.lon")[0].getBinaryValue(), equalTo(Numbers.doubleToBytes(1.3))); - assertThat(doc.rootDoc().getFieldables("point")[0].stringValue(), equalTo("1.2,1.3")); - assertThat(doc.rootDoc().getFieldables("point.lat")[1].getBinaryValue(), equalTo(Numbers.doubleToBytes(1.4))); - assertThat(doc.rootDoc().getFieldables("point.lon")[1].getBinaryValue(), equalTo(Numbers.doubleToBytes(1.5))); - assertThat(doc.rootDoc().getFieldables("point")[1].stringValue(), equalTo("1.4,1.5")); + assertThat(doc.rootDoc().getFields("point.lat").length, equalTo(2)); + assertThat(doc.rootDoc().getFields("point.lon").length, equalTo(2)); + 
assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lat")[0].binaryValue()).bytes, equalTo(Numbers.doubleToBytes(1.2))); + assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lon")[0].binaryValue()).bytes, equalTo(Numbers.doubleToBytes(1.3))); + assertThat(doc.rootDoc().getFields("point")[0].stringValue(), equalTo("1.2,1.3")); + assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lat")[1].binaryValue()).bytes, equalTo(Numbers.doubleToBytes(1.4))); + assertThat(BytesRef.deepCopyOf(doc.rootDoc().getFields("point.lon")[1].binaryValue()).bytes, equalTo(Numbers.doubleToBytes(1.5))); + assertThat(doc.rootDoc().getFields("point")[1].stringValue(), equalTo("1.4,1.5")); } } diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/index/IndexTypeMapperTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/index/IndexTypeMapperTests.java index 73b16756bf6..2a6a79804df 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/index/IndexTypeMapperTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/index/IndexTypeMapperTests.java @@ -43,7 +43,7 @@ public class IndexTypeMapperTests { DocumentMapper docMapper = MapperTests.newParser().parse(mapping); IndexFieldMapper indexMapper = docMapper.rootMapper(IndexFieldMapper.class); assertThat(indexMapper.enabled(), equalTo(true)); - assertThat(indexMapper.store(), equalTo(Field.Store.YES)); + assertThat(indexMapper.stored(), equalTo(true)); assertThat(docMapper.mappers().indexName("_index").mapper(), instanceOf(IndexFieldMapper.class)); ParsedDocument doc = docMapper.parse("type", "1", XContentFactory.jsonBuilder() @@ -64,7 +64,7 @@ public class IndexTypeMapperTests { DocumentMapper docMapper = MapperTests.newParser().parse(mapping); IndexFieldMapper indexMapper = docMapper.rootMapper(IndexFieldMapper.class); assertThat(indexMapper.enabled(), equalTo(false)); - assertThat(indexMapper.store(), equalTo(Field.Store.YES)); + assertThat(indexMapper.stored(), equalTo(true)); ParsedDocument doc = docMapper.parse("type", "1", XContentFactory.jsonBuilder() .startObject() @@ -83,7 +83,7 @@ public class IndexTypeMapperTests { DocumentMapper docMapper = MapperTests.newParser().parse(mapping); IndexFieldMapper indexMapper = docMapper.rootMapper(IndexFieldMapper.class); assertThat(indexMapper.enabled(), equalTo(false)); - assertThat(indexMapper.store(), equalTo(Field.Store.NO)); + assertThat(indexMapper.stored(), equalTo(false)); ParsedDocument doc = docMapper.parse("type", "1", XContentFactory.jsonBuilder() .startObject() diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/multifield/MultiFieldTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/multifield/MultiFieldTests.java index b895d750c2f..375600eb3b9 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/multifield/MultiFieldTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/multifield/MultiFieldTests.java @@ -20,8 +20,7 @@ package org.elasticsearch.test.unit.index.mapper.multifield; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.index.mapper.DocumentMapper; @@ -48,28 +47,28 @@ public class MultiFieldTests { BytesReference json = new 
BytesArray(copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/multifield/test-data.json")); Document doc = docMapper.parse(json).rootDoc(); - Fieldable f = doc.getFieldable("name"); + IndexableField f = doc.getField("name"); assertThat(f.name(), equalTo("name")); assertThat(f.stringValue(), equalTo("some name")); - assertThat(f.isStored(), equalTo(true)); - assertThat(f.isIndexed(), equalTo(true)); + assertThat(f.fieldType().stored(), equalTo(true)); + assertThat(f.fieldType().indexed(), equalTo(true)); - f = doc.getFieldable("name.indexed"); + f = doc.getField("name.indexed"); assertThat(f.name(), equalTo("name.indexed")); assertThat(f.stringValue(), equalTo("some name")); - assertThat(f.isStored(), equalTo(false)); - assertThat(f.isIndexed(), equalTo(true)); + assertThat(f.fieldType().stored(), equalTo(false)); + assertThat(f.fieldType().indexed(), equalTo(true)); - f = doc.getFieldable("name.not_indexed"); + f = doc.getField("name.not_indexed"); assertThat(f.name(), equalTo("name.not_indexed")); assertThat(f.stringValue(), equalTo("some name")); - assertThat(f.isStored(), equalTo(true)); - assertThat(f.isIndexed(), equalTo(false)); + assertThat(f.fieldType().stored(), equalTo(true)); + assertThat(f.fieldType().indexed(), equalTo(false)); - f = doc.getFieldable("object1.multi1"); + f = doc.getField("object1.multi1"); assertThat(f.name(), equalTo("object1.multi1")); - f = doc.getFieldable("object1.multi1.string"); + f = doc.getField("object1.multi1.string"); assertThat(f.name(), equalTo("object1.multi1.string")); assertThat(f.stringValue(), equalTo("2010-01-01")); } @@ -80,9 +79,9 @@ public class MultiFieldTests { DocumentMapper builderDocMapper = doc("test", rootObject("person").add( multiField("name") - .add(stringField("name").store(Field.Store.YES)) - .add(stringField("indexed").index(Field.Index.ANALYZED)) - .add(stringField("not_indexed").index(Field.Index.NO).store(Field.Store.YES)) + .add(stringField("name").store(true)) + .add(stringField("indexed").index(true).tokenized(true)) + .add(stringField("not_indexed").index(false).store(true)) )).build(mapperParser); builderDocMapper.refreshSource(); @@ -95,22 +94,22 @@ public class MultiFieldTests { BytesReference json = new BytesArray(copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/multifield/test-data.json")); Document doc = docMapper.parse(json).rootDoc(); - Fieldable f = doc.getFieldable("name"); + IndexableField f = doc.getField("name"); assertThat(f.name(), equalTo("name")); assertThat(f.stringValue(), equalTo("some name")); - assertThat(f.isStored(), equalTo(true)); - assertThat(f.isIndexed(), equalTo(true)); + assertThat(f.fieldType().stored(), equalTo(true)); + assertThat(f.fieldType().indexed(), equalTo(true)); - f = doc.getFieldable("name.indexed"); + f = doc.getField("name.indexed"); assertThat(f.name(), equalTo("name.indexed")); assertThat(f.stringValue(), equalTo("some name")); - assertThat(f.isStored(), equalTo(false)); - assertThat(f.isIndexed(), equalTo(true)); + assertThat(f.fieldType().stored(), equalTo(false)); + assertThat(f.fieldType().indexed(), equalTo(true)); - f = doc.getFieldable("name.not_indexed"); + f = doc.getField("name.not_indexed"); assertThat(f.name(), equalTo("name.not_indexed")); assertThat(f.stringValue(), equalTo("some name")); - assertThat(f.isStored(), equalTo(true)); - assertThat(f.isIndexed(), equalTo(false)); + assertThat(f.fieldType().stored(), equalTo(true)); + assertThat(f.fieldType().indexed(), equalTo(false)); } } diff --git 
a/src/test/java/org/elasticsearch/test/unit/index/mapper/multifield/merge/JavaMultiFieldMergeTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/multifield/merge/JavaMultiFieldMergeTests.java index dd524000716..4ce7cc50bbe 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/multifield/merge/JavaMultiFieldMergeTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/multifield/merge/JavaMultiFieldMergeTests.java @@ -20,7 +20,7 @@ package org.elasticsearch.test.unit.index.mapper.multifield.merge; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.index.mapper.DocumentMapper; @@ -54,9 +54,9 @@ public class JavaMultiFieldMergeTests { BytesReference json = new BytesArray(copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/multifield/merge/test-data.json")); Document doc = docMapper.parse(json).rootDoc(); - Fieldable f = doc.getFieldable("name"); + IndexableField f = doc.getField("name"); assertThat(f, notNullValue()); - f = doc.getFieldable("name.indexed"); + f = doc.getField("name.indexed"); assertThat(f, nullValue()); @@ -78,9 +78,9 @@ public class JavaMultiFieldMergeTests { json = new BytesArray(copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/multifield/merge/test-data.json")); doc = docMapper.parse(json).rootDoc(); - f = doc.getFieldable("name"); + f = doc.getField("name"); assertThat(f, notNullValue()); - f = doc.getFieldable("name.indexed"); + f = doc.getField("name.indexed"); assertThat(f, notNullValue()); mapping = copyToStringFromClasspath("/org/elasticsearch/test/unit/index/mapper/multifield/merge/test-mapping3.json"); diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/nested/NestedMappingTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/nested/NestedMappingTests.java index 131023f58ee..9abe6921d39 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/nested/NestedMappingTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/nested/NestedMappingTests.java @@ -261,8 +261,8 @@ public class NestedMappingTests { assertThat(doc.docs().get(5).get("nested1.nested2.field2"), equalTo("2")); assertThat(doc.docs().get(5).get("field"), nullValue()); assertThat(doc.docs().get(6).get("field"), equalTo("value")); - assertThat(doc.docs().get(6).getFieldables("nested1.field1").length, equalTo(2)); - assertThat(doc.docs().get(6).getFieldables("nested1.nested2.field2").length, equalTo(4)); + assertThat(doc.docs().get(6).getFields("nested1.field1").length, equalTo(2)); + assertThat(doc.docs().get(6).getFields("nested1.nested2.field2").length, equalTo(4)); } @Test @@ -314,6 +314,6 @@ public class NestedMappingTests { assertThat(doc.docs().get(5).get("field"), nullValue()); assertThat(doc.docs().get(6).get("field"), equalTo("value")); assertThat(doc.docs().get(6).get("nested1.field1"), nullValue()); - assertThat(doc.docs().get(6).getFieldables("nested1.nested2.field2").length, equalTo(4)); + assertThat(doc.docs().get(6).getFields("nested1.nested2.field2").length, equalTo(4)); } } \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/numeric/SimpleNumericTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/numeric/SimpleNumericTests.java index eba387eafac..222809fcf85 100644 --- 
a/src/test/java/org/elasticsearch/test/unit/index/mapper/numeric/SimpleNumericTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/numeric/SimpleNumericTests.java @@ -98,8 +98,8 @@ public class SimpleNumericTests { .field("field2", "1") .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("field1"), nullValue()); - assertThat(doc.rootDoc().getFieldable("field2"), notNullValue()); + assertThat(doc.rootDoc().getField("field1"), nullValue()); + assertThat(doc.rootDoc().getField("field2"), notNullValue()); try { defaultMapper.parse("type", "1", XContentFactory.jsonBuilder() @@ -130,7 +130,7 @@ public class SimpleNumericTests { .field("field3", "a") .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("field3"), nullValue()); + assertThat(doc.rootDoc().getField("field3"), nullValue()); // This should still throw an exception, since field2 is specifically set to ignore_malformed=false try { diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/simple/SimpleMapperTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/simple/SimpleMapperTests.java index f66c2d522d9..6249657950c 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/simple/SimpleMapperTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/simple/SimpleMapperTests.java @@ -47,7 +47,7 @@ public class SimpleMapperTests { DocumentMapperParser mapperParser = MapperTests.newParser(); DocumentMapper docMapper = doc("test", rootObject("person") - .add(object("name").add(stringField("first").store(YES).index(Field.Index.NO))) + .add(object("name").add(stringField("first").store(true).index(false))) ).build(mapperParser); BytesReference json = new BytesArray(copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/simple/test1.json")); diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/size/SizeMappingTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/size/SizeMappingTests.java index 328b9c6ded4..87ddf2c80f8 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/size/SizeMappingTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/size/SizeMappingTests.java @@ -47,8 +47,8 @@ public class SizeMappingTests { .bytes(); ParsedDocument doc = docMapper.parse(SourceToParse.source(source).type("type").id("1")); - assertThat(doc.rootDoc().getFieldable("_size").isStored(), equalTo(false)); - assertThat(doc.rootDoc().getFieldable("_size").tokenStreamValue(), notNullValue()); + assertThat(doc.rootDoc().getField("_size").fieldType().stored(), equalTo(false)); + assertThat(doc.rootDoc().getField("_size").tokenStream(docMapper.indexAnalyzer()), notNullValue()); } @Test @@ -65,8 +65,8 @@ public class SizeMappingTests { .bytes(); ParsedDocument doc = docMapper.parse(SourceToParse.source(source).type("type").id("1")); - assertThat(doc.rootDoc().getFieldable("_size").isStored(), equalTo(true)); - assertThat(doc.rootDoc().getFieldable("_size").tokenStreamValue(), notNullValue()); + assertThat(doc.rootDoc().getField("_size").fieldType().stored(), equalTo(true)); + assertThat(doc.rootDoc().getField("_size").tokenStream(docMapper.indexAnalyzer()), notNullValue()); } @Test @@ -83,7 +83,7 @@ public class SizeMappingTests { .bytes(); ParsedDocument doc = docMapper.parse(SourceToParse.source(source).type("type").id("1")); - assertThat(doc.rootDoc().getFieldable("_size"), nullValue()); + assertThat(doc.rootDoc().getField("_size"), nullValue()); } @Test @@ -99,6 +99,6 @@ public class 
SizeMappingTests { .bytes(); ParsedDocument doc = docMapper.parse(SourceToParse.source(source).type("type").id("1")); - assertThat(doc.rootDoc().getFieldable("_size"), nullValue()); + assertThat(doc.rootDoc().getField("_size"), nullValue()); } } \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/source/CompressSourceMappingTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/source/CompressSourceMappingTests.java index 47b70ce2590..bb594106804 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/source/CompressSourceMappingTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/source/CompressSourceMappingTests.java @@ -19,6 +19,7 @@ package org.elasticsearch.test.unit.index.mapper.source; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.compress.CompressorFactory; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.index.mapper.DocumentMapper; @@ -46,8 +47,8 @@ public class CompressSourceMappingTests { .field("field1", "value1") .field("field2", "value2") .endObject().bytes()); - - assertThat(CompressorFactory.isCompressed(doc.rootDoc().getBinaryValue("_source")), equalTo(false)); + BytesRef bytes = doc.rootDoc().getBinaryValue("_source"); + assertThat(CompressorFactory.isCompressed(bytes.bytes, bytes.offset, bytes.length), equalTo(false)); } @Test @@ -63,7 +64,8 @@ public class CompressSourceMappingTests { .field("field2", "value2") .endObject().bytes()); - assertThat(CompressorFactory.isCompressed(doc.rootDoc().getBinaryValue("_source")), equalTo(true)); + BytesRef bytes = doc.rootDoc().getBinaryValue("_source"); + assertThat(CompressorFactory.isCompressed(bytes.bytes, bytes.offset, bytes.length), equalTo(true)); } @Test @@ -78,7 +80,8 @@ public class CompressSourceMappingTests { .field("field1", "value1") .endObject().bytes()); - assertThat(CompressorFactory.isCompressed(doc.rootDoc().getBinaryValue("_source")), equalTo(false)); + BytesRef bytes = doc.rootDoc().getBinaryValue("_source"); + assertThat(CompressorFactory.isCompressed(bytes.bytes, bytes.offset, bytes.length), equalTo(false)); doc = documentMapper.parse("type", "1", XContentFactory.jsonBuilder().startObject() .field("field1", "value1") @@ -88,6 +91,7 @@ public class CompressSourceMappingTests { .field("field2", "value2 xxxxxxxxxxxxxx yyyyyyyyyyyyyyyyyyy zzzzzzzzzzzzzzzzz") .endObject().bytes()); - assertThat(CompressorFactory.isCompressed(doc.rootDoc().getBinaryValue("_source")), equalTo(true)); + bytes = doc.rootDoc().getBinaryValue("_source"); + assertThat(CompressorFactory.isCompressed(bytes.bytes, bytes.offset, bytes.length), equalTo(true)); } } diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/source/DefaultSourceMappingTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/source/DefaultSourceMappingTests.java index ed2a3c5389f..18c64c45c87 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/source/DefaultSourceMappingTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/source/DefaultSourceMappingTests.java @@ -19,7 +19,8 @@ package org.elasticsearch.test.unit.index.mapper.source; -import org.apache.lucene.document.Fieldable; +import org.apache.lucene.index.IndexableField; +import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.compress.CompressorFactory; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; @@ -120,8 +121,8 
@@ public class DefaultSourceMappingTests { .startObject("path2").field("field2", "value2").endObject() .endObject().bytes()); - Fieldable sourceField = doc.rootDoc().getFieldable("_source"); - Map sourceAsMap = XContentFactory.xContent(XContentType.JSON).createParser(sourceField.getBinaryValue(), sourceField.getBinaryOffset(), sourceField.getBinaryLength()).mapAndClose(); + IndexableField sourceField = doc.rootDoc().getField("_source"); + Map sourceAsMap = XContentFactory.xContent(XContentType.JSON).createParser(new BytesArray(sourceField.binaryValue())).mapAndClose(); assertThat(sourceAsMap.containsKey("path1"), equalTo(true)); assertThat(sourceAsMap.containsKey("path2"), equalTo(false)); } diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/string/SimpleStringMappingTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/string/SimpleStringMappingTests.java index 0d34893b451..7b7e0a50f76 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/string/SimpleStringMappingTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/string/SimpleStringMappingTests.java @@ -48,7 +48,7 @@ public class SimpleStringMappingTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("field"), notNullValue()); + assertThat(doc.rootDoc().getField("field"), notNullValue()); doc = defaultMapper.parse("type", "1", XContentFactory.jsonBuilder() .startObject() @@ -56,7 +56,7 @@ public class SimpleStringMappingTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("field"), notNullValue()); + assertThat(doc.rootDoc().getField("field"), notNullValue()); doc = defaultMapper.parse("type", "1", XContentFactory.jsonBuilder() .startObject() @@ -64,7 +64,7 @@ public class SimpleStringMappingTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("field"), nullValue()); + assertThat(doc.rootDoc().getField("field"), nullValue()); } @Test @@ -81,8 +81,8 @@ public class SimpleStringMappingTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("field").getOmitNorms(), equalTo(false)); - assertThat(doc.rootDoc().getFieldable("field").getIndexOptions(), equalTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)); + assertThat(doc.rootDoc().getField("field").fieldType().omitNorms(), equalTo(false)); + assertThat(doc.rootDoc().getField("field").fieldType().indexOptions(), equalTo(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS)); } @Test @@ -99,8 +99,8 @@ public class SimpleStringMappingTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("field").getOmitNorms(), equalTo(true)); - assertThat(doc.rootDoc().getFieldable("field").getIndexOptions(), equalTo(FieldInfo.IndexOptions.DOCS_ONLY)); + assertThat(doc.rootDoc().getField("field").fieldType().omitNorms(), equalTo(true)); + assertThat(doc.rootDoc().getField("field").fieldType().indexOptions(), equalTo(FieldInfo.IndexOptions.DOCS_ONLY)); // now test it explicitly set @@ -116,7 +116,7 @@ public class SimpleStringMappingTests { .endObject() .bytes()); - assertThat(doc.rootDoc().getFieldable("field").getOmitNorms(), equalTo(false)); - assertThat(doc.rootDoc().getFieldable("field").getIndexOptions(), equalTo(FieldInfo.IndexOptions.DOCS_AND_FREQS)); + assertThat(doc.rootDoc().getField("field").fieldType().omitNorms(), equalTo(false)); + assertThat(doc.rootDoc().getField("field").fieldType().indexOptions(), equalTo(FieldInfo.IndexOptions.DOCS_AND_FREQS)); } } diff --git 
a/src/test/java/org/elasticsearch/test/unit/index/mapper/timestamp/TimestampMappingTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/timestamp/TimestampMappingTests.java index 4cf171b0ca1..be6311e3ea7 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/timestamp/TimestampMappingTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/timestamp/TimestampMappingTests.java @@ -48,7 +48,7 @@ public class TimestampMappingTests { .bytes(); ParsedDocument doc = docMapper.parse(SourceToParse.source(source).type("type").id("1").timestamp(1)); - assertThat(doc.rootDoc().getFieldable("_timestamp"), equalTo(null)); + assertThat(doc.rootDoc().getField("_timestamp"), equalTo(null)); } @Test @@ -64,9 +64,9 @@ public class TimestampMappingTests { .bytes(); ParsedDocument doc = docMapper.parse(SourceToParse.source(source).type("type").id("1").timestamp(1)); - assertThat(doc.rootDoc().getFieldable("_timestamp").isStored(), equalTo(true)); - assertThat(doc.rootDoc().getFieldable("_timestamp").isIndexed(), equalTo(true)); - assertThat(doc.rootDoc().getFieldable("_timestamp").tokenStreamValue(), notNullValue()); + assertThat(doc.rootDoc().getField("_timestamp").fieldType().stored(), equalTo(true)); + assertThat(doc.rootDoc().getField("_timestamp").fieldType().indexed(), equalTo(true)); + assertThat(doc.rootDoc().getField("_timestamp").tokenStream(docMapper.indexAnalyzer()), notNullValue()); } @Test @@ -74,8 +74,8 @@ public class TimestampMappingTests { String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").endObject().string(); DocumentMapper docMapper = MapperTests.newParser().parse(mapping); assertThat(docMapper.timestampFieldMapper().enabled(), equalTo(TimestampFieldMapper.Defaults.ENABLED)); - assertThat(docMapper.timestampFieldMapper().store(), equalTo(TimestampFieldMapper.Defaults.STORE)); - assertThat(docMapper.timestampFieldMapper().index(), equalTo(TimestampFieldMapper.Defaults.INDEX)); + assertThat(docMapper.timestampFieldMapper().stored(), equalTo(TimestampFieldMapper.Defaults.FIELD_TYPE.stored())); + assertThat(docMapper.timestampFieldMapper().indexed(), equalTo(TimestampFieldMapper.Defaults.FIELD_TYPE.indexed())); assertThat(docMapper.timestampFieldMapper().path(), equalTo(null)); assertThat(docMapper.timestampFieldMapper().dateTimeFormatter().format(), equalTo(TimestampFieldMapper.DEFAULT_DATE_TIME_FORMAT)); } @@ -91,8 +91,8 @@ public class TimestampMappingTests { .endObject().endObject().string(); DocumentMapper docMapper = MapperTests.newParser().parse(mapping); assertThat(docMapper.timestampFieldMapper().enabled(), equalTo(true)); - assertThat(docMapper.timestampFieldMapper().store(), equalTo(Field.Store.YES)); - assertThat(docMapper.timestampFieldMapper().index(), equalTo(Field.Index.NO)); + assertThat(docMapper.timestampFieldMapper().stored(), equalTo(true)); + assertThat(docMapper.timestampFieldMapper().indexed(), equalTo(false)); assertThat(docMapper.timestampFieldMapper().path(), equalTo("timestamp")); assertThat(docMapper.timestampFieldMapper().dateTimeFormatter().format(), equalTo("year")); } diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/ttl/TTLMappingTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/ttl/TTLMappingTests.java index ca11797cba5..afcfc5f367f 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/ttl/TTLMappingTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/ttl/TTLMappingTests.java @@ -46,7 +46,7 @@ public class 
TTLMappingTests { .bytes(); ParsedDocument doc = docMapper.parse(SourceToParse.source(source).type("type").id("1").ttl(Long.MAX_VALUE)); - assertThat(doc.rootDoc().getFieldable("_ttl"), equalTo(null)); + assertThat(doc.rootDoc().getField("_ttl"), equalTo(null)); } @Test @@ -62,9 +62,9 @@ public class TTLMappingTests { .bytes(); ParsedDocument doc = docMapper.parse(SourceToParse.source(source).type("type").id("1").ttl(Long.MAX_VALUE)); - assertThat(doc.rootDoc().getFieldable("_ttl").isStored(), equalTo(true)); - assertThat(doc.rootDoc().getFieldable("_ttl").isIndexed(), equalTo(true)); - assertThat(doc.rootDoc().getFieldable("_ttl").tokenStreamValue(), notNullValue()); + assertThat(doc.rootDoc().getField("_ttl").fieldType().stored(), equalTo(true)); + assertThat(doc.rootDoc().getField("_ttl").fieldType().indexed(), equalTo(true)); + assertThat(doc.rootDoc().getField("_ttl").tokenStream(docMapper.indexAnalyzer()), notNullValue()); } @Test @@ -72,8 +72,8 @@ public class TTLMappingTests { String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").endObject().string(); DocumentMapper docMapper = MapperTests.newParser().parse(mapping); assertThat(docMapper.TTLFieldMapper().enabled(), equalTo(TTLFieldMapper.Defaults.ENABLED)); - assertThat(docMapper.TTLFieldMapper().store(), equalTo(TTLFieldMapper.Defaults.STORE)); - assertThat(docMapper.TTLFieldMapper().index(), equalTo(TTLFieldMapper.Defaults.INDEX)); + assertThat(docMapper.TTLFieldMapper().stored(), equalTo(TTLFieldMapper.Defaults.FIELD_TYPE.stored())); + assertThat(docMapper.TTLFieldMapper().indexed(), equalTo(TTLFieldMapper.Defaults.FIELD_TYPE.indexed())); } @@ -86,7 +86,7 @@ public class TTLMappingTests { .endObject().endObject().string(); DocumentMapper docMapper = MapperTests.newParser().parse(mapping); assertThat(docMapper.TTLFieldMapper().enabled(), equalTo(true)); - assertThat(docMapper.TTLFieldMapper().store(), equalTo(Field.Store.NO)); - assertThat(docMapper.TTLFieldMapper().index(), equalTo(Field.Index.NO)); + assertThat(docMapper.TTLFieldMapper().stored(), equalTo(false)); + assertThat(docMapper.TTLFieldMapper().indexed(), equalTo(false)); } } \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java b/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java index 2f78a6b9f76..7c5cc9f0ce7 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java @@ -20,8 +20,10 @@ package org.elasticsearch.test.unit.index.query; import org.apache.lucene.index.Term; +import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery; import org.apache.lucene.search.*; import org.apache.lucene.search.spans.*; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.cluster.ClusterService; import org.elasticsearch.common.bytes.BytesArray; @@ -122,6 +124,12 @@ public class SimpleIndexQueryParserTests { return this.queryParser; } + private BytesRef longToPrefixCoded(long val) { + BytesRef bytesRef = new BytesRef(); + NumericUtils.longToPrefixCoded(val, 0, bytesRef); + return bytesRef; + } + @Test public void testQueryStringBuilder() throws Exception { IndexQueryParserService queryParser = queryParser(); @@ -1217,7 +1225,7 @@ public class SimpleIndexQueryParserTests { assertThat(parsedQuery, instanceOf(SpanTermQuery.class)); SpanTermQuery termQuery = 
(SpanTermQuery) parsedQuery; // since age is automatically registered in data, we encode it as numeric - assertThat(termQuery.getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(34)))); + assertThat(termQuery.getTerm(), equalTo(new Term("age", longToPrefixCoded(34)))); } @Test @@ -1228,7 +1236,7 @@ public class SimpleIndexQueryParserTests { assertThat(parsedQuery, instanceOf(SpanTermQuery.class)); SpanTermQuery termQuery = (SpanTermQuery) parsedQuery; // since age is automatically registered in data, we encode it as numeric - assertThat(termQuery.getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(34)))); + assertThat(termQuery.getTerm(), equalTo(new Term("age", longToPrefixCoded(34)))); } @Test @@ -1238,8 +1246,8 @@ public class SimpleIndexQueryParserTests { assertThat(parsedQuery, instanceOf(SpanNotQuery.class)); SpanNotQuery spanNotQuery = (SpanNotQuery) parsedQuery; // since age is automatically registered in data, we encode it as numeric - assertThat(((SpanTermQuery) spanNotQuery.getInclude()).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(34)))); - assertThat(((SpanTermQuery) spanNotQuery.getExclude()).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(35)))); + assertThat(((SpanTermQuery) spanNotQuery.getInclude()).getTerm(), equalTo(new Term("age", longToPrefixCoded(34)))); + assertThat(((SpanTermQuery) spanNotQuery.getExclude()).getTerm(), equalTo(new Term("age", longToPrefixCoded(35)))); } @Test @@ -1250,8 +1258,8 @@ public class SimpleIndexQueryParserTests { assertThat(parsedQuery, instanceOf(SpanNotQuery.class)); SpanNotQuery spanNotQuery = (SpanNotQuery) parsedQuery; // since age is automatically registered in data, we encode it as numeric - assertThat(((SpanTermQuery) spanNotQuery.getInclude()).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(34)))); - assertThat(((SpanTermQuery) spanNotQuery.getExclude()).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(35)))); + assertThat(((SpanTermQuery) spanNotQuery.getInclude()).getTerm(), equalTo(new Term("age", longToPrefixCoded(34)))); + assertThat(((SpanTermQuery) spanNotQuery.getExclude()).getTerm(), equalTo(new Term("age", longToPrefixCoded(35)))); } @Test @@ -1261,7 +1269,7 @@ public class SimpleIndexQueryParserTests { assertThat(parsedQuery, instanceOf(SpanFirstQuery.class)); SpanFirstQuery spanFirstQuery = (SpanFirstQuery) parsedQuery; // since age is automatically registered in data, we encode it as numeric - assertThat(((SpanTermQuery) spanFirstQuery.getMatch()).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(34)))); + assertThat(((SpanTermQuery) spanFirstQuery.getMatch()).getTerm(), equalTo(new Term("age", longToPrefixCoded(34)))); assertThat(spanFirstQuery.getEnd(), equalTo(12)); } @@ -1273,7 +1281,7 @@ public class SimpleIndexQueryParserTests { assertThat(parsedQuery, instanceOf(SpanFirstQuery.class)); SpanFirstQuery spanFirstQuery = (SpanFirstQuery) parsedQuery; // since age is automatically registered in data, we encode it as numeric - assertThat(((SpanTermQuery) spanFirstQuery.getMatch()).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(34)))); + assertThat(((SpanTermQuery) spanFirstQuery.getMatch()).getTerm(), equalTo(new Term("age", longToPrefixCoded(34)))); assertThat(spanFirstQuery.getEnd(), equalTo(12)); } @@ -1284,9 +1292,9 @@ public class SimpleIndexQueryParserTests { assertThat(parsedQuery, instanceOf(SpanNearQuery.class)); SpanNearQuery spanNearQuery = (SpanNearQuery) parsedQuery; 
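// (These numeric Term assertions rely on the private longToPrefixCoded(long) helper added above: in Lucene 4.0, NumericUtils.longToPrefixCoded no longer returns a String but fills a caller-supplied BytesRef, so the tests wrap it to build numeric Terms.)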
assertThat(spanNearQuery.getClauses().length, equalTo(3)); - assertThat(((SpanTermQuery) spanNearQuery.getClauses()[0]).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(34)))); - assertThat(((SpanTermQuery) spanNearQuery.getClauses()[1]).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(35)))); - assertThat(((SpanTermQuery) spanNearQuery.getClauses()[2]).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(36)))); + assertThat(((SpanTermQuery) spanNearQuery.getClauses()[0]).getTerm(), equalTo(new Term("age", longToPrefixCoded(34)))); + assertThat(((SpanTermQuery) spanNearQuery.getClauses()[1]).getTerm(), equalTo(new Term("age", longToPrefixCoded(35)))); + assertThat(((SpanTermQuery) spanNearQuery.getClauses()[2]).getTerm(), equalTo(new Term("age", longToPrefixCoded(36)))); assertThat(spanNearQuery.isInOrder(), equalTo(false)); } @@ -1298,9 +1306,9 @@ public class SimpleIndexQueryParserTests { assertThat(parsedQuery, instanceOf(SpanNearQuery.class)); SpanNearQuery spanNearQuery = (SpanNearQuery) parsedQuery; assertThat(spanNearQuery.getClauses().length, equalTo(3)); - assertThat(((SpanTermQuery) spanNearQuery.getClauses()[0]).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(34)))); - assertThat(((SpanTermQuery) spanNearQuery.getClauses()[1]).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(35)))); - assertThat(((SpanTermQuery) spanNearQuery.getClauses()[2]).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(36)))); + assertThat(((SpanTermQuery) spanNearQuery.getClauses()[0]).getTerm(), equalTo(new Term("age", longToPrefixCoded(34)))); + assertThat(((SpanTermQuery) spanNearQuery.getClauses()[1]).getTerm(), equalTo(new Term("age", longToPrefixCoded(35)))); + assertThat(((SpanTermQuery) spanNearQuery.getClauses()[2]).getTerm(), equalTo(new Term("age", longToPrefixCoded(36)))); assertThat(spanNearQuery.isInOrder(), equalTo(false)); } @@ -1311,9 +1319,9 @@ public class SimpleIndexQueryParserTests { assertThat(parsedQuery, instanceOf(SpanOrQuery.class)); SpanOrQuery spanOrQuery = (SpanOrQuery) parsedQuery; assertThat(spanOrQuery.getClauses().length, equalTo(3)); - assertThat(((SpanTermQuery) spanOrQuery.getClauses()[0]).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(34)))); - assertThat(((SpanTermQuery) spanOrQuery.getClauses()[1]).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(35)))); - assertThat(((SpanTermQuery) spanOrQuery.getClauses()[2]).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(36)))); + assertThat(((SpanTermQuery) spanOrQuery.getClauses()[0]).getTerm(), equalTo(new Term("age", longToPrefixCoded(34)))); + assertThat(((SpanTermQuery) spanOrQuery.getClauses()[1]).getTerm(), equalTo(new Term("age", longToPrefixCoded(35)))); + assertThat(((SpanTermQuery) spanOrQuery.getClauses()[2]).getTerm(), equalTo(new Term("age", longToPrefixCoded(36)))); } @Test @@ -1324,9 +1332,9 @@ public class SimpleIndexQueryParserTests { assertThat(parsedQuery, instanceOf(SpanOrQuery.class)); SpanOrQuery spanOrQuery = (SpanOrQuery) parsedQuery; assertThat(spanOrQuery.getClauses().length, equalTo(3)); - assertThat(((SpanTermQuery) spanOrQuery.getClauses()[0]).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(34)))); - assertThat(((SpanTermQuery) spanOrQuery.getClauses()[1]).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(35)))); - assertThat(((SpanTermQuery) spanOrQuery.getClauses()[2]).getTerm(), equalTo(new Term("age", 
NumericUtils.longToPrefixCoded(36)))); + assertThat(((SpanTermQuery) spanOrQuery.getClauses()[0]).getTerm(), equalTo(new Term("age", longToPrefixCoded(34)))); + assertThat(((SpanTermQuery) spanOrQuery.getClauses()[1]).getTerm(), equalTo(new Term("age", longToPrefixCoded(35)))); + assertThat(((SpanTermQuery) spanOrQuery.getClauses()[2]).getTerm(), equalTo(new Term("age", longToPrefixCoded(36)))); } @Test @@ -1337,9 +1345,9 @@ public class SimpleIndexQueryParserTests { assertThat(parsedQuery, instanceOf(SpanOrQuery.class)); SpanOrQuery spanOrQuery = (SpanOrQuery) parsedQuery; assertThat(spanOrQuery.getClauses().length, equalTo(3)); - assertThat(((SpanTermQuery) spanOrQuery.getClauses()[0]).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(34)))); - assertThat(((SpanTermQuery) spanOrQuery.getClauses()[1]).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(35)))); - assertThat(((SpanTermQuery) spanOrQuery.getClauses()[2]).getTerm(), equalTo(new Term("age", NumericUtils.longToPrefixCoded(36)))); + assertThat(((SpanTermQuery) spanOrQuery.getClauses()[0]).getTerm(), equalTo(new Term("age", longToPrefixCoded(34)))); + assertThat(((SpanTermQuery) spanOrQuery.getClauses()[1]).getTerm(), equalTo(new Term("age", longToPrefixCoded(35)))); + assertThat(((SpanTermQuery) spanOrQuery.getClauses()[2]).getTerm(), equalTo(new Term("age", longToPrefixCoded(36)))); } @Test diff --git a/src/test/java/org/elasticsearch/test/unit/index/store/memory/SimpleByteBufferStoreTests.java b/src/test/java/org/elasticsearch/test/unit/index/store/memory/SimpleByteBufferStoreTests.java index 88ef0f53989..3bd2c9a9ae7 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/store/memory/SimpleByteBufferStoreTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/store/memory/SimpleByteBufferStoreTests.java @@ -19,10 +19,7 @@ package org.elasticsearch.test.unit.index.store.memory; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.Lock; -import org.apache.lucene.store.LockObtainFailedException; +import org.apache.lucene.store.*; import org.apache.lucene.store.bytebuffer.ByteBufferDirectory; import org.elasticsearch.cache.memory.ByteBufferCache; import org.testng.annotations.Test; @@ -121,7 +118,7 @@ public class SimpleByteBufferStoreTests { private void insertData(ByteBufferDirectory dir, int bufferSizeInBytes) throws IOException { byte[] test = new byte[]{1, 2, 3, 4, 5, 6, 7, 8}; - IndexOutput indexOutput = dir.createOutput("value1"); + IndexOutput indexOutput = dir.createOutput("value1", IOContext.DEFAULT); indexOutput.writeBytes(new byte[]{2, 4, 6, 7, 8}, 5); indexOutput.writeInt(-1); indexOutput.writeLong(10); @@ -145,7 +142,7 @@ public class SimpleByteBufferStoreTests { assertThat(dir.fileExists("value1"), equalTo(true)); assertThat(dir.fileLength("value1"), equalTo(38l)); - IndexInput indexInput = dir.openInput("value1"); + IndexInput indexInput = dir.openInput("value1", IOContext.DEFAULT); indexInput.readBytes(test, 0, 5); assertThat(test[0], equalTo((byte) 8)); assertThat(indexInput.readInt(), equalTo(-1)); @@ -170,7 +167,7 @@ public class SimpleByteBufferStoreTests { indexInput.close(); - indexInput = dir.openInput("value1"); + indexInput = dir.openInput("value1", IOContext.DEFAULT); // iterate over all the data for (int i = 0; i < 38; i++) { indexInput.readByte(); From 5a553a1924b05152882002c25e1dc137cf2b77b7 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Wed, 31 Oct 2012 15:38:30 +0100 Subject: 
[PATCH 079/146] lucene 4: Upgraded AndFilter, NotDeletedFilter, NotFilter, OrFilter, TermFilter, XBooleanFilter. Left live docs and accepted docs unhandled (used null) for now; I added a note at all the relevant places. --- .../common/lucene/search/AndFilter.java | 6 ++- .../lucene/search/NotDeletedFilter.java | 28 +++++------ .../common/lucene/search/NotFilter.java | 13 ++++-- .../common/lucene/search/OrFilter.java | 10 ++-- .../common/lucene/search/TermFilter.java | 46 +++++++++---------- .../common/lucene/search/XBooleanFilter.java | 36 +++++++++------ 6 files changed, 77 insertions(+), 62 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/search/AndFilter.java b/src/main/java/org/elasticsearch/common/lucene/search/AndFilter.java index 8c8abfa3dde..dff2ef11bfb 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/AndFilter.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/AndFilter.java @@ -50,12 +50,14 @@ public class AndFilter extends Filter { @Override public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { if (filters.size() == 1) { - return filters.get(0).getDocIdSet(context, acceptDocs); + // LUCENE 4 UPGRADE: For now, leave this null until we figure out how to deal with deleted docs... + return filters.get(0).getDocIdSet(context, null); } List sets = Lists.newArrayListWithExpectedSize(filters.size()); boolean allAreDocSet = true; for (Filter filter : filters) { - DocIdSet set = filter.getDocIdSet(context, acceptDocs); + // LUCENE 4 UPGRADE: For now, leave this null until we figure out how to deal with deleted docs... + DocIdSet set = filter.getDocIdSet(context, null); if (set == null) { // none matching for this filter, we AND, so return EMPTY return DocSet.EMPTY_DOC_SET; } } diff --git a/src/main/java/org/elasticsearch/common/lucene/search/NotDeletedFilter.java b/src/main/java/org/elasticsearch/common/lucene/search/NotDeletedFilter.java index ece78a6d214..9d802e6447e 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/NotDeletedFilter.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/NotDeletedFilter.java @@ -19,11 +19,13 @@ package org.elasticsearch.common.lucene.search; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.Filter; import org.apache.lucene.search.FilteredDocIdSetIterator; +import org.apache.lucene.util.Bits; import java.io.IOException; @@ -39,15 +41,15 @@ public class NotDeletedFilter extends Filter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - DocIdSet docIdSet = filter.getDocIdSet(reader); + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + DocIdSet docIdSet = filter.getDocIdSet(context, acceptDocs); if (docIdSet == null) { return null; } - if (!reader.hasDeletions()) { + if (!context.reader().hasDeletions()) { return docIdSet; } - return new NotDeletedDocIdSet(docIdSet, reader); + return new NotDeletedDocIdSet(docIdSet, context.reader().getLiveDocs()); } public Filter filter() { @@ -63,11 +65,11 @@ public class NotDeletedFilter extends Filter { private final DocIdSet innerSet; - private final IndexReader reader; + private final Bits liveDocs; - NotDeletedDocIdSet(DocIdSet innerSet, IndexReader reader) { + NotDeletedDocIdSet(DocIdSet innerSet, Bits liveDocs) { this.innerSet = innerSet; - this.reader = reader; +
this.liveDocs = liveDocs; } @Override @@ -76,22 +78,22 @@ public class NotDeletedFilter extends Filter { if (iterator == null) { return null; } - return new NotDeletedDocIdSetIterator(iterator, reader); + return new NotDeletedDocIdSetIterator(iterator, liveDocs); } } static class NotDeletedDocIdSetIterator extends FilteredDocIdSetIterator { - private final IndexReader reader; + private final Bits liveDocs; - NotDeletedDocIdSetIterator(DocIdSetIterator innerIter, IndexReader reader) { + NotDeletedDocIdSetIterator(DocIdSetIterator innerIter, Bits liveDocs) { super(innerIter); - this.reader = reader; + this.liveDocs = liveDocs; } @Override - protected boolean match(int doc) throws IOException { - return !reader.isDeleted(doc); + protected boolean match(int doc) { + return liveDocs.get(doc); } } } diff --git a/src/main/java/org/elasticsearch/common/lucene/search/NotFilter.java b/src/main/java/org/elasticsearch/common/lucene/search/NotFilter.java index 865a4c8287d..10416a9c6e3 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/NotFilter.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/NotFilter.java @@ -19,9 +19,11 @@ package org.elasticsearch.common.lucene.search; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.elasticsearch.common.lucene.docset.AllDocSet; import org.elasticsearch.common.lucene.docset.DocSet; import org.elasticsearch.common.lucene.docset.NotDocIdSet; @@ -45,15 +47,16 @@ public class NotFilter extends Filter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - DocIdSet set = filter.getDocIdSet(reader); + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + // LUCENE 4 UPGRADE: For now, leave acceptDocs null until we figure out how to deal with deleted docs...
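+ // (A null inner DocIdSet below means the wrapped filter matched nothing, in which case the negation matches every document in the segment.)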
+ DocIdSet set = filter.getDocIdSet(context, null); if (set == null) { - return new AllDocSet(reader.maxDoc()); + return new AllDocSet(context.reader().maxDoc()); } if (set instanceof DocSet) { - return new NotDocSet((DocSet) set, reader.maxDoc()); + return new NotDocSet((DocSet) set, context.reader().maxDoc()); } - return new NotDocIdSet(set, reader.maxDoc()); + return new NotDocIdSet(set, context.reader().maxDoc()); } @Override diff --git a/src/main/java/org/elasticsearch/common/lucene/search/OrFilter.java b/src/main/java/org/elasticsearch/common/lucene/search/OrFilter.java index b6d12b03e89..111e7669181 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/OrFilter.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/OrFilter.java @@ -20,9 +20,11 @@ package org.elasticsearch.common.lucene.search; import com.google.common.collect.Lists; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.elasticsearch.common.lucene.docset.DocSet; import org.elasticsearch.common.lucene.docset.OrDocIdSet; import org.elasticsearch.common.lucene.docset.OrDocSet; @@ -46,14 +48,16 @@ public class OrFilter extends Filter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { if (filters.size() == 1) { - return filters.get(0).getDocIdSet(reader); + // LUCENE 4 UPGRADE: For now, leave acceptDocs null until we figure out how to deal with deleted docs... + return filters.get(0).getDocIdSet(context, null); } List sets = Lists.newArrayListWithExpectedSize(filters.size()); boolean allAreDocSet = true; for (Filter filter : filters) { - DocIdSet set = filter.getDocIdSet(reader); + // LUCENE 4 UPGRADE: For now, leave acceptDocs null until we figure out how to deal with deleted docs...
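+ // (A clause with no matches is simply skipped here; unlike the AND case above, it cannot short-circuit the whole OR.)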
+ DocIdSet set = filter.getDocIdSet(context, null); if (set == null) { // none matching for this filter, continue continue; } diff --git a/src/main/java/org/elasticsearch/common/lucene/search/TermFilter.java b/src/main/java/org/elasticsearch/common/lucene/search/TermFilter.java index 938c2f4b162..c5d345504d7 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/TermFilter.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/TermFilter.java @@ -19,13 +19,11 @@ package org.elasticsearch.common.lucene.search; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.*; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; -import org.elasticsearch.common.lucene.Lucene; import java.io.IOException; @@ -45,26 +43,26 @@ public class TermFilter extends Filter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - FixedBitSet result = null; - TermDocs td = reader.termDocs(); - try { - td.seek(term); - // batch read, in Lucene 4.0 its no longer needed - int[] docs = new int[Lucene.BATCH_ENUM_DOCS]; - int[] freqs = new int[Lucene.BATCH_ENUM_DOCS]; - int number = td.read(docs, freqs); - if (number > 0) { - result = new FixedBitSet(reader.maxDoc()); - while (number > 0) { - for (int i = 0; i < number; i++) { - result.set(docs[i]); - } - number = td.read(docs, freqs); - } - } - } finally { - td.close(); + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + Terms terms = context.reader().terms(term.field()); + if (terms == null) { + return null; + } + + TermsEnum termsEnum = terms.iterator(null); + if (!termsEnum.seekExact(term.bytes(), false)) { + return null; + } + // LUCENE 4 UPGRADE: For now, leave acceptDocs null until we figure out how to deal with deleted docs...
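+ // (termsEnum.docs(liveDocs, reuse): a null liveDocs means no live-doc filtering is applied, and a null reuse allocates a fresh DocsEnum rather than reusing one.)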
+ DocsEnum docsEnum = termsEnum.docs(null, null); + int docId = docsEnum.nextDoc(); + if (docId == DocsEnum.NO_MORE_DOCS) { + return null; + } + + final FixedBitSet result = new FixedBitSet(context.reader().maxDoc()); + for (; docId < DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) { + result.set(docId); } return result; } diff --git a/src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java b/src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java index b6ff7069215..acd104aae52 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/XBooleanFilter.java @@ -19,11 +19,12 @@ package org.elasticsearch.common.lucene.search; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.queries.FilterClause; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.common.lucene.docset.DocSet; import org.elasticsearch.common.lucene.docset.DocSets; @@ -41,9 +42,9 @@ public class XBooleanFilter extends Filter { ArrayList notFilters = null; ArrayList mustFilters = null; - private DocIdSet getDISI(ArrayList filters, int index, IndexReader reader) + private DocIdSet getDISI(ArrayList filters, int index, AtomicReaderContext context, Bits acceptedDocs) throws IOException { - DocIdSet docIdSet = filters.get(index).getDocIdSet(reader); + DocIdSet docIdSet = filters.get(index).getDocIdSet(context, acceptedDocs); if (docIdSet == DocIdSet.EMPTY_DOCIDSET || docIdSet == DocSet.EMPTY_DOC_SET) { return null; } @@ -67,23 +68,26 @@ public class XBooleanFilter extends Filter { * of the filters that have been added. */ @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptedDocs) throws IOException { FixedBitSet res = null; if (mustFilters == null && notFilters == null && shouldFilters != null && shouldFilters.size() == 1) { - return shouldFilters.get(0).getDocIdSet(reader); + // LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs... + return shouldFilters.get(0).getDocIdSet(context, null); } if (shouldFilters == null && notFilters == null && mustFilters != null && mustFilters.size() == 1) { - return mustFilters.get(0).getDocIdSet(reader); + // LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs... + return mustFilters.get(0).getDocIdSet(context, null); } if (shouldFilters != null) { for (int i = 0; i < shouldFilters.size(); i++) { - final DocIdSet disi = getDISI(shouldFilters, i, reader); + // LUCENE 4 UPGRADE: For now, leave acceptedDocs null until we figure out how to deal with deleted docs...
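+ // (Each SHOULD clause's DocIdSet is OR-ed into the result bitset below.)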
From bb765420681a1a7ac4ec4b8f2c6ee69ce1008b55 Mon Sep 17 00:00:00 2001
From: Igor Motov
Date: Wed, 31 Oct 2012 14:29:11 -0400
Subject: [PATCH 080/146] lucene4: unit tests cleanup

---
 .../common/lucene/DocumentBuilder.java        | 89 +++++++++++++++++++
 .../common/lucene/FieldBuilder.java           | 75 ++++++++++++++++
 .../unit/common/bloom/BoomFilterTests.java    | 50 -----------
 .../CompressIndexInputOutputTests.java        | 48 +++++-----
 .../TermQueryPrefixTreeStrategyTests.java     |  6 +-
 .../store/InputStreamIndexInputTests.java     | 25 +++---
 .../deps/lucene/LuceneFieldCacheTests.java    | 14 ++-
 .../unit/deps/lucene/SimpleLuceneTests.java   | 51 ++++++-----
 .../index/analysis/CompoundAnalysisTests.java |  6 +-
 .../ShingleTokenFilterFactoryTests.java       |  2 +-
 .../filter1/MyFilterTokenFilterFactory.java   |  4 +-
 .../synonyms/SynonymsAnalysisTest.java        |  6 +-
 12 files changed, 244 insertions(+), 132 deletions(-)
 create mode 100644 src/main/java/org/elasticsearch/common/lucene/DocumentBuilder.java
 create mode 100644 src/main/java/org/elasticsearch/common/lucene/FieldBuilder.java
 delete mode 100644 src/test/java/org/elasticsearch/test/unit/common/bloom/BoomFilterTests.java

diff --git a/src/main/java/org/elasticsearch/common/lucene/DocumentBuilder.java b/src/main/java/org/elasticsearch/common/lucene/DocumentBuilder.java
new file mode 100644
index 00000000000..a6583284477
--- /dev/null
+++ b/src/main/java/org/elasticsearch/common/lucene/DocumentBuilder.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to ElasticSearch and Shay Banon under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.
ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.elasticsearch.common.lucene; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.elasticsearch.common.lucene.uid.UidField; + +/** + * + */ +public class DocumentBuilder { + + public static final Document EMPTY = new Document(); + + public static DocumentBuilder doc() { + return new DocumentBuilder(); + } + + public static Field uidField(String value) { + return uidField(value, 0); + } + + public static Field uidField(String value, long version) { + return new UidField("_uid", value, version); + } + + public static FieldBuilder field(String name, String value) { + return field(name, value, Field.Store.YES, Field.Index.ANALYZED); + } + + public static FieldBuilder field(String name, String value, Field.Store store, Field.Index index) { + return new FieldBuilder(name, value, store, index); + } + + public static FieldBuilder field(String name, String value, Field.Store store, Field.Index index, Field.TermVector termVector) { + return new FieldBuilder(name, value, store, index, termVector); + } + + public static FieldBuilder field(String name, byte[] value, Field.Store store) { + return new FieldBuilder(name, value, store); + } + + public static FieldBuilder field(String name, byte[] value, int offset, int length, Field.Store store) { + return new FieldBuilder(name, value, offset, length, store); + } + + private final Document document; + + private DocumentBuilder() { + this.document = new Document(); + } + + public DocumentBuilder boost(float boost) { + document.setBoost(boost); + return this; + } + + public DocumentBuilder add(Field field) { + document.add(field); + return this; + } + + public DocumentBuilder add(FieldBuilder fieldBuilder) { + document.add(fieldBuilder.build()); + return this; + } + + public Document build() { + return document; + } +} diff --git a/src/main/java/org/elasticsearch/common/lucene/FieldBuilder.java b/src/main/java/org/elasticsearch/common/lucene/FieldBuilder.java new file mode 100644 index 00000000000..4f62f3c0884 --- /dev/null +++ b/src/main/java/org/elasticsearch/common/lucene/FieldBuilder.java @@ -0,0 +1,75 @@ +/* + * Licensed to ElasticSearch and Shay Banon under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. ElasticSearch licenses this + * file to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.elasticsearch.common.lucene; + +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.FieldInfo; + +/** + * + */ +public class FieldBuilder { + + private final Field field; + + FieldBuilder(String name, String value, Field.Store store, Field.Index index) { + field = new Field(name, value, store, index); + } + + FieldBuilder(String name, String value, Field.Store store, Field.Index index, Field.TermVector termVector) { + field = new Field(name, value, store, index, termVector); + } + + FieldBuilder(String name, byte[] value, Field.Store store) { + FieldType fieldType = new FieldType(); + fieldType.setStored(store == Field.Store.YES); + field = new Field(name, value, fieldType); + } + + FieldBuilder(String name, byte[] value, int offset, int length, Field.Store store) { + FieldType fieldType = new FieldType(); + fieldType.setStored(store == Field.Store.YES); + field = new Field(name, value, offset, length, fieldType); + } + + public FieldBuilder boost(float boost) { + field.setBoost(boost); + return this; + } + + public FieldBuilder omitNorms(boolean omitNorms) { + field.fieldType().setOmitNorms(omitNorms); + return this; + } + + public FieldBuilder omitTermFreqAndPositions(boolean omitTermFreqAndPositions) { + if (omitTermFreqAndPositions) { + field.fieldType().setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY); + } else { + field.fieldType().setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); + } + return this; + } + + public Field build() { + return field; + } +} diff --git a/src/test/java/org/elasticsearch/test/unit/common/bloom/BoomFilterTests.java b/src/test/java/org/elasticsearch/test/unit/common/bloom/BoomFilterTests.java deleted file mode 100644 index 16ca73de68b..00000000000 --- a/src/test/java/org/elasticsearch/test/unit/common/bloom/BoomFilterTests.java +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Licensed to Elastic Search and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. Elastic Search licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.test.unit.common.bloom; - -import com.google.common.base.Charsets; -import org.elasticsearch.common.bloom.BloomFilter; -import org.elasticsearch.common.bloom.BloomFilterFactory; -import org.testng.annotations.Test; - -import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.equalTo; - -/** - * - */ -@Test -public class BoomFilterTests { - - @Test - public void testSimpleOps() { - BloomFilter filter = BloomFilterFactory.getFilter(10, 15); - filter.add(wrap("1"), 0, wrap("1").length); - assertThat(filter.isPresent(wrap("1"), 0, wrap("1").length), equalTo(true)); - assertThat(filter.isPresent(wrap("2"), 0, wrap("2").length), equalTo(false)); - filter.add(wrap("2"), 0, wrap("2").length); - assertThat(filter.isPresent(wrap("1"), 0, wrap("1").length), equalTo(true)); - assertThat(filter.isPresent(wrap("2"), 0, wrap("2").length), equalTo(true)); - } - - private byte[] wrap(String key) { - return key.getBytes(Charsets.UTF_8); - } -} \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/test/unit/common/compress/CompressIndexInputOutputTests.java b/src/test/java/org/elasticsearch/test/unit/common/compress/CompressIndexInputOutputTests.java index 6249994d54b..42465aaf269 100644 --- a/src/test/java/org/elasticsearch/test/unit/common/compress/CompressIndexInputOutputTests.java +++ b/src/test/java/org/elasticsearch/test/unit/common/compress/CompressIndexInputOutputTests.java @@ -21,16 +21,11 @@ package org.elasticsearch.test.unit.common.compress; import jsr166y.ThreadLocalRandom; import org.apache.lucene.document.Document; +import org.apache.lucene.document.DocumentStoredFieldVisitor; import org.apache.lucene.document.Field; -import org.apache.lucene.document.MapFieldSelector; -import org.apache.lucene.index.CheckIndex; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.index.*; +import org.apache.lucene.store.*; +import org.apache.lucene.util.Bits; import org.elasticsearch.common.RandomStringGenerator; import org.elasticsearch.common.compress.CompressedDirectory; import org.elasticsearch.common.compress.CompressedIndexInput; @@ -88,10 +83,10 @@ public class CompressIndexInputOutputTests { private void empty(Compressor compressor) throws Exception { Directory dir = new RAMDirectory(); - IndexOutput out = compressor.indexOutput(dir.createOutput("test")); + IndexOutput out = compressor.indexOutput(dir.createOutput("test", IOContext.DEFAULT)); out.close(); - IndexInput in = compressor.indexInput(dir.openInput("test")); + IndexInput in = compressor.indexInput(dir.openInput("test", IOContext.DEFAULT)); try { in.readByte(); assert false; @@ -110,7 +105,7 @@ public class CompressIndexInputOutputTests { private void simple(Compressor compressor) throws Exception { Directory dir = new RAMDirectory(); - IndexOutput out = compressor.indexOutput(dir.createOutput("test")); + IndexOutput out = compressor.indexOutput(dir.createOutput("test", IOContext.DEFAULT)); long pos1 = out.getFilePointer(); out.writeInt(1); long pos2 = out.getFilePointer(); @@ -124,7 +119,7 @@ public class CompressIndexInputOutputTests { out.writeString("test2"); out.close(); - IndexInput in = compressor.indexInput(dir.openInput("test")); + IndexInput in = 
compressor.indexInput(dir.openInput("test", IOContext.DEFAULT));
         assertThat(in.readInt(), equalTo(1));
         assertThat(in.readString(), equalTo("test1"));
         assertThat(in.readString(), equalTo(largeString));
@@ -157,7 +152,7 @@
 
     private void seek1(boolean compressed, Compressor compressor) throws Exception {
         Directory dir = new RAMDirectory();
-        IndexOutput out = compressed ? compressor.indexOutput(dir.createOutput("test")) : dir.createOutput("test");
+        IndexOutput out = compressed ? compressor.indexOutput(dir.createOutput("test", IOContext.DEFAULT)) : dir.createOutput("test", IOContext.DEFAULT);
         long pos1 = out.getFilePointer();
         out.writeVInt(4);
         out.writeInt(1);
@@ -182,7 +177,7 @@
         out.close();
 
         //IndexInput in = dir.openInput("test");
-        IndexInput in = compressed ? compressor.indexInput(dir.openInput("test")) : dir.openInput("test");
+        IndexInput in = compressed ? compressor.indexInput(dir.openInput("test", IOContext.DEFAULT)) : dir.openInput("test", IOContext.DEFAULT);
         in.seek(pos2);
         // now "skip"
         int numBytes = in.readVInt();
@@ -200,7 +195,7 @@
 
     private void copyBytes(Compressor compressor) throws Exception {
         Directory dir = new RAMDirectory();
-        IndexOutput out = compressor.indexOutput(dir.createOutput("test"));
+        IndexOutput out = compressor.indexOutput(dir.createOutput("test", IOContext.DEFAULT));
         long pos1 = out.getFilePointer();
         out.writeInt(1);
         long pos2 = out.getFilePointer();
@@ -217,17 +212,17 @@
         long length = out.length();
         out.close();
 
-        CompressedIndexOutput out2 = compressor.indexOutput(dir.createOutput("test2"));
+        CompressedIndexOutput out2 = compressor.indexOutput(dir.createOutput("test2", IOContext.DEFAULT));
         out2.writeString("mergeStart");
         long startMergePos = out2.getFilePointer();
-        CompressedIndexInput testInput = compressor.indexInput(dir.openInput("test"));
+        CompressedIndexInput testInput = compressor.indexInput(dir.openInput("test", IOContext.DEFAULT));
         assertThat(testInput.length(), equalTo(length));
         out2.copyBytes(testInput, testInput.length());
         long endMergePos = out2.getFilePointer();
         out2.writeString("mergeEnd");
         out2.close();
 
-        IndexInput in = compressor.indexInput(dir.openInput("test2"));
+        IndexInput in = compressor.indexInput(dir.openInput("test2", IOContext.DEFAULT));
         assertThat(in.readString(), equalTo("mergeStart"));
         assertThat(in.readInt(), equalTo(1));
         assertThat(in.readString(), equalTo("test1"));
@@ -276,24 +271,29 @@
         CheckIndex checkIndex = new CheckIndex(writer.getDirectory());
         CheckIndex.Status status = checkIndex.checkIndex();
         assertThat(status.clean, equalTo(true));
-        IndexReader reader = IndexReader.open(writer, true);
+        IndexReader reader = DirectoryReader.open(writer, true);
+        final Bits liveDocs = MultiFields.getLiveDocs(reader);
         for (int i = 0; i < reader.maxDoc(); i++) {
-            if (reader.isDeleted(i)) {
+            if (liveDocs != null && !liveDocs.get(i)) {
                 continue;
             }
             Document document = reader.document(i);
             checkDoc(document);
-            document = reader.document(i, new MapFieldSelector("id", "field", "count"));
+            DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("id", "field", "count");
+            reader.document(i, visitor);
+            document = visitor.getDocument();
             checkDoc(document);
         }
         for (int i = 0; i < 100; i++) {
             int doc = ThreadLocalRandom.current().nextInt(reader.maxDoc());
-            if (reader.isDeleted(i)) {
+            if (liveDocs != null && !liveDocs.get(doc)) {
                 continue;
             }
             Document document = reader.document(doc);
             checkDoc(document);
-            document = reader.document(doc, new MapFieldSelector("id", "field", "count"));
+            DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("id", "field", "count");
+            reader.document(doc, visitor);
+            document = visitor.getDocument();
             checkDoc(document);
         }
     }
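The verify() changes above bundle the two recurring Lucene 4 test migrations: IndexReader.isDeleted(docId) becomes a MultiFields.getLiveDocs(reader) check, and MapFieldSelector becomes a StoredFieldVisitor. The pattern in isolation (a sketch assuming an open IndexReader named reader):

    // Live-docs check replaces IndexReader.isDeleted(); the Bits is null
    // when the index contains no deletions.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    for (int docId = 0; docId < reader.maxDoc(); docId++) {
        if (liveDocs != null && !liveDocs.get(docId)) {
            continue; // document is deleted
        }
        // DocumentStoredFieldVisitor replaces MapFieldSelector: only the
        // named stored fields are loaded into the resulting Document.
        DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("id", "field", "count");
        reader.document(docId, visitor);
        Document document = visitor.getDocument();
    }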
diff --git a/src/test/java/org/elasticsearch/test/unit/common/lucene/spatial/prefix/TermQueryPrefixTreeStrategyTests.java b/src/test/java/org/elasticsearch/test/unit/common/lucene/spatial/prefix/TermQueryPrefixTreeStrategyTests.java
index 9651f52479c..b04862fb6fb 100644
--- a/src/test/java/org/elasticsearch/test/unit/common/lucene/spatial/prefix/TermQueryPrefixTreeStrategyTests.java
+++ b/src/test/java/org/elasticsearch/test/unit/common/lucene/spatial/prefix/TermQueryPrefixTreeStrategyTests.java
@@ -2,7 +2,7 @@
 
 import com.spatial4j.core.shape.Rectangle;
 import com.spatial4j.core.shape.Shape;
-import org.apache.lucene.analysis.KeywordAnalyzer;
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
 import org.apache.lucene.index.IndexReader;
@@ -75,7 +75,7 @@
         Set foundIDs = new HashSet();
         for (ScoreDoc doc : topDocs.scoreDocs) {
             Document foundDocument = indexSearcher.doc(doc.doc);
-            foundIDs.add(foundDocument.getFieldable("id").stringValue());
+            foundIDs.add(foundDocument.getField("id").stringValue());
         }
 
         for (String id : ids) {
@@ -157,6 +157,6 @@
 
     @AfterTest
     public void tearDown() throws IOException {
-        IOUtils.close(indexSearcher, indexReader, directory);
+        IOUtils.close(indexReader, directory);
     }
 }
diff --git a/src/test/java/org/elasticsearch/test/unit/common/lucene/store/InputStreamIndexInputTests.java b/src/test/java/org/elasticsearch/test/unit/common/lucene/store/InputStreamIndexInputTests.java
index 68e8d6e4dd3..7be1f7fc886 100644
--- a/src/test/java/org/elasticsearch/test/unit/common/lucene/store/InputStreamIndexInputTests.java
+++ b/src/test/java/org/elasticsearch/test/unit/common/lucene/store/InputStreamIndexInputTests.java
@@ -19,6 +19,7 @@
 
 package org.elasticsearch.test.unit.common.lucene.store;
 
+import org.apache.lucene.store.IOContext;
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.IndexOutput;
 import org.apache.lucene.store.RAMDirectory;
@@ -39,7 +40,7 @@
     @Test
     public void testSingleReadSingleByteLimit() throws IOException {
         RAMDirectory dir = new RAMDirectory();
-        IndexOutput output = dir.createOutput("test");
+        IndexOutput output = dir.createOutput("test", IOContext.DEFAULT);
         for (int i = 0; i < 3; i++) {
             output.writeByte((byte) 1);
         }
@@ -49,7 +50,7 @@
 
         output.close();
 
-        IndexInput input = dir.openInput("test");
+        IndexInput input = dir.openInput("test", IOContext.DEFAULT);
 
         for (int i = 0; i < 3; i++) {
             InputStreamIndexInput is = new InputStreamIndexInput(input, 1);
@@ -76,7 +77,7 @@
     @Test
     public void testReadMultiSingleByteLimit1() throws IOException {
         RAMDirectory dir = new RAMDirectory();
-        IndexOutput output = dir.createOutput("test");
+        IndexOutput output = dir.createOutput("test", IOContext.DEFAULT);
         for (int i = 0; i < 3; i++) {
             output.writeByte((byte) 1);
         }
@@ -86,7 +87,7 @@
output.close(); - IndexInput input = dir.openInput("test"); + IndexInput input = dir.openInput("test", IOContext.DEFAULT); byte[] read = new byte[2]; @@ -115,7 +116,7 @@ public class InputStreamIndexInputTests { @Test public void testSingleReadTwoBytesLimit() throws IOException { RAMDirectory dir = new RAMDirectory(); - IndexOutput output = dir.createOutput("test"); + IndexOutput output = dir.createOutput("test", IOContext.DEFAULT); for (int i = 0; i < 3; i++) { output.writeByte((byte) 1); } @@ -125,7 +126,7 @@ public class InputStreamIndexInputTests { output.close(); - IndexInput input = dir.openInput("test"); + IndexInput input = dir.openInput("test", IOContext.DEFAULT); assertThat(input.getFilePointer(), lessThan(input.length())); InputStreamIndexInput is = new InputStreamIndexInput(input, 2); @@ -157,7 +158,7 @@ public class InputStreamIndexInputTests { @Test public void testReadMultiTwoBytesLimit1() throws IOException { RAMDirectory dir = new RAMDirectory(); - IndexOutput output = dir.createOutput("test"); + IndexOutput output = dir.createOutput("test", IOContext.DEFAULT); for (int i = 0; i < 3; i++) { output.writeByte((byte) 1); } @@ -167,7 +168,7 @@ public class InputStreamIndexInputTests { output.close(); - IndexInput input = dir.openInput("test"); + IndexInput input = dir.openInput("test", IOContext.DEFAULT); byte[] read = new byte[2]; @@ -201,7 +202,7 @@ public class InputStreamIndexInputTests { @Test public void testReadMultiFourBytesLimit() throws IOException { RAMDirectory dir = new RAMDirectory(); - IndexOutput output = dir.createOutput("test"); + IndexOutput output = dir.createOutput("test", IOContext.DEFAULT); for (int i = 0; i < 3; i++) { output.writeByte((byte) 1); } @@ -211,7 +212,7 @@ public class InputStreamIndexInputTests { output.close(); - IndexInput input = dir.openInput("test"); + IndexInput input = dir.openInput("test", IOContext.DEFAULT); byte[] read = new byte[4]; @@ -240,7 +241,7 @@ public class InputStreamIndexInputTests { @Test public void testMarkRest() throws Exception { RAMDirectory dir = new RAMDirectory(); - IndexOutput output = dir.createOutput("test"); + IndexOutput output = dir.createOutput("test", IOContext.DEFAULT); for (int i = 0; i < 3; i++) { output.writeByte((byte) 1); } @@ -250,7 +251,7 @@ public class InputStreamIndexInputTests { output.close(); - IndexInput input = dir.openInput("test"); + IndexInput input = dir.openInput("test", IOContext.DEFAULT); InputStreamIndexInput is = new InputStreamIndexInput(input, 4); assertThat(is.markSupported(), equalTo(true)); assertThat(is.read(), equalTo(1)); diff --git a/src/test/java/org/elasticsearch/test/unit/deps/lucene/LuceneFieldCacheTests.java b/src/test/java/org/elasticsearch/test/unit/deps/lucene/LuceneFieldCacheTests.java index 210385aa30b..41a95bdc882 100644 --- a/src/test/java/org/elasticsearch/test/unit/deps/lucene/LuceneFieldCacheTests.java +++ b/src/test/java/org/elasticsearch/test/unit/deps/lucene/LuceneFieldCacheTests.java @@ -20,10 +20,8 @@ package org.elasticsearch.test.unit.deps.lucene; import org.apache.lucene.document.Document; -import org.apache.lucene.document.NumericField; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.document.IntField; +import org.apache.lucene.index.*; import org.apache.lucene.search.FieldCache; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; @@ -49,16 +47,16 @@ public class LuceneFieldCacheTests { 
IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); Document doc = new Document(); - NumericField field = new NumericField("int1").setIntValue(1); + IntField field = new IntField("int1", 1, IntField.TYPE_NOT_STORED); doc.add(field); - field = new NumericField("int1").setIntValue(2); + field = new IntField("int1", 2, IntField.TYPE_NOT_STORED); doc.add(field); indexWriter.addDocument(doc); - IndexReader reader = IndexReader.open(indexWriter, true); - int[] ints = FieldCache.DEFAULT.getInts(reader, "int1"); + AtomicReader reader = SlowCompositeReaderWrapper.wrap(IndexReader.open(indexWriter, true)); + int[] ints = FieldCache.DEFAULT.getInts(reader, "int1", false); assertThat(ints.length, equalTo(1)); assertThat(ints[0], equalTo(2)); } diff --git a/src/test/java/org/elasticsearch/test/unit/deps/lucene/SimpleLuceneTests.java b/src/test/java/org/elasticsearch/test/unit/deps/lucene/SimpleLuceneTests.java index 2210a48c8ba..f7430a815d2 100644 --- a/src/test/java/org/elasticsearch/test/unit/deps/lucene/SimpleLuceneTests.java +++ b/src/test/java/org/elasticsearch/test/unit/deps/lucene/SimpleLuceneTests.java @@ -24,6 +24,7 @@ import org.apache.lucene.index.*; import org.apache.lucene.search.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.common.lucene.Lucene; import org.testng.annotations.Test; @@ -50,7 +51,7 @@ public class SimpleLuceneTests { } IndexReader reader = IndexReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); - TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), null, 10, new Sort(new SortField("str", SortField.STRING))); + TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), null, 10, new Sort(new SortField("str", SortField.Type.STRING))); for (int i = 0; i < 10; i++) { FieldDoc fieldDoc = (FieldDoc) docs.scoreDocs[i]; assertThat(fieldDoc.fields[0].toString(), equalTo(new String(new char[]{(char) (97 + i), (char) (97 + i)}))); @@ -63,17 +64,17 @@ public class SimpleLuceneTests { IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); indexWriter.addDocument(doc() .add(field("_id", "1")).build()); - IndexReader reader = IndexReader.open(indexWriter, true); + DirectoryReader reader = IndexReader.open(indexWriter, true); assertThat(reader.numDocs(), equalTo(1)); indexWriter.prepareCommit(); - reader = reader.reopen(); + reader = DirectoryReader.openIfChanged(reader); assertThat(reader.numDocs(), equalTo(1)); indexWriter.addDocument(doc() .add(field("_id", "2")).build()); indexWriter.commit(); - reader = reader.reopen(); + reader = DirectoryReader.openIfChanged(reader); assertThat(reader.numDocs(), equalTo(2)); } @@ -82,18 +83,20 @@ public class SimpleLuceneTests { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); - indexWriter.addDocument(doc().add(field("_id", "1")).add(new NumericField("test", Field.Store.YES, true).setIntValue(2)).build()); + indexWriter.addDocument(doc().add(field("_id", "1")).add(new IntField("test", 2, IntField.TYPE_STORED)).build()); IndexReader reader = IndexReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1); Document doc = 
searcher.doc(topDocs.scoreDocs[0].doc);
-        Fieldable f = doc.getFieldable("test");
+        IndexableField f = doc.getField("test");
         assertThat(f.stringValue(), equalTo("2"));
 
-        topDocs = searcher.search(new TermQuery(new Term("test", NumericUtils.intToPrefixCoded(2))), 1);
+        BytesRef bytes = new BytesRef();
+        NumericUtils.intToPrefixCoded(2, 0, bytes);
+        topDocs = searcher.search(new TermQuery(new Term("test", bytes)), 1);
         doc = searcher.doc(topDocs.scoreDocs[0].doc);
-        f = doc.getFieldable("test");
+        f = doc.getField("test");
         assertThat(f.stringValue(), equalTo("2"));
 
         indexWriter.close();
@@ -117,11 +120,11 @@
         IndexSearcher searcher = new IndexSearcher(reader);
         TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
         final ArrayList fieldsOrder = new ArrayList();
-        Document doc = searcher.doc(topDocs.scoreDocs[0].doc, new FieldSelector() {
+        searcher.doc(topDocs.scoreDocs[0].doc, new StoredFieldVisitor() {
             @Override
-            public FieldSelectorResult accept(String fieldName) {
-                fieldsOrder.add(fieldName);
-                return FieldSelectorResult.LOAD;
+            public Status needsField(FieldInfo fieldInfo) throws IOException {
+                fieldsOrder.add(fieldInfo.name);
+                return Status.YES;
             }
         });
 
@@ -167,7 +170,7 @@
     public void testNRTSearchOnClosedWriter() throws Exception {
         Directory dir = new RAMDirectory();
         IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));
-        IndexReader reader = IndexReader.open(indexWriter, true);
+        DirectoryReader reader = IndexReader.open(indexWriter, true);
 
         for (int i = 0; i < 100; i++) {
             indexWriter.addDocument(doc()
@@ -192,22 +195,18 @@
         IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER));
 
         Document doc = new Document();
-        NumericField field = new NumericField("int1").setIntValue(1);
-        field.setOmitNorms(true);
-        field.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS);
+        FieldType type = new FieldType(IntField.TYPE_STORED); // copy: IntField.TYPE_STORED is frozen
+        type.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS);
+        IntField field = new IntField("int1", 1, type);
         doc.add(field);
-        field = new NumericField("int1").setIntValue(1);
+        field = new IntField("int1", 1, type);
         doc.add(field);
 
-        field = new NumericField("int2").setIntValue(1);
-        field.setOmitNorms(true);
-        field.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS);
+        field = new IntField("int2", 1, type);
         doc.add(field);
-        field = new NumericField("int2").setIntValue(1);
-        field.setOmitNorms(true);
-        field.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS);
+        field = new IntField("int2", 1, type);
         doc.add(field);
 
         indexWriter.addDocument(doc);
@@ -232,9 +231,9 @@
         indexWriter.close();
     }
 
-    private IndexReader refreshReader(IndexReader reader) throws IOException {
-        IndexReader oldReader = reader;
-        reader = reader.reopen();
+    private DirectoryReader refreshReader(DirectoryReader reader) throws IOException {
+        DirectoryReader oldReader = reader;
+        reader = DirectoryReader.openIfChanged(reader);
         if (reader != oldReader) {
             oldReader.close();
         }
diff --git a/src/test/java/org/elasticsearch/test/unit/index/analysis/CompoundAnalysisTests.java b/src/test/java/org/elasticsearch/test/unit/index/analysis/CompoundAnalysisTests.java
index 61ed2179a4d..10c040359d1 100644
--- a/src/test/java/org/elasticsearch/test/unit/index/analysis/CompoundAnalysisTests.java
+++
b/src/test/java/org/elasticsearch/test/unit/index/analysis/CompoundAnalysisTests.java @@ -21,7 +21,7 @@ package org.elasticsearch.test.unit.index.analysis; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.elasticsearch.common.inject.Injector; import org.elasticsearch.common.inject.ModulesBuilder; import org.elasticsearch.common.lucene.all.AllEntries; @@ -98,11 +98,11 @@ public class CompoundAnalysisTests { allEntries.reset(); TokenStream stream = AllTokenStream.allTokenStream("_all", allEntries, analyzer); - TermAttribute termAtt = stream.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); List terms = new ArrayList(); while (stream.incrementToken()) { - String tokText = termAtt.term(); + String tokText = termAtt.toString(); terms.add(tokText); } return terms; diff --git a/src/test/java/org/elasticsearch/test/unit/index/analysis/ShingleTokenFilterFactoryTests.java b/src/test/java/org/elasticsearch/test/unit/index/analysis/ShingleTokenFilterFactoryTests.java index 3d656c3bf79..5ce91ad4278 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/analysis/ShingleTokenFilterFactoryTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/analysis/ShingleTokenFilterFactoryTests.java @@ -20,7 +20,7 @@ package org.elasticsearch.test.unit.index.analysis; import org.apache.lucene.analysis.Tokenizer; -import org.apache.lucene.analysis.WhitespaceTokenizer; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.util.Version; import org.elasticsearch.index.analysis.AnalysisService; import org.elasticsearch.index.analysis.ShingleTokenFilterFactory; diff --git a/src/test/java/org/elasticsearch/test/unit/index/analysis/filter1/MyFilterTokenFilterFactory.java b/src/test/java/org/elasticsearch/test/unit/index/analysis/filter1/MyFilterTokenFilterFactory.java index cb3ed833823..8673387215f 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/analysis/filter1/MyFilterTokenFilterFactory.java +++ b/src/test/java/org/elasticsearch/test/unit/index/analysis/filter1/MyFilterTokenFilterFactory.java @@ -19,9 +19,9 @@ package org.elasticsearch.test.unit.index.analysis.filter1; -import org.apache.lucene.analysis.StopAnalyzer; -import org.apache.lucene.analysis.StopFilter; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.core.StopAnalyzer; +import org.apache.lucene.analysis.core.StopFilter; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; diff --git a/src/test/java/org/elasticsearch/test/unit/index/analysis/synonyms/SynonymsAnalysisTest.java b/src/test/java/org/elasticsearch/test/unit/index/analysis/synonyms/SynonymsAnalysisTest.java index db9a1b3b827..10196dccd5d 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/analysis/synonyms/SynonymsAnalysisTest.java +++ b/src/test/java/org/elasticsearch/test/unit/index/analysis/synonyms/SynonymsAnalysisTest.java @@ -21,7 +21,7 @@ package org.elasticsearch.test.unit.index.analysis.synonyms; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import 
org.elasticsearch.common.inject.Injector; import org.elasticsearch.common.inject.ModulesBuilder; import org.elasticsearch.common.logging.ESLogger; @@ -90,11 +90,11 @@ public class SynonymsAnalysisTest { allEntries.reset(); TokenStream stream = AllTokenStream.allTokenStream("_all", allEntries, analyzer); - TermAttribute termAtt = stream.addAttribute(TermAttribute.class); + CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); StringBuilder sb = new StringBuilder(); while (stream.incrementToken()) { - sb.append(termAtt.term()).append(" "); + sb.append(termAtt.toString()).append(" "); } MatcherAssert.assertThat(target, equalTo(sb.toString().trim())); From 594598f49382d8aa83db3cf2f73bc67fcd020424 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Wed, 31 Oct 2012 20:21:49 +0100 Subject: [PATCH 081/146] close the index input in any case when computing length --- .../elasticsearch/common/compress/CompressedDirectory.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/compress/CompressedDirectory.java b/src/main/java/org/elasticsearch/common/compress/CompressedDirectory.java index e5a6b2d0cd5..11a6b9897e1 100644 --- a/src/main/java/org/elasticsearch/common/compress/CompressedDirectory.java +++ b/src/main/java/org/elasticsearch/common/compress/CompressedDirectory.java @@ -3,6 +3,7 @@ package org.elasticsearch.common.compress; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import org.apache.lucene.store.*; +import org.apache.lucene.util.IOUtils; import org.elasticsearch.index.store.support.ForceSyncDirectory; import java.io.IOException; @@ -77,8 +78,8 @@ public class CompressedDirectory extends Directory implements ForceSyncDirectory IndexInput in = openInput(name, IOContext.READONCE); try { return in.length(); - } catch (Exception e) { - in.close(); + } finally { + IOUtils.close(in); } } return dir.fileLength(name); From 5ad40205c2aa100267d2c64f025ae32cb3780f51 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Wed, 31 Oct 2012 16:41:24 -0400 Subject: [PATCH 082/146] lucene4: remove DocumentBuilder and FieldBuilder --- .../common/lucene/DocumentBuilder.java | 89 ------------------- .../common/lucene/FieldBuilder.java | 75 ---------------- .../search/MatchAllDocsFilterTests.java | 14 ++- .../lucene/search/MoreLikeThisQueryTests.java | 16 +++- .../unit/deps/lucene/SimpleLuceneTests.java | 44 +++++---- .../deps/lucene/VectorHighlighterTests.java | 25 ++++-- .../index/cache/filter/FilterCacheTests.java | 22 +++-- .../SnapshotDeletionPolicyTests.java | 39 ++++---- .../engine/AbstractSimpleEngineTests.java | 89 ++++++++++++------- .../data/doubles/DoubleFieldDataTests.java | 41 ++++----- .../data/floats/FloatFieldDataTests.java | 41 ++++----- .../field/data/ints/IntFieldDataTests.java | 41 ++++----- .../field/data/longs/LongFieldDataTests.java | 41 ++++----- .../data/shorts/ShortFieldDataTests.java | 41 ++++----- .../data/strings/StringFieldDataTests.java | 34 ++++--- 15 files changed, 282 insertions(+), 370 deletions(-) delete mode 100644 src/main/java/org/elasticsearch/common/lucene/DocumentBuilder.java delete mode 100644 src/main/java/org/elasticsearch/common/lucene/FieldBuilder.java diff --git a/src/main/java/org/elasticsearch/common/lucene/DocumentBuilder.java b/src/main/java/org/elasticsearch/common/lucene/DocumentBuilder.java deleted file mode 100644 index a6583284477..00000000000 --- a/src/main/java/org/elasticsearch/common/lucene/DocumentBuilder.java +++ /dev/null @@ 
-1,89 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.common.lucene; - -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; -import org.elasticsearch.common.lucene.uid.UidField; - -/** - * - */ -public class DocumentBuilder { - - public static final Document EMPTY = new Document(); - - public static DocumentBuilder doc() { - return new DocumentBuilder(); - } - - public static Field uidField(String value) { - return uidField(value, 0); - } - - public static Field uidField(String value, long version) { - return new UidField("_uid", value, version); - } - - public static FieldBuilder field(String name, String value) { - return field(name, value, Field.Store.YES, Field.Index.ANALYZED); - } - - public static FieldBuilder field(String name, String value, Field.Store store, Field.Index index) { - return new FieldBuilder(name, value, store, index); - } - - public static FieldBuilder field(String name, String value, Field.Store store, Field.Index index, Field.TermVector termVector) { - return new FieldBuilder(name, value, store, index, termVector); - } - - public static FieldBuilder field(String name, byte[] value, Field.Store store) { - return new FieldBuilder(name, value, store); - } - - public static FieldBuilder field(String name, byte[] value, int offset, int length, Field.Store store) { - return new FieldBuilder(name, value, offset, length, store); - } - - private final Document document; - - private DocumentBuilder() { - this.document = new Document(); - } - - public DocumentBuilder boost(float boost) { - document.setBoost(boost); - return this; - } - - public DocumentBuilder add(Field field) { - document.add(field); - return this; - } - - public DocumentBuilder add(FieldBuilder fieldBuilder) { - document.add(fieldBuilder.build()); - return this; - } - - public Document build() { - return document; - } -} diff --git a/src/main/java/org/elasticsearch/common/lucene/FieldBuilder.java b/src/main/java/org/elasticsearch/common/lucene/FieldBuilder.java deleted file mode 100644 index 4f62f3c0884..00000000000 --- a/src/main/java/org/elasticsearch/common/lucene/FieldBuilder.java +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.common.lucene; - -import org.apache.lucene.document.Field; -import org.apache.lucene.document.FieldType; -import org.apache.lucene.index.FieldInfo; - -/** - * - */ -public class FieldBuilder { - - private final Field field; - - FieldBuilder(String name, String value, Field.Store store, Field.Index index) { - field = new Field(name, value, store, index); - } - - FieldBuilder(String name, String value, Field.Store store, Field.Index index, Field.TermVector termVector) { - field = new Field(name, value, store, index, termVector); - } - - FieldBuilder(String name, byte[] value, Field.Store store) { - FieldType fieldType = new FieldType(); - fieldType.setStored(store == Field.Store.YES); - field = new Field(name, value, fieldType); - } - - FieldBuilder(String name, byte[] value, int offset, int length, Field.Store store) { - FieldType fieldType = new FieldType(); - fieldType.setStored(store == Field.Store.YES); - field = new Field(name, value, offset, length, fieldType); - } - - public FieldBuilder boost(float boost) { - field.setBoost(boost); - return this; - } - - public FieldBuilder omitNorms(boolean omitNorms) { - field.fieldType().setOmitNorms(omitNorms); - return this; - } - - public FieldBuilder omitTermFreqAndPositions(boolean omitTermFreqAndPositions) { - if (omitTermFreqAndPositions) { - field.fieldType().setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY); - } else { - field.fieldType().setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS); - } - return this; - } - - public Field build() { - return field; - } -} diff --git a/src/test/java/org/elasticsearch/test/unit/common/lucene/search/MatchAllDocsFilterTests.java b/src/test/java/org/elasticsearch/test/unit/common/lucene/search/MatchAllDocsFilterTests.java index 2af7599e42f..df99d45e9ff 100644 --- a/src/test/java/org/elasticsearch/test/unit/common/lucene/search/MatchAllDocsFilterTests.java +++ b/src/test/java/org/elasticsearch/test/unit/common/lucene/search/MatchAllDocsFilterTests.java @@ -19,6 +19,7 @@ package org.elasticsearch.test.unit.common.lucene.search; +import org.apache.lucene.document.*; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -30,8 +31,6 @@ import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.search.Queries; import org.testng.annotations.Test; -import static org.elasticsearch.common.lucene.DocumentBuilder.doc; -import static org.elasticsearch.common.lucene.DocumentBuilder.field; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; @@ -45,8 +44,15 @@ public class MatchAllDocsFilterTests { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); - indexWriter.addDocument(doc().add(field("_id", "1")).add(field("text", "lucene")).build()); - indexWriter.addDocument(doc().add(field("_id", "2")).add(field("text", "lucene release")).build()); + Document document = new Document(); + document.add(new 
TextField("_id", "1", Field.Store.YES)); + document.add(new TextField("text", "lucene", Field.Store.YES)); + indexWriter.addDocument(document); + + document = new Document(); + document.add(new TextField("_id", "2", Field.Store.YES)); + document.add(new TextField("text", "lucene release", Field.Store.YES)); + indexWriter.addDocument(document); IndexReader reader = IndexReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); diff --git a/src/test/java/org/elasticsearch/test/unit/common/lucene/search/MoreLikeThisQueryTests.java b/src/test/java/org/elasticsearch/test/unit/common/lucene/search/MoreLikeThisQueryTests.java index 206f4152287..f43efe9d1b0 100644 --- a/src/test/java/org/elasticsearch/test/unit/common/lucene/search/MoreLikeThisQueryTests.java +++ b/src/test/java/org/elasticsearch/test/unit/common/lucene/search/MoreLikeThisQueryTests.java @@ -19,6 +19,9 @@ package org.elasticsearch.test.unit.common.lucene.search; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -29,8 +32,6 @@ import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.search.MoreLikeThisQuery; import org.testng.annotations.Test; -import static org.elasticsearch.common.lucene.DocumentBuilder.doc; -import static org.elasticsearch.common.lucene.DocumentBuilder.field; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; @@ -47,8 +48,15 @@ public class MoreLikeThisQueryTests { indexWriter.commit(); - indexWriter.addDocument(doc().add(field("_id", "1")).add(field("text", "lucene")).build()); - indexWriter.addDocument(doc().add(field("_id", "2")).add(field("text", "lucene release")).build()); + Document document = new Document(); + document.add(new TextField("_id", "1", Field.Store.YES)); + document.add(new TextField("text", "lucene", Field.Store.YES)); + indexWriter.addDocument(document); + + document = new Document(); + document.add(new TextField("_id", "2", Field.Store.YES)); + document.add(new TextField("text", "lucene release", Field.Store.YES)); + indexWriter.addDocument(document); IndexReader reader = IndexReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); diff --git a/src/test/java/org/elasticsearch/test/unit/deps/lucene/SimpleLuceneTests.java b/src/test/java/org/elasticsearch/test/unit/deps/lucene/SimpleLuceneTests.java index f7430a815d2..4c1ac0e8332 100644 --- a/src/test/java/org/elasticsearch/test/unit/deps/lucene/SimpleLuceneTests.java +++ b/src/test/java/org/elasticsearch/test/unit/deps/lucene/SimpleLuceneTests.java @@ -32,8 +32,6 @@ import org.testng.annotations.Test; import java.io.IOException; import java.util.ArrayList; -import static org.elasticsearch.common.lucene.DocumentBuilder.doc; -import static org.elasticsearch.common.lucene.DocumentBuilder.field; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; @@ -47,7 +45,9 @@ public class SimpleLuceneTests { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); for (int i = 0; i < 10; i++) { - indexWriter.addDocument(doc().add(field("str", new String(new char[]{(char) (97 + i), (char) (97 + i)}))).build()); + Document document = new Document(); + document.add(new 
TextField("str", new String(new char[]{(char) (97 + i), (char) (97 + i)}), Field.Store.YES)); + indexWriter.addDocument(document); } IndexReader reader = IndexReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); @@ -62,8 +62,9 @@ public class SimpleLuceneTests { public void testAddDocAfterPrepareCommit() throws Exception { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); - indexWriter.addDocument(doc() - .add(field("_id", "1")).build()); + Document document = new Document(); + document.add(new TextField("_id", "1", Field.Store.YES)); + indexWriter.addDocument(document); DirectoryReader reader = IndexReader.open(indexWriter, true); assertThat(reader.numDocs(), equalTo(1)); @@ -71,8 +72,9 @@ public class SimpleLuceneTests { reader = DirectoryReader.openIfChanged(reader); assertThat(reader.numDocs(), equalTo(1)); - indexWriter.addDocument(doc() - .add(field("_id", "2")).build()); + document = new Document(); + document.add(new TextField("_id", "2", Field.Store.YES)); + indexWriter.addDocument(document); indexWriter.commit(); reader = DirectoryReader.openIfChanged(reader); assertThat(reader.numDocs(), equalTo(2)); @@ -83,7 +85,10 @@ public class SimpleLuceneTests { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); - indexWriter.addDocument(doc().add(field("_id", "1")).add(new IntField("test", 2, IntField.TYPE_STORED)).build()); + Document document = new Document(); + document.add(new TextField("_id", "1", Field.Store.YES)); + document.add(new IntField("test", 2, IntField.TYPE_STORED)); + indexWriter.addDocument(document); IndexReader reader = IndexReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); @@ -112,9 +117,10 @@ public class SimpleLuceneTests { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); - indexWriter.addDocument(doc() - .add(field("_id", "1")) - .add(field("#id", "1")).build()); + Document document = new Document(); + document.add(new TextField("_id", "1", Field.Store.YES)); + document.add(new TextField("#id", "1", Field.Store.YES)); + indexWriter.addDocument(document); IndexReader reader = IndexReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); @@ -146,10 +152,11 @@ public class SimpleLuceneTests { for (int j = 0; j < i; j++) { value.append(" ").append("value"); } - indexWriter.addDocument(doc() - .add(field("id", Integer.toString(i))) - .add(field("value", value.toString())) - .boost(i).build()); + Document document = new Document(); + document.add(new TextField("_id", Integer.toString(i), Field.Store.YES)); + document.add(new TextField("value", value.toString(), Field.Store.YES)); + document.boost(i); + indexWriter.addDocument(document); } IndexReader reader = IndexReader.open(indexWriter, true); @@ -173,9 +180,10 @@ public class SimpleLuceneTests { DirectoryReader reader = IndexReader.open(indexWriter, true); for (int i = 0; i < 100; i++) { - indexWriter.addDocument(doc() - .add(field("id", Integer.toString(i))) - .boost(i).build()); + Document document = new Document(); + document.add(new TextField("_id", Integer.toString(i), Field.Store.YES)); + document.boost(i); + indexWriter.addDocument(document); } reader = refreshReader(reader); diff --git 
a/src/test/java/org/elasticsearch/test/unit/deps/lucene/VectorHighlighterTests.java b/src/test/java/org/elasticsearch/test/unit/deps/lucene/VectorHighlighterTests.java index 09ba8e1dd42..f253594fe15 100644 --- a/src/test/java/org/elasticsearch/test/unit/deps/lucene/VectorHighlighterTests.java +++ b/src/test/java/org/elasticsearch/test/unit/deps/lucene/VectorHighlighterTests.java @@ -19,7 +19,10 @@ package org.elasticsearch.test.unit.deps.lucene; +import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.FieldType; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -32,8 +35,6 @@ import org.apache.lucene.store.RAMDirectory; import org.elasticsearch.common.lucene.Lucene; import org.testng.annotations.Test; -import static org.elasticsearch.common.lucene.DocumentBuilder.doc; -import static org.elasticsearch.common.lucene.DocumentBuilder.field; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.*; @@ -48,7 +49,10 @@ public class VectorHighlighterTests { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); - indexWriter.addDocument(doc().add(field("_id", "1")).add(field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)).build()); + Document document = new Document(); + document.add(new TextField("_id", "1", Field.Store.YES)); + document.add(new Field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + indexWriter.addDocument(document); IndexReader reader = IndexReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); @@ -68,7 +72,10 @@ public class VectorHighlighterTests { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); - indexWriter.addDocument(doc().add(field("_id", "1")).add(field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)).build()); + Document document = new Document(); + document.add(new TextField("_id", "1", Field.Store.YES)); + document.add(new Field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + indexWriter.addDocument(document); IndexReader reader = IndexReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); @@ -107,7 +114,10 @@ public class VectorHighlighterTests { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); - indexWriter.addDocument(doc().add(field("_id", "1")).add(field("content", "the big bad dog", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)).build()); + Document document = new Document(); + document.add(new TextField("_id", "1", Field.Store.YES)); + document.add(new Field("content", "the big bad dog", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS)); + indexWriter.addDocument(document); IndexReader reader = IndexReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); @@ -126,7 +136,10 @@ public class VectorHighlighterTests { Directory dir = new 
RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); - indexWriter.addDocument(doc().add(field("_id", "1")).add(field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO)).build()); + Document document = new Document(); + document.add(new TextField("_id", "1", Field.Store.YES)); + document.add(new Field("content", "the big bad dog", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO)); + indexWriter.addDocument(document); IndexReader reader = IndexReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); diff --git a/src/test/java/org/elasticsearch/test/unit/index/cache/filter/FilterCacheTests.java b/src/test/java/org/elasticsearch/test/unit/index/cache/filter/FilterCacheTests.java index faf959b5cc3..98b168c39d8 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/cache/filter/FilterCacheTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/cache/filter/FilterCacheTests.java @@ -19,10 +19,10 @@ package org.elasticsearch.test.unit.index.cache.filter; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.Term; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; +import org.apache.lucene.index.*; import org.apache.lucene.search.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; @@ -35,8 +35,6 @@ import org.testng.annotations.Test; import java.io.IOException; -import static org.elasticsearch.common.lucene.DocumentBuilder.doc; -import static org.elasticsearch.common.lucene.DocumentBuilder.field; import static org.elasticsearch.common.settings.ImmutableSettings.Builder.EMPTY_SETTINGS; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; @@ -55,12 +53,12 @@ public class FilterCacheTests { private void verifyCache(FilterCache filterCache) throws Exception { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); - IndexReader reader = IndexReader.open(indexWriter, true); + DirectoryReader reader = IndexReader.open(indexWriter, true); for (int i = 0; i < 100; i++) { - indexWriter.addDocument(doc() - .add(field("id", Integer.toString(i))) - .boost(i).build()); + Document document = new Document(); + document.add(new TextField("id", Integer.toString(i), Field.Store.YES)); + indexWriter.addDocument(document); } reader = refreshReader(reader); @@ -82,9 +80,9 @@ public class FilterCacheTests { indexWriter.close(); } - private IndexReader refreshReader(IndexReader reader) throws IOException { + private DirectoryReader refreshReader(DirectoryReader reader) throws IOException { IndexReader oldReader = reader; - reader = reader.reopen(); + reader = DirectoryReader.openIfChanged(reader); if (reader != oldReader) { oldReader.close(); } diff --git a/src/test/java/org/elasticsearch/test/unit/index/deletionpolicy/SnapshotDeletionPolicyTests.java b/src/test/java/org/elasticsearch/test/unit/index/deletionpolicy/SnapshotDeletionPolicyTests.java index f0957eb9ae2..071c5a239d1 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/deletionpolicy/SnapshotDeletionPolicyTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/deletionpolicy/SnapshotDeletionPolicyTests.java 
@@ -19,6 +19,9 @@ package org.elasticsearch.test.unit.index.deletionpolicy; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.store.RAMDirectory; import org.elasticsearch.common.lucene.Lucene; @@ -33,8 +36,6 @@ import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import static org.apache.lucene.index.IndexReader.listCommits; -import static org.elasticsearch.common.lucene.DocumentBuilder.doc; -import static org.elasticsearch.common.lucene.DocumentBuilder.field; import static org.elasticsearch.common.settings.ImmutableSettings.Builder.EMPTY_SETTINGS; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; @@ -64,29 +65,35 @@ public class SnapshotDeletionPolicyTests { indexWriter.close(); dir.close(); } + + private Document testDocument() { + Document document = new Document(); + document.add(new TextField("test", "1", Field.Store.YES)); + return document; + } @Test public void testSimpleSnapshot() throws Exception { // add a document and commit, resulting in one commit point - indexWriter.addDocument(doc().add(field("test", "1")).build()); + indexWriter.addDocument(testDocument()); indexWriter.commit(); assertThat(listCommits(dir).size(), equalTo(1)); // add another document and commit, resulting again in one commit point - indexWriter.addDocument(doc().add(field("test", "1")).build()); + indexWriter.addDocument(testDocument()); indexWriter.commit(); assertThat(listCommits(dir).size(), equalTo(1)); // snapshot the last commit, and then add a document and commit, now we should have two commit points SnapshotIndexCommit snapshot = deletionPolicy.snapshot(); - indexWriter.addDocument(doc().add(field("test", "1")).build()); + indexWriter.addDocument(testDocument()); indexWriter.commit(); assertThat(listCommits(dir).size(), equalTo(2)); // release the commit, add a document and commit, now we should be back to one commit point assertThat(snapshot.release(), equalTo(true)); - indexWriter.addDocument(doc().add(field("test", "1")).build()); + indexWriter.addDocument(testDocument()); indexWriter.commit(); assertThat(listCommits(dir).size(), equalTo(1)); } @@ -94,7 +101,7 @@ @Test public void testMultiSnapshot() throws Exception { // add a document and commit, resulting in one commit point - indexWriter.addDocument(doc().add(field("test", "1")).build()); + indexWriter.addDocument(testDocument()); indexWriter.commit(); assertThat(listCommits(dir).size(), equalTo(1)); @@ -103,19 +110,19 @@ SnapshotIndexCommit snapshot2 = deletionPolicy.snapshot(); // we should have two commit points - indexWriter.addDocument(doc().add(field("test", "1")).build()); + indexWriter.addDocument(testDocument()); indexWriter.commit(); assertThat(listCommits(dir).size(), equalTo(2)); // release one snapshot, we should still have two commit points assertThat(snapshot1.release(), equalTo(true)); - indexWriter.addDocument(doc().add(field("test", "1")).build()); + indexWriter.addDocument(testDocument()); indexWriter.commit(); assertThat(listCommits(dir).size(), equalTo(2)); // release the second snapshot, we should be back to one commit assertThat(snapshot2.release(), equalTo(true)); - indexWriter.addDocument(doc().add(field("test", "1")).build()); + indexWriter.addDocument(testDocument()); indexWriter.commit();
assertThat(listCommits(dir).size(), equalTo(1)); } @@ -123,7 +130,7 @@ @Test public void testMultiReleaseException() throws Exception { // add a document and commit, resulting in one commit point - indexWriter.addDocument(doc().add(field("test", "1")).build()); + indexWriter.addDocument(testDocument()); indexWriter.commit(); assertThat(listCommits(dir).size(), equalTo(1)); @@ -136,18 +143,18 @@ @Test public void testSimpleSnapshots() throws Exception { // add a document and commit, resulting in one commit point - indexWriter.addDocument(doc().add(field("test", "1")).build()); + indexWriter.addDocument(testDocument()); indexWriter.commit(); assertThat(listCommits(dir).size(), equalTo(1)); // add another document and commit, resulting again in one commit point - indexWriter.addDocument(doc().add(field("test", "1")).build()); + indexWriter.addDocument(testDocument()); indexWriter.commit(); assertThat(listCommits(dir).size(), equalTo(1)); // snapshot the last commit, and then add a document and commit, now we should have two commit points SnapshotIndexCommit snapshot = deletionPolicy.snapshot(); - indexWriter.addDocument(doc().add(field("test", "1")).build()); + indexWriter.addDocument(testDocument()); indexWriter.commit(); assertThat(listCommits(dir).size(), equalTo(2)); @@ -159,13 +166,13 @@ // we should have 3 commit points since we are holding onto the first two with snapshots // and we are using the keep only last assertThat(snapshot.release(), equalTo(true)); - indexWriter.addDocument(doc().add(field("test", "1")).build()); + indexWriter.addDocument(testDocument()); indexWriter.commit(); assertThat(listCommits(dir).size(), equalTo(3)); // now release the snapshots, we should be back to a single commit point assertThat(snapshots.release(), equalTo(true)); - indexWriter.addDocument(doc().add(field("test", "1")).build()); + indexWriter.addDocument(testDocument()); indexWriter.commit(); assertThat(listCommits(dir).size(), equalTo(1)); } diff --git a/src/test/java/org/elasticsearch/test/unit/index/engine/AbstractSimpleEngineTests.java b/src/test/java/org/elasticsearch/test/unit/index/engine/AbstractSimpleEngineTests.java index 9af8d99b16b..d473b5417ca 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/engine/AbstractSimpleEngineTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/engine/AbstractSimpleEngineTests.java @@ -19,13 +19,16 @@ package org.elasticsearch.test.unit.index.engine; +import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexDeletionPolicy; import org.apache.lucene.index.Term; import org.apache.lucene.search.TermQuery; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.common.lucene.uid.UidField; import org.elasticsearch.index.Index; import org.elasticsearch.index.VersionType; import org.elasticsearch.index.deletionpolicy.KeepOnlyLastDeletionPolicy; @@ -60,7 +63,6 @@ import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; -import static org.elasticsearch.common.lucene.DocumentBuilder.*; import static org.elasticsearch.common.settings.ImmutableSettings.Builder.EMPTY_SETTINGS; import static
org.elasticsearch.index.engine.Engine.Operation.Origin.REPLICA; import static org.hamcrest.MatcherAssert.assertThat; @@ -106,6 +108,18 @@ public abstract class AbstractSimpleEngineTests { threadPool.shutdownNow(); } } + + private Document testDocumentWithTextField(String id) { + Document document = testDocument(id); + document.add(new TextField("value", "test", Field.Store.YES)); + return document; + } + + private Document testDocument(String id) { + Document document = new Document(); + document.add(new UidField("_uid", id, 0)); + return document; + } protected Store createStore() throws IOException { return new Store(shardId, EMPTY_SETTINGS, null, new IndexSettingsService(shardId.index(), EMPTY_SETTINGS), new RamDirectoryService(shardId, EMPTY_SETTINGS)); @@ -151,10 +165,12 @@ public abstract class AbstractSimpleEngineTests { assertThat(segments.isEmpty(), equalTo(true)); // create a doc and refresh - ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).add(field("value", "test")).add(field(SourceFieldMapper.NAME, B_1.toBytes(), Field.Store.YES)).build(), Lucene.STANDARD_ANALYZER, B_1, false); + Document document = testDocumentWithTextField("1"); + document.add(new Field(SourceFieldMapper.NAME, B_1.toBytes(), TextField.TYPE_STORED)); + ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, document, Lucene.STANDARD_ANALYZER, B_1, false); engine.create(new Engine.Create(null, newUid("1"), doc)); - ParsedDocument doc2 = new ParsedDocument("2", "2", "test", null, -1, -1, doc().add(uidField("2")).add(field("value", "test")).build(), Lucene.STANDARD_ANALYZER, B_2, false); + ParsedDocument doc2 = new ParsedDocument("2", "2", "test", null, -1, -1, testDocumentWithTextField("2"), Lucene.STANDARD_ANALYZER, B_2, false); engine.create(new Engine.Create(null, newUid("2"), doc2)); engine.refresh(new Engine.Refresh(true)); @@ -175,7 +191,7 @@ public abstract class AbstractSimpleEngineTests { assertThat(segments.get(0).deletedDocs(), equalTo(0)); - ParsedDocument doc3 = new ParsedDocument("3", "3", "test", null, -1, -1, doc().add(uidField("3")).add(field("value", "test")).build(), Lucene.STANDARD_ANALYZER, B_3, false); + ParsedDocument doc3 = new ParsedDocument("3", "3", "test", null, -1, -1, testDocumentWithTextField("3"), Lucene.STANDARD_ANALYZER, B_3, false); engine.create(new Engine.Create(null, newUid("3"), doc3)); engine.refresh(new Engine.Refresh(true)); @@ -216,7 +232,9 @@ public abstract class AbstractSimpleEngineTests { searchResult.release(); // create a document - ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).add(field("value", "test")).add(field(SourceFieldMapper.NAME, B_1.toBytes(), Field.Store.YES)).build(), Lucene.STANDARD_ANALYZER, B_1, false); + Document document = testDocumentWithTextField("1"); + document.add(new Field(SourceFieldMapper.NAME, B_1.toBytes(), TextField.TYPE_STORED)); + ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, document, Lucene.STANDARD_ANALYZER, B_1, false); engine.create(new Engine.Create(null, newUid("1"), doc)); // its not there... 
@@ -250,7 +268,10 @@ public abstract class AbstractSimpleEngineTests { assertThat(getResult.docIdAndVersion(), notNullValue()); // now do an update - doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).add(field("value", "test1")).add(field(SourceFieldMapper.NAME, B_2.toBytes(), Field.Store.YES)).build(), Lucene.STANDARD_ANALYZER, B_2, false); + document = testDocument("1"); + document.add(new TextField("value", "test1", Field.Store.YES)); + document.add(new Field(SourceFieldMapper.NAME, B_2.toBytes(), TextField.TYPE_STORED)); + doc = new ParsedDocument("1", "1", "test", null, -1, -1, document, Lucene.STANDARD_ANALYZER, B_2, false); engine.index(new Engine.Index(null, newUid("1"), doc)); // its not updated yet... @@ -299,7 +320,9 @@ public abstract class AbstractSimpleEngineTests { searchResult.release(); // add it back - doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).add(field("value", "test")).add(field(SourceFieldMapper.NAME, B_1.toBytes(), Field.Store.YES)).build(), Lucene.STANDARD_ANALYZER, B_1, false); + document = testDocumentWithTextField("1"); + document.add(new Field(SourceFieldMapper.NAME, B_1.toBytes(), TextField.TYPE_STORED)); + doc = new ParsedDocument("1", "1", "test", null, -1, -1, document, Lucene.STANDARD_ANALYZER, B_1, false); engine.create(new Engine.Create(null, newUid("1"), doc)); // its not there... @@ -331,7 +354,9 @@ public abstract class AbstractSimpleEngineTests { // make sure we can still work with the engine // now do an update - doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).add(field("value", "test1")).build(), Lucene.STANDARD_ANALYZER, B_1, false); + document = testDocument("1"); + document.add(new TextField("value", "test1", Field.Store.YES)); + doc = new ParsedDocument("1", "1", "test", null, -1, -1, document, Lucene.STANDARD_ANALYZER, B_1, false); engine.index(new Engine.Index(null, newUid("1"), doc)); // its not updated yet... @@ -360,7 +385,7 @@ public abstract class AbstractSimpleEngineTests { searchResult.release(); // create a document - ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).add(field("value", "test")).build(), Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, testDocumentWithTextField("1"), Lucene.STANDARD_ANALYZER, B_1, false); engine.create(new Engine.Create(null, newUid("1"), doc)); // its not there... 
@@ -394,7 +419,7 @@ public abstract class AbstractSimpleEngineTests { @Test public void testSimpleSnapshot() throws Exception { // create a document - ParsedDocument doc1 = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).add(field("value", "test")).build(), Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc1 = new ParsedDocument("1", "1", "test", null, -1, -1, testDocumentWithTextField("1"), Lucene.STANDARD_ANALYZER, B_1, false); engine.create(new Engine.Create(null, newUid("1"), doc1)); final ExecutorService executorService = Executors.newCachedThreadPool(); @@ -412,10 +437,10 @@ public abstract class AbstractSimpleEngineTests { @Override public Object call() throws Exception { engine.flush(new Engine.Flush()); - ParsedDocument doc2 = new ParsedDocument("2", "2", "test", null, -1, -1, doc().add(uidField("2")).add(field("value", "test")).build(), Lucene.STANDARD_ANALYZER, B_2, false); + ParsedDocument doc2 = new ParsedDocument("2", "2", "test", null, -1, -1, testDocumentWithTextField("2"), Lucene.STANDARD_ANALYZER, B_2, false); engine.create(new Engine.Create(null, newUid("2"), doc2)); engine.flush(new Engine.Flush()); - ParsedDocument doc3 = new ParsedDocument("3", "3", "test", null, -1, -1, doc().add(uidField("3")).add(field("value", "test")).build(), Lucene.STANDARD_ANALYZER, B_3, false); + ParsedDocument doc3 = new ParsedDocument("3", "3", "test", null, -1, -1, testDocumentWithTextField("3"), Lucene.STANDARD_ANALYZER, B_3, false); engine.create(new Engine.Create(null, newUid("3"), doc3)); return null; } @@ -452,7 +477,7 @@ public abstract class AbstractSimpleEngineTests { @Test public void testSimpleRecover() throws Exception { - ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).add(field("value", "test")).build(), Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, testDocumentWithTextField("1"), Lucene.STANDARD_ANALYZER, B_1, false); engine.create(new Engine.Create(null, newUid("1"), doc)); engine.flush(new Engine.Flush()); @@ -497,10 +522,10 @@ public abstract class AbstractSimpleEngineTests { @Test public void testRecoverWithOperationsBetweenPhase1AndPhase2() throws Exception { - ParsedDocument doc1 = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).add(field("value", "test")).build(), Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc1 = new ParsedDocument("1", "1", "test", null, -1, -1, testDocumentWithTextField("1"), Lucene.STANDARD_ANALYZER, B_1, false); engine.create(new Engine.Create(null, newUid("1"), doc1)); engine.flush(new Engine.Flush()); - ParsedDocument doc2 = new ParsedDocument("2", "2", "test", null, -1, -1, doc().add(uidField("2")).add(field("value", "test")).build(), Lucene.STANDARD_ANALYZER, B_2, false); + ParsedDocument doc2 = new ParsedDocument("2", "2", "test", null, -1, -1, testDocumentWithTextField("2"), Lucene.STANDARD_ANALYZER, B_2, false); engine.create(new Engine.Create(null, newUid("2"), doc2)); engine.recover(new Engine.RecoveryHandler() { @@ -528,10 +553,10 @@ public abstract class AbstractSimpleEngineTests { @Test public void testRecoverWithOperationsBetweenPhase1AndPhase2AndPhase3() throws Exception { - ParsedDocument doc1 = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).add(field("value", "test")).build(), Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc1 = new ParsedDocument("1", "1", "test", null, -1, -1, 
testDocumentWithTextField("1"), Lucene.STANDARD_ANALYZER, B_1, false); engine.create(new Engine.Create(null, newUid("1"), doc1)); engine.flush(new Engine.Flush()); - ParsedDocument doc2 = new ParsedDocument("2", "2", "test", null, -1, -1, doc().add(uidField("2")).add(field("value", "test")).build(), Lucene.STANDARD_ANALYZER, B_2, false); + ParsedDocument doc2 = new ParsedDocument("2", "2", "test", null, -1, -1, testDocumentWithTextField("2"), Lucene.STANDARD_ANALYZER, B_2, false); engine.create(new Engine.Create(null, newUid("2"), doc2)); engine.recover(new Engine.RecoveryHandler() { @@ -547,7 +572,7 @@ public abstract class AbstractSimpleEngineTests { assertThat(create.source().toBytesArray(), equalTo(B_2)); // add for phase3 - ParsedDocument doc3 = new ParsedDocument("3", "3", "test", null, -1, -1, doc().add(uidField("3")).add(field("value", "test")).build(), Lucene.STANDARD_ANALYZER, B_3, false); + ParsedDocument doc3 = new ParsedDocument("3", "3", "test", null, -1, -1, testDocumentWithTextField("3"), Lucene.STANDARD_ANALYZER, B_3, false); engine.create(new Engine.Create(null, newUid("3"), doc3)); } @@ -566,7 +591,7 @@ public abstract class AbstractSimpleEngineTests { @Test public void testVersioningNewCreate() { - ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).build(), Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, testDocument("1"), Lucene.STANDARD_ANALYZER, B_1, false); Engine.Create create = new Engine.Create(null, newUid("1"), doc); engine.create(create); assertThat(create.version(), equalTo(1l)); @@ -578,7 +603,7 @@ public abstract class AbstractSimpleEngineTests { @Test public void testExternalVersioningNewCreate() { - ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).build(), Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, testDocument("1"), Lucene.STANDARD_ANALYZER, B_1, false); Engine.Create create = new Engine.Create(null, newUid("1"), doc).versionType(VersionType.EXTERNAL).version(12); engine.create(create); assertThat(create.version(), equalTo(12l)); @@ -590,7 +615,7 @@ public abstract class AbstractSimpleEngineTests { @Test public void testVersioningNewIndex() { - ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).build(), Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, testDocument("1"), Lucene.STANDARD_ANALYZER, B_1, false); Engine.Index index = new Engine.Index(null, newUid("1"), doc); engine.index(index); assertThat(index.version(), equalTo(1l)); @@ -602,7 +627,7 @@ public abstract class AbstractSimpleEngineTests { @Test public void testExternalVersioningNewIndex() { - ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).build(), Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, testDocument("1"), Lucene.STANDARD_ANALYZER, B_1, false); Engine.Index index = new Engine.Index(null, newUid("1"), doc).versionType(VersionType.EXTERNAL).version(12); engine.index(index); assertThat(index.version(), equalTo(12l)); @@ -614,7 +639,7 @@ public abstract class AbstractSimpleEngineTests { @Test public void testVersioningIndexConflict() { - ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).build(), 
Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, testDocument("1"), Lucene.STANDARD_ANALYZER, B_1, false); Engine.Index index = new Engine.Index(null, newUid("1"), doc); engine.index(index); assertThat(index.version(), equalTo(1l)); @@ -643,7 +668,7 @@ public abstract class AbstractSimpleEngineTests { @Test public void testExternalVersioningIndexConflict() { - ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).build(), Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, testDocument("1"), Lucene.STANDARD_ANALYZER, B_1, false); Engine.Index index = new Engine.Index(null, newUid("1"), doc).versionType(VersionType.EXTERNAL).version(12); engine.index(index); assertThat(index.version(), equalTo(12l)); @@ -663,7 +688,7 @@ public abstract class AbstractSimpleEngineTests { @Test public void testVersioningIndexConflictWithFlush() { - ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).build(), Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, testDocument("1"), Lucene.STANDARD_ANALYZER, B_1, false); Engine.Index index = new Engine.Index(null, newUid("1"), doc); engine.index(index); assertThat(index.version(), equalTo(1l)); @@ -694,7 +719,7 @@ public abstract class AbstractSimpleEngineTests { @Test public void testExternalVersioningIndexConflictWithFlush() { - ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).build(), Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, testDocument("1"), Lucene.STANDARD_ANALYZER, B_1, false); Engine.Index index = new Engine.Index(null, newUid("1"), doc).versionType(VersionType.EXTERNAL).version(12); engine.index(index); assertThat(index.version(), equalTo(12l)); @@ -716,7 +741,7 @@ public abstract class AbstractSimpleEngineTests { @Test public void testVersioningDeleteConflict() { - ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).build(), Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, testDocument("1"), Lucene.STANDARD_ANALYZER, B_1, false); Engine.Index index = new Engine.Index(null, newUid("1"), doc); engine.index(index); assertThat(index.version(), equalTo(1l)); @@ -767,7 +792,7 @@ public abstract class AbstractSimpleEngineTests { @Test public void testVersioningDeleteConflictWithFlush() { - ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).build(), Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, testDocument("1"), Lucene.STANDARD_ANALYZER, B_1, false); Engine.Index index = new Engine.Index(null, newUid("1"), doc); engine.index(index); assertThat(index.version(), equalTo(1l)); @@ -824,7 +849,7 @@ public abstract class AbstractSimpleEngineTests { @Test public void testVersioningCreateExistsException() { - ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).build(), Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, testDocument("1"), Lucene.STANDARD_ANALYZER, B_1, false); Engine.Create create = new Engine.Create(null, newUid("1"), doc); 
engine.create(create); assertThat(create.version(), equalTo(1l)); @@ -840,7 +865,7 @@ public abstract class AbstractSimpleEngineTests { @Test public void testVersioningCreateExistsExceptionWithFlush() { - ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).build(), Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, testDocument("1"), Lucene.STANDARD_ANALYZER, B_1, false); Engine.Create create = new Engine.Create(null, newUid("1"), doc); engine.create(create); assertThat(create.version(), equalTo(1l)); @@ -858,7 +883,7 @@ public abstract class AbstractSimpleEngineTests { @Test public void testVersioningReplicaConflict1() { - ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).build(), Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, testDocument("1"), Lucene.STANDARD_ANALYZER, B_1, false); Engine.Index index = new Engine.Index(null, newUid("1"), doc); engine.index(index); assertThat(index.version(), equalTo(1l)); @@ -893,7 +918,7 @@ public abstract class AbstractSimpleEngineTests { @Test public void testVersioningReplicaConflict2() { - ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, doc().add(uidField("1")).build(), Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, testDocument("1"), Lucene.STANDARD_ANALYZER, B_1, false); Engine.Index index = new Engine.Index(null, newUid("1"), doc); engine.index(index); assertThat(index.version(), equalTo(1l)); diff --git a/src/test/java/org/elasticsearch/test/unit/index/field/data/doubles/DoubleFieldDataTests.java b/src/test/java/org/elasticsearch/test/unit/index/field/data/doubles/DoubleFieldDataTests.java index 12b2e35222b..7466268707b 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/field/data/doubles/DoubleFieldDataTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/field/data/doubles/DoubleFieldDataTests.java @@ -19,7 +19,9 @@ package org.elasticsearch.test.unit.index.field.data.doubles; -import org.apache.lucene.document.NumericField; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.DoubleField; +import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -31,7 +33,6 @@ import org.testng.annotations.Test; import java.util.ArrayList; -import static org.elasticsearch.common.lucene.DocumentBuilder.doc; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; @@ -45,28 +46,28 @@ public class DoubleFieldDataTests { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); - indexWriter.addDocument(doc() - .add(new NumericField("svalue").setDoubleValue(4)) - .add(new NumericField("mvalue").setDoubleValue(104)) - .build()); + Document document = new Document(); + document.add(new DoubleField("svalue", 4, Field.Store.NO)); + document.add(new DoubleField("mvalue", 104, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new NumericField("svalue").setDoubleValue(3)) - .add(new NumericField("mvalue").setDoubleValue(104)) - .add(new NumericField("mvalue").setDoubleValue(105)) - .build()); + document = new Document(); + 
document.add(new DoubleField("svalue", 3, Field.Store.NO)); + document.add(new DoubleField("mvalue", 104, Field.Store.NO)); + document.add(new DoubleField("mvalue", 105, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new NumericField("svalue").setDoubleValue(7)) - .build()); + document = new Document(); + document.add(new DoubleField("svalue", 7, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new NumericField("mvalue").setDoubleValue(102)) - .build()); + document = new Document(); + document.add(new DoubleField("mvalue", 102, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new NumericField("svalue").setDoubleValue(4)) - .build()); + document = new Document(); + document.add(new DoubleField("svalue", 4, Field.Store.NO)); + indexWriter.addDocument(document); IndexReader reader = IndexReader.open(indexWriter, true); diff --git a/src/test/java/org/elasticsearch/test/unit/index/field/data/floats/FloatFieldDataTests.java b/src/test/java/org/elasticsearch/test/unit/index/field/data/floats/FloatFieldDataTests.java index 93cfc3ee1f0..6b96b13aed0 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/field/data/floats/FloatFieldDataTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/field/data/floats/FloatFieldDataTests.java @@ -19,7 +19,9 @@ package org.elasticsearch.test.unit.index.field.data.floats; -import org.apache.lucene.document.NumericField; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.FloatField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -31,7 +33,6 @@ import org.testng.annotations.Test; import java.util.ArrayList; -import static org.elasticsearch.common.lucene.DocumentBuilder.doc; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; @@ -45,28 +46,28 @@ public class FloatFieldDataTests { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); - indexWriter.addDocument(doc() - .add(new NumericField("svalue").setFloatValue(4)) - .add(new NumericField("mvalue").setFloatValue(104)) - .build()); + Document document = new Document(); + document.add(new FloatField("svalue", 4, Field.Store.NO)); + document.add(new FloatField("mvalue", 104, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new NumericField("svalue").setFloatValue(3)) - .add(new NumericField("mvalue").setFloatValue(104)) - .add(new NumericField("mvalue").setFloatValue(105)) - .build()); + document = new Document(); + document.add(new FloatField("svalue", 3, Field.Store.NO)); + document.add(new FloatField("mvalue", 104, Field.Store.NO)); + document.add(new FloatField("mvalue", 105, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new NumericField("svalue").setFloatValue(7)) - .build()); + document = new Document(); + document.add(new FloatField("svalue", 7, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new NumericField("mvalue").setFloatValue(102)) - .build()); + document = new Document(); + document.add(new FloatField("mvalue", 102, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new 
NumericField("svalue").setFloatValue(4)) - .build()); + document = new Document(); + document.add(new FloatField("svalue", 4, Field.Store.NO)); + indexWriter.addDocument(document); IndexReader reader = IndexReader.open(indexWriter, true); diff --git a/src/test/java/org/elasticsearch/test/unit/index/field/data/ints/IntFieldDataTests.java b/src/test/java/org/elasticsearch/test/unit/index/field/data/ints/IntFieldDataTests.java index ba565918a6c..6829f14aeb5 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/field/data/ints/IntFieldDataTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/field/data/ints/IntFieldDataTests.java @@ -19,7 +19,9 @@ package org.elasticsearch.test.unit.index.field.data.ints; -import org.apache.lucene.document.NumericField; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.IntField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -31,7 +33,6 @@ import org.testng.annotations.Test; import java.util.ArrayList; -import static org.elasticsearch.common.lucene.DocumentBuilder.doc; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; @@ -45,28 +46,28 @@ public class IntFieldDataTests { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); - indexWriter.addDocument(doc() - .add(new NumericField("svalue").setIntValue(4)) - .add(new NumericField("mvalue").setIntValue(104)) - .build()); + Document document = new Document(); + document.add(new IntField("svalue", 4, Field.Store.NO)); + document.add(new IntField("mvalue", 104, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new NumericField("svalue").setIntValue(3)) - .add(new NumericField("mvalue").setIntValue(104)) - .add(new NumericField("mvalue").setIntValue(105)) - .build()); + document = new Document(); + document.add(new IntField("svalue", 3, Field.Store.NO)); + document.add(new IntField("mvalue", 104, Field.Store.NO)); + document.add(new IntField("mvalue", 105, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new NumericField("svalue").setIntValue(7)) - .build()); + document = new Document(); + document.add(new IntField("svalue", 7, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new NumericField("mvalue").setIntValue(102)) - .build()); + document = new Document(); + document.add(new IntField("mvalue", 102, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new NumericField("svalue").setIntValue(4)) - .build()); + document = new Document(); + document.add(new IntField("svalue", 4, Field.Store.NO)); + indexWriter.addDocument(document); IndexReader reader = IndexReader.open(indexWriter, true); diff --git a/src/test/java/org/elasticsearch/test/unit/index/field/data/longs/LongFieldDataTests.java b/src/test/java/org/elasticsearch/test/unit/index/field/data/longs/LongFieldDataTests.java index c1fa6d85bed..b80588cf548 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/field/data/longs/LongFieldDataTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/field/data/longs/LongFieldDataTests.java @@ -19,7 +19,9 @@ package org.elasticsearch.test.unit.index.field.data.longs; -import 
org.apache.lucene.document.NumericField; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.LongField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -31,7 +33,6 @@ import org.testng.annotations.Test; import java.util.ArrayList; -import static org.elasticsearch.common.lucene.DocumentBuilder.doc; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; @@ -45,28 +46,28 @@ public class LongFieldDataTests { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); - indexWriter.addDocument(doc() - .add(new NumericField("svalue").setLongValue(4)) - .add(new NumericField("mvalue").setLongValue(104)) - .build()); + Document document = new Document(); + document.add(new LongField("svalue", 4, Field.Store.NO)); + document.add(new LongField("mvalue", 104, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new NumericField("svalue").setLongValue(3)) - .add(new NumericField("mvalue").setLongValue(104)) - .add(new NumericField("mvalue").setLongValue(105)) - .build()); + document = new Document(); + document.add(new LongField("svalue", 3, Field.Store.NO)); + document.add(new LongField("mvalue", 104, Field.Store.NO)); + document.add(new LongField("mvalue", 105, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new NumericField("svalue").setLongValue(7)) - .build()); + document = new Document(); + document.add(new LongField("svalue", 7, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new NumericField("mvalue").setLongValue(102)) - .build()); + document = new Document(); + document.add(new LongField("mvalue", 102, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new NumericField("svalue").setLongValue(4)) - .build()); + document = new Document(); + document.add(new LongField("svalue", 4, Field.Store.NO)); + indexWriter.addDocument(document); IndexReader reader = IndexReader.open(indexWriter, true); diff --git a/src/test/java/org/elasticsearch/test/unit/index/field/data/shorts/ShortFieldDataTests.java b/src/test/java/org/elasticsearch/test/unit/index/field/data/shorts/ShortFieldDataTests.java index 8c89da6c95c..a656224ffaf 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/field/data/shorts/ShortFieldDataTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/field/data/shorts/ShortFieldDataTests.java @@ -19,7 +19,9 @@ package org.elasticsearch.test.unit.index.field.data.shorts; -import org.apache.lucene.document.NumericField; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.IntField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; @@ -31,7 +33,6 @@ import org.testng.annotations.Test; import java.util.ArrayList; -import static org.elasticsearch.common.lucene.DocumentBuilder.doc; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; @@ -45,28 +46,28 @@ public class ShortFieldDataTests { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, 
Lucene.STANDARD_ANALYZER)); - indexWriter.addDocument(doc() - .add(new NumericField("svalue").setIntValue(4)) - .add(new NumericField("mvalue").setIntValue(104)) - .build()); + Document document = new Document(); + document.add(new IntField("svalue", 4, Field.Store.NO)); + document.add(new IntField("mvalue", 104, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new NumericField("svalue").setIntValue(3)) - .add(new NumericField("mvalue").setIntValue(104)) - .add(new NumericField("mvalue").setIntValue(105)) - .build()); + document = new Document(); + document.add(new IntField("svalue", 3, Field.Store.NO)); + document.add(new IntField("mvalue", 104, Field.Store.NO)); + document.add(new IntField("mvalue", 105, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new NumericField("svalue").setIntValue(7)) - .build()); + document = new Document(); + document.add(new IntField("svalue", 7, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new NumericField("mvalue").setIntValue(102)) - .build()); + document = new Document(); + document.add(new IntField("mvalue", 102, Field.Store.NO)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(new NumericField("svalue").setIntValue(4)) - .build()); + document = new Document(); + document.add(new IntField("svalue", 4, Field.Store.NO)); + indexWriter.addDocument(document); IndexReader reader = IndexReader.open(indexWriter, true); diff --git a/src/test/java/org/elasticsearch/test/unit/index/field/data/strings/StringFieldDataTests.java b/src/test/java/org/elasticsearch/test/unit/index/field/data/strings/StringFieldDataTests.java index a82a2cd00c0..bb026de1afe 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/field/data/strings/StringFieldDataTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/field/data/strings/StringFieldDataTests.java @@ -19,6 +19,9 @@ package org.elasticsearch.test.unit.index.field.data.strings; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.TextField; import org.apache.lucene.index.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; @@ -30,8 +33,6 @@ import org.testng.annotations.Test; import java.util.ArrayList; -import static org.elasticsearch.common.lucene.DocumentBuilder.doc; -import static org.elasticsearch.common.lucene.DocumentBuilder.field; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; @@ -45,22 +46,27 @@ public class StringFieldDataTests { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); - indexWriter.addDocument(doc() - .add(field("svalue", "zzz")) - .add(field("mvalue", "111")).build()); + Document document = new Document(); + document.add(new TextField("svalue", "zzz", Field.Store.YES)); + document.add(new TextField("mvalue", "111", Field.Store.YES)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(field("svalue", "xxx")) - .add(field("mvalue", "222 333")).build()); + document = new Document(); + document.add(new TextField("svalue", "xxx", Field.Store.YES)); + document.add(new TextField("mvalue", "222 333", Field.Store.YES)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(field("mvalue", "333 444")).build()); + document = new Document(); 
+ document.add(new TextField("mvalue", "333 444", Field.Store.YES)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(field("svalue", "aaa")).build()); + document = new Document(); + document.add(new TextField("svalue", "aaa", Field.Store.YES)); + indexWriter.addDocument(document); - indexWriter.addDocument(doc() - .add(field("svalue", "aaa")).build()); + document = new Document(); + document.add(new TextField("svalue", "aaa", Field.Store.YES)); + indexWriter.addDocument(document); AtomicReader reader = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(indexWriter, false)); From 787b7a39003f44cacde45e7f61a41c906822e47b Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Wed, 31 Oct 2012 22:20:57 -0400 Subject: [PATCH 083/146] lucene4: more unit test cleanup --- .../TruncateTokenFilterTests.java | 5 ++-- .../miscellaneous/UniqueTokenFilterTests.java | 5 ++-- .../uidscan/LuceneUidScanBenchmark.java | 17 +++++++------ .../lucene/search/TermsFilterTests.java | 25 ++++++++----------- .../SnapshotDeletionPolicyTests.java | 9 +++++-- .../engine/robin/SimpleRobinEngineTests.java | 3 +-- .../data/doubles/DoubleFieldDataTests.java | 6 ++--- .../data/floats/FloatFieldDataTests.java | 6 ++--- .../field/data/ints/IntFieldDataTests.java | 6 ++--- .../field/data/longs/LongFieldDataTests.java | 6 ++--- .../data/shorts/ShortFieldDataTests.java | 6 ++--- .../mapper/boost/CustomBoostMappingTests.java | 16 ++++++------ .../query/SimpleIndexQueryParserTests.java | 1 + 13 files changed, 53 insertions(+), 58 deletions(-) diff --git a/src/test/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilterTests.java b/src/test/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilterTests.java index b87e9cdaa8e..e7552e30456 100644 --- a/src/test/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilterTests.java +++ b/src/test/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilterTests.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.miscellaneous; import org.apache.lucene.analysis.*; +import org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.elasticsearch.common.lucene.Lucene; import org.testng.annotations.Test; @@ -38,7 +39,7 @@ public class TruncateTokenFilterTests { @Test public void simpleTest() throws IOException { - Analyzer analyzer = new ReusableAnalyzerBase() { + Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { @@ -47,7 +48,7 @@ public class TruncateTokenFilterTests { } }; - TokenStream test = analyzer.reusableTokenStream("test", new StringReader("a bb ccc dddd eeeee")); + TokenStream test = analyzer.tokenStream("test", new StringReader("a bb ccc dddd eeeee")); CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class); assertThat(test.incrementToken(), equalTo(true)); assertThat(termAttribute.toString(), equalTo("a")); diff --git a/src/test/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilterTests.java b/src/test/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilterTests.java index 3b9dd3f92c4..97d5b027367 100644 --- a/src/test/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilterTests.java +++ b/src/test/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilterTests.java @@ -20,6 +20,7 @@ package org.apache.lucene.analysis.miscellaneous; import org.apache.lucene.analysis.*; +import 
org.apache.lucene.analysis.core.WhitespaceTokenizer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.elasticsearch.common.lucene.Lucene; import org.testng.annotations.Test; @@ -38,7 +39,7 @@ public class UniqueTokenFilterTests { @Test public void simpleTest() throws IOException { - Analyzer analyzer = new ReusableAnalyzerBase() { + Analyzer analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { @@ -47,7 +48,7 @@ public class UniqueTokenFilterTests { } }; - TokenStream test = analyzer.reusableTokenStream("test", new StringReader("this test with test")); + TokenStream test = analyzer.tokenStream("test", new StringReader("this test with test")); CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class); assertThat(test.incrementToken(), equalTo(true)); assertThat(termAttribute.toString(), equalTo("this")); diff --git a/src/test/java/org/elasticsearch/benchmark/common/lucene/uidscan/LuceneUidScanBenchmark.java b/src/test/java/org/elasticsearch/benchmark/common/lucene/uidscan/LuceneUidScanBenchmark.java index b6ac579ea9c..f35ae9d0425 100644 --- a/src/test/java/org/elasticsearch/benchmark/common/lucene/uidscan/LuceneUidScanBenchmark.java +++ b/src/test/java/org/elasticsearch/benchmark/common/lucene/uidscan/LuceneUidScanBenchmark.java @@ -23,6 +23,7 @@ import jsr166y.ThreadLocalRandom; import org.apache.lucene.document.Document; import org.apache.lucene.index.*; import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Numbers; import org.elasticsearch.common.StopWatch; import org.elasticsearch.common.lucene.Lucene; @@ -68,21 +69,21 @@ public class LuceneUidScanBenchmark { try { for (long i = 0; i < SCAN_COUNT; i++) { long id = startUid + (Math.abs(ThreadLocalRandom.current().nextInt()) % INDEX_COUNT); - TermPositions uid = reader.termPositions(new Term("_uid", Long.toString(id))); - uid.next(); + DocsAndPositionsEnum uid = MultiFields.getTermPositionsEnum(reader, + MultiFields.getLiveDocs(reader), + "_uid", + new BytesRef(Long.toString(id))); + uid.nextDoc(); uid.nextPosition(); - if (!uid.isPayloadAvailable()) { - uid.close(); + if (uid.getPayload() == null) { System.err.println("no payload..."); break; } - byte[] payload = uid.getPayload(new byte[8], 0); - if (Numbers.bytesToLong(payload) != id) { - uid.close(); + BytesRef payload = uid.getPayload(); + if (Numbers.bytesToLong(BytesRef.deepCopyOf(payload).bytes) != id) { System.err.println("wrong id..."); break; } - uid.close(); } } catch (Exception e) { e.printStackTrace(); diff --git a/src/test/java/org/elasticsearch/test/unit/common/lucene/search/TermsFilterTests.java b/src/test/java/org/elasticsearch/test/unit/common/lucene/search/TermsFilterTests.java index 4e0ef138c36..5d75df4de63 100644 --- a/src/test/java/org/elasticsearch/test/unit/common/lucene/search/TermsFilterTests.java +++ b/src/test/java/org/elasticsearch/test/unit/common/lucene/search/TermsFilterTests.java @@ -19,13 +19,10 @@ package org.elasticsearch.test.unit.common.lucene.search; -import org.apache.lucene.analysis.KeywordAnalyzer; +import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.Term; +import org.apache.lucene.index.*; import 
org.apache.lucene.search.XTermsFilter; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; @@ -58,19 +55,19 @@ public class TermsFilterTests { w.commit(); } } - IndexReader reader = w.getReader(); + AtomicReader reader = new SlowCompositeReaderWrapper(DirectoryReader.open(w, true)); w.close(); TermFilter tf = new TermFilter(new Term(fieldName, "19")); - FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(reader); + FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(reader.getContext(), reader.getLiveDocs()); assertThat(bits, nullValue()); tf = new TermFilter(new Term(fieldName, "20")); - bits = (FixedBitSet) tf.getDocIdSet(reader); + bits = (FixedBitSet) tf.getDocIdSet(reader.getContext(), reader.getLiveDocs()); assertThat(bits.cardinality(), equalTo(1)); tf = new TermFilter(new Term("all", "xxx")); - bits = (FixedBitSet) tf.getDocIdSet(reader); + bits = (FixedBitSet) tf.getDocIdSet(reader.getContext(), reader.getLiveDocs()); assertThat(bits.cardinality(), equalTo(100)); reader.close(); @@ -92,23 +89,23 @@ public class TermsFilterTests { w.commit(); } } - IndexReader reader = w.getReader(); + AtomicReader reader = new SlowCompositeReaderWrapper(DirectoryReader.open(w, true)); w.close(); XTermsFilter tf = new XTermsFilter(new Term[]{new Term(fieldName, "19")}); - FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(reader); + FixedBitSet bits = (FixedBitSet) tf.getDocIdSet(reader.getContext(), reader.getLiveDocs()); assertThat(bits, nullValue()); tf = new XTermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20")}); - bits = (FixedBitSet) tf.getDocIdSet(reader); + bits = (FixedBitSet) tf.getDocIdSet(reader.getContext(), reader.getLiveDocs()); assertThat(bits.cardinality(), equalTo(1)); tf = new XTermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20"), new Term(fieldName, "10")}); - bits = (FixedBitSet) tf.getDocIdSet(reader); + bits = (FixedBitSet) tf.getDocIdSet(reader.getContext(), reader.getLiveDocs()); assertThat(bits.cardinality(), equalTo(2)); tf = new XTermsFilter(new Term[]{new Term(fieldName, "19"), new Term(fieldName, "20"), new Term(fieldName, "10"), new Term(fieldName, "00")}); - bits = (FixedBitSet) tf.getDocIdSet(reader); + bits = (FixedBitSet) tf.getDocIdSet(reader.getContext(), reader.getLiveDocs()); assertThat(bits.cardinality(), equalTo(2)); reader.close(); diff --git a/src/test/java/org/elasticsearch/test/unit/index/deletionpolicy/SnapshotDeletionPolicyTests.java b/src/test/java/org/elasticsearch/test/unit/index/deletionpolicy/SnapshotDeletionPolicyTests.java index 071c5a239d1..633ef56c509 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/deletionpolicy/SnapshotDeletionPolicyTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/deletionpolicy/SnapshotDeletionPolicyTests.java @@ -23,7 +23,9 @@ import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.TextField; import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.util.Version; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.index.Index; import org.elasticsearch.index.deletionpolicy.KeepOnlyLastDeletionPolicy; @@ -35,7 +37,7 @@ import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; -import static org.apache.lucene.index.IndexReader.listCommits; +import static 
org.apache.lucene.index.DirectoryReader.listCommits; import static org.elasticsearch.common.settings.ImmutableSettings.Builder.EMPTY_SETTINGS; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; @@ -57,7 +59,10 @@ public class SnapshotDeletionPolicyTests { public void setUp() throws Exception { dir = new RAMDirectory(); deletionPolicy = new SnapshotDeletionPolicy(new KeepOnlyLastDeletionPolicy(shardId, EMPTY_SETTINGS)); - indexWriter = new IndexWriter(dir, Lucene.STANDARD_ANALYZER, true, deletionPolicy, IndexWriter.MaxFieldLength.UNLIMITED); + // LUCENE 4 UPGRADE: Not sure about version. + indexWriter = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_31, Lucene.STANDARD_ANALYZER) + .setIndexDeletionPolicy(deletionPolicy) + .setOpenMode(IndexWriterConfig.OpenMode.CREATE)); } @AfterClass diff --git a/src/test/java/org/elasticsearch/test/unit/index/engine/robin/SimpleRobinEngineTests.java b/src/test/java/org/elasticsearch/test/unit/index/engine/robin/SimpleRobinEngineTests.java index 6fd1252e86e..d67f9f2845c 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/engine/robin/SimpleRobinEngineTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/engine/robin/SimpleRobinEngineTests.java @@ -20,7 +20,6 @@ package org.elasticsearch.test.unit.index.engine.robin; import org.elasticsearch.index.analysis.AnalysisService; -import org.elasticsearch.index.cache.bloom.none.NoneBloomCache; import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.engine.robin.RobinEngine; import org.elasticsearch.index.indexing.ShardIndexingService; @@ -39,6 +38,6 @@ public class SimpleRobinEngineTests extends AbstractSimpleEngineTests { protected Engine createEngine(Store store, Translog translog) { return new RobinEngine(shardId, EMPTY_SETTINGS, threadPool, new IndexSettingsService(shardId.index(), EMPTY_SETTINGS), new ShardIndexingService(shardId, EMPTY_SETTINGS), null, store, createSnapshotDeletionPolicy(), translog, createMergePolicy(), createMergeScheduler(), - new AnalysisService(shardId.index()), new SimilarityService(shardId.index()), new NoneBloomCache(shardId.index())); + new AnalysisService(shardId.index()), new SimilarityService(shardId.index())); } } diff --git a/src/test/java/org/elasticsearch/test/unit/index/field/data/doubles/DoubleFieldDataTests.java b/src/test/java/org/elasticsearch/test/unit/index/field/data/doubles/DoubleFieldDataTests.java index 7466268707b..bd8c8e45c40 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/field/data/doubles/DoubleFieldDataTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/field/data/doubles/DoubleFieldDataTests.java @@ -22,9 +22,7 @@ package org.elasticsearch.test.unit.index.field.data.doubles; import org.apache.lucene.document.Document; import org.apache.lucene.document.DoubleField; import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.elasticsearch.common.lucene.Lucene; @@ -69,7 +67,7 @@ public class DoubleFieldDataTests { document.add(new DoubleField("svalue", 4, Field.Store.NO)); indexWriter.addDocument(document); - IndexReader reader = IndexReader.open(indexWriter, true); + AtomicReader reader = new SlowCompositeReaderWrapper(DirectoryReader.open(indexWriter, true)); DoubleFieldData sFieldData 
= DoubleFieldData.load(reader, "svalue"); DoubleFieldData mFieldData = DoubleFieldData.load(reader, "mvalue"); diff --git a/src/test/java/org/elasticsearch/test/unit/index/field/data/floats/FloatFieldDataTests.java b/src/test/java/org/elasticsearch/test/unit/index/field/data/floats/FloatFieldDataTests.java index 6b96b13aed0..08ca48d6ad9 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/field/data/floats/FloatFieldDataTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/field/data/floats/FloatFieldDataTests.java @@ -22,9 +22,7 @@ package org.elasticsearch.test.unit.index.field.data.floats; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FloatField; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.elasticsearch.common.lucene.Lucene; @@ -69,7 +67,7 @@ public class FloatFieldDataTests { document.add(new FloatField("svalue", 4, Field.Store.NO)); indexWriter.addDocument(document); - IndexReader reader = IndexReader.open(indexWriter, true); + AtomicReader reader = new SlowCompositeReaderWrapper(DirectoryReader.open(indexWriter, true)); FloatFieldData sFieldData = FloatFieldData.load(reader, "svalue"); FloatFieldData mFieldData = FloatFieldData.load(reader, "mvalue"); diff --git a/src/test/java/org/elasticsearch/test/unit/index/field/data/ints/IntFieldDataTests.java b/src/test/java/org/elasticsearch/test/unit/index/field/data/ints/IntFieldDataTests.java index 6829f14aeb5..daa7a67a7ad 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/field/data/ints/IntFieldDataTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/field/data/ints/IntFieldDataTests.java @@ -22,9 +22,7 @@ package org.elasticsearch.test.unit.index.field.data.ints; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.IntField; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.elasticsearch.common.lucene.Lucene; @@ -69,7 +67,7 @@ public class IntFieldDataTests { document.add(new IntField("svalue", 4, Field.Store.NO)); indexWriter.addDocument(document); - IndexReader reader = IndexReader.open(indexWriter, true); + AtomicReader reader = new SlowCompositeReaderWrapper(DirectoryReader.open(indexWriter, true)); IntFieldData sFieldData = IntFieldData.load(reader, "svalue"); IntFieldData mFieldData = IntFieldData.load(reader, "mvalue"); diff --git a/src/test/java/org/elasticsearch/test/unit/index/field/data/longs/LongFieldDataTests.java b/src/test/java/org/elasticsearch/test/unit/index/field/data/longs/LongFieldDataTests.java index b80588cf548..08fa28f5a80 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/field/data/longs/LongFieldDataTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/field/data/longs/LongFieldDataTests.java @@ -22,9 +22,7 @@ package org.elasticsearch.test.unit.index.field.data.longs; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.LongField; -import org.apache.lucene.index.IndexReader; -import 
org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.elasticsearch.common.lucene.Lucene; @@ -69,7 +67,7 @@ public class LongFieldDataTests { document.add(new LongField("svalue", 4, Field.Store.NO)); indexWriter.addDocument(document); - IndexReader reader = IndexReader.open(indexWriter, true); + AtomicReader reader = new SlowCompositeReaderWrapper(DirectoryReader.open(indexWriter, true)); LongFieldData sFieldData = LongFieldData.load(reader, "svalue"); LongFieldData mFieldData = LongFieldData.load(reader, "mvalue"); diff --git a/src/test/java/org/elasticsearch/test/unit/index/field/data/shorts/ShortFieldDataTests.java b/src/test/java/org/elasticsearch/test/unit/index/field/data/shorts/ShortFieldDataTests.java index a656224ffaf..27b34e5d868 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/field/data/shorts/ShortFieldDataTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/field/data/shorts/ShortFieldDataTests.java @@ -22,9 +22,7 @@ package org.elasticsearch.test.unit.index.field.data.shorts; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.IntField; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.elasticsearch.common.lucene.Lucene; @@ -69,7 +67,7 @@ public class ShortFieldDataTests { document.add(new IntField("svalue", 4, Field.Store.NO)); indexWriter.addDocument(document); - IndexReader reader = IndexReader.open(indexWriter, true); + AtomicReader reader = new SlowCompositeReaderWrapper(DirectoryReader.open(indexWriter, true)); ShortFieldData sFieldData = ShortFieldData.load(reader, "svalue"); ShortFieldData mFieldData = ShortFieldData.load(reader, "mvalue"); diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/boost/CustomBoostMappingTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/boost/CustomBoostMappingTests.java index 32c0022b86f..186f94ea28c 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/boost/CustomBoostMappingTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/boost/CustomBoostMappingTests.java @@ -57,13 +57,13 @@ public class CustomBoostMappingTests { .startObject("date_field").field("value", "20100101").field("boost", 9.0f).endObject() .endObject().bytes()); - assertThat(doc.rootDoc().getFieldable("s_field").getBoost(), equalTo(2.0f)); - assertThat(doc.rootDoc().getFieldable("l_field").getBoost(), equalTo(3.0f)); - assertThat(doc.rootDoc().getFieldable("i_field").getBoost(), equalTo(4.0f)); - assertThat(doc.rootDoc().getFieldable("sh_field").getBoost(), equalTo(5.0f)); - assertThat(doc.rootDoc().getFieldable("b_field").getBoost(), equalTo(6.0f)); - assertThat(doc.rootDoc().getFieldable("d_field").getBoost(), equalTo(7.0f)); - assertThat(doc.rootDoc().getFieldable("f_field").getBoost(), equalTo(8.0f)); - assertThat(doc.rootDoc().getFieldable("date_field").getBoost(), equalTo(9.0f)); + assertThat(doc.rootDoc().getField("s_field").boost(), equalTo(2.0f)); + assertThat(doc.rootDoc().getField("l_field").boost(), equalTo(3.0f)); + assertThat(doc.rootDoc().getField("i_field").boost(), equalTo(4.0f)); + 
assertThat(doc.rootDoc().getField("sh_field").boost(), equalTo(5.0f)); + assertThat(doc.rootDoc().getField("b_field").boost(), equalTo(6.0f)); + assertThat(doc.rootDoc().getField("d_field").boost(), equalTo(7.0f)); + assertThat(doc.rootDoc().getField("f_field").boost(), equalTo(8.0f)); + assertThat(doc.rootDoc().getField("date_field").boost(), equalTo(9.0f)); } } \ No newline at end of file diff --git a/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java b/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java index 7c5cc9f0ce7..3f65dd8e269 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java @@ -20,6 +20,7 @@ package org.elasticsearch.test.unit.index.query; import org.apache.lucene.index.Term; +import org.apache.lucene.queries.BoostingQuery; import org.apache.lucene.sandbox.queries.FuzzyLikeThisQuery; import org.apache.lucene.search.*; import org.apache.lucene.search.spans.*; From daf347e67ebd152a4774edefb0f23a2428a466c0 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Wed, 31 Oct 2012 22:51:19 -0400 Subject: [PATCH 084/146] lucene4: replace IndexCommit.getVersion() with IndexCommit.getGeneration() --- .../index/deletionpolicy/SnapshotDeletionPolicy.java | 4 ++-- .../index/deletionpolicy/SnapshotIndexCommit.java | 4 ++-- .../org/elasticsearch/index/gateway/IndexShardGateway.java | 2 +- .../elasticsearch/index/gateway/IndexShardGatewayService.java | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/deletionpolicy/SnapshotDeletionPolicy.java b/src/main/java/org/elasticsearch/index/deletionpolicy/SnapshotDeletionPolicy.java index 44d6f51a137..7a392dc47ec 100644 --- a/src/main/java/org/elasticsearch/index/deletionpolicy/SnapshotDeletionPolicy.java +++ b/src/main/java/org/elasticsearch/index/deletionpolicy/SnapshotDeletionPolicy.java @@ -132,10 +132,10 @@ public class SnapshotDeletionPolicy extends AbstractIndexShardComponent implemen * Helper method to snapshot a give commit. */ private SnapshotIndexCommit snapshot(SnapshotIndexCommit commit) throws IOException { - SnapshotHolder snapshotHolder = snapshots.get(commit.getVersion()); + SnapshotHolder snapshotHolder = snapshots.get(commit.getGeneration()); if (snapshotHolder == null) { snapshotHolder = new SnapshotHolder(0); - snapshots.put(commit.getVersion(), snapshotHolder); + snapshots.put(commit.getGeneration(), snapshotHolder); } snapshotHolder.counter++; return new OneTimeReleaseSnapshotIndexCommit(this, commit); diff --git a/src/main/java/org/elasticsearch/index/deletionpolicy/SnapshotIndexCommit.java b/src/main/java/org/elasticsearch/index/deletionpolicy/SnapshotIndexCommit.java index 6d03547f250..c56821a0997 100644 --- a/src/main/java/org/elasticsearch/index/deletionpolicy/SnapshotIndexCommit.java +++ b/src/main/java/org/elasticsearch/index/deletionpolicy/SnapshotIndexCommit.java @@ -58,7 +58,7 @@ public class SnapshotIndexCommit extends IndexCommitDelegate implements Releasab * actually released. 
*/ public boolean release() { - return deletionPolicy.release(getVersion()); + return deletionPolicy.release(getGeneration()); } /** @@ -67,7 +67,7 @@ public class SnapshotIndexCommit extends IndexCommitDelegate implements Releasab */ @Override public void delete() { - if (!deletionPolicy.isHeld(getVersion())) { + if (!deletionPolicy.isHeld(getGeneration())) { delegate.delete(); } } diff --git a/src/main/java/org/elasticsearch/index/gateway/IndexShardGateway.java b/src/main/java/org/elasticsearch/index/gateway/IndexShardGateway.java index ef6e312cca0..bda3ebec998 100644 --- a/src/main/java/org/elasticsearch/index/gateway/IndexShardGateway.java +++ b/src/main/java/org/elasticsearch/index/gateway/IndexShardGateway.java @@ -103,7 +103,7 @@ public interface IndexShardGateway extends IndexShardComponent, CloseableIndexCo * Indicates that the index has changed from the latest snapshot. */ public boolean indexChanged() { - return lastIndexVersion != indexCommit.getVersion(); + return lastIndexVersion != indexCommit.getGeneration(); } /** diff --git a/src/main/java/org/elasticsearch/index/gateway/IndexShardGatewayService.java b/src/main/java/org/elasticsearch/index/gateway/IndexShardGatewayService.java index 04282dbe37f..dce49cb221c 100644 --- a/src/main/java/org/elasticsearch/index/gateway/IndexShardGatewayService.java +++ b/src/main/java/org/elasticsearch/index/gateway/IndexShardGatewayService.java @@ -265,13 +265,13 @@ public class IndexShardGatewayService extends AbstractIndexShardComponent implem SnapshotStatus snapshotStatus = indexShard.snapshot(new Engine.SnapshotHandler() { @Override public SnapshotStatus snapshot(SnapshotIndexCommit snapshotIndexCommit, Translog.Snapshot translogSnapshot) throws EngineException { - if (lastIndexVersion != snapshotIndexCommit.getVersion() || lastTranslogId != translogSnapshot.translogId() || lastTranslogLength < translogSnapshot.length()) { + if (lastIndexVersion != snapshotIndexCommit.getGeneration() || lastTranslogId != translogSnapshot.translogId() || lastTranslogLength < translogSnapshot.length()) { logger.debug("snapshot ({}) to {} ...", reason, shardGateway); SnapshotStatus snapshotStatus = shardGateway.snapshot(new IndexShardGateway.Snapshot(snapshotIndexCommit, translogSnapshot, lastIndexVersion, lastTranslogId, lastTranslogLength, lastTotalTranslogOperations)); - lastIndexVersion = snapshotIndexCommit.getVersion(); + lastIndexVersion = snapshotIndexCommit.getGeneration(); lastTranslogId = translogSnapshot.translogId(); lastTranslogLength = translogSnapshot.length(); lastTotalTranslogOperations = translogSnapshot.estimatedTotalOperations(); From 41325113f0f97b21add41978622052f0624aeda4 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Wed, 31 Oct 2012 22:52:20 -0400 Subject: [PATCH 085/146] lucene4: switched from Field.Index to boolean indexed in ParseContext.includeInAll() --- .../java/org/elasticsearch/index/mapper/ParseContext.java | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/mapper/ParseContext.java b/src/main/java/org/elasticsearch/index/mapper/ParseContext.java index d3fa33b0a67..c6a70a0a880 100644 --- a/src/main/java/org/elasticsearch/index/mapper/ParseContext.java +++ b/src/main/java/org/elasticsearch/index/mapper/ParseContext.java @@ -228,7 +228,7 @@ public class ParseContext { } public boolean includeInAll(Boolean includeInAll, FieldMapper mapper) { - return includeInAll(includeInAll, mapper.index()); + return includeInAll(includeInAll, mapper.indexed()); } /** @@ -236,13 
+236,13 @@ public class ParseContext { * is false. If its enabled, then will return true only if the specific flag is null or * its actual value (so, if not set, defaults to "true") and the field is indexed. */ - private boolean includeInAll(Boolean specificIncludeInAll, Field.Index index) { + private boolean includeInAll(Boolean specificIncludeInAll, boolean indexed) { if (!docMapper.allFieldMapper().enabled()) { return false; } // not explicitly set if (specificIncludeInAll == null) { - return index != Field.Index.NO; + return indexed; } return specificIncludeInAll; } From 09fe15488d5561ad8d877fb1b6b5dc422f691505 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 1 Nov 2012 11:16:18 +0100 Subject: [PATCH 086/146] lucene 4: Upgraded ScanContext. --- .../org/elasticsearch/search/scan/ScanContext.java | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/scan/ScanContext.java b/src/main/java/org/elasticsearch/search/scan/ScanContext.java index 3f5590d2de2..3eee340f8d3 100644 --- a/src/main/java/org/elasticsearch/search/scan/ScanContext.java +++ b/src/main/java/org/elasticsearch/search/scan/ScanContext.java @@ -1,8 +1,10 @@ package org.elasticsearch.search.scan; import com.google.common.collect.Maps; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.*; +import org.apache.lucene.util.Bits; import org.elasticsearch.common.lucene.docset.AllDocSet; import org.elasticsearch.search.internal.SearchContext; @@ -89,7 +91,7 @@ public class ScanContext { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { + public void setNextReader(AtomicReaderContext context) throws IOException { // if we have a reader state, and we haven't registered one already, register it // we need to check in readersState since even when the filter return null, setNextReader is still // called for that reader (before) @@ -98,8 +100,8 @@ public class ScanContext { readerState.done = true; readerStates.put(currentReader, readerState); } - this.currentReader = reader; - this.docBase = docBase; + this.currentReader = context.reader(); + this.docBase = context.docBase; this.readerState = new ReaderState(); } @@ -130,13 +132,13 @@ public class ScanContext { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - ReaderState readerState = readerStates.get(reader); + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptedDocs) throws IOException { + ReaderState readerState = readerStates.get(context.reader()); if (readerState != null && readerState.done) { scanCollector.incCounter(readerState.count); return null; } - return new AllDocSet(reader.maxDoc()); + return new AllDocSet(context.reader().maxDoc()); } } From 968b012911d6b7e7082aaa82fd3105ce171b6700 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 1 Nov 2012 11:59:19 +0100 Subject: [PATCH 087/146] lucene 4: Upgraded *ValueGeoPointFieldData and GeoDistanceDataComparator. 
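For context on the comparator changes below: Lucene 4 field comparators are generic in their value type and work per segment, so setNextReader now takes an AtomicReaderContext and returns the comparator to use for that segment, and compareDocToValue(int, T) is a new required method. A minimal sketch of that shape (a hypothetical constant-valued comparator, for illustration only, not code from this commit):

    import java.io.IOException;
    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.search.FieldComparator;

    final class DoubleSketchComparator extends FieldComparator<Double> {
        private final double[] values; // one slot per competitive hit
        private double bottom;

        DoubleSketchComparator(int numHits) {
            values = new double[numHits];
        }

        // stand-in for a per-document value; a real comparator reads
        // per-segment field data loaded in setNextReader()
        private double docValue(int doc) {
            return doc;
        }

        @Override
        public int compare(int slot1, int slot2) {
            return Double.compare(values[slot1], values[slot2]);
        }

        @Override
        public void setBottom(int slot) {
            bottom = values[slot];
        }

        @Override
        public int compareBottom(int doc) throws IOException {
            return Double.compare(bottom, docValue(doc));
        }

        @Override
        public void copy(int slot, int doc) throws IOException {
            values[slot] = docValue(doc);
        }

        @Override
        public FieldComparator<Double> setNextReader(AtomicReaderContext context) throws IOException {
            // load per-segment state from context.reader() here
            return this;
        }

        @Override
        public Double value(int slot) {
            return values[slot];
        }

        @Override
        public int compareDocToValue(int doc, Double other) throws IOException {
            return Double.compare(docValue(doc), other);
        }
    }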
--- .../geo/MultiValueGeoPointFieldData.java | 4 ++- .../geo/SingleValueGeoPointFieldData.java | 4 ++- .../search/geo/GeoDistanceDataComparator.java | 26 ++++++++++++++----- 3 files changed, 26 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/mapper/geo/MultiValueGeoPointFieldData.java b/src/main/java/org/elasticsearch/index/mapper/geo/MultiValueGeoPointFieldData.java index bb2879e72c8..a11f222e935 100644 --- a/src/main/java/org/elasticsearch/index/mapper/geo/MultiValueGeoPointFieldData.java +++ b/src/main/java/org/elasticsearch/index/mapper/geo/MultiValueGeoPointFieldData.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.mapper.geo; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.util.concurrent.ThreadLocals; import org.elasticsearch.index.field.data.doubles.DoubleFieldData; @@ -110,7 +111,8 @@ public class MultiValueGeoPointFieldData extends GeoPointFieldData { } break; } - proc.onValue(docId, GeoHashUtils.encode(lat[loc], lon[loc])); + //LUCENE 4 UPGRADE: Make GeoHashUtils work with BytesRef instead of String + proc.onValue(docId, new BytesRef(GeoHashUtils.encode(lat[loc], lon[loc]))); } } diff --git a/src/main/java/org/elasticsearch/index/mapper/geo/SingleValueGeoPointFieldData.java b/src/main/java/org/elasticsearch/index/mapper/geo/SingleValueGeoPointFieldData.java index dff1d70f955..fb7f78e352a 100644 --- a/src/main/java/org/elasticsearch/index/mapper/geo/SingleValueGeoPointFieldData.java +++ b/src/main/java/org/elasticsearch/index/mapper/geo/SingleValueGeoPointFieldData.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.mapper.geo; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.RamUsage; import org.elasticsearch.common.util.concurrent.ThreadLocals; import org.elasticsearch.index.field.data.doubles.DoubleFieldData; @@ -84,7 +85,8 @@ public class SingleValueGeoPointFieldData extends GeoPointFieldData { proc.onMissing(docId); return; } - proc.onValue(docId, GeoHashUtils.encode(lat[loc], lon[loc])); + //LUCENE 4 UPGRADE: Make GeoHashUtils work with BytesRef instead of String + proc.onValue(docId, new BytesRef(GeoHashUtils.encode(lat[loc], lon[loc]))); } @Override diff --git a/src/main/java/org/elasticsearch/index/search/geo/GeoDistanceDataComparator.java b/src/main/java/org/elasticsearch/index/search/geo/GeoDistanceDataComparator.java index 5898b241114..639e608f2fd 100644 --- a/src/main/java/org/elasticsearch/index/search/geo/GeoDistanceDataComparator.java +++ b/src/main/java/org/elasticsearch/index/search/geo/GeoDistanceDataComparator.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.search.geo; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.FieldComparator; import org.apache.lucene.search.SortField; @@ -37,7 +38,7 @@ import java.io.IOException; * */ // LUCENE MONITOR: Monitor against FieldComparator.Double -public class GeoDistanceDataComparator extends FieldComparator { +public class GeoDistanceDataComparator extends FieldComparator<Double> { public static FieldDataType.ExtendedFieldComparatorSource comparatorSource(String fieldName, double lat, double lon, DistanceUnit unit, GeoDistance geoDistance, FieldDataCache fieldDataCache, MapperService mapperService) { @@ -78,7 +79,7 @@ public class GeoDistanceDataComparator extends FieldComparator { @Override public SortField.Type reducedType() { - return SortField.DOUBLE; + return SortField.Type.DOUBLE; } } @@ -127,8 +128,9 @@ 
public class GeoDistanceDataComparator extends FieldComparator { } @Override - public void setNextReader(IndexReader reader, int docBase) throws IOException { - fieldData = (GeoPointFieldData) fieldDataCache.cache(GeoPointFieldDataType.TYPE, reader, indexFieldName); + public GeoDistanceDataComparator setNextReader(AtomicReaderContext context) throws IOException { + fieldData = (GeoPointFieldData) fieldDataCache.cache(GeoPointFieldDataType.TYPE, context.reader(), indexFieldName); + return this; } @Override @@ -163,6 +165,18 @@ public class GeoDistanceDataComparator extends FieldComparator { } } + @Override + public int compareDocToValue(int doc, Double distance2) throws IOException { + double distance1; + if (!fieldData.hasValue(doc)) { + // no value for this doc: treat it as the maximum distance so it sorts to the "end" + distance1 = Double.MAX_VALUE; + } else { + distance1 = fixedSourceDistance.calculate(fieldData.latValue(doc), fieldData.lonValue(doc)); + } + return Double.compare(distance1, distance2); + } + @Override public void copy(int slot, int doc) { double distance; @@ -181,7 +195,7 @@ public class GeoDistanceDataComparator extends FieldComparator { } @Override - public Comparable value(int slot) { - return Double.valueOf(values[slot]); + public Double value(int slot) { + return values[slot]; } } From 3298ad22352d4e8e1381c84d27f7fd5f28f78c86 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 1 Nov 2012 12:49:45 +0100 Subject: [PATCH 088/146] lucene 4: Upgraded UidField. (version can be stored later as doc values) --- .../common/lucene/uid/UidField.java | 78 +++++++------------ 1 file changed, 27 insertions(+), 51 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java b/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java index 05473665655..52d231c6023 100644 --- a/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java +++ b/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java @@ -22,11 +22,12 @@ package org.elasticsearch.common.lucene.uid; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; -import org.apache.lucene.document.AbstractField; import org.apache.lucene.document.Field; import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.DocsAndPositionsEnum; import org.apache.lucene.index.Term; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Numbers; import org.elasticsearch.common.lucene.Lucene; @@ -36,6 +37,7 @@ import java.io.Reader; /** * */ +// TODO: LUCENE 4 UPGRADE: Store version as doc values instead of as a payload. 
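// For reference, a minimal sketch (illustrative, not part of this commit; the helper and its name
// are hypothetical) of the Lucene 4 payload-reading pattern the methods below move to: TermPositions
// is gone, termPositionsEnum() returns a DocsAndPositionsEnum, and getPayload() exposes a reusable
// BytesRef whose bytes must be copied out before the enum advances:
//
//     static long readLongPayload(org.apache.lucene.index.AtomicReader reader,
//                                 org.apache.lucene.index.Term term) throws java.io.IOException {
//         org.apache.lucene.index.DocsAndPositionsEnum uid = reader.termPositionsEnum(term);
//         if (uid == null || uid.nextDoc() == org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS) {
//             return -1; // term not present
//         }
//         uid.nextPosition();
//         org.apache.lucene.util.BytesRef payload = uid.getPayload();
//         if (payload == null || payload.length < 8) {
//             return -2; // no version payload
//         }
//         byte[] bytes = new byte[payload.length]; // copy: the BytesRef is reused internally
//         System.arraycopy(payload.bytes, payload.offset, bytes, 0, payload.length);
//         return org.elasticsearch.common.Numbers.bytesToLong(bytes);
//     }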
public class UidField extends Field { public static class DocIdAndVersion { @@ -52,39 +54,31 @@ public class UidField extends Field { // this works fine for nested docs since they don't have the payload which has the version // so we iterate till we find the one with the payload - public static DocIdAndVersion loadDocIdAndVersion(AtomicReaderContext reader, Term term) { + public static DocIdAndVersion loadDocIdAndVersion(AtomicReaderContext context, Term term) { int docId = Lucene.NO_DOC; - TermPositions uid = null; try { - uid = reader.termPositions(term); - if (!uid.next()) { + DocsAndPositionsEnum uid = context.reader().termPositionsEnum(term); + if (uid.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) { return null; // no doc } // Note, only master docs uid have version payload, so we can use that info to not // take them into account do { - docId = uid.doc(); + docId = uid.docID(); uid.nextPosition(); - if (!uid.isPayloadAvailable()) { + if (uid.getPayload() == null) { continue; } - if (uid.getPayloadLength() < 8) { + if (uid.getPayload().length < 8) { continue; } - byte[] payload = uid.getPayload(new byte[8], 0); - return new DocIdAndVersion(docId, Numbers.bytesToLong(payload), reader); - } while (uid.next()); - return new DocIdAndVersion(docId, -2, reader); + byte[] payload = new byte[uid.getPayload().length]; + System.arraycopy(uid.getPayload().bytes, uid.getPayload().offset, payload, 0, uid.getPayload().length); + return new DocIdAndVersion(docId, Numbers.bytesToLong(payload), context); + } while (uid.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); + return new DocIdAndVersion(docId, -2, context); } catch (Exception e) { - return new DocIdAndVersion(docId, -2, reader); - } finally { - if (uid != null) { - try { - uid.close(); - } catch (IOException e) { - // nothing to do here... - } - } + return new DocIdAndVersion(docId, -2, context); } } @@ -92,37 +86,29 @@ public class UidField extends Field { * Load the version for the uid from the reader, returning -1 if no doc exists, or -2 if * no version is available (for backward comp.) */ - public static long loadVersion(AtomicReaderContext reader, Term term) { - TermPositions uid = null; + public static long loadVersion(AtomicReaderContext context, Term term) { try { - uid = reader.termPositions(term); - if (!uid.next()) { + DocsAndPositionsEnum uid = context.reader().termPositionsEnum(term); + if (uid.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) { return -1; } // Note, only master docs uid have version payload, so we can use that info to not // take them into account do { uid.nextPosition(); - if (!uid.isPayloadAvailable()) { + if (uid.getPayload() == null) { continue; } - if (uid.getPayloadLength() < 8) { + if (uid.getPayload().length < 8) { continue; } - byte[] payload = uid.getPayload(new byte[8], 0); + byte[] payload = new byte[uid.getPayload().length]; + System.arraycopy(uid.getPayload().bytes, uid.getPayload().offset, payload, 0, uid.getPayload().length); return Numbers.bytesToLong(payload); - } while (uid.next()); + } while (uid.nextDoc() != DocIdSetIterator.NO_MORE_DOCS); return -2; } catch (Exception e) { return -2; - } finally { - if (uid != null) { - try { - uid.close(); - } catch (IOException e) { - // nothing to do here... 
- } - } } } @@ -133,23 +119,13 @@ public class UidField extends Field { private final UidPayloadTokenStream tokenStream; public UidField(String name, String uid, long version) { - super(name, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); + super(name, uid, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); this.uid = uid; this.version = version; - this.indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; +// this.indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; this.tokenStream = new UidPayloadTokenStream(this); } - @Override - public void setIndexOptions(FieldInfo.IndexOptions indexOptions) { - // never allow to set this, since we want payload! - } - - @Override - public void setOmitTermFreqAndPositions(boolean omitTermFreqAndPositions) { - // never allow to set this, since we want payload! - } - public String uid() { return this.uid; } @@ -206,7 +182,7 @@ public class UidField extends Field { } termAtt.setLength(0); termAtt.append(field.uid); - payloadAttribute.setPayload(new Payload(Numbers.longToBytes(field.version()))); + payloadAttribute.setPayload(new BytesRef(Numbers.longToBytes(field.version()))); added = true; return true; } From ba1b87058016e9008e9c570e508dd968f471984a Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 1 Nov 2012 12:58:04 +0100 Subject: [PATCH 089/146] lucene 4: Upgraded CacheKeyFilter. --- .../index/cache/filter/support/CacheKeyFilter.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/cache/filter/support/CacheKeyFilter.java b/src/main/java/org/elasticsearch/index/cache/filter/support/CacheKeyFilter.java index 6786128b65f..4d499169558 100644 --- a/src/main/java/org/elasticsearch/index/cache/filter/support/CacheKeyFilter.java +++ b/src/main/java/org/elasticsearch/index/cache/filter/support/CacheKeyFilter.java @@ -19,9 +19,11 @@ package org.elasticsearch.index.cache.filter.support; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.elasticsearch.common.Unicode; import java.io.IOException; @@ -86,8 +88,8 @@ public interface CacheKeyFilter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - return filter.getDocIdSet(reader); + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { + return filter.getDocIdSet(context, acceptDocs); } @Override From 415cfa2e896be82a543ffe70ca40f31f5f585087 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 1 Nov 2012 13:05:06 +0100 Subject: [PATCH 090/146] lucene 4: Upgraded GeoDistanceFilter, MatchedFiltersFetchSubPhase. 
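These commits move the filters to the Lucene 4 Filter contract: getDocIdSet is now called once per segment with an AtomicReaderContext and an acceptDocs bitset that implementations should honor or forward to delegate filters. A minimal sketch of the new contract (hypothetical filter, for illustration only):

    import java.io.IOException;
    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.search.DocIdSet;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.util.Bits;
    import org.apache.lucene.util.FixedBitSet;

    public class MatchAllSketchFilter extends Filter {
        @Override
        public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException {
            int maxDoc = context.reader().maxDoc();
            FixedBitSet bits = new FixedBitSet(maxDoc);
            for (int doc = 0; doc < maxDoc; doc++) {
                // honor the accepted-docs bitset (null means "all docs accepted")
                if (acceptDocs == null || acceptDocs.get(doc)) {
                    bits.set(doc);
                }
            }
            return bits; // FixedBitSet is itself a DocIdSet in Lucene 4
        }
    }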
--- .../index/cache/filter/support/CacheKeyFilter.java | 1 - .../index/search/geo/GeoDistanceFilter.java | 13 +++++++------ .../matchedfilters/MatchedFiltersFetchSubPhase.java | 2 +- .../search/highlight/HighlightPhase.java | 2 +- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/cache/filter/support/CacheKeyFilter.java b/src/main/java/org/elasticsearch/index/cache/filter/support/CacheKeyFilter.java index 4d499169558..7ac5069d5ec 100644 --- a/src/main/java/org/elasticsearch/index/cache/filter/support/CacheKeyFilter.java +++ b/src/main/java/org/elasticsearch/index/cache/filter/support/CacheKeyFilter.java @@ -20,7 +20,6 @@ package org.elasticsearch.index.cache.filter.support; import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; import org.apache.lucene.util.Bits; diff --git a/src/main/java/org/elasticsearch/index/search/geo/GeoDistanceFilter.java b/src/main/java/org/elasticsearch/index/search/geo/GeoDistanceFilter.java index 961a342096c..3ee2b19843f 100644 --- a/src/main/java/org/elasticsearch/index/search/geo/GeoDistanceFilter.java +++ b/src/main/java/org/elasticsearch/index/search/geo/GeoDistanceFilter.java @@ -20,9 +20,10 @@ package org.elasticsearch.index.search.geo; import com.google.common.collect.ImmutableList; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.lucene.docset.AndDocSet; import org.elasticsearch.common.lucene.docset.DocSet; @@ -103,17 +104,17 @@ public class GeoDistanceFilter extends Filter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptedDocs) throws IOException { DocSet boundingBoxDocSet = null; if (boundingBoxFilter != null) { - DocIdSet docIdSet = boundingBoxFilter.getDocIdSet(reader); + DocIdSet docIdSet = boundingBoxFilter.getDocIdSet(context, acceptedDocs); if (docIdSet == null) { return null; } - boundingBoxDocSet = DocSets.convert(reader, docIdSet); + boundingBoxDocSet = DocSets.convert(context.reader(), docIdSet); } - final GeoPointFieldData fieldData = (GeoPointFieldData) fieldDataCache.cache(GeoPointFieldDataType.TYPE, reader, fieldName); - GeoDistanceDocSet distDocSet = new GeoDistanceDocSet(reader.maxDoc(), fieldData, fixedSourceDistance, distanceBoundingCheck, distance); + final GeoPointFieldData fieldData = (GeoPointFieldData) fieldDataCache.cache(GeoPointFieldDataType.TYPE, context.reader(), fieldName); + GeoDistanceDocSet distDocSet = new GeoDistanceDocSet(context.reader().maxDoc(), fieldData, fixedSourceDistance, distanceBoundingCheck, distance); if (boundingBoxDocSet == null) { return distDocSet; } else { diff --git a/src/main/java/org/elasticsearch/search/fetch/matchedfilters/MatchedFiltersFetchSubPhase.java b/src/main/java/org/elasticsearch/search/fetch/matchedfilters/MatchedFiltersFetchSubPhase.java index 0bb4a4e8a56..6f49219fe88 100644 --- a/src/main/java/org/elasticsearch/search/fetch/matchedfilters/MatchedFiltersFetchSubPhase.java +++ b/src/main/java/org/elasticsearch/search/fetch/matchedfilters/MatchedFiltersFetchSubPhase.java @@ -66,7 +66,7 @@ public class MatchedFiltersFetchSubPhase implements 
FetchSubPhase { String name = entry.getKey(); Filter filter = entry.getValue(); try { - DocIdSet docIdSet = filter.getDocIdSet(hitContext.reader()); + DocIdSet docIdSet = filter.getDocIdSet(hitContext.readerContext(), null); // null is fine, since we filter by hitContext.docId() if (docIdSet != null) { DocSet docSet = DocSets.convert(hitContext.reader(), docIdSet); if (docSet.get(hitContext.docId())) { diff --git a/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java b/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java index 509d2441399..74e0105916b 100644 --- a/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java +++ b/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java @@ -169,7 +169,7 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase { try { SingleFieldVisitor fieldVisitor = new SingleFieldVisitor(mapper.names().indexName()); hitContext.reader().document(hitContext.docId(), fieldVisitor); - textsToHighlight = (List) fieldVisitor.values(); + textsToHighlight = (List) fieldVisitor.values(); } catch (Exception e) { throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e); } From d42d153c4877ce845386cb72be552215421a9f49 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 1 Nov 2012 13:08:20 +0100 Subject: [PATCH 091/146] lucene 4: Upgraded GeoDistanceRangeFilter, GeoPolygonFilter. --- .../index/search/geo/GeoDistanceRangeFilter.java | 13 +++++++------ .../index/search/geo/GeoPolygonFilter.java | 9 +++++---- 2 files changed, 12 insertions(+), 10 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/search/geo/GeoDistanceRangeFilter.java b/src/main/java/org/elasticsearch/index/search/geo/GeoDistanceRangeFilter.java index 086c7b84851..b83cf19797d 100644 --- a/src/main/java/org/elasticsearch/index/search/geo/GeoDistanceRangeFilter.java +++ b/src/main/java/org/elasticsearch/index/search/geo/GeoDistanceRangeFilter.java @@ -20,9 +20,10 @@ package org.elasticsearch.index.search.geo; import com.google.common.collect.ImmutableList; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.lucene.docset.AndDocSet; @@ -119,17 +120,17 @@ public class GeoDistanceRangeFilter extends Filter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptedDocs) throws IOException { DocSet boundingBoxDocSet = null; if (boundingBoxFilter != null) { - DocIdSet docIdSet = boundingBoxFilter.getDocIdSet(reader); + DocIdSet docIdSet = boundingBoxFilter.getDocIdSet(context, acceptedDocs); if (docIdSet == null) { return null; } - boundingBoxDocSet = DocSets.convert(reader, docIdSet); + boundingBoxDocSet = DocSets.convert(context.reader(), docIdSet); } - final GeoPointFieldData fieldData = (GeoPointFieldData) fieldDataCache.cache(GeoPointFieldDataType.TYPE, reader, fieldName); - GeoDistanceRangeDocSet distDocSet = new GeoDistanceRangeDocSet(reader.maxDoc(), fieldData, fixedSourceDistance, distanceBoundingCheck, inclusiveLowerPoint, inclusiveUpperPoint); + final GeoPointFieldData fieldData = (GeoPointFieldData) fieldDataCache.cache(GeoPointFieldDataType.TYPE, 
context.reader(), fieldName); + GeoDistanceRangeDocSet distDocSet = new GeoDistanceRangeDocSet(context.reader().maxDoc(), fieldData, fixedSourceDistance, distanceBoundingCheck, inclusiveLowerPoint, inclusiveUpperPoint); if (boundingBoxDocSet == null) { return distDocSet; } else { diff --git a/src/main/java/org/elasticsearch/index/search/geo/GeoPolygonFilter.java b/src/main/java/org/elasticsearch/index/search/geo/GeoPolygonFilter.java index cb920a9c471..91e60cd3a15 100644 --- a/src/main/java/org/elasticsearch/index/search/geo/GeoPolygonFilter.java +++ b/src/main/java/org/elasticsearch/index/search/geo/GeoPolygonFilter.java @@ -19,9 +19,10 @@ package org.elasticsearch.index.search.geo; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.elasticsearch.common.lucene.docset.GetDocSet; import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.mapper.geo.GeoPointFieldData; @@ -56,9 +57,9 @@ public class GeoPolygonFilter extends Filter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - final GeoPointFieldData fieldData = (GeoPointFieldData) fieldDataCache.cache(GeoPointFieldDataType.TYPE, reader, fieldName); - return new GeoPolygonDocSet(reader.maxDoc(), fieldData, points); + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptedDocs) throws IOException { + final GeoPointFieldData fieldData = (GeoPointFieldData) fieldDataCache.cache(GeoPointFieldDataType.TYPE, context.reader(), fieldName); + return new GeoPolygonDocSet(context.reader().maxDoc(), fieldData, points); } @Override From 673712c0b276c99b695146717ae1b4d4c9ec5603 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 1 Nov 2012 13:11:37 +0100 Subject: [PATCH 092/146] lucene 4: Upgraded IndexedGeoBoundingBoxFilter & InMemoryGeoBoundingBoxFilter. 
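The indexed variant below evaluates its lon/lat sub-filters against the same segment and intersects the resulting FixedBitSets, short-circuiting when a sub-result is null or empty. A rough sketch of that pattern under the new API (helper name hypothetical, not code from this commit):

    import java.io.IOException;
    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.search.DocIdSet;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.util.Bits;
    import org.apache.lucene.util.FixedBitSet;

    final class FilterIntersectionSketch {
        // null means "no matching docs", mirroring how the patched filters bail out early
        static FixedBitSet intersect(Filter lonFilter, Filter latFilter,
                                     AtomicReaderContext context, Bits acceptDocs) throws IOException {
            DocIdSet lon = lonFilter.getDocIdSet(context, acceptDocs);
            if (lon == null || lon == DocIdSet.EMPTY_DOCIDSET) {
                return null;
            }
            DocIdSet lat = latFilter.getDocIdSet(context, acceptDocs);
            if (lat == null || lat == DocIdSet.EMPTY_DOCIDSET) {
                return null;
            }
            // assumes the sub-filters produce FixedBitSet instances, as the patched code does
            FixedBitSet result = ((FixedBitSet) lon).clone();
            result.and((FixedBitSet) lat);
            return result;
        }
    }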
--- .../geo/InMemoryGeoBoundingBoxFilter.java | 11 ++++++----- .../search/geo/IndexedGeoBoundingBoxFilter.java | 17 +++++++++-------- 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/search/geo/InMemoryGeoBoundingBoxFilter.java b/src/main/java/org/elasticsearch/index/search/geo/InMemoryGeoBoundingBoxFilter.java index aa98e8d7203..8eec21b43ff 100644 --- a/src/main/java/org/elasticsearch/index/search/geo/InMemoryGeoBoundingBoxFilter.java +++ b/src/main/java/org/elasticsearch/index/search/geo/InMemoryGeoBoundingBoxFilter.java @@ -19,9 +19,10 @@ package org.elasticsearch.index.search.geo; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.elasticsearch.common.lucene.docset.GetDocSet; import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.mapper.geo.GeoPointFieldData; @@ -62,14 +63,14 @@ public class InMemoryGeoBoundingBoxFilter extends Filter { } @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - final GeoPointFieldData fieldData = (GeoPointFieldData) fieldDataCache.cache(GeoPointFieldDataType.TYPE, reader, fieldName); + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptedDocs) throws IOException { + final GeoPointFieldData fieldData = (GeoPointFieldData) fieldDataCache.cache(GeoPointFieldDataType.TYPE, context.reader(), fieldName); //checks to see if bounding box crosses 180 degrees if (topLeft.lon > bottomRight.lon) { - return new Meridian180GeoBoundingBoxDocSet(reader.maxDoc(), fieldData, topLeft, bottomRight); + return new Meridian180GeoBoundingBoxDocSet(context.reader().maxDoc(), fieldData, topLeft, bottomRight); } else { - return new GeoBoundingBoxDocSet(reader.maxDoc(), fieldData, topLeft, bottomRight); + return new GeoBoundingBoxDocSet(context.reader().maxDoc(), fieldData, topLeft, bottomRight); } } diff --git a/src/main/java/org/elasticsearch/index/search/geo/IndexedGeoBoundingBoxFilter.java b/src/main/java/org/elasticsearch/index/search/geo/IndexedGeoBoundingBoxFilter.java index 3895678176e..a935aa7fee9 100644 --- a/src/main/java/org/elasticsearch/index/search/geo/IndexedGeoBoundingBoxFilter.java +++ b/src/main/java/org/elasticsearch/index/search/geo/IndexedGeoBoundingBoxFilter.java @@ -19,9 +19,10 @@ package org.elasticsearch.index.search.geo; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.lucene.docset.DocSets; @@ -58,16 +59,16 @@ public class IndexedGeoBoundingBoxFilter { } @Override - public FixedBitSet getDocIdSet(IndexReader reader) throws IOException { + public FixedBitSet getDocIdSet(AtomicReaderContext context, Bits acceptedDocs) throws IOException { FixedBitSet main; - DocIdSet set = lonFilter1.getDocIdSet(reader); + DocIdSet set = lonFilter1.getDocIdSet(context, acceptedDocs); if (set == null || set == DocIdSet.EMPTY_DOCIDSET) { main = null; } else { main = (FixedBitSet) set; } - set = lonFilter2.getDocIdSet(reader); + set = lonFilter2.getDocIdSet(context, acceptedDocs); if (set == null || set == DocIdSet.EMPTY_DOCIDSET) { if (main == null) { return null; 
@@ -82,7 +83,7 @@ public class IndexedGeoBoundingBoxFilter { } } - set = latFilter.getDocIdSet(reader); + set = latFilter.getDocIdSet(context, acceptedDocs); if (set == null || set == DocIdSet.EMPTY_DOCIDSET) { return null; } @@ -124,14 +125,14 @@ public class IndexedGeoBoundingBoxFilter { } @Override - public FixedBitSet getDocIdSet(IndexReader reader) throws IOException { + public FixedBitSet getDocIdSet(AtomicReaderContext context, Bits acceptedDocs) throws IOException { FixedBitSet main; - DocIdSet set = lonFilter.getDocIdSet(reader); + DocIdSet set = lonFilter.getDocIdSet(context, acceptedDocs); if (set == null || set == DocIdSet.EMPTY_DOCIDSET) { return null; } main = (FixedBitSet) set; - set = latFilter.getDocIdSet(reader); + set = latFilter.getDocIdSet(context, acceptedDocs); if (set == null || set == DocIdSet.EMPTY_DOCIDSET) { return null; } From 38dc19d8bc23bc124cbdce6b9f67a5ce5640130a Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 1 Nov 2012 13:12:55 +0100 Subject: [PATCH 093/146] lucene 4: Fixed compile error. --- src/main/java/org/elasticsearch/index/get/ShardGetService.java | 1 - 1 file changed, 1 deletion(-) diff --git a/src/main/java/org/elasticsearch/index/get/ShardGetService.java b/src/main/java/org/elasticsearch/index/get/ShardGetService.java index 5deee8bc38a..f7c04074844 100644 --- a/src/main/java/org/elasticsearch/index/get/ShardGetService.java +++ b/src/main/java/org/elasticsearch/index/get/ShardGetService.java @@ -282,7 +282,6 @@ public class ShardGetService extends AbstractIndexShardComponent { // LUCENE 4 UPGRADE: optimize when only a single field needs to be loaded BaseFieldVisitor fieldVisitor = buildFieldSelectors(docMapper, gFields); if (fieldVisitor != null) { - fieldVisitor.reset(); Document doc; try { docIdAndVersion.reader.reader().document(docIdAndVersion.docId, fieldVisitor); From e33ae96b388be638ad14aa4f6069a20b6ec4ab2a Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 1 Nov 2012 13:22:08 +0100 Subject: [PATCH 094/146] lucene 4: added overloaded method to fix an issue in SourceScoreOrderFragmentsBuilder and SourceSimpleFragmentsBuilder. --- .../elasticsearch/search/lookup/SearchLookup.java | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java b/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java index efa151b5cd5..dbe9e03811d 100644 --- a/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java +++ b/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java @@ -20,8 +20,11 @@ package org.elasticsearch.search.lookup; import com.google.common.collect.ImmutableMap; +import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Scorer; +import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.Nullable; import org.elasticsearch.index.cache.field.data.FieldDataCache; import org.elasticsearch.index.mapper.MapperService; @@ -77,4 +80,14 @@ public class SearchLookup { sourceLookup.setNextDocId(docId); fieldsLookup.setNextDocId(docId); } + + // Sneaky way for the fast vector highlighter (fvh) to get the AtomicReaderContext (ARC). See SourceScoreOrderFragmentsBuilder. // The passed reader must be atomic, which is the case in SourceScoreOrderFragmentsBuilder. 
+ public void setNextReader(IndexReader reader) { + if (!AtomicReader.class.isInstance(reader)) { + throw new ElasticSearchIllegalArgumentException("reader not instance of AtomicReader, but " + reader.getClass()); + } + AtomicReaderContext context = (AtomicReaderContext) reader.getContext(); + setNextReader(context); + } } From a3de9e521d8eda6559592be5ef895b251a83c589 Mon Sep 17 00:00:00 2001 From: Simon Willnauer Date: Thu, 1 Nov 2012 14:48:08 +0100 Subject: [PATCH 095/146] lucene 4: replaced TrimFilter and WordDelimiterFilter with lucene versions --- .../analysis/miscellaneous/TrimFilter.java | 63 -- .../miscellaneous/WordDelimiterFilter.java | 574 ------------------ .../miscellaneous/WordDelimiterIterator.java | 341 ----------- .../WordDelimiterTokenFilterFactory.java | 62 +- .../analysis/IndicesAnalysisService.java | 9 +- 5 files changed, 34 insertions(+), 1015 deletions(-) delete mode 100644 src/main/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java delete mode 100644 src/main/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java delete mode 100644 src/main/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterIterator.java diff --git a/src/main/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java b/src/main/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java deleted file mode 100644 index d3ad3133a9b..00000000000 --- a/src/main/java/org/apache/lucene/analysis/miscellaneous/TrimFilter.java +++ /dev/null @@ -1,63 +0,0 @@ -package org.apache.lucene.analysis.miscellaneous; - -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; - -import java.io.IOException; - -/** - */ -// LUCENE MONITOR: Next version of Lucene (4.0) will have this as part of the analyzers module -public final class TrimFilter extends TokenFilter { - - final boolean updateOffsets; - private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); - private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class); - - - public TrimFilter(TokenStream in, boolean updateOffsets) { - super(in); - this.updateOffsets = updateOffsets; - } - - @Override - public boolean incrementToken() throws IOException { - if (!input.incrementToken()) return false; - - char[] termBuffer = termAtt.buffer(); - int len = termAtt.length(); - //TODO: Is this the right behavior or should we return false? Currently, " ", returns true, so I think this should - //also return true - if (len == 0) { - return true; - } - int start = 0; - int end = 0; - int endOff = 0; - - // eat the first characters - //QUESTION: Should we use Character.isWhitespace() instead? - for (start = 0; start < len && termBuffer[start] <= ' '; start++) { - } - // eat the end characters - for (end = len; end >= start && termBuffer[end - 1] <= ' '; end--) { - endOff++; - } - if (start > 0 || end < len) { - if (start < end) { - termAtt.copyBuffer(termBuffer, start, (end - start)); - } else { - termAtt.setEmpty(); - } - if (updateOffsets) { - int newStart = offsetAtt.startOffset() + start; - int newEnd = offsetAtt.endOffset() - (start < end ? 
endOff : 0); - offsetAtt.setOffset(newStart, newEnd); - } - } - - return true; - } -} diff --git a/src/main/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java b/src/main/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java deleted file mode 100644 index 930a09a1100..00000000000 --- a/src/main/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterFilter.java +++ /dev/null @@ -1,574 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.lucene.analysis.miscellaneous; - -import org.apache.lucene.analysis.TokenFilter; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.analysis.tokenattributes.TypeAttribute; -import org.apache.lucene.analysis.util.CharArraySet; -import org.apache.lucene.util.ArrayUtil; -import org.apache.lucene.util.RamUsageEstimator; - -import java.io.IOException; - -/** - * Splits words into subwords and performs optional transformations on subword groups. - * Words are split into subwords with the following rules: - * - split on intra-word delimiters (by default, all non alpha-numeric characters). - * - "Wi-Fi" -> "Wi", "Fi" - * - split on case transitions - * - "PowerShot" -> "Power", "Shot" - * - split on letter-number transitions - * - "SD500" -> "SD", "500" - * - leading and trailing intra-word delimiters on each subword are ignored - * - "//hello---there, 'dude'" -> "hello", "there", "dude" - * - trailing "'s" are removed for each subword - * - "O'Neil's" -> "O", "Neil" - * - Note: this step isn't performed in a separate filter because of possible subword combinations. - *
<p/>
- * The combinations parameter affects how subwords are combined: - * - combinations="0" causes no subword combinations. - * - "PowerShot" -> 0:"Power", 1:"Shot" (0 and 1 are the token positions) - * - combinations="1" means that in addition to the subwords, maximum runs of non-numeric subwords are catenated and produced at the same position of the last subword in the run. - * - "PowerShot" -> 0:"Power", 1:"Shot" 1:"PowerShot" - * - "A's+B's&C's" -> 0:"A", 1:"B", 2:"C", 2:"ABC" - * - "Super-Duper-XL500-42-AutoCoder!" -> 0:"Super", 1:"Duper", 2:"XL", 2:"SuperDuperXL", 3:"500" 4:"42", 5:"Auto", 6:"Coder", 6:"AutoCoder" - *
<p/>
- * One use for WordDelimiterFilter is to help match words with different subword delimiters. - * For example, if the source text contained "wi-fi" one may want "wifi" "WiFi" "wi-fi" "wi+fi" queries to all match. - * One way of doing so is to specify combinations="1" in the analyzer used for indexing, and combinations="0" (the default) - * in the analyzer used for querying. Given that the current StandardTokenizer immediately removes many intra-word - * delimiters, it is recommended that this filter be used after a tokenizer that does not do this (such as WhitespaceTokenizer). - */ -// LUCENE MONITOR: Part of Lucene 4.0, once we upgrade, remove it -public final class WordDelimiterFilter extends TokenFilter { - - public static final int LOWER = 0x01; - public static final int UPPER = 0x02; - public static final int DIGIT = 0x04; - public static final int SUBWORD_DELIM = 0x08; - - // combinations: for testing, not for setting bits - public static final int ALPHA = 0x03; - public static final int ALPHANUM = 0x07; - - /** - * If true, causes parts of words to be generated: - *
<p/>
- * "PowerShot" => "Power" "Shot" - */ - final boolean generateWordParts; - - /** - * If true, causes number subwords to be generated: - *
<p/>
- * "500-42" => "500" "42" - */ - final boolean generateNumberParts; - - /** - * If true, causes maximum runs of word parts to be catenated: - *
<p/>
- * "wi-fi" => "wifi" - */ - final boolean catenateWords; - - /** - * If true, causes maximum runs of number parts to be catenated: - *
<p/>
- * "500-42" => "50042" - */ - final boolean catenateNumbers; - - /** - * If true, causes all subword parts to be catenated: - *
<p/>
- * "wi-fi-4000" => "wifi4000" - */ - final boolean catenateAll; - - /** - * If true, original words are preserved and added to the subword list (Defaults to false) - *
<p/>
- * "500-42" => "500" "42" "500-42" - */ - final boolean preserveOriginal; - - /** - * If not null is the set of tokens to protect from being delimited - */ - final CharArraySet protWords; - - private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class); - private final OffsetAttribute offsetAttribute = addAttribute(OffsetAttribute.class); - private final PositionIncrementAttribute posIncAttribute = addAttribute(PositionIncrementAttribute.class); - private final TypeAttribute typeAttribute = addAttribute(TypeAttribute.class); - - // used for iterating word delimiter breaks - private final WordDelimiterIterator iterator; - - // used for concatenating runs of similar typed subwords (word,number) - private final WordDelimiterConcatenation concat = new WordDelimiterConcatenation(); - // number of subwords last output by concat. - private int lastConcatCount = 0; - - // used for catenate all - private final WordDelimiterConcatenation concatAll = new WordDelimiterConcatenation(); - - // used for accumulating position increment gaps - private int accumPosInc = 0; - - private char savedBuffer[] = new char[1024]; - private int savedStartOffset; - private int savedEndOffset; - private String savedType; - private boolean hasSavedState = false; - // if length by start + end offsets doesn't match the term text then assume - // this is a synonym and don't adjust the offsets. - private boolean hasIllegalOffsets = false; - - // for a run of the same subword type within a word, have we output anything? - private boolean hasOutputToken = false; - // when preserve original is on, have we output any token following it? - // this token must have posInc=0! - private boolean hasOutputFollowingOriginal = false; - - /** - * @param in Token stream to be filtered. 
- * @param charTypeTable - * @param generateWordParts If 1, causes parts of words to be generated: "PowerShot" => "Power" "Shot" - * @param generateNumberParts If 1, causes number subwords to be generated: "500-42" => "500" "42" - * @param catenateWords 1, causes maximum runs of word parts to be catenated: "wi-fi" => "wifi" - * @param catenateNumbers If 1, causes maximum runs of number parts to be catenated: "500-42" => "50042" - * @param catenateAll If 1, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000" - * @param splitOnCaseChange 1, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regards) - * @param preserveOriginal If 1, includes original words in subwords: "500-42" => "500" "42" "500-42" - * @param splitOnNumerics 1, causes "j2se" to be three tokens; "j" "2" "se" - * @param stemEnglishPossessive If 1, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil" - * @param protWords If not null is the set of tokens to protect from being delimited - */ - public WordDelimiterFilter(TokenStream in, - byte[] charTypeTable, - int generateWordParts, - int generateNumberParts, - int catenateWords, - int catenateNumbers, - int catenateAll, - int splitOnCaseChange, - int preserveOriginal, - int splitOnNumerics, - int stemEnglishPossessive, - CharArraySet protWords) { - super(in); - this.generateWordParts = generateWordParts != 0; - this.generateNumberParts = generateNumberParts != 0; - this.catenateWords = catenateWords != 0; - this.catenateNumbers = catenateNumbers != 0; - this.catenateAll = catenateAll != 0; - this.preserveOriginal = preserveOriginal != 0; - this.protWords = protWords; - this.iterator = new WordDelimiterIterator(charTypeTable, splitOnCaseChange != 0, splitOnNumerics != 0, stemEnglishPossessive != 0); - } - - /** - * @param in Token stream to be filtered. 
- * @param generateWordParts If 1, causes parts of words to be generated: "PowerShot", "Power-Shot" => "Power" "Shot" - * @param generateNumberParts If 1, causes number subwords to be generated: "500-42" => "500" "42" - * @param catenateWords 1, causes maximum runs of word parts to be catenated: "wi-fi" => "wifi" - * @param catenateNumbers If 1, causes maximum runs of number parts to be catenated: "500-42" => "50042" - * @param catenateAll If 1, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000" - * @param splitOnCaseChange 1, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regards) - * @param preserveOriginal If 1, includes original words in subwords: "500-42" => "500" "42" "500-42" - * @param splitOnNumerics 1, causes "j2se" to be three tokens; "j" "2" "se" - * @param stemEnglishPossessive If 1, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil" - * @param protWords If not null is the set of tokens to protect from being delimited - */ - public WordDelimiterFilter(TokenStream in, - int generateWordParts, - int generateNumberParts, - int catenateWords, - int catenateNumbers, - int catenateAll, - int splitOnCaseChange, - int preserveOriginal, - int splitOnNumerics, - int stemEnglishPossessive, - CharArraySet protWords) { - this(in, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE, generateWordParts, generateNumberParts, catenateWords, catenateNumbers, catenateAll, splitOnCaseChange, preserveOriginal, splitOnNumerics, stemEnglishPossessive, protWords); - } - - public boolean incrementToken() throws IOException { - while (true) { - if (!hasSavedState) { - // process a new input word - if (!input.incrementToken()) { - return false; - } - - int termLength = termAttribute.length(); - char[] termBuffer = termAttribute.buffer(); - - accumPosInc += posIncAttribute.getPositionIncrement(); - - iterator.setText(termBuffer, termLength); - iterator.next(); - - // word of no delimiters, or protected word: just return it - if ((iterator.current == 0 && iterator.end == termLength) || - (protWords != null && protWords.contains(termBuffer, 0, termLength))) { - posIncAttribute.setPositionIncrement(accumPosInc); - accumPosInc = 0; - return true; - } - - // word of simply delimiters - if (iterator.end == WordDelimiterIterator.DONE && !preserveOriginal) { - // if the posInc is 1, simply ignore it in the accumulation - if (posIncAttribute.getPositionIncrement() == 1) { - accumPosInc--; - } - continue; - } - - saveState(); - - hasOutputToken = false; - hasOutputFollowingOriginal = !preserveOriginal; - lastConcatCount = 0; - - if (preserveOriginal) { - posIncAttribute.setPositionIncrement(accumPosInc); - accumPosInc = 0; - return true; - } - } - - // at the end of the string, output any concatenations - if (iterator.end == WordDelimiterIterator.DONE) { - if (!concat.isEmpty()) { - if (flushConcatenation(concat)) { - return true; - } - } - - if (!concatAll.isEmpty()) { - // only if we haven't output this same combo above! - if (concatAll.subwordCount > lastConcatCount) { - concatAll.writeAndClear(); - return true; - } - concatAll.clear(); - } - - // no saved concatenations, on to the next input word - hasSavedState = false; - continue; - } - - // word surrounded by delimiters: always output - if (iterator.isSingleWord()) { - generatePart(true); - iterator.next(); - return true; - } - - int wordType = iterator.type(); - - // do we already have queued up incompatible concatenations? 
- if (!concat.isEmpty() && (concat.type & wordType) == 0) { - if (flushConcatenation(concat)) { - hasOutputToken = false; - return true; - } - hasOutputToken = false; - } - - // add subwords depending upon options - if (shouldConcatenate(wordType)) { - if (concat.isEmpty()) { - concat.type = wordType; - } - concatenate(concat); - } - - // add all subwords (catenateAll) - if (catenateAll) { - concatenate(concatAll); - } - - // if we should output the word or number part - if (shouldGenerateParts(wordType)) { - generatePart(false); - iterator.next(); - return true; - } - - iterator.next(); - } - } - - /** - * {@inheritDoc} - */ - @Override - public void reset() throws IOException { - super.reset(); - hasSavedState = false; - concat.clear(); - concatAll.clear(); - accumPosInc = 0; - } - - // ================================================= Helper Methods ================================================ - - /** - * Saves the existing attribute states - */ - private void saveState() { - // otherwise, we have delimiters, save state - savedStartOffset = offsetAttribute.startOffset(); - savedEndOffset = offsetAttribute.endOffset(); - // if length by start + end offsets doesn't match the term text then assume this is a synonym and don't adjust the offsets. - hasIllegalOffsets = (savedEndOffset - savedStartOffset != termAttribute.length()); - savedType = typeAttribute.type(); - - if (savedBuffer.length < termAttribute.length()) { - savedBuffer = new char[ArrayUtil.oversize(termAttribute.length(), RamUsageEstimator.NUM_BYTES_CHAR)]; - } - - System.arraycopy(termAttribute.buffer(), 0, savedBuffer, 0, termAttribute.length()); - iterator.text = savedBuffer; - - hasSavedState = true; - } - - /** - * Flushes the given WordDelimiterConcatenation by either writing its concat and then clearing, or just clearing. 
- * - * @param concatenation WordDelimiterConcatenation that will be flushed - * @return {@code true} if the concatenation was written before it was cleared, {@code} false otherwise - */ - private boolean flushConcatenation(WordDelimiterConcatenation concatenation) { - lastConcatCount = concatenation.subwordCount; - if (concatenation.subwordCount != 1 || !shouldGenerateParts(concatenation.type)) { - concatenation.writeAndClear(); - return true; - } - concatenation.clear(); - return false; - } - - /** - * Determines whether to concatenate a word or number if the current word is the given type - * - * @param wordType Type of the current word used to determine if it should be concatenated - * @return {@code true} if concatenation should occur, {@code false} otherwise - */ - private boolean shouldConcatenate(int wordType) { - return (catenateWords && isAlpha(wordType)) || (catenateNumbers && isDigit(wordType)); - } - - /** - * Determines whether a word/number part should be generated for a word of the given type - * - * @param wordType Type of the word used to determine if a word/number part should be generated - * @return {@code true} if a word/number part should be generated, {@code false} otherwise - */ - private boolean shouldGenerateParts(int wordType) { - return (generateWordParts && isAlpha(wordType)) || (generateNumberParts && isDigit(wordType)); - } - - /** - * Concatenates the saved buffer to the given WordDelimiterConcatenation - * - * @param concatenation WordDelimiterConcatenation to concatenate the buffer to - */ - private void concatenate(WordDelimiterConcatenation concatenation) { - if (concatenation.isEmpty()) { - concatenation.startOffset = savedStartOffset + iterator.current; - } - concatenation.append(savedBuffer, iterator.current, iterator.end - iterator.current); - concatenation.endOffset = savedStartOffset + iterator.end; - } - - /** - * Generates a word/number part, updating the appropriate attributes - * - * @param isSingleWord {@code true} if the generation is occurring from a single word, {@code false} otherwise - */ - private void generatePart(boolean isSingleWord) { - clearAttributes(); - termAttribute.copyBuffer(savedBuffer, iterator.current, iterator.end - iterator.current); - - int startOffSet = (isSingleWord || !hasIllegalOffsets) ? savedStartOffset + iterator.current : savedStartOffset; - int endOffSet = (hasIllegalOffsets) ? savedEndOffset : savedStartOffset + iterator.end; - - offsetAttribute.setOffset(startOffSet, endOffSet); - posIncAttribute.setPositionIncrement(position(false)); - typeAttribute.setType(savedType); - } - - /** - * Get the position increment gap for a subword or concatenation - * - * @param inject true if this token wants to be injected - * @return position increment gap - */ - private int position(boolean inject) { - int posInc = accumPosInc; - - if (hasOutputToken) { - accumPosInc = 0; - return inject ? 
0 : Math.max(1, posInc); - } - - hasOutputToken = true; - - if (!hasOutputFollowingOriginal) { - // the first token following the original is 0 regardless - hasOutputFollowingOriginal = true; - return 0; - } - // clear the accumulated position increment - accumPosInc = 0; - return Math.max(1, posInc); - } - - /** - * Checks if the given word type includes {@link #ALPHA} - * - * @param type Word type to check - * @return {@code true} if the type contains ALPHA, {@code false} otherwise - */ - static boolean isAlpha(int type) { - return (type & ALPHA) != 0; - } - - /** - * Checks if the given word type includes {@link #DIGIT} - * - * @param type Word type to check - * @return {@code true} if the type contains DIGIT, {@code false} otherwise - */ - static boolean isDigit(int type) { - return (type & DIGIT) != 0; - } - - /** - * Checks if the given word type includes {@link #SUBWORD_DELIM} - * - * @param type Word type to check - * @return {@code true} if the type contains SUBWORD_DELIM, {@code false} otherwise - */ - static boolean isSubwordDelim(int type) { - return (type & SUBWORD_DELIM) != 0; - } - - /** - * Checks if the given word type includes {@link #UPPER} - * - * @param type Word type to check - * @return {@code true} if the type contains UPPER, {@code false} otherwise - */ - static boolean isUpper(int type) { - return (type & UPPER) != 0; - } - - // ================================================= Inner Classes ================================================= - - /** - * A WDF concatenated 'run' - */ - final class WordDelimiterConcatenation { - final StringBuilder buffer = new StringBuilder(); - int startOffset; - int endOffset; - int type; - int subwordCount; - - /** - * Appends the given text of the given length, to the concetenation at the given offset - * - * @param text Text to append - * @param offset Offset in the concetenation to add the text - * @param length Length of the text to append - */ - void append(char text[], int offset, int length) { - buffer.append(text, offset, length); - subwordCount++; - } - - /** - * Writes the concatenation to the attributes - */ - void write() { - clearAttributes(); - if (termAttribute.length() < buffer.length()) { - termAttribute.resizeBuffer(buffer.length()); - } - char termbuffer[] = termAttribute.buffer(); - - buffer.getChars(0, buffer.length(), termbuffer, 0); - termAttribute.setLength(buffer.length()); - - if (hasIllegalOffsets) { - offsetAttribute.setOffset(savedStartOffset, savedEndOffset); - } else { - offsetAttribute.setOffset(startOffset, endOffset); - } - posIncAttribute.setPositionIncrement(position(true)); - typeAttribute.setType(savedType); - accumPosInc = 0; - } - - /** - * Determines if the concatenation is empty - * - * @return {@code true} if the concatenation is empty, {@code false} otherwise - */ - boolean isEmpty() { - return buffer.length() == 0; - } - - /** - * Clears the concatenation and resets its state - */ - void clear() { - buffer.setLength(0); - startOffset = endOffset = type = subwordCount = 0; - } - - /** - * Convenience method for the common scenario of having to write the concetenation and then clearing its state - */ - void writeAndClear() { - write(); - clear(); - } - } - // questions: - // negative numbers? -42 indexed as just 42? - // dollar sign? $42 - // percent sign? 33% - // downsides: if source text is "powershot" then a query of "PowerShot" won't match! 
-} diff --git a/src/main/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterIterator.java b/src/main/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterIterator.java deleted file mode 100644 index 867422c920e..00000000000 --- a/src/main/java/org/apache/lucene/analysis/miscellaneous/WordDelimiterIterator.java +++ /dev/null @@ -1,341 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.lucene.analysis.miscellaneous; - -import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*; - -/** - * A BreakIterator-like API for iterating over subwords in text, according to WordDelimiterFilter rules. - * - * @lucene.internal - */ -public final class WordDelimiterIterator { - - /** - * Indicates the end of iteration - */ - public static final int DONE = -1; - - public static final byte[] DEFAULT_WORD_DELIM_TABLE; - - char text[]; - int length; - - /** - * start position of text, excluding leading delimiters - */ - int startBounds; - /** - * end position of text, excluding trailing delimiters - */ - int endBounds; - - /** - * Beginning of subword - */ - int current; - /** - * End of subword - */ - int end; - - /* does this string end with a possessive such as 's */ - private boolean hasFinalPossessive = false; - - /** - * If false, causes case changes to be ignored (subwords will only be generated - * given SUBWORD_DELIM tokens). (Defaults to true) - */ - final boolean splitOnCaseChange; - - /** - * If false, causes numeric changes to be ignored (subwords will only be generated - * given SUBWORD_DELIM tokens). (Defaults to true) - */ - final boolean splitOnNumerics; - - /** - * If true, causes trailing "'s" to be removed for each subword. (Defaults to true) - *
- * "O'Neil's" => "O", "Neil" - */ - final boolean stemEnglishPossessive; - - private final byte[] charTypeTable; - - /** - * if true, need to skip over a possessive found in the last call to next() - */ - private boolean skipPossessive = false; - - // TODO: should there be a WORD_DELIM category for chars that only separate words (no catenation of subwords will be - // done if separated by these chars?) "," would be an obvious candidate... - static { - byte[] tab = new byte[256]; - for (int i = 0; i < 256; i++) { - byte code = 0; - if (Character.isLowerCase(i)) { - code |= LOWER; - } else if (Character.isUpperCase(i)) { - code |= UPPER; - } else if (Character.isDigit(i)) { - code |= DIGIT; - } - if (code == 0) { - code = SUBWORD_DELIM; - } - tab[i] = code; - } - DEFAULT_WORD_DELIM_TABLE = tab; - } - - /** - * Create a new WordDelimiterIterator operating with the supplied rules. - * - * @param charTypeTable table containing character types - * @param splitOnCaseChange if true, causes "PowerShot" to be two tokens; ("Power-Shot" remains two parts regards) - * @param splitOnNumerics if true, causes "j2se" to be three tokens; "j" "2" "se" - * @param stemEnglishPossessive if true, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil" - */ - WordDelimiterIterator(byte[] charTypeTable, boolean splitOnCaseChange, boolean splitOnNumerics, boolean stemEnglishPossessive) { - this.charTypeTable = charTypeTable; - this.splitOnCaseChange = splitOnCaseChange; - this.splitOnNumerics = splitOnNumerics; - this.stemEnglishPossessive = stemEnglishPossessive; - } - - /** - * Advance to the next subword in the string. - * - * @return index of the next subword, or {@link #DONE} if all subwords have been returned - */ - int next() { - current = end; - if (current == DONE) { - return DONE; - } - - if (skipPossessive) { - current += 2; - skipPossessive = false; - } - - int lastType = 0; - - while (current < endBounds && (isSubwordDelim(lastType = charType(text[current])))) { - current++; - } - - if (current >= endBounds) { - return end = DONE; - } - - for (end = current + 1; end < endBounds; end++) { - int type = charType(text[end]); - if (isBreak(lastType, type)) { - break; - } - lastType = type; - } - - if (end < endBounds - 1 && endsWithPossessive(end + 2)) { - skipPossessive = true; - } - - return end; - } - - - /** - * Return the type of the current subword. - * This currently uses the type of the first character in the subword. 
- * - * @return type of the current word - */ - int type() { - if (end == DONE) { - return 0; - } - - int type = charType(text[current]); - switch (type) { - // return ALPHA word type for both lower and upper - case LOWER: - case UPPER: - return ALPHA; - default: - return type; - } - } - - /** - * Reset the text to a new value, and reset all state - * - * @param text New text - * @param length length of the text - */ - void setText(char text[], int length) { - this.text = text; - this.length = this.endBounds = length; - current = startBounds = end = 0; - skipPossessive = hasFinalPossessive = false; - setBounds(); - } - - // ================================================= Helper Methods ================================================ - - /** - * Determines whether the transition from lastType to type indicates a break - * - * @param lastType Last subword type - * @param type Current subword type - * @return {@code true} if the transition indicates a break, {@code false} otherwise - */ - private boolean isBreak(int lastType, int type) { - if ((type & lastType) != 0) { - return false; - } - - if (!splitOnCaseChange && isAlpha(lastType) && isAlpha(type)) { - // ALPHA->ALPHA: always ignore if case isn't considered. - return false; - } else if (isUpper(lastType) && isAlpha(type)) { - // UPPER->letter: Don't split - return false; - } else if (!splitOnNumerics && ((isAlpha(lastType) && isDigit(type)) || (isDigit(lastType) && isAlpha(type)))) { - // ALPHA->NUMERIC, NUMERIC->ALPHA :Don't split - return false; - } - - return true; - } - - /** - * Determines if the current word contains only one subword. Note, it could be potentially surrounded by delimiters - * - * @return {@code true} if the current word contains only one subword, {@code false} otherwise - */ - boolean isSingleWord() { - if (hasFinalPossessive) { - return current == startBounds && end == endBounds - 2; - } else { - return current == startBounds && end == endBounds; - } - } - - /** - * Set the internal word bounds (remove leading and trailing delimiters). Note, if a possessive is found, don't remove - * it yet, simply note it. 
- */ - private void setBounds() { - while (startBounds < length && (isSubwordDelim(charType(text[startBounds])))) { - startBounds++; - } - - while (endBounds > startBounds && (isSubwordDelim(charType(text[endBounds - 1])))) { - endBounds--; - } - if (endsWithPossessive(endBounds)) { - hasFinalPossessive = true; - } - current = startBounds; - } - - /** - * Determines if the text at the given position indicates an English possessive which should be removed - * - * @param pos Position in the text to check if it indicates an English possessive - * @return {@code true} if the text at the position indicates an English posessive, {@code false} otherwise - */ - private boolean endsWithPossessive(int pos) { - return (stemEnglishPossessive && - pos > 2 && - text[pos - 2] == '\'' && - (text[pos - 1] == 's' || text[pos - 1] == 'S') && - isAlpha(charType(text[pos - 3])) && - (pos == endBounds || isSubwordDelim(charType(text[pos])))); - } - - /** - * Determines the type of the given character - * - * @param ch Character whose type is to be determined - * @return Type of the character - */ - private int charType(int ch) { - if (ch < charTypeTable.length) { - return charTypeTable[ch]; - } - return getType(ch); - } - - /** - * Computes the type of the given character - * - * @param ch Character whose type is to be determined - * @return Type of the character - */ - public static byte getType(int ch) { - switch (Character.getType(ch)) { - case Character.UPPERCASE_LETTER: - return UPPER; - case Character.LOWERCASE_LETTER: - return LOWER; - - case Character.TITLECASE_LETTER: - case Character.MODIFIER_LETTER: - case Character.OTHER_LETTER: - case Character.NON_SPACING_MARK: - case Character.ENCLOSING_MARK: // depends what it encloses? - case Character.COMBINING_SPACING_MARK: - return ALPHA; - - case Character.DECIMAL_DIGIT_NUMBER: - case Character.LETTER_NUMBER: - case Character.OTHER_NUMBER: - return DIGIT; - - // case Character.SPACE_SEPARATOR: - // case Character.LINE_SEPARATOR: - // case Character.PARAGRAPH_SEPARATOR: - // case Character.CONTROL: - // case Character.FORMAT: - // case Character.PRIVATE_USE: - - case Character.SURROGATE: // prevent splitting - return ALPHA | DIGIT; - - // case Character.DASH_PUNCTUATION: - // case Character.START_PUNCTUATION: - // case Character.END_PUNCTUATION: - // case Character.CONNECTOR_PUNCTUATION: - // case Character.OTHER_PUNCTUATION: - // case Character.MATH_SYMBOL: - // case Character.CURRENCY_SYMBOL: - // case Character.MODIFIER_SYMBOL: - // case Character.OTHER_SYMBOL: - // case Character.INITIAL_QUOTE_PUNCTUATION: - // case Character.FINAL_QUOTE_PUNCTUATION: - - default: - return SUBWORD_DELIM; - } - } -} \ No newline at end of file diff --git a/src/main/java/org/elasticsearch/index/analysis/WordDelimiterTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/WordDelimiterTokenFilterFactory.java index 4e6aded73d8..e0eb881d489 100644 --- a/src/main/java/org/elasticsearch/index/analysis/WordDelimiterTokenFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/WordDelimiterTokenFilterFactory.java @@ -35,18 +35,12 @@ import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; +import static org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter.*; + public class WordDelimiterTokenFilterFactory extends AbstractTokenFilterFactory { private final byte[] charTypeTable; - private final boolean generateWordParts; - private final boolean generateNumberParts; - private final boolean catenateWords; - private 
final boolean catenateNumbers;
-    private final boolean catenateAll;
-    private final boolean splitOnCaseChange;
-    private final boolean preserveOriginal;
-    private final boolean splitOnNumerics;
-    private final boolean stemEnglishPossessive;
+    private final int flags;

     private final CharArraySet protoWords;

     @Inject
@@ -65,45 +59,45 @@ public class WordDelimiterTokenFilterFactory extends AbstractTokenFilterFactory
         } else {
             this.charTypeTable = parseTypes(charTypeTableValues);
         }
-
-        // If 1, causes parts of words to be generated: "PowerShot" => "Power" "Shot"
-        this.generateWordParts = settings.getAsBoolean("generate_word_parts", true);
-        // If 1, causes number subwords to be generated: "500-42" => "500" "42"
-        this.generateNumberParts = settings.getAsBoolean("generate_number_parts", true);
+        int flags = 0;
+        // If set, causes parts of words to be generated: "PowerShot" => "Power" "Shot"
+        flags |= getFlag(GENERATE_WORD_PARTS, settings, "generate_word_parts", true);
+        // If set, causes number subwords to be generated: "500-42" => "500" "42"
+        flags |= getFlag(GENERATE_NUMBER_PARTS, settings, "generate_number_parts", true);
         // If set, causes maximum runs of word parts to be catenated: "wi-fi" => "wifi"
-        this.catenateWords = settings.getAsBoolean("catenate_words", false);
-        // If 1, causes maximum runs of number parts to be catenated: "500-42" => "50042"
-        this.catenateNumbers = settings.getAsBoolean("catenate_numbers", false);
-        // If 1, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000"
-        this.catenateAll = settings.getAsBoolean("catenate_all", false);
+        flags |= getFlag(CATENATE_WORDS, settings, "catenate_words", false);
+        // If set, causes maximum runs of number parts to be catenated: "500-42" => "50042"
+        flags |= getFlag(CATENATE_NUMBERS, settings, "catenate_numbers", false);
+        // If set, causes all subword parts to be catenated: "wi-fi-4000" => "wifi4000"
+        flags |= getFlag(CATENATE_ALL, settings, "catenate_all", false);
         // If set, causes "PowerShot" to be two tokens ("Power-Shot" remains two parts regardless)
-        this.splitOnCaseChange = settings.getAsBoolean("split_on_case_change", true);
-        // If 1, includes original words in subwords: "500-42" => "500" "42" "500-42"
-        this.preserveOriginal = settings.getAsBoolean("preserve_original", false);
+        flags |= getFlag(SPLIT_ON_CASE_CHANGE, settings, "split_on_case_change", true);
+        // If set, includes original words in subwords: "500-42" => "500" "42" "500-42"
+        flags |= getFlag(PRESERVE_ORIGINAL, settings, "preserve_original", false);
         // If set, causes "j2se" to be three tokens: "j" "2" "se"
-        this.splitOnNumerics = settings.getAsBoolean("split_on_numerics", true);
-        // If 1, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil"
-        this.stemEnglishPossessive = settings.getAsBoolean("stem_english_possessive", true);
+        flags |= getFlag(SPLIT_ON_NUMERICS, settings, "split_on_numerics", true);
+        // If set, causes trailing "'s" to be removed for each subword: "O'Neil's" => "O", "Neil"
+        flags |= getFlag(STEM_ENGLISH_POSSESSIVE, settings, "stem_english_possessive", true);
         // If not null, the set of tokens to protect from being delimited
         Set protectedWords = Analysis.getWordSet(env, settings, "protected_words", version);
         this.protoWords = protectedWords == null ? null : CharArraySet.copy(Lucene.VERSION, protectedWords);
+        this.flags = flags;
     }

     @Override
     public TokenStream create(TokenStream tokenStream) {
         return new WordDelimiterFilter(tokenStream,
                 charTypeTable,
-                generateWordParts ? 1 : 0,
-                generateNumberParts ? 1 : 0,
-                catenateWords ? 1 : 0,
-                catenateNumbers ? 1 : 0,
-                catenateAll ? 1 : 0,
-                splitOnCaseChange ? 1 : 0,
-                preserveOriginal ? 1 : 0,
-                splitOnNumerics ? 1 : 0,
-                stemEnglishPossessive ? 1 : 0,
+                flags,
                 protoWords);
     }
+
+    public int getFlag(int flag, Settings settings, String key, boolean defaultValue) {
+        if (settings.getAsBoolean(key, defaultValue)) {
+            return flag;
+        }
+        return 0;
+    }

     // source => type
     private static Pattern typePattern = Pattern.compile("(.*)\\s*=>\\s*(.*)\\s*$");
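The factory now collapses its nine boolean fields into a single int of WordDelimiterFilter bit flags. As a small sketch of what that plumbing amounts to (not part of the patch; it assumes, as the copied WordDelimiterFilter defines them, that the constants are distinct single-bit masks):

    // Sketch only: each option is one bit, so configuring is OR-ing bits in,
    // and the filter reads an option back with a mask test.
    int flags = 0;
    if (settings.getAsBoolean("generate_word_parts", true)) {
        flags |= WordDelimiterFilter.GENERATE_WORD_PARTS;  // "PowerShot" => "Power", "Shot"
    }
    if (settings.getAsBoolean("catenate_words", false)) {
        flags |= WordDelimiterFilter.CATENATE_WORDS;       // "wi-fi" => "wifi"
    }
    boolean catenateWords = (flags & WordDelimiterFilter.CATENATE_WORDS) != 0;

getFlag(flag, settings, key, defaultValue) is the same fold expressed per setting: it returns the flag bit when the setting resolves to true and 0 otherwise, so the ORs above reduce to one line per option.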
diff --git a/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java b/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java
index 198e5133893..e9f185a60d0 100644
--- a/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java
+++ b/src/main/java/org/elasticsearch/indices/analysis/IndicesAnalysisService.java
@@ -328,11 +328,14 @@ public class IndicesAnalysisService extends AbstractComponent {

             @Override
             public TokenStream create(TokenStream tokenStream) {
-                return new WordDelimiterFilter(tokenStream, WordDelimiterIterator.DEFAULT_WORD_DELIM_TABLE,
-                        1, 1, 0, 0, 0, 1, 0, 1, 1, null);
+                return new WordDelimiterFilter(tokenStream,
+                        WordDelimiterFilter.GENERATE_WORD_PARTS |
+                        WordDelimiterFilter.GENERATE_NUMBER_PARTS |
+                        WordDelimiterFilter.SPLIT_ON_CASE_CHANGE |
+                        WordDelimiterFilter.SPLIT_ON_NUMERICS |
+                        WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
             }
         }));
-
         tokenFilterFactories.put("stop", new PreBuiltTokenFilterFactoryFactory(new TokenFilterFactory() {
             @Override
             public String name() {

From 6ca64074680ade01b596cb3f458b9b5a33bc05c0 Mon Sep 17 00:00:00 2001
From: Martijn van Groningen
Date: Thu, 1 Nov 2012 15:33:50 +0100
Subject: [PATCH 096/146] lucene 4: Re-fixed issue in SourceScoreOrderFragmentsBuilder and SourceSimpleFragmentsBuilder.
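In Lucene 4 the per-segment state lives in AtomicReaderContext, and the reader handed to the fragments builders is already the segment-level (atomic) reader, so the lookup can be positioned directly and the IndexReader-based setNextReader() helper removed from SearchLookup below is no longer needed. The pattern, as used in both builders (assuming, as they do, that the passed reader really is atomic):

    // Position the lookup on the current segment and document before
    // extracting _source values for highlighting.
    AtomicReaderContext context = (AtomicReaderContext) reader.getContext();
    lookup.setNextReader(context);
    lookup.setNextDocId(docId);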
--- .../SourceScoreOrderFragmentsBuilder.java | 3 ++- .../SourceSimpleFragmentsBuilder.java | 3 ++- .../elasticsearch/search/lookup/SearchLookup.java | 13 ------------- 3 files changed, 4 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceScoreOrderFragmentsBuilder.java b/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceScoreOrderFragmentsBuilder.java index 84ec390b2e6..1ea8deb9069 100644 --- a/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceScoreOrderFragmentsBuilder.java +++ b/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceScoreOrderFragmentsBuilder.java @@ -20,6 +20,7 @@ package org.elasticsearch.search.highlight.vectorhighlight; import org.apache.lucene.document.Field; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.vectorhighlight.BoundaryScanner; import org.apache.lucene.search.vectorhighlight.XScoreOrderFragmentsBuilder; @@ -50,7 +51,7 @@ public class SourceScoreOrderFragmentsBuilder extends XScoreOrderFragmentsBuilde protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException { // we know its low level reader, and matching docId, since that's how we call the highlighter with SearchLookup lookup = searchContext.lookup(); - lookup.setNextReader(reader); + lookup.setNextReader((AtomicReaderContext) reader.getContext()); lookup.setNextDocId(docId); List values = lookup.source().extractRawValues(mapper.names().sourcePath()); diff --git a/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java b/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java index f3fd38ff1b3..1bf3cc2ba6d 100644 --- a/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java +++ b/src/main/java/org/elasticsearch/search/highlight/vectorhighlight/SourceSimpleFragmentsBuilder.java @@ -20,6 +20,7 @@ package org.elasticsearch.search.highlight.vectorhighlight; import org.apache.lucene.document.Field; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.vectorhighlight.BoundaryScanner; import org.apache.lucene.search.vectorhighlight.XSimpleFragmentsBuilder; @@ -52,7 +53,7 @@ public class SourceSimpleFragmentsBuilder extends XSimpleFragmentsBuilder { protected Field[] getFields(IndexReader reader, int docId, String fieldName) throws IOException { // we know its low level reader, and matching docId, since that's how we call the highlighter with SearchLookup lookup = searchContext.lookup(); - lookup.setNextReader(reader); + lookup.setNextReader((AtomicReaderContext) reader.getContext()); lookup.setNextDocId(docId); List values = lookup.source().extractRawValues(mapper.names().sourcePath()); diff --git a/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java b/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java index dbe9e03811d..efa151b5cd5 100644 --- a/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java +++ b/src/main/java/org/elasticsearch/search/lookup/SearchLookup.java @@ -20,11 +20,8 @@ package org.elasticsearch.search.lookup; import com.google.common.collect.ImmutableMap; -import org.apache.lucene.index.AtomicReader; import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReader; import 
org.apache.lucene.search.Scorer;
-import org.elasticsearch.ElasticSearchIllegalArgumentException;
 import org.elasticsearch.common.Nullable;
 import org.elasticsearch.index.cache.field.data.FieldDataCache;
 import org.elasticsearch.index.mapper.MapperService;
@@ -80,14 +77,4 @@ public class SearchLookup {
         sourceLookup.setNextDocId(docId);
         fieldsLookup.setNextDocId(docId);
     }
-
-    // Sneaky way fvh to get the ARC. See SourceScoreOrderFragmentsBuilder. Passed reader must be atomic.
-    // but that is the case in SourceScoreOrderFragmentsBuilder.
-    public void setNextReader(IndexReader reader) {
-        if (AtomicReader.class.isInstance(reader)) {
-            throw new ElasticSearchIllegalArgumentException("reader not instance of AtomicReader, but " + reader.getClass());
-        }
-        AtomicReaderContext context = (AtomicReaderContext) reader.getContext();
-        setNextReader(context);
-    }
 }

From 79368bb221253a94adabe96d2420845f918e3791 Mon Sep 17 00:00:00 2001
From: Shay Banon
Date: Thu, 1 Nov 2012 15:35:50 +0100
Subject: [PATCH 097/146] lucene 4: fix visitors to use constants for field names
---
 .../index/mapper/internal/SourceFieldVisitor.java            | 2 +-
 .../index/mapper/selector/UidAndRoutingFieldVisitor.java     | 5 +++--
 .../index/mapper/selector/UidAndSourceFieldVisitor.java      | 4 ++--
 .../elasticsearch/index/mapper/selector/UidFieldVisitor.java | 2 +-
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldVisitor.java b/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldVisitor.java
index 844ef9d5853..4b378acdf3a 100644
--- a/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldVisitor.java
+++ b/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldVisitor.java
@@ -50,7 +50,7 @@ public class SourceFieldVisitor extends BaseFieldVisitor {
     @Override
     public Document createDocument() {
         Document document = new Document();
-        document.add(new StoredField("_source", source));
+        document.add(new StoredField(SourceFieldMapper.NAME, source));
         return document;
     }

diff --git a/src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldVisitor.java b/src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldVisitor.java
index 26af9b3963d..f037412d1e1 100644
--- a/src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldVisitor.java
+++ b/src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldVisitor.java
@@ -24,6 +24,7 @@ import org.apache.lucene.document.StoredField;
 import org.apache.lucene.index.FieldInfo;
 import org.elasticsearch.common.lucene.document.BaseFieldVisitor;
 import org.elasticsearch.index.mapper.internal.RoutingFieldMapper;
+import org.elasticsearch.index.mapper.internal.SourceFieldMapper;
 import org.elasticsearch.index.mapper.internal.UidFieldMapper;

 import java.io.IOException;
@@ -39,8 +40,8 @@ public class UidAndRoutingFieldVisitor extends BaseFieldVisitor {
     @Override
     public Document createDocument() {
         Document document = new Document();
-        document.add(new StoredField("uid", uid));
-        document.add(new StoredField("_source", routing));
+        document.add(new StoredField(UidFieldMapper.NAME, uid));
+        document.add(new StoredField(SourceFieldMapper.NAME, routing));
         return document;
     }
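These visitor classes are built on Lucene 4's StoredFieldVisitor, which replaced the old FieldSelector: the stored-fields reader pushes each field to the visitor and asks via needsField() whether to load it. A minimal illustration of the pattern (SingleFieldVisitor is a made-up name for this sketch, not part of the patch):

    import org.apache.lucene.index.FieldInfo;
    import org.apache.lucene.index.StoredFieldVisitor;

    import java.io.IOException;

    // Illustrative only: collect one stored string field, and stop the
    // stored-fields scan as soon as it has been loaded.
    final class SingleFieldVisitor extends StoredFieldVisitor {
        private final String fieldName;
        private String value;

        SingleFieldVisitor(String fieldName) {
            this.fieldName = fieldName;
        }

        @Override
        public Status needsField(FieldInfo fieldInfo) throws IOException {
            if (fieldName.equals(fieldInfo.name)) {
                return Status.YES;                              // load this field
            }
            return value != null ? Status.STOP : Status.NO;     // done once we have it
        }

        @Override
        public void stringField(FieldInfo fieldInfo, String value) throws IOException {
            this.value = value;
        }

        String value() {
            return value;
        }
    }

A caller runs it with reader.document(docId, visitor) and reads the value back; the BaseFieldVisitor subclasses in this commit do essentially that, then repackage the collected values into a Document.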
diff --git a/src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldVisitor.java b/src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldVisitor.java
index fc9e8008d26..e5431b4770d 100644
--- a/src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldVisitor.java
+++ b/src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldVisitor.java
@@ -40,8 +40,8 @@ public class UidAndSourceFieldVisitor extends BaseFieldVisitor {
     @Override
     public Document createDocument() {
         Document document = new Document();
-        document.add(new StoredField("uid", uid));
-        document.add(new StoredField("_source", source));
+        document.add(new StoredField(UidFieldMapper.NAME, uid));
+        document.add(new StoredField(SourceFieldMapper.NAME, source));
         return document;
     }

diff --git a/src/main/java/org/elasticsearch/index/mapper/selector/UidFieldVisitor.java b/src/main/java/org/elasticsearch/index/mapper/selector/UidFieldVisitor.java
index 10c533fc904..e3dc0fe5cf6 100644
--- a/src/main/java/org/elasticsearch/index/mapper/selector/UidFieldVisitor.java
+++ b/src/main/java/org/elasticsearch/index/mapper/selector/UidFieldVisitor.java
@@ -53,7 +53,7 @@ public class UidFieldVisitor extends BaseFieldVisitor {
     @Override
     public Document createDocument() {
         Document document = new Document();
-        document.add(new StoredField("_uid", uid));
+        document.add(new StoredField(UidFieldMapper.NAME, uid));
         return document;
     }

From 0660e20c476661b388cf5fc2f818d92e8b301718 Mon Sep 17 00:00:00 2001
From: Shay Banon
Date: Fri, 2 Nov 2012 10:25:56 +0100
Subject: [PATCH 098/146] lucene 4: cleanup terms/uid filter
---
 .../DeletionAwareConstantScoreQuery.java      |   1 +
 .../apache/lucene/search/XTermsFilter.java    | 219 +++++++++---------
 .../index/mapper/internal/IdFieldMapper.java  |   6 +-
 .../elasticsearch/index/search/UidFilter.java |  11 +-
 4 files changed, 114 insertions(+), 123 deletions(-)

diff --git a/src/main/java/org/apache/lucene/search/DeletionAwareConstantScoreQuery.java b/src/main/java/org/apache/lucene/search/DeletionAwareConstantScoreQuery.java
index 23d2081a48a..f69c6eb3928 100644
--- a/src/main/java/org/apache/lucene/search/DeletionAwareConstantScoreQuery.java
+++ b/src/main/java/org/apache/lucene/search/DeletionAwareConstantScoreQuery.java
@@ -28,6 +28,7 @@ import org.elasticsearch.common.lucene.search.NotDeletedFilter;
 // So it can basically be cached safely even with a reader that changes deletions but remains with the same cache key
 // See more: https://issues.apache.org/jira/browse/LUCENE-2468
 // TODO Lucene 4.0 won't need this, since live docs are "and'ed" while scoring
+// LUCENE 4 UPGRADE: we probably don't need this anymore, because of acceptDocs
 public class DeletionAwareConstantScoreQuery extends ConstantScoreQuery {

     private final Filter actualFilter;

diff --git a/src/main/java/org/apache/lucene/search/XTermsFilter.java b/src/main/java/org/apache/lucene/search/XTermsFilter.java
index 6873ebcd207..22b326a1d12 100644
--- a/src/main/java/org/apache/lucene/search/XTermsFilter.java
+++ b/src/main/java/org/apache/lucene/search/XTermsFilter.java
@@ -19,13 +19,7 @@

 package org.apache.lucene.search;

-import org.apache.lucene.index.AtomicReader;
-import org.apache.lucene.index.AtomicReaderContext;
-import org.apache.lucene.index.DocsEnum;
-import org.apache.lucene.index.Fields;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.Terms;
-import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.index.*;
 import org.apache.lucene.queries.TermsFilter;
 import org.apache.lucene.util.Bits;
 import org.apache.lucene.util.BytesRef;
@@ -39,51 +33,52 @@ import java.util.Collection;
 * Similar to {@link TermsFilter} but stores the terms in an array for better memory usage
 * when cached, and also
uses bulk read */ -// LUCENE MONITOR: Against TermsFilter - this is now identical to TermsFilter once 4.1 is released +// LUCENE 4 UPGRADE: Make sure to sync this against latest 4.1 +// LUCENE 4.1: once its out, we can use TermsFilter from it public class XTermsFilter extends Filter { private final Term[] filterTerms; private final boolean[] resetTermsEnum;// true if the enum must be reset when building the bitset private final int length; - + /** * Creates a new {@link XTermsFilter} from the given collection. The collection * can contain duplicate terms and multiple fields. */ public XTermsFilter(Collection terms) { - this(terms.toArray(new Term[terms.size()])); + this(terms.toArray(new Term[terms.size()])); } - + /** * Creates a new {@link XTermsFilter} from the given array. The array can * contain duplicate terms and multiple fields. */ public XTermsFilter(Term... terms) { - if (terms == null || terms.length == 0) { - throw new IllegalArgumentException("TermsFilter requires at least one term"); - } - Arrays.sort(terms); - this.filterTerms = new Term[terms.length]; - this.resetTermsEnum = new boolean[terms.length]; - int index = 0; - for (int i = 0; i < terms.length; i++) { - Term currentTerm = terms[i]; - boolean fieldChanged = true; - if (index > 0) { - // deduplicate - if (filterTerms[index-1].field().equals(currentTerm.field())) { - fieldChanged = false; - if (filterTerms[index-1].bytes().bytesEquals(currentTerm.bytes())){ - continue; - } - } + if (terms == null || terms.length == 0) { + throw new IllegalArgumentException("TermsFilter requires at least one term"); } - this.filterTerms[index] = currentTerm; - this.resetTermsEnum[index] = index == 0 || fieldChanged; // mark index 0 so we have a clear path in the iteration - - index++; - } - length = index; + Arrays.sort(terms); + this.filterTerms = new Term[terms.length]; + this.resetTermsEnum = new boolean[terms.length]; + int index = 0; + for (int i = 0; i < terms.length; i++) { + Term currentTerm = terms[i]; + boolean fieldChanged = true; + if (index > 0) { + // deduplicate + if (filterTerms[index - 1].field().equals(currentTerm.field())) { + fieldChanged = false; + if (filterTerms[index - 1].bytes().bytesEquals(currentTerm.bytes())) { + continue; + } + } + } + this.filterTerms[index] = currentTerm; + this.resetTermsEnum[index] = index == 0 || fieldChanged; // mark index 0 so we have a clear path in the iteration + + index++; + } + length = index; } public Term[] getTerms() { @@ -92,100 +87,100 @@ public class XTermsFilter extends Filter { @Override public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { - AtomicReader reader = context.reader(); - FixedBitSet result = null; // lazy init if needed - no need to create a big bitset ahead of time - Fields fields = reader.fields(); - if (fields == null) { + AtomicReader reader = context.reader(); + FixedBitSet result = null; // lazy init if needed - no need to create a big bitset ahead of time + Fields fields = reader.fields(); + if (fields == null) { + return result; + } + final BytesRef br = new BytesRef(); + Terms terms = null; + TermsEnum termsEnum = null; + DocsEnum docs = null; + assert resetTermsEnum[0]; + for (int i = 0; i < length; i++) { + Term term = this.filterTerms[i]; + if (resetTermsEnum[i]) { + terms = fields.terms(term.field()); + if (terms == null) { + i = skipToNextField(i + 1, length); // skip to the next field since this field is not indexed + continue; + } + } + if ((termsEnum = terms.iterator(termsEnum)) != null) { + 
br.copyBytes(term.bytes()); + assert termsEnum != null; + if (termsEnum.seekExact(br, true)) { + docs = termsEnum.docs(acceptDocs, docs, 0); + if (result == null) { + if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + result = new FixedBitSet(reader.maxDoc()); + // lazy init but don't do it in the hot loop since we could read many docs + result.set(docs.docID()); + } + } + while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + result.set(docs.docID()); + } + } + } + } return result; - } - final BytesRef br = new BytesRef(); - Terms terms = null; - TermsEnum termsEnum = null; - DocsEnum docs = null; - assert resetTermsEnum[0]; - for (int i = 0; i < length; i++) { - Term term = this.filterTerms[i]; - if (resetTermsEnum[i]) { - terms = fields.terms(term.field()); - if (terms == null) { - i = skipToNextField(i+1, length); // skip to the next field since this field is not indexed - continue; - } - } - if ((termsEnum = terms.iterator(termsEnum)) != null) { - br.copyBytes(term.bytes()); - assert termsEnum != null; - if (termsEnum.seekExact(br,true)) { - docs = termsEnum.docs(acceptDocs, docs, 0); - if (result == null) { - if (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - result = new FixedBitSet(reader.maxDoc()); - // lazy init but don't do it in the hot loop since we could read many docs - result.set(docs.docID()); - } - } - while (docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - result.set(docs.docID()); - } - } - } - } - return result; } private final int skipToNextField(int index, int length) { - for (int i = index; i < length; i++) { - if (resetTermsEnum[i]) { - return i-1; + for (int i = index; i < length; i++) { + if (resetTermsEnum[i]) { + return i - 1; + } } - } - return length; + return length; } - + @Override public boolean equals(Object obj) { - if (this == obj) { - return true; - } - if ((obj == null) || (obj.getClass() != this.getClass())) { - return false; - } - XTermsFilter test = (XTermsFilter) obj; - if (filterTerms != test.filterTerms) { - if (length == test.length) { - for (int i = 0; i < length; i++) { - // can not be null! - if (!filterTerms[i].equals(test.filterTerms[i])) { - return false; - } - } - } else { - return false; + if (this == obj) { + return true; } - } - return true; - + if ((obj == null) || (obj.getClass() != this.getClass())) { + return false; + } + XTermsFilter test = (XTermsFilter) obj; + if (filterTerms != test.filterTerms) { + if (length == test.length) { + for (int i = 0; i < length; i++) { + // can not be null! 
+ if (!filterTerms[i].equals(test.filterTerms[i])) { + return false; + } + } + } else { + return false; + } + } + return true; + } @Override public int hashCode() { - int hash = 9; - for (int i = 0; i < length; i++) { - hash = 31 * hash + filterTerms[i].hashCode(); - } - return hash; + int hash = 9; + for (int i = 0; i < length; i++) { + hash = 31 * hash + filterTerms[i].hashCode(); + } + return hash; } - + @Override public String toString() { - StringBuilder builder = new StringBuilder(); - for (int i = 0; i < length; i++) { - if (builder.length() > 0) { - builder.append(' '); + StringBuilder builder = new StringBuilder(); + for (int i = 0; i < length; i++) { + if (builder.length() > 0) { + builder.append(' '); + } + builder.append(filterTerms[i]); } - builder.append(filterTerms[i]); - } - return builder.toString(); + return builder.toString(); } } diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java index 58008e8314d..4da45dabc7a 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/IdFieldMapper.java @@ -166,7 +166,7 @@ public class IdFieldMapper extends AbstractFieldMapper implements Intern if (indexed() || context == null) { return super.fieldQuery(value, context); } - UidFilter filter = new UidFilter(context.queryTypes(), ImmutableList.of(value), context.indexCache().bloomCache()); + UidFilter filter = new UidFilter(context.queryTypes(), ImmutableList.of(value)); // no need for constant score filter, since we don't cache the filter, and it always takes deletes into account return new ConstantScoreQuery(filter); } @@ -176,7 +176,7 @@ public class IdFieldMapper extends AbstractFieldMapper implements Intern if (indexed() || context == null) { return super.fieldFilter(value, context); } - return new UidFilter(context.queryTypes(), ImmutableList.of(value), context.indexCache().bloomCache()); + return new UidFilter(context.queryTypes(), ImmutableList.of(value)); } @Override @@ -280,7 +280,7 @@ public class IdFieldMapper extends AbstractFieldMapper implements Intern public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { // if all are defaults, no sense to write it at all if (fieldType.stored() == Defaults.ID_FIELD_TYPE.stored() && - fieldType.indexed() == Defaults.ID_FIELD_TYPE.indexed() && path == Defaults.PATH) { + fieldType.indexed() == Defaults.ID_FIELD_TYPE.indexed() && path == Defaults.PATH) { return builder; } builder.startObject(CONTENT_TYPE); diff --git a/src/main/java/org/elasticsearch/index/search/UidFilter.java b/src/main/java/org/elasticsearch/index/search/UidFilter.java index 59665b6fd95..bdf712acc72 100644 --- a/src/main/java/org/elasticsearch/index/search/UidFilter.java +++ b/src/main/java/org/elasticsearch/index/search/UidFilter.java @@ -19,11 +19,7 @@ package org.elasticsearch.index.search; -import org.apache.lucene.index.AtomicReader; -import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.DocsEnum; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.*; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; import org.apache.lucene.util.Bits; @@ -36,9 +32,11 @@ import java.util.Arrays; import java.util.Collection; import java.util.List; +// LUCENE 4 UPGRADE: we can potentially use TermsFilter here, specifically, now when 
we don't do bloom filter, batching, and with optimization on single field terms public class UidFilter extends Filter { final Term[] uids; + public UidFilter(Collection types, List ids) { this.uids = new Term[types.size() * ids.size()]; int i = 0; @@ -60,7 +58,6 @@ public class UidFilter extends Filter { // - If we have a single id, we can create a SingleIdDocIdSet to save on mem // - We can use sorted int array DocIdSet to reserve memory compared to OpenBitSet in some cases @Override - // LUCENE 4 UPGRADE: this filter does respect acceptDocs maybe we need to change this public DocIdSet getDocIdSet(AtomicReaderContext ctx, Bits acceptedDocs) throws IOException { FixedBitSet set = null; final AtomicReader reader = ctx.reader(); @@ -71,8 +68,6 @@ public class UidFilter extends Filter { docsEnum = termsEnum.docs(acceptedDocs, docsEnum, 0); int doc; while ((doc = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) { - // no need for batching, its on the UID, there will be only - // one doc if (set == null) { set = new FixedBitSet(reader.maxDoc()); } From e9f8d0c722199c0990f1a983a0fc773200145c9f Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Fri, 2 Nov 2012 10:26:15 +0100 Subject: [PATCH 099/146] lucene 4: extrace Lucene#readSegmentsInfo, and use it where applicable --- .../elasticsearch/common/lucene/Lucene.java | 27 ++++++++++++++++++- .../index/engine/robin/RobinEngine.java | 16 ++--------- .../blobstore/BlobStoreIndexShardGateway.java | 7 ++--- .../gateway/local/LocalIndexShardGateway.java | 13 +++++---- 4 files changed, 38 insertions(+), 25 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/Lucene.java b/src/main/java/org/elasticsearch/common/lucene/Lucene.java index 000a147acdf..1376a473f05 100644 --- a/src/main/java/org/elasticsearch/common/lucene/Lucene.java +++ b/src/main/java/org/elasticsearch/common/lucene/Lucene.java @@ -23,6 +23,7 @@ import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.*; import org.apache.lucene.search.*; +import org.apache.lucene.store.Directory; import org.apache.lucene.util.Version; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.io.stream.StreamInput; @@ -40,7 +41,7 @@ import java.lang.reflect.Field; */ public class Lucene { - public static final Version VERSION = Version.LUCENE_36; + public static final Version VERSION = Version.LUCENE_40; public static final Version ANALYZER_VERSION = VERSION; public static final Version QUERYPARSER_VERSION = VERSION; @@ -57,6 +58,9 @@ public class Lucene { if (version == null) { return defaultVersion; } + if ("4.0".equals(version)) { + return Version.LUCENE_40; + } if ("3.6".equals(version)) { return Version.LUCENE_36; } @@ -82,6 +86,27 @@ public class Lucene { return defaultVersion; } + /** + * Reads the segments infos, returning null if it doesn't exists + */ + @Nullable + public static SegmentInfos readSegmentInfosIfExists(Directory directory) { + try { + return readSegmentInfos(directory); + } catch (IOException e) { + return null; + } + } + + /** + * Reads the segments infos, failing if it fails to load + */ + public static SegmentInfos readSegmentInfos(Directory directory) throws IOException { + final SegmentInfos sis = new SegmentInfos(); + sis.read(directory); + return sis; + } + public static long count(IndexSearcher searcher, Query query) throws IOException { TotalHitCountCollector countCollector = new TotalHitCountCollector(); // we don't need scores, so wrap it in a constant 
score query diff --git a/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java b/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java index b5ea6660538..3cfffc47d9a 100644 --- a/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java +++ b/src/main/java/org/elasticsearch/index/engine/robin/RobinEngine.java @@ -23,9 +23,6 @@ import com.google.common.collect.Lists; import org.apache.lucene.index.*; import org.apache.lucene.search.*; import org.apache.lucene.store.AlreadyClosedException; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.util.IOUtils; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.ElasticSearchIllegalStateException; import org.elasticsearch.cluster.metadata.IndexMetaData; @@ -244,7 +241,7 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { // commit on a just opened writer will commit even if there are no changes done to it // we rely on that for the commit data translog id key if (DirectoryReader.indexExists(store.directory())) { - Map commitUserData = getCommitUserData(store.directory()); + Map commitUserData = Lucene.readSegmentInfos(store.directory()).getUserData(); if (commitUserData.containsKey(Translog.TRANSLOG_ID_KEY)) { translogIdGenerator.set(Long.parseLong(commitUserData.get(Translog.TRANSLOG_ID_KEY))); } else { @@ -859,7 +856,7 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { indexWriter.commit(MapBuilder.newMapBuilder().put(Translog.TRANSLOG_ID_KEY, Long.toString(translogId)).map()); if (flush.force()) { // if we force, we might not have committed, we need to check that its the same id - Map commitUserData = getCommitUserData(store.directory()); + Map commitUserData = Lucene.readSegmentInfos(store.directory()).getUserData(); long committedTranslogId = Long.parseLong(commitUserData.get(Translog.TRANSLOG_ID_KEY)); if (committedTranslogId != translogId) { // we did not commit anything, revert to the old translog @@ -1529,13 +1526,4 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine { return searcher; } } - - /** - * Reads the latest commit and loads the userdata - */ - private static final Map getCommitUserData(final Directory directory) throws IOException { - final SegmentInfos sis = new SegmentInfos(); - sis.read(directory); - return sis.getUserData(); - } } diff --git a/src/main/java/org/elasticsearch/index/gateway/blobstore/BlobStoreIndexShardGateway.java b/src/main/java/org/elasticsearch/index/gateway/blobstore/BlobStoreIndexShardGateway.java index b247cbaad7f..e518c622fe7 100644 --- a/src/main/java/org/elasticsearch/index/gateway/blobstore/BlobStoreIndexShardGateway.java +++ b/src/main/java/org/elasticsearch/index/gateway/blobstore/BlobStoreIndexShardGateway.java @@ -22,7 +22,7 @@ package org.elasticsearch.index.gateway.blobstore; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Iterables; import com.google.common.collect.Lists; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.store.Directory; import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; @@ -32,6 +32,7 @@ import org.elasticsearch.common.blobstore.*; import org.elasticsearch.common.io.FastByteArrayInputStream; import org.elasticsearch.common.io.FastByteArrayOutputStream; import org.elasticsearch.common.io.stream.BytesStreamInput; +import 
org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.store.InputStreamIndexInput; import org.elasticsearch.common.lucene.store.ThreadSafeInputStreamIndexInput; import org.elasticsearch.common.settings.Settings; @@ -609,8 +610,8 @@ public abstract class BlobStoreIndexShardGateway extends AbstractIndexShardCompo // read the gateway data persisted long version = -1; try { - if (IndexReader.indexExists(store.directory())) { - version = IndexReader.getCurrentVersion(store.directory()); + if (DirectoryReader.indexExists(store.directory())) { + version = Lucene.readSegmentInfos(store.directory()).getVersion(); } } catch (IOException e) { throw new IndexShardGatewayRecoveryException(shardId(), "Failed to fetch index version after copying it over", e); diff --git a/src/main/java/org/elasticsearch/index/gateway/local/LocalIndexShardGateway.java b/src/main/java/org/elasticsearch/index/gateway/local/LocalIndexShardGateway.java index f5d62a55f92..301543fcafa 100644 --- a/src/main/java/org/elasticsearch/index/gateway/local/LocalIndexShardGateway.java +++ b/src/main/java/org/elasticsearch/index/gateway/local/LocalIndexShardGateway.java @@ -20,9 +20,9 @@ package org.elasticsearch.index.gateway.local; import com.google.common.io.Closeables; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.SegmentInfos; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.io.stream.InputStreamStreamInput; @@ -49,7 +49,6 @@ import java.io.EOFException; import java.io.File; import java.io.FileInputStream; import java.io.IOException; -import java.util.Map; import java.util.concurrent.ScheduledFuture; /** @@ -101,12 +100,12 @@ public class LocalIndexShardGateway extends AbstractIndexShardComponent implemen long version = -1; long translogId = -1; try { - if (IndexReader.indexExists(indexShard.store().directory())) { + SegmentInfos si = Lucene.readSegmentInfosIfExists(indexShard.store().directory()); + if (si != null) { if (indexShouldExists) { - version = IndexReader.getCurrentVersion(indexShard.store().directory()); - Map commitUserData = IndexReader.getCommitUserData(indexShard.store().directory()); - if (commitUserData.containsKey(Translog.TRANSLOG_ID_KEY)) { - translogId = Long.parseLong(commitUserData.get(Translog.TRANSLOG_ID_KEY)); + version = si.getVersion(); + if (si.getUserData().containsKey(Translog.TRANSLOG_ID_KEY)) { + translogId = Long.parseLong(si.getUserData().get(Translog.TRANSLOG_ID_KEY)); } else { translogId = version; } From faf3e0e85759c20256a0b48dcc7810118a89d369 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Fri, 2 Nov 2012 10:58:48 +0100 Subject: [PATCH 100/146] lucene 4: comment on adding DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS --- .../java/org/elasticsearch/index/mapper/core/TypeParsers.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/elasticsearch/index/mapper/core/TypeParsers.java b/src/main/java/org/elasticsearch/index/mapper/core/TypeParsers.java index 1ffd78d532c..6e496ebf124 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/TypeParsers.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/TypeParsers.java @@ -37,7 +37,7 @@ import static org.elasticsearch.common.xcontent.support.XContentMapValues.*; * */ public class TypeParsers { - + public static final String INDEX_OPTIONS_DOCS = "docs"; public static final String 
INDEX_OPTIONS_FREQS = "freqs"; public static final String INDEX_OPTIONS_POSITIONS = "positions"; @@ -113,6 +113,7 @@ public class TypeParsers { } } + // LUCENE 4 UPGRADE: when ew move into feature mode, we need to support DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS private static IndexOptions nodeIndexOptionValue(final Object propNode) { final String value = propNode.toString(); if (INDEX_OPTIONS_POSITIONS.equalsIgnoreCase(value)) { From dca88a9b7cc9e5495cd595ced885b4ce14b2c3c8 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Fri, 2 Nov 2012 11:07:44 +0100 Subject: [PATCH 101/146] lucene 4: use field type in UidField --- .../java/org/elasticsearch/common/lucene/uid/UidField.java | 4 ++-- .../elasticsearch/index/mapper/internal/UidFieldMapper.java | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java b/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java index 52d231c6023..e26d2190d45 100644 --- a/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java +++ b/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java @@ -30,6 +30,7 @@ import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.Numbers; import org.elasticsearch.common.lucene.Lucene; +import org.elasticsearch.index.mapper.internal.UidFieldMapper; import java.io.IOException; import java.io.Reader; @@ -119,10 +120,9 @@ public class UidField extends Field { private final UidPayloadTokenStream tokenStream; public UidField(String name, String uid, long version) { - super(name, uid, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO); + super(name, uid, UidFieldMapper.Defaults.UID_FIELD_TYPE); this.uid = uid; this.version = version; -// this.indexOptions = FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; this.tokenStream = new UidPayloadTokenStream(this); } diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java index c821eb51b57..efb5748c931 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/UidFieldMapper.java @@ -41,8 +41,6 @@ public class UidFieldMapper extends AbstractFieldMapper implements Internal public static final String NAME = "_uid".intern(); - public static final Term TERM_FACTORY = new Term(NAME, ""); - public static final String CONTENT_TYPE = "_uid"; public static class Defaults extends AbstractFieldMapper.Defaults { From b492320e2fe43fc223ec865912d232a041441e71 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Fri, 2 Nov 2012 11:08:13 +0100 Subject: [PATCH 102/146] lucene 4: switch directory not used --- .../common/lucene/store/SwitchDirectory.java | 172 ------------------ 1 file changed, 172 deletions(-) delete mode 100644 src/main/java/org/elasticsearch/common/lucene/store/SwitchDirectory.java diff --git a/src/main/java/org/elasticsearch/common/lucene/store/SwitchDirectory.java b/src/main/java/org/elasticsearch/common/lucene/store/SwitchDirectory.java deleted file mode 100644 index 1d453802aa7..00000000000 --- a/src/main/java/org/elasticsearch/common/lucene/store/SwitchDirectory.java +++ /dev/null @@ -1,172 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. 
ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.elasticsearch.common.lucene.store; - -import com.google.common.collect.ImmutableList; -import com.google.common.collect.ImmutableSet; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IOContext; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; -import org.elasticsearch.index.store.support.ForceSyncDirectory; - -import java.io.IOException; -import java.util.*; - -/** - * A Directory instance that switches files between - * two other Directory instances. - * <p/> - *
Files with the specified extensions are placed in the - * primary directory; others are placed in the secondary - * directory. - * - * - */ -public class SwitchDirectory extends Directory implements ForceSyncDirectory { - - private final Directory secondaryDir; - - private final Directory primaryDir; - - private final ImmutableSet primaryExtensions; - - private boolean doClose; - - public SwitchDirectory(Set primaryExtensions, Directory primaryDir, Directory secondaryDir, boolean doClose) { - this.primaryExtensions = ImmutableSet.copyOf(primaryExtensions); - this.primaryDir = primaryDir; - this.secondaryDir = secondaryDir; - this.doClose = doClose; - this.lockFactory = primaryDir.getLockFactory(); - } - - public ImmutableSet primaryExtensions() { - return primaryExtensions; - } - - /** - * Return the primary directory - */ - public Directory primaryDir() { - return primaryDir; - } - - /** - * Return the secondary directory - */ - public Directory secondaryDir() { - return secondaryDir; - } - - @Override - public void close() throws IOException { - if (doClose) { - try { - secondaryDir.close(); - } finally { - primaryDir.close(); - } - doClose = false; - } - } - - @Override - public String[] listAll() throws IOException { - Set files = new HashSet(); - for (String f : primaryDir.listAll()) { - files.add(f); - } - for (String f : secondaryDir.listAll()) { - files.add(f); - } - return files.toArray(new String[files.size()]); - } - - /** - * Utility method to return a file's extension. - */ - public static String getExtension(String name) { - int i = name.lastIndexOf('.'); - if (i == -1) { - return ""; - } - return name.substring(i + 1, name.length()); - } - - private Directory getDirectory(String name) { - String ext = getExtension(name); - if (primaryExtensions.contains(ext)) { - return primaryDir; - } else { - return secondaryDir; - } - } - - @Override - public boolean fileExists(String name) throws IOException { - return getDirectory(name).fileExists(name); - } - - @Override - public void deleteFile(String name) throws IOException { - getDirectory(name).deleteFile(name); - } - - @Override - public long fileLength(String name) throws IOException { - return getDirectory(name).fileLength(name); - } - - @Override - public IndexOutput createOutput(String name, IOContext context) throws IOException { - return getDirectory(name).createOutput(name, context); - } - - @Override - public void sync(Collection names) throws IOException { - List primaryNames = new ArrayList(); - List secondaryNames = new ArrayList(); - - for (String name : names) - if (primaryExtensions.contains(getExtension(name))) - primaryNames.add(name); - else - secondaryNames.add(name); - - primaryDir.sync(primaryNames); - secondaryDir.sync(secondaryNames); - } - - @Override - public void forceSync(String name) throws IOException { - Directory dir = getDirectory(name); - if (dir instanceof ForceSyncDirectory) { - ((ForceSyncDirectory) dir).forceSync(name); - } else { - dir.sync(ImmutableList.of(name)); - } - } - - @Override - public IndexInput openInput(String name, IOContext context) throws IOException { - return getDirectory(name).openInput(name, context); - } -} From c60f20413b299e4d9ea0a5fa3e24381e90d914b8 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Fri, 2 Nov 2012 11:32:47 +0100 Subject: [PATCH 103/146] lucene 4: support doc level boost --- .../elasticsearch/index/mapper/DocumentMapper.java | 12 ++++++++++++ .../org/elasticsearch/index/mapper/ParseContext.java | 12 +++++++++++- 
.../index/mapper/internal/BoostFieldMapper.java | 4 ++-- 3 files changed, 25 insertions(+), 3 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java b/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java index 0935ad41926..6805acf3269 100644 --- a/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java @@ -25,6 +25,7 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.Filter; import org.elasticsearch.common.Booleans; import org.elasticsearch.common.Nullable; @@ -519,6 +520,17 @@ public class DocumentMapper implements ToXContent { if (context.docs().size() > 1) { Collections.reverse(context.docs()); } + // apply doc boost + if (context.docBoost() != 1.0f) { + for (Document doc : context.docs()) { + for (IndexableField field : doc) { + if (field.fieldType().indexed() && !field.fieldType().omitNorms()) { + ((Field) field).setBoost(context.docBoost() * field.boost()); + } + } + } + } + ParsedDocument doc = new ParsedDocument(context.uid(), context.id(), context.type(), source.routing(), source.timestamp(), source.ttl(), context.docs(), context.analyzer(), context.source(), context.mappingsModified()).parent(source.parent()); // reset the context to free up memory diff --git a/src/main/java/org/elasticsearch/index/mapper/ParseContext.java b/src/main/java/org/elasticsearch/index/mapper/ParseContext.java index c6a70a0a880..af85557a631 100644 --- a/src/main/java/org/elasticsearch/index/mapper/ParseContext.java +++ b/src/main/java/org/elasticsearch/index/mapper/ParseContext.java @@ -21,7 +21,6 @@ package org.elasticsearch.index.mapper; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.lucene.all.AllEntries; @@ -80,6 +79,8 @@ public class ParseContext { private AllEntries allEntries = new AllEntries(); + private float docBoost = 1.0f; + public ParseContext(String index, @Nullable Settings indexSettings, DocumentMapperParser docMapperParser, DocumentMapper docMapper, ContentPath path) { this.index = index; this.indexSettings = indexSettings; @@ -107,6 +108,7 @@ public class ParseContext { this.listener = listener == null ? DocumentMapper.ParseListener.EMPTY : listener; this.allEntries = new AllEntries(); this.ignoredValues.clear(); + this.docBoost = 1.0f; } public boolean flyweight() { @@ -273,6 +275,14 @@ public class ParseContext { return externalValue; } + public float docBoost() { + return this.docBoost; + } + + public void docBoost(float docBoost) { + this.docBoost = docBoost; + } + /** * A string builder that can be used to construct complex names for example. * Its better to reuse the. 
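Lucene 4.0 removed index-time document boost (Document.setBoost is gone), which is why the _boost value is now carried on the ParseContext and folded into the boost of every indexed, norms-carrying field once parsing finishes. The DocumentMapper hunk above, distilled into a standalone sketch (the helper class is illustrative, not code from the patch):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexableField;

class DocBoostSketch {
    // Fold a document-level boost into each indexed field that keeps norms;
    // per-field norms are the only place Lucene 4 can still record an
    // index-time boost.
    static void apply(Document doc, float docBoost) {
        if (docBoost == 1.0f) {
            return; // 1.0 is the default, nothing to fold in
        }
        for (IndexableField field : doc) {
            if (field.fieldType().indexed() && !field.fieldType().omitNorms()) {
                ((Field) field).setBoost(docBoost * field.boost());
            }
        }
    }
}

[PATCH 105/146] below refines this fold so the multiplier is applied at most once per field name.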
diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java index f2c99d8c1aa..de41e5e7a63 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/BoostFieldMapper.java @@ -228,7 +228,7 @@ public class BoostFieldMapper extends NumberFieldMapper implements Intern // we override parse since we want to handle cases where it is not indexed and not stored (the default) float value = parseFloatValue(context); if (!Float.isNaN(value)) { - context.doc().setBoost(value); + context.docBoost(value); } super.parse(context); } @@ -239,7 +239,7 @@ public class BoostFieldMapper extends NumberFieldMapper implements Intern if (Float.isNaN(value)) { return null; } - context.doc().setBoost(value); + context.docBoost(value); return new FloatFieldMapper.CustomFloatNumericField(this, value, fieldType); } From 7ecfa9c35f6f0ed7a2d10217124e9e6f08e17da5 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Fri, 2 Nov 2012 11:45:59 +0100 Subject: [PATCH 104/146] lucene 4: caching should pass acceptDocs still work left on streamlining filters --- .../filter/weighted/WeightedFilterCache.java | 20 ++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/cache/filter/weighted/WeightedFilterCache.java b/src/main/java/org/elasticsearch/index/cache/filter/weighted/WeightedFilterCache.java index a154b265042..cb0c387ad52 100644 --- a/src/main/java/org/elasticsearch/index/cache/filter/weighted/WeightedFilterCache.java +++ b/src/main/java/org/elasticsearch/index/cache/filter/weighted/WeightedFilterCache.java @@ -23,10 +23,12 @@ import com.google.common.cache.Cache; import com.google.common.cache.RemovalListener; import com.google.common.cache.RemovalNotification; import com.google.common.cache.Weigher; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.SegmentReader; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.Filter; +import org.apache.lucene.util.Bits; import org.elasticsearch.ElasticSearchException; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.docset.DocSet; @@ -142,26 +144,30 @@ public class WeightedFilterCache extends AbstractIndexComponent implements Filte this.cache = cache; } + @Override - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { + public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { Object filterKey = filter; if (filter instanceof CacheKeyFilter) { filterKey = ((CacheKeyFilter) filter).cacheKey(); } - FilterCacheKey cacheKey = new FilterCacheKey(cache.index().name(), reader.getCoreCacheKey(), filterKey); + FilterCacheKey cacheKey = new FilterCacheKey(cache.index().name(), context.reader().getCoreCacheKey(), filterKey); Cache innerCache = cache.indicesFilterCache.cache(); DocSet cacheValue = innerCache.getIfPresent(cacheKey); if (cacheValue == null) { - if (!cache.seenReaders.containsKey(reader.getCoreCacheKey())) { - Boolean previous = cache.seenReaders.putIfAbsent(reader.getCoreCacheKey(), Boolean.TRUE); - if (previous == null && (reader instanceof SegmentReader)) { - ((SegmentReader) reader).addCoreClosedListener(cache); + if (!cache.seenReaders.containsKey(context.reader().getCoreCacheKey())) { + Boolean previous = 
cache.seenReaders.putIfAbsent(context.reader().getCoreCacheKey(), Boolean.TRUE); + if (previous == null && (context.reader() instanceof SegmentReader)) { + ((SegmentReader) context.reader()).addCoreClosedListener(cache); cache.seenReadersCount.inc(); } } - cacheValue = DocSets.cacheable(reader, filter.getDocIdSet(reader)); + // we pass down the acceptDocs so things like TermFilter will be able to make use of it + // but we don't wrap it with accept docs in "our own filters", we rely on it being applied + // on the top level + cacheValue = DocSets.cacheable(context.reader(), filter.getDocIdSet(context, acceptDocs)); // we might put the same one concurrently, that's fine, it will be replaced and the removal // will be called cache.totalMetric.inc(cacheValue.sizeInBytes()); From bec0ffa6236a05a1ccde6547fc512cd6778808ea Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Fri, 2 Nov 2012 11:48:04 +0100 Subject: [PATCH 105/146] lucene 4: make sure to apply doc boost only once per field name --- .../elasticsearch/index/mapper/DocumentMapper.java | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java b/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java index 6805acf3269..f0fd06cec42 100644 --- a/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/DocumentMapper.java @@ -21,6 +21,7 @@ package org.elasticsearch.index.mapper; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; +import com.google.common.collect.Sets; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; @@ -41,10 +42,7 @@ import org.elasticsearch.index.mapper.object.ObjectMapper; import org.elasticsearch.index.mapper.object.RootObjectMapper; import java.io.IOException; -import java.util.Collections; -import java.util.LinkedHashMap; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.concurrent.CopyOnWriteArrayList; import static com.google.common.collect.Lists.newArrayList; @@ -522,10 +520,15 @@ public class DocumentMapper implements ToXContent { } // apply doc boost if (context.docBoost() != 1.0f) { + Set encounteredFields = Sets.newHashSet(); for (Document doc : context.docs()) { + encounteredFields.clear(); for (IndexableField field : doc) { if (field.fieldType().indexed() && !field.fieldType().omitNorms()) { - ((Field) field).setBoost(context.docBoost() * field.boost()); + if (!encounteredFields.contains(field.name())) { + ((Field) field).setBoost(context.docBoost() * field.boost()); + encounteredFields.add(field.name()); + } } } } From cefe2ba870b0eef03b4329a34e43438f65dd18c5 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Fri, 2 Nov 2012 14:25:48 +0100 Subject: [PATCH 106/146] lucene 4: fix fuzzy query test --- .../apache/lucene/queryparser/classic/MapperQueryParser.java | 5 +---- .../test/unit/index/query/SimpleIndexQueryParserTests.java | 4 ++-- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java b/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java index a213519e2cd..c76fd038d09 100644 --- a/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java +++ b/src/main/java/org/apache/lucene/queryparser/classic/MapperQueryParser.java @@ -25,9 +25,7 @@ import org.apache.lucene.analysis.Analyzer; import 
org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.Term; -import org.apache.lucene.queryparser.classic.QueryParser; import org.apache.lucene.search.*; -import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.io.FastStringReader; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.search.Queries; @@ -414,8 +412,7 @@ public class MapperQueryParser extends QueryParser { @Override protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) { String text = term.text(); - int numEdits = FuzzyQuery.floatToEdits(minimumSimilarity, - text.codePointCount(0, text.length())); + int numEdits = FuzzyQuery.floatToEdits(minimumSimilarity, text.codePointCount(0, text.length())); //LUCENE 4 UPGRADE I disabled transpositions here by default - maybe this needs to be changed FuzzyQuery query = new FuzzyQuery(term, numEdits, prefixLength, settings.fuzzyMaxExpansions(), false); QueryParsers.setRewriteMethod(query, settings.fuzzyRewriteMethod()); diff --git a/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java b/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java index 3f65dd8e269..0041bb04c05 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java @@ -364,7 +364,7 @@ public class SimpleIndexQueryParserTests { assertThat(parsedQuery, instanceOf(FuzzyQuery.class)); FuzzyQuery fuzzyQuery = (FuzzyQuery) parsedQuery; assertThat(fuzzyQuery.getTerm(), equalTo(new Term("name.first", "sh"))); - assertThat(fuzzyQuery.getMinSimilarity(), equalTo(0.1f)); + assertThat(fuzzyQuery.getMaxEdits(), equalTo(FuzzyQuery.floatToEdits(0.1f, "sh".length()))); assertThat(fuzzyQuery.getPrefixLength(), equalTo(1)); assertThat(fuzzyQuery.getBoost(), equalTo(2.0f)); } @@ -377,7 +377,7 @@ public class SimpleIndexQueryParserTests { assertThat(parsedQuery, instanceOf(FuzzyQuery.class)); FuzzyQuery fuzzyQuery = (FuzzyQuery) parsedQuery; assertThat(fuzzyQuery.getTerm(), equalTo(new Term("name.first", "sh"))); - assertThat(fuzzyQuery.getMinSimilarity(), equalTo(0.1f)); + assertThat(fuzzyQuery.getMaxEdits(), equalTo(FuzzyQuery.floatToEdits(0.1f, "sh".length()))); assertThat(fuzzyQuery.getPrefixLength(), equalTo(1)); assertThat(fuzzyQuery.getBoost(), equalTo(2.0f)); } From 5c0ef796e8ee48cd22eaa384436553ce4f94a6b8 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 2 Nov 2012 15:48:27 +0100 Subject: [PATCH 107/146] lucene 4: Upgraded BoostMappingTests + SimpleMapperTests --- .../index/mapper/boost/BoostMappingTests.java | 24 ++++++++++++++++--- .../mapper/simple/SimpleMapperTests.java | 23 +++++++++++++----- 2 files changed, 38 insertions(+), 9 deletions(-) diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/boost/BoostMappingTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/boost/BoostMappingTests.java index 7d53388ebcc..d2fd717af05 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/boost/BoostMappingTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/boost/BoostMappingTests.java @@ -19,6 +19,7 @@ package org.elasticsearch.test.unit.index.mapper.boost; +import org.apache.lucene.index.IndexableField; import org.elasticsearch.common.xcontent.XContentFactory; import 
org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.ParsedDocument; @@ -41,8 +42,16 @@ public class BoostMappingTests { ParsedDocument doc = mapper.parse("type", "1", XContentFactory.jsonBuilder().startObject() .field("_boost", 2.0f) + .field("field", "a") + .field("field", "b") .endObject().bytes()); - assertThat(doc.rootDoc().getBoost(), equalTo(2.0f)); + + assertThat(doc.rootDoc().getFields().size(), equalTo(2)); + float sum = 0.0f; + for (IndexableField field : doc.rootDoc().getFields()) { + sum += field.boost(); + } + assertThat(3.0f, equalTo(sum)); // 2.0 (for first field) + 1.0 (for second field) } @Test @@ -54,13 +63,22 @@ public class BoostMappingTests { DocumentMapper mapper = MapperTests.newParser().parse(mapping); ParsedDocument doc = mapper.parse("type", "1", XContentFactory.jsonBuilder().startObject() + .field("field", "a") .field("_boost", 2.0f) + .endObject().bytes()); - assertThat(doc.rootDoc().getBoost(), equalTo(1.0f)); + assertThat(doc.rootDoc().getFields().size(), equalTo(1)); + for (IndexableField field : doc.rootDoc().getFields()) { + assertThat(field.boost(), equalTo(1.0f)); + } doc = mapper.parse("type", "1", XContentFactory.jsonBuilder().startObject() + .field("field", "a") .field("custom_boost", 2.0f) .endObject().bytes()); - assertThat(doc.rootDoc().getBoost(), equalTo(2.0f)); + assertThat(doc.rootDoc().getFields().size(), equalTo(1)); + for (IndexableField field : doc.rootDoc().getFields()) { + assertThat(field.boost(), equalTo(2.0f)); + } } } diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/simple/SimpleMapperTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/simple/SimpleMapperTests.java index 6249657950c..4f332bbf3dd 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/simple/SimpleMapperTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/simple/SimpleMapperTests.java @@ -20,7 +20,7 @@ package org.elasticsearch.test.unit.index.mapper.simple; import org.apache.lucene.document.Document; -import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexableField; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.index.mapper.DocumentMapper; @@ -29,7 +29,6 @@ import org.elasticsearch.index.mapper.Uid; import org.elasticsearch.test.unit.index.mapper.MapperTests; import org.testng.annotations.Test; -import static org.apache.lucene.document.Field.Store.YES; import static org.elasticsearch.common.io.Streams.copyToBytesFromClasspath; import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath; import static org.elasticsearch.index.mapper.MapperBuilders.*; @@ -53,7 +52,10 @@ public class SimpleMapperTests { BytesReference json = new BytesArray(copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/simple/test1.json")); Document doc = docMapper.parse("person", "1", json).rootDoc(); - assertThat((double) doc.getBoost(), closeTo(3.7, 0.01)); + assertThat(doc.getFields().size(), equalTo(14)); + for (IndexableField field : doc.getFields()) { + assertThat((double) field.boost(), closeTo(3.7, 0.01)); + } assertThat(doc.get(docMapper.mappers().name("first").mapper().names().indexName()), equalTo("shay")); assertThat(docMapper.mappers().name("first").mapper().names().fullName(), equalTo("name.first")); // System.out.println("Document: " + doc); @@ -74,7 +76,10 @@ public class SimpleMapperTests { BytesReference json = new 
BytesArray(copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/simple/test1.json")); Document doc = builtDocMapper.parse(json).rootDoc(); assertThat(doc.get(docMapper.uidMapper().names().indexName()), equalTo(Uid.createUid("person", "1"))); - assertThat((double) doc.getBoost(), closeTo(3.7, 0.01)); + assertThat(doc.getFields().size(), equalTo(14)); + for (IndexableField field : doc.getFields()) { + assertThat((double) field.boost(), closeTo(3.7, 0.01)); + } assertThat(doc.get(docMapper.mappers().name("first").mapper().names().indexName()), equalTo("shay")); // System.out.println("Document: " + doc); // System.out.println("Json: " + docMapper.sourceMapper().value(doc)); @@ -90,7 +95,10 @@ public class SimpleMapperTests { BytesReference json = new BytesArray(copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/simple/test1.json")); Document doc = docMapper.parse(json).rootDoc(); assertThat(doc.get(docMapper.uidMapper().names().indexName()), equalTo(Uid.createUid("person", "1"))); - assertThat((double) doc.getBoost(), closeTo(3.7, 0.01)); + assertThat(doc.getFields().size(), equalTo(14)); + for (IndexableField field : doc.getFields()) { + assertThat((double) field.boost(), closeTo(3.7, 0.01)); + } assertThat(doc.get(docMapper.mappers().name("first").mapper().names().indexName()), equalTo("shay")); // System.out.println("Document: " + doc); // System.out.println("Json: " + docMapper.sourceMapper().value(doc)); @@ -103,7 +111,10 @@ public class SimpleMapperTests { BytesReference json = new BytesArray(copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/simple/test1-notype-noid.json")); Document doc = docMapper.parse("person", "1", json).rootDoc(); assertThat(doc.get(docMapper.uidMapper().names().indexName()), equalTo(Uid.createUid("person", "1"))); - assertThat((double) doc.getBoost(), closeTo(3.7, 0.01)); + assertThat(doc.getFields().size(), equalTo(14)); + for (IndexableField field : doc.getFields()) { + assertThat((double) field.boost(), closeTo(3.7, 0.01)); + } assertThat(doc.get(docMapper.mappers().name("first").mapper().names().indexName()), equalTo("shay")); // System.out.println("Document: " + doc); // System.out.println("Json: " + docMapper.sourceMapper().value(doc)); From f796fe8d5e2a8fb6c703968df6a51de4b26a2b3c Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Fri, 2 Nov 2012 16:11:25 +0100 Subject: [PATCH 108/146] lucene 4: fix cases where number values are not stored --- .../elasticsearch/index/mapper/core/NumberFieldMapper.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java index 05571ad2ea0..62065631557 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/NumberFieldMapper.java @@ -25,6 +25,7 @@ import org.apache.lucene.document.FieldType; import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.Filter; import org.apache.lucene.search.Query; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.NumericUtils; import org.elasticsearch.common.Explicit; import org.elasticsearch.common.Nullable; @@ -291,8 +292,11 @@ public abstract class NumberFieldMapper extends AbstractFieldM protected final NumberFieldMapper mapper; public CustomNumericField(NumberFieldMapper mapper, byte[] value, FieldType fieldType) { - 
super(mapper.names().indexName(), value, fieldType); + super(mapper.names().indexName(), fieldType); this.mapper = mapper; + if (value != null) { + this.fieldsData = new BytesRef(value); + } } @Override From aa2a8c66cc966b9b0ff6cbf917c08db75bdf9a6a Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 2 Nov 2012 16:25:47 +0100 Subject: [PATCH 109/146] lucene 4: Upgraded UidFieldTests class. --- .../common/lucene/uid/UidField.java | 13 ++--- .../unit/common/lucene/uid/UidFieldTests.java | 48 ++++++++++--------- 2 files changed, 33 insertions(+), 28 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java b/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java index e26d2190d45..8b7f8270c71 100644 --- a/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java +++ b/src/main/java/org/elasticsearch/common/lucene/uid/UidField.java @@ -19,6 +19,7 @@ package org.elasticsearch.common.lucene.uid; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; @@ -55,11 +56,12 @@ public class UidField extends Field { // this works fine for nested docs since they don't have the payload which has the version // so we iterate till we find the one with the payload + // LUCENE 4 UPGRADE: We can get rid of the do while loop, since there is only one _uid value (live docs are taken into account) public static DocIdAndVersion loadDocIdAndVersion(AtomicReaderContext context, Term term) { int docId = Lucene.NO_DOC; try { DocsAndPositionsEnum uid = context.reader().termPositionsEnum(term); - if (uid.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) { + if (uid == null || uid.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) { return null; // no doc } // Note, only master docs uid have version payload, so we can use that info to not @@ -87,10 +89,11 @@ public class UidField extends Field { * Load the version for the uid from the reader, returning -1 if no doc exists, or -2 if * no version is available (for backward comp.) 
*/ + // LUCENE 4 UPGRADE: We can get rid of the do while loop, since there is only one _uid value (live docs are taken into account) public static long loadVersion(AtomicReaderContext context, Term term) { try { DocsAndPositionsEnum uid = context.reader().termPositionsEnum(term); - if (uid.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) { + if (uid == null || uid.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) { return -1; } // Note, only master docs uid have version payload, so we can use that info to not @@ -117,10 +120,8 @@ public class UidField extends Field { private long version; - private final UidPayloadTokenStream tokenStream; - public UidField(String name, String uid, long version) { - super(name, uid, UidFieldMapper.Defaults.UID_FIELD_TYPE); + super(name, UidFieldMapper.Defaults.UID_FIELD_TYPE); this.uid = uid; this.version = version; this.tokenStream = new UidPayloadTokenStream(this); @@ -153,7 +154,7 @@ public class UidField extends Field { } @Override - public TokenStream tokenStreamValue() { + public TokenStream tokenStream(Analyzer analyzer) throws IOException { return tokenStream; } diff --git a/src/test/java/org/elasticsearch/test/unit/common/lucene/uid/UidFieldTests.java b/src/test/java/org/elasticsearch/test/unit/common/lucene/uid/UidFieldTests.java index eccb3d38960..bf4042ab3e1 100644 --- a/src/test/java/org/elasticsearch/test/unit/common/lucene/uid/UidFieldTests.java +++ b/src/test/java/org/elasticsearch/test/unit/common/lucene/uid/UidFieldTests.java @@ -21,13 +21,11 @@ package org.elasticsearch.test.unit.common.lucene.uid; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.IndexWriter; -import org.apache.lucene.index.IndexWriterConfig; -import org.apache.lucene.index.Term; +import org.apache.lucene.index.*; import org.apache.lucene.store.RAMDirectory; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.lucene.uid.UidField; +import org.elasticsearch.index.mapper.internal.UidFieldMapper; import org.hamcrest.MatcherAssert; import org.testng.annotations.Test; @@ -44,43 +42,49 @@ public class UidFieldTests { public void testUidField() throws Exception { IndexWriter writer = new IndexWriter(new RAMDirectory(), new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); - IndexReader reader = IndexReader.open(writer, true); - MatcherAssert.assertThat(UidField.loadVersion(reader, new Term("_uid", "1")), equalTo(-1l)); + DirectoryReader directoryReader = DirectoryReader.open(writer, true); + AtomicReader atomicReader = SlowCompositeReaderWrapper.wrap(directoryReader); + MatcherAssert.assertThat(UidField.loadVersion(atomicReader.getContext(), new Term("_uid", "1")), equalTo(-1l)); Document doc = new Document(); - doc.add(new Field("_uid", "1", Field.Store.YES, Field.Index.NOT_ANALYZED)); + doc.add(new Field("_uid", "1", UidFieldMapper.Defaults.UID_FIELD_TYPE)); writer.addDocument(doc); - reader = reader.reopen(); - assertThat(UidField.loadVersion(reader, new Term("_uid", "1")), equalTo(-2l)); - assertThat(UidField.loadDocIdAndVersion(reader, 0, new Term("_uid", "1")).version, equalTo(-2l)); + directoryReader = DirectoryReader.openIfChanged(directoryReader); + atomicReader = SlowCompositeReaderWrapper.wrap(directoryReader); + assertThat(UidField.loadVersion(atomicReader.getContext(), new Term("_uid", "1")), equalTo(-2l)); + assertThat(UidField.loadDocIdAndVersion(atomicReader.getContext(), new Term("_uid", "1")).version, equalTo(-2l)); doc = 
new Document(); doc.add(new UidField("_uid", "1", 1)); writer.updateDocument(new Term("_uid", "1"), doc); - reader = reader.reopen(); - assertThat(UidField.loadVersion(reader, new Term("_uid", "1")), equalTo(1l)); - assertThat(UidField.loadDocIdAndVersion(reader, 0, new Term("_uid", "1")).version, equalTo(1l)); + directoryReader = DirectoryReader.openIfChanged(directoryReader); + atomicReader = SlowCompositeReaderWrapper.wrap(directoryReader); + assertThat(UidField.loadVersion(atomicReader.getContext(), new Term("_uid", "1")), equalTo(1l)); + assertThat(UidField.loadDocIdAndVersion(atomicReader.getContext(), new Term("_uid", "1")).version, equalTo(1l)); doc = new Document(); UidField uid = new UidField("_uid", "1", 2); doc.add(uid); writer.updateDocument(new Term("_uid", "1"), doc); - reader = reader.reopen(); - assertThat(UidField.loadVersion(reader, new Term("_uid", "1")), equalTo(2l)); - assertThat(UidField.loadDocIdAndVersion(reader, 0, new Term("_uid", "1")).version, equalTo(2l)); + directoryReader = DirectoryReader.openIfChanged(directoryReader); + atomicReader = SlowCompositeReaderWrapper.wrap(directoryReader); + assertThat(UidField.loadVersion(atomicReader.getContext(), new Term("_uid", "1")), equalTo(2l)); + assertThat(UidField.loadDocIdAndVersion(atomicReader.getContext(), new Term("_uid", "1")).version, equalTo(2l)); // test reuse of uid field doc = new Document(); uid.version(3); doc.add(uid); writer.updateDocument(new Term("_uid", "1"), doc); - reader = reader.reopen(); - assertThat(UidField.loadVersion(reader, new Term("_uid", "1")), equalTo(3l)); - assertThat(UidField.loadDocIdAndVersion(reader, 0, new Term("_uid", "1")).version, equalTo(3l)); + directoryReader = DirectoryReader.openIfChanged(directoryReader); + atomicReader = SlowCompositeReaderWrapper.wrap(directoryReader); + assertThat(UidField.loadVersion(atomicReader.getContext(), new Term("_uid", "1")), equalTo(3l)); + assertThat(UidField.loadDocIdAndVersion(atomicReader.getContext(), new Term("_uid", "1")).version, equalTo(3l)); writer.deleteDocuments(new Term("_uid", "1")); - reader = reader.reopen(); - assertThat(UidField.loadVersion(reader, new Term("_uid", "1")), equalTo(-1l)); - assertThat(UidField.loadDocIdAndVersion(reader, 0, new Term("_uid", "1")), nullValue()); + directoryReader = DirectoryReader.openIfChanged(directoryReader); + atomicReader = SlowCompositeReaderWrapper.wrap(directoryReader); + assertThat(UidField.loadVersion(atomicReader.getContext(), new Term("_uid", "1")), equalTo(-1l)); + assertThat(UidField.loadDocIdAndVersion(atomicReader.getContext(), new Term("_uid", "1")), nullValue()); } } From 2a8161d096b78452c8e463a62be6d951fa18d8d7 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 2 Nov 2012 21:50:14 +0100 Subject: [PATCH 110/146] lucene 4: Upgraded SimpleLuceneTests class. The complete codebase compiles now! 
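The recurring translation in these test upgrades: Lucene 4 replaces TermEnum/TermDocs with per-field Terms/TermsEnum/DocsEnum reached through an AtomicReader. A minimal sketch of the new iteration (the method and the "value" field name are examples, not code from the patch):

import java.io.IOException;
import org.apache.lucene.index.*;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

class PostingsSketch {
    static void walkPostings(DirectoryReader reader) throws IOException {
        AtomicReader atomicReader = SlowCompositeReaderWrapper.wrap(reader);
        Terms terms = atomicReader.terms("value");
        if (terms == null) {
            return; // field does not exist in this index
        }
        TermsEnum termsEnum = terms.iterator(null); // pass a previous enum to reuse it
        BytesRef term;
        while ((term = termsEnum.next()) != null) {
            DocsEnum docs = termsEnum.docs(atomicReader.getLiveDocs(), null);
            for (int docId = docs.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = docs.nextDoc()) {
                // docId and docs.freq() replace the old TermDocs.doc()/freq()
            }
        }
    }
}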
--- .../unit/deps/lucene/SimpleLuceneTests.java | 35 ++++++++++--------- 1 file changed, 18 insertions(+), 17 deletions(-) diff --git a/src/test/java/org/elasticsearch/test/unit/deps/lucene/SimpleLuceneTests.java b/src/test/java/org/elasticsearch/test/unit/deps/lucene/SimpleLuceneTests.java index 4c1ac0e8332..c7e1303202e 100644 --- a/src/test/java/org/elasticsearch/test/unit/deps/lucene/SimpleLuceneTests.java +++ b/src/test/java/org/elasticsearch/test/unit/deps/lucene/SimpleLuceneTests.java @@ -153,9 +153,12 @@ public class SimpleLuceneTests { value.append(" ").append("value"); } Document document = new Document(); - document.add(new TextField("_id", Integer.toString(i), Field.Store.YES)); - document.add(new TextField("value", value.toString(), Field.Store.YES)); - document.boost(i); + TextField textField = new TextField("_id", Integer.toString(i), Field.Store.YES); + textField.setBoost(i); + document.add(textField); + textField = new TextField("value", value.toString(), Field.Store.YES); + textField.setBoost(i); + document.add(textField); indexWriter.addDocument(document); } @@ -181,15 +184,16 @@ public class SimpleLuceneTests { for (int i = 0; i < 100; i++) { Document document = new Document(); - document.add(new TextField("_id", Integer.toString(i), Field.Store.YES)); - document.boost(i); + TextField field = new TextField("_id", Integer.toString(i), Field.Store.YES); + field.setBoost(i); + document.add(field); indexWriter.addDocument(document); } reader = refreshReader(reader); indexWriter.close(); - TermDocs termDocs = reader.termDocs(); + TermsEnum termDocs = SlowCompositeReaderWrapper.wrap(reader).terms("_id").iterator(null); termDocs.next(); } @@ -220,19 +224,16 @@ public class SimpleLuceneTests { indexWriter.addDocument(doc); IndexReader reader = IndexReader.open(indexWriter, true); + AtomicReader atomicReader = SlowCompositeReaderWrapper.wrap(reader); - TermDocs termDocs = reader.termDocs(); - - TermEnum termEnum = reader.terms(new Term("int1", "")); - termDocs.seek(termEnum); - assertThat(termDocs.next(), equalTo(true)); - assertThat(termDocs.doc(), equalTo(0)); + DocsEnum termDocs = atomicReader.termDocsEnum(new Term("int1")); + assertThat(termDocs.nextDoc(), equalTo(0)); + assertThat(termDocs.docID(), equalTo(0)); assertThat(termDocs.freq(), equalTo(1)); - termEnum = reader.terms(new Term("int2", "")); - termDocs.seek(termEnum); - assertThat(termDocs.next(), equalTo(true)); - assertThat(termDocs.doc(), equalTo(0)); + termDocs = atomicReader.termDocsEnum(new Term("int2")); + assertThat(termDocs.nextDoc(), equalTo(0)); + assertThat(termDocs.docID(), equalTo(0)); assertThat(termDocs.freq(), equalTo(2)); reader.close(); @@ -241,7 +242,7 @@ public class SimpleLuceneTests { private DirectoryReader refreshReader(DirectoryReader reader) throws IOException { DirectoryReader oldReader = reader; - reader = DirectoryReader.openIfChanged(reader);; + reader = DirectoryReader.openIfChanged(reader); if (reader != oldReader) { oldReader.close(); } From db639e5c2ef0a3f3a1fb9b6bd1848b111565e6c5 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 2 Nov 2012 22:24:04 +0100 Subject: [PATCH 111/146] lucene 4: Upgraded SimpleLuceneTests class. Test actually passes now. 
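One behavioral detail this revision leans on: DirectoryReader.openIfChanged returns null when nothing changed rather than handing back an equivalent reader, so the caller must keep the old reader in that case. The refresh idiom used by the test, as a sketch:

import java.io.IOException;
import org.apache.lucene.index.DirectoryReader;

class RefreshSketch {
    static DirectoryReader refresh(DirectoryReader reader) throws IOException {
        DirectoryReader newReader = DirectoryReader.openIfChanged(reader);
        if (newReader != null) {
            reader.close(); // a change was picked up, switch readers
            return newReader;
        }
        return reader; // null means the reader is still current
    }
}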
--- .../unit/deps/lucene/SimpleLuceneTests.java | 40 ++++++++++++------- 1 file changed, 25 insertions(+), 15 deletions(-) diff --git a/src/test/java/org/elasticsearch/test/unit/deps/lucene/SimpleLuceneTests.java b/src/test/java/org/elasticsearch/test/unit/deps/lucene/SimpleLuceneTests.java index c7e1303202e..95e6bbcf464 100644 --- a/src/test/java/org/elasticsearch/test/unit/deps/lucene/SimpleLuceneTests.java +++ b/src/test/java/org/elasticsearch/test/unit/deps/lucene/SimpleLuceneTests.java @@ -49,12 +49,12 @@ public class SimpleLuceneTests { document.add(new TextField("str", new String(new char[]{(char) (97 + i), (char) (97 + i)}), Field.Store.YES)); indexWriter.addDocument(document); } - IndexReader reader = IndexReader.open(indexWriter, true); + IndexReader reader = DirectoryReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); TopFieldDocs docs = searcher.search(new MatchAllDocsQuery(), null, 10, new Sort(new SortField("str", SortField.Type.STRING))); for (int i = 0; i < 10; i++) { FieldDoc fieldDoc = (FieldDoc) docs.scoreDocs[i]; - assertThat(fieldDoc.fields[0].toString(), equalTo(new String(new char[]{(char) (97 + i), (char) (97 + i)}))); + assertThat((BytesRef) fieldDoc.fields[0], equalTo(new BytesRef(new String(new char[]{(char) (97 + i), (char) (97 + i)})))); } } @@ -65,12 +65,12 @@ public class SimpleLuceneTests { Document document = new Document(); document.add(new TextField("_id", "1", Field.Store.YES)); indexWriter.addDocument(document); - DirectoryReader reader = IndexReader.open(indexWriter, true); + DirectoryReader reader = DirectoryReader.open(indexWriter, true); assertThat(reader.numDocs(), equalTo(1)); indexWriter.prepareCommit(); - reader = DirectoryReader.openIfChanged(reader); - assertThat(reader.numDocs(), equalTo(1)); + // Returns null b/c no changes. 
+ assertThat(DirectoryReader.openIfChanged(reader), equalTo(null)); document = new Document(); document.add(new TextField("_id", "2", Field.Store.YES)); @@ -90,7 +90,7 @@ public class SimpleLuceneTests { document.add(new IntField("test", 2, IntField.TYPE_STORED)); indexWriter.addDocument(document); - IndexReader reader = IndexReader.open(indexWriter, true); + IndexReader reader = DirectoryReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1); Document doc = searcher.doc(topDocs.scoreDocs[0].doc); @@ -122,7 +122,7 @@ public class SimpleLuceneTests { document.add(new TextField("#id", "1", Field.Store.YES)); indexWriter.addDocument(document); - IndexReader reader = IndexReader.open(indexWriter, true); + IndexReader reader = DirectoryReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1); final ArrayList fieldsOrder = new ArrayList(); @@ -162,7 +162,7 @@ public class SimpleLuceneTests { indexWriter.addDocument(document); } - IndexReader reader = IndexReader.open(indexWriter, true); + IndexReader reader = DirectoryReader.open(indexWriter, true); IndexSearcher searcher = new IndexSearcher(reader); TermQuery query = new TermQuery(new Term("value", "value")); TopDocs topDocs = searcher.search(query, 100); @@ -170,7 +170,7 @@ public class SimpleLuceneTests { for (int i = 0; i < topDocs.scoreDocs.length; i++) { Document doc = searcher.doc(topDocs.scoreDocs[i].doc); // System.out.println(doc.get("id") + ": " + searcher.explain(query, topDocs.scoreDocs[i].doc)); - assertThat(doc.get("id"), equalTo(Integer.toString(100 - i - 1))); + assertThat(doc.get("_id"), equalTo(Integer.toString(100 - i - 1))); } indexWriter.close(); @@ -180,7 +180,7 @@ public class SimpleLuceneTests { public void testNRTSearchOnClosedWriter() throws Exception { Directory dir = new RAMDirectory(); IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); - DirectoryReader reader = IndexReader.open(indexWriter, true); + DirectoryReader reader = DirectoryReader.open(indexWriter, true); for (int i = 0; i < 100; i++) { Document document = new Document(); @@ -207,11 +207,14 @@ public class SimpleLuceneTests { IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.VERSION, Lucene.STANDARD_ANALYZER)); Document doc = new Document(); - FieldType type = IntField.TYPE_STORED; - type.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS); + FieldType type = IntField.TYPE_NOT_STORED; IntField field = new IntField("int1", 1, type); doc.add(field); + type = new FieldType(IntField.TYPE_NOT_STORED); + type.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS); + type.freeze(); + field = new IntField("int1", 1, type); doc.add(field); @@ -223,15 +226,22 @@ public class SimpleLuceneTests { indexWriter.addDocument(doc); - IndexReader reader = IndexReader.open(indexWriter, true); + IndexReader reader = DirectoryReader.open(indexWriter, true); AtomicReader atomicReader = SlowCompositeReaderWrapper.wrap(reader); - DocsEnum termDocs = atomicReader.termDocsEnum(new Term("int1")); + Terms terms = atomicReader.terms("int1"); + TermsEnum termsEnum = terms.iterator(null); + termsEnum.next(); + + DocsEnum termDocs = termsEnum.docs(atomicReader.getLiveDocs(), null); assertThat(termDocs.nextDoc(), equalTo(0)); assertThat(termDocs.docID(), equalTo(0)); assertThat(termDocs.freq(), 
equalTo(1)); - termDocs = atomicReader.termDocsEnum(new Term("int2")); + terms = atomicReader.terms("int2"); + termsEnum = terms.iterator(termsEnum); + termsEnum.next(); + termDocs = termsEnum.docs(atomicReader.getLiveDocs(), termDocs); assertThat(termDocs.nextDoc(), equalTo(0)); assertThat(termDocs.docID(), equalTo(0)); assertThat(termDocs.freq(), equalTo(2)); From bf13f3f81e437f3057a16b32d04ed158cd3736a1 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Fri, 2 Nov 2012 21:24:16 -0400 Subject: [PATCH 112/146] lucene4: fixed SimpleIndexQueryParserTests --- .../index/mapper/core/StringFieldMapper.java | 3 +- .../index/search/MatchQuery.java | 62 ++++++++++--------- .../query/SimpleIndexQueryParserTests.java | 2 +- 3 files changed, 35 insertions(+), 32 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java index eb69e9b6757..91b32b72902 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java @@ -47,9 +47,10 @@ public class StringFieldMapper extends AbstractFieldMapper implements Al public static final String CONTENT_TYPE = "string"; public static class Defaults extends AbstractFieldMapper.Defaults { - public static final FieldType STRING_FIELD_TYPE = new FieldType(NumberFieldMapper.Defaults.NUMBER_FIELD_TYPE); + public static final FieldType STRING_FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE); static { + STRING_FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY); STRING_FIELD_TYPE.freeze(); } diff --git a/src/main/java/org/elasticsearch/index/search/MatchQuery.java b/src/main/java/org/elasticsearch/index/search/MatchQuery.java index 74c1726c356..8ea2d0d982b 100644 --- a/src/main/java/org/elasticsearch/index/search/MatchQuery.java +++ b/src/main/java/org/elasticsearch/index/search/MatchQuery.java @@ -180,57 +180,59 @@ public class MatchQuery { } // Logic similar to QueryParser#getFieldQuery - final TokenStream source; + TokenStream source = null; + CachingTokenFilter buffer = null; + CharTermAttribute termAtt = null; + PositionIncrementAttribute posIncrAtt = null; + boolean success = false; try { source = analyzer.tokenStream(field, new FastStringReader(text)); source.reset(); + success = true; } catch(IOException ex) { - //LUCENE 4 UPGRADE not sure what todo here really lucene 3.6 had a tokenStream that didn't throw an exc. - throw new ElasticSearchParseException("failed to process query", ex); + //LUCENE 4 UPGRADE not sure what todo here really lucene 3.6 had a tokenStream that didn't throw an exc. 
+ // success==false if we hit an exception } - CachingTokenFilter buffer = new CachingTokenFilter(source); - CharTermAttribute termAtt = null; - PositionIncrementAttribute posIncrAtt = null; int numTokens = 0; + int positionCount = 0; + boolean severalTokensAtSamePosition = false; - boolean success = false; - buffer.reset(); if (success) { + buffer = new CachingTokenFilter(source); + buffer.reset(); if (buffer.hasAttribute(CharTermAttribute.class)) { termAtt = buffer.getAttribute(CharTermAttribute.class); } if (buffer.hasAttribute(PositionIncrementAttribute.class)) { posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); } - } - int positionCount = 0; - boolean severalTokensAtSamePosition = false; - - boolean hasMoreTokens = false; - if (termAtt != null) { - try { - hasMoreTokens = buffer.incrementToken(); - while (hasMoreTokens) { - numTokens++; - int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1; - if (positionIncrement != 0) { - positionCount += positionIncrement; - } else { - severalTokensAtSamePosition = true; - } + boolean hasMoreTokens = false; + if (termAtt != null) { + try { hasMoreTokens = buffer.incrementToken(); + while (hasMoreTokens) { + numTokens++; + int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1; + if (positionIncrement != 0) { + positionCount += positionIncrement; + } else { + severalTokensAtSamePosition = true; + } + hasMoreTokens = buffer.incrementToken(); + } + } catch (IOException e) { + // ignore } - } catch (IOException e) { - // ignore } - } - try { // rewind the buffer stream buffer.reset(); - + } + try { // close original stream - all tokens buffered - source.close(); + if (source != null) { + source.close(); + } } catch (IOException e) { // ignore } diff --git a/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java b/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java index 0041bb04c05..d03c0634957 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/query/SimpleIndexQueryParserTests.java @@ -127,7 +127,7 @@ public class SimpleIndexQueryParserTests { private BytesRef longToPrefixCoded(long val) { BytesRef bytesRef = new BytesRef(); - NumericUtils.longToPrefixCoded(val, 0, bytesRef); + NumericUtils.longToPrefixCoded(val, NumericUtils.PRECISION_STEP_DEFAULT, bytesRef); return bytesRef; } From 8a34ea1223ba7172cb0ebadf91bfe954846d91cc Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Fri, 2 Nov 2012 21:24:44 -0400 Subject: [PATCH 113/146] lucene4: fixed FloatFieldDataTests --- .../elasticsearch/index/analysis/StopTokenFilterFactory.java | 2 +- .../elasticsearch/index/cache/id/simple/SimpleIdCache.java | 4 ++-- .../index/field/data/support/FieldDataLoader.java | 3 ++- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/analysis/StopTokenFilterFactory.java b/src/main/java/org/elasticsearch/index/analysis/StopTokenFilterFactory.java index 8c8e8929cc8..a24934d6e04 100644 --- a/src/main/java/org/elasticsearch/index/analysis/StopTokenFilterFactory.java +++ b/src/main/java/org/elasticsearch/index/analysis/StopTokenFilterFactory.java @@ -50,7 +50,7 @@ public class StopTokenFilterFactory extends AbstractTokenFilterFactory { this.ignoreCase = settings.getAsBoolean("ignore_case", false); this.stopWords = Analysis.parseStopWords(env, settings, StopAnalyzer.ENGLISH_STOP_WORDS_SET, 
version, ignoreCase); // LUCENE 4 UPGRADE: LUCENE_29 constant is no longer defined - this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", version.onOrAfter(Version.parseLeniently("LUCENE_29"))); + this.enablePositionIncrements = settings.getAsBoolean("enable_position_increments", version.onOrAfter(Version.LUCENE_30)); } @Override diff --git a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java index 5b85ed7649c..298e7fbdef6 100644 --- a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java +++ b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java @@ -121,7 +121,7 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se TermsEnum termsEnum = terms.iterator(null); DocsEnum docsEnum = null; - for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.term()) { + for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) { HashedBytesArray[] typeAndId = Uid.splitUidIntoTypeAndId(term); TypeBuilder typeBuilder = readerBuilder.get(typeAndId[0].toUtf8()); if (typeBuilder == null) { @@ -156,7 +156,7 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se TermsEnum termsEnum = terms.iterator(null); DocsEnum docsEnum = null; - for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.term()) { + for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) { HashedBytesArray[] typeAndId = Uid.splitUidIntoTypeAndId(term); TypeBuilder typeBuilder = readerBuilder.get(typeAndId[0].toUtf8()); diff --git a/src/main/java/org/elasticsearch/index/field/data/support/FieldDataLoader.java b/src/main/java/org/elasticsearch/index/field/data/support/FieldDataLoader.java index 0d41783c9c5..bc6e9c1088d 100644 --- a/src/main/java/org/elasticsearch/index/field/data/support/FieldDataLoader.java +++ b/src/main/java/org/elasticsearch/index/field/data/support/FieldDataLoader.java @@ -52,7 +52,7 @@ public class FieldDataLoader { TermsEnum termsEnum = terms.iterator(null); try { DocsEnum docsEnum = null; - for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.term()) { + for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) { loader.collectTerm(BytesRef.deepCopyOf(term)); docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0); for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) { @@ -66,6 +66,7 @@ public class FieldDataLoader { ordinal[docId] = t; idx[docId]++; } + t++; } } catch (RuntimeException e) { if (e.getClass().getName().endsWith("StopFillCacheException")) { From 53d9b13e2f7aa649722bf68d610848a37e70bf94 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Sat, 3 Nov 2012 17:34:10 +0100 Subject: [PATCH 114/146] lucene 4: fix optimization check to set docs_only+omit_norms --- .../org/elasticsearch/index/mapper/core/StringFieldMapper.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java index 91b32b72902..a131e3dc99a 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java @@ -50,7 +50,6 @@ public class StringFieldMapper extends AbstractFieldMapper implements Al public static final 
FieldType STRING_FIELD_TYPE = new FieldType(AbstractFieldMapper.Defaults.FIELD_TYPE); static { - STRING_FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_ONLY); STRING_FIELD_TYPE.freeze(); } @@ -120,7 +119,7 @@ public class StringFieldMapper extends AbstractFieldMapper implements Al // if the field is not analyzed, then by default, we should omit norms and have docs only // index options, as probably what the user really wants // if they are set explicitly, we will use those values - if (fieldType.indexed() && fieldType.tokenized()) { + if (fieldType.indexed() && !fieldType.tokenized()) { if (!omitNormsSet) { fieldType.setOmitNorms(true); } From a38064913f3b985fab66f9be86621de3828f58fa Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Sat, 3 Nov 2012 17:43:17 +0100 Subject: [PATCH 115/146] lucene 4: fix engine tests --- .../unit/index/engine/AbstractSimpleEngineTests.java | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/src/test/java/org/elasticsearch/test/unit/index/engine/AbstractSimpleEngineTests.java b/src/test/java/org/elasticsearch/test/unit/index/engine/AbstractSimpleEngineTests.java index d473b5417ca..5c3c6d95d21 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/engine/AbstractSimpleEngineTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/engine/AbstractSimpleEngineTests.java @@ -108,7 +108,7 @@ public abstract class AbstractSimpleEngineTests { threadPool.shutdownNow(); } } - + private Document testDocumentWithTextField(String id) { Document document = testDocument(id); document.add(new TextField("value", "test", Field.Store.YES)); @@ -165,9 +165,7 @@ public abstract class AbstractSimpleEngineTests { assertThat(segments.isEmpty(), equalTo(true)); // create a doc and refresh - Document document = testDocumentWithTextField("1"); - document.add(new Field(SourceFieldMapper.NAME, B_1.toBytes(), TextField.TYPE_STORED)); - ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, document, Lucene.STANDARD_ANALYZER, B_1, false); + ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, testDocumentWithTextField("1"), Lucene.STANDARD_ANALYZER, B_1, false); engine.create(new Engine.Create(null, newUid("1"), doc)); ParsedDocument doc2 = new ParsedDocument("2", "2", "test", null, -1, -1, testDocumentWithTextField("2"), Lucene.STANDARD_ANALYZER, B_2, false); @@ -233,7 +231,7 @@ public abstract class AbstractSimpleEngineTests { // create a document Document document = testDocumentWithTextField("1"); - document.add(new Field(SourceFieldMapper.NAME, B_1.toBytes(), TextField.TYPE_STORED)); + document.add(new Field(SourceFieldMapper.NAME, B_1.toBytes(), SourceFieldMapper.Defaults.SOURCE_FIELD_TYPE)); ParsedDocument doc = new ParsedDocument("1", "1", "test", null, -1, -1, document, Lucene.STANDARD_ANALYZER, B_1, false); engine.create(new Engine.Create(null, newUid("1"), doc)); @@ -270,7 +268,7 @@ public abstract class AbstractSimpleEngineTests { // now do an update document = testDocument("1"); document.add(new TextField("value", "test1", Field.Store.YES)); - document.add(new Field(SourceFieldMapper.NAME, B_2.toBytes(), TextField.TYPE_STORED)); + document.add(new Field(SourceFieldMapper.NAME, B_2.toBytes(), SourceFieldMapper.Defaults.SOURCE_FIELD_TYPE)); doc = new ParsedDocument("1", "1", "test", null, -1, -1, document, Lucene.STANDARD_ANALYZER, B_2, false); engine.index(new Engine.Index(null, newUid("1"), doc)); @@ -321,7 +319,7 @@ public abstract class AbstractSimpleEngineTests { // add it back document = 
testDocumentWithTextField("1"); - document.add(new Field(SourceFieldMapper.NAME, B_1.toBytes(), TextField.TYPE_STORED)); + document.add(new Field(SourceFieldMapper.NAME, B_1.toBytes(), SourceFieldMapper.Defaults.SOURCE_FIELD_TYPE)); doc = new ParsedDocument("1", "1", "test", null, -1, -1, document, Lucene.STANDARD_ANALYZER, B_1, false); engine.create(new Engine.Create(null, newUid("1"), doc)); From a10f60873c48338d3d046bbdf1fdc2a262675a6b Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Sat, 3 Nov 2012 23:00:15 +0100 Subject: [PATCH 116/146] lucene 4: fix numeric types to properly return numeric streams --- .../elasticsearch/index/mapper/core/DoubleFieldMapper.java | 4 +++- .../elasticsearch/index/mapper/core/FloatFieldMapper.java | 4 ++-- .../elasticsearch/index/mapper/core/IntegerFieldMapper.java | 3 ++- .../org/elasticsearch/index/mapper/core/LongFieldMapper.java | 4 ++-- .../elasticsearch/index/mapper/core/ShortFieldMapper.java | 5 +++-- 5 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java index e26ba4697e9..87dc7657833 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java @@ -19,6 +19,7 @@ package org.elasticsearch.index.mapper.core; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -61,6 +62,7 @@ public class DoubleFieldMapper extends NumberFieldMapper { static { DOUBLE_FIELD_TYPE.freeze(); } + public static final Double NULL_VALUE = null; } @@ -375,7 +377,7 @@ public class DoubleFieldMapper extends NumberFieldMapper { } @Override - public TokenStream tokenStreamValue() { + public TokenStream tokenStream(Analyzer analyzer) throws IOException { if (fieldType().indexed()) { return mapper.popCachedStream().setDoubleValue(number); } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java index 6ce1e8b3fa2..4722898630a 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java @@ -19,10 +19,10 @@ package org.elasticsearch.index.mapper.core; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.NumericRangeQuery; @@ -373,7 +373,7 @@ public class FloatFieldMapper extends NumberFieldMapper { } @Override - public TokenStream tokenStreamValue() { + public TokenStream tokenStream(Analyzer analyzer) throws IOException { if (fieldType().indexed()) { return mapper.popCachedStream().setFloatValue(number); } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java index d28e57b3ba4..b1d78c9ddc7 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java @@ -19,6 +19,7 @@ package 
org.elasticsearch.index.mapper.core; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; @@ -376,7 +377,7 @@ public class IntegerFieldMapper extends NumberFieldMapper { } @Override - public TokenStream tokenStreamValue() { + public TokenStream tokenStream(Analyzer analyzer) throws IOException { if (fieldType().indexed()) { return mapper.popCachedStream().setIntValue(number); } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java index 49c32f288ae..2b2ebf5677d 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java @@ -19,10 +19,10 @@ package org.elasticsearch.index.mapper.core; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.NumericRangeQuery; @@ -376,7 +376,7 @@ public class LongFieldMapper extends NumberFieldMapper { } @Override - public TokenStream tokenStreamValue() { + public TokenStream tokenStream(Analyzer analyzer) throws IOException { if (fieldType().indexed()) { return mapper.popCachedStream().setLongValue(number); } diff --git a/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java index f281f6383fb..cd0dfb87feb 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java @@ -19,10 +19,10 @@ package org.elasticsearch.index.mapper.core; +import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; -import org.apache.lucene.index.FieldInfo.IndexOptions; import org.apache.lucene.search.Filter; import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.NumericRangeQuery; @@ -63,6 +63,7 @@ public class ShortFieldMapper extends NumberFieldMapper { static { SHORT_FIELD_TYPE.freeze(); } + public static final Short NULL_VALUE = null; } @@ -375,7 +376,7 @@ public class ShortFieldMapper extends NumberFieldMapper { } @Override - public TokenStream tokenStreamValue() { + public TokenStream tokenStream(Analyzer analyzer) throws IOException { if (fieldType().indexed()) { return mapper.popCachedStream().setIntValue(number); } From cb5df26bf792108e5063e99b4359b33cf7422f22 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Sat, 3 Nov 2012 23:16:52 +0100 Subject: [PATCH 117/146] lucene 4: use the proper token stream to return --- src/main/java/org/elasticsearch/common/lucene/all/AllField.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/all/AllField.java b/src/main/java/org/elasticsearch/common/lucene/all/AllField.java index 66d39ef57fe..3fdff97ca6d 100644 --- a/src/main/java/org/elasticsearch/common/lucene/all/AllField.java +++ b/src/main/java/org/elasticsearch/common/lucene/all/AllField.java @@ -57,7 +57,7 @@ public class AllField extends Field { } @Override - public 
TokenStream tokenStreamValue() { + public TokenStream tokenStream(Analyzer analyzer) throws IOException { try { allEntries.reset(); // reset the all entries, just in case it was read already return AllTokenStream.allTokenStream(name, allEntries, analyzer); From 72f41111c99fb11c2e6a4c55c8af7ef410759a7d Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Sat, 3 Nov 2012 23:17:19 +0100 Subject: [PATCH 118/146] lucene 4: calling tokenStream is enough, verified to return a stream to analyze content --- .../index/percolator/PercolatorExecutor.java | 35 +++++-------------- 1 file changed, 8 insertions(+), 27 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/percolator/PercolatorExecutor.java b/src/main/java/org/elasticsearch/index/percolator/PercolatorExecutor.java index 5445bc1fd2c..54cd58330e9 100644 --- a/src/main/java/org/elasticsearch/index/percolator/PercolatorExecutor.java +++ b/src/main/java/org/elasticsearch/index/percolator/PercolatorExecutor.java @@ -34,7 +34,6 @@ import org.elasticsearch.common.Preconditions; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.inject.Inject; -import org.elasticsearch.common.io.FastStringReader; import org.elasticsearch.common.logging.ESLogger; import org.elasticsearch.common.lucene.Lucene; import org.elasticsearch.common.settings.Settings; @@ -49,7 +48,10 @@ import org.elasticsearch.index.cache.IndexCache; import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.field.data.FieldData; import org.elasticsearch.index.field.data.FieldDataType; -import org.elasticsearch.index.mapper.*; +import org.elasticsearch.index.mapper.DocumentMapper; +import org.elasticsearch.index.mapper.MapperService; +import org.elasticsearch.index.mapper.ParsedDocument; +import org.elasticsearch.index.mapper.Uid; import org.elasticsearch.index.mapper.internal.UidFieldMapper; import org.elasticsearch.index.query.IndexQueryParserService; import org.elasticsearch.index.query.QueryBuilder; @@ -60,7 +62,6 @@ import org.elasticsearch.index.shard.service.IndexShard; import org.elasticsearch.indices.IndicesService; import java.io.IOException; -import java.io.Reader; import java.util.ArrayList; import java.util.List; import java.util.Map; @@ -303,33 +304,13 @@ public class PercolatorExecutor extends AbstractIndexComponent { } TokenStream tokenStream; try { - tokenStream = field.tokenStream( - mapperService.documentMapper(request.doc().type()).mappers().smartNameFieldMapper(field.name()).indexAnalyzer() - ); + tokenStream = field.tokenStream(request.doc().analyzer()); + if (tokenStream != null) { + memoryIndex.addField(field.name(), tokenStream, field.boost()); + } } catch (IOException e) { throw new ElasticSearchException("Failed to create token stream", e); } - if (tokenStream != null) { - memoryIndex.addField(field.name(), tokenStream, field.boost()); - } else { - Reader reader = field.readerValue(); - if (reader != null) { - try { - memoryIndex.addField(field.name(), request.doc().analyzer().tokenStream(field.name(), reader), field.boost() /** request.doc().rootDoc().getBoost()*/); - } catch (IOException e) { - throw new MapperParsingException("Failed to analyze field [" + field.name() + "]", e); - } - } else { - String value = field.stringValue(); - if (value != null) { - try { - memoryIndex.addField(field.name(), request.doc().analyzer().tokenStream(field.name(), new FastStringReader(value)), field.boost() /** request.doc().rootDoc().getBoost()*/); - } catch (IOException e) { - 
throw new MapperParsingException("Failed to analyze field [" + field.name() + "]", e); - } - } - } - } } final IndexSearcher searcher = memoryIndex.createSearcher(); From 25717ab2536b91fab710df595c841d7e6ee0e523 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Sat, 3 Nov 2012 23:44:23 +0100 Subject: [PATCH 119/146] lucene 4: only omit_norms on non analyzed field if boost is not set --- .../org/elasticsearch/index/mapper/core/StringFieldMapper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java index a131e3dc99a..077b3510047 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/StringFieldMapper.java @@ -120,7 +120,7 @@ public class StringFieldMapper extends AbstractFieldMapper implements Al // index options, as probably what the user really wants // if they are set explicitly, we will use those values if (fieldType.indexed() && !fieldType.tokenized()) { - if (!omitNormsSet) { + if (!omitNormsSet && boost == Defaults.BOOST) { fieldType.setOmitNorms(true); } if (!indexOptionsSet) { From 3816366780ae8ba0608eb038bb6c95b1555f7671 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Sat, 3 Nov 2012 19:03:01 -0400 Subject: [PATCH 120/146] lucene4: fixed SimpleAllMapperTests --- .../unit/index/mapper/all/SimpleAllMapperTests.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/all/SimpleAllMapperTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/all/SimpleAllMapperTests.java index 36b8a414882..c8d9ad04ff1 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/all/SimpleAllMapperTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/all/SimpleAllMapperTests.java @@ -51,7 +51,7 @@ public class SimpleAllMapperTests { byte[] json = copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/all/test1.json"); Document doc = docMapper.parse(new BytesArray(json)).rootDoc(); AllField field = (AllField) doc.getField("_all"); - AllEntries allEntries = ((AllTokenStream) field.tokenStreamValue()).allEntries(); + AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries(); assertThat(allEntries.fields().size(), equalTo(3)); assertThat(allEntries.fields().contains("address.last.location"), equalTo(true)); assertThat(allEntries.fields().contains("name.last"), equalTo(true)); @@ -67,7 +67,7 @@ public class SimpleAllMapperTests { byte[] json = copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/all/test1.json"); Document doc = docMapper.parse(new BytesArray(json)).rootDoc(); AllField field = (AllField) doc.getField("_all"); - AllEntries allEntries = ((AllTokenStream) field.tokenStreamValue()).allEntries(); + AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries(); assertThat(allEntries.fields().size(), equalTo(3)); assertThat(allEntries.fields().contains("address.last.location"), equalTo(true)); assertThat(allEntries.fields().contains("name.last"), equalTo(true)); @@ -83,7 +83,7 @@ public class SimpleAllMapperTests { byte[] json = copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/all/test1.json"); Document doc = docMapper.parse(new BytesArray(json)).rootDoc(); AllField field = (AllField) 
doc.getField("_all"); - AllEntries allEntries = ((AllTokenStream) field.tokenStreamValue()).allEntries(); + AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries(); assertThat(allEntries.fields().size(), equalTo(3)); assertThat(allEntries.fields().contains("address.last.location"), equalTo(true)); assertThat(allEntries.fields().contains("name.last"), equalTo(true)); @@ -106,7 +106,7 @@ public class SimpleAllMapperTests { Document doc = builtDocMapper.parse(new BytesArray(json)).rootDoc(); AllField field = (AllField) doc.getField("_all"); - AllEntries allEntries = ((AllTokenStream) field.tokenStreamValue()).allEntries(); + AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries(); assertThat(allEntries.fields().size(), equalTo(3)); assertThat(allEntries.fields().contains("address.last.location"), equalTo(true)); assertThat(allEntries.fields().contains("name.last"), equalTo(true)); @@ -120,7 +120,7 @@ public class SimpleAllMapperTests { byte[] json = copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/all/test1.json"); Document doc = docMapper.parse(new BytesArray(json)).rootDoc(); AllField field = (AllField) doc.getField("_all"); - AllEntries allEntries = ((AllTokenStream) field.tokenStreamValue()).allEntries(); + AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries(); assertThat(allEntries.fields().size(), equalTo(2)); assertThat(allEntries.fields().contains("name.last"), equalTo(true)); assertThat(allEntries.fields().contains("simple1"), equalTo(true)); @@ -141,7 +141,7 @@ public class SimpleAllMapperTests { Document doc = builtDocMapper.parse(new BytesArray(json)).rootDoc(); AllField field = (AllField) doc.getField("_all"); - AllEntries allEntries = ((AllTokenStream) field.tokenStreamValue()).allEntries(); + AllEntries allEntries = ((AllTokenStream) field.tokenStream(docMapper.mappers().indexAnalyzer())).allEntries(); assertThat(allEntries.fields().size(), equalTo(2)); assertThat(allEntries.fields().contains("name.last"), equalTo(true)); assertThat(allEntries.fields().contains("simple1"), equalTo(true)); From a5bef30be949ee30d33ec12a8c7b95dbc8424209 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Sat, 3 Nov 2012 19:08:04 -0400 Subject: [PATCH 121/146] lucene4: fixed CompressIndexInputOutputTests --- .../unit/common/compress/CompressIndexInputOutputTests.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/org/elasticsearch/test/unit/common/compress/CompressIndexInputOutputTests.java b/src/test/java/org/elasticsearch/test/unit/common/compress/CompressIndexInputOutputTests.java index 42465aaf269..4111466b980 100644 --- a/src/test/java/org/elasticsearch/test/unit/common/compress/CompressIndexInputOutputTests.java +++ b/src/test/java/org/elasticsearch/test/unit/common/compress/CompressIndexInputOutputTests.java @@ -292,7 +292,7 @@ public class CompressIndexInputOutputTests { Document document = reader.document(doc); checkDoc(document); DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor("id", "field", "count"); - reader.document(i, visitor); + reader.document(doc, visitor); document = visitor.getDocument(); checkDoc(document); } From 2fb3591792349e09b897185fe693a2c7cd57be27 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Sat, 3 Nov 2012 19:33:26 -0400 Subject: [PATCH 122/146] lucene4: fixed default values tests to refer to correct default FieldType constants --- 
.../unit/index/mapper/timestamp/TimestampMappingTests.java | 5 ++--- .../test/unit/index/mapper/ttl/TTLMappingTests.java | 5 ++--- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/timestamp/TimestampMappingTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/timestamp/TimestampMappingTests.java index be6311e3ea7..810ae221346 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/timestamp/TimestampMappingTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/timestamp/TimestampMappingTests.java @@ -19,7 +19,6 @@ package org.elasticsearch.test.unit.index.mapper.timestamp; -import org.apache.lucene.document.Field; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.index.mapper.DocumentMapper; @@ -74,8 +73,8 @@ public class TimestampMappingTests { String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").endObject().string(); DocumentMapper docMapper = MapperTests.newParser().parse(mapping); assertThat(docMapper.timestampFieldMapper().enabled(), equalTo(TimestampFieldMapper.Defaults.ENABLED)); - assertThat(docMapper.timestampFieldMapper().stored(), equalTo(TimestampFieldMapper.Defaults.FIELD_TYPE.stored())); - assertThat(docMapper.timestampFieldMapper().indexed(), equalTo(TimestampFieldMapper.Defaults.FIELD_TYPE.indexed())); + assertThat(docMapper.timestampFieldMapper().stored(), equalTo(TimestampFieldMapper.Defaults.TIMESTAMP_FIELD_TYPE.stored())); + assertThat(docMapper.timestampFieldMapper().indexed(), equalTo(TimestampFieldMapper.Defaults.TIMESTAMP_FIELD_TYPE.indexed())); assertThat(docMapper.timestampFieldMapper().path(), equalTo(null)); assertThat(docMapper.timestampFieldMapper().dateTimeFormatter().format(), equalTo(TimestampFieldMapper.DEFAULT_DATE_TIME_FORMAT)); } diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/ttl/TTLMappingTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/ttl/TTLMappingTests.java index afcfc5f367f..ad6afa72119 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/ttl/TTLMappingTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/ttl/TTLMappingTests.java @@ -20,7 +20,6 @@ package org.elasticsearch.test.unit.index.mapper.ttl; -import org.apache.lucene.document.Field; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.index.mapper.DocumentMapper; @@ -72,8 +71,8 @@ public class TTLMappingTests { String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").endObject().string(); DocumentMapper docMapper = MapperTests.newParser().parse(mapping); assertThat(docMapper.TTLFieldMapper().enabled(), equalTo(TTLFieldMapper.Defaults.ENABLED)); - assertThat(docMapper.TTLFieldMapper().stored(), equalTo(TTLFieldMapper.Defaults.FIELD_TYPE.stored())); - assertThat(docMapper.TTLFieldMapper().indexed(), equalTo(TTLFieldMapper.Defaults.FIELD_TYPE.indexed())); + assertThat(docMapper.TTLFieldMapper().stored(), equalTo(TTLFieldMapper.Defaults.TTL_FIELD_TYPE.stored())); + assertThat(docMapper.TTLFieldMapper().indexed(), equalTo(TTLFieldMapper.Defaults.TTL_FIELD_TYPE.indexed())); } From 3f3a95668b789593b3748af3d1113fb568ee1c17 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Sat, 3 Nov 2012 22:28:27 -0400 Subject: [PATCH 123/146] lucene4: add support for omit_norm setting to numeric types and don't 
omit norms if boost is not 1.0 This commit enables setting boost for numeric fields. However, there is still no way to take advantage of boosted numeric fields during searching because all queries against numeric fields are translated into range queries wrapped in ConstantScore. Boost for numeric fields is broken on master as well https://gist.github.com/7ecedea4f6a5219efb89 --- .../index/mapper/core/ByteFieldMapper.java | 2 +- .../index/mapper/core/DateFieldMapper.java | 2 +- .../index/mapper/core/DoubleFieldMapper.java | 2 +- .../index/mapper/core/FloatFieldMapper.java | 2 +- .../index/mapper/core/IntegerFieldMapper.java | 2 +- .../index/mapper/core/LongFieldMapper.java | 2 +- .../index/mapper/core/ShortFieldMapper.java | 2 +- .../index/mapper/core/TypeParsers.java | 2 ++ .../index/mapper/ip/IpFieldMapper.java | 2 +- .../mapper/boost/CustomBoostMappingTests.java | 14 +++++++------- 10 files changed, 17 insertions(+), 15 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java index 9f32901e525..074dff92b60 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/ByteFieldMapper.java @@ -81,7 +81,7 @@ public class ByteFieldMapper extends NumberFieldMapper { @Override public ByteFieldMapper build(BuilderContext context) { - fieldType.setOmitNorms(fieldType.omitNorms() || boost != 1.0f); + fieldType.setOmitNorms(fieldType.omitNorms() && boost == 1.0f); ByteFieldMapper fieldMapper = new ByteFieldMapper(buildNames(context), precisionStep, fuzzyFactor, boost, fieldType, nullValue, ignoreMalformed(context)); fieldMapper.includeInAll(includeInAll); diff --git a/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java index 7447d7e4be9..86b259b7dfb 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/DateFieldMapper.java @@ -109,7 +109,7 @@ public class DateFieldMapper extends NumberFieldMapper { if (context.indexSettings() != null) { parseUpperInclusive = context.indexSettings().getAsBoolean("index.mapping.date.parse_upper_inclusive", Defaults.PARSE_UPPER_INCLUSIVE); } - fieldType.setOmitNorms(fieldType.omitNorms() || boost != 1.0f); + fieldType.setOmitNorms(fieldType.omitNorms() && boost == 1.0f); DateFieldMapper fieldMapper = new DateFieldMapper(buildNames(context), dateTimeFormatter, precisionStep, fuzzyFactor, boost, fieldType, nullValue, timeUnit, parseUpperInclusive, ignoreMalformed(context)); diff --git a/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java index 87dc7657833..6430e74334a 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/DoubleFieldMapper.java @@ -82,7 +82,7 @@ public class DoubleFieldMapper extends NumberFieldMapper { @Override public DoubleFieldMapper build(BuilderContext context) { - fieldType.setOmitNorms(fieldType.omitNorms() || boost != 1.0f); + fieldType.setOmitNorms(fieldType.omitNorms() && boost == 1.0f); DoubleFieldMapper fieldMapper = new DoubleFieldMapper(buildNames(context), precisionStep, fuzzyFactor, boost, fieldType, nullValue, ignoreMalformed(context)); diff --git 
a/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java index 4722898630a..f38c7e4bce2 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/FloatFieldMapper.java @@ -83,7 +83,7 @@ public class FloatFieldMapper extends NumberFieldMapper { @Override public FloatFieldMapper build(BuilderContext context) { - fieldType.setOmitNorms(fieldType.omitNorms() || boost != 1.0f); + fieldType.setOmitNorms(fieldType.omitNorms() && boost == 1.0f); FloatFieldMapper fieldMapper = new FloatFieldMapper(buildNames(context), precisionStep, fuzzyFactor, boost, fieldType, nullValue, ignoreMalformed(context)); diff --git a/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java index b1d78c9ddc7..304df946551 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/IntegerFieldMapper.java @@ -83,7 +83,7 @@ public class IntegerFieldMapper extends NumberFieldMapper { @Override public IntegerFieldMapper build(BuilderContext context) { - fieldType.setOmitNorms(fieldType.omitNorms() || boost != 1.0f); + fieldType.setOmitNorms(fieldType.omitNorms() && boost == 1.0f); IntegerFieldMapper fieldMapper = new IntegerFieldMapper(buildNames(context), precisionStep, fuzzyFactor, boost, fieldType, nullValue, ignoreMalformed(context)); diff --git a/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java index 2b2ebf5677d..75550edc95b 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/LongFieldMapper.java @@ -83,7 +83,7 @@ public class LongFieldMapper extends NumberFieldMapper { @Override public LongFieldMapper build(BuilderContext context) { - fieldType.setOmitNorms(fieldType.omitNorms() || boost != 1.0f); + fieldType.setOmitNorms(fieldType.omitNorms() && boost == 1.0f); LongFieldMapper fieldMapper = new LongFieldMapper(buildNames(context), precisionStep, fuzzyFactor, boost, fieldType, nullValue, ignoreMalformed(context)); diff --git a/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java index cd0dfb87feb..237bbc09552 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/ShortFieldMapper.java @@ -83,7 +83,7 @@ public class ShortFieldMapper extends NumberFieldMapper { @Override public ShortFieldMapper build(BuilderContext context) { - fieldType.setOmitNorms(fieldType.omitNorms() || boost != 1.0f); + fieldType.setOmitNorms(fieldType.omitNorms() && boost == 1.0f); ShortFieldMapper fieldMapper = new ShortFieldMapper(buildNames(context), precisionStep, fuzzyFactor, boost, fieldType, nullValue, ignoreMalformed(context)); diff --git a/src/main/java/org/elasticsearch/index/mapper/core/TypeParsers.java b/src/main/java/org/elasticsearch/index/mapper/core/TypeParsers.java index 6e496ebf124..924e6a4706f 100644 --- a/src/main/java/org/elasticsearch/index/mapper/core/TypeParsers.java +++ b/src/main/java/org/elasticsearch/index/mapper/core/TypeParsers.java @@ -53,6 +53,8 @@ public class TypeParsers { builder.fuzzyFactor(propNode.toString()); }
else if (propName.equals("ignore_malformed")) { builder.ignoreMalformed(nodeBooleanValue(propNode)); + } else if (propName.equals("omit_norms")) { + builder.omitNorms(nodeBooleanValue(propNode)); } } } diff --git a/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java b/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java index 6114801e3e1..6bd71246c1b 100644 --- a/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java +++ b/src/main/java/org/elasticsearch/index/mapper/ip/IpFieldMapper.java @@ -110,7 +110,7 @@ public class IpFieldMapper extends NumberFieldMapper { @Override public IpFieldMapper build(BuilderContext context) { - fieldType.setOmitNorms(fieldType.omitNorms() || boost != 1.0f); + fieldType.setOmitNorms(fieldType.omitNorms() && boost == 1.0f); IpFieldMapper fieldMapper = new IpFieldMapper(buildNames(context), precisionStep, boost, fieldType, nullValue, ignoreMalformed(context)); fieldMapper.includeInAll(includeInAll); diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/boost/CustomBoostMappingTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/boost/CustomBoostMappingTests.java index 186f94ea28c..b7d986b6e2a 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/boost/CustomBoostMappingTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/boost/CustomBoostMappingTests.java @@ -35,13 +35,13 @@ public class CustomBoostMappingTests { public void testCustomBoostValues() throws Exception { String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties") .startObject("s_field").field("type", "string").endObject() - .startObject("l_field").field("type", "long").endObject() - .startObject("i_field").field("type", "integer").endObject() - .startObject("sh_field").field("type", "short").endObject() - .startObject("b_field").field("type", "byte").endObject() - .startObject("d_field").field("type", "double").endObject() - .startObject("f_field").field("type", "float").endObject() - .startObject("date_field").field("type", "date").endObject() + .startObject("l_field").field("type", "long").field("omit_norms", false).endObject() + .startObject("i_field").field("type", "integer").field("omit_norms", false).endObject() + .startObject("sh_field").field("type", "short").field("omit_norms", false).endObject() + .startObject("b_field").field("type", "byte").field("omit_norms", false).endObject() + .startObject("d_field").field("type", "double").field("omit_norms", false).endObject() + .startObject("f_field").field("type", "float").field("omit_norms", false).endObject() + .startObject("date_field").field("type", "date").field("omit_norms", false).endObject() .endObject().endObject().endObject().string(); DocumentMapper mapper = MapperTests.newParser().parse(mapping); From 7aac88cf5c6f644858460dccdd4386c630b1596c Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Sun, 4 Nov 2012 12:52:52 -0500 Subject: [PATCH 124/146] lucene4: check liveDocs and acceptedDocs for null before trying to call get() on them --- .../elasticsearch/common/lucene/search/NotDeletedFilter.java | 2 +- .../org/elasticsearch/index/search/child/HasChildFilter.java | 2 +- .../org/elasticsearch/index/search/child/HasParentFilter.java | 4 ++-- .../elasticsearch/index/search/child/TopChildrenQuery.java | 3 ++- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/search/NotDeletedFilter.java 
b/src/main/java/org/elasticsearch/common/lucene/search/NotDeletedFilter.java index 9d802e6447e..5f3a8eefbcd 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/NotDeletedFilter.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/NotDeletedFilter.java @@ -93,7 +93,7 @@ public class NotDeletedFilter extends Filter { @Override protected boolean match(int doc) { - return liveDocs.get(doc); + return liveDocs == null || liveDocs.get(doc); } } } diff --git a/src/main/java/org/elasticsearch/index/search/child/HasChildFilter.java b/src/main/java/org/elasticsearch/index/search/child/HasChildFilter.java index 9f33e2d9614..f08dec3a8df 100644 --- a/src/main/java/org/elasticsearch/index/search/child/HasChildFilter.java +++ b/src/main/java/org/elasticsearch/index/search/child/HasChildFilter.java @@ -178,7 +178,7 @@ public abstract class HasChildFilter extends Filter implements ScopePhase.Collec } public boolean get(int doc) { - return acceptDocs.get(doc) && parents.contains(typeCache.idByDoc(doc)); + return (acceptDocs == null || acceptDocs.get(doc)) && parents.contains(typeCache.idByDoc(doc)); } } diff --git a/src/main/java/org/elasticsearch/index/search/child/HasParentFilter.java b/src/main/java/org/elasticsearch/index/search/child/HasParentFilter.java index 8cb758748c5..899d6b95020 100644 --- a/src/main/java/org/elasticsearch/index/search/child/HasParentFilter.java +++ b/src/main/java/org/elasticsearch/index/search/child/HasParentFilter.java @@ -143,7 +143,7 @@ public abstract class HasParentFilter extends Filter implements ScopePhase.Colle } public boolean get(int doc) { - return acceptDocs.get(doc) && parents.contains(idReaderTypeCache.parentIdByDoc(doc)); + return (acceptDocs == null || acceptDocs.get(doc)) && parents.contains(idReaderTypeCache.parentIdByDoc(doc)); } } @@ -229,7 +229,7 @@ public abstract class HasParentFilter extends Filter implements ScopePhase.Colle } public boolean get(int doc) { - if (!acceptDocs.get(doc) || doc == -1) { + if ((acceptDocs != null && !acceptDocs.get(doc)) || doc == -1) { return false; } diff --git a/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java b/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java index 1b9634f4a51..57fe74e112a 100644 --- a/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java +++ b/src/main/java/org/elasticsearch/index/search/child/TopChildrenQuery.java @@ -138,7 +138,8 @@ public class TopChildrenQuery extends Query implements ScopePhase.TopDocsPhase { for (AtomicReaderContext atomicReaderContext : context.searcher().getIndexReader().leaves()) { AtomicReader indexReader = atomicReaderContext.reader(); int parentDocId = context.idCache().reader(indexReader).docById(parentType, parentId); - if (parentDocId != -1 && indexReader.getLiveDocs().get(parentDocId)) { + Bits liveDocs = indexReader.getLiveDocs(); + if (parentDocId != -1 && (liveDocs == null || liveDocs.get(parentDocId))) { // we found a match, add it and break TIntObjectHashMap readerParentDocs = parentDocsPerReader.get(indexReader.getCoreCacheKey()); From 9ad05ecdea7bb2e374de1de1a74e4f2490256361 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Sun, 4 Nov 2012 20:31:46 -0500 Subject: [PATCH 125/146] lucene 4: make FieldVisitors behave similarly to FieldSelectors Added back the reset() method for now to make things work. Will refactor it out when we have tests passing.
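For context on the API this series is moving to: in Lucene 4, stored fields are loaded by pushing them through a StoredFieldVisitor rather than pulling a pre-filtered Document through a FieldSelector. The following is a minimal, self-contained sketch of that protocol using only stock Lucene 4 classes; the class and field names are invented for the example and are not part of this patch.

    import org.apache.lucene.index.FieldInfo;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.StoredFieldVisitor;

    import java.io.IOException;

    // Collects the string value of one stored field; reset() lets a single
    // instance be reused across documents, mirroring the reset() added below.
    public class OneFieldVisitor extends StoredFieldVisitor {
        private final String field;
        private String value;

        public OneFieldVisitor(String field) {
            this.field = field;
        }

        @Override
        public Status needsField(FieldInfo fieldInfo) throws IOException {
            // YES delivers this field, NO skips it; STOP would abort
            // visiting the remaining fields of the current document.
            return field.equals(fieldInfo.name) ? Status.YES : Status.NO;
        }

        @Override
        public void stringField(FieldInfo fieldInfo, String value) throws IOException {
            this.value = value;
        }

        public void reset() {
            value = null;
        }

        public String value() {
            return value;
        }
    }

    // Usage: the reader pushes the stored fields of doc 42 through the visitor.
    // OneFieldVisitor visitor = new OneFieldVisitor("title");
    // reader.document(42, visitor);
    // String title = visitor.value();

Unlike a FieldSelector, the visitor never materializes fields it skips, which is why the visitors in this patch only build a Document lazily in createDocument().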
--- ...ava => AbstractMultipleFieldsVisitor.java} | 33 +++++---------------- .../lucene/document/BaseFieldVisitor.java | 3 ++ .../lucene/document/SingleFieldVisitor.java | 5 +++ .../mapper/internal/SourceFieldVisitor.java | 5 +++ .../selector/AllButSourceFieldVisitor.java | 6 ++-- .../selector/FieldMappersFieldVisitor.java | 16 +++++++-- .../selector/UidAndRoutingFieldVisitor.java | 6 ++++ .../selector/UidAndSourceFieldVisitor.java | 6 ++++ .../mapper/selector/UidFieldVisitor.java | 5 +++ .../search/fetch/FetchPhase.java | 4 +-- 10 files changed, 57 insertions(+), 32 deletions(-) rename src/main/java/org/elasticsearch/common/lucene/document/{MultipleFieldsVisitor.java => AbstractMultipleFieldsVisitor.java} (64%) diff --git a/src/main/java/org/elasticsearch/common/lucene/document/MultipleFieldsVisitor.java b/src/main/java/org/elasticsearch/common/lucene/document/AbstractMultipleFieldsVisitor.java similarity index 64% rename from src/main/java/org/elasticsearch/common/lucene/document/MultipleFieldsVisitor.java rename to src/main/java/org/elasticsearch/common/lucene/document/AbstractMultipleFieldsVisitor.java index 21fd6f5006a..df76014bc74 100644 --- a/src/main/java/org/elasticsearch/common/lucene/document/MultipleFieldsVisitor.java +++ b/src/main/java/org/elasticsearch/common/lucene/document/AbstractMultipleFieldsVisitor.java @@ -10,28 +10,9 @@ import java.util.Set; /** * */ -public class MultipleFieldsVisitor extends BaseFieldVisitor { +public abstract class AbstractMultipleFieldsVisitor extends BaseFieldVisitor { protected Document doc = new Document(); - protected final Set fieldsToAdd; - - /** Load only fields named in the provided Set. */ - public MultipleFieldsVisitor(Set fieldsToAdd) { - this.fieldsToAdd = fieldsToAdd; - } - - /** Load only fields named in the provided Set. */ - public MultipleFieldsVisitor(String... fields) { - fieldsToAdd = new HashSet(fields.length); - for(String field : fields) { - fieldsToAdd.add(field); - } - } - - /** Load all stored fields. */ - public MultipleFieldsVisitor() { - this.fieldsToAdd = null; - } @Override public void binaryField(FieldInfo fieldInfo, byte[] value) throws IOException { @@ -68,13 +49,15 @@ public class MultipleFieldsVisitor extends BaseFieldVisitor { doc.add(new StoredField(fieldInfo.name, value)); } - @Override - public Status needsField(FieldInfo fieldInfo) throws IOException { - return fieldsToAdd == null || fieldsToAdd.contains(fieldInfo.name) ? Status.YES : Status.NO; - } - @Override public Document createDocument() { return doc; } + + @Override + public void reset() { + if (!doc.getFields().isEmpty()) { + doc = new Document(); + } + } } diff --git a/src/main/java/org/elasticsearch/common/lucene/document/BaseFieldVisitor.java b/src/main/java/org/elasticsearch/common/lucene/document/BaseFieldVisitor.java index a86ce265632..8044de316e9 100644 --- a/src/main/java/org/elasticsearch/common/lucene/document/BaseFieldVisitor.java +++ b/src/main/java/org/elasticsearch/common/lucene/document/BaseFieldVisitor.java @@ -8,4 +8,7 @@ public abstract class BaseFieldVisitor extends StoredFieldVisitor { // LUCENE 4 UPGRADE: Added for now to make everything work. Want to make use of Document as little as possible.
public abstract Document createDocument(); + // LUCENE 4 UPGRADE: Added for now for compatibility with Selectors + public abstract void reset(); + } diff --git a/src/main/java/org/elasticsearch/common/lucene/document/SingleFieldVisitor.java b/src/main/java/org/elasticsearch/common/lucene/document/SingleFieldVisitor.java index 09ff642c8b6..bd829ffa7ee 100644 --- a/src/main/java/org/elasticsearch/common/lucene/document/SingleFieldVisitor.java +++ b/src/main/java/org/elasticsearch/common/lucene/document/SingleFieldVisitor.java @@ -55,6 +55,11 @@ public class SingleFieldVisitor extends BaseFieldVisitor { return document; } + @Override + public void reset() { + values = null; + } + public String value() { return values.get(0); } diff --git a/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldVisitor.java b/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldVisitor.java index 4b378acdf3a..907e4a7f208 100644 --- a/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldVisitor.java +++ b/src/main/java/org/elasticsearch/index/mapper/internal/SourceFieldVisitor.java @@ -54,6 +54,11 @@ public class SourceFieldVisitor extends BaseFieldVisitor { return document; } + @Override + public void reset() { + source = null; + } + public BytesRef source() { return source; } diff --git a/src/main/java/org/elasticsearch/index/mapper/selector/AllButSourceFieldVisitor.java b/src/main/java/org/elasticsearch/index/mapper/selector/AllButSourceFieldVisitor.java index c586c7b4094..ef89bf897a0 100644 --- a/src/main/java/org/elasticsearch/index/mapper/selector/AllButSourceFieldVisitor.java +++ b/src/main/java/org/elasticsearch/index/mapper/selector/AllButSourceFieldVisitor.java @@ -20,7 +20,7 @@ package org.elasticsearch.index.mapper.selector; import org.apache.lucene.index.FieldInfo; -import org.elasticsearch.common.lucene.document.MultipleFieldsVisitor; +import org.elasticsearch.common.lucene.document.AbstractMultipleFieldsVisitor; import org.elasticsearch.index.mapper.internal.SourceFieldMapper; import java.io.IOException; @@ -28,14 +28,14 @@ import java.io.IOException; /** * A field selector that loads all fields except the source field. 
*/ -public class AllButSourceFieldVisitor extends MultipleFieldsVisitor { +public class AllButSourceFieldVisitor extends AbstractMultipleFieldsVisitor { @Override public Status needsField(FieldInfo fieldInfo) throws IOException { if (SourceFieldMapper.NAME.equals(fieldInfo.name)) { return Status.NO; } - return super.needsField(fieldInfo); + return Status.YES; } @Override diff --git a/src/main/java/org/elasticsearch/index/mapper/selector/FieldMappersFieldVisitor.java b/src/main/java/org/elasticsearch/index/mapper/selector/FieldMappersFieldVisitor.java index 3661167fec2..87ffa34fbd5 100644 --- a/src/main/java/org/elasticsearch/index/mapper/selector/FieldMappersFieldVisitor.java +++ b/src/main/java/org/elasticsearch/index/mapper/selector/FieldMappersFieldVisitor.java @@ -19,14 +19,21 @@ package org.elasticsearch.index.mapper.selector; -import org.elasticsearch.common.lucene.document.MultipleFieldsVisitor; +import org.apache.lucene.index.FieldInfo; +import org.elasticsearch.common.lucene.document.AbstractMultipleFieldsVisitor; import org.elasticsearch.index.mapper.FieldMapper; import org.elasticsearch.index.mapper.FieldMappers; +import java.io.IOException; +import java.util.HashSet; +import java.util.Set; + /** * */ -public class FieldMappersFieldVisitor extends MultipleFieldsVisitor { +public class FieldMappersFieldVisitor extends AbstractMultipleFieldsVisitor { + + protected final Set fieldsToAdd = new HashSet(); public void add(String fieldName) { fieldsToAdd.add(fieldName); @@ -38,6 +45,11 @@ public class FieldMappersFieldVisitor extends MultipleFieldsVisitor { } } + @Override + public Status needsField(FieldInfo fieldInfo) throws IOException { + return fieldsToAdd.contains(fieldInfo.name) ? Status.YES : Status.NO; + } + @Override public String toString() { return "fields(" + fieldsToAdd + ")"; diff --git a/src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldVisitor.java b/src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldVisitor.java index f037412d1e1..6a75d1efd8d 100644 --- a/src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldVisitor.java +++ b/src/main/java/org/elasticsearch/index/mapper/selector/UidAndRoutingFieldVisitor.java @@ -45,6 +45,12 @@ public class UidAndRoutingFieldVisitor extends BaseFieldVisitor { return document; } + @Override + public void reset() { + uid = null; + routing = null; + } + @Override public Status needsField(FieldInfo fieldInfo) throws IOException { if (RoutingFieldMapper.NAME.equals(fieldInfo.name)) { diff --git a/src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldVisitor.java b/src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldVisitor.java index e5431b4770d..fab029d7759 100644 --- a/src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldVisitor.java +++ b/src/main/java/org/elasticsearch/index/mapper/selector/UidAndSourceFieldVisitor.java @@ -45,6 +45,12 @@ public class UidAndSourceFieldVisitor extends BaseFieldVisitor { return document; } + @Override + public void reset() { + source = null; + uid = null; + } + @Override public Status needsField(FieldInfo fieldInfo) throws IOException { if (SourceFieldMapper.NAME.equals(fieldInfo.name)) { diff --git a/src/main/java/org/elasticsearch/index/mapper/selector/UidFieldVisitor.java b/src/main/java/org/elasticsearch/index/mapper/selector/UidFieldVisitor.java index e3dc0fe5cf6..15d05ef77c8 100644 --- a/src/main/java/org/elasticsearch/index/mapper/selector/UidFieldVisitor.java +++ 
b/src/main/java/org/elasticsearch/index/mapper/selector/UidFieldVisitor.java @@ -57,6 +57,11 @@ public class UidFieldVisitor extends BaseFieldVisitor { return document; } + @Override + public void reset() { + uid = null; + } + public String uid() { return uid; } diff --git a/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java b/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java index e0393866ed4..5342984cb7d 100644 --- a/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java +++ b/src/main/java/org/elasticsearch/search/fetch/FetchPhase.java @@ -37,10 +37,9 @@ import org.elasticsearch.index.mapper.FieldMappers; import org.elasticsearch.index.mapper.Uid; import org.elasticsearch.index.mapper.internal.SourceFieldMapper; import org.elasticsearch.index.mapper.internal.UidFieldMapper; +import org.elasticsearch.index.mapper.selector.*; import org.elasticsearch.index.mapper.selector.AllButSourceFieldVisitor; import org.elasticsearch.index.mapper.selector.FieldMappersFieldVisitor; -import org.elasticsearch.index.mapper.selector.UidAndSourceFieldVisitor; -import org.elasticsearch.index.mapper.selector.UidFieldVisitor; import org.elasticsearch.indices.TypeMissingException; import org.elasticsearch.search.SearchHitField; import org.elasticsearch.search.SearchParseElement; @@ -291,6 +290,7 @@ public class FetchPhase implements SearchPhase { if (fieldVisitor == null) { return context.searcher().doc(docId); } + fieldVisitor.reset(); context.searcher().doc(docId, fieldVisitor); return fieldVisitor.createDocument(); } catch (IOException e) { From 2eaad61a9e0698aa1db753690c181ad08dccf636 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Sun, 4 Nov 2012 21:02:24 -0500 Subject: [PATCH 126/146] lucene4: make SimpleIdCache more resilient to missing fields Not sure if we can get a segment without the _uid field, but segments without the _parent field definitely happen.
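The null-check pattern this patch adopts, shown as a standalone sketch: in Lucene 4, AtomicReader.terms(field) returns null when a segment has no postings for that field, and getLiveDocs() returns null when the segment has no deletions, so both must be guarded before iterating. Only stock Lucene 4 calls are used; the class and field names are illustrative, not from this patch.

    import org.apache.lucene.index.AtomicReader;
    import org.apache.lucene.index.DocsEnum;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.util.BytesRef;

    import java.io.IOException;

    public class SegmentScan {
        // Counts live postings for one field in one segment, tolerating
        // segments that lack the field entirely.
        public static long countLivePostings(AtomicReader reader, String field) throws IOException {
            Terms terms = reader.terms(field);
            if (terms == null) {
                return 0; // no postings for this field in this segment - not an error
            }
            long count = 0;
            TermsEnum termsEnum = terms.iterator(null);
            DocsEnum docsEnum = null; // reused across terms, as in the patch above
            for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
                // getLiveDocs() may itself be null when nothing is deleted
                docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0);
                while (docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
                    count++;
                }
            }
            return count;
        }
    }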
--- .../index/cache/id/simple/SimpleIdCache.java | 85 +++++++++---------- 1 file changed, 40 insertions(+), 45 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java index 298e7fbdef6..fa6242de3ad 100644 --- a/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java +++ b/src/main/java/org/elasticsearch/index/cache/id/simple/SimpleIdCache.java @@ -23,7 +23,6 @@ import gnu.trove.impl.Constants; import org.apache.lucene.index.*; import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchException; -import org.elasticsearch.ElasticSearchIllegalArgumentException; import org.elasticsearch.common.bytes.HashedBytesArray; import org.elasticsearch.common.collect.MapBuilder; import org.elasticsearch.common.inject.Inject; @@ -115,25 +114,23 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se Terms terms = reader.terms(UidFieldMapper.NAME); - if (terms == null) { // Should not happen - throw new ElasticSearchIllegalArgumentException("Id cache needs _uid field"); - } + if (terms != null) { + TermsEnum termsEnum = terms.iterator(null); + DocsEnum docsEnum = null; + for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) { + HashedBytesArray[] typeAndId = Uid.splitUidIntoTypeAndId(term); + TypeBuilder typeBuilder = readerBuilder.get(typeAndId[0].toUtf8()); + if (typeBuilder == null) { + typeBuilder = new TypeBuilder(reader); + readerBuilder.put(typeAndId[0].toUtf8(), typeBuilder); + } - TermsEnum termsEnum = terms.iterator(null); - DocsEnum docsEnum = null; - for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) { - HashedBytesArray[] typeAndId = Uid.splitUidIntoTypeAndId(term); - TypeBuilder typeBuilder = readerBuilder.get(typeAndId[0].toUtf8()); - if (typeBuilder == null) { - typeBuilder = new TypeBuilder(reader); - readerBuilder.put(typeAndId[0].toUtf8(), typeBuilder); - } - - HashedBytesArray idAsBytes = checkIfCanReuse(builders, typeAndId[1]); - docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0); - for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) { - typeBuilder.idToDoc.put(idAsBytes, docId); - typeBuilder.docToId[docId] = idAsBytes; + HashedBytesArray idAsBytes = checkIfCanReuse(builders, typeAndId[1]); + docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0); + for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) { + typeBuilder.idToDoc.put(idAsBytes, docId); + typeBuilder.docToId[docId] = idAsBytes; + } } } } @@ -150,35 +147,33 @@ public class SimpleIdCache extends AbstractIndexComponent implements IdCache, Se Map readerBuilder = builders.get(reader.getCoreCacheKey()); Terms terms = reader.terms(ParentFieldMapper.NAME); - if (terms == null) { // Should not happen - throw new ElasticSearchIllegalArgumentException("Id cache needs _parent field"); - } + if (terms != null) { + TermsEnum termsEnum = terms.iterator(null); + DocsEnum docsEnum = null; + for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) { + HashedBytesArray[] typeAndId = Uid.splitUidIntoTypeAndId(term); - TermsEnum termsEnum = terms.iterator(null); - DocsEnum docsEnum = null; - for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) { - HashedBytesArray[] typeAndId = Uid.splitUidIntoTypeAndId(term); - - TypeBuilder typeBuilder = 
readerBuilder.get(typeAndId[0].toUtf8()); - if (typeBuilder == null) { - typeBuilder = new TypeBuilder(reader); - readerBuilder.put(typeAndId[0].toUtf8(), typeBuilder); - } - - HashedBytesArray idAsBytes = checkIfCanReuse(builders, typeAndId[1]); - boolean added = false; // optimize for when all the docs are deleted for this id - - docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0); - for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) { - if (!added) { - typeBuilder.parentIdsValues.add(idAsBytes); - added = true; + TypeBuilder typeBuilder = readerBuilder.get(typeAndId[0].toUtf8()); + if (typeBuilder == null) { + typeBuilder = new TypeBuilder(reader); + readerBuilder.put(typeAndId[0].toUtf8(), typeBuilder); } - typeBuilder.parentIdsOrdinals[docId] = typeBuilder.t; - } - if (added) { - typeBuilder.t++; + HashedBytesArray idAsBytes = checkIfCanReuse(builders, typeAndId[1]); + boolean added = false; // optimize for when all the docs are deleted for this id + + docsEnum = termsEnum.docs(reader.getLiveDocs(), docsEnum, 0); + for (int docId = docsEnum.nextDoc(); docId != DocsEnum.NO_MORE_DOCS; docId = docsEnum.nextDoc()) { + if (!added) { + typeBuilder.parentIdsValues.add(idAsBytes); + added = true; + } + typeBuilder.parentIdsOrdinals[docId] = typeBuilder.t; + } + + if (added) { + typeBuilder.t++; + } } } } From 6d4077020096e32a89606ae80dc4fa8b1080e89e Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Sun, 4 Nov 2012 22:31:56 -0500 Subject: [PATCH 127/146] lucene 4: fixed facets and filtering aliases I am not completely sure about this one, but it reduces the number of failing tests from 98 to 31, so I am going to check it in. Please review and fix it if there is a better solution. Because of a change in Lucene 4.0, ContextIndexSearcher was bypassed and elasticsearch filters and collectors were ignored. In Lucene 3.6 the stack of Searcher search calls looked like this:
search(Query query, int n)
search(Query query, Filter filter, int n)
search(Weight weight, Filter filter, int nDocs)
search(Weight weight, Filter filter, ScoreDoc after, int nDocs)
search(Weight weight, Filter filter, Collector collector) <-- this is where ContextIndexSearcher was injecting the combined filter and collector
search(Weight weight, Filter filter, Collector collector)
In Lucene 4.0 the stack looks like this:
search(Query query, int n)
search(Query query, Filter filter, int n) <-- here Lucene wraps Query and Filter into a Weight
search(Weight weight, ScoreDoc after, int nDocs)
search(List leaves, Weight weight, ScoreDoc after, int nDocs)
search(List leaves, Weight weight, Collector collector)
...
In other words, when we have the Filter we don't have a Collector yet, but when we have the Collector the Filter is already wrapped inside the Weight. The only way to fix the problem that I could think of is to introduce two injection points: one for Filters and another one for Collectors:
search(Query query, int n)
search(Query query, Filter filter, int n) <-- here combined Filters are injected
search(Weight weight, ScoreDoc after, int nDocs)
search(List leaves, Weight weight, ScoreDoc after, int nDocs)
search(List leaves, Weight weight, Collector collector) <-- here Collectors are injected
A similar problem existed for count(), so I had to override search(Query query, Collector results) as well.
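To make the two injection points concrete, here is a stripped-down sketch assuming only the stock Lucene 4.0 IndexSearcher API. The combineFilters and wrapCollector bodies are placeholders for illustration, not the actual ContextIndexSearcher logic.

    import org.apache.lucene.index.AtomicReaderContext;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.search.Collector;
    import org.apache.lucene.search.Filter;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.TopDocs;
    import org.apache.lucene.search.Weight;

    import java.io.IOException;
    import java.util.List;

    public class TwoInjectionPointsSearcher extends IndexSearcher {

        public TwoInjectionPointsSearcher(IndexReader reader) {
            super(reader);
        }

        // Injection point 1: the Filter is still visible here, before
        // Lucene folds Query + Filter into a single Weight.
        @Override
        public TopDocs search(Query query, Filter filter, int n) throws IOException {
            return super.search(query, combineFilters(filter), n);
        }

        // Injection point 2: the Filter is gone (inside the Weight), but
        // every collector-based search funnels through this method.
        @Override
        protected void search(List<AtomicReaderContext> leaves, Weight weight, Collector collector) throws IOException {
            super.search(leaves, weight, wrapCollector(collector));
        }

        private Filter combineFilters(Filter filter) {
            return filter; // placeholder: AND in alias/context filters here
        }

        private Collector wrapCollector(Collector collector) {
            return collector; // placeholder: timeout/min-score wrappers here
        }
    }

The design point is that the two overrides sit on opposite sides of the Query-plus-Filter-to-Weight conversion: filters get combined while they still exist as Filters, and collectors get wrapped at the single low-level method that every search eventually passes through.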
--- .../search/internal/ContextIndexSearcher.java | 45 +++++++++++++------ 1 file changed, 32 insertions(+), 13 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java b/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java index b6e7564b7ea..d32fa2ce7c7 100644 --- a/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java +++ b/src/main/java/org/elasticsearch/search/internal/ContextIndexSearcher.java @@ -133,8 +133,37 @@ public class ContextIndexSearcher extends IndexSearcher { return super.createNormalizedWeight(query); } + private Filter combinedFilter(Filter filter) { + Filter combinedFilter; + if (filter == null) { + combinedFilter = searchContext.aliasFilter(); + } else { + if (searchContext.aliasFilter() != null) { + combinedFilter = new AndFilter(ImmutableList.of(filter, searchContext.aliasFilter())); + } else { + combinedFilter = filter; + } + } + return combinedFilter; + } + @Override - public void search(Query query, Filter filter, Collector collector) throws IOException { + public void search(Query query, Collector results) throws IOException { + Filter filter = combinedFilter(null); + if (filter != null) { + super.search(wrapFilter(query, filter), results); + } else { + super.search(query, results); + } + } + + @Override + public TopDocs search(Query query, Filter filter, int n) throws IOException { + return super.search(query, combinedFilter(filter), n); + } + + @Override + public void search(List leaves, Weight weight, Collector collector) throws IOException { if (searchContext.parsedFilter() != null && Scopes.MAIN.equals(processingScope)) { // this will only get applied to the actual search collector and not // to any scoped collectors, also, it will only be applied to the main collector @@ -156,26 +185,16 @@ public class ContextIndexSearcher extends IndexSearcher { collector = new MinimumScoreCollector(collector, searchContext.minimumScore()); } - Filter combinedFilter; - if (filter == null) { - combinedFilter = searchContext.aliasFilter(); - } else { - if (searchContext.aliasFilter() != null) { - combinedFilter = new AndFilter(ImmutableList.of(filter, searchContext.aliasFilter())); - } else { - combinedFilter = filter; - } - } // we only compute the doc id set once since within a context, we execute the same query always... 
if (searchContext.timeoutInMillis() != -1) { try { - super.search(query, combinedFilter, collector); + super.search(leaves, weight, collector); } catch (TimeLimitingCollector.TimeExceededException e) { searchContext.queryResult().searchTimedOut(true); } } else { - super.search(query, combinedFilter, collector); + super.search(leaves, weight, collector); } } From 74464f9f99ad4da88d41fb94c87eca5f8da8da22 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Sun, 4 Nov 2012 22:38:14 -0500 Subject: [PATCH 128/146] lucene 4: fix possible NPE in range queries and filters if one of the bounds is not specified --- .../java/org/elasticsearch/index/query/RangeFilterParser.java | 2 +- .../java/org/elasticsearch/index/query/RangeQueryParser.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/query/RangeFilterParser.java b/src/main/java/org/elasticsearch/index/query/RangeFilterParser.java index f35cb3194a6..94ea2baafe8 100644 --- a/src/main/java/org/elasticsearch/index/query/RangeFilterParser.java +++ b/src/main/java/org/elasticsearch/index/query/RangeFilterParser.java @@ -118,7 +118,7 @@ public class RangeFilterParser implements FilterParser { if (smartNameFieldMappers != null) { if (smartNameFieldMappers.hasMapper()) { //LUCENE 4 UPGRADE range filter should use bytesref too? - filter = smartNameFieldMappers.mapper().rangeFilter(from.utf8ToString(), to.utf8ToString(), includeLower, includeUpper, parseContext); + filter = smartNameFieldMappers.mapper().rangeFilter(from != null ? from.utf8ToString() : null, to != null ? to.utf8ToString() : null, includeLower, includeUpper, parseContext); } } if (filter == null) { diff --git a/src/main/java/org/elasticsearch/index/query/RangeQueryParser.java b/src/main/java/org/elasticsearch/index/query/RangeQueryParser.java index 5cbaccf9db4..780e50fb973 100644 --- a/src/main/java/org/elasticsearch/index/query/RangeQueryParser.java +++ b/src/main/java/org/elasticsearch/index/query/RangeQueryParser.java @@ -110,7 +110,7 @@ public class RangeQueryParser implements QueryParser { if (smartNameFieldMappers != null) { if (smartNameFieldMappers.hasMapper()) { //LUCENE 4 UPGRADE Mapper#rangeQuery should use bytesref as well? - query = smartNameFieldMappers.mapper().rangeQuery(from.utf8ToString(), to.utf8ToString(), includeLower, includeUpper, parseContext); + query = smartNameFieldMappers.mapper().rangeQuery(from != null ? from.utf8ToString() : null, to != null ? 
to.utf8ToString() : null, includeLower, includeUpper, parseContext); } } if (query == null) { From cfbd17992ad001b2b3ca868eaaf40ed1ce0d0258 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Sun, 4 Nov 2012 23:27:32 -0500 Subject: [PATCH 129/146] lucene 4: convert script term to string --- .../facet/terms/strings/FieldsTermsStringFacetCollector.java | 3 ++- .../search/facet/terms/strings/TermsStringFacetCollector.java | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/facet/terms/strings/FieldsTermsStringFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/strings/FieldsTermsStringFacetCollector.java index 7a1b09056c5..013665e3b39 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/strings/FieldsTermsStringFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/strings/FieldsTermsStringFacetCollector.java @@ -200,7 +200,8 @@ public class FieldsTermsStringFacetCollector extends AbstractFacetCollector { } if (script != null) { script.setNextDocId(docId); - script.setNextVar("term", value); + // LUCENE 4 UPGRADE: needs optimization + script.setNextVar("term", value.utf8ToString()); Object scriptValue = script.run(); if (scriptValue == null) { return; diff --git a/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java b/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java index 23f3e17b66a..93cdc7a3ad0 100644 --- a/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java +++ b/src/main/java/org/elasticsearch/search/facet/terms/strings/TermsStringFacetCollector.java @@ -203,7 +203,8 @@ public class TermsStringFacetCollector extends AbstractFacetCollector { } if (script != null) { script.setNextDocId(docId); - script.setNextVar("term", value); + // LUCENE 4 UPGRADE: needs optimization + script.setNextVar("term", value.utf8ToString()); Object scriptValue = script.run(); if (scriptValue == null) { return; From ffd262e96f665abf0d627057da9f93dae26e8c09 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Sun, 4 Nov 2012 23:42:26 -0500 Subject: [PATCH 130/146] lucene 4: rollback optimization in SingleFieldVisitor for now to make it work --- .../lucene/document/SingleFieldVisitor.java | 38 +------------------ .../search/highlight/HighlightPhase.java | 10 ++++- .../search/lookup/FieldsLookup.java | 2 + 3 files changed, 13 insertions(+), 37 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/document/SingleFieldVisitor.java b/src/main/java/org/elasticsearch/common/lucene/document/SingleFieldVisitor.java index bd829ffa7ee..de3f92cef3a 100644 --- a/src/main/java/org/elasticsearch/common/lucene/document/SingleFieldVisitor.java +++ b/src/main/java/org/elasticsearch/common/lucene/document/SingleFieldVisitor.java @@ -30,10 +30,9 @@ import java.util.List; /** * */ -public class SingleFieldVisitor extends BaseFieldVisitor { +public class SingleFieldVisitor extends AbstractMultipleFieldsVisitor { private String name; - private List values; public SingleFieldVisitor() { } @@ -46,44 +45,11 @@ public class SingleFieldVisitor extends BaseFieldVisitor { this.name = name; } - @Override - public Document createDocument() { - Document document = new Document(); - for (String value : values) { - document.add(new StoredField(name, value)); - } - return document; - } - - @Override - public void reset() { - values = null; - } - - public String value() { - return values.get(0); - } - - public List 
values() { - return values; - } - @Override public Status needsField(FieldInfo fieldInfo) throws IOException { if (name.equals(fieldInfo.name)) { return Status.YES; } - - return values != null ? Status.STOP : Status.NO; - } - - @Override - public void stringField(FieldInfo fieldInfo, String value) throws IOException { - if (fieldInfo.name.equals(name)) { - if (values == null) { - values = new ArrayList(); - } - values.add(value); - } + return Status.NO; } } diff --git a/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java b/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java index 74e0105916b..48c1515be74 100644 --- a/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java +++ b/src/main/java/org/elasticsearch/search/highlight/HighlightPhase.java @@ -23,6 +23,8 @@ import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.search.Query; import org.apache.lucene.search.highlight.*; import org.apache.lucene.search.highlight.Formatter; @@ -169,7 +171,13 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase { try { SingleFieldVisitor fieldVisitor = new SingleFieldVisitor(mapper.names().indexName()); hitContext.reader().document(hitContext.docId(), fieldVisitor); - textsToHighlight = (List) fieldVisitor.values(); + Document doc = fieldVisitor.createDocument(); + textsToHighlight = new ArrayList(doc.getFields().size()); + for (IndexableField docField : doc.getFields()) { + if (docField.stringValue() != null) { + textsToHighlight.add(docField.stringValue()); + } + } } catch (Exception e) { throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e); } diff --git a/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java b/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java index 9175dd1e69c..543c7e6b8dc 100644 --- a/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java +++ b/src/main/java/org/elasticsearch/search/lookup/FieldsLookup.java @@ -159,6 +159,8 @@ public class FieldsLookup implements Map { data.doc(fieldVisitor.createDocument()); } catch (IOException e) { throw new ElasticSearchParseException("failed to load field [" + name + "]", e); + } finally { + fieldVisitor.reset(); } } return data; From 5c45aad260e1f2014a45e8fc5d3dc9ba69437c60 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Mon, 5 Nov 2012 11:37:58 +0100 Subject: [PATCH 131/146] lucene 4: fix boost mapping tests --- .../index/mapper/boost/BoostMappingTests.java | 21 ++++++------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/boost/BoostMappingTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/boost/BoostMappingTests.java index d2fd717af05..3e928421fbb 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/boost/BoostMappingTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/boost/BoostMappingTests.java @@ -46,12 +46,10 @@ public class BoostMappingTests { .field("field", "b") .endObject().bytes()); - assertThat(doc.rootDoc().getFields().size(), equalTo(2)); - float sum = 0.0f; - for (IndexableField field : doc.rootDoc().getFields()) { - sum += field.boost(); - } - assertThat(3.0f, equalTo(sum)); // 2.0 (for first field) + 1.0 (for 
second field) + // only one of the same named fields will have the proper boost, the others will have 1 + IndexableField[] fields = doc.rootDoc().getFields("field"); + assertThat(fields[0].boost(), equalTo(2.0f)); + assertThat(fields[1].boost(), equalTo(1.0f)); } @Test @@ -65,20 +63,13 @@ public class BoostMappingTests { ParsedDocument doc = mapper.parse("type", "1", XContentFactory.jsonBuilder().startObject() .field("field", "a") .field("_boost", 2.0f) .endObject().bytes()); - assertThat(doc.rootDoc().getFields().size(), equalTo(1)); - for (IndexableField field : doc.rootDoc().getFields()) { - assertThat(field.boost(), equalTo(1.0f)); - } + assertThat(doc.rootDoc().getField("field").boost(), equalTo(1.0f)); doc = mapper.parse("type", "1", XContentFactory.jsonBuilder().startObject() .field("field", "a") .field("custom_boost", 2.0f) .endObject().bytes()); - assertThat(doc.rootDoc().getFields().size(), equalTo(1)); - for (IndexableField field : doc.rootDoc().getFields()) { - assertThat(field.boost(), equalTo(2.0f)); - } + assertThat(doc.rootDoc().getField("field").boost(), equalTo(2.0f)); } } From 9d5cae23fa7f2307faabe47a1512f92a51d5ed17 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Mon, 5 Nov 2012 11:48:10 +0100 Subject: [PATCH 132/146] lucene 4: fix general mapping test no need to test for boost here; we already have specific boost tests. In general, we should get rid of this test and use more specialized tests where coverage is missing --- .../index/mapper/simple/SimpleMapperTests.java | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/src/test/java/org/elasticsearch/test/unit/index/mapper/simple/SimpleMapperTests.java b/src/test/java/org/elasticsearch/test/unit/index/mapper/simple/SimpleMapperTests.java index 4f332bbf3dd..002392e49c0 100644 --- a/src/test/java/org/elasticsearch/test/unit/index/mapper/simple/SimpleMapperTests.java +++ b/src/test/java/org/elasticsearch/test/unit/index/mapper/simple/SimpleMapperTests.java @@ -20,7 +20,6 @@ package org.elasticsearch.test.unit.index.mapper.simple; import org.apache.lucene.document.Document; -import org.apache.lucene.index.IndexableField; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.index.mapper.DocumentMapper; @@ -33,7 +32,6 @@ import static org.elasticsearch.common.io.Streams.copyToBytesFromClasspath; import static org.elasticsearch.common.io.Streams.copyToStringFromClasspath; import static org.elasticsearch.index.mapper.MapperBuilders.*; import static org.hamcrest.MatcherAssert.assertThat; -import static org.hamcrest.Matchers.closeTo; import static org.hamcrest.Matchers.equalTo; /** @@ -52,10 +50,6 @@ public class SimpleMapperTests { BytesReference json = new BytesArray(copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/simple/test1.json")); Document doc = docMapper.parse("person", "1", json).rootDoc(); - assertThat(doc.getFields().size(), equalTo(14)); - for (IndexableField field : doc.getFields()) { - assertThat((double) field.boost(), closeTo(3.7, 0.01)); - } assertThat(doc.get(docMapper.mappers().name("first").mapper().names().indexName()), equalTo("shay")); assertThat(docMapper.mappers().name("first").mapper().names().fullName(), equalTo("name.first")); // System.out.println("Document: " + doc); @@ -76,10 +70,6 @@ public class SimpleMapperTests { BytesReference json = new BytesArray(copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/simple/test1.json")); Document doc = builtDocMapper.parse(json).rootDoc();
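A quick illustration of the Lucene 4 behavior the rewritten boost assertions above rely on: index-time boost belongs to each Field instance, so when a document holds several fields under one name, only the instance the boost was set on reports it, and the siblings stay at 1.0. The sketch below is illustrative only (the class name and values are invented here; it is not code from these patches):

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.IndexableField;

    public class FieldBoostSketch {
        public static void main(String[] args) {
            Document doc = new Document();
            Field boosted = new TextField("field", "a", Field.Store.NO);
            boosted.setBoost(2.0f); // the boost sticks to this Field instance only
            doc.add(boosted);
            doc.add(new TextField("field", "b", Field.Store.NO)); // keeps the default boost of 1.0
            IndexableField[] fields = doc.getFields("field");
            assert fields[0].boost() == 2.0f;
            assert fields[1].boost() == 1.0f;
        }
    }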
assertThat(doc.get(docMapper.uidMapper().names().indexName()), equalTo(Uid.createUid("person", "1"))); - assertThat(doc.getFields().size(), equalTo(14)); - for (IndexableField field : doc.getFields()) { - assertThat((double) field.boost(), closeTo(3.7, 0.01)); - } assertThat(doc.get(docMapper.mappers().name("first").mapper().names().indexName()), equalTo("shay")); // System.out.println("Document: " + doc); // System.out.println("Json: " + docMapper.sourceMapper().value(doc)); @@ -95,10 +85,6 @@ public class SimpleMapperTests { BytesReference json = new BytesArray(copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/simple/test1.json")); Document doc = docMapper.parse(json).rootDoc(); assertThat(doc.get(docMapper.uidMapper().names().indexName()), equalTo(Uid.createUid("person", "1"))); - assertThat(doc.getFields().size(), equalTo(14)); - for (IndexableField field : doc.getFields()) { - assertThat((double) field.boost(), closeTo(3.7, 0.01)); - } assertThat(doc.get(docMapper.mappers().name("first").mapper().names().indexName()), equalTo("shay")); // System.out.println("Document: " + doc); // System.out.println("Json: " + docMapper.sourceMapper().value(doc)); @@ -111,10 +97,6 @@ public class SimpleMapperTests { BytesReference json = new BytesArray(copyToBytesFromClasspath("/org/elasticsearch/test/unit/index/mapper/simple/test1-notype-noid.json")); Document doc = docMapper.parse("person", "1", json).rootDoc(); assertThat(doc.get(docMapper.uidMapper().names().indexName()), equalTo(Uid.createUid("person", "1"))); - assertThat(doc.getFields().size(), equalTo(14)); - for (IndexableField field : doc.getFields()) { - assertThat((double) field.boost(), closeTo(3.7, 0.01)); - } assertThat(doc.get(docMapper.mappers().name("first").mapper().names().indexName()), equalTo("shay")); // System.out.println("Document: " + doc); // System.out.println("Json: " + docMapper.sourceMapper().value(doc)); From 98eb97a1ffe55169af4608b50c3f0053f60eebc7 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Mon, 5 Nov 2012 06:37:24 -0500 Subject: [PATCH 133/146] lucene 4: fix NoopCollector --- .../org/elasticsearch/common/lucene/search/NoopCollector.java | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/search/NoopCollector.java b/src/main/java/org/elasticsearch/common/lucene/search/NoopCollector.java index 60c52b4f2dd..806f4b8d8f3 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/NoopCollector.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/NoopCollector.java @@ -20,7 +20,6 @@ package org.elasticsearch.common.lucene.search; import org.apache.lucene.index.AtomicReaderContext; -import org.apache.lucene.index.IndexReader; import org.apache.lucene.search.Collector; import org.apache.lucene.search.Scorer; @@ -43,7 +42,6 @@ public class NoopCollector extends Collector { @Override public void setNextReader(AtomicReaderContext context) throws IOException { - throw new UnsupportedOperationException(); } @Override From f8842d5a4f2e0871a9991a71a87a9bb99e0e0d2a Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Mon, 5 Nov 2012 08:05:17 -0500 Subject: [PATCH 134/146] lucene 4: fix TokenFilterTests --- .../lucene/analysis/miscellaneous/TruncateTokenFilterTests.java | 1 + .../lucene/analysis/miscellaneous/UniqueTokenFilterTests.java | 1 + 2 files changed, 2 insertions(+) diff --git a/src/test/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilterTests.java b/src/test/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilterTests.java 
index e7552e30456..9a78a8a07f2 100644 --- a/src/test/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilterTests.java +++ b/src/test/java/org/apache/lucene/analysis/miscellaneous/TruncateTokenFilterTests.java @@ -49,6 +49,7 @@ public class TruncateTokenFilterTests { }; TokenStream test = analyzer.tokenStream("test", new StringReader("a bb ccc dddd eeeee")); + test.reset(); CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class); assertThat(test.incrementToken(), equalTo(true)); assertThat(termAttribute.toString(), equalTo("a")); diff --git a/src/test/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilterTests.java b/src/test/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilterTests.java index 97d5b027367..6ea69a6f43f 100644 --- a/src/test/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilterTests.java +++ b/src/test/java/org/apache/lucene/analysis/miscellaneous/UniqueTokenFilterTests.java @@ -49,6 +49,7 @@ public class UniqueTokenFilterTests { }; TokenStream test = analyzer.tokenStream("test", new StringReader("this test with test")); + test.reset(); CharTermAttribute termAttribute = test.addAttribute(CharTermAttribute.class); assertThat(test.incrementToken(), equalTo(true)); assertThat(termAttribute.toString(), equalTo("this")); From cae66fb6368207016c7fa5b7ad11dec7a0a2e9cd Mon Sep 17 00:00:00 2001 From: uboness Date: Mon, 5 Nov 2012 23:22:34 +0100 Subject: [PATCH 135/146] * lucene 4: added missing short support in stream input/output * lucene 4: added more extensive test for stored fields --- .../common/io/stream/StreamInput.java | 2 + .../common/io/stream/StreamOutput.java | 3 + .../search/fields/SearchFieldsTests.java | 69 +++++++++++++++++++ 3 files changed, 74 insertions(+) diff --git a/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java b/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java index b98c7332309..dfbdb528eee 100644 --- a/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java +++ b/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java @@ -356,6 +356,8 @@ public abstract class StreamInput extends InputStream { return readBytesReference(); case 15: return readText(); + case 16: + return readShort(); default: throw new IOException("Can't read unknown type [" + type + "]"); } diff --git a/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java b/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java index 26285983d0e..758ac582f24 100644 --- a/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java +++ b/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java @@ -369,6 +369,9 @@ public abstract class StreamOutput extends OutputStream { } else if (value instanceof Text) { writeByte((byte) 15); writeText((Text) value); + } else if (value instanceof Short) { + writeByte((byte) 16); + writeShort((Short) value); } else { throw new IOException("Can't write type [" + type + "]"); } diff --git a/src/test/java/org/elasticsearch/test/integration/search/fields/SearchFieldsTests.java b/src/test/java/org/elasticsearch/test/integration/search/fields/SearchFieldsTests.java index 2e2beb64b3b..60e816b039a 100644 --- a/src/test/java/org/elasticsearch/test/integration/search/fields/SearchFieldsTests.java +++ b/src/test/java/org/elasticsearch/test/integration/search/fields/SearchFieldsTests.java @@ -21,10 +21,14 @@ package org.elasticsearch.test.integration.search.fields; import org.elasticsearch.action.search.SearchResponse; import 
org.elasticsearch.client.Client; +import org.elasticsearch.common.Base64; import org.elasticsearch.common.collect.MapBuilder; +import org.elasticsearch.common.joda.Joda; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.search.sort.SortOrder; import org.elasticsearch.test.integration.AbstractNodesTests; +import org.joda.time.DateTime; +import org.joda.time.DateTimeZone; import org.testng.annotations.AfterClass; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; @@ -277,4 +281,69 @@ public class SearchFieldsTests extends AbstractNodesTests { assertThat(partial2.containsKey("obj1"), equalTo(false)); assertThat(partial2.containsKey("field1"), equalTo(true)); } + + @Test + public void testStoredFieldsWithoutSource() throws Exception { + client.admin().indices().prepareDelete().execute().actionGet(); + client.admin().indices().prepareCreate("test").execute().actionGet(); + client.admin().cluster().prepareHealth().setWaitForYellowStatus().execute().actionGet(); + + String mapping = XContentFactory.jsonBuilder().startObject().startObject("type").startObject("properties") + .startObject("_source").field("enabled", false).endObject() + .startObject("byte_field").field("type", "byte").field("store", "yes").endObject() + .startObject("short_field").field("type", "short").field("store", "yes").endObject() + .startObject("integer_field").field("type", "integer").field("store", "yes").endObject() + .startObject("long_field").field("type", "long").field("store", "yes").endObject() + .startObject("float_field").field("type", "float").field("store", "yes").endObject() + .startObject("double_field").field("type", "double").field("store", "yes").endObject() + .startObject("date_field").field("type", "date").field("store", "yes").endObject() + .startObject("boolean_field").field("type", "boolean").field("store", "yes").endObject() + .startObject("binary_field").field("type", "binary").field("store", "yes").endObject() + .endObject().endObject().endObject().string(); + + client.admin().indices().preparePutMapping().setType("type1").setSource(mapping).execute().actionGet(); + + client.prepareIndex("test", "type1", "1").setSource(jsonBuilder().startObject() + .field("byte_field", (byte) 1) + .field("short_field", (short) 2) + .field("integer_field", 3) + .field("long_field", 4l) + .field("float_field", 5.0f) + .field("double_field", 6.0d) + .field("date_field", Joda.forPattern("dateOptionalTime").printer().print(new DateTime(2012, 3, 22, 0, 0, DateTimeZone.UTC))) + .field("boolean_field", true) + .field("binary_field", Base64.encodeBytes("testing text".getBytes("UTF8"))) + .endObject()).execute().actionGet(); + + client.admin().indices().prepareRefresh().execute().actionGet(); + + SearchResponse searchResponse = client.prepareSearch().setQuery(matchAllQuery()) + .addField("byte_field") + .addField("short_field") + .addField("integer_field") + .addField("long_field") + .addField("float_field") + .addField("double_field") + .addField("date_field") + .addField("boolean_field") + .addField("binary_field") + .execute().actionGet(); + + assertThat(searchResponse.hits().getTotalHits(), equalTo(1l)); + assertThat(searchResponse.hits().hits().length, equalTo(1)); + assertThat(searchResponse.hits().getAt(0).fields().size(), equalTo(9)); + + + assertThat(searchResponse.hits().getAt(0).fields().get("byte_field").value().toString(), equalTo("1")); + assertThat(searchResponse.hits().getAt(0).fields().get("short_field").value().toString(), equalTo("2")); + 
assertThat(searchResponse.hits().getAt(0).fields().get("integer_field").value(), equalTo((Object) 3)); + assertThat(searchResponse.hits().getAt(0).fields().get("long_field").value(), equalTo((Object) 4l)); + assertThat(searchResponse.hits().getAt(0).fields().get("float_field").value(), equalTo((Object) 5.0f)); + assertThat(searchResponse.hits().getAt(0).fields().get("double_field").value(), equalTo((Object) 6.0d)); + String dateTime = Joda.forPattern("dateOptionalTime").printer().print(new DateTime(2012, 3, 22, 0, 0, DateTimeZone.UTC)); + assertThat(searchResponse.hits().getAt(0).fields().get("date_field").value(), equalTo((Object) dateTime)); + assertThat(searchResponse.hits().getAt(0).fields().get("boolean_field").value(), equalTo((Object) "true")); + assertThat(searchResponse.hits().getAt(0).fields().get("binary_field").value().toString(), equalTo(Base64.encodeBytes("testing text".getBytes("UTF8")))); + + } } From ed2b009f076a9f8d4c96f469f83a896d5991c04e Mon Sep 17 00:00:00 2001 From: uboness Date: Tue, 6 Nov 2012 11:14:40 +0100 Subject: [PATCH 136/146] * changed instanceof to be consistent with other type checks --- .../java/org/elasticsearch/common/io/stream/StreamOutput.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java b/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java index 758ac582f24..e909c466bfb 100644 --- a/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java +++ b/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java @@ -369,7 +369,7 @@ public abstract class StreamOutput extends OutputStream { } else if (value instanceof Text) { writeByte((byte) 15); writeText((Text) value); - } else if (value instanceof Short) { + } else if (value == Short.class) { writeByte((byte) 16); writeShort((Short) value); } else { From 46223c117abf3dee86765cab2b1d184664e2dc71 Mon Sep 17 00:00:00 2001 From: uboness Date: Tue, 6 Nov 2012 11:19:05 +0100 Subject: [PATCH 137/146] * removed unused Streamables class --- .../common/io/stream/Streamables.java | 151 ------------------ 1 file changed, 151 deletions(-) delete mode 100644 src/main/java/org/elasticsearch/common/io/stream/Streamables.java diff --git a/src/main/java/org/elasticsearch/common/io/stream/Streamables.java b/src/main/java/org/elasticsearch/common/io/stream/Streamables.java deleted file mode 100644 index e9ac61afd84..00000000000 --- a/src/main/java/org/elasticsearch/common/io/stream/Streamables.java +++ /dev/null @@ -1,151 +0,0 @@ -/* - * Licensed to ElasticSearch and Shay Banon under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. ElasticSearch licenses this - * file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. 
- */ - -package org.elasticsearch.common.io.stream; - -import java.io.IOException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * - */ -public class Streamables { - - public static Map readMap(StreamInput in) throws IOException { - int size = in.readVInt(); - Map map = new HashMap(size); - for (int i = 0; i < size; i++) { - map.put(in.readUTF(), readMapValue(in)); - } - return map; - } - - public static Object readMapValue(StreamInput in) throws IOException { - byte type = in.readByte(); - if (type == -1) { - return null; - } else if (type == 0) { - return in.readUTF(); - } else if (type == 1) { - return in.readInt(); - } else if (type == 2) { - return in.readLong(); - } else if (type == 3) { - return in.readFloat(); - } else if (type == 4) { - return in.readDouble(); - } else if (type == 5) { - return in.readBoolean(); - } else if (type == 6) { - int bytesSize = in.readVInt(); - byte[] value = new byte[bytesSize]; - in.readFully(value); - return value; - } else if (type == 7) { - int size = in.readVInt(); - List list = new ArrayList(size); - for (int i = 0; i < size; i++) { - list.add(readMapValue(in)); - } - return list; - } else if (type == 8) { - int size = in.readVInt(); - Object[] list = new Object[size]; - for (int i = 0; i < size; i++) { - list[i] = readMapValue(in); - } - return list; - } else if (type == 9) { - int size = in.readVInt(); - Map map = new HashMap(size); - for (int i = 0; i < size; i++) { - map.put(in.readUTF(), readMapValue(in)); - } - return map; - } else { - throw new IOException("Can't read unknown type [" + type + "]"); - } - } - - public static void writeMap(StreamOutput out, Map map) throws IOException { - out.writeVInt(map.size()); - for (Map.Entry entry : map.entrySet()) { - out.writeUTF(entry.getKey()); - writeMapValue(out, entry.getValue()); - } - } - - private static void writeMapValue(StreamOutput out, Object value) throws IOException { - if (value == null) { - out.writeByte((byte) -1); - return; - } - Class type = value.getClass(); - if (type == String.class) { - out.writeByte((byte) 0); - out.writeUTF((String) value); - } else if (type == Integer.class) { - out.writeByte((byte) 1); - out.writeInt((Integer) value); - } else if (type == Long.class) { - out.writeByte((byte) 2); - out.writeLong((Long) value); - } else if (type == Float.class) { - out.writeByte((byte) 3); - out.writeFloat((Float) value); - } else if (type == Double.class) { - out.writeByte((byte) 4); - out.writeDouble((Double) value); - } else if (type == Boolean.class) { - out.writeByte((byte) 5); - out.writeBoolean((Boolean) value); - } else if (type == byte[].class) { - out.writeByte((byte) 6); - out.writeVInt(((byte[]) value).length); - out.writeBytes(((byte[]) value)); - } else if (value instanceof List) { - out.writeByte((byte) 7); - List list = (List) value; - out.writeVInt(list.size()); - for (Object o : list) { - writeMapValue(out, o); - } - } else if (value instanceof Object[]) { - out.writeByte((byte) 8); - Object[] list = (Object[]) value; - out.writeVInt(list.length); - for (Object o : list) { - writeMapValue(out, o); - } - } else if (value instanceof Map) { - out.writeByte((byte) 9); - Map map = (Map) value; - out.writeVInt(map.size()); - for (Map.Entry entry : map.entrySet()) { - out.writeUTF(entry.getKey()); - writeMapValue(out, entry.getValue()); - } - } else { - throw new IOException("Can't write type [" + type + "]"); - } - } -} From d069212ce46b0dc6800a74b654affee139a62a28 Mon Sep 17 00:00:00 2001 From: 
uboness Date: Tue, 6 Nov 2012 11:25:22 +0100 Subject: [PATCH 138/146] * fixed the type check for short --- .../java/org/elasticsearch/common/io/stream/StreamOutput.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java b/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java index e909c466bfb..e414c0d1cba 100644 --- a/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java +++ b/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java @@ -369,7 +369,7 @@ public abstract class StreamOutput extends OutputStream { } else if (value instanceof Text) { writeByte((byte) 15); writeText((Text) value); - } else if (value == Short.class) { + } else if (type == Short.class) { writeByte((byte) 16); writeShort((Short) value); } else { From c8cf72d6575002d4dc98577ef3e661d591d3ed41 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Tue, 6 Nov 2012 08:45:44 -0500 Subject: [PATCH 139/146] lucene 4: fix handling of deleted docs in TermFilter --- .../org/elasticsearch/common/lucene/search/TermFilter.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/lucene/search/TermFilter.java b/src/main/java/org/elasticsearch/common/lucene/search/TermFilter.java index c5d345504d7..54aa46b7bc2 100644 --- a/src/main/java/org/elasticsearch/common/lucene/search/TermFilter.java +++ b/src/main/java/org/elasticsearch/common/lucene/search/TermFilter.java @@ -53,8 +53,7 @@ public class TermFilter extends Filter { if (!termsEnum.seekExact(term.bytes(), false)) { return null; } - // LUCENE 4 UPGRADE: For leave acceptedDocs null, until we figure out how to deal with deleted docs... - DocsEnum docsEnum = termsEnum.docs(null, null); + DocsEnum docsEnum = termsEnum.docs(acceptDocs, null); int docId = docsEnum.nextDoc(); if (docId == DocsEnum.NO_MORE_DOCS) { return null; From 2b58c2dfff64fa55cbe5ce6b0691e8962c7c5c6a Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Tue, 6 Nov 2012 16:26:18 +0100 Subject: [PATCH 140/146] lucene 4: optimize read/write BytesRef handling --- .../common/io/stream/BytesStreamInput.java | 11 +++++++++++ .../elasticsearch/common/io/stream/StreamInput.java | 12 +++++++++--- .../elasticsearch/common/io/stream/StreamOutput.java | 1 - .../transport/netty/ChannelBufferStreamInput.java | 11 +++++++++++ 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/src/main/java/org/elasticsearch/common/io/stream/BytesStreamInput.java b/src/main/java/org/elasticsearch/common/io/stream/BytesStreamInput.java index 7e1b3fed37d..65071087356 100644 --- a/src/main/java/org/elasticsearch/common/io/stream/BytesStreamInput.java +++ b/src/main/java/org/elasticsearch/common/io/stream/BytesStreamInput.java @@ -19,6 +19,7 @@ package org.elasticsearch.common.io.stream; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; @@ -69,6 +70,16 @@ public class BytesStreamInput extends StreamInput { return bytes; } + @Override + public BytesRef readBytesRef(int length) throws IOException { + if (unsafe) { + return super.readBytesRef(length); + } + BytesRef bytes = new BytesRef(buf, pos, length); + pos += length; + return bytes; + } + @Override public long skip(long n) throws IOException { if (pos + n > count) { diff --git a/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java b/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java index 
dfbdb528eee..b993c9a55e0 100644 --- a/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java +++ b/src/main/java/org/elasticsearch/common/io/stream/StreamInput.java @@ -87,10 +87,16 @@ public abstract class StreamInput extends InputStream { public BytesRef readBytesRef() throws IOException { int length = readVInt(); - int offset = readVInt(); + return readBytesRef(length); + } + + public BytesRef readBytesRef(int length) throws IOException { + if (length == 0) { + return new BytesRef(); + } byte[] bytes = new byte[length]; - readBytes(bytes, offset, length); - return new BytesRef(bytes, offset, length); + readBytes(bytes, 0, length); + return new BytesRef(bytes, 0, length); } public void readFully(byte[] b) throws IOException { diff --git a/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java b/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java index e414c0d1cba..5af27273970 100644 --- a/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java +++ b/src/main/java/org/elasticsearch/common/io/stream/StreamOutput.java @@ -113,7 +113,6 @@ public abstract class StreamOutput extends OutputStream { return; } writeVInt(bytes.length); - writeVInt(bytes.offset); write(bytes.bytes, bytes.offset, bytes.length); } diff --git a/src/main/java/org/elasticsearch/transport/netty/ChannelBufferStreamInput.java b/src/main/java/org/elasticsearch/transport/netty/ChannelBufferStreamInput.java index 37b834b255a..1a8064af949 100644 --- a/src/main/java/org/elasticsearch/transport/netty/ChannelBufferStreamInput.java +++ b/src/main/java/org/elasticsearch/transport/netty/ChannelBufferStreamInput.java @@ -19,6 +19,7 @@ package org.elasticsearch.transport.netty; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.bytes.ChannelBufferBytesReference; import org.elasticsearch.common.io.stream.StreamInput; @@ -57,6 +58,16 @@ public class ChannelBufferStreamInput extends StreamInput { return ref; } + @Override + public BytesRef readBytesRef(int length) throws IOException { + if (!buffer.hasArray()) { + return super.readBytesRef(length); + } + BytesRef bytesRef = new BytesRef(buffer.array(), buffer.arrayOffset() + buffer.readerIndex(), length); + buffer.skipBytes(length); + return bytesRef; + } + @Override public int available() throws IOException { return endIndex - buffer.readerIndex(); From c2f3eab7d3137f72595379fa6caa90410fc238d1 Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Tue, 6 Nov 2012 11:52:14 -0500 Subject: [PATCH 141/146] lucene 4: fix sorting --- .../search/ShardFieldDocSortedHitQueue.java | 11 ++++----- .../elasticsearch/common/lucene/Lucene.java | 6 +++++ .../index/field/data/DocFieldData.java | 10 +++++--- .../data/strings/StringDocFieldData.java | 24 +++++++++++++++++-- .../field/data/support/FieldDataLoader.java | 2 +- .../search/internal/InternalSearchHit.java | 16 +++++++++++++ .../data/strings/StringFieldDataTests.java | 4 ++-- 7 files changed, 59 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/apache/lucene/search/ShardFieldDocSortedHitQueue.java b/src/main/java/org/apache/lucene/search/ShardFieldDocSortedHitQueue.java index 6544b2427ae..e829f38eced 100644 --- a/src/main/java/org/apache/lucene/search/ShardFieldDocSortedHitQueue.java +++ b/src/main/java/org/apache/lucene/search/ShardFieldDocSortedHitQueue.java @@ -19,13 +19,12 @@ package org.apache.lucene.search; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.PriorityQueue; import 
org.elasticsearch.ElasticSearchIllegalStateException; import org.elasticsearch.search.controller.ShardFieldDoc; import java.io.IOException; -import java.text.Collator; -import java.util.Locale; /** * @@ -86,8 +85,8 @@ public class ShardFieldDocSortedHitQueue extends PriorityQueue { /** * Returns whether a is less relevant than b. * - * @param a ScoreDoc - * @param b ScoreDoc + * @param docA ScoreDoc + * @param docB ScoreDoc * @return true if document a should be sorted after document b. */ @SuppressWarnings("unchecked") @@ -98,8 +97,8 @@ public class ShardFieldDocSortedHitQueue extends PriorityQueue { for (int i = 0; i < n && c == 0; ++i) { final SortField.Type type = fields[i].getType(); if (type == SortField.Type.STRING) { - final String s1 = (String) docA.fields[i]; - final String s2 = (String) docB.fields[i]; + final BytesRef s1 = (BytesRef) docA.fields[i]; + final BytesRef s2 = (BytesRef) docB.fields[i]; // null values need to be sorted first, because of how FieldCache.getStringIndex() // works - in that routine, any documents without a value in the given field are // put first. If both are null, the next SortField is used diff --git a/src/main/java/org/elasticsearch/common/lucene/Lucene.java b/src/main/java/org/elasticsearch/common/lucene/Lucene.java index 1376a473f05..258e9d9474d 100644 --- a/src/main/java/org/elasticsearch/common/lucene/Lucene.java +++ b/src/main/java/org/elasticsearch/common/lucene/Lucene.java @@ -24,6 +24,7 @@ import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.index.*; import org.apache.lucene.search.*; import org.apache.lucene.store.Directory; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Version; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.io.stream.StreamInput; @@ -173,6 +174,8 @@ public class Lucene { cFields[j] = in.readShort(); } else if (type == 8) { cFields[j] = in.readBoolean(); + } else if (type == 9) { + cFields[j] = in.readBytesRef(); } else { throw new IOException("Can't match type [" + type + "]"); } @@ -258,6 +261,9 @@ public class Lucene { } else if (type == Boolean.class) { out.writeByte((byte) 8); out.writeBoolean((Boolean) field); + } else if (type == BytesRef.class) { + out.writeByte((byte) 9); + out.writeBytesRef((BytesRef) field); } else { throw new IOException("Can't handle sort field value of type [" + type + "]"); } diff --git a/src/main/java/org/elasticsearch/index/field/data/DocFieldData.java b/src/main/java/org/elasticsearch/index/field/data/DocFieldData.java index c8e435aa9ee..c6cc070a8f4 100644 --- a/src/main/java/org/elasticsearch/index/field/data/DocFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/DocFieldData.java @@ -46,11 +46,15 @@ public abstract class DocFieldData { return !fieldData.hasValue(docId); } - public BytesRef stringValue() { - return fieldData.stringValue(docId); + public String stringValue() { + BytesRef val = fieldData.stringValue(docId); + if (val == null) { + return null; + } + return val.utf8ToString(); } - public BytesRef getStringValue() { + public String getStringValue() { return stringValue(); } diff --git a/src/main/java/org/elasticsearch/index/field/data/strings/StringDocFieldData.java b/src/main/java/org/elasticsearch/index/field/data/strings/StringDocFieldData.java index a4edcdf8e01..2c75ed6d63e 100644 --- a/src/main/java/org/elasticsearch/index/field/data/strings/StringDocFieldData.java +++ b/src/main/java/org/elasticsearch/index/field/data/strings/StringDocFieldData.java @@ -31,11 +31,31 @@ 
public class StringDocFieldData extends DocFieldData<StringFieldData> { super(fieldData); } - public BytesRef getValue() { + public String getValue() { + BytesRef value = fieldData.value(docId); + if (value == null) { + return null; + } + return value.utf8ToString(); + } + + public String[] getValues() { + BytesRef[] values = fieldData.values(docId); + if (values == null) { + return null; + } + String[] stringValues = new String[values.length]; + for (int i = 0; i < values.length; i++) { + stringValues[i] = values[i].utf8ToString(); + } + return stringValues; + } + + public BytesRef getBytesValue() { return fieldData.value(docId); } - public BytesRef[] getValues() { + public BytesRef[] getBytesValues() { return fieldData.values(docId); } } diff --git a/src/main/java/org/elasticsearch/index/field/data/support/FieldDataLoader.java b/src/main/java/org/elasticsearch/index/field/data/support/FieldDataLoader.java index bc6e9c1088d..a52525f5438 100644 --- a/src/main/java/org/elasticsearch/index/field/data/support/FieldDataLoader.java +++ b/src/main/java/org/elasticsearch/index/field/data/support/FieldDataLoader.java @@ -46,7 +46,7 @@ public class FieldDataLoader { Terms terms = reader.terms(field); if (terms == null) { - return loader.buildSingleValue(field, new int[0]); // Return empty field data if field doesn't exist. + return loader.buildSingleValue(field, ordinals.get(0)); // Return empty field data if field doesn't exist. } TermsEnum termsEnum = terms.iterator(null); diff --git a/src/main/java/org/elasticsearch/search/internal/InternalSearchHit.java b/src/main/java/org/elasticsearch/search/internal/InternalSearchHit.java index 7405227e854..adc97b9feaf 100644 --- a/src/main/java/org/elasticsearch/search/internal/InternalSearchHit.java +++ b/src/main/java/org/elasticsearch/search/internal/InternalSearchHit.java @@ -21,6 +21,7 @@ package org.elasticsearch.search.internal; import com.google.common.collect.ImmutableMap; import org.apache.lucene.search.Explanation; +import org.apache.lucene.util.BytesRef; import org.elasticsearch.ElasticSearchParseException; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Strings; @@ -29,6 +30,7 @@ import org.elasticsearch.common.bytes.BytesReference; import org.elasticsearch.common.compress.CompressorFactory; import org.elasticsearch.common.io.stream.StreamInput; import org.elasticsearch.common.io.stream.StreamOutput; +import org.elasticsearch.common.text.StringAndBytesText; import org.elasticsearch.common.text.Text; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentBuilderString; @@ -295,6 +297,14 @@ public class InternalSearchHit implements SearchHit { } public void sortValues(Object[] sortValues) { + // LUCENE 4 UPGRADE: There must be a better way + if (sortValues != null) { + for (int i=0; i<sortValues.length; i++) { + if (sortValues[i] instanceof BytesRef) { + sortValues[i] = new StringAndBytesText(new BytesArray((BytesRef) sortValues[i])); + } + } + } this.sortValues = sortValues; } … From … Mon Sep 17 00:00:00 2001 From: Igor Motov Date: Tue, 6 Nov 2012 12:05:14 -0500 Subject: [PATCH 142/146] lucene 4: fix TTL --- .../java/org/elasticsearch/indices/ttl/IndicesTTLService.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/org/elasticsearch/indices/ttl/IndicesTTLService.java b/src/main/java/org/elasticsearch/indices/ttl/IndicesTTLService.java index 0539d1eea79..9c4298b36c5 100644 --- a/src/main/java/org/elasticsearch/indices/ttl/IndicesTTLService.java +++ b/src/main/java/org/elasticsearch/indices/ttl/IndicesTTLService.java @@ -236,7 +236,7 @@ public class IndicesTTLService extends AbstractLifecycleComponent<IndicesTTLService> { … Date: Tue, 6 Nov 2012 19:54:36 +0100 Subject: [PATCH 143/146] lucene 4: sort values on hit are 
Text, not BytesRef --- .../search/internal/InternalSearchHit.java | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/src/main/java/org/elasticsearch/search/internal/InternalSearchHit.java b/src/main/java/org/elasticsearch/search/internal/InternalSearchHit.java index adc97b9feaf..650240e1895 100644 --- a/src/main/java/org/elasticsearch/search/internal/InternalSearchHit.java +++ b/src/main/java/org/elasticsearch/search/internal/InternalSearchHit.java @@ -298,10 +298,11 @@ public class InternalSearchHit implements SearchHit { public void sortValues(Object[] sortValues) { // LUCENE 4 UPGRADE: There must be a better way + // we want to convert to a Text object here, and not BytesRef if (sortValues != null) { - for (int i=0; i<sortValues.length; i++) { + for (int i = 0; i < sortValues.length; i++) { if (sortValues[i] instanceof BytesRef) { sortValues[i] = new StringAndBytesText(new BytesArray((BytesRef) sortValues[i])); } } } … int size = in.readVInt(); if (size > 0) { matchedFilters = new String[size]; for (int i = 0; i < size; i++) { - matchedFilters[i] = in.readUTF(); + matchedFilters[i] = in.readString(); } } @@ -617,8 +617,8 @@ public class InternalSearchHit implements SearchHit { public void writeTo(StreamOutput out, InternalSearchHits.StreamContext context) throws IOException { out.writeFloat(score); - out.writeUTF(id); - out.writeUTF(type); + out.writeString(id); + out.writeString(type); out.writeLong(version); out.writeBytesReference(source); if (explanation == null) { @@ -655,7 +655,7 @@ public class InternalSearchHit implements SearchHit { Class type = sortValue.getClass(); if (type == String.class) { out.writeByte((byte) 1); - out.writeUTF((String) sortValue); + out.writeString((String) sortValue); } else if (type == Integer.class) { out.writeByte((byte) 2); out.writeInt((Integer) sortValue); @@ -677,9 +677,9 @@ public class InternalSearchHit implements SearchHit { } else if (type == Boolean.class) { out.writeByte((byte) 8); out.writeBoolean((Boolean) sortValue); - } else if (type == BytesRef.class) { + } else if (sortValue instanceof Text) { out.writeByte((byte) 9); - out.writeBytesRef((BytesRef) sortValue); + out.writeText((Text) sortValue); } else { throw new IOException("Can't handle sort field value of type [" + type + "]"); } @@ -692,7 +692,7 @@ } else { out.writeVInt(matchedFilters.length); for (String matchedFilter : matchedFilters) { - out.writeUTF(matchedFilter); + out.writeString(matchedFilter); } } From e2c33ed6590eacc62ddf04c48a735146cd977362 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 8 Nov 2012 22:50:48 +0100 Subject: [PATCH 144/146] lucene 4: Fixed BitsetExecutionChildQuerySearchTests class. 
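The one-line fix below leans on the Lucene 4 per-segment deletions API: AtomicReader#getLiveDocs() returns a Bits set of live documents, or null when the segment has no deletions at all, so a deleted parent doc has to be filtered out explicitly. A hedged sketch of the idiom (the helper class and method are invented for illustration):

    import org.apache.lucene.index.AtomicReader;
    import org.apache.lucene.util.Bits;

    public final class LiveDocsSketch {
        // A doc counts as live when the segment has no deletions (null) or its bit is set.
        public static boolean isLive(AtomicReader reader, int docId) {
            Bits liveDocs = reader.getLiveDocs();
            return liveDocs == null || liveDocs.get(docId);
        }
    }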
--- .../org/elasticsearch/index/search/child/ChildCollector.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/elasticsearch/index/search/child/ChildCollector.java b/src/main/java/org/elasticsearch/index/search/child/ChildCollector.java index 974cd1e2739..33e7366668e 100644 --- a/src/main/java/org/elasticsearch/index/search/child/ChildCollector.java +++ b/src/main/java/org/elasticsearch/index/search/child/ChildCollector.java @@ -80,13 +80,13 @@ public class ChildCollector extends Collector { return; } for (Tuple tuple : readers) { - IndexReader indexReader = tuple.v1(); + AtomicReader indexReader = tuple.v1(); IdReaderTypeCache idReaderTypeCache = tuple.v2(); if (idReaderTypeCache == null) { // might be if we don't have that doc with that type in this reader continue; } int parentDocId = idReaderTypeCache.docById(parentId); - if (parentDocId != -1) { + if (parentDocId != -1 && (indexReader.getLiveDocs() == null || indexReader.getLiveDocs().get(parentDocId))) { FixedBitSet docIdSet = parentDocs().get(indexReader.getCoreCacheKey()); if (docIdSet == null) { docIdSet = new FixedBitSet(indexReader.maxDoc()); From 05746adeb21c5b46ca06d0b699db996cfe9851f3 Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Thu, 8 Nov 2012 23:10:13 +0100 Subject: [PATCH 145/146] lucene 4: Set number of replicas to 0. Makes the test run faster. --- .../child/SimpleChildQuerySearchTests.java | 68 ++++++++++++++++--- 1 file changed, 58 insertions(+), 10 deletions(-) diff --git a/src/test/java/org/elasticsearch/test/integration/search/child/SimpleChildQuerySearchTests.java b/src/test/java/org/elasticsearch/test/integration/search/child/SimpleChildQuerySearchTests.java index 8073940d603..e3505f7928f 100644 --- a/src/test/java/org/elasticsearch/test/integration/search/child/SimpleChildQuerySearchTests.java +++ b/src/test/java/org/elasticsearch/test/integration/search/child/SimpleChildQuerySearchTests.java @@ -75,7 +75,12 @@ public class SimpleChildQuerySearchTests extends AbstractNodesTests { public void multiLevelChild() throws Exception { client.admin().indices().prepareDelete().execute().actionGet(); - client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("index.number_of_shards", 1)).execute().actionGet(); + client.admin().indices().prepareCreate("test") + .setSettings( + ImmutableSettings.settingsBuilder() + .put("index.number_of_shards", 1) + .put("index.number_of_replicas", 0) + ).execute().actionGet(); client.admin().cluster().prepareHealth().setWaitForGreenStatus().execute().actionGet(); client.admin().indices().preparePutMapping("test").setType("child").setSource(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("_parent").field("type", "parent").endObject() @@ -109,7 +114,12 @@ public class SimpleChildQuerySearchTests extends AbstractNodesTests { public void simpleChildQuery() throws Exception { client.admin().indices().prepareDelete().execute().actionGet(); - client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("index.number_of_shards", 1)).execute().actionGet(); + client.admin().indices().prepareCreate("test") + .setSettings( + ImmutableSettings.settingsBuilder() + .put("index.number_of_shards", 1) + .put("index.number_of_replicas", 0) + ).execute().actionGet(); client.admin().cluster().prepareHealth().setWaitForGreenStatus().execute().actionGet(); 
client.admin().indices().preparePutMapping("test").setType("child").setSource(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("_parent").field("type", "parent").endObject() @@ -288,7 +298,12 @@ public class SimpleChildQuerySearchTests extends AbstractNodesTests { @Test public void testHasParentFilter() throws Exception { client.admin().indices().prepareDelete().execute().actionGet(); - client.admin().indices().prepareCreate("test").execute().actionGet(); + client.admin().indices().prepareCreate("test") + .setSettings( + ImmutableSettings.settingsBuilder() + .put("index.number_of_shards", 1) + .put("index.number_of_replicas", 0) + ).execute().actionGet(); client.admin().cluster().prepareHealth().setWaitForGreenStatus().execute().actionGet(); client.admin().indices().preparePutMapping("test").setType("child").setSource(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("_parent").field("type", "parent").endObject() @@ -345,7 +360,12 @@ public class SimpleChildQuerySearchTests extends AbstractNodesTests { public void simpleChildQueryWithFlush() throws Exception { client.admin().indices().prepareDelete().execute().actionGet(); - client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("index.number_of_shards", 1)).execute().actionGet(); + client.admin().indices().prepareCreate("test") + .setSettings( + ImmutableSettings.settingsBuilder() + .put("index.number_of_shards", 1) + .put("index.number_of_replicas", 0) + ).execute().actionGet(); client.admin().cluster().prepareHealth().setWaitForGreenStatus().execute().actionGet(); client.admin().indices().preparePutMapping("test").setType("child").setSource(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("_parent").field("type", "parent").endObject() @@ -440,7 +460,11 @@ public class SimpleChildQuerySearchTests extends AbstractNodesTests { public void simpleChildQueryWithFlushAnd3Shards() throws Exception { client.admin().indices().prepareDelete().execute().actionGet(); - client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("index.number_of_shards", 3)).execute().actionGet(); + client.admin().indices().prepareCreate("test").setSettings( + ImmutableSettings.settingsBuilder() + .put("index.number_of_shards", 3) + .put("index.number_of_replicas", 0) + ).execute().actionGet(); client.admin().cluster().prepareHealth().setWaitForGreenStatus().execute().actionGet(); client.admin().indices().preparePutMapping("test").setType("child").setSource(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("_parent").field("type", "parent").endObject() @@ -535,7 +559,12 @@ public class SimpleChildQuerySearchTests extends AbstractNodesTests { public void testScopedFacet() throws Exception { client.admin().indices().prepareDelete().execute().actionGet(); - client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("index.number_of_shards", 1)).execute().actionGet(); + client.admin().indices().prepareCreate("test") + .setSettings( + ImmutableSettings.settingsBuilder() + .put("index.number_of_shards", 1) + .put("index.number_of_replicas", 0) + ).execute().actionGet(); client.admin().cluster().prepareHealth().setWaitForGreenStatus().execute().actionGet(); client.admin().indices().preparePutMapping("test").setType("child").setSource(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("_parent").field("type", 
"parent").endObject() @@ -574,7 +603,12 @@ public class SimpleChildQuerySearchTests extends AbstractNodesTests { public void testDeletedParent() throws Exception { client.admin().indices().prepareDelete().execute().actionGet(); - client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("index.number_of_shards", 1)).execute().actionGet(); + client.admin().indices().prepareCreate("test") + .setSettings( + ImmutableSettings.settingsBuilder() + .put("index.number_of_shards", 1) + .put("index.number_of_replicas", 0) + ).execute().actionGet(); client.admin().cluster().prepareHealth().setWaitForGreenStatus().execute().actionGet(); client.admin().indices().preparePutMapping("test").setType("child").setSource(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("_parent").field("type", "parent").endObject() @@ -635,7 +669,11 @@ public class SimpleChildQuerySearchTests extends AbstractNodesTests { public void testDfsSearchType() throws Exception { client.admin().indices().prepareDelete().execute().actionGet(); - client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("index.number_of_shards", 2)).execute().actionGet(); + client.admin().indices().prepareCreate("test").setSettings( + ImmutableSettings.settingsBuilder() + .put("index.number_of_shards", 2) + .put("index.number_of_replicas", 0) + ).execute().actionGet(); client.admin().cluster().prepareHealth().setWaitForGreenStatus().execute().actionGet(); client.admin().indices().preparePutMapping("test").setType("child").setSource(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("_parent").field("type", "parent").endObject() @@ -661,7 +699,12 @@ public class SimpleChildQuerySearchTests extends AbstractNodesTests { public void testTopChildrenReSearchBug() throws Exception { client.admin().indices().prepareDelete().execute().actionGet(); - client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("index.number_of_shards", 1)).execute().actionGet(); + client.admin().indices().prepareCreate("test") + .setSettings( + ImmutableSettings.settingsBuilder() + .put("index.number_of_shards", 1) + .put("index.number_of_replicas", 0) + ).execute().actionGet(); client.admin().cluster().prepareHealth().setWaitForGreenStatus().execute().actionGet(); client.admin().indices().preparePutMapping("test").setType("child").setSource(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("_parent").field("type", "parent").endObject() @@ -753,7 +796,12 @@ public class SimpleChildQuerySearchTests extends AbstractNodesTests { public void testCountApiUsage() throws Exception { client.admin().indices().prepareDelete().execute().actionGet(); - client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("index.number_of_shards", 1)).execute().actionGet(); + client.admin().indices().prepareCreate("test") + .setSettings( + ImmutableSettings.settingsBuilder() + .put("index.number_of_shards", 1) + .put("index.number_of_replicas", 0) + ).execute().actionGet(); client.admin().cluster().prepareHealth().setWaitForGreenStatus().execute().actionGet(); client.admin().indices().preparePutMapping("test").setType("child").setSource(XContentFactory.jsonBuilder().startObject().startObject("type") .startObject("_parent").field("type", "parent").endObject() From 978c95649ef4a6021ea9e204765d0aac45fd443d Mon Sep 17 00:00:00 2001 From: Martijn van Groningen Date: Fri, 
9 Nov 2012 10:43:51 +0100 Subject: [PATCH 146/146] lucene 4: Fixed SimpleQueryTests --- .../search/query/SimpleQueryTests.java | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/src/test/java/org/elasticsearch/test/integration/search/query/SimpleQueryTests.java b/src/test/java/org/elasticsearch/test/integration/search/query/SimpleQueryTests.java index 065a1dbc22e..733a5aed140 100644 --- a/src/test/java/org/elasticsearch/test/integration/search/query/SimpleQueryTests.java +++ b/src/test/java/org/elasticsearch/test/integration/search/query/SimpleQueryTests.java @@ -39,6 +39,7 @@ import static org.elasticsearch.index.query.QueryBuilders.*; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.anyOf; import static org.hamcrest.Matchers.equalTo; +import static org.testng.Assert.assertTrue; import static org.testng.Assert.fail; /** @@ -97,8 +98,11 @@ public class SimpleQueryTests extends AbstractNodesTests { SearchResponse searchResponse = client.prepareSearch().setQuery("{ \"text_phrase\" : { \"field2\" : \"quick brown\", \"slop\" : \"2\" }}").execute().actionGet(); assertThat(searchResponse.hits().totalHits(), equalTo(1l)); - SearchResponse actionGet = client.prepareSearch().setQuery("{ \"text_phrase\" : { \"field1\" : \"quick brown\", \"slop\" : \"2\" }}").execute().actionGet(); - assertThat(actionGet.hits().totalHits(), equalTo(0l)); + try { + client.prepareSearch().setQuery("{ \"text_phrase\" : { \"field1\" : \"quick brown\", \"slop\" : \"2\" }}").execute().actionGet(); + } catch (SearchPhaseExecutionException e) { + assertTrue(e.getMessage().endsWith("IllegalStateException[field \"field1\" was indexed without position data; cannot run PhraseQuery (term=quick)]; }")); + } } @Test @@ -117,10 +121,14 @@ public class SimpleQueryTests extends AbstractNodesTests { client.prepareIndex("test", "type1", "1").setSource("field1", "quick brown fox", "field2", "quick brown fox").execute().actionGet(); client.prepareIndex("test", "type1", "2").setSource("field1", "quick lazy huge brown fox", "field2", "quick lazy huge brown fox").setRefresh(true).execute().actionGet(); + SearchResponse searchResponse = client.prepareSearch().setQuery("{ \"text_phrase\" : { \"field2\" : \"quick brown\", \"slop\" : \"2\" }}").execute().actionGet(); assertThat(searchResponse.hits().totalHits(), equalTo(1l)); - SearchResponse actionGet = client.prepareSearch().setQuery("{ \"text_phrase\" : { \"field1\" : \"quick brown\", \"slop\" : \"2\" }}").execute().actionGet(); - assertThat(actionGet.hits().totalHits(), equalTo(0l)); + try { + client.prepareSearch().setQuery("{ \"text_phrase\" : { \"field1\" : \"quick brown\", \"slop\" : \"2\" }}").execute().actionGet(); + } catch (SearchPhaseExecutionException e) { + assertTrue(e.getMessage().endsWith("IllegalStateException[field \"field1\" was indexed without position data; cannot run PhraseQuery (term=quick)]; }")); + } } @Test
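For context on the assertions above: in Lucene 4 a field indexed without position data can no longer quietly return zero hits for a phrase query; the search fails with an IllegalStateException, which in elasticsearch surfaces as the SearchPhaseExecutionException these tests now expect. A minimal standalone sketch of the underlying Lucene behavior (illustrative; the field name and text are assumptions, not taken from the test suite):

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.FieldType;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.FieldInfo;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.PhraseQuery;
    import org.apache.lucene.store.RAMDirectory;
    import org.apache.lucene.util.Version;

    public class NoPositionsSketch {
        public static void main(String[] args) throws Exception {
            RAMDirectory dir = new RAMDirectory();
            IndexWriter writer = new IndexWriter(dir,
                    new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)));

            FieldType docsOnly = new FieldType();
            docsOnly.setIndexed(true);
            docsOnly.setTokenized(true);
            docsOnly.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY); // positions are not recorded
            Document doc = new Document();
            doc.add(new Field("field1", "quick brown fox", docsOnly));
            writer.addDocument(doc);
            writer.close();

            IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
            PhraseQuery phrase = new PhraseQuery();
            phrase.add(new Term("field1", "quick"));
            phrase.add(new Term("field1", "brown"));
            // throws IllegalStateException: field "field1" was indexed without position data
            searcher.search(phrase, 10);
        }
    }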