Upgrade to Lucene 4.1

* Removed CustomMemoryIndex in favor of MemoryIndex, which as of 4.1 supports adding the same field twice
* Replaced the duplicated rate-limiting logic in the X[*]FSDirectory classes with a RateLimitedFSDirectory wrapper
* Removed the hacks for detecting the merge context during rate limiting in favor of IOContext
* Replaced the Scorer#freq() return type (from float to int)
* Upgraded FVHighlighter to the new 'centered' highlighting
* Fixed RobinEngine to use the separate setCommitData call (see the sketch after this list)
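
A minimal sketch of the setCommitData change applied in RobinEngine below; the class, method, and the "translog_id" key are placeholders for illustration (the engine uses Translog.TRANSLOG_ID_KEY), not the exact committed code:

    import org.apache.lucene.index.IndexWriter;

    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;

    class CommitDataSketch {
        // Lucene 4.0 passed the commit user data straight to commit(commitData).
        // Lucene 4.1 sets the data separately and then calls the no-argument commit().
        static void commitTranslogId(IndexWriter writer, long translogId) throws IOException {
            Map<String, String> commitData = new HashMap<String, String>();
            commitData.put("translog_id", Long.toString(translogId)); // placeholder key
            writer.setCommitData(commitData);
            writer.commit();
        }
    }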
Simon Willnauer 2013-01-22 12:21:37 +01:00
parent 20f43bf54c
commit 2880cd0172
43 changed files with 354 additions and 2273 deletions

View File

@ -30,7 +30,7 @@
</parent>
<properties>
<lucene.version>4.0.0</lucene.version>
<lucene.version>4.1.0</lucene.version>
</properties>
<repositories>
@ -617,4 +617,4 @@
</plugins>
</pluginManagement>
</build>
</project>
</project>

View File

@ -87,10 +87,8 @@ public class TrackingConcurrentMergeScheduler extends ConcurrentMergeScheduler {
logger.trace("merge [{}] starting..., merging [{}] segments, [{}] docs, [{}] size, into [{}] estimated_size", merge.info == null ? "_na_" : merge.info.info.name, merge.segments.size(), totalNumDocs, new ByteSizeValue(totalSizeInBytes), new ByteSizeValue(merge.estimatedMergeBytes));
}
try {
TrackingMergeScheduler.setCurrentMerge(merge);
super.doMerge(merge);
} finally {
TrackingMergeScheduler.removeCurrentMerge();
long took = System.currentTimeMillis() - time;
currentMerges.dec();

View File

@ -1,43 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.index;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import java.util.concurrent.ConcurrentMap;
/**
*/
public class TrackingMergeScheduler {
private static final ConcurrentMap<Thread, MergePolicy.OneMerge> merges = ConcurrentCollections.newConcurrentMap();
public static void setCurrentMerge(MergePolicy.OneMerge merge) {
merges.put(Thread.currentThread(), merge);
}
public static void removeCurrentMerge() {
merges.remove(Thread.currentThread());
}
public static MergePolicy.OneMerge getCurrentMerge() {
return merges.get(Thread.currentThread());
}
}

View File

@ -99,10 +99,8 @@ public class TrackingSerialMergeScheduler extends MergeScheduler {
logger.trace("merge [{}] starting..., merging [{}] segments, [{}] docs, [{}] size, into [{}] estimated_size", merge.info == null ? "_na_" : merge.info.info.name, merge.segments.size(), totalNumDocs, new ByteSizeValue(totalSizeInBytes), new ByteSizeValue(merge.estimatedMergeBytes));
}
try {
TrackingMergeScheduler.setCurrentMerge(merge);
writer.merge(merge);
} finally {
TrackingMergeScheduler.removeCurrentMerge();
long took = System.currentTimeMillis() - time;
currentMerges.dec();

View File

@ -518,6 +518,7 @@ public class MapperQueryParser extends QueryParser {
TokenStream source;
try {
source = getAnalyzer().tokenStream(field, new StringReader(termStr));
source.reset();
} catch (IOException e) {
return super.getPrefixQuery(field, termStr);
}
@ -660,6 +661,7 @@ public class MapperQueryParser extends QueryParser {
if (isWithinToken) {
try {
TokenStream source = getAnalyzer().tokenStream(field, new FastStringReader(tmp.toString()));
source.reset();
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
if (source.incrementToken()) {
String term = termAtt.toString();
@ -689,6 +691,7 @@ public class MapperQueryParser extends QueryParser {
if (isWithinToken) {
try {
TokenStream source = getAnalyzer().tokenStream(field, new FastStringReader(tmp.toString()));
source.reset();
CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
if (source.incrementToken()) {
String term = termAtt.toString();

View File

@ -0,0 +1,197 @@
package org.apache.lucene.store;
import java.io.IOException;
import java.util.Collection;
import org.apache.lucene.store.IOContext.Context;
public final class RateLimitedFSDirectory extends Directory {
private final FSDirectory delegate;
private final StoreRateLimiting.Provider rateLimitingProvider;
private final StoreRateLimiting.Listener rateListener;
public RateLimitedFSDirectory(FSDirectory wrapped, StoreRateLimiting.Provider rateLimitingProvider,
StoreRateLimiting.Listener rateListener) {
this.delegate = wrapped;
this.rateLimitingProvider = rateLimitingProvider;
this.rateListener = rateListener;
}
public FSDirectory wrappedDirectory() {
return this.delegate;
}
@Override
public String[] listAll() throws IOException {
ensureOpen();
return delegate.listAll();
}
@Override
public boolean fileExists(String name) throws IOException {
ensureOpen();
return delegate.fileExists(name);
}
@Override
public void deleteFile(String name) throws IOException {
ensureOpen();
delegate.deleteFile(name);
}
@Override
public long fileLength(String name) throws IOException {
ensureOpen();
return delegate.fileLength(name);
}
@Override
public IndexOutput createOutput(String name, IOContext context) throws IOException {
ensureOpen();
final IndexOutput output = delegate.createOutput(name, context);
StoreRateLimiting rateLimiting = rateLimitingProvider.rateLimiting();
StoreRateLimiting.Type type = rateLimiting.getType();
RateLimiter limiter = rateLimiting.getRateLimiter();
if (type == StoreRateLimiting.Type.NONE || limiter == null) {
return output;
}
if (context.context == Context.MERGE) {
// we are merging, and type is either MERGE or ALL, rate limit...
return new RateLimitedIndexOutput(limiter, rateListener, output);
}
if (type == StoreRateLimiting.Type.ALL) {
return new RateLimitedIndexOutput(limiter, rateListener, output);
}
// we shouldn't really get here...
return output;
}
@Override
public void sync(Collection<String> names) throws IOException {
ensureOpen();
delegate.sync(names);
}
@Override
public IndexInput openInput(String name, IOContext context) throws IOException {
ensureOpen();
return delegate.openInput(name, context);
}
@Override
public void close() throws IOException {
isOpen = false;
delegate.close();
}
@Override
public IndexInputSlicer createSlicer(String name, IOContext context) throws IOException {
ensureOpen();
return delegate.createSlicer(name, context);
}
@Override
public Lock makeLock(String name) {
ensureOpen();
return delegate.makeLock(name);
}
@Override
public void clearLock(String name) throws IOException {
ensureOpen();
delegate.clearLock(name);
}
@Override
public void setLockFactory(LockFactory lockFactory) throws IOException {
ensureOpen();
delegate.setLockFactory(lockFactory);
}
@Override
public LockFactory getLockFactory() {
ensureOpen();
return delegate.getLockFactory();
}
@Override
public String getLockID() {
ensureOpen();
return delegate.getLockID();
}
@Override
public String toString() {
return "RateLimitedDirectoryWrapper(" + delegate.toString() + ")";
}
@Override
public void copy(Directory to, String src, String dest, IOContext context) throws IOException {
ensureOpen();
delegate.copy(to, src, dest, context);
}
static final class RateLimitedIndexOutput extends BufferedIndexOutput {
private final IndexOutput delegate;
private final BufferedIndexOutput bufferedDelegate;
private final RateLimiter rateLimiter;
private final StoreRateLimiting.Listener rateListener;
RateLimitedIndexOutput(final RateLimiter rateLimiter, final StoreRateLimiting.Listener rateListener, final IndexOutput delegate) {
// TODO should we make buffer size configurable
if (delegate instanceof BufferedIndexOutput) {
bufferedDelegate = (BufferedIndexOutput) delegate;
this.delegate = delegate;
} else {
this.delegate = delegate;
bufferedDelegate = null;
}
this.rateLimiter = rateLimiter;
this.rateListener = rateListener;
}
@Override
protected void flushBuffer(byte[] b, int offset, int len) throws IOException {
rateListener.onPause(rateLimiter.pause(len));
if (bufferedDelegate != null) {
bufferedDelegate.flushBuffer(b, offset, len);
} else {
delegate.writeBytes(b, offset, len);
}
}
@Override
public long length() throws IOException {
return delegate.length();
}
@Override
public void seek(long pos) throws IOException {
flush();
delegate.seek(pos);
}
@Override
public void flush() throws IOException {
try {
super.flush();
} finally {
delegate.flush();
}
}
@Override
public void close() throws IOException {
try {
super.close();
} finally {
delegate.close();
}
}
}
}

View File

@ -2,7 +2,6 @@ package org.apache.lucene.store;
import org.elasticsearch.ElasticSearchIllegalArgumentException;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.RateLimiter;
import org.elasticsearch.common.unit.ByteSizeValue;
/**
@ -36,8 +35,8 @@ public class StoreRateLimiting {
}
}
private final RateLimiter rateLimiter = new RateLimiter(0);
private volatile RateLimiter actualRateLimiter;
private final RateLimiter.SimpleRateLimiter rateLimiter = new RateLimiter.SimpleRateLimiter(0);
private volatile RateLimiter.SimpleRateLimiter actualRateLimiter;
private volatile Type type;
@ -55,10 +54,10 @@ public class StoreRateLimiting {
actualRateLimiter = null;
} else if (actualRateLimiter == null) {
actualRateLimiter = rateLimiter;
actualRateLimiter.setMaxRate(rate.mbFrac());
actualRateLimiter.setMbPerSec(rate.mbFrac());
} else {
assert rateLimiter == actualRateLimiter;
rateLimiter.setMaxRate(rate.mbFrac());
rateLimiter.setMbPerSec(rate.mbFrac());
}
}

View File

@ -1,26 +0,0 @@
package org.apache.lucene.store;
import org.elasticsearch.common.RateLimiter;
import java.io.IOException;
/**
*/
class XFSIndexOutput extends FSDirectory.FSIndexOutput {
private final RateLimiter rateLimiter;
private final StoreRateLimiting.Listener rateListener;
XFSIndexOutput(FSDirectory parent, String name, RateLimiter rateLimiter, StoreRateLimiting.Listener rateListener) throws IOException {
super(parent, name, null /* we have our own rate limiter */);
this.rateLimiter = rateLimiter;
this.rateListener = rateListener;
}
@Override
public void flushBuffer(byte[] b, int offset, int size) throws IOException {
rateListener.onPause(rateLimiter.pause(size));
super.flushBuffer(b, offset, size);
}
}

View File

@ -1,64 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.store;
import org.apache.lucene.index.TrackingMergeScheduler;
import org.elasticsearch.common.RateLimiter;
import java.io.File;
import java.io.IOException;
/**
*/
public class XMMapFSDirectory extends MMapDirectory {
private final StoreRateLimiting.Provider rateLimitingProvider;
private final StoreRateLimiting.Listener rateListener;
public XMMapFSDirectory(File path, LockFactory lockFactory, StoreRateLimiting.Provider rateLimitingProvider, StoreRateLimiting.Listener rateListener) throws IOException {
super(path, lockFactory);
this.rateLimitingProvider = rateLimitingProvider;
this.rateListener = rateListener;
}
@Override
public IndexOutput createOutput(String name, IOContext context) throws IOException {
StoreRateLimiting rateLimiting = rateLimitingProvider.rateLimiting();
StoreRateLimiting.Type type = rateLimiting.getType();
RateLimiter limiter = rateLimiting.getRateLimiter();
if (type == StoreRateLimiting.Type.NONE || limiter == null) {
return super.createOutput(name, context);
}
if (TrackingMergeScheduler.getCurrentMerge() != null) {
// we are mering, and type is either MERGE or ALL, rate limit...
ensureOpen();
ensureCanWrite(name);
return new XFSIndexOutput(this, name, limiter, rateListener);
}
if (type == StoreRateLimiting.Type.ALL) {
ensureOpen();
ensureCanWrite(name);
return new XFSIndexOutput(this, name, limiter, rateListener);
}
// we shouldn't really get here...
return super.createOutput(name, context);
}
}

View File

@ -1,64 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.store;
import org.apache.lucene.index.TrackingMergeScheduler;
import org.elasticsearch.common.RateLimiter;
import java.io.File;
import java.io.IOException;
/**
*/
public class XNIOFSDirectory extends NIOFSDirectory {
private final StoreRateLimiting.Provider rateLimitingProvider;
private final StoreRateLimiting.Listener rateListener;
public XNIOFSDirectory(File path, LockFactory lockFactory, StoreRateLimiting.Provider rateLimitingProvider, StoreRateLimiting.Listener rateListener) throws IOException {
super(path, lockFactory);
this.rateLimitingProvider = rateLimitingProvider;
this.rateListener = rateListener;
}
@Override
public IndexOutput createOutput(String name, IOContext context) throws IOException {
StoreRateLimiting rateLimiting = rateLimitingProvider.rateLimiting();
StoreRateLimiting.Type type = rateLimiting.getType();
RateLimiter limiter = rateLimiting.getRateLimiter();
if (type == StoreRateLimiting.Type.NONE || limiter == null) {
return super.createOutput(name, context);
}
if (TrackingMergeScheduler.getCurrentMerge() != null) {
// we are mering, and type is either MERGE or ALL, rate limit...
ensureOpen();
ensureCanWrite(name);
return new XFSIndexOutput(this, name, limiter, rateListener);
}
if (type == StoreRateLimiting.Type.ALL) {
ensureOpen();
ensureCanWrite(name);
return new XFSIndexOutput(this, name, limiter, rateListener);
}
// we shouldn't really get here...
return super.createOutput(name, context);
}
}

View File

@ -1,64 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.store;
import org.apache.lucene.index.TrackingMergeScheduler;
import org.elasticsearch.common.RateLimiter;
import java.io.File;
import java.io.IOException;
/**
*/
public class XSimpleFSDirectory extends SimpleFSDirectory {
private final StoreRateLimiting.Provider rateLimitingProvider;
private final StoreRateLimiting.Listener rateListener;
public XSimpleFSDirectory(File path, LockFactory lockFactory, StoreRateLimiting.Provider rateLimitingProvider, StoreRateLimiting.Listener rateListener) throws IOException {
super(path, lockFactory);
this.rateLimitingProvider = rateLimitingProvider;
this.rateListener = rateListener;
}
@Override
public IndexOutput createOutput(String name, IOContext context) throws IOException {
StoreRateLimiting rateLimiting = rateLimitingProvider.rateLimiting();
StoreRateLimiting.Type type = rateLimiting.getType();
RateLimiter limiter = rateLimiting.getRateLimiter();
if (type == StoreRateLimiting.Type.NONE || limiter == null) {
return super.createOutput(name, context);
}
if (TrackingMergeScheduler.getCurrentMerge() != null) {
// we are mering, and type is either MERGE or ALL, rate limit...
ensureOpen();
ensureCanWrite(name);
return new XFSIndexOutput(this, name, limiter, rateListener);
}
if (type == StoreRateLimiting.Type.ALL) {
ensureOpen();
ensureCanWrite(name);
return new XFSIndexOutput(this, name, limiter, rateListener);
}
// we shouldn't really get here...
return super.createOutput(name, context);
}
}

View File

@ -1,85 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common;
import org.elasticsearch.ElasticSearchInterruptedException;
/**
*/
// LUCENE MONITOR: Taken from trunk of Lucene at 06-09-11
public class RateLimiter {
private volatile double nsPerByte;
private volatile long lastNS;
// TODO: we could also allow eg a sub class to dynamically
// determine the allowed rate, eg if an app wants to
// change the allowed rate over time or something
/**
* mbPerSec is the MB/sec max IO rate
*/
public RateLimiter(double mbPerSec) {
setMaxRate(mbPerSec);
}
public void setMaxRate(double mbPerSec) {
nsPerByte = 1000000000. / (1024 * 1024 * mbPerSec);
}
/**
* Pauses, if necessary, to keep the instantaneous IO
* rate at or below the target. NOTE: multiple threads
* may safely use this, however the implementation is
* not perfectly thread safe but likely in practice this
* is harmless (just means in some rare cases the rate
* might exceed the target). It's best to call this
* with a biggish count, not one byte at a time.
*/
public long pause(long bytes) {
// TODO: this is purely instantenous rate; maybe we
// should also offer decayed recent history one?
final long targetNS = lastNS = lastNS + ((long) (bytes * nsPerByte));
long curNS = System.nanoTime();
if (lastNS < curNS) {
lastNS = curNS;
}
// While loop because Thread.sleep doesn't alway sleep
// enough:
long totalPauseTime = 0;
while (true) {
final long pauseNS = targetNS - curNS;
if (pauseNS > 0) {
try {
Thread.sleep((int) (pauseNS / 1000000), (int) (pauseNS % 1000000));
totalPauseTime += pauseNS;
} catch (InterruptedException ie) {
throw new ElasticSearchInterruptedException("interrupted while rate limiting", ie);
}
curNS = System.nanoTime();
continue;
}
break;
}
return totalPauseTime;
}
}

View File

@ -39,7 +39,7 @@ public class EmptyScorer extends Scorer {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return 0;
}

View File

@ -21,13 +21,11 @@ import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.FilteredQuery.FilterStrategy;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.ToStringUtils;
import org.elasticsearch.common.lucene.docset.DocIdSets;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Set;
@ -41,14 +39,11 @@ import java.util.Set;
* @see CachingWrapperFilter
* @since 1.4
*/
// Copied form Lucene 4.1, Changes are marked with //CHANGE:
// Note, when Lucene 4.1 comes out, we can simply extend 4.1 and add our tweaks, since they are on top of current one
// some are tricky though..., need closer look
// Changes are marked with //CHANGE:
// Delegate to FilteredQuery - this version fixes the bug in LUCENE-4705 and uses ApplyAcceptedDocsFilter internally
public class XFilteredQuery extends Query {
private final Query query;
private final Filter filter;
private final Filter rawFilter;
private final FilteredQuery delegate;
private final FilterStrategy strategy;
/**
@ -59,7 +54,7 @@ public class XFilteredQuery extends Query {
* @param filter Filter to apply to query results, cannot be <code>null</code>.
*/
public XFilteredQuery(Query query, Filter filter) {
this(query, filter, RANDOM_ACCESS_FILTER_STRATEGY);
this(query, filter, FilteredQuery.RANDOM_ACCESS_FILTER_STRATEGY);
}
/**
@ -72,14 +67,10 @@ public class XFilteredQuery extends Query {
* @see FilterStrategy
*/
public XFilteredQuery(Query query, Filter filter, FilterStrategy strategy) {
if (query == null || filter == null)
throw new IllegalArgumentException("Query and filter cannot be null.");
if (strategy == null)
throw new IllegalArgumentException("FilterStrategy can not be null");
this.strategy = strategy;
this.query = query;
delegate = new FilteredQuery(query, new ApplyAcceptedDocsFilter(filter), strategy);
// CHANGE: we need to wrap it in post application of accepted docs
this.filter = new ApplyAcceptedDocsFilter(filter);
this.rawFilter = filter;
this.strategy = strategy;
}
/**
@ -88,260 +79,7 @@ public class XFilteredQuery extends Query {
*/
@Override
public Weight createWeight(final IndexSearcher searcher) throws IOException {
final Weight weight = query.createWeight(searcher);
return new Weight() {
@Override
public boolean scoresDocsOutOfOrder() {
return true;
}
@Override
public float getValueForNormalization() throws IOException {
return weight.getValueForNormalization() * getBoost() * getBoost(); // boost sub-weight
}
@Override
public void normalize(float norm, float topLevelBoost) {
weight.normalize(norm, topLevelBoost * getBoost()); // incorporate boost
}
@Override
public Explanation explain(AtomicReaderContext ir, int i) throws IOException {
Explanation inner = weight.explain(ir, i);
Filter f = XFilteredQuery.this.filter;
DocIdSet docIdSet = f.getDocIdSet(ir, ir.reader().getLiveDocs());
DocIdSetIterator docIdSetIterator = docIdSet == null ? DocIdSet.EMPTY_DOCIDSET.iterator() : docIdSet.iterator();
if (docIdSetIterator == null) {
docIdSetIterator = DocIdSet.EMPTY_DOCIDSET.iterator();
}
if (docIdSetIterator.advance(i) == i) {
return inner;
} else {
Explanation result = new Explanation
(0.0f, "failure to match filter: " + f.toString());
result.addDetail(inner);
return result;
}
}
// return this query
@Override
public Query getQuery() {
return XFilteredQuery.this;
}
// return a filtering scorer
@Override
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, final Bits acceptDocs) throws IOException {
assert filter != null;
final DocIdSet filterDocIdSet = filter.getDocIdSet(context, acceptDocs);
if (filterDocIdSet == null) {
// this means the filter does not accept any documents.
return null;
}
return strategy.filteredScorer(context, scoreDocsInOrder, topScorer, weight, filterDocIdSet);
}
};
}
/**
* A scorer that consults the filter iff a document was matched by the
* delegate scorer. This is useful if the filter computation is more expensive
* than document scoring or if the filter has a linear running time to compute
* the next matching doc like exact geo distances.
*/
// CHANGE: change this to package level, so it won't mess up with JIT
// CHANGE: filteredbits can be final
static final class QueryFirstScorer extends Scorer {
private final Scorer scorer;
private int scorerDoc = -1;
private final Bits filterbits;
protected QueryFirstScorer(Weight weight, Bits filterBits, Scorer other) {
super(weight);
this.scorer = other;
this.filterbits = filterBits;
}
// optimization: we are topScorer and collect directly
@Override
public void score(Collector collector) throws IOException {
// the normalization trick already applies the boost of this query,
// so we can use the wrapped scorer directly:
collector.setScorer(scorer);
for (; ; ) {
final int scorerDoc = scorer.nextDoc();
if (scorerDoc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
if (filterbits.get(scorerDoc)) {
collector.collect(scorerDoc);
}
}
}
@Override
public int nextDoc() throws IOException {
int doc;
for (; ; ) {
doc = scorer.nextDoc();
if (doc == Scorer.NO_MORE_DOCS || filterbits.get(doc)) {
return scorerDoc = doc;
}
}
}
@Override
public int advance(int target) throws IOException {
int doc = scorer.advance(target);
if (doc != Scorer.NO_MORE_DOCS && !filterbits.get(doc)) {
return scorerDoc = nextDoc();
} else {
return scorerDoc = doc;
}
}
@Override
public int docID() {
return scorerDoc;
}
@Override
public float score() throws IOException {
return scorer.score();
}
@Override
public float freq() throws IOException {
return scorer.freq();
}
@Override
public Collection<ChildScorer> getChildren() {
return Collections.singleton(new ChildScorer(scorer, "FILTERED"));
}
}
/**
* A Scorer that uses a "leap-frog" approach (also called "zig-zag join"). The scorer and the filter
* take turns trying to advance to each other's next matching document, often
* jumping past the target document. When both land on the same document, it's
* collected.
*/
// CHANGE: change this to package level, so it won't mess up with JIT
static class LeapFrogScorer extends Scorer {
private final DocIdSetIterator secondary;
private final DocIdSetIterator primary;
private final Scorer scorer;
protected int primaryDoc = -1;
protected int secondaryDoc = -1;
protected LeapFrogScorer(Weight weight, DocIdSetIterator primary, DocIdSetIterator secondary, Scorer scorer) {
super(weight);
this.primary = primary;
this.secondary = secondary;
this.scorer = scorer;
}
// optimization: we are topScorer and collect directly using short-circuited algo
@Override
public final void score(Collector collector) throws IOException {
int primDoc = primaryNext();
int secDoc = secondary.advance(primDoc);
// the normalization trick already applies the boost of this query,
// so we can use the wrapped scorer directly:
collector.setScorer(scorer);
for (; ; ) {
if (primDoc == secDoc) {
// Check if scorer has exhausted, only before collecting.
if (primDoc == DocIdSetIterator.NO_MORE_DOCS) {
break;
}
collector.collect(primDoc);
primDoc = primary.nextDoc();
secDoc = secondary.advance(primDoc);
} else if (secDoc > primDoc) {
primDoc = primary.advance(secDoc);
} else {
secDoc = secondary.advance(primDoc);
}
}
}
private final int advanceToNextCommonDoc() throws IOException {
for (; ; ) {
if (secondaryDoc < primaryDoc) {
secondaryDoc = secondary.advance(primaryDoc);
} else if (secondaryDoc == primaryDoc) {
return primaryDoc;
} else {
primaryDoc = primary.advance(secondaryDoc);
}
}
}
@Override
public final int nextDoc() throws IOException {
primaryDoc = primaryNext();
return advanceToNextCommonDoc();
}
protected int primaryNext() throws IOException {
return primary.nextDoc();
}
@Override
public final int advance(int target) throws IOException {
if (target > primaryDoc) {
primaryDoc = primary.advance(target);
}
return advanceToNextCommonDoc();
}
@Override
public final int docID() {
return secondaryDoc;
}
@Override
public final float score() throws IOException {
return scorer.score();
}
@Override
public final float freq() throws IOException {
return scorer.freq();
}
@Override
public final Collection<ChildScorer> getChildren() {
return Collections.singleton(new ChildScorer(scorer, "FILTERED"));
}
}
// TODO once we have way to figure out if we use RA or LeapFrog we can remove this scorer
private static final class PrimaryAdvancedLeapFrogScorer extends LeapFrogScorer {
private final int firstFilteredDoc;
protected PrimaryAdvancedLeapFrogScorer(Weight weight, int firstFilteredDoc, DocIdSetIterator filterIter, Scorer other) {
super(weight, filterIter, other, other);
this.firstFilteredDoc = firstFilteredDoc;
this.primaryDoc = firstFilteredDoc; // initialize to prevent and advance call to move it further
}
@Override
protected int primaryNext() throws IOException {
if (secondaryDoc != -1) {
return super.primaryNext();
} else {
return firstFilteredDoc;
}
}
return delegate.createWeight(searcher);
}
/**
@ -351,22 +89,23 @@ public class XFilteredQuery extends Query {
*/
@Override
public Query rewrite(IndexReader reader) throws IOException {
Query query = delegate.getQuery();
final Query queryRewritten = query.rewrite(reader);
// CHANGE: if we push back to Lucene, would love to have an extension for "isMatchAllQuery"
if (queryRewritten instanceof MatchAllDocsQuery || Queries.isConstantMatchAllQuery(queryRewritten)) {
// Special case: If the query is a MatchAllDocsQuery, we only
// return a CSQ(filter).
final Query rewritten = new ConstantScoreQuery(filter);
final Query rewritten = new ConstantScoreQuery(delegate.getFilter());
// Combine boost of MatchAllDocsQuery and the wrapped rewritten query:
rewritten.setBoost(this.getBoost() * queryRewritten.getBoost());
rewritten.setBoost(delegate.getBoost() * queryRewritten.getBoost());
return rewritten;
}
if (queryRewritten != query) {
// rewrite to a new FilteredQuery wrapping the rewritten query
final Query rewritten = new XFilteredQuery(queryRewritten, filter, strategy);
rewritten.setBoost(this.getBoost());
final Query rewritten = new XFilteredQuery(queryRewritten, rawFilter, strategy);
rewritten.setBoost(delegate.getBoost());
return rewritten;
} else {
// nothing to rewrite, we are done!
@ -374,11 +113,21 @@ public class XFilteredQuery extends Query {
}
}
@Override
public void setBoost(float b) {
delegate.setBoost(b);
}
@Override
public float getBoost() {
return delegate.getBoost();
}
/**
* Returns this FilteredQuery's (unfiltered) Query
*/
public final Query getQuery() {
return query;
return delegate.getQuery();
}
/**
@ -386,16 +135,16 @@ public class XFilteredQuery extends Query {
*/
public final Filter getFilter() {
// CHANGE: unwrap the accepted docs filter
if (filter instanceof ApplyAcceptedDocsFilter) {
return ((ApplyAcceptedDocsFilter) filter).filter();
if (rawFilter instanceof ApplyAcceptedDocsFilter) {
return ((ApplyAcceptedDocsFilter) rawFilter).filter();
}
return filter;
return rawFilter;
}
// inherit javadoc
@Override
public void extractTerms(Set<Term> terms) {
getQuery().extractTerms(terms);
delegate.extractTerms(terms);
}
/**
@ -403,13 +152,7 @@ public class XFilteredQuery extends Query {
*/
@Override
public String toString(String s) {
StringBuilder buffer = new StringBuilder();
buffer.append("filtered(");
buffer.append(query.toString(s));
buffer.append(")->");
buffer.append(filter);
buffer.append(ToStringUtils.boost(getBoost()));
return buffer.toString();
return delegate.toString(s);
}
/**
@ -417,13 +160,7 @@ public class XFilteredQuery extends Query {
*/
@Override
public boolean equals(Object o) {
if (o == this)
return true;
if (!super.equals(o))
return false;
assert o instanceof XFilteredQuery;
final XFilteredQuery fq = (XFilteredQuery) o;
return fq.query.equals(this.query) && fq.filter.equals(this.filter) && fq.strategy.equals(this.strategy);
return delegate.equals(o);
}
/**
@ -431,214 +168,13 @@ public class XFilteredQuery extends Query {
*/
@Override
public int hashCode() {
int hash = super.hashCode();
hash = hash * 31 + strategy.hashCode();
hash = hash * 31 + query.hashCode();
hash = hash * 31 + filter.hashCode();
return hash;
}
/**
* A {@link FilterStrategy} that conditionally uses a random access filter if
* the given {@link DocIdSet} supports random access (returns a non-null value
* from {@link DocIdSet#bits()}) and
* {@link RandomAccessFilterStrategy#useRandomAccess(Bits, int)} returns
* <code>true</code>. Otherwise this strategy falls back to a "zig-zag join" (
* {@link XFilteredQuery#LEAP_FROG_FILTER_FIRST_STRATEGY}) strategy.
* <p/>
* <p>
* Note: this strategy is the default strategy in {@link FilteredQuery}
* </p>
*/
public static final FilterStrategy RANDOM_ACCESS_FILTER_STRATEGY = new RandomAccessFilterStrategy();
/**
* A filter strategy that uses a "leap-frog" approach (also called "zig-zag join").
* The scorer and the filter
* take turns trying to advance to each other's next matching document, often
* jumping past the target document. When both land on the same document, it's
* collected.
* <p>
* Note: This strategy uses the filter to lead the iteration.
* </p>
*/
public static final FilterStrategy LEAP_FROG_FILTER_FIRST_STRATEGY = new LeapFrogFilterStrategy(false);
/**
* A filter strategy that uses a "leap-frog" approach (also called "zig-zag join").
* The scorer and the filter
* take turns trying to advance to each other's next matching document, often
* jumping past the target document. When both land on the same document, it's
* collected.
* <p>
* Note: This strategy uses the query to lead the iteration.
* </p>
*/
public static final FilterStrategy LEAP_FROG_QUERY_FIRST_STRATEGY = new LeapFrogFilterStrategy(true);
/**
* A filter strategy that advances the Query or rather its {@link Scorer} first and consults the
* filter {@link DocIdSet} for each matched document.
* <p>
* Note: this strategy requires a {@link DocIdSet#bits()} to return a non-null value. Otherwise
* this strategy falls back to {@link XFilteredQuery#LEAP_FROG_QUERY_FIRST_STRATEGY}
* </p>
* <p>
* Use this strategy if the filter computation is more expensive than document
* scoring or if the filter has a linear running time to compute the next
* matching doc like exact geo distances.
* </p>
*/
public static final FilterStrategy QUERY_FIRST_FILTER_STRATEGY = new QueryFirstFilterStrategy();
/**
* Abstract class that defines how the filter ({@link DocIdSet}) applied during document collection.
*/
public static abstract class FilterStrategy {
/**
* Returns a filtered {@link Scorer} based on this strategy.
*
* @param context the {@link AtomicReaderContext} for which to return the {@link Scorer}.
* @param scoreDocsInOrder specifies whether in-order scoring of documents is required. Note
* that if set to false (i.e., out-of-order scoring is required),
* this method can return whatever scoring mode it supports, as every
* in-order scorer is also an out-of-order one. However, an
* out-of-order scorer may not support {@link Scorer#nextDoc()}
* and/or {@link Scorer#advance(int)}, therefore it is recommended to
* request an in-order scorer if use of these methods is required.
* @param topScorer if true, {@link Scorer#score(Collector)} will be called; if false,
* {@link Scorer#nextDoc()} and/or {@link Scorer#advance(int)} will
* be called.
* @param weight the {@link FilteredQuery} {@link Weight} to create the filtered scorer.
* @param docIdSet the filter {@link DocIdSet} to apply
* @return a filtered scorer
* @throws IOException if an {@link IOException} occurs
*/
public abstract Scorer filteredScorer(AtomicReaderContext context,
boolean scoreDocsInOrder, boolean topScorer, Weight weight,
DocIdSet docIdSet) throws IOException;
}
/**
* A {@link FilterStrategy} that conditionally uses a random access filter if
* the given {@link DocIdSet} supports random access (returns a non-null value
* from {@link DocIdSet#bits()}) and
* {@link RandomAccessFilterStrategy#useRandomAccess(Bits, int)} returns
* <code>true</code>. Otherwise this strategy falls back to a "zig-zag join" (
* {@link XFilteredQuery#LEAP_FROG_FILTER_FIRST_STRATEGY}) strategy .
*/
public static class RandomAccessFilterStrategy extends FilterStrategy {
@Override
public Scorer filteredScorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Weight weight, DocIdSet docIdSet) throws IOException {
final DocIdSetIterator filterIter = docIdSet.iterator();
if (filterIter == null) {
// this means the filter does not accept any documents.
return null;
}
final int firstFilterDoc = filterIter.nextDoc();
if (firstFilterDoc == DocIdSetIterator.NO_MORE_DOCS) {
return null;
}
final Bits filterAcceptDocs = docIdSet.bits();
// force if RA is requested
final boolean useRandomAccess = (filterAcceptDocs != null && (useRandomAccess(filterAcceptDocs, firstFilterDoc)));
if (useRandomAccess) {
// if we are using random access, we return the inner scorer, just with other acceptDocs
return weight.scorer(context, scoreDocsInOrder, topScorer, filterAcceptDocs);
} else {
assert firstFilterDoc > -1;
// we are gonna advance() this scorer, so we set inorder=true/toplevel=false
// we pass null as acceptDocs, as our filter has already respected acceptDocs, no need to do twice
final Scorer scorer = weight.scorer(context, true, false, null);
// TODO once we have way to figure out if we use RA or LeapFrog we can remove this scorer
return (scorer == null) ? null : new PrimaryAdvancedLeapFrogScorer(weight, firstFilterDoc, filterIter, scorer);
}
}
/**
* Expert: decides if a filter should be executed as "random-access" or not.
* random-access means the filter "filters" in a similar way as deleted docs are filtered
* in Lucene. This is faster when the filter accepts many documents.
* However, when the filter is very sparse, it can be faster to execute the query+filter
* as a conjunction in some cases.
* <p/>
* The default implementation returns <code>true</code> if the first document accepted by the
* filter is < 100.
*
* @lucene.internal
*/
protected boolean useRandomAccess(Bits bits, int firstFilterDoc) {
//TODO once we have a cost API on filters and scorers we should rethink this heuristic
return firstFilterDoc < 100;
}
}
private static final class LeapFrogFilterStrategy extends FilterStrategy {
private final boolean scorerFirst;
private LeapFrogFilterStrategy(boolean scorerFirst) {
this.scorerFirst = scorerFirst;
}
@Override
public Scorer filteredScorer(AtomicReaderContext context,
boolean scoreDocsInOrder, boolean topScorer, Weight weight,
DocIdSet docIdSet) throws IOException {
final DocIdSetIterator filterIter = docIdSet.iterator();
if (filterIter == null) {
// this means the filter does not accept any documents.
return null;
}
// we are gonna advance() this scorer, so we set inorder=true/toplevel=false
// we pass null as acceptDocs, as our filter has already respected acceptDocs, no need to do twice
final Scorer scorer = weight.scorer(context, true, false, null);
if (scorerFirst) {
return (scorer == null) ? null : new LeapFrogScorer(weight, scorer, filterIter, scorer);
} else {
return (scorer == null) ? null : new LeapFrogScorer(weight, filterIter, scorer, scorer);
}
}
}
/**
* A filter strategy that advances the {@link Scorer} first and consults the
* {@link DocIdSet} for each matched document.
* <p>
* Note: this strategy requires a {@link DocIdSet#bits()} to return a non-null value. Otherwise
* this strategy falls back to {@link XFilteredQuery#LEAP_FROG_QUERY_FIRST_STRATEGY}
* </p>
* <p>
* Use this strategy if the filter computation is more expensive than document
* scoring or if the filter has a linear running time to compute the next
* matching doc like exact geo distances.
* </p>
*/
private static final class QueryFirstFilterStrategy extends FilterStrategy {
@Override
public Scorer filteredScorer(final AtomicReaderContext context,
boolean scoreDocsInOrder, boolean topScorer, Weight weight,
DocIdSet docIdSet) throws IOException {
Bits filterAcceptDocs = docIdSet.bits();
if (filterAcceptDocs == null) {
return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(context, scoreDocsInOrder, topScorer, weight, docIdSet);
}
final Scorer scorer = weight.scorer(context, true, false, null);
return scorer == null ? null : new QueryFirstScorer(weight,
filterAcceptDocs, scorer);
}
return delegate.hashCode();
}
// CHANGE: Add custom random access strategy, allowing to set the threshold
// CHANGE: Add filter first filter strategy
public static final FilterStrategy ALWAYS_RANDOM_ACCESS_FILTER_STRATEGY = new CustomRandomAccessFilterStrategy(0);
public static final CustomRandomAccessFilterStrategy CUSTOM_FILTER_STRATEGY = new CustomRandomAccessFilterStrategy();
/**
@ -649,7 +185,7 @@ public class XFilteredQuery extends Query {
* <code>true</code>. Otherwise this strategy falls back to a "zig-zag join" (
* {@link XFilteredQuery#LEAP_FROG_FILTER_FIRST_STRATEGY}) strategy .
*/
public static class CustomRandomAccessFilterStrategy extends FilterStrategy {
public static class CustomRandomAccessFilterStrategy extends FilteredQuery.RandomAccessFilterStrategy {
private final int threshold;
@ -669,7 +205,7 @@ public class XFilteredQuery extends Query {
if (filterAcceptDocs != null) {
return weight.scorer(context, scoreDocsInOrder, topScorer, filterAcceptDocs);
} else {
return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(context, scoreDocsInOrder, topScorer, weight, docIdSet);
return FilteredQuery.LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(context, scoreDocsInOrder, topScorer, weight, docIdSet);
}
}
@ -677,49 +213,13 @@ public class XFilteredQuery extends Query {
if (threshold == -1) {
// default value, don't iterate on only apply filter after query if its not a "fast" docIdSet
if (!DocIdSets.isFastIterator(docIdSet)) {
return QUERY_FIRST_FILTER_STRATEGY.filteredScorer(context, scoreDocsInOrder, topScorer, weight, docIdSet);
return FilteredQuery.QUERY_FIRST_FILTER_STRATEGY.filteredScorer(context, scoreDocsInOrder, topScorer, weight, docIdSet);
}
}
final DocIdSetIterator filterIter = docIdSet.iterator();
if (filterIter == null) {
// this means the filter does not accept any documents.
return null;
}
final int firstFilterDoc = filterIter.nextDoc();
if (firstFilterDoc == DocIdSetIterator.NO_MORE_DOCS) {
return null;
}
final Bits filterAcceptDocs = docIdSet.bits();
// force if RA is requested
final boolean useRandomAccess = (filterAcceptDocs != null && (useRandomAccess(filterAcceptDocs, firstFilterDoc)));
if (useRandomAccess) {
// if we are using random access, we return the inner scorer, just with other acceptDocs
return weight.scorer(context, scoreDocsInOrder, topScorer, filterAcceptDocs);
} else {
assert firstFilterDoc > -1;
// we are gonna advance() this scorer, so we set inorder=true/toplevel=false
// we pass null as acceptDocs, as our filter has already respected acceptDocs, no need to do twice
final Scorer scorer = weight.scorer(context, true, false, null);
// TODO once we have way to figure out if we use RA or LeapFrog we can remove this scorer
return (scorer == null) ? null : new PrimaryAdvancedLeapFrogScorer(weight, firstFilterDoc, filterIter, scorer);
}
return super.filteredScorer(context, scoreDocsInOrder, topScorer, weight, docIdSet);
}
/**
* Expert: decides if a filter should be executed as "random-access" or not.
* random-access means the filter "filters" in a similar way as deleted docs are filtered
* in Lucene. This is faster when the filter accepts many documents.
* However, when the filter is very sparse, it can be faster to execute the query+filter
* as a conjunction in some cases.
* <p/>
* The default implementation returns <code>true</code> if the first document accepted by the
* filter is < 100.
*
* @lucene.internal
*/
protected boolean useRandomAccess(Bits bits, int firstFilterDoc) {
// "default"
if (threshold == -1) {
@ -729,4 +229,5 @@ public class XFilteredQuery extends Query {
return firstFilterDoc < threshold;
}
}
}

View File

@ -344,7 +344,7 @@ public class FiltersFunctionScoreQuery extends Query {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return scorer.freq();
}
}

View File

@ -156,7 +156,7 @@ public class FunctionScoreQuery extends Query {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return scorer.freq();
}
}

View File

@ -21,6 +21,7 @@ package org.elasticsearch.index.codec;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.elasticsearch.index.codec.postingsformat.PostingsFormatProvider;
import org.elasticsearch.index.mapper.MapperService;
@ -33,7 +34,7 @@ import org.elasticsearch.index.mapper.MapperService;
* configured for a specific field the default postings format is used.
*/
// LUCENE UPGRADE: make sure to move to a new codec depending on the lucene version
public class PerFieldMappingPostingFormatCodec extends Lucene40Codec {
public class PerFieldMappingPostingFormatCodec extends Lucene41Codec {
private final MapperService mapperService;
private final PostingsFormat defaultPostingFormat;

View File

@ -21,7 +21,7 @@ package org.elasticsearch.index.codec.postingsformat;
import org.apache.lucene.codecs.BlockTreeTermsWriter;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -46,14 +46,14 @@ public class DefaultPostingsFormatProvider extends AbstractPostingsFormatProvide
private final int minBlockSize;
private final int maxBlockSize;
private final Lucene40PostingsFormat postingsFormat;
private final Lucene41PostingsFormat postingsFormat;
@Inject
public DefaultPostingsFormatProvider(@Assisted String name, @Assisted Settings postingsFormatSettings) {
super(name);
this.minBlockSize = postingsFormatSettings.getAsInt("min_block_size", BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE);
this.maxBlockSize = postingsFormatSettings.getAsInt("max_block_size", BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
this.postingsFormat = new Lucene40PostingsFormat(minBlockSize, maxBlockSize);
this.postingsFormat = new Lucene41PostingsFormat(minBlockSize, maxBlockSize);
}
public int minBlockSize() {

View File

@ -23,10 +23,10 @@ import com.google.common.collect.ImmutableCollection;
import com.google.common.collect.ImmutableMap;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
import org.apache.lucene.codecs.memory.DirectPostingsFormat;
import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
import org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat;
import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
import org.elasticsearch.common.collect.MapBuilder;
/**
@ -72,10 +72,10 @@ public class PostingFormats {
buildInPostingFormatsX.put("direct", new PreBuiltPostingsFormatProvider.Factory("direct", new DirectPostingsFormat()));
buildInPostingFormatsX.put("memory", new PreBuiltPostingsFormatProvider.Factory("memory", new MemoryPostingsFormat()));
// LUCENE UPGRADE: Need to change this to the relevant ones on a lucene upgrade
buildInPostingFormatsX.put("pulsing", new PreBuiltPostingsFormatProvider.Factory("pulsing", new Pulsing40PostingsFormat()));
buildInPostingFormatsX.put("bloom_pulsing", new PreBuiltPostingsFormatProvider.Factory("bloom_pulsing", new BloomFilteringPostingsFormat(new Pulsing40PostingsFormat(), new BloomFilterPostingsFormatProvider.CustomBloomFilterFactory())));
buildInPostingFormatsX.put("default", new PreBuiltPostingsFormatProvider.Factory("default", new Lucene40PostingsFormat()));
buildInPostingFormatsX.put("bloom_default", new PreBuiltPostingsFormatProvider.Factory("bloom_default", new BloomFilteringPostingsFormat(new Lucene40PostingsFormat(), new BloomFilterPostingsFormatProvider.CustomBloomFilterFactory())));
buildInPostingFormatsX.put("pulsing", new PreBuiltPostingsFormatProvider.Factory("pulsing", new Pulsing41PostingsFormat()));
buildInPostingFormatsX.put("bloom_pulsing", new PreBuiltPostingsFormatProvider.Factory("bloom_pulsing", new BloomFilteringPostingsFormat(new Pulsing41PostingsFormat(), new BloomFilterPostingsFormatProvider.CustomBloomFilterFactory())));
buildInPostingFormatsX.put("default", new PreBuiltPostingsFormatProvider.Factory("default", new Lucene41PostingsFormat()));
buildInPostingFormatsX.put("bloom_default", new PreBuiltPostingsFormatProvider.Factory("bloom_default", new BloomFilteringPostingsFormat(new Lucene41PostingsFormat(), new BloomFilterPostingsFormatProvider.CustomBloomFilterFactory())));
builtInPostingFormats = buildInPostingFormatsX.immutableMap();
}

View File

@ -21,7 +21,7 @@ package org.elasticsearch.index.codec.postingsformat;
import org.apache.lucene.codecs.BlockTreeTermsWriter;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat;
import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.settings.Settings;
@ -50,7 +50,7 @@ public class PulsingPostingsFormatProvider extends AbstractPostingsFormatProvide
private final int freqCutOff;
private final int minBlockSize;
private final int maxBlockSize;
private final Pulsing40PostingsFormat postingsFormat;
private final Pulsing41PostingsFormat postingsFormat;
@Inject
public PulsingPostingsFormatProvider(@Assisted String name, @Assisted Settings postingsFormatSettings) {
@ -58,7 +58,7 @@ public class PulsingPostingsFormatProvider extends AbstractPostingsFormatProvide
this.freqCutOff = postingsFormatSettings.getAsInt("freq_cut_off", 1);
this.minBlockSize = postingsFormatSettings.getAsInt("min_block_size", BlockTreeTermsWriter.DEFAULT_MIN_BLOCK_SIZE);
this.maxBlockSize = postingsFormatSettings.getAsInt("max_block_size", BlockTreeTermsWriter.DEFAULT_MAX_BLOCK_SIZE);
this.postingsFormat = new Pulsing40PostingsFormat(freqCutOff, minBlockSize, maxBlockSize);
this.postingsFormat = new Pulsing41PostingsFormat(freqCutOff, minBlockSize, maxBlockSize);
}
public int freqCutOff() {

View File

@ -255,11 +255,13 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine {
translogIdGenerator.set(Long.parseLong(commitUserData.get(Translog.TRANSLOG_ID_KEY)));
} else {
translogIdGenerator.set(System.currentTimeMillis());
indexWriter.commit(MapBuilder.<String, String>newMapBuilder().put(Translog.TRANSLOG_ID_KEY, Long.toString(translogIdGenerator.get())).map());
indexWriter.setCommitData(MapBuilder.<String, String>newMapBuilder().put(Translog.TRANSLOG_ID_KEY, Long.toString(translogIdGenerator.get())).map());
indexWriter.commit();
}
} else {
translogIdGenerator.set(System.currentTimeMillis());
indexWriter.commit(MapBuilder.<String, String>newMapBuilder().put(Translog.TRANSLOG_ID_KEY, Long.toString(translogIdGenerator.get())).map());
indexWriter.setCommitData(MapBuilder.<String, String>newMapBuilder().put(Translog.TRANSLOG_ID_KEY, Long.toString(translogIdGenerator.get())).map());
indexWriter.commit();
}
translog.newTranslog(translogIdGenerator.get());
this.searcherManager = buildSearchManager(indexWriter);
@ -823,7 +825,8 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine {
if (flushNeeded || flush.force()) {
flushNeeded = false;
long translogId = translogIdGenerator.incrementAndGet();
indexWriter.commit(MapBuilder.<String, String>newMapBuilder().put(Translog.TRANSLOG_ID_KEY, Long.toString(translogId)).map());
indexWriter.setCommitData(MapBuilder.<String, String>newMapBuilder().put(Translog.TRANSLOG_ID_KEY, Long.toString(translogId)).map());
indexWriter.commit();
translog.newTranslog(translogId);
}
@ -862,7 +865,8 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine {
try {
long translogId = translogIdGenerator.incrementAndGet();
translog.newTransientTranslog(translogId);
indexWriter.commit(MapBuilder.<String, String>newMapBuilder().put(Translog.TRANSLOG_ID_KEY, Long.toString(translogId)).map());
indexWriter.setCommitData(MapBuilder.<String, String>newMapBuilder().put(Translog.TRANSLOG_ID_KEY, Long.toString(translogId)).map());
indexWriter.commit();
if (flush.force()) {
// if we force, we might not have committed, we need to check that its the same id
Map<String, String> commitUserData = Lucene.readSegmentInfos(store.directory()).getUserData();
@ -914,7 +918,8 @@ public class RobinEngine extends AbstractIndexShardComponent implements Engine {
// other flushes use flushLock
try {
long translogId = translog.currentId();
indexWriter.commit(MapBuilder.<String, String>newMapBuilder().put(Translog.TRANSLOG_ID_KEY, Long.toString(translogId)).map());
indexWriter.setCommitData(MapBuilder.<String, String>newMapBuilder().put(Translog.TRANSLOG_ID_KEY, Long.toString(translogId)).map());
indexWriter.commit();
} catch (OutOfMemoryError e) {
translog.revertTransient();
failEngine(e);

View File

@ -209,11 +209,11 @@ public class LogByteSizeMergePolicyProvider extends AbstractIndexShardComponent
}
@Override
public MergeSpecification findMerges(SegmentInfos infos) throws IOException {
public MergeSpecification findMerges(MergeTrigger trigger, SegmentInfos infos) throws IOException {
if (enableMerge.get() == Boolean.FALSE) {
return null;
}
return super.findMerges(infos);
return super.findMerges(trigger, infos);
}
@Override

View File

@ -193,11 +193,11 @@ public class LogDocMergePolicyProvider extends AbstractIndexShardComponent imple
}
@Override
public MergeSpecification findMerges(SegmentInfos infos) throws IOException {
public MergeSpecification findMerges(MergeTrigger trigger, SegmentInfos infos) throws IOException {
if (enableMerge.get() == Boolean.FALSE) {
return null;
}
return super.findMerges(infos);
return super.findMerges(trigger, infos);
}
@Override

View File

@ -254,11 +254,11 @@ public class TieredMergePolicyProvider extends AbstractIndexShardComponent imple
}
@Override
public MergePolicy.MergeSpecification findMerges(SegmentInfos infos) throws IOException {
public MergePolicy.MergeSpecification findMerges(MergeTrigger trigger, SegmentInfos infos) throws IOException {
if (enableMerge.get() == Boolean.FALSE) {
return null;
}
return super.findMerges(infos);
return super.findMerges(trigger, infos);
}
@Override

View File

@ -22,7 +22,7 @@ package org.elasticsearch.index.percolator;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.memory.CustomMemoryIndex;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@ -292,7 +292,9 @@ public class PercolatorExecutor extends AbstractIndexComponent {
private Response percolate(DocAndQueryRequest request) throws ElasticSearchException {
// first, parse the source doc into a MemoryIndex
final CustomMemoryIndex memoryIndex = new CustomMemoryIndex();
final MemoryIndex memoryIndex = new MemoryIndex();
// TODO MemoryIndex now supports a reset call that reuses the internal memory
// maybe we can utilize this here.
// TODO: This means percolation does not support nested docs...
for (IndexableField field : request.doc().rootDoc().getFields()) {
@ -307,6 +309,7 @@ public class PercolatorExecutor extends AbstractIndexComponent {
try {
tokenStream = field.tokenStream(request.doc().analyzer());
if (tokenStream != null) {
tokenStream.reset();
memoryIndex.addField(field.name(), tokenStream, field.boost());
}
} catch (IOException e) {

View File

@ -20,6 +20,7 @@
package org.elasticsearch.index.query;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.Queries;
@ -60,7 +61,7 @@ public class FilteredQueryParser implements QueryParser {
String currentFieldName = null;
XContentParser.Token token;
XFilteredQuery.FilterStrategy filterStrategy = XFilteredQuery.CUSTOM_FILTER_STRATEGY;
FilteredQuery.FilterStrategy filterStrategy = XFilteredQuery.CUSTOM_FILTER_STRATEGY;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
@ -79,11 +80,11 @@ public class FilteredQueryParser implements QueryParser {
if ("strategy".equals(currentFieldName)) {
String value = parser.text();
if ("query_first".equals(value) || "queryFirst".equals(value)) {
filterStrategy = XFilteredQuery.QUERY_FIRST_FILTER_STRATEGY;
filterStrategy = FilteredQuery.QUERY_FIRST_FILTER_STRATEGY;
} else if ("random_access_random".equals(value) || "randomAccessAlways".equals(value)) {
filterStrategy = XFilteredQuery.ALWAYS_RANDOM_ACCESS_FILTER_STRATEGY;
} else if ("leap_frog".equals(value) || "leapFrog".equals(value)) {
filterStrategy = XFilteredQuery.LEAP_FROG_QUERY_FIRST_STRATEGY;
filterStrategy = FilteredQuery.LEAP_FROG_QUERY_FIRST_STRATEGY;
} else if (value.startsWith("random_access_")) {
int threshold = Integer.parseInt(value.substring("random_access_".length()));
filterStrategy = new XFilteredQuery.CustomRandomAccessFilterStrategy(threshold);
@ -91,9 +92,9 @@ public class FilteredQueryParser implements QueryParser {
int threshold = Integer.parseInt(value.substring("randomAccess".length()));
filterStrategy = new XFilteredQuery.CustomRandomAccessFilterStrategy(threshold);
} else if ("leap_frog_query_first".equals(value) || "leapFrogQueryFirst".equals(value)) {
filterStrategy = XFilteredQuery.LEAP_FROG_QUERY_FIRST_STRATEGY;
filterStrategy = FilteredQuery.LEAP_FROG_QUERY_FIRST_STRATEGY;
} else if ("leap_frog_filter_first".equals(value) || "leapFrogFilterFirst".equals(value)) {
filterStrategy = XFilteredQuery.LEAP_FROG_FILTER_FIRST_STRATEGY;
filterStrategy = FilteredQuery.LEAP_FROG_FILTER_FIRST_STRATEGY;
} else {
throw new QueryParsingException(parseContext.index(), "[filtered] strategy value not supported [" + value + "]");
}
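
Since the leap-frog and query-first strategies are stock Lucene 4.1 constants now, only the random-access variants remain on the Elasticsearch-side XFilteredQuery. A minimal sketch of what a parsed strategy ends up controlling, built directly on Lucene's FilteredQuery; the field and term names are illustrative:

FilteredQuery query = new FilteredQuery(
        new TermQuery(new Term("title", "bug")),                        // the main query
        new QueryWrapperFilter(new TermQuery(new Term("type", "doc"))), // the filter
        FilteredQuery.QUERY_FIRST_FILTER_STRATEGY);                     // "query_first" above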

View File

@ -230,7 +230,7 @@ public class ChildrenQuery extends Query implements ScopePhase.CollectorPhase {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
// We don't have the original child query hit info here...
// But the freq of the children could be collected and returned here, but that would make this Scorer more expensive.
return 1;

View File

@ -241,7 +241,7 @@ public class ParentQuery extends Query implements ScopePhase.CollectorPhase {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
// We don't have the original child query hit info here...
// But the freq of the children could be collected and returned here, but that would make this Scorer more expensive.
return 1;

View File

@ -303,7 +303,7 @@ public class TopChildrenQuery extends Query implements ScopePhase.TopDocsPhase {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return docs[index].count; // The number of matches in the child doc, which is propagated to parent
}
}
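
All of the custom scorers above follow the same Lucene 4.1 contract change: Scorer#freq() now returns the int term frequency instead of a float. A self-contained sketch of a delegating scorer under that contract; the class name DelegatingScorer is hypothetical:

import java.io.IOException;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;

final class DelegatingScorer extends Scorer {
    private final Scorer in;

    DelegatingScorer(Weight weight, Scorer in) {
        super(weight);
        this.in = in;
    }

    @Override
    public float score() throws IOException {
        return in.score();  // scores stay float
    }

    @Override
    public int freq() throws IOException {
        return in.freq();   // was 'float freq()' before the 4.1 upgrade
    }

    @Override
    public int docID() {
        return in.docID();
    }

    @Override
    public int nextDoc() throws IOException {
        return in.nextDoc();
    }

    @Override
    public int advance(int target) throws IOException {
        return in.advance(target);
    }
}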

View File

@ -212,7 +212,7 @@ public class BlockJoinQuery extends Query {
private int parentDoc = -1;
private int prevParentDoc;
private float parentScore;
private float parentFreq;
private int parentFreq;
private int nextChildDoc;
private int[] pendingChildDocs = new int[5];
@ -294,10 +294,10 @@ public class BlockJoinQuery extends Query {
continue;
}
float totalScore = 0;
float totalFreq = 0;
int totalScore = 0;
int totalFreq = 0;
float maxScore = Float.NEGATIVE_INFINITY;
float maxFreq = 0;
int maxFreq = 0;
childDocUpto = 0;
do {
@ -313,7 +313,7 @@ public class BlockJoinQuery extends Query {
if (scoreMode != ScoreMode.None) {
// TODO: specialize this into dedicated classes per-scoreMode
final float childScore = childScorer.score();
final float childFreq = childScorer.freq();
final int childFreq = childScorer.freq();
pendingChildScores[childDocUpto] = childScore;
maxScore = Math.max(childScore, maxScore);
maxFreq = Math.max(childFreq, maxFreq);
@ -364,7 +364,7 @@ public class BlockJoinQuery extends Query {
}
@Override
public float freq() throws IOException {
public int freq() throws IOException {
return parentFreq;
}

View File

@ -215,7 +215,7 @@ public class IncludeNestedDocsQuery extends Query {
return parentScorer.score();
}
public float freq() throws IOException {
public int freq() throws IOException {
return parentScorer.freq();
}

View File

@ -73,7 +73,7 @@ public abstract class FsDirectoryService extends AbstractIndexShardComponent imp
@Override
public void renameFile(Directory dir, String from, String to) throws IOException {
File directory = ((FSDirectory) dir).getDirectory();
File directory = ((RateLimitedFSDirectory) dir).wrappedDirectory().getDirectory();
File old = new File(directory, from);
File nu = new File(directory, to);
if (nu.exists())
@ -103,7 +103,7 @@ public abstract class FsDirectoryService extends AbstractIndexShardComponent imp
@Override
public void fullDelete(Directory dir) throws IOException {
FSDirectory fsDirectory = (FSDirectory) dir;
FSDirectory fsDirectory = ((RateLimitedFSDirectory) dir).wrappedDirectory();
FileSystemUtils.deleteRecursively(fsDirectory.getDirectory());
// if we are the last ones, delete also the actual index
String[] list = fsDirectory.getDirectory().getParentFile().list();
@ -111,6 +111,20 @@ public abstract class FsDirectoryService extends AbstractIndexShardComponent imp
FileSystemUtils.deleteRecursively(fsDirectory.getDirectory().getParentFile());
}
}
@Override
public Directory[] build() throws IOException {
File[] locations = indexStore.shardIndexLocations(shardId);
Directory[] dirs = new Directory[locations.length];
for (int i = 0; i < dirs.length; i++) {
FileSystemUtils.mkdirs(locations[i]);
FSDirectory wrapped = newFSDirectory(locations[i], buildLockFactory());
dirs[i] = new RateLimitedFSDirectory(wrapped, this, this);
}
return dirs;
}
protected abstract FSDirectory newFSDirectory(File location, LockFactory lockFactory) throws IOException;
@Override
public void onPause(long nanos) {

View File

@ -19,10 +19,10 @@
package org.elasticsearch.index.store.fs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.XMMapFSDirectory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.MMapDirectory;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.FileSystemUtils;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.index.shard.ShardId;
@ -41,13 +41,7 @@ public class MmapFsDirectoryService extends FsDirectoryService {
}
@Override
public Directory[] build() throws IOException {
File[] locations = indexStore.shardIndexLocations(shardId);
Directory[] dirs = new Directory[locations.length];
for (int i = 0; i < dirs.length; i++) {
FileSystemUtils.mkdirs(locations[i]);
dirs[i] = new XMMapFSDirectory(locations[i], buildLockFactory(), this, this);
}
return dirs;
protected FSDirectory newFSDirectory(File location, LockFactory lockFactory) throws IOException {
return new MMapDirectory(location, buildLockFactory());
}
}

View File

@ -19,10 +19,10 @@
package org.elasticsearch.index.store.fs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.XNIOFSDirectory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.NIOFSDirectory;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.FileSystemUtils;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.index.shard.ShardId;
@ -41,13 +41,7 @@ public class NioFsDirectoryService extends FsDirectoryService {
}
@Override
public Directory[] build() throws IOException {
File[] locations = indexStore.shardIndexLocations(shardId);
Directory[] dirs = new Directory[locations.length];
for (int i = 0; i < dirs.length; i++) {
FileSystemUtils.mkdirs(locations[i]);
dirs[i] = new XNIOFSDirectory(locations[i], buildLockFactory(), this, this);
}
return dirs;
protected FSDirectory newFSDirectory(File location, LockFactory lockFactory) throws IOException {
return new NIOFSDirectory(location, lockFactory);
}
}

View File

@ -19,10 +19,10 @@
package org.elasticsearch.index.store.fs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.XSimpleFSDirectory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.store.SimpleFSDirectory;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.io.FileSystemUtils;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.index.shard.ShardId;
@ -41,13 +41,7 @@ public class SimpleFsDirectoryService extends FsDirectoryService {
}
@Override
public Directory[] build() throws IOException {
File[] locations = indexStore.shardIndexLocations(shardId);
Directory[] dirs = new Directory[locations.length];
for (int i = 0; i < dirs.length; i++) {
FileSystemUtils.mkdirs(locations[i]);
dirs[i] = new XSimpleFSDirectory(locations[i], buildLockFactory(), this, this);
}
return dirs;
protected FSDirectory newFSDirectory(File location, LockFactory lockFactory) throws IOException {
return new SimpleFSDirectory(location, lockFactory);
}
}

View File

@ -20,8 +20,9 @@
package org.elasticsearch.indices.recovery;
import com.google.common.base.Objects;
import org.apache.lucene.store.RateLimiter;
import org.elasticsearch.cluster.metadata.MetaData;
import org.elasticsearch.common.RateLimiter;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.settings.Settings;
@ -56,7 +57,7 @@ public class RecoverySettings extends AbstractComponent {
private final ThreadPoolExecutor concurrentStreamPool;
private volatile ByteSizeValue maxSizePerSec;
private volatile RateLimiter rateLimiter;
private volatile RateLimiter.SimpleRateLimiter rateLimiter;
@Inject
public RecoverySettings(Settings settings, NodeSettingsService nodeSettingsService) {
@ -74,7 +75,7 @@ public class RecoverySettings extends AbstractComponent {
if (maxSizePerSec.bytes() <= 0) {
rateLimiter = null;
} else {
rateLimiter = new RateLimiter(maxSizePerSec.mbFrac());
rateLimiter = new RateLimiter.SimpleRateLimiter(maxSizePerSec.mbFrac());
}
logger.debug("using max_size_per_sec[{}], concurrent_streams [{}], file_chunk_size [{}], translog_size [{}], translog_ops [{}], and compress [{}]",
@ -131,9 +132,9 @@ public class RecoverySettings extends AbstractComponent {
if (maxSizePerSec.bytes() <= 0) {
rateLimiter = null;
} else if (rateLimiter != null) {
rateLimiter.setMaxRate(maxSizePerSec.mbFrac());
rateLimiter.setMbPerSec(maxSizePerSec.mbFrac());
} else {
rateLimiter = new RateLimiter(maxSizePerSec.mbFrac());
rateLimiter = new RateLimiter.SimpleRateLimiter(maxSizePerSec.mbFrac());
}
}
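
RecoverySettings now relies on Lucene 4.1's built-in rate limiter instead of the Elasticsearch one, and the setter is setMbPerSec rather than setMaxRate. A minimal usage sketch; the 20/40 MB/s figures and the chunkLength variable are illustrative:

RateLimiter.SimpleRateLimiter limiter = new RateLimiter.SimpleRateLimiter(20.0); // MB per second
// before sending a file chunk of 'chunkLength' bytes during recovery:
limiter.pause(chunkLength);    // blocks just long enough to stay under the configured rate
// when the node setting changes, the limit is adjusted in place:
limiter.setMbPerSec(40.0);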

View File

@ -207,6 +207,7 @@ public class HighlightPhase extends AbstractComponent implements FetchSubPhase {
String text = textToHighlight.toString();
Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers().indexAnalyzer();
TokenStream tokenStream = analyzer.tokenStream(mapper.names().indexName(), new FastStringReader(text));
tokenStream.reset();
TextFragment[] bestTextFragments = entry.highlighter.getBestTextFragments(tokenStream, text, false, numberOfFragments);
for (TextFragment bestTextFragment : bestTextFragments) {
if (bestTextFragment != null && bestTextFragment.getScore() > 0) {
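
The added reset() call satisfies the Lucene 4.x TokenStream workflow, which requires reset() before the first incrementToken(). A minimal sketch of the consumer-side contract, assuming an Analyzer instance is in scope; the field name and text are illustrative:

TokenStream stream = analyzer.tokenStream("title", new FastStringReader("some text to highlight"));
CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
stream.reset();                        // mandatory before the first incrementToken() in 4.x
while (stream.incrementToken()) {
    // consume termAtt.toString(), or hand the stream to the highlighter as above
}
stream.end();
stream.close();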

View File

@ -1,43 +0,0 @@
/*
* Licensed to ElasticSearch and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. ElasticSearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.lucene.index.memory;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.TermQuery;
import org.elasticsearch.common.lucene.Lucene;
import org.testng.annotations.Test;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
public class CustomMemoryIndexTests {
@Test
public void testSameFieldSeveralTimes() throws Exception {
CustomMemoryIndex memoryIndex = new CustomMemoryIndex();
memoryIndex.addField("field1", "value1", Lucene.KEYWORD_ANALYZER);
memoryIndex.addField("field1", "value2", Lucene.KEYWORD_ANALYZER);
IndexSearcher searcher = memoryIndex.createSearcher();
assertThat(searcher.search(new TermQuery(new Term("field1", "value1")), 10).totalHits, equalTo(1));
assertThat(searcher.search(new TermQuery(new Term("field1", "value2")), 10).totalHits, equalTo(1));
}
}
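
The deleted test is covered by Lucene 4.1 itself: MemoryIndex now accepts the same field twice, which is why CustomMemoryIndex could be dropped. An equivalent check against the stock class would look roughly like this:

MemoryIndex memoryIndex = new MemoryIndex();
memoryIndex.addField("field1", "value1", Lucene.KEYWORD_ANALYZER);
memoryIndex.addField("field1", "value2", Lucene.KEYWORD_ANALYZER); // no longer rejected in 4.1
IndexSearcher searcher = memoryIndex.createSearcher();
assertThat(searcher.search(new TermQuery(new Term("field1", "value1")), 10).totalHits, equalTo(1));
assertThat(searcher.search(new TermQuery(new Term("field1", "value2")), 10).totalHits, equalTo(1));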

View File

@ -241,8 +241,8 @@ public class HighlighterSearchTests extends AbstractNodesTests {
assertThat(search.hits().hits()[0].highlightFields().get("title").fragments()[0].string(), equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch"));
assertThat(search.hits().hits()[0].highlightFields().get("title").fragments()[1].string(), equalTo("The <em>bug</em> is bugging us"));
assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments().length, equalTo(2));
// assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[0], equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch"));
assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[0].string(), equalTo("highlighting <em>bug</em> present in elasticsearch")); // FastVectorHighlighter starts highlighting from startOffset - margin
assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[0].string(), equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch"));
// assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[0].string(), equalTo("highlighting <em>bug</em> present in elasticsearch")); // FastVectorHighlighter starts highlighting from startOffset - margin
assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[1].string(), equalTo("The <em>bug</em> is bugging us"));
search = client.prepareSearch()
@ -253,7 +253,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
assertThat(search.hits().totalHits(), equalTo(1l));
assertThat(search.hits().hits().length, equalTo(1));
assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments().length, equalTo(2));
assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[0].string(), equalTo("text to <em>highlight</em>"));
assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[0].string(), equalTo("some text to <em>highlight</em>"));
assertThat(search.hits().hits()[0].highlightFields().get("titleTV").fragments()[1].string(), equalTo("<em>highlight</em> other text"));
}
@ -559,7 +559,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
SearchResponse search = client.prepareSearch()
.setQuery(fieldQuery("title", "bug"))
.addHighlightedField("title", 50, 1, 10)
.addHighlightedField("title", 30, 1, 10)
.execute().actionGet();
assertThat(Arrays.toString(search.shardFailures()), search.failedShards(), equalTo(0));
@ -634,7 +634,7 @@ public class HighlighterSearchTests extends AbstractNodesTests {
SearchResponse search = client.prepareSearch()
.setQuery(fieldQuery("title", "test"))
.setHighlighterEncoder("html")
.addHighlightedField("title", 50, 1, 10)
.addHighlightedField("title", 30, 1, 10)
.execute().actionGet();

View File

@ -98,6 +98,7 @@ public class CompoundAnalysisTests {
allEntries.reset();
TokenStream stream = AllTokenStream.allTokenStream("_all", allEntries, analyzer);
stream.reset();
CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
List<String> terms = new ArrayList<String>();

View File

@ -90,6 +90,7 @@ public class SynonymsAnalysisTest {
allEntries.reset();
TokenStream stream = AllTokenStream.allTokenStream("_all", allEntries, analyzer);
stream.reset();
CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class);
StringBuilder sb = new StringBuilder();

View File

@ -23,10 +23,12 @@ import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.bloom.BloomFilteringPostingsFormat;
import org.apache.lucene.codecs.lucene40.Lucene40Codec;
import org.apache.lucene.codecs.lucene40.Lucene40PostingsFormat;
import org.apache.lucene.codecs.lucene41.Lucene41Codec;
import org.apache.lucene.codecs.lucene41.Lucene41PostingsFormat;
import org.apache.lucene.codecs.memory.DirectPostingsFormat;
import org.apache.lucene.codecs.memory.MemoryPostingsFormat;
import org.apache.lucene.codecs.perfield.PerFieldPostingsFormat;
import org.apache.lucene.codecs.pulsing.Pulsing40PostingsFormat;
import org.apache.lucene.codecs.pulsing.Pulsing41PostingsFormat;
import org.apache.lucene.codecs.simpletext.SimpleTextCodec;
import org.elasticsearch.common.inject.Injector;
import org.elasticsearch.common.inject.ModulesBuilder;
@ -60,6 +62,7 @@ public class CodecTests {
CodecService codecService = createCodecService();
assertThat(codecService.codec("default"), instanceOf(PerFieldMappingPostingFormatCodec.class));
assertThat(codecService.codec("Lucene40"), instanceOf(Lucene40Codec.class));
assertThat(codecService.codec("Lucene41"), instanceOf(Lucene41Codec.class));
assertThat(codecService.codec("SimpleText"), instanceOf(SimpleTextCodec.class));
}
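
With the upgrade, the named default codec and postings format move from Lucene40 to Lucene41; both are resolved by name through Lucene's SPI, so the test only needed the literals bumped. A minimal sketch of the lookup the assertions rely on:

Codec codec = Codec.forName("Lucene41");                       // default codec in Lucene 4.1
PostingsFormat postings = PostingsFormat.forName("Lucene41");  // default postings format
assert codec.postingsFormat() instanceof PerFieldPostingsFormat;
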
@ -69,9 +72,9 @@ public class CodecTests {
assertThat(postingsFormatService.get("default"), instanceOf(PreBuiltPostingsFormatProvider.class));
// Should fail when upgrading Lucene with codec changes
assertThat(postingsFormatService.get("default").get(), instanceOf(((PerFieldPostingsFormat) Codec.getDefault().postingsFormat()).getPostingsFormatForField(null).getClass()));
assertThat(postingsFormatService.get("Lucene40"), instanceOf(PreBuiltPostingsFormatProvider.class));
assertThat(postingsFormatService.get("Lucene41"), instanceOf(PreBuiltPostingsFormatProvider.class));
// Should fail when upgrading Lucene with codec changes
assertThat(postingsFormatService.get("Lucene40").get(), instanceOf(((PerFieldPostingsFormat) Codec.getDefault().postingsFormat()).getPostingsFormatForField(null).getClass()));
assertThat(postingsFormatService.get("Lucene41").get(), instanceOf(((PerFieldPostingsFormat) Codec.getDefault().postingsFormat()).getPostingsFormatForField(null).getClass()));
assertThat(postingsFormatService.get("bloom_default"), instanceOf(PreBuiltPostingsFormatProvider.class));
assertThat(postingsFormatService.get("bloom_default").get(), instanceOf(BloomFilteringPostingsFormat.class));
@ -82,9 +85,9 @@ public class CodecTests {
assertThat(postingsFormatService.get("bloom_pulsing").get(), instanceOf(BloomFilteringPostingsFormat.class));
assertThat(postingsFormatService.get("pulsing"), instanceOf(PreBuiltPostingsFormatProvider.class));
assertThat(postingsFormatService.get("pulsing").get(), instanceOf(Pulsing40PostingsFormat.class));
assertThat(postingsFormatService.get("Pulsing40"), instanceOf(PreBuiltPostingsFormatProvider.class));
assertThat(postingsFormatService.get("Pulsing40").get(), instanceOf(Pulsing40PostingsFormat.class));
assertThat(postingsFormatService.get("pulsing").get(), instanceOf(Pulsing41PostingsFormat.class));
assertThat(postingsFormatService.get("Pulsing41"), instanceOf(PreBuiltPostingsFormatProvider.class));
assertThat(postingsFormatService.get("Pulsing41").get(), instanceOf(Pulsing41PostingsFormat.class));
assertThat(postingsFormatService.get("memory"), instanceOf(PreBuiltPostingsFormatProvider.class));
assertThat(postingsFormatService.get("memory").get(), instanceOf(MemoryPostingsFormat.class));
@ -114,7 +117,7 @@ public class CodecTests {
CodecService codecService = createCodecService(indexSettings);
DocumentMapper documentMapper = codecService.mapperService().documentMapperParser().parse(mapping);
assertThat(documentMapper.mappers().name("field1").mapper().postingsFormatProvider(), instanceOf(PreBuiltPostingsFormatProvider.class));
assertThat(documentMapper.mappers().name("field1").mapper().postingsFormatProvider().get(), instanceOf(Lucene40PostingsFormat.class));
assertThat(documentMapper.mappers().name("field1").mapper().postingsFormatProvider().get(), instanceOf(Lucene41PostingsFormat.class));
assertThat(documentMapper.mappers().name("field2").mapper().postingsFormatProvider(), instanceOf(DefaultPostingsFormatProvider.class));
DefaultPostingsFormatProvider provider = (DefaultPostingsFormatProvider) documentMapper.mappers().name("field2").mapper().postingsFormatProvider();
@ -190,7 +193,7 @@ public class CodecTests {
CodecService codecService = createCodecService(indexSettings);
DocumentMapper documentMapper = codecService.mapperService().documentMapperParser().parse(mapping);
assertThat(documentMapper.mappers().name("field1").mapper().postingsFormatProvider(), instanceOf(PreBuiltPostingsFormatProvider.class));
assertThat(documentMapper.mappers().name("field1").mapper().postingsFormatProvider().get(), instanceOf(Pulsing40PostingsFormat.class));
assertThat(documentMapper.mappers().name("field1").mapper().postingsFormatProvider().get(), instanceOf(Pulsing41PostingsFormat.class));
assertThat(documentMapper.mappers().name("field2").mapper().postingsFormatProvider(), instanceOf(PulsingPostingsFormatProvider.class));
PulsingPostingsFormatProvider provider = (PulsingPostingsFormatProvider) documentMapper.mappers().name("field2").mapper().postingsFormatProvider();