commit 8e0a479316
parent 5528370e24
@@ -25,12 +25,6 @@ org.apache.lucene.index.IndexReader#decRef()
org.apache.lucene.index.IndexReader#incRef()
org.apache.lucene.index.IndexReader#tryIncRef()

org.apache.lucene.index.IndexWriter#maybeMerge() @ use Merges#maybeMerge
org.apache.lucene.index.IndexWriter#forceMerge(int) @ use Merges#forceMerge
org.apache.lucene.index.IndexWriter#forceMerge(int,boolean) @ use Merges#forceMerge
org.apache.lucene.index.IndexWriter#forceMergeDeletes() @ use Merges#forceMergeDeletes
org.apache.lucene.index.IndexWriter#forceMergeDeletes(boolean) @ use Merges#forceMergeDeletes

@defaultMessage QueryWrapperFilter is cachable by default - use Queries#wrap instead
org.apache.lucene.search.QueryWrapperFilter#<init>(org.apache.lucene.search.Query)
@@ -23,26 +23,19 @@ The location of the hunspell directory can be configured using the
`indices.analysis.hunspell.dictionary.location` settings in
_elasticsearch.yml_.

Each dictionary can be configured with two settings:
Each dictionary can be configured with one setting:

`ignore_case`::
    If true, dictionary matching will be case insensitive
    (defaults to `false`)

`strict_affix_parsing`::
    Determines whether errors while reading an
    affix rules file will cause an exception or simply be ignored (defaults to
    `true`)
This setting can be configured globally in `elasticsearch.yml` using

These settings can be configured globally in `elasticsearch.yml` using

* `indices.analysis.hunspell.dictionary.ignore_case` and
* `indices.analysis.hunspell.dictionary.strict_affix_parsing`
* `indices.analysis.hunspell.dictionary.ignore_case`

or for specific dictionaries:

* `indices.analysis.hunspell.dictionary.en_US.ignore_case` and
* `indices.analysis.hunspell.dictionary.en_US.strict_affix_parsing`.
* `indices.analysis.hunspell.dictionary.en_US.ignore_case`.

It is also possible to add `settings.yml` file under the dictionary
directory which holds these settings (this will override any other
@@ -87,10 +80,9 @@ The hunspell token filter accepts four options:
    If only unique terms should be returned, this needs to be
    set to `true`. Defaults to `true`.

`recursion_level`::
    Configures the recursion level a
    stemmer can go into. Defaults to `2`. Some languages (for example Czech)
    give better results when set to `1` or `0`, so you should test it out.
`longest_only`::
    If only the longest term should be returned, set this to `true`.
    Defaults to `false`: all possible stems are returned.

NOTE: As opposed to the snowball stemmers (which are algorithm based)
this is a dictionary lookup based stemmer and therefore the quality of
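Not part of the commit, but a minimal sketch of the Lucene 4.8 hunspell API the documentation change above reflects: `HunspellDictionary` plus `recursion_level` is replaced by `Dictionary` plus a `longest_only` flag on `HunspellStemFilter`. It assumes the two-argument Dictionary(InputStream, InputStream) constructor; the en_US.aff/en_US.dic paths and the class name are placeholders.

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;

import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Paths;

public class HunspellSketch {
    // Wraps an existing TokenStream with the Lucene 4.8 hunspell stemmer.
    public static TokenStream stem(TokenStream input) throws Exception {
        try (InputStream affix = Files.newInputStream(Paths.get("en_US.aff"));   // placeholder path
             InputStream words = Files.newInputStream(Paths.get("en_US.dic"))) { // placeholder path
            Dictionary dictionary = new Dictionary(affix, words); // replaces HunspellDictionary
            boolean dedup = true;        // the filter's `dedup` option (defaults to true)
            boolean longestOnly = false; // the new `longest_only` option (defaults to false)
            return new HunspellStemFilter(input, dictionary, dedup, longestOnly);
        }
    }
}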
pom.xml
@@ -31,7 +31,7 @@
    </parent>

    <properties>
        <lucene.version>4.7.2</lucene.version>
        <lucene.version>4.8.0</lucene.version>
        <tests.jvms>auto</tests.jvms>
        <tests.shuffle>true</tests.shuffle>
        <tests.output>onerror</tests.output>
@@ -1059,7 +1059,6 @@
        <exclude>org/elasticsearch/plugins/PluginManager.class</exclude>
        <exclude>org/elasticsearch/bootstrap/Bootstrap.class</exclude>
        <exclude>org/elasticsearch/Version.class</exclude>
        <exclude>org/elasticsearch/index/merge/Merges.class</exclude>
        <exclude>org/elasticsearch/common/lucene/search/Queries$QueryWrapperFilterFactory.class</exclude>
        <!-- end excludes for valid system-out -->
        <!-- start excludes for Unsafe -->
@@ -1,56 +0,0 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.lucene.analysis.miscellaneous;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

import java.io.IOException;

/**
 * A token filter that truncates tokens.
 */
public class TruncateTokenFilter extends TokenFilter {

    private final CharTermAttribute termAttribute = addAttribute(CharTermAttribute.class);

    private final int size;

    public TruncateTokenFilter(TokenStream in, int size) {
        super(in);
        this.size = size;
    }

    @Override
    public final boolean incrementToken() throws IOException {
        if (input.incrementToken()) {
            final int length = termAttribute.length();
            if (length > size) {
                termAttribute.setLength(size);
            }
            return true;
        } else {
            return false;
        }
    }
}
@@ -91,7 +91,7 @@ public class TrackingSerialMergeScheduler extends MergeScheduler {
     * multiple threads, only one merge may run at a time.
     */
    @Override
    synchronized public void merge(IndexWriter writer) throws CorruptIndexException, IOException {
    synchronized public void merge(IndexWriter writer, MergeTrigger trigger, boolean newMergesFound) throws CorruptIndexException, IOException {
        int cycle = 0;
        while (cycle++ < maxMergeAtOnce) {
            MergePolicy.OneMerge merge = writer.getNextMerge();
@@ -22,13 +22,8 @@ package org.apache.lucene.queries;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.index.mapper.FieldMapper;

@@ -84,91 +79,14 @@ public class ExtendedCommonTermsQuery extends CommonTermsQuery {
        return lowFreqMinNumShouldMatchSpec;
    }

    // LUCENE-UPGRADE: remove this method if on 4.8
    @Override
    public Query rewrite(IndexReader reader) throws IOException {
        if (this.terms.isEmpty()) {
            return new BooleanQuery();
        } else if (this.terms.size() == 1) {
            final Query tq = newTermQuery(this.terms.get(0), null);
            tq.setBoost(getBoost());
            return tq;
        }
        return super.rewrite(reader);
    }

    // LUCENE-UPGRADE: remove this method if on 4.8
    @Override
    protected Query buildQuery(final int maxDoc,
                               final TermContext[] contextArray, final Term[] queryTerms) {
        BooleanQuery lowFreq = new BooleanQuery(disableCoord);
        BooleanQuery highFreq = new BooleanQuery(disableCoord);
        highFreq.setBoost(highFreqBoost);
        lowFreq.setBoost(lowFreqBoost);
        BooleanQuery query = new BooleanQuery(true);
        for (int i = 0; i < queryTerms.length; i++) {
            TermContext termContext = contextArray[i];
            if (termContext == null) {
                lowFreq.add(newTermQuery(queryTerms[i], null), lowFreqOccur);
            } else {
                if ((maxTermFrequency >= 1f && termContext.docFreq() > maxTermFrequency)
                        || (termContext.docFreq() > (int) Math.ceil(maxTermFrequency * (float) maxDoc))) {
                    highFreq.add(newTermQuery(queryTerms[i], termContext), highFreqOccur);
                } else {
                    lowFreq.add(newTermQuery(queryTerms[i], termContext), lowFreqOccur);
                }
            }

        }
        final int numLowFreqClauses = lowFreq.clauses().size();
        final int numHighFreqClauses = highFreq.clauses().size();
        if (lowFreqOccur == Occur.SHOULD && numLowFreqClauses > 0) {
            int minMustMatch = calcLowFreqMinimumNumberShouldMatch(numLowFreqClauses);
            lowFreq.setMinimumNumberShouldMatch(minMustMatch);
        }
        if (highFreqOccur == Occur.SHOULD && numHighFreqClauses > 0) {
            int minMustMatch = calcHighFreqMinimumNumberShouldMatch(numHighFreqClauses);
            highFreq.setMinimumNumberShouldMatch(minMustMatch);
        }
        if (lowFreq.clauses().isEmpty()) {
            /*
             * if lowFreq is empty we rewrite the high freq terms in a conjunction to
             * prevent slow queries.
             */
            if (highFreq.getMinimumNumberShouldMatch() == 0 && highFreqOccur != Occur.MUST) {
                for (BooleanClause booleanClause : highFreq) {
                    booleanClause.setOccur(Occur.MUST);
                }
            }
            highFreq.setBoost(getBoost());
            return highFreq;
        } else if (highFreq.clauses().isEmpty()) {
            // only do low freq terms - we don't have high freq terms
            lowFreq.setBoost(getBoost());
            return lowFreq;
        } else {
            query.add(highFreq, Occur.SHOULD);
            query.add(lowFreq, Occur.MUST);
            query.setBoost(getBoost());
            return query;
        }
    }

    static {
        assert Version.LUCENE_47.onOrAfter(Lucene.VERSION) : "Remove obsolete code after upgrade to lucene 4.8";
    }

    //@Override
    // LUCENE-UPGRADE: remove this method if on 4.8
    protected Query newTermQuery(Term term, TermContext context) {
        if (mapper == null) {
            // this should be super.newTermQuery(term, context) once it's available in the super class
            return context == null ? new TermQuery(term) : new TermQuery(term, context);
            return super.newTermQuery(term, context);
        }
        final Query query = mapper.queryStringTermQuery(term);
        if (query == null) {
            // this should be super.newTermQuery(term, context) once it's available in the super class
            return context == null ? new TermQuery(term) : new TermQuery(term, context);
            return super.newTermQuery(term, context);
        } else {
            return query;
        }
@@ -24,14 +24,14 @@ import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenStreamToAutomaton;
import org.apache.lucene.search.suggest.InputIterator;
import org.apache.lucene.search.suggest.Lookup;
import org.apache.lucene.search.suggest.Sort;
import org.apache.lucene.store.*;
import org.apache.lucene.util.*;
import org.apache.lucene.util.automaton.*;
import org.apache.lucene.util.fst.*;
import org.apache.lucene.util.fst.FST.BytesReader;
import org.apache.lucene.util.fst.PairOutputs.Pair;
import org.apache.lucene.util.fst.Util.MinResult;
import org.apache.lucene.util.fst.Util.Result;
import org.apache.lucene.util.fst.Util.TopResults;
import org.elasticsearch.common.collect.HppcMaps;

import java.io.File;
@@ -419,14 +419,14 @@ public class XAnalyzingSuggester extends Lookup {
    @Override
    public void build(InputIterator iterator) throws IOException {
        String prefix = getClass().getSimpleName();
        File directory = Sort.defaultTempDir();
        File directory = OfflineSorter.defaultTempDir();
        File tempInput = File.createTempFile(prefix, ".input", directory);
        File tempSorted = File.createTempFile(prefix, ".sorted", directory);

        hasPayloads = iterator.hasPayloads();

        Sort.ByteSequencesWriter writer = new Sort.ByteSequencesWriter(tempInput);
        Sort.ByteSequencesReader reader = null;
        OfflineSorter.ByteSequencesWriter writer = new OfflineSorter.ByteSequencesWriter(tempInput);
        OfflineSorter.ByteSequencesReader reader = null;
        BytesRef scratch = new BytesRef();

        TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton();
@@ -502,12 +502,12 @@ public class XAnalyzingSuggester extends Lookup {
        writer.close();

        // Sort all input/output pairs (required by FST.Builder):
        new Sort(new AnalyzingComparator(hasPayloads)).sort(tempInput, tempSorted);
        new OfflineSorter(new AnalyzingComparator(hasPayloads)).sort(tempInput, tempSorted);

        // Free disk space:
        tempInput.delete();

        reader = new Sort.ByteSequencesReader(tempSorted);
        reader = new OfflineSorter.ByteSequencesReader(tempSorted);

        PairOutputs<Long,BytesRef> outputs = new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton());
        Builder<Pair<Long,BytesRef>> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs);
@@ -692,7 +692,7 @@ public class XAnalyzingSuggester extends Lookup {
    }

    @Override
    public List<LookupResult> lookup(final CharSequence key, boolean onlyMorePopular, int num) {
    public List<LookupResult> lookup(final CharSequence key, Set<BytesRef> contexts, boolean onlyMorePopular, int num) {
        assert num > 0;

        if (onlyMorePopular) {
@@ -763,7 +763,7 @@ public class XAnalyzingSuggester extends Lookup {
            }
        }

        MinResult<Pair<Long,BytesRef>> completions[] = searcher.search();
        Util.TopResults<Pair<Long,BytesRef>> completions = searcher.search();

        // NOTE: this is rather inefficient: we enumerate
        // every matching "exactly the same analyzed form"
@@ -777,7 +777,7 @@ public class XAnalyzingSuggester extends Lookup {
        // seach: it's bounded by how many prefix start
        // nodes we have and the
        // maxSurfaceFormsPerAnalyzedForm:
        for(MinResult<Pair<Long,BytesRef>> completion : completions) {
        for(Result<Pair<Long,BytesRef>> completion : completions) {
            BytesRef output2 = completion.output.output2;
            if (sameSurfaceForm(utf8Key, output2)) {
                results.add(getLookupResult(completion.output.output1, output2, spare));
@@ -832,9 +832,9 @@ public class XAnalyzingSuggester extends Lookup {
            searcher.addStartPaths(path.fstNode, path.output, true, path.input);
        }

        MinResult<Pair<Long,BytesRef>> completions[] = searcher.search();
        TopResults<Pair<Long,BytesRef>> completions = searcher.search();

        for(MinResult<Pair<Long,BytesRef>> completion : completions) {
        for(Result<Pair<Long,BytesRef>> completion : completions) {

            LookupResult result = getLookupResult(completion.output.output1, completion.output.output2, spare);
@@ -177,9 +177,9 @@ public class Version implements Serializable {
    public static final int V_1_1_2_ID = /*00*/1010299;
    public static final Version V_1_1_2 = new Version(V_1_1_2_ID, false, org.apache.lucene.util.Version.LUCENE_47);
    public static final int V_1_2_0_ID = /*00*/1020099;
    public static final Version V_1_2_0 = new Version(V_1_2_0_ID, false, org.apache.lucene.util.Version.LUCENE_47);
    public static final Version V_1_2_0 = new Version(V_1_2_0_ID, false, org.apache.lucene.util.Version.LUCENE_48);
    public static final int V_2_0_0_ID = /*00*/2000099;
    public static final Version V_2_0_0 = new Version(V_2_0_0_ID, true, org.apache.lucene.util.Version.LUCENE_47);
    public static final Version V_2_0_0 = new Version(V_2_0_0_ID, true, org.apache.lucene.util.Version.LUCENE_48);

    public static final Version CURRENT = V_2_0_0;
@@ -19,14 +19,12 @@

package org.elasticsearch.common.io;

import org.apache.lucene.util.Constants;
import org.apache.lucene.util.ThreadInterruptedException;
import org.elasticsearch.Version;
import org.apache.lucene.util.IOUtils;
import org.elasticsearch.common.logging.ESLogger;

import java.io.*;
import java.nio.channels.FileChannel;
import java.nio.file.StandardOpenOption;
import java.io.File;
import java.io.FileFilter;
import java.io.IOException;

/**
 *
@@ -79,6 +77,7 @@ public class FileSystemUtils {
     * the given root files will be deleted as well. Otherwise only their content is deleted.
     */
    public static boolean deleteRecursively(File[] roots, boolean deleteRoots) {

        boolean deleted = true;
        for (File root : roots) {
            deleted &= deleteRecursively(root, deleteRoots);
@@ -86,6 +85,27 @@ public class FileSystemUtils {
        return deleted;
    }

    /**
     * Deletes all subdirectories of the given roots recursively.
     */
    public static boolean deleteSubDirectories(File[] roots) {

        boolean deleted = true;
        for (File root : roots) {
            if (root.isDirectory()) {
                File[] files = root.listFiles(new FileFilter() {
                    @Override
                    public boolean accept(File pathname) {
                        return pathname.isDirectory();
                    }
                });
                deleted &= deleteRecursively(files, true);
            }

        }
        return deleted;
    }

    /**
     * Deletes the given files recursively including the given roots.
     */
@@ -122,10 +142,6 @@ public class FileSystemUtils {
        return false;
    }

    static {
        assert Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_47 : "Use IOUtils#fsync instead of syncFile in Lucene 4.8";
    }

    /**
     * Ensure that any writes to the given file is written to the storage device that contains it.
     * @param fileToSync the file to fsync
@@ -133,45 +149,7 @@ public class FileSystemUtils {
     * because not all file systems and operating systems allow to fsync on a directory)
     */
    public static void syncFile(File fileToSync, boolean isDir) throws IOException {
        IOException exc = null;

        // If the file is a directory we have to open read-only, for regular files we must open r/w for the fsync to have an effect.
        // See http://blog.httrack.com/blog/2013/11/15/everything-you-always-wanted-to-know-about-fsync/
        try (final FileChannel file = FileChannel.open(fileToSync.toPath(), isDir ? StandardOpenOption.READ : StandardOpenOption.WRITE)) {
            for (int retry = 0; retry < 5; retry++) {
                try {
                    file.force(true);
                    return;
                } catch (IOException ioe) {
                    if (exc == null) {
                        exc = ioe;
                    }
                    try {
                        // Pause 5 msec
                        Thread.sleep(5L);
                    } catch (InterruptedException ie) {
                        ThreadInterruptedException ex = new ThreadInterruptedException(ie);
                        ex.addSuppressed(exc);
                        throw ex;
                    }
                }
            }
        } catch (IOException ioe) {
            if (exc == null) {
                exc = ioe;
            }
        }

        if (isDir) {
            assert (Constants.LINUX || Constants.MAC_OS_X) == false :
                "On Linux and MacOSX fsyncing a directory should not throw IOException, "+
                "we just don't want to rely on that in production (undocumented). Got: " + exc;
            // Ignore exception if it is a directory
            return;
        }

        // Throw original exception
        throw exc;
        IOUtils.fsync(fileToSync, isDir);
    }

    /**
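Not part of the commit: a minimal usage sketch of the Lucene IOUtils#fsync call that the simplified syncFile above now delegates to (as the removed static assert anticipated). The file path, helper class, and the extra directory fsync are illustrative assumptions, not code from this change.

import org.apache.lucene.util.IOUtils;

import java.io.File;
import java.io.IOException;

public class FsyncSketch {
    // Makes a freshly written file durable via Lucene's IOUtils.
    public static void flushDurably(File justWritten) throws IOException {
        IOUtils.fsync(justWritten, false);                // fsync the file itself
        IOUtils.fsync(justWritten.getParentFile(), true); // best-effort fsync of its directory
    }
}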
@@ -23,6 +23,7 @@ import org.apache.lucene.store.Directory;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.file.NoSuchFileException;

/**
 * A set of utilities for Lucene {@link Directory}.
@@ -40,7 +41,7 @@ public class Directories {
        for (String file : files) {
            try {
                estimatedSize += directory.fileLength(file);
            } catch (FileNotFoundException e) {
            } catch (NoSuchFileException | FileNotFoundException e) {
                // ignore, the file is not there no more
            }
        }

@@ -45,7 +45,7 @@ import java.io.IOException;
 */
public class Lucene {

    public static final Version VERSION = Version.LUCENE_47;
    public static final Version VERSION = Version.LUCENE_48;
    public static final Version ANALYZER_VERSION = VERSION;
    public static final Version QUERYPARSER_VERSION = VERSION;

@@ -61,6 +61,9 @@ public class Lucene {
        if (version == null) {
            return defaultVersion;
        }
        if ("4.8".equals(version)) {
            return VERSION.LUCENE_48;
        }
        if ("4.7".equals(version)) {
            return VERSION.LUCENE_47;
        }
@@ -21,30 +21,11 @@ package org.elasticsearch.common.lucene;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.FilterAtomicReader;
import org.apache.lucene.index.SegmentReader;
import org.apache.lucene.util.Version;
import org.elasticsearch.ElasticsearchIllegalStateException;
import org.elasticsearch.common.Nullable;

import java.lang.reflect.Field;

public class SegmentReaderUtils {

    private static final Field FILTER_ATOMIC_READER_IN;

    static {
        assert Version.LUCENE_47.onOrAfter(Lucene.VERSION) : "Lucene 4.8 has FilterAtomicReader.unwrap";

        Field in = null;
        try { // and another one bites the dust...
            in = FilterAtomicReader.class.getDeclaredField("in");
            in.setAccessible(true);
        } catch (NoSuchFieldException e) {
            assert false : "Failed to get field: " + e.getMessage();
        }
        FILTER_ATOMIC_READER_IN = in;

    }

    /**
     * Tries to extract a segment reader from the given index reader.
     * If no SegmentReader can be extracted an {@link org.elasticsearch.ElasticsearchIllegalStateException} is thrown.
@@ -80,11 +61,7 @@ public class SegmentReaderUtils {
            return (SegmentReader) reader;
        } else if (reader instanceof FilterAtomicReader) {
            final FilterAtomicReader fReader = (FilterAtomicReader) reader;
            try {
                return FILTER_ATOMIC_READER_IN == null ? null :
                        segmentReader((AtomicReader) FILTER_ATOMIC_READER_IN.get(fReader));
            } catch (IllegalAccessException e) {
            }
            return segmentReader(FilterAtomicReader.unwrap(fReader));
        }
        if (fail) {
            // hard fail - we can't get a SegmentReader
@@ -62,8 +62,7 @@ public class AllTermQuery extends SpanTermQuery {
    }

    @Override
    public AllTermSpanScorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder,
            boolean topScorer, Bits acceptDocs) throws IOException {
    public AllTermSpanScorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        if (this.stats == null) {
            return null;
        }
@@ -147,7 +146,7 @@ public class AllTermQuery extends SpanTermQuery {

    @Override
    public Explanation explain(AtomicReaderContext context, int doc) throws IOException{
        AllTermSpanScorer scorer = scorer(context, true, false, context.reader().getLiveDocs());
        AllTermSpanScorer scorer = scorer(context, context.reader().getLiveDocs());
        if (scorer != null) {
            int newDoc = scorer.advance(doc);
            if (newDoc == doc) {

@@ -57,7 +57,7 @@ public final class MatchNoDocsQuery extends Query {
    }

    @Override
    public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
    public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        return null;
    }

@@ -211,14 +211,14 @@ public final class XFilteredQuery extends Query {
    }

    @Override
    public Scorer filteredScorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Weight weight, DocIdSet docIdSet) throws IOException {
    public Scorer filteredScorer(AtomicReaderContext context, Weight weight, DocIdSet docIdSet) throws IOException {
        // CHANGE: If threshold is 0, always pass down the accept docs, don't pay the price of calling nextDoc even...
        if (threshold == 0) {
            final Bits filterAcceptDocs = docIdSet.bits();
            if (filterAcceptDocs != null) {
                return weight.scorer(context, scoreDocsInOrder, topScorer, filterAcceptDocs);
                return weight.scorer(context, filterAcceptDocs);
            } else {
                return FilteredQuery.LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(context, scoreDocsInOrder, topScorer, weight, docIdSet);
                return FilteredQuery.LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(context, weight, docIdSet);
            }
        }

@@ -226,11 +226,11 @@ public final class XFilteredQuery extends Query {
        if (threshold == -1) {
            // default value, don't iterate on only apply filter after query if its not a "fast" docIdSet
            if (!DocIdSets.isFastIterator(docIdSet)) {
                return FilteredQuery.QUERY_FIRST_FILTER_STRATEGY.filteredScorer(context, scoreDocsInOrder, topScorer, weight, docIdSet);
                return FilteredQuery.QUERY_FIRST_FILTER_STRATEGY.filteredScorer(context, weight, docIdSet);
            }
        }

        return super.filteredScorer(context, scoreDocsInOrder, topScorer, weight, docIdSet);
        return super.filteredScorer(context, weight, docIdSet);
    }

    /**

@@ -150,11 +150,11 @@ public class FiltersFunctionScoreQuery extends Query {
    }

    @Override
    public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
    public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        // we ignore scoreDocsInOrder parameter, because we need to score in
        // order if documents are scored with a script. The
        // ShardLookup depends on in order scoring.
        Scorer subQueryScorer = subQueryWeight.scorer(context, true, false, acceptDocs);
        Scorer subQueryScorer = subQueryWeight.scorer(context, acceptDocs);
        if (subQueryScorer == null) {
            return null;
        }

@@ -112,11 +112,11 @@ public class FunctionScoreQuery extends Query {
    }

    @Override
    public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
    public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
        // we ignore scoreDocsInOrder parameter, because we need to score in
        // order if documents are scored with a script. The
        // ShardLookup depends on in order scoring.
        Scorer subQueryScorer = subQueryWeight.scorer(context, true, false, acceptDocs);
        Scorer subQueryScorer = subQueryWeight.scorer(context, acceptDocs);
        if (subQueryScorer == null) {
            return null;
        }
@@ -1,97 +0,0 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.common.lucene.store;

import org.apache.lucene.store.IndexOutput;

import java.io.IOException;
import java.util.zip.Checksum;

/**
 */
public class ChecksumIndexOutput extends IndexOutput {

    private final IndexOutput out;

    private final Checksum digest;

    public ChecksumIndexOutput(IndexOutput out, Checksum digest) {
        this.out = out;
        this.digest = digest;
    }

    public Checksum digest() {
        return digest;
    }

    @Override
    public void writeByte(byte b) throws IOException {
        out.writeByte(b);
        digest.update(b);
    }

    @Override
    public void setLength(long length) throws IOException {
        out.setLength(length);
    }

    // don't override copyBytes, since we need to read it and compute it
    // @Override
    // public void copyBytes(DataInput input, long numBytes) throws IOException {
    //     super.copyBytes(input, numBytes);
    // }


    @Override
    public String toString() {
        return out.toString();
    }

    @Override
    public void writeBytes(byte[] b, int offset, int length) throws IOException {
        out.writeBytes(b, offset, length);
        digest.update(b, offset, length);
    }

    @Override
    public void flush() throws IOException {
        out.flush();
    }

    @Override
    public void close() throws IOException {
        out.close();
    }

    @Override
    public long getFilePointer() {
        return out.getFilePointer();
    }

    @Override
    public void seek(long pos) throws IOException {
        out.seek(pos);
    }

    @Override
    public long length() throws IOException {
        return out.length();
    }
}
@@ -23,13 +23,10 @@ import com.carrotsearch.hppc.DoubleArrayList;
import com.carrotsearch.hppc.FloatArrayList;
import com.carrotsearch.hppc.LongArrayList;
import com.carrotsearch.hppc.ObjectArrayList;
import org.apache.lucene.util.IntroSorter;
import org.apache.lucene.util.*;
import org.elasticsearch.common.Preconditions;

import java.util.AbstractList;
import java.util.Arrays;
import java.util.List;
import java.util.RandomAccess;
import java.util.*;

/** Collections-related utility methods. */
public enum CollectionUtils {
@@ -307,5 +304,58 @@ public enum CollectionUtils {
        }

    };
    public static void sort(final BytesRefArray bytes, final int[] indices) {
        sort(new BytesRef(), new BytesRef(), bytes, indices);
    }

    private static void sort(final BytesRef scratch, final BytesRef scratch1, final BytesRefArray bytes, final int[] indices) {

        final int numValues = bytes.size();
        assert indices.length >= numValues;
        if (numValues > 1) {
            new InPlaceMergeSorter() {
                final Comparator<BytesRef> comparator = BytesRef.getUTF8SortedAsUnicodeComparator();
                @Override
                protected int compare(int i, int j) {
                    return comparator.compare(bytes.get(scratch, indices[i]), bytes.get(scratch1, indices[j]));
                }

                @Override
                protected void swap(int i, int j) {
                    int value_i = indices[i];
                    indices[i] = indices[j];
                    indices[j] = value_i;
                }
            }.sort(0, numValues);
        }

    }

    public static int sortAndDedup(final BytesRefArray bytes, final int[] indices) {
        final BytesRef scratch = new BytesRef();
        final BytesRef scratch1 = new BytesRef();
        final int numValues = bytes.size();
        assert indices.length >= numValues;
        if (numValues <= 1) {
            return numValues;
        }
        sort(scratch, scratch1, bytes, indices);
        int uniqueCount = 1;
        BytesRef previous = scratch;
        BytesRef current = scratch1;
        bytes.get(previous, indices[0]);
        for (int i = 1; i < numValues; ++i) {
            bytes.get(current, indices[i]);
            if (!previous.equals(current)) {
                indices[uniqueCount++] = indices[i];
            }
            BytesRef tmp = previous;
            previous = current;
            current = tmp;
        }
        return uniqueCount;

    }


}
@@ -38,6 +38,7 @@ import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Set;
import java.util.concurrent.atomic.AtomicBoolean;

/**
 *
@@ -50,6 +51,7 @@ public class NodeEnvironment extends AbstractComponent {
    private final Lock[] locks;

    private final int localNodeId;
    private final AtomicBoolean closed = new AtomicBoolean(false);

    @Inject
    public NodeEnvironment(Settings settings, Environment environment) {
@@ -143,6 +145,7 @@ public class NodeEnvironment extends AbstractComponent {
    }

    public File[] nodeDataLocations() {
        assert assertEnvIsLocked();
        if (nodeFiles == null || locks == null) {
            throw new ElasticsearchIllegalStateException("node is not configured to store local location");
        }
@@ -150,10 +153,12 @@ public class NodeEnvironment extends AbstractComponent {
    }

    public File[] indicesLocations() {
        assert assertEnvIsLocked();
        return nodeIndicesLocations;
    }

    public File[] indexLocations(Index index) {
        assert assertEnvIsLocked();
        File[] indexLocations = new File[nodeFiles.length];
        for (int i = 0; i < nodeFiles.length; i++) {
            indexLocations[i] = new File(new File(nodeFiles[i], "indices"), index.name());
@@ -162,6 +167,7 @@ public class NodeEnvironment extends AbstractComponent {
    }

    public File[] shardLocations(ShardId shardId) {
        assert assertEnvIsLocked();
        File[] shardLocations = new File[nodeFiles.length];
        for (int i = 0; i < nodeFiles.length; i++) {
            shardLocations[i] = new File(new File(new File(nodeFiles[i], "indices"), shardId.index().name()), Integer.toString(shardId.id()));
@@ -173,6 +179,7 @@ public class NodeEnvironment extends AbstractComponent {
        if (nodeFiles == null || locks == null) {
            throw new ElasticsearchIllegalStateException("node is not configured to store local location");
        }
        assert assertEnvIsLocked();
        Set<String> indices = Sets.newHashSet();
        for (File indicesLocation : nodeIndicesLocations) {
            File[] indicesList = indicesLocation.listFiles();
@@ -192,6 +199,7 @@ public class NodeEnvironment extends AbstractComponent {
        if (nodeFiles == null || locks == null) {
            throw new ElasticsearchIllegalStateException("node is not configured to store local location");
        }
        assert assertEnvIsLocked();
        Set<ShardId> shardIds = Sets.newHashSet();
        for (File indicesLocation : nodeIndicesLocations) {
            File[] indicesList = indicesLocation.listFiles();
@@ -222,7 +230,7 @@ public class NodeEnvironment extends AbstractComponent {
    }

    public void close() {
        if (locks != null) {
        if (closed.compareAndSet(false, true) && locks != null) {
            for (Lock lock : locks) {
                try {
                    logger.trace("releasing lock [{}]", lock);
@@ -233,4 +241,19 @@ public class NodeEnvironment extends AbstractComponent {
            }
        }
    }


    private boolean assertEnvIsLocked() {
        if (!closed.get() && locks != null) {
            for (Lock lock : locks) {
                try {
                    assert lock.isLocked() : "Lock: " + lock + "is not locked";
                } catch (IOException e) {
                    logger.warn("lock assertion failed", e);
                    return false;
                }
            }
        }
        return true;
    }
}
@@ -18,10 +18,8 @@
 */
package org.elasticsearch.index.analysis;

import java.util.Locale;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.hunspell.HunspellDictionary;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.apache.lucene.analysis.hunspell.HunspellStemFilter;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.common.inject.Inject;
@@ -31,12 +29,14 @@ import org.elasticsearch.index.Index;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.indices.analysis.HunspellService;

import java.util.Locale;

@AnalysisSettingsRequired
public class HunspellTokenFilterFactory extends AbstractTokenFilterFactory {

    private final HunspellDictionary dictionary;
    private final Dictionary dictionary;
    private final boolean dedup;
    private final int recursionLevel;
    private final boolean longestOnly;

    @Inject
    public HunspellTokenFilterFactory(Index index, @IndexSettings Settings indexSettings, @Assisted String name, @Assisted Settings settings, HunspellService hunspellService) {
@@ -53,24 +53,20 @@ public class HunspellTokenFilterFactory extends AbstractTokenFilterFactory {
        }

        dedup = settings.getAsBoolean("dedup", true);

        recursionLevel = settings.getAsInt("recursion_level", 2);
        if (recursionLevel < 0) {
            throw new ElasticsearchIllegalArgumentException(String.format(Locale.ROOT, "Negative recursion level not allowed for hunspell [%d]", recursionLevel));
        }
        longestOnly = settings.getAsBoolean("longest_only", false);
    }

    @Override
    public TokenStream create(TokenStream tokenStream) {
        return new HunspellStemFilter(tokenStream, dictionary, dedup, recursionLevel);
        return new HunspellStemFilter(tokenStream, dictionary, dedup, longestOnly);
    }

    public boolean dedup() {
        return dedup;
    }

    public int recursionLevel() {
        return recursionLevel;

    public boolean longestOnly() {
        return longestOnly;
    }
}
@@ -20,9 +20,11 @@
package org.elasticsearch.index.analysis;

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.Lucene47WordDelimiterFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterIterator;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.lucene.Lucene;
@@ -86,10 +88,17 @@ public class WordDelimiterTokenFilterFactory extends AbstractTokenFilterFactory

    @Override
    public TokenStream create(TokenStream tokenStream) {
        return new WordDelimiterFilter(tokenStream,
                charTypeTable,
                flags,
                protoWords);
        if (version.onOrAfter(Version.LUCENE_48)) {
            return new WordDelimiterFilter(version, tokenStream,
                    charTypeTable,
                    flags,
                    protoWords);
        } else {
            return new Lucene47WordDelimiterFilter(tokenStream,
                    charTypeTable,
                    flags,
                    protoWords);
        }
    }

    public int getFlag(int flag, Settings settings, String key, boolean defaultValue) {
@@ -21,8 +21,8 @@ package org.elasticsearch.index.codec.postingsformat;

import org.apache.lucene.codecs.*;
import org.apache.lucene.index.*;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
@@ -51,6 +51,8 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {

    public static final String BLOOM_CODEC_NAME = "XBloomFilter"; // the Lucene one is named BloomFilter
    public static final int BLOOM_CODEC_VERSION = 1;
    public static final int BLOOM_CODEC_VERSION_CHECKSUM = 2;
    public static final int BLOOM_CODEC_VERSION_CURRENT = BLOOM_CODEC_VERSION_CHECKSUM;

    /**
     * Extension of Bloom Filters file
@@ -116,12 +118,12 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {

            String bloomFileName = IndexFileNames.segmentFileName(
                    state.segmentInfo.name, state.segmentSuffix, BLOOM_EXTENSION);
            IndexInput bloomIn = null;
            ChecksumIndexInput bloomIn = null;
            boolean success = false;
            try {
                bloomIn = state.directory.openInput(bloomFileName, state.context);
                CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION,
                        BLOOM_CODEC_VERSION);
                bloomIn = state.directory.openChecksumInput(bloomFileName, state.context);
                int version = CodecUtil.checkHeader(bloomIn, BLOOM_CODEC_NAME, BLOOM_CODEC_VERSION,
                        BLOOM_CODEC_VERSION_CURRENT);
                // // Load the hash function used in the BloomFilter
                // hashFunction = HashFunction.forName(bloomIn.readString());
                // Load the delegate postings format
@@ -146,6 +148,11 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
                    FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldNum);
                    bloomsByFieldName.put(fieldInfo.name, bloom);
                }
                if (version >= BLOOM_CODEC_VERSION_CHECKSUM) {
                    CodecUtil.checkFooter(bloomIn);
                } else {
                    CodecUtil.checkEOF(bloomIn);
                }
                }
                IOUtils.close(bloomIn);
                success = true;
@@ -197,6 +204,11 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
            }
            return size;
        }

        @Override
        public void checkIntegrity() throws IOException {
            delegateFieldsProducer.checkIntegrity();
        }
    }

    public static final class BloomFilteredTerms extends FilterAtomicReader.FilterTerms {
@@ -382,7 +394,7 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
                bloomOutput = state.directory
                        .createOutput(bloomFileName, state.context);
                CodecUtil.writeHeader(bloomOutput, BLOOM_CODEC_NAME,
                        BLOOM_CODEC_VERSION);
                        BLOOM_CODEC_VERSION_CURRENT);
                // remember the name of the postings format we will delegate to
                bloomOutput.writeString(delegatePostingsFormat.getName());

@@ -394,6 +406,7 @@ public final class BloomFilterPostingsFormat extends PostingsFormat {
                    bloomOutput.writeInt(fieldInfo.number);
                    saveAppropriatelySizedBloomFilter(bloomOutput, bloomFilter, fieldInfo);
                }
                CodecUtil.writeFooter(bloomOutput);
            } finally {
                IOUtils.close(bloomOutput);
            }
@@ -59,7 +59,6 @@ import org.elasticsearch.index.deletionpolicy.SnapshotIndexCommit;
import org.elasticsearch.index.engine.*;
import org.elasticsearch.index.indexing.ShardIndexingService;
import org.elasticsearch.index.mapper.Uid;
import org.elasticsearch.index.merge.Merges;
import org.elasticsearch.index.merge.OnGoingMerge;
import org.elasticsearch.index.merge.policy.ElasticsearchMergePolicy;
import org.elasticsearch.index.merge.policy.MergePolicyProvider;
@@ -883,7 +882,7 @@ public class InternalEngine extends AbstractIndexShardComponent implements Engin
        }
        possibleMergeNeeded = false;
        try (InternalLock _ = readLock.acquire()) {
            Merges.maybeMerge(currentIndexWriter());
            currentIndexWriter().maybeMerge();
        } catch (Throwable t) {
            maybeFailEngine(t);
            throw new OptimizeFailedEngineException(shardId, t);
@@ -918,12 +917,12 @@ public class InternalEngine extends AbstractIndexShardComponent implements Engin
                elasticsearchMergePolicy.setForce(true);
            }
            if (optimize.onlyExpungeDeletes()) {
                Merges.forceMergeDeletes(writer, false);
                writer.forceMergeDeletes(false);
            } else if (optimize.maxNumSegments() <= 0) {
                Merges.maybeMerge(writer);
                writer.maybeMerge();
                possibleMergeNeeded = false;
            } else {
                Merges.forceMerge(writer, optimize.maxNumSegments(), false);
                writer.forceMerge(optimize.maxNumSegments(), false);
            }
        } catch (Throwable t) {
            maybeFailEngine(t);
@@ -21,6 +21,7 @@ package org.elasticsearch.index.fielddata.plain;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchIllegalStateException;
@@ -69,7 +70,7 @@ public class BinaryDVAtomicFieldData implements AtomicFieldData<ScriptDocValues.
        final BinaryDocValues v = reader.getBinaryDocValues(field);
        if (v == null) {
            // segment has no value
            values = BinaryDocValues.EMPTY;
            values = DocValues.EMPTY_BINARY;
            docsWithField = new Bits.MatchNoBits(reader.maxDoc());
        } else {
            values = v;

@@ -21,6 +21,7 @@ package org.elasticsearch.index.fielddata.plain;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
@@ -39,7 +40,7 @@ final class BinaryDVNumericAtomicFieldData extends AbstractAtomicNumericFieldDat
    BinaryDVNumericAtomicFieldData(AtomicReader reader, BinaryDocValues values, NumericType numericType) {
        super(numericType.isFloatingPoint());
        this.reader = reader;
        this.values = values == null ? BinaryDocValues.EMPTY : values;
        this.values = values == null ? DocValues.EMPTY_BINARY : values;
        this.numericType = numericType;
    }

@@ -21,6 +21,7 @@ package org.elasticsearch.index.fielddata.plain;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.index.fielddata.AtomicFieldData;
@@ -35,7 +36,7 @@ final class BytesBinaryDVAtomicFieldData implements AtomicFieldData<ScriptDocVal
    BytesBinaryDVAtomicFieldData(AtomicReader reader, BinaryDocValues values) {
        super();
        this.reader = reader;
        this.values = values == null ? BinaryDocValues.EMPTY : values;
        this.values = values == null ? DocValues.EMPTY_BINARY : values;
    }

    @Override

@@ -20,6 +20,7 @@
package org.elasticsearch.index.fielddata.plain;

import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.geo.GeoPoint;
import org.elasticsearch.common.util.ByteUtils;
@@ -33,7 +34,7 @@ final class GeoPointBinaryDVAtomicFieldData extends AtomicGeoPointFieldData<Scri

    GeoPointBinaryDVAtomicFieldData(BinaryDocValues values) {
        super();
        this.values = values == null ? BinaryDocValues.EMPTY : values;
        this.values = values == null ? DocValues.EMPTY_BINARY : values;
    }

    @Override

@@ -20,6 +20,7 @@
package org.elasticsearch.index.fielddata.plain;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.util.Bits;
import org.elasticsearch.ElasticsearchIllegalStateException;
@@ -82,7 +83,7 @@ public class NumericDVAtomicFieldData extends AbstractAtomicNumericFieldData {
        final NumericDocValues v = reader.getNumericDocValues(field);
        if (v == null) {
            // segment has no value
            values = NumericDocValues.EMPTY;
            values = DocValues.EMPTY_NUMERIC;
            docsWithField = new Bits.MatchNoBits(reader.maxDoc());
        } else {
            values = v;

@@ -19,9 +19,7 @@

package org.elasticsearch.index.fielddata.plain;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.*;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchIllegalStateException;
@@ -104,7 +102,7 @@ abstract class SortedSetDVAtomicFieldData {
            if (values == null) {
                // This field has not been populated
                assert reader.getFieldInfos().fieldInfo(field) == null;
                values = SortedSetDocValues.EMPTY;
                values = DocValues.EMPTY_SORTED_SET;
            }
            return values;
        } catch (IOException e) {
@@ -21,6 +21,7 @@ package org.elasticsearch.index.merge;

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.MergeTrigger;

import java.io.IOException;

@@ -31,44 +32,22 @@ import java.io.IOException;
 * <p/>
 * This merge scheduler can be used to get around the fact that even though a merge
 * policy can control that no new merges will be created as a result of a segment flush
 * (during indexing operation for example), the {@link #merge(org.apache.lucene.index.IndexWriter)}
 * (during indexing operation for example), the {@link #merge(org.apache.lucene.index.IndexWriter, org.apache.lucene.index.MergeTrigger, boolean)}
 * call will still be called, and can result in stalling indexing.
 */
public class EnableMergeScheduler extends MergeScheduler {

    private final MergeScheduler mergeScheduler;

    private final ThreadLocal<Boolean> enabled = new ThreadLocal<Boolean>() {
        @Override
        protected Boolean initialValue() {
            return Boolean.FALSE;
        }
    };

    public EnableMergeScheduler(MergeScheduler mergeScheduler) {
        this.mergeScheduler = mergeScheduler;
    }

    /**
     * Enable merges on the current thread.
     */
    void enableMerge() {
        assert !enabled.get();
        enabled.set(Boolean.TRUE);
    }

    /**
     * Disable merges on the current thread.
     */
    void disableMerge() {
        assert enabled.get();
        enabled.set(Boolean.FALSE);
    }

    @Override
    public void merge(IndexWriter writer) throws IOException {
        if (enabled.get()) {
            mergeScheduler.merge(writer);
    public void merge(IndexWriter writer, MergeTrigger trigger, boolean newMergesFound) throws IOException {
        if (trigger == MergeTrigger.EXPLICIT) {
            mergeScheduler.merge(writer, trigger, newMergesFound);
        }
    }
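Not part of the commit: a minimal sketch of the trigger-gating pattern that the new merge(IndexWriter, MergeTrigger, boolean) signature makes possible, which is what EnableMergeScheduler now relies on instead of the removed ThreadLocal enable/disable flags. The class and field names are illustrative only.

import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.MergeTrigger;

import java.io.IOException;

class ExplicitOnlyMergeScheduler extends MergeScheduler {

    private final MergeScheduler delegate;

    ExplicitOnlyMergeScheduler(MergeScheduler delegate) {
        this.delegate = delegate;
    }

    @Override
    public void merge(IndexWriter writer, MergeTrigger trigger, boolean newMergesFound) throws IOException {
        // Flush-triggered merges are dropped; only explicitly requested merges
        // (reported by IndexWriter as MergeTrigger.EXPLICIT) reach the real scheduler.
        if (trigger == MergeTrigger.EXPLICIT) {
            delegate.merge(writer, trigger, newMergesFound);
        }
    }

    @Override
    public void close() throws IOException {
        delegate.close();
    }
}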
@ -1,107 +0,0 @@
|
||||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.merge;
|
||||
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.MergeScheduler;

import java.io.IOException;

/**
 * A helper to execute the explicit merge APIs of {@link org.apache.lucene.index.IndexWriter}. It
 * holds the additional logic that, if the merge scheduler is an {@link org.elasticsearch.index.merge.EnableMergeScheduler},
 * merges are explicitly enabled before the call and disabled again at the end.
 * <p/>
 * In our codebase, at least until we can somehow move this logic into the Lucene IndexWriter itself, we should only use
 * this class to execute explicit merges. The explicit merge calls have been added to the forbidden APIs
 * list to make sure we don't call them unless we go through this class.
 */
public class Merges {

    /**
     * See {@link org.apache.lucene.index.IndexWriter#maybeMerge()}, with the additional
     * logic of explicitly enabling merges if the scheduler is an {@link org.elasticsearch.index.merge.EnableMergeScheduler}.
     */
    public static void maybeMerge(IndexWriter writer) throws IOException {
        MergeScheduler mergeScheduler = writer.getConfig().getMergeScheduler();
        if (mergeScheduler instanceof EnableMergeScheduler) {
            ((EnableMergeScheduler) mergeScheduler).enableMerge();
            try {
                writer.maybeMerge();
            } finally {
                ((EnableMergeScheduler) mergeScheduler).disableMerge();
            }
        } else {
            writer.maybeMerge();
        }
    }

    /**
     * See {@link org.apache.lucene.index.IndexWriter#forceMerge(int)}, with the additional
     * logic of explicitly enabling merges if the scheduler is an {@link org.elasticsearch.index.merge.EnableMergeScheduler}.
     */
    public static void forceMerge(IndexWriter writer, int maxNumSegments) throws IOException {
        forceMerge(writer, maxNumSegments, true);
    }

    /**
     * See {@link org.apache.lucene.index.IndexWriter#forceMerge(int, boolean)}, with the additional
     * logic of explicitly enabling merges if the scheduler is an {@link org.elasticsearch.index.merge.EnableMergeScheduler}.
     */
    public static void forceMerge(IndexWriter writer, int maxNumSegments, boolean doWait) throws IOException {
        MergeScheduler mergeScheduler = writer.getConfig().getMergeScheduler();
        if (mergeScheduler instanceof EnableMergeScheduler) {
            ((EnableMergeScheduler) mergeScheduler).enableMerge();
            try {
                writer.forceMerge(maxNumSegments, doWait);
            } finally {
                ((EnableMergeScheduler) mergeScheduler).disableMerge();
            }
        } else {
            writer.forceMerge(maxNumSegments, doWait);
        }
    }

    /**
     * See {@link org.apache.lucene.index.IndexWriter#forceMergeDeletes()}, with the additional
     * logic of explicitly enabling merges if the scheduler is an {@link org.elasticsearch.index.merge.EnableMergeScheduler}.
     */
    public static void forceMergeDeletes(IndexWriter writer) throws IOException {
        forceMergeDeletes(writer, true);
    }

    /**
     * See {@link org.apache.lucene.index.IndexWriter#forceMergeDeletes(boolean)}, with the additional
     * logic of explicitly enabling merges if the scheduler is an {@link org.elasticsearch.index.merge.EnableMergeScheduler}.
     */
    public static void forceMergeDeletes(IndexWriter writer, boolean doWait) throws IOException {
        MergeScheduler mergeScheduler = writer.getConfig().getMergeScheduler();
        if (mergeScheduler instanceof EnableMergeScheduler) {
            ((EnableMergeScheduler) mergeScheduler).enableMerge();
            try {
                writer.forceMergeDeletes(doWait);
            } finally {
                ((EnableMergeScheduler) mergeScheduler).disableMerge();
            }
        } else {
            writer.forceMergeDeletes(doWait);
        }
    }
}

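A minimal usage sketch (not part of this commit; the caller and its writer setup are assumed) of how code is expected to route explicit merges through the helper above instead of invoking the forbidden IndexWriter methods directly:

import org.apache.lucene.index.IndexWriter;
import org.elasticsearch.index.merge.Merges;

import java.io.IOException;

class ExplicitMergeCaller {
    // Hypothetical caller: all explicit merges go through Merges so that an
    // EnableMergeScheduler, if one is configured, is only enabled around the call.
    static void optimize(IndexWriter writer) throws IOException {
        Merges.forceMerge(writer, 1);            // instead of writer.forceMerge(1)
        Merges.forceMergeDeletes(writer, true);  // instead of writer.forceMergeDeletes(true)
    }
}
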
@ -84,9 +84,9 @@ public class SerialMergeSchedulerProvider extends MergeSchedulerProvider {
        }

        @Override
        public void merge(IndexWriter writer) throws CorruptIndexException, IOException {
        public void merge(IndexWriter writer, MergeTrigger trigger, boolean newMergesFound) throws CorruptIndexException, IOException {
            try {
                super.merge(writer);
                super.merge(writer, trigger, newMergesFound);
            } catch (Throwable e) {
                logger.warn("failed to merge", e);
                provider.failedMerge(new MergePolicy.MergeException(e, writer.getDirectory()));

@ -176,7 +176,7 @@ public class ChildrenConstantScoreQuery extends Query {
        }

        @Override
        public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
        public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
            if (remaining == 0) {
                return null;
            }

@ -265,7 +265,7 @@ public class ChildrenQuery extends Query {
        }

        @Override
        public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
        public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
            DocIdSet parentsSet = parentFilter.getDocIdSet(context, acceptDocs);
            if (DocIdSets.isEmpty(parentsSet) || remaining == 0) {
                return null;

@ -74,7 +74,7 @@ public class CustomQueryWrappingFilter extends NoCacheFilter implements Releasab
            final DocIdSet set = DocIdSets.toCacheable(leaf.reader(), new DocIdSet() {
                @Override
                public DocIdSetIterator iterator() throws IOException {
                    return weight.scorer(leaf, true, false, null);
                    return weight.scorer(leaf, null);
                }
                @Override
                public boolean isCacheable() { return false; }

@ -91,7 +91,7 @@ public class DeleteByQueryWrappingFilter extends Filter {
            return new DocIdSet() {
                @Override
                public DocIdSetIterator iterator() throws IOException {
                    return weight.scorer(context, true, false, acceptDocs);
                    return weight.scorer(context, acceptDocs);
                }
                @Override
                public boolean isCacheable() { return false; }

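The filter hunks above all make the same mechanical change for Lucene 4.8: Weight#scorer lost its scoreDocsInOrder/topScorer flags. A condensed sketch of the resulting shape (class and method names here are illustrative, not from the commit):

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;

import java.io.IOException;

class WeightBackedDocIdSet {
    // Expose a Weight as an uncacheable DocIdSet: a Scorer is a DocIdSetIterator,
    // and in 4.8 it is obtained from just the leaf context and the accepted docs.
    static DocIdSet wrap(final Weight weight, final AtomicReaderContext leaf, final Bits acceptDocs) {
        return new DocIdSet() {
            @Override
            public DocIdSetIterator iterator() throws IOException {
                return weight.scorer(leaf, acceptDocs);
            }

            @Override
            public boolean isCacheable() {
                return false;
            }
        };
    }
}
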
@ -154,7 +154,7 @@ public class ParentConstantScoreQuery extends Query {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
|
||||
public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
||||
DocIdSet childrenDocIdSet = childrenFilter.getDocIdSet(context, acceptDocs);
|
||||
if (DocIdSets.isEmpty(childrenDocIdSet)) {
|
||||
return null;
|
||||
|
@ -245,7 +245,7 @@ public class ParentQuery extends Query {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
|
||||
public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
||||
DocIdSet childrenDocSet = childrenFilter.getDocIdSet(context, acceptDocs);
|
||||
if (DocIdSets.isEmpty(childrenDocSet)) {
|
||||
return null;
|
||||
|
@ -327,7 +327,7 @@ public class TopChildrenQuery extends Query {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
|
||||
public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
||||
ParentDoc[] readerParentDocs = parentDocs.v().get(context.reader().getCoreCacheKey());
|
||||
if (readerParentDocs != null) {
|
||||
if (scoreType == ScoreType.MAX) {
|
||||
|
@ -103,8 +103,8 @@ public class IncludeNestedDocsQuery extends Query {
|
||||
}
|
||||
|
||||
@Override
|
||||
public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
|
||||
final Scorer parentScorer = parentWeight.scorer(context, true, false, acceptDocs);
|
||||
public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException {
|
||||
final Scorer parentScorer = parentWeight.scorer(context, acceptDocs);
|
||||
|
||||
// no matches
|
||||
if (parentScorer == null) {
|
||||
|
@ -32,7 +32,6 @@ import org.elasticsearch.common.compress.Compressor;
|
||||
import org.elasticsearch.common.compress.CompressorFactory;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.lucene.Directories;
|
||||
import org.elasticsearch.common.lucene.store.ChecksumIndexOutput;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.unit.ByteSizeValue;
|
||||
import org.elasticsearch.index.CloseableIndexComponent;
|
||||
@ -46,6 +45,7 @@ import org.elasticsearch.index.store.support.ForceSyncDirectory;
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.NoSuchFileException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collection;
|
||||
import java.util.HashMap;
|
||||
@ -53,6 +53,7 @@ import java.util.Map;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.zip.Adler32;
|
||||
import java.util.zip.Checksum;
|
||||
|
||||
/**
|
||||
*/
|
||||
@ -145,7 +146,7 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
|
||||
} else {
|
||||
try {
|
||||
directory.deleteFile(file);
|
||||
} catch (FileNotFoundException e) {
|
||||
} catch (NoSuchFileException | FileNotFoundException e) {
|
||||
// ignore
|
||||
} catch (IOException e) {
|
||||
lastException = e;
|
||||
@ -649,10 +650,13 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
|
||||
out.close();
|
||||
String checksum = null;
|
||||
IndexOutput underlying = out;
|
||||
// TODO: cut over to lucene's CRC
|
||||
// *WARNING*: lucene has classes in same o.a.l.store package with very similar names,
|
||||
// but using CRC, not Adler!
|
||||
if (underlying instanceof BufferedChecksumIndexOutput) {
|
||||
checksum = Long.toString(((BufferedChecksumIndexOutput) underlying).digest().getValue(), Character.MAX_RADIX);
|
||||
} else if (underlying instanceof ChecksumIndexOutput) {
|
||||
checksum = Long.toString(((ChecksumIndexOutput) underlying).digest().getValue(), Character.MAX_RADIX);
|
||||
Checksum digest = ((BufferedChecksumIndexOutput) underlying).digest();
|
||||
assert digest instanceof Adler32;
|
||||
checksum = Long.toString(digest.getValue(), Character.MAX_RADIX);
|
||||
}
|
||||
synchronized (mutex) {
|
||||
StoreFileMetaData md = new StoreFileMetaData(name, metaData.directory().fileLength(name), checksum, metaData.directory());
|
||||
@ -705,5 +709,10 @@ public class Store extends AbstractIndexShardComponent implements CloseableIndex
|
||||
public String toString() {
|
||||
return out.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public long getChecksum() throws IOException {
|
||||
return out.getChecksum();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -21,23 +21,17 @@ package org.elasticsearch.indices.analysis;
|
||||
import com.google.common.cache.CacheBuilder;
|
||||
import com.google.common.cache.CacheLoader;
|
||||
import com.google.common.cache.LoadingCache;
|
||||
import org.apache.lucene.analysis.hunspell.HunspellDictionary;
|
||||
import org.apache.lucene.util.Version;
|
||||
import org.apache.lucene.analysis.hunspell.Dictionary;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.common.component.AbstractComponent;
|
||||
import org.elasticsearch.common.inject.Inject;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.common.settings.ImmutableSettings;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.env.Environment;
|
||||
|
||||
import java.io.*;
|
||||
import java.net.MalformedURLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.Locale;
|
||||
import java.util.Map;
|
||||
import java.util.*;
|
||||
|
||||
/**
|
||||
* Serves as a node-level registry for hunspell dictionaries. This service expects all dictionaries to be located under
|
||||
@ -73,31 +67,28 @@ public class HunspellService extends AbstractComponent {
|
||||
private final static DictionaryFileFilter DIC_FILE_FILTER = new DictionaryFileFilter();
|
||||
private final static AffixFileFilter AFFIX_FILE_FILTER = new AffixFileFilter();
|
||||
|
||||
private final LoadingCache<String, HunspellDictionary> dictionaries;
|
||||
private final Map<String, HunspellDictionary> knownDictionaries;
|
||||
private final LoadingCache<String, Dictionary> dictionaries;
|
||||
private final Map<String, Dictionary> knownDictionaries;
|
||||
|
||||
private final boolean defaultIgnoreCase;
|
||||
private final boolean defaultStrictAffixParsing;
|
||||
private final File hunspellDir;
|
||||
|
||||
public HunspellService(final Settings settings, final Environment env) {
|
||||
this(settings, env, Collections.<String, HunspellDictionary>emptyMap());
|
||||
this(settings, env, Collections.<String, Dictionary>emptyMap());
|
||||
}
|
||||
|
||||
@Inject
|
||||
public HunspellService(final Settings settings, final Environment env, final Map<String, HunspellDictionary> knownDictionaries) {
|
||||
public HunspellService(final Settings settings, final Environment env, final Map<String, Dictionary> knownDictionaries) {
|
||||
super(settings);
|
||||
this.knownDictionaries = knownDictionaries;
|
||||
this.hunspellDir = resolveHunspellDirectory(settings, env);
|
||||
this.defaultIgnoreCase = settings.getAsBoolean("indices.analysis.hunspell.dictionary.ignore_case", false);
|
||||
this.defaultStrictAffixParsing = settings.getAsBoolean("indices.analysis.hunspell.dictionary.strict_affix_parsing", false);
|
||||
final Version version = Lucene.parseVersion(settings.get("indices.analysis.hunspell.version"), Lucene.ANALYZER_VERSION, logger);
|
||||
dictionaries = CacheBuilder.newBuilder().build(new CacheLoader<String, HunspellDictionary>() {
|
||||
dictionaries = CacheBuilder.newBuilder().build(new CacheLoader<String, Dictionary>() {
|
||||
@Override
|
||||
public HunspellDictionary load(String locale) throws Exception {
|
||||
HunspellDictionary dictionary = knownDictionaries.get(locale);
|
||||
public Dictionary load(String locale) throws Exception {
|
||||
Dictionary dictionary = knownDictionaries.get(locale);
|
||||
if (dictionary == null) {
|
||||
dictionary = loadDictionary(locale, settings, env, version);
|
||||
dictionary = loadDictionary(locale, settings, env);
|
||||
}
|
||||
return dictionary;
|
||||
}
|
||||
@ -110,7 +101,7 @@ public class HunspellService extends AbstractComponent {
|
||||
*
|
||||
* @param locale The name of the locale
|
||||
*/
|
||||
public HunspellDictionary getDictionary(String locale) {
|
||||
public Dictionary getDictionary(String locale) {
|
||||
return dictionaries.getUnchecked(locale);
|
||||
}
|
||||
|
||||
@ -147,7 +138,7 @@ public class HunspellService extends AbstractComponent {
|
||||
* @return The loaded Hunspell dictionary
|
||||
* @throws Exception when loading fails (due to IO errors or malformed dictionary files)
|
||||
*/
|
||||
private HunspellDictionary loadDictionary(String locale, Settings nodeSettings, Environment env, Version version) throws Exception {
|
||||
private Dictionary loadDictionary(String locale, Settings nodeSettings, Environment env) throws Exception {
|
||||
if (logger.isDebugEnabled()) {
|
||||
logger.debug("Loading huspell dictionary [{}]...", locale);
|
||||
}
|
||||
@ -160,7 +151,6 @@ public class HunspellService extends AbstractComponent {
|
||||
nodeSettings = loadDictionarySettings(dicDir, nodeSettings.getByPrefix("indices.analysis.hunspell.dictionary." + locale + "."));
|
||||
|
||||
boolean ignoreCase = nodeSettings.getAsBoolean("ignore_case", defaultIgnoreCase);
|
||||
boolean strictAffixParsing = nodeSettings.getAsBoolean("strict_affix_parsing", defaultStrictAffixParsing);
|
||||
|
||||
File[] affixFiles = dicDir.listFiles(AFFIX_FILE_FILTER);
|
||||
if (affixFiles.length != 1) {
|
||||
@ -178,7 +168,7 @@ public class HunspellService extends AbstractComponent {
|
||||
|
||||
affixStream = new FileInputStream(affixFiles[0]);
|
||||
|
||||
return new HunspellDictionary(affixStream, dicStreams, version, ignoreCase, strictAffixParsing);
|
||||
return new Dictionary(affixStream, dicStreams, ignoreCase);
|
||||
|
||||
} catch (Exception e) {
|
||||
logger.error("Could not load hunspell dictionary [{}]", e, locale);
|
||||
|
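The loadDictionary() change above moves to Lucene 4.8's org.apache.lucene.analysis.hunspell.Dictionary. A standalone sketch of the same construction (the class name and file paths are made up for illustration; this is not code from the commit):

import org.apache.lucene.analysis.hunspell.Dictionary;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.text.ParseException;
import java.util.Collections;
import java.util.List;

class DictionaryLoadSketch {
    static Dictionary load() throws IOException, ParseException {
        // One .aff file plus one or more .dic files, mirroring loadDictionary() above;
        // the locations are hypothetical.
        InputStream affix = new FileInputStream("/path/to/hunspell/en_US/en_US.aff");
        List<InputStream> dictionaries = Collections.<InputStream>singletonList(
                new FileInputStream("/path/to/hunspell/en_US/en_US.dic"));
        return new Dictionary(affix, dictionaries, /* ignoreCase */ true);
    }
}
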
@ -20,7 +20,7 @@
|
||||
package org.elasticsearch.indices.analysis;
|
||||
|
||||
import com.google.common.collect.Maps;
|
||||
import org.apache.lucene.analysis.hunspell.HunspellDictionary;
|
||||
import org.apache.lucene.analysis.hunspell.Dictionary;
|
||||
import org.elasticsearch.common.inject.AbstractModule;
|
||||
import org.elasticsearch.common.inject.multibindings.MapBinder;
|
||||
|
||||
@ -28,9 +28,9 @@ import java.util.Map;
|
||||
|
||||
public class IndicesAnalysisModule extends AbstractModule {
|
||||
|
||||
private final Map<String, HunspellDictionary> hunspellDictionaries = Maps.newHashMap();
|
||||
private final Map<String, Dictionary> hunspellDictionaries = Maps.newHashMap();
|
||||
|
||||
public void addHunspellDictionary(String lang, HunspellDictionary dictionary) {
|
||||
public void addHunspellDictionary(String lang, Dictionary dictionary) {
|
||||
hunspellDictionaries.put(lang, dictionary);
|
||||
}
|
||||
|
||||
@ -38,8 +38,8 @@ public class IndicesAnalysisModule extends AbstractModule {
|
||||
protected void configure() {
|
||||
bind(IndicesAnalysisService.class).asEagerSingleton();
|
||||
|
||||
MapBinder<String, HunspellDictionary> dictionariesBinder = MapBinder.newMapBinder(binder(), String.class, HunspellDictionary.class);
|
||||
for (Map.Entry<String, HunspellDictionary> entry : hunspellDictionaries.entrySet()) {
|
||||
MapBinder<String, Dictionary> dictionariesBinder = MapBinder.newMapBinder(binder(), String.class, Dictionary.class);
|
||||
for (Map.Entry<String, Dictionary> entry : hunspellDictionaries.entrySet()) {
|
||||
dictionariesBinder.addBinding(entry.getKey()).toInstance(entry.getValue());
|
||||
}
|
||||
bind(HunspellService.class).asEagerSingleton();
|
||||
|
@ -60,12 +60,21 @@ public enum PreBuiltTokenFilters {
    WORD_DELIMITER(CachingStrategy.ONE) {
        @Override
        public TokenStream create(TokenStream tokenStream, Version version) {
            return new WordDelimiterFilter(tokenStream,
                    WordDelimiterFilter.GENERATE_WORD_PARTS |
                    WordDelimiterFilter.GENERATE_NUMBER_PARTS |
                    WordDelimiterFilter.SPLIT_ON_CASE_CHANGE |
                    WordDelimiterFilter.SPLIT_ON_NUMERICS |
                    WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
            if (version.luceneVersion.onOrAfter(org.apache.lucene.util.Version.LUCENE_48)) {
                return new WordDelimiterFilter(version.luceneVersion, tokenStream,
                        WordDelimiterFilter.GENERATE_WORD_PARTS |
                        WordDelimiterFilter.GENERATE_NUMBER_PARTS |
                        WordDelimiterFilter.SPLIT_ON_CASE_CHANGE |
                        WordDelimiterFilter.SPLIT_ON_NUMERICS |
                        WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
            } else {
                return new Lucene47WordDelimiterFilter(tokenStream,
                        WordDelimiterFilter.GENERATE_WORD_PARTS |
                        WordDelimiterFilter.GENERATE_NUMBER_PARTS |
                        WordDelimiterFilter.SPLIT_ON_CASE_CHANGE |
                        WordDelimiterFilter.SPLIT_ON_NUMERICS |
                        WordDelimiterFilter.STEM_ENGLISH_POSSESSIVE, null);
            }
        }
    },

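The version gate above exists because the 4.8 and 4.7 word-delimiter implementations emit catenated tokens in a different order. A hedged sketch of the gate in isolation (the class name and the extra CATENATE_WORDS flag are illustrative, mirroring the factory tests further down in this diff):

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.core.WhitespaceTokenizer;
import org.apache.lucene.analysis.miscellaneous.Lucene47WordDelimiterFilter;
import org.apache.lucene.analysis.miscellaneous.WordDelimiterFilter;
import org.apache.lucene.util.Version;

import java.io.StringReader;

class WordDelimiterBackCompatSketch {
    // For "PowerShot" with GENERATE_WORD_PARTS | CATENATE_WORDS the two implementations
    // emit the same tokens in a different order (see WordDelimiterTokenFilterFactoryTests below):
    //   4.8 filter: Power, PowerShot, Shot      4.7 filter: Power, Shot, PowerShot
    static TokenStream newChain(boolean onOrAfter48) {
        int flags = WordDelimiterFilter.GENERATE_WORD_PARTS | WordDelimiterFilter.CATENATE_WORDS;
        TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_48, new StringReader("PowerShot"));
        return onOrAfter48
                ? new WordDelimiterFilter(Version.LUCENE_48, ts, flags, null)
                : new Lucene47WordDelimiterFilter(ts, flags, null);
    }
}
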
@ -303,7 +303,7 @@ public class RecoveryTarget extends AbstractComponent {
|
||||
return;
|
||||
}
|
||||
|
||||
logger.trace("[{}][{}] recovery from [{}] failed", e, request.shardId().index().name(), request.shardId().id(), request.sourceNode());
|
||||
logger.warn("[{}][{}] recovery from [{}] failed", e, request.shardId().index().name(), request.shardId().id(), request.sourceNode());
|
||||
listener.onRecoveryFailure(new RecoveryFailedException(request, e), true);
|
||||
}
|
||||
}
|
||||
|
@ -22,7 +22,6 @@ package org.elasticsearch.monitor.dump;
|
||||
import com.google.common.base.Charsets;
|
||||
import com.google.common.collect.ImmutableMap;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.io.Streams;
|
||||
|
||||
import java.io.*;
|
||||
import java.util.ArrayList;
|
||||
|
@ -22,7 +22,6 @@ package org.elasticsearch.monitor.dump;
|
||||
import org.elasticsearch.common.Nullable;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
@ -32,7 +31,7 @@ public class SimpleDump extends AbstractDump {
|
||||
|
||||
private final File location;
|
||||
|
||||
public SimpleDump(long timestamp, String cause, @Nullable Map<String, Object> context, File location) throws FileNotFoundException {
|
||||
public SimpleDump(long timestamp, String cause, @Nullable Map<String, Object> context, File location) {
|
||||
super(timestamp, cause, context);
|
||||
this.location = location;
|
||||
}
|
||||
|
@ -25,7 +25,6 @@ import org.elasticsearch.common.Nullable;
|
||||
import org.elasticsearch.common.io.FileSystemUtils;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Map;
|
||||
|
||||
@ -59,12 +58,7 @@ public class SimpleDumpGenerator implements DumpGenerator {
|
||||
}
|
||||
File file = new File(dumpLocation, fileName + cause + "-" + timestamp);
|
||||
FileSystemUtils.mkdirs(file);
|
||||
SimpleDump dump;
|
||||
try {
|
||||
dump = new SimpleDump(System.currentTimeMillis(), cause, context, file);
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new DumpGenerationFailedException("Failed to generate dump", e);
|
||||
}
|
||||
SimpleDump dump = new SimpleDump(System.currentTimeMillis(), cause, context, file);
|
||||
ArrayList<DumpContributionFailedException> failedContributors = new ArrayList<>();
|
||||
for (String name : contributors) {
|
||||
DumpContributor contributor = this.contributors.get(name);
|
||||
|
@ -52,6 +52,7 @@ import org.elasticsearch.snapshots.*;
|
||||
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.nio.file.NoSuchFileException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
@ -366,7 +367,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent<Rep
|
||||
try {
|
||||
byte[] data = snapshotsBlobContainer.readBlobFully(metaDataBlobName(snapshotId));
|
||||
metaData = readMetaData(data);
|
||||
} catch (FileNotFoundException ex) {
|
||||
} catch (FileNotFoundException | NoSuchFileException ex) {
|
||||
throw new SnapshotMissingException(snapshotId, ex);
|
||||
} catch (IOException ex) {
|
||||
throw new SnapshotException(snapshotId, "failed to get snapshots", ex);
|
||||
@ -427,7 +428,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent<Rep
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (FileNotFoundException ex) {
|
||||
} catch (FileNotFoundException | NoSuchFileException ex) {
|
||||
throw new SnapshotMissingException(snapshotId, ex);
|
||||
} catch (IOException ex) {
|
||||
throw new SnapshotException(snapshotId, "failed to get snapshots", ex);
|
||||
|
@ -22,8 +22,8 @@ import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.IndexReaderContext;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
import org.apache.lucene.util.InPlaceMergeSorter;
|
||||
import org.apache.lucene.util.BytesRefArray;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.elasticsearch.common.lucene.ReaderContextAware;
|
||||
import org.elasticsearch.common.lucene.TopReaderContextAware;
|
||||
import org.elasticsearch.common.util.CollectionUtils;
|
||||
@ -326,43 +326,39 @@ public abstract class ValuesSource {
|
||||
}
|
||||
|
||||
static class SortedUniqueBytesValues extends BytesValues {
|
||||
|
||||
final BytesValues delegate;
|
||||
int[] sortedIds;
|
||||
final BytesRefHash bytes;
|
||||
int[] indices = new int[1]; // at least one
|
||||
final BytesRefArray bytes;
|
||||
int numUniqueValues;
|
||||
int pos = Integer.MAX_VALUE;
|
||||
|
||||
public SortedUniqueBytesValues(BytesValues delegate) {
|
||||
super(delegate.isMultiValued());
|
||||
this.delegate = delegate;
|
||||
bytes = new BytesRefHash();
|
||||
bytes = new BytesRefArray(Counter.newCounter(false));
|
||||
}
|
||||
|
||||
@Override
|
||||
public int setDocument(int docId) {
|
||||
final int numValues = delegate.setDocument(docId);
|
||||
if (numValues == 0) {
|
||||
sortedIds = null;
|
||||
return 0;
|
||||
}
|
||||
bytes.clear();
|
||||
bytes.reinit();
|
||||
for (int i = 0; i < numValues; ++i) {
|
||||
final BytesRef next = delegate.nextValue();
|
||||
final int hash = delegate.currentValueHash();
|
||||
assert hash == next.hashCode();
|
||||
bytes.add(next, hash);
|
||||
}
|
||||
numUniqueValues = bytes.size();
|
||||
sortedIds = bytes.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
|
||||
numUniqueValues = 0;
|
||||
pos = 0;
|
||||
if (numValues > 0) {
|
||||
bytes.clear();
|
||||
indices = ArrayUtil.grow(this.indices, numValues);
|
||||
for (int i = 0; i < numValues; ++i) {
|
||||
final BytesRef next = delegate.nextValue();
|
||||
indices[i] = i;
|
||||
bytes.append(next);
|
||||
}
|
||||
numUniqueValues = CollectionUtils.sortAndDedup(bytes, indices);
|
||||
}
|
||||
return numUniqueValues;
|
||||
}
|
||||
|
||||
@Override
|
||||
public BytesRef nextValue() {
|
||||
bytes.get(sortedIds[pos++], scratch);
|
||||
bytes.get(scratch, indices[pos++]);
|
||||
return scratch;
|
||||
}
|
||||
|
||||
@ -657,21 +653,6 @@ public abstract class ValuesSource {
|
||||
long[] array = new long[2];
|
||||
int pos = Integer.MAX_VALUE;
|
||||
|
||||
final InPlaceMergeSorter sorter = new InPlaceMergeSorter() {
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
final long tmp = array[i];
|
||||
array[i] = array[j];
|
||||
array[j] = tmp;
|
||||
}
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
final long l1 = array[i];
|
||||
final long l2 = array[j];
|
||||
return Long.compare(l1, l2);
|
||||
}
|
||||
};
|
||||
|
||||
protected SortedUniqueLongValues(LongValues delegate) {
|
||||
super(delegate);
|
||||
}
|
||||
@ -702,22 +683,9 @@ public abstract class ValuesSource {
|
||||
|
||||
private static class SortedUniqueDoubleValues extends FilterDoubleValues {
|
||||
|
||||
int numUniqueValues;
|
||||
double[] array = new double[2];
|
||||
int pos = Integer.MAX_VALUE;
|
||||
|
||||
final InPlaceMergeSorter sorter = new InPlaceMergeSorter() {
|
||||
@Override
|
||||
protected void swap(int i, int j) {
|
||||
final double tmp = array[i];
|
||||
array[i] = array[j];
|
||||
array[j] = tmp;
|
||||
}
|
||||
@Override
|
||||
protected int compare(int i, int j) {
|
||||
return Double.compare(array[i], array[j]);
|
||||
}
|
||||
};
|
||||
private int numUniqueValues;
|
||||
private double[] array = new double[2];
|
||||
private int pos = Integer.MAX_VALUE;
|
||||
|
||||
SortedUniqueDoubleValues(DoubleValues delegate) {
|
||||
super(delegate);
|
||||
|
@ -22,9 +22,10 @@ import com.carrotsearch.hppc.ObjectIntOpenHashMap;
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import org.apache.lucene.util.ArrayUtil;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefHash;
|
||||
import org.elasticsearch.common.collect.BoundedTreeSet;
|
||||
import org.elasticsearch.common.lucene.HashedBytesRef;
|
||||
import org.elasticsearch.common.util.BigArrays;
|
||||
import org.elasticsearch.common.util.BytesRefHash;
|
||||
import org.elasticsearch.index.fielddata.BytesValues;
|
||||
import org.elasticsearch.search.facet.InternalFacet;
|
||||
import org.elasticsearch.search.facet.terms.TermsFacet;
|
||||
@ -39,7 +40,7 @@ public class HashedAggregator {
|
||||
private final HashCount assertHash = getAssertHash();
|
||||
|
||||
public HashedAggregator() {
|
||||
hash = new BytesRefHashHashCount(new BytesRefHash());
|
||||
hash = new BytesRefHashHashCount(new BytesRefHash(10, BigArrays.NON_RECYCLING_INSTANCE));
|
||||
}
|
||||
|
||||
public void onDoc(int docId, BytesValues values) {
|
||||
@ -158,7 +159,7 @@ public class HashedAggregator {
|
||||
|
||||
@Override
|
||||
public boolean add(BytesRef value, int hashCode, BytesValues values) {
|
||||
int key = hash.add(value, hashCode);
|
||||
int key = (int)hash.add(value, hashCode);
|
||||
if (key < 0) {
|
||||
key = ((-key) - 1);
|
||||
} else if (key >= counts.length) {
|
||||
@ -168,7 +169,7 @@ public class HashedAggregator {
|
||||
}
|
||||
|
||||
public boolean addNoCount(BytesRef value, int hashCode, BytesValues values) {
|
||||
int key = hash.add(value, hashCode);
|
||||
int key = (int)hash.add(value, hashCode);
|
||||
final boolean added = key >= 0;
|
||||
if (key < 0) {
|
||||
key = ((-key) - 1);
|
||||
@ -190,7 +191,7 @@ public class HashedAggregator {
|
||||
private int currentCount = -1;
|
||||
|
||||
BytesRefCountIteratorImpl() {
|
||||
this.size = hash.size();
|
||||
this.size = (int)hash.size();
|
||||
}
|
||||
|
||||
public BytesRef next() {
|
||||
@ -220,7 +221,7 @@ public class HashedAggregator {
|
||||
|
||||
@Override
|
||||
public int size() {
|
||||
return hash.size();
|
||||
return (int)hash.size();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -127,7 +127,7 @@ public class ScanContext {
|
||||
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
public static final RuntimeException StopCollectingException = new StopCollectingException();
|
||||
|
@ -32,8 +32,11 @@ import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.IntsRef;
|
||||
import org.apache.lucene.util.automaton.Automaton;
|
||||
import org.apache.lucene.util.fst.*;
|
||||
import org.apache.lucene.util.fst.ByteSequenceOutputs;
|
||||
import org.apache.lucene.util.fst.FST;
|
||||
import org.apache.lucene.util.fst.PairOutputs;
|
||||
import org.apache.lucene.util.fst.PairOutputs.Pair;
|
||||
import org.apache.lucene.util.fst.PositiveIntOutputs;
|
||||
import org.elasticsearch.common.regex.Regex;
|
||||
import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
|
||||
import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.CompletionLookupProvider;
|
||||
@ -55,7 +58,9 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
|
||||
|
||||
public static final String CODEC_NAME = "analyzing";
|
||||
public static final int CODEC_VERSION_START = 1;
|
||||
public static final int CODEC_VERSION_LATEST = 2;
|
||||
public static final int CODEC_VERSION_SERIALIZED_LABELS = 2;
|
||||
public static final int CODEC_VERSION_CHECKSUMS = 3;
|
||||
public static final int CODEC_VERSION_LATEST = CODEC_VERSION_CHECKSUMS;
|
||||
|
||||
private boolean preserveSep;
|
||||
private boolean preservePositionIncrements;
|
||||
@ -89,10 +94,11 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
try { /*
|
||||
* write the offsets per field such that we know where
|
||||
* we need to load the FSTs from
|
||||
*/
|
||||
try {
|
||||
/*
|
||||
* write the offsets per field such that we know where
|
||||
* we need to load the FSTs from
|
||||
*/
|
||||
long pointer = output.getFilePointer();
|
||||
output.writeVInt(fieldOffsets.size());
|
||||
for (Map.Entry<FieldInfo, Long> entry : fieldOffsets.entrySet()) {
|
||||
@ -100,7 +106,7 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
|
||||
output.writeVLong(entry.getValue());
|
||||
}
|
||||
output.writeLong(pointer);
|
||||
output.flush();
|
||||
CodecUtil.writeFooter(output);
|
||||
} finally {
|
||||
IOUtils.close(output);
|
||||
}
|
||||
@ -202,8 +208,12 @@ public class AnalyzingCompletionLookupProvider extends CompletionLookupProvider
|
||||
public LookupFactory load(IndexInput input) throws IOException {
|
||||
long sizeInBytes = 0;
|
||||
int version = CodecUtil.checkHeader(input, CODEC_NAME, CODEC_VERSION_START, CODEC_VERSION_LATEST);
|
||||
if (version >= CODEC_VERSION_CHECKSUMS) {
|
||||
CodecUtil.checksumEntireFile(input);
|
||||
}
|
||||
final long metaPointerPosition = input.length() - (version >= CODEC_VERSION_CHECKSUMS? 8 + CodecUtil.footerLength() : 8);
|
||||
final Map<String, AnalyzingSuggestHolder> lookupMap = new HashMap<>();
|
||||
input.seek(input.length() - 8);
|
||||
input.seek(metaPointerPosition);
|
||||
long metaPointer = input.readLong();
|
||||
input.seek(metaPointer);
|
||||
int numFields = input.readVInt();
|
||||
|
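The checksum additions above follow the Lucene 4.8 header/footer pattern: writeFooter when the output is closed, then checksumEntireFile plus footerLength when loading. A self-contained sketch under assumed names (the codec string, file name and payload are made up; only the CodecUtil calls come from the diff):

import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;

import java.io.IOException;

class FooterSketch {
    static final String CODEC = "example";      // hypothetical codec name
    static final int VERSION_CHECKSUMS = 3;     // mirrors CODEC_VERSION_CHECKSUMS above

    static void write(Directory dir) throws IOException {
        IndexOutput out = dir.createOutput("example.bin", IOContext.DEFAULT);
        try {
            CodecUtil.writeHeader(out, CODEC, VERSION_CHECKSUMS);
            out.writeVInt(42);                  // the real payload would go here
            CodecUtil.writeFooter(out);         // appends the checksum footer
        } finally {
            out.close();
        }
    }

    static int read(Directory dir) throws IOException {
        IndexInput in = dir.openInput("example.bin", IOContext.READONCE);
        try {
            int version = CodecUtil.checkHeader(in, CODEC, 1, VERSION_CHECKSUMS);
            if (version >= VERSION_CHECKSUMS) {
                CodecUtil.checksumEntireFile(in);   // verify the whole file before trusting it
            }
            return in.readVInt();
        } finally {
            in.close();
        }
    }
}
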
@ -25,7 +25,10 @@ import org.apache.lucene.index.*;
|
||||
import org.apache.lucene.index.FilterAtomicReader.FilterTerms;
|
||||
import org.apache.lucene.search.suggest.Lookup;
|
||||
import org.apache.lucene.store.IOContext.Context;
|
||||
import org.apache.lucene.store.*;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
import org.apache.lucene.store.IndexOutput;
|
||||
import org.apache.lucene.store.InputStreamDataInput;
|
||||
import org.apache.lucene.store.OutputStreamDataOutput;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.elasticsearch.ElasticsearchIllegalStateException;
|
||||
@ -54,6 +57,7 @@ public class Completion090PostingsFormat extends PostingsFormat {
|
||||
|
||||
public static final String CODEC_NAME = "completion090";
|
||||
public static final int SUGGEST_CODEC_VERSION = 1;
|
||||
public static final int SUGGEST_VERSION_CURRENT = SUGGEST_CODEC_VERSION;
|
||||
public static final String EXTENSION = "cmp";
|
||||
|
||||
private final static ESLogger logger = Loggers.getLogger(Completion090PostingsFormat.class);
|
||||
@ -110,7 +114,7 @@ public class Completion090PostingsFormat extends PostingsFormat {
|
||||
boolean success = false;
|
||||
try {
|
||||
output = state.directory.createOutput(suggestFSTFile, state.context);
|
||||
CodecUtil.writeHeader(output, CODEC_NAME, SUGGEST_CODEC_VERSION);
|
||||
CodecUtil.writeHeader(output, CODEC_NAME, SUGGEST_VERSION_CURRENT);
|
||||
/*
|
||||
* we write the delegate postings format name so we can load it
|
||||
* without getting an instance in the ctor
|
||||
@ -206,11 +210,12 @@ public class Completion090PostingsFormat extends PostingsFormat {
|
||||
|
||||
private final FieldsProducer delegateProducer;
|
||||
private final LookupFactory lookupFactory;
|
||||
private final int version;
|
||||
|
||||
public CompletionFieldsProducer(SegmentReadState state) throws IOException {
|
||||
String suggestFSTFile = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, EXTENSION);
|
||||
IndexInput input = state.directory.openInput(suggestFSTFile, state.context);
|
||||
CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_CODEC_VERSION);
|
||||
version = CodecUtil.checkHeader(input, CODEC_NAME, SUGGEST_CODEC_VERSION, SUGGEST_VERSION_CURRENT);
|
||||
FieldsProducer delegateProducer = null;
|
||||
boolean success = false;
|
||||
try {
|
||||
@ -273,6 +278,11 @@ public class Completion090PostingsFormat extends PostingsFormat {
|
||||
public long ramBytesUsed() {
|
||||
return (lookupFactory == null ? 0 : lookupFactory.ramBytesUsed()) + delegateProducer.ramBytesUsed();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void checkIntegrity() throws IOException {
|
||||
delegateProducer.checkIntegrity();
|
||||
}
|
||||
}
|
||||
|
||||
public static final class CompletionTerms extends FilterTerms {
|
||||
|
@ -19,7 +19,10 @@
|
||||
package org.elasticsearch.search.suggest.completion;
|
||||
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.*;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
|
||||
import org.apache.lucene.util.*;
|
||||
import org.apache.lucene.util.fst.Util;
|
||||
|
||||
@ -128,8 +131,8 @@ public final class CompletionTokenStream extends TokenStream {
|
||||
private CharsRef charsRef;
|
||||
|
||||
@Override
|
||||
public int fillBytesRef() {
|
||||
return bytes.hashCode();
|
||||
public void fillBytesRef() {
|
||||
// does nothing - we change in place
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -30,7 +30,6 @@ import org.apache.lucene.index.TrackingSerialMergeScheduler;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.elasticsearch.common.logging.Loggers;
|
||||
import org.elasticsearch.index.merge.EnableMergeScheduler;
|
||||
import org.elasticsearch.index.merge.Merges;
|
||||
import org.elasticsearch.test.ElasticsearchLuceneTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
@ -62,10 +61,10 @@ public class TrackingSerialMergeSchedulerTests extends ElasticsearchLuceneTestCa
|
||||
// 4 merge runs to work out through the pending merges
|
||||
for (int i = 0; i < 4; i++) {
|
||||
assertTrue(iw.hasPendingMerges());
|
||||
Merges.maybeMerge(iw);
|
||||
iw.maybeMerge();
|
||||
assertTrue(iw.hasPendingMerges());
|
||||
}
|
||||
Merges.maybeMerge(iw);
|
||||
iw.maybeMerge();
|
||||
assertFalse(iw.hasPendingMerges());
|
||||
|
||||
iw.close(false);
|
||||
|
@ -32,7 +32,7 @@ import org.apache.lucene.search.similarities.BM25Similarity;
|
||||
import org.apache.lucene.search.similarities.DefaultSimilarity;
|
||||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.elasticsearch.test.ElasticsearchLuceneTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
@ -176,9 +176,9 @@ public class BlendedTermQueryTest extends ElasticsearchLuceneTestCase {
|
||||
for (int j = 0; j < iters; j++) {
|
||||
String[] fields = new String[1 + random().nextInt(10)];
|
||||
for (int i = 0; i < fields.length; i++) {
|
||||
fields[i] = _TestUtil.randomRealisticUnicodeString(random(), 1, 10);
|
||||
fields[i] = TestUtil.randomRealisticUnicodeString(random(), 1, 10);
|
||||
}
|
||||
String term = _TestUtil.randomRealisticUnicodeString(random(), 1, 10);
|
||||
String term = TestUtil.randomRealisticUnicodeString(random(), 1, 10);
|
||||
Term[] terms = toTerms(fields, term);
|
||||
boolean disableCoord = random().nextBoolean();
|
||||
boolean useBoolean = random().nextBoolean();
|
||||
@ -213,7 +213,7 @@ public class BlendedTermQueryTest extends ElasticsearchLuceneTestCase {
|
||||
Set<Term> terms = new HashSet<>();
|
||||
int num = scaledRandomIntBetween(1, 10);
|
||||
for (int i = 0; i < num; i++) {
|
||||
terms.add(new Term(_TestUtil.randomRealisticUnicodeString(random(), 1, 10), _TestUtil.randomRealisticUnicodeString(random(), 1, 10)));
|
||||
terms.add(new Term(TestUtil.randomRealisticUnicodeString(random(), 1, 10), TestUtil.randomRealisticUnicodeString(random(), 1, 10)));
|
||||
}
|
||||
|
||||
BlendedTermQuery blendedTermQuery = random().nextBoolean() ? BlendedTermQuery.dismaxBlendedQuery(terms.toArray(new Term[0]), random().nextFloat()) :
|
||||
|
@ -37,7 +37,6 @@ import org.elasticsearch.index.fielddata.IndexNumericFieldData;
|
||||
import org.elasticsearch.index.mapper.ContentPath;
|
||||
import org.elasticsearch.index.mapper.Mapper.BuilderContext;
|
||||
import org.elasticsearch.index.mapper.core.LongFieldMapper;
|
||||
import org.elasticsearch.index.merge.Merges;
|
||||
import org.elasticsearch.indices.fielddata.breaker.DummyCircuitBreakerService;
|
||||
|
||||
import java.util.Random;
|
||||
@ -142,7 +141,7 @@ public class LongFieldDataBenchmark {
|
||||
}
|
||||
indexWriter.addDocument(doc);
|
||||
}
|
||||
Merges.forceMerge(indexWriter, 1);
|
||||
indexWriter.forceMerge(1, true);
|
||||
indexWriter.close();
|
||||
|
||||
final DirectoryReader dr = DirectoryReader.open(dir);
|
||||
|
@ -34,7 +34,6 @@ import org.elasticsearch.common.Numbers;
|
||||
import org.elasticsearch.common.lucene.Lucene;
|
||||
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
|
||||
import org.elasticsearch.index.mapper.internal.VersionFieldMapper;
|
||||
import org.elasticsearch.index.merge.Merges;
|
||||
import org.elasticsearch.index.merge.policy.ElasticsearchMergePolicy;
|
||||
import org.elasticsearch.test.ElasticsearchLuceneTestCase;
|
||||
import org.hamcrest.MatcherAssert;
|
||||
@ -267,7 +266,7 @@ public class VersionsTests extends ElasticsearchLuceneTestCase {
|
||||
.put("1", 0L).put("2", 0L).put("3", 0L).put("4", 4L).put("5", 5L).put("6", 6L).build();
|
||||
|
||||
// Force merge and check versions
|
||||
Merges.forceMerge(iw, 1);
|
||||
iw.forceMerge(1, true);
|
||||
final AtomicReader ir = SlowCompositeReaderWrapper.wrap(DirectoryReader.open(iw.getDirectory()));
|
||||
final NumericDocValues versions = ir.getNumericDocValues(VersionFieldMapper.NAME);
|
||||
assertThat(versions, notNullValue());
|
||||
|
@ -23,7 +23,7 @@ import com.carrotsearch.hppc.ObjectLongMap;
|
||||
import com.carrotsearch.hppc.ObjectLongOpenHashMap;
|
||||
import com.carrotsearch.hppc.cursors.ObjectLongCursor;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.apache.lucene.util.TestUtil;
|
||||
import org.elasticsearch.test.ElasticsearchTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
@ -100,7 +100,7 @@ public class BytesRefHashTests extends ElasticsearchTestCase {
|
||||
for (int i = 0; i < 797; i++) {
|
||||
String str;
|
||||
do {
|
||||
str = _TestUtil.randomRealisticUnicodeString(getRandom(), 1000);
|
||||
str = TestUtil.randomRealisticUnicodeString(getRandom(), 1000);
|
||||
} while (str.length() == 0);
|
||||
ref.copyChars(str);
|
||||
long count = hash.size();
|
||||
@ -133,7 +133,7 @@ public class BytesRefHashTests extends ElasticsearchTestCase {
|
||||
for (int i = 0; i < 797; i++) {
|
||||
String str;
|
||||
do {
|
||||
str = _TestUtil.randomRealisticUnicodeString(getRandom(), 1000);
|
||||
str = TestUtil.randomRealisticUnicodeString(getRandom(), 1000);
|
||||
} while (str.length() == 0);
|
||||
ref.copyChars(str);
|
||||
long count = hash.size();
|
||||
@ -173,7 +173,7 @@ public class BytesRefHashTests extends ElasticsearchTestCase {
|
||||
for (int i = 0; i < 797; i++) {
|
||||
String str;
|
||||
do {
|
||||
str = _TestUtil.randomRealisticUnicodeString(getRandom(), 1000);
|
||||
str = TestUtil.randomRealisticUnicodeString(getRandom(), 1000);
|
||||
} while (str.length() == 0);
|
||||
ref.copyChars(str);
|
||||
long count = hash.size();
|
||||
@ -209,7 +209,7 @@ public class BytesRefHashTests extends ElasticsearchTestCase {
|
||||
for (int i = 0; i < 797; i++) {
|
||||
String str;
|
||||
do {
|
||||
str = _TestUtil.randomRealisticUnicodeString(getRandom(), 1000);
|
||||
str = TestUtil.randomRealisticUnicodeString(getRandom(), 1000);
|
||||
} while (str.length() == 0);
|
||||
ref.copyChars(str);
|
||||
long count = hash.size();
|
||||
|
@ -21,12 +21,16 @@ package org.elasticsearch.common.util;
|
||||
|
||||
import com.google.common.collect.ImmutableList;
|
||||
import com.google.common.collect.Iterables;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.BytesRefArray;
|
||||
import org.apache.lucene.util.Counter;
|
||||
import org.elasticsearch.test.ElasticsearchTestCase;
|
||||
import org.junit.Test;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.*;
|
||||
|
||||
import static org.hamcrest.Matchers.equalTo;
|
||||
import static org.hamcrest.Matchers.is;
|
||||
|
||||
public class CollectionUtilsTests extends ElasticsearchTestCase {
|
||||
|
||||
@ -61,4 +65,67 @@ public class CollectionUtilsTests extends ElasticsearchTestCase {
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSortAndDedupByteRefArray() {
|
||||
SortedSet<BytesRef> set = new TreeSet<>();
|
||||
final int numValues = scaledRandomIntBetween(0, 10000);
|
||||
List<BytesRef> tmpList = new ArrayList<>();
|
||||
BytesRefArray array = new BytesRefArray(Counter.newCounter());
|
||||
for (int i = 0; i < numValues; i++) {
|
||||
String s = randomRealisticUnicodeOfCodepointLengthBetween(1, 100);
|
||||
set.add(new BytesRef(s));
|
||||
tmpList.add(new BytesRef(s));
|
||||
array.append(new BytesRef(s));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
Collections.shuffle(tmpList, getRandom());
|
||||
for (BytesRef ref : tmpList) {
|
||||
array.append(ref);
|
||||
}
|
||||
}
|
||||
int[] indices = new int[array.size()];
|
||||
for (int i = 0; i < indices.length; i++) {
|
||||
indices[i] = i;
|
||||
}
|
||||
int numUnique = CollectionUtils.sortAndDedup(array, indices);
|
||||
assertThat(numUnique, equalTo(set.size()));
|
||||
Iterator<BytesRef> iterator = set.iterator();
|
||||
|
||||
BytesRef spare = new BytesRef();
|
||||
for (int i = 0; i < numUnique; i++) {
|
||||
assertThat(iterator.hasNext(), is(true));
|
||||
assertThat(array.get(spare, indices[i]), equalTo(iterator.next()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSortByteRefArray() {
|
||||
List<BytesRef> values = new ArrayList<>();
|
||||
final int numValues = scaledRandomIntBetween(0, 10000);
|
||||
BytesRefArray array = new BytesRefArray(Counter.newCounter());
|
||||
for (int i = 0; i < numValues; i++) {
|
||||
String s = randomRealisticUnicodeOfCodepointLengthBetween(1, 100);
|
||||
values.add(new BytesRef(s));
|
||||
array.append(new BytesRef(s));
|
||||
}
|
||||
if (randomBoolean()) {
|
||||
Collections.shuffle(values, getRandom());
|
||||
}
|
||||
int[] indices = new int[array.size()];
|
||||
for (int i = 0; i < indices.length; i++) {
|
||||
indices[i] = i;
|
||||
}
|
||||
CollectionUtils.sort(array, indices);
|
||||
Collections.sort(values);
|
||||
Iterator<BytesRef> iterator = values.iterator();
|
||||
|
||||
BytesRef spare = new BytesRef();
|
||||
for (int i = 0; i < values.size(); i++) {
|
||||
assertThat(iterator.hasNext(), is(true));
|
||||
assertThat(array.get(spare, indices[i]), equalTo(iterator.next()));
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,220 @@
|
||||
/*
|
||||
* Licensed to Elasticsearch under one or more contributor
|
||||
* license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright
|
||||
* ownership. Elasticsearch licenses this file to you under
|
||||
* the Apache License, Version 2.0 (the "License"); you may
|
||||
* not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
|
||||
import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory;
|
||||
import org.elasticsearch.test.ElasticsearchTestCase;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.TreeSet;
|
||||
|
||||
/**
|
||||
* Alerts us if new analyzers are added to lucene, so we don't miss them.
|
||||
* <p>
|
||||
* If we don't want to expose one for a specific reason, just map it to Void
|
||||
*/
|
||||
public class AnalysisFactoryTests extends ElasticsearchTestCase {
|
||||
|
||||
static final Map<String,Class<?>> KNOWN_TOKENIZERS = new HashMap<String,Class<?>>() {{
|
||||
// deprecated ones, we don't care about these
|
||||
put("arabicletter", Deprecated.class);
|
||||
put("chinese", Deprecated.class);
|
||||
put("cjk", Deprecated.class);
|
||||
put("russianletter", Deprecated.class);
|
||||
|
||||
// exposed in ES
|
||||
put("edgengram", EdgeNGramTokenizerFactory.class);
|
||||
put("keyword", KeywordTokenizerFactory.class);
|
||||
put("letter", LetterTokenizerFactory.class);
|
||||
put("lowercase", LowerCaseTokenizerFactory.class);
|
||||
put("ngram", NGramTokenizerFactory.class);
|
||||
put("pathhierarchy", PathHierarchyTokenizerFactory.class);
|
||||
put("pattern", PatternTokenizerFactory.class);
|
||||
put("standard", StandardTokenizerFactory.class);
|
||||
put("uax29urlemail", UAX29URLEmailTokenizerFactory.class);
|
||||
put("whitespace", WhitespaceTokenizerFactory.class);
|
||||
|
||||
// TODO: these tokenizers are not yet exposed: useful?
|
||||
|
||||
// historical version of standardtokenizer... tries to recognize
|
||||
// company names and a few other things. not good for asian languages etc.
|
||||
put("classic", Void.class);
|
||||
// we should add this, the thaiwordfilter is deprecated. this one has correct offsets
|
||||
put("thai", Void.class);
|
||||
// this one "seems to mess up offsets". probably shouldn't be a tokenizer...
|
||||
put("wikipedia", Void.class);
|
||||
}};
|
||||
|
||||
public void testTokenizers() {
|
||||
Set<String> missing = new TreeSet<String>(org.apache.lucene.analysis.util.TokenizerFactory.availableTokenizers());
|
||||
missing.removeAll(KNOWN_TOKENIZERS.keySet());
|
||||
assertTrue("new tokenizers found, please update KNOWN_TOKENIZERS: " + missing.toString(), missing.isEmpty());
|
||||
}
|
||||
|
||||
static final Map<String,Class<?>> KNOWN_TOKENFILTERS = new HashMap<String,Class<?>>() {{
|
||||
// deprecated ones, we don't care about these
|
||||
put("chinese", Deprecated.class);
|
||||
put("collationkey", Deprecated.class);
|
||||
put("position", Deprecated.class);
|
||||
put("thaiword", Deprecated.class);
|
||||
|
||||
|
||||
// exposed in ES
|
||||
put("arabicnormalization", ArabicNormalizationFilterFactory.class);
|
||||
put("arabicstem", ArabicStemTokenFilterFactory.class);
|
||||
put("asciifolding", ASCIIFoldingTokenFilterFactory.class);
|
||||
put("brazilianstem", BrazilianStemTokenFilterFactory.class);
|
||||
put("bulgarianstem", StemmerTokenFilterFactory.class);
|
||||
put("cjkbigram", CJKBigramFilterFactory.class);
|
||||
put("cjkwidth", CJKWidthFilterFactory.class);
|
||||
put("commongrams", CommonGramsTokenFilterFactory.class);
|
||||
put("commongramsquery", CommonGramsTokenFilterFactory.class);
|
||||
put("czechstem", CzechStemTokenFilterFactory.class);
|
||||
put("delimitedpayload", DelimitedPayloadTokenFilterFactory.class);
|
||||
put("dictionarycompoundword", DictionaryCompoundWordTokenFilterFactory.class);
|
||||
put("edgengram", EdgeNGramTokenFilterFactory.class);
|
||||
put("elision", ElisionTokenFilterFactory.class);
|
||||
put("englishminimalstem", StemmerTokenFilterFactory.class);
|
||||
put("englishpossessive", StemmerTokenFilterFactory.class);
|
||||
put("finnishlightstem", StemmerTokenFilterFactory.class);
|
||||
put("frenchlightstem", StemmerTokenFilterFactory.class);
|
||||
put("frenchminimalstem", StemmerTokenFilterFactory.class);
|
||||
put("germanstem", GermanStemTokenFilterFactory.class);
|
||||
put("germanlightstem", StemmerTokenFilterFactory.class);
|
||||
put("germanminimalstem", StemmerTokenFilterFactory.class);
|
||||
put("greeklowercase", LowerCaseTokenFilterFactory.class);
|
||||
put("greekstem", StemmerTokenFilterFactory.class);
|
||||
put("hindistem", StemmerTokenFilterFactory.class);
|
||||
put("hindistem", StemmerTokenFilterFactory.class);
|
||||
put("hungarianlightstem", StemmerTokenFilterFactory.class);
|
||||
put("hunspellstem", HunspellTokenFilterFactory.class);
|
||||
put("hyphenationcompoundword", HyphenationCompoundWordTokenFilterFactory.class);
|
||||
put("indonesianstem", StemmerTokenFilterFactory.class);
|
||||
put("italianlightstem", StemmerTokenFilterFactory.class);
|
||||
put("keepword", KeepWordFilterFactory.class);
|
||||
put("keywordmarker", KeywordMarkerTokenFilterFactory.class);
|
||||
put("kstem", KStemTokenFilterFactory.class);
|
||||
put("latvianstem", StemmerTokenFilterFactory.class);
|
||||
put("length", LengthTokenFilterFactory.class);
|
||||
put("limittokencount", LimitTokenCountFilterFactory.class);
|
||||
put("lowercase", LowerCaseTokenFilterFactory.class);
|
||||
put("ngram", NGramTokenFilterFactory.class);
|
||||
put("norwegianminimalstem", StemmerTokenFilterFactory.class);
|
||||
put("patterncapturegroup", PatternCaptureGroupTokenFilterFactory.class);
|
||||
put("patternreplace", PatternReplaceTokenFilterFactory.class);
|
||||
put("persiannormalization", PersianNormalizationFilterFactory.class);
|
||||
put("porterstem", PorterStemTokenFilterFactory.class);
|
||||
put("portugueselightstem", StemmerTokenFilterFactory.class);
|
||||
put("portugueseminimalstem", StemmerTokenFilterFactory.class);
|
||||
put("reversestring", ReverseTokenFilterFactory.class);
|
||||
put("russianlightstem", StemmerTokenFilterFactory.class);
|
||||
put("shingle", ShingleTokenFilterFactory.class);
|
||||
put("snowballporter", SnowballTokenFilterFactory.class);
|
||||
put("spanishlightstem", StemmerTokenFilterFactory.class);
|
||||
put("standard", StandardTokenFilterFactory.class);
|
||||
put("stemmeroverride", StemmerOverrideTokenFilterFactory.class);
|
||||
put("stop", StopTokenFilterFactory.class);
|
||||
put("swedishlightstem", StemmerTokenFilterFactory.class);
|
||||
put("synonym", SynonymTokenFilterFactory.class);
|
||||
put("trim", TrimTokenFilterFactory.class);
|
||||
put("truncate", TruncateTokenFilterFactory.class);
|
||||
put("turkishlowercase", LowerCaseTokenFilterFactory.class);
|
||||
put("worddelimiter", WordDelimiterTokenFilterFactory.class);
|
||||
|
||||
// TODO: these tokenfilters are not yet exposed: useful?
|
||||
|
||||
// useful for turkish language
|
||||
put("apostrophe", Void.class);
|
||||
// capitalizes tokens
|
||||
put("capitalization", Void.class);
|
||||
// cleans up after classic tokenizer
|
||||
put("classic", Void.class);
|
||||
// like length filter (but codepoints)
|
||||
put("codepointcount", Void.class);
|
||||
// galician language stemmers
|
||||
put("galicianminimalstem", Void.class);
|
||||
put("galicianstem", Void.class);
|
||||
// o+umlaut=oe type normalization for german
|
||||
put("germannormalization", Void.class);
|
||||
// hindi text normalization
|
||||
put("hindinormalization", Void.class);
|
||||
// puts hyphenated words back together
|
||||
put("hyphenatedwords", Void.class);
|
||||
// unicode normalization for indian languages
|
||||
put("indicnormalization", Void.class);
|
||||
// lowercasing for irish: add to LowerCase (has a stemmer, too)
|
||||
put("irishlowercase", Void.class);
|
||||
// repeats anything marked as keyword
|
||||
put("keywordrepeat", Void.class);
|
||||
// like limittokencount, but by position
|
||||
put("limittokenposition", Void.class);
|
||||
// ???
|
||||
put("numericpayload", Void.class);
|
||||
// RSLP stemmer for portuguese
|
||||
put("portuguesestem", Void.class);
|
||||
// light stemming for norwegian (has nb/nn options too)
|
||||
put("norwegianlightstem", Void.class);
|
||||
// removes duplicates at the same position (this should be used by the existing factory)
|
||||
put("removeduplicates", Void.class);
|
||||
// accent handling for scandinavian languages
|
||||
put("scandinavianfolding", Void.class);
|
||||
// less aggressive accent handling for scandinavian languages
|
||||
put("scandinaviannormalization", Void.class);
|
||||
// kurdish language support
|
||||
put("soraninormalization", Void.class);
|
||||
put("soranistem", Void.class);
|
||||
// ???
|
||||
put("tokenoffsetpayload", Void.class);
|
||||
// like a stop filter but by token-type
|
||||
put("type", Void.class);
|
||||
// puts the type into the payload
|
||||
put("typeaspayload", Void.class);
|
||||
// opposite of lowercase...
|
||||
put("uppercase", Void.class);
|
||||
}};
|
||||
|
||||
public void testTokenFilters() {
|
||||
Set<String> missing = new TreeSet<String>(org.apache.lucene.analysis.util.TokenFilterFactory.availableTokenFilters());
|
||||
missing.removeAll(KNOWN_TOKENFILTERS.keySet());
|
||||
assertTrue("new tokenfilters found, please update KNOWN_TOKENFILTERS: " + missing.toString(), missing.isEmpty());
|
||||
}
|
||||
|
||||
static final Map<String,Class<?>> KNOWN_CHARFILTERS = new HashMap<String,Class<?>>() {{
|
||||
// exposed in ES
|
||||
put("htmlstrip", HtmlStripCharFilterFactory.class);
|
||||
put("mapping", MappingCharFilterFactory.class);
|
||||
put("patternreplace", PatternReplaceCharFilterFactory.class);
|
||||
|
||||
// TODO: these charfilters are not yet exposed: useful?
|
||||
// handling of zwnj for persian
|
||||
put("persian", Void.class);
|
||||
}};
|
||||
|
||||
public void testCharFilters() {
|
||||
Set<String> missing = new TreeSet<String>(org.apache.lucene.analysis.util.CharFilterFactory.availableCharFilters());
|
||||
missing.removeAll(KNOWN_CHARFILTERS.keySet());
|
||||
assertTrue("new charfilters found, please update KNOWN_CHARFILTERS: " + missing.toString(), missing.isEmpty());
|
||||
}
|
||||
|
||||
|
||||
}
|
@ -18,7 +18,6 @@
|
||||
*/
|
||||
package org.elasticsearch.index.analysis;
|
||||
|
||||
import org.elasticsearch.common.inject.ProvisionException;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.test.ElasticsearchTestCase;
|
||||
import org.junit.Test;
|
||||
@ -59,46 +58,4 @@ public class HunspellTokenFilterFactoryTests extends ElasticsearchTestCase {
|
||||
assertThat(hunspellTokenFilter.dedup(), is(false));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDefaultRecursionLevel() throws IOException {
|
||||
Settings settings = settingsBuilder()
|
||||
.put("path.conf", getResource("/indices/analyze/conf_dir"))
|
||||
.put("index.analysis.filter.en_US.type", "hunspell")
|
||||
.put("index.analysis.filter.en_US.locale", "en_US")
|
||||
.build();
|
||||
|
||||
AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
|
||||
TokenFilterFactory tokenFilter = analysisService.tokenFilter("en_US");
|
||||
assertThat(tokenFilter, instanceOf(HunspellTokenFilterFactory.class));
|
||||
HunspellTokenFilterFactory hunspellTokenFilter = (HunspellTokenFilterFactory) tokenFilter;
|
||||
assertThat(hunspellTokenFilter.recursionLevel(), is(2));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCustomRecursionLevel() throws IOException {
|
||||
Settings settings = settingsBuilder()
|
||||
.put("path.conf", getResource("/indices/analyze/conf_dir"))
|
||||
.put("index.analysis.filter.en_US.type", "hunspell")
|
||||
.put("index.analysis.filter.en_US.recursion_level", 0)
|
||||
.put("index.analysis.filter.en_US.locale", "en_US")
|
||||
.build();
|
||||
|
||||
AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
|
||||
TokenFilterFactory tokenFilter = analysisService.tokenFilter("en_US");
|
||||
assertThat(tokenFilter, instanceOf(HunspellTokenFilterFactory.class));
|
||||
HunspellTokenFilterFactory hunspellTokenFilter = (HunspellTokenFilterFactory) tokenFilter;
|
||||
assertThat(hunspellTokenFilter.recursionLevel(), is(0));
|
||||
}
|
||||
|
||||
@Test(expected = ProvisionException.class)
|
||||
public void negativeRecursionLevelShouldFail() throws IOException {
|
||||
Settings settings = settingsBuilder()
|
||||
.put("path.conf", getResource("/indices/analyze/conf_dir"))
|
||||
.put("index.analysis.filter.en_US.type", "hunspell")
|
||||
.put("index.analysis.filter.en_US.recursion_level", -1)
|
||||
.put("index.analysis.filter.en_US.locale", "en_US")
|
||||
.build();
|
||||
AnalysisTestsHelper.createAnalysisServiceFromSettings(settings);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -125,4 +125,36 @@ public class WordDelimiterTokenFilterFactoryTests extends ElasticsearchTokenStre
        assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
    }

    /** Correct offset order when doing both parts and concatenation: PowerShot is a synonym of Power */
    @Test
    public void testPartsAndCatenate() throws IOException {
        AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settingsBuilder()
                .put("index.analysis.filter.my_word_delimiter.type", "word_delimiter")
                .put("index.analysis.filter.my_word_delimiter.catenate_words", "true")
                .put("index.analysis.filter.my_word_delimiter.generate_word_parts", "true")
                .build());
        TokenFilterFactory tokenFilter = analysisService.tokenFilter("my_word_delimiter");
        String source = "PowerShot";
        String[] expected = new String[]{"Power", "PowerShot", "Shot" };
        Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(source));
        assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
    }

    /** Back compat:
     * old offset order when doing both parts and concatenation: PowerShot is a synonym of Shot */
    @Test
    public void testDeprecatedPartsAndCatenate() throws IOException {
        AnalysisService analysisService = AnalysisTestsHelper.createAnalysisServiceFromSettings(settingsBuilder()
                .put("index.analysis.filter.my_word_delimiter.type", "word_delimiter")
                .put("index.analysis.filter.my_word_delimiter.catenate_words", "true")
                .put("index.analysis.filter.my_word_delimiter.generate_word_parts", "true")
                .put("index.analysis.filter.my_word_delimiter.version", "4.7")
                .build());
        TokenFilterFactory tokenFilter = analysisService.tokenFilter("my_word_delimiter");
        String source = "PowerShot";
        String[] expected = new String[]{"Power", "Shot", "PowerShot" };
        Tokenizer tokenizer = new WhitespaceTokenizer(TEST_VERSION_CURRENT, new StringReader(source));
        assertTokenStreamContents(tokenFilter.create(tokenizer), expected);
    }

}
@ -32,7 +32,6 @@ import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.index.codec.postingsformat.BloomFilterPostingsFormat;
import org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat;
import org.elasticsearch.index.mapper.internal.UidFieldMapper;
import org.elasticsearch.index.merge.Merges;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.Test;

@ -94,7 +93,7 @@ public class DefaultPostingsFormatTests extends ElasticsearchTestCase {
        for (int i = 0; i < 100; i++) {
            writer.addDocument(Arrays.asList(new TextField("foo", "foo bar foo bar", Store.YES), new TextField("some_other_field", "1234", Store.YES)));
        }
        Merges.forceMerge(writer, 1);
        writer.forceMerge(1, true);
        writer.commit();

        DirectoryReader reader = DirectoryReader.open(writer, false);
@ -25,8 +25,8 @@ import com.carrotsearch.randomizedtesting.annotations.ThreadLeakScope;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.BasePostingsFormatTestCase;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.TimeUnits;
import org.apache.lucene.util._TestUtil;
import org.elasticsearch.index.codec.postingsformat.Elasticsearch090PostingsFormat;
import org.elasticsearch.test.ElasticsearchThreadFilter;
import org.elasticsearch.test.junit.listeners.ReproduceInfoPrinter;

@ -42,7 +42,7 @@ public class ElasticsearchPostingsFormatTest extends BasePostingsFormatTestCase

    @Override
    protected Codec getCodec() {
        return _TestUtil.alwaysPostingsFormat(new Elasticsearch090PostingsFormat());
        return TestUtil.alwaysPostingsFormat(new Elasticsearch090PostingsFormat());
    }

}
@ -31,7 +31,6 @@ import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.Version;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.bytes.BytesReference;

@ -322,9 +321,6 @@ public class InternalEngineTests extends ElasticsearchTestCase {
        assertThat(segments.get(2).isCompound(), equalTo(true));
    }

    static {
        assert Version.LUCENE_47.onOrAfter(Lucene.VERSION) : "LUCENE-5481 is fixed, improve test below";
    }

    @Test
    public void testSegmentsWithMergeFlag() throws Exception {

@ -396,17 +392,16 @@
        }

        // forcing an optimize will merge this single segment shard
        // TODO: put a random boolean again once LUCENE-5481 is fixed
        final boolean force = true; // randomBoolean();
        waitTillMerge.set(new CountDownLatch(1));
        waitForMerge.set(new CountDownLatch(1));
        final boolean force = randomBoolean();
        if (force) {
            waitTillMerge.set(new CountDownLatch(1));
            waitForMerge.set(new CountDownLatch(1));
        }
        engine.optimize(new Engine.Optimize().flush(true).maxNumSegments(1).force(force).waitForMerge(false));
        waitTillMerge.get().await();

        for (Segment segment : engine.segments()) {
            assertThat(segment.getMergeId(), force ? notNullValue() : nullValue());
        }

        waitForMerge.get().countDown();

        engine.close();
@ -35,8 +35,8 @@ import org.apache.lucene.search.join.ScoreMode;
import org.apache.lucene.search.join.ToParentBlockJoinQuery;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.OpenBitSet;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
import org.elasticsearch.common.lucene.search.NotFilter;
import org.elasticsearch.common.lucene.search.XFilteredQuery;
import org.elasticsearch.common.settings.ImmutableSettings;

@ -225,7 +225,7 @@ public abstract class AbstractStringFieldDataTests extends AbstractFieldDataImpl
        d.add(s);
        final String[] values = new String[randomIntBetween(2, 30)];
        for (int i = 1; i < values.length; ++i) {
            values[i] = _TestUtil.randomUnicodeString(getRandom());
            values[i] = TestUtil.randomUnicodeString(getRandom());
        }
        final int numDocs = scaledRandomIntBetween(10, 10000);
        for (int i = 0; i < numDocs; ++i) {

@ -283,7 +283,7 @@ public abstract class AbstractStringFieldDataTests extends AbstractFieldDataImpl
        d.add(s);
        final String[] values = new String[randomIntBetween(2, 10)];
        for (int i = 1; i < values.length; ++i) {
            values[i] = _TestUtil.randomUnicodeString(getRandom());
            values[i] = TestUtil.randomUnicodeString(getRandom());
        }
        final int numDocs = scaledRandomIntBetween(10, 10000);
        for (int i = 0; i < numDocs; ++i) {

@ -335,7 +335,7 @@ public abstract class AbstractStringFieldDataTests extends AbstractFieldDataImpl
    public void testNestedSorting(MultiValueMode sortMode) throws IOException {
        final String[] values = new String[randomIntBetween(2, 20)];
        for (int i = 0; i < values.length; ++i) {
            values[i] = _TestUtil.randomSimpleString(getRandom());
            values[i] = TestUtil.randomSimpleString(getRandom());
        }
        final int numParents = scaledRandomIntBetween(10, 10000);
        List<Document> docs = new ArrayList<>();

@ -379,7 +379,7 @@ public abstract class AbstractStringFieldDataTests extends AbstractFieldDataImpl
                missingValue = new BytesRef(RandomPicks.randomFrom(getRandom(), values));
                break;
            default:
                missingValue = new BytesRef(_TestUtil.randomSimpleString(getRandom()));
                missingValue = new BytesRef(TestUtil.randomSimpleString(getRandom()));
                break;
        }
        BytesRefFieldComparatorSource innerSource = new BytesRefFieldComparatorSource(fieldData, missingValue, sortMode);
@ -26,7 +26,6 @@ import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.index.fielddata.AtomicFieldData.WithOrdinals;
import org.elasticsearch.index.fielddata.ScriptDocValues.Strings;
import org.elasticsearch.index.fielddata.ordinals.Ordinals.Docs;
import org.elasticsearch.index.merge.Merges;
import org.junit.Test;

import java.util.Random;

@ -61,7 +60,7 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
            }
            writer.addDocument(d);
        }
        Merges.forceMerge(writer, 1);
        writer.forceMerge(1, true);
        AtomicReaderContext context = refreshReader();
        String[] formats = new String[] { "fst", "paged_bytes"};

@ -158,8 +157,8 @@ public class FilterFieldDataTest extends AbstractFieldDataTests {
            }
            writer.addDocument(d);
        }
        System.out.println(hundred + " " + ten + " " + five);
        Merges.forceMerge(writer, 1);
        logger.debug(hundred + " " + ten + " " + five);
        writer.forceMerge(1, true);
        AtomicReaderContext context = refreshReader();
        String[] formats = new String[] { "fst", "paged_bytes"};
        for (String format : formats) {
@ -27,7 +27,6 @@ import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.Term;
import org.elasticsearch.index.fielddata.plain.PackedArrayAtomicFieldData;
import org.elasticsearch.index.merge.Merges;
import org.joda.time.DateTimeZone;
import org.junit.Test;

@ -334,7 +333,7 @@ public class LongFieldDataTests extends AbstractNumericFieldDataTests {
            }
            writer.addDocument(doc);
        }
        Merges.forceMerge(writer, 1);
        writer.forceMerge(1, true);

        final IndexNumericFieldData indexFieldData = getForField("value");
        final AtomicNumericFieldData atomicFieldData = indexFieldData.load(refreshReader());
@ -18,9 +18,7 @@
 */
package org.elasticsearch.indices.analyze;

import org.apache.lucene.analysis.hunspell.HunspellDictionary;
import org.apache.lucene.util.Version;
import org.elasticsearch.common.lucene.Lucene;
import org.apache.lucene.analysis.hunspell.Dictionary;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.indices.analysis.HunspellService;

@ -28,7 +26,8 @@ import org.elasticsearch.test.ElasticsearchIntegrationTest;
import org.elasticsearch.test.ElasticsearchIntegrationTest.ClusterScope;
import org.junit.Test;

import static org.hamcrest.Matchers.equalTo;
import java.lang.reflect.Field;

import static org.hamcrest.Matchers.notNullValue;

/**

@ -46,11 +45,9 @@ public class HunspellServiceTests extends ElasticsearchIntegrationTest {
                .build();

        cluster().startNode(settings);
        HunspellDictionary dictionary = cluster().getInstance(HunspellService.class).getDictionary("en_US");
        Dictionary dictionary = cluster().getInstance(HunspellService.class).getDictionary("en_US");
        assertThat(dictionary, notNullValue());
        Version expectedVersion = Lucene.parseVersion(settings.get("indices.analysis.hunspell.version"), Lucene.ANALYZER_VERSION, logger);
        assertThat(dictionary.getVersion(), equalTo(expectedVersion));
        assertThat(dictionary.isIgnoreCase(), equalTo(true));
        assertIgnoreCase(true, dictionary);
    }

    @Test

@ -64,18 +61,16 @@ public class HunspellServiceTests extends ElasticsearchIntegrationTest {
                .build();

        cluster().startNode(settings);
        HunspellDictionary dictionary = cluster().getInstance(HunspellService.class).getDictionary("en_US");
        Dictionary dictionary = cluster().getInstance(HunspellService.class).getDictionary("en_US");
        assertThat(dictionary, notNullValue());
        Version expectedVersion = Lucene.parseVersion(settings.get("indices.analysis.hunspell.version"), Lucene.ANALYZER_VERSION, logger);
        assertThat(dictionary.getVersion(), equalTo(expectedVersion));
        assertThat(dictionary.isIgnoreCase(), equalTo(false));
        assertIgnoreCase(false, dictionary);

        // testing that dictionary specific settings override node level settings
        dictionary = cluster().getInstance(HunspellService.class).getDictionary("en_US_custom");
        assertThat(dictionary, notNullValue());
        assertThat(dictionary.getVersion(), equalTo(expectedVersion));
        assertThat(dictionary.isIgnoreCase(), equalTo(true));
        assertIgnoreCase(true, dictionary);
    }

    @Test

@ -85,8 +80,14 @@ public class HunspellServiceTests extends ElasticsearchIntegrationTest {
                .build();

        cluster().startNode(settings);
        HunspellDictionary dictionary = cluster().getInstance(HunspellService.class).getDictionary("en_US");
        Dictionary dictionary = cluster().getInstance(HunspellService.class).getDictionary("en_US");
        assertThat(dictionary, notNullValue());
    }

    // TODO: open up a getter on Dictionary
    private void assertIgnoreCase(boolean expected, Dictionary dictionary) throws Exception {
        Field f = Dictionary.class.getDeclaredField("ignoreCase");
        f.setAccessible(true);
        assertEquals(expected, f.getBoolean(dictionary));
    }
}
@ -44,7 +44,7 @@ public class IndicesLeaksTests extends ElasticsearchIntegrationTest {

    @SuppressWarnings({"ConstantConditions", "unchecked"})
    @Test
    @BadApple
    @BadApple(bugUrl = "https://github.com/elasticsearch/elasticsearch/issues/3232")
    public void testIndexShardLifecycleLeak() throws Exception {

        client().admin().indices().prepareCreate("test")
@ -27,20 +27,29 @@ import org.elasticsearch.script.SearchScript;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.Test;

import java.util.ArrayList;
import java.util.Map;

import static org.hamcrest.Matchers.lessThan;
public class FieldDataSourceTests extends ElasticsearchTestCase {

    private static BytesValues randomBytesValues() {
        final boolean multiValued = randomBoolean();
        final int maxLength = rarely() ? 3 : 10;
        return new BytesValues(multiValued) {
            BytesRef previous;
            @Override
            public int setDocument(int docId) {
                return randomInt(multiValued ? 10 : 1);
            }
            @Override
            public BytesRef nextValue() {
                scratch.copyChars(randomAsciiOfLength(10));
                if (previous != null && randomBoolean()) {
                    scratch.copyBytes(previous);
                } else {
                    scratch.copyChars(randomAsciiOfLength(maxLength));
                }
                previous = BytesRef.deepCopyOf(scratch);
                return scratch;
            }

@ -103,7 +112,8 @@ public class FieldDataSourceTests extends ElasticsearchTestCase {
    }

    private static void assertConsistent(BytesValues values) {
        for (int i = 0; i < 10; ++i) {
        final int numDocs = scaledRandomIntBetween(10, 100);
        for (int i = 0; i < numDocs; ++i) {
            final int valueCount = values.setDocument(i);
            for (int j = 0; j < valueCount; ++j) {
                final BytesRef term = values.nextValue();

@ -136,6 +146,23 @@ public class FieldDataSourceTests extends ElasticsearchTestCase {
    @Test
    public void sortedUniqueBytesValues() {
        assertConsistent(new ValuesSource.Bytes.SortedAndUnique.SortedUniqueBytesValues(randomBytesValues()));
        assertSortedAndUnique(new ValuesSource.Bytes.SortedAndUnique.SortedUniqueBytesValues(randomBytesValues()));
    }

    private static void assertSortedAndUnique(BytesValues values) {
        final int numDocs = scaledRandomIntBetween(10, 100);
        ArrayList<BytesRef> ref = new ArrayList<BytesRef>();
        for (int i = 0; i < numDocs; ++i) {
            final int valueCount = values.setDocument(i);
            ref.clear();
            for (int j = 0; j < valueCount; ++j) {
                final BytesRef term = values.nextValue();
                if (j > 0) {
                    assertThat(BytesRef.getUTF8SortedAsUnicodeComparator().compare(ref.get(ref.size() - 1), term), lessThan(0));
                }
                ref.add(values.copyShared());
            }
        }
    }

}
@ -20,7 +20,6 @@ package org.elasticsearch.search.child;

import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.ElasticsearchIllegalArgumentException;
import org.elasticsearch.Version;
import org.elasticsearch.action.admin.indices.mapping.get.GetMappingsResponse;
import org.elasticsearch.action.admin.indices.mapping.put.PutMappingResponse;
import org.elasticsearch.action.admin.indices.stats.IndicesStatsResponse;

@ -565,10 +564,6 @@ public class SimpleChildQuerySearchTests extends ElasticsearchIntegrationTest {
        assertThat(searchResponse.getHits().getAt(0).sourceAsString(), containsString("\"p_value1_updated\""));
    }

    static {
        assert Version.CURRENT.luceneVersion == org.apache.lucene.util.Version.LUCENE_47 : "See comments in testDfsSearchType";
    }

    @Test
    public void testDfsSearchType() throws Exception {
        assertAcked(prepareCreate("test")
@ -167,7 +167,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
        SearchResponse search = client().prepareSearch().setQuery(matchQuery("body", "Test: http://www.facebook.com ").type(Type.PHRASE)).addHighlightedField("body").execute().actionGet();
        assertHighlight(search, 0, "body", 0, startsWith("<em>Test: http://www.facebook.com</em>"));
        search = client().prepareSearch().setQuery(matchQuery("body", "Test: http://www.facebook.com http://elasticsearch.org http://xing.com http://cnn.com http://quora.com http://twitter.com this is a test for highlighting feature Test: http://www.facebook.com http://elasticsearch.org http://xing.com http://cnn.com http://quora.com http://twitter.com this is a test for highlighting feature").type(Type.PHRASE)).addHighlightedField("body").execute().actionGet();
        assertHighlight(search, 0, "body", 0, equalTo("<em>Test</em>: <em>http</em>://<em>www</em>.<em>facebook</em>.com <em>http</em>://<em>elasticsearch</em>.<em>org</em> <em>http</em>://<em>xing</em>.com <em>http</em>://<em>cnn</em>.com <em>http</em>://<em>quora</em>.com"));
        assertHighlight(search, 0, "body", 0, equalTo("<em>Test</em>: <em>http://www.facebook.com</em> <em>http://elasticsearch.org</em> <em>http://xing.com</em> <em>http://cnn.com</em> http://quora.com"));
    }

    @Test
@ -21,8 +21,8 @@ package org.elasticsearch.search.sort;


import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.TestUtil;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.lucene.util._TestUtil;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException;

@ -128,7 +128,7 @@ public class SimpleSortTests extends ElasticsearchIntegrationTest {
            String docId = Integer.toString(i);
            BytesRef ref = null;
            do {
                ref = new BytesRef(_TestUtil.randomRealisticUnicodeString(random));
                ref = new BytesRef(TestUtil.randomRealisticUnicodeString(random));
            } while (denseBytes.containsKey(ref));
            denseBytes.put(ref, docId);
            XContentBuilder src = jsonBuilder().startObject().field("dense_bytes", ref.utf8ToString());
@ -42,7 +42,6 @@ import org.elasticsearch.index.codec.postingsformat.PreBuiltPostingsFormatProvid
import org.elasticsearch.index.mapper.FieldMapper.Names;
import org.elasticsearch.index.mapper.core.AbstractFieldMapper;
import org.elasticsearch.index.mapper.core.CompletionFieldMapper;
import org.elasticsearch.index.merge.Merges;
import org.elasticsearch.search.suggest.SuggestUtils;
import org.elasticsearch.search.suggest.completion.Completion090PostingsFormat.LookupFactory;
import org.elasticsearch.search.suggest.context.ContextMapping;

@ -54,6 +53,7 @@ import java.lang.reflect.Field;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Set;

import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.is;

@ -180,6 +180,16 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase {
                return false;
            }

            @Override
            public Set<BytesRef> contexts() {
                return null;
            }

            @Override
            public boolean hasContexts() {
                return false;
            }

        };
        InputIterator iter;
        if (usePayloads) {

@ -208,6 +218,16 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase {
                public boolean hasPayloads() {
                    return true;
                }

                @Override
                public Set<BytesRef> contexts() {
                    return null;
                }

                @Override
                public boolean hasContexts() {
                    return false;
                }
            };
        } else {
            iter = primaryIter;

@ -275,7 +295,7 @@ public class CompletionPostingsFormatTest extends ElasticsearchTestCase {
            writer.addDocument(doc);
        }
        writer.commit();
        Merges.forceMerge(writer, 1);
        writer.forceMerge(1, true);
        writer.commit();
        DirectoryReader reader = DirectoryReader.open(writer, true);
        assertThat(reader.leaves().size(), equalTo(1));
@ -765,7 +765,7 @@ public final class TestCluster extends ImmutableTestCluster {
        if (!dataDirToClean.isEmpty()) {
            boolean deleted = false;
            try {
                deleted = FileSystemUtils.deleteRecursively(dataDirToClean.toArray(new File[dataDirToClean.size()]), false);
                deleted = FileSystemUtils.deleteSubDirectories(dataDirToClean.toArray(new File[dataDirToClean.size()]));
            } finally {
                logger.info("Wipe data directory for all nodes locations: {} success: {}", this.dataDirToClean, deleted);
                this.dataDirToClean.clear();