Merge branch 'master' into feature/translog_checkpoints

Conflicts:
	src/main/java/org/elasticsearch/index/engine/InternalEngine.java
Simon Willnauer 2015-05-17 20:11:19 +02:00
commit f9f0e99eae
34 changed files with 1250 additions and 3667 deletions

View File

@@ -218,3 +218,6 @@ See the {client}/net-api/current/index.html[official Elasticsearch .NET client].
=== R
* https://github.com/Tomesch/elasticsearch[elasticsearch]:
R client for Elasticsearch
* https://github.com/ropensci/elastic[elastic]:
A general purpose R client for Elasticsearch

View File

@@ -7,16 +7,13 @@ via a regular expression. Accepts the following settings:
The following are settings that can be set for a `pattern` analyzer
type:
[cols="<,<",options="header",]
|===================================================================
|Setting |Description
|`lowercase` |Should terms be lowercased or not. Defaults to `true`.
|`pattern` |The regular expression pattern, defaults to `\W+`.
|`flags` |The regular expression flags.
|`stopwords` |A list of stopwords to initialize the stop filter with.
Defaults to an 'empty' stopword list. Check
<<analysis-stop-analyzer,Stop Analyzer>> for more details.
|===================================================================
[horizontal]
`lowercase`:: Should terms be lowercased or not. Defaults to `true`.
`pattern`:: The regular expression pattern, defaults to `\W+`.
`flags`:: The regular expression flags.
`stopwords`:: A list of stopwords to initialize the stop filter with.
Defaults to an 'empty' stopword list. Check
<<analysis-stop-analyzer,Stop Analyzer>> for more details.
*IMPORTANT*: The regular expression should match the *token separators*,
not the tokens themselves.
@@ -29,101 +26,103 @@ Pattern API] for more details about `flags` options.
==== Pattern Analyzer Examples
In order to try out these examples, you should delete the `test` index
before running each example:
[source,js]
--------------------------------------------------
curl -XDELETE localhost:9200/test
--------------------------------------------------
before running each example.
[float]
===== Whitespace tokenizer
[source,js]
--------------------------------------------------
curl -XPUT 'localhost:9200/test' -d '
{
"settings":{
"analysis": {
"analyzer": {
"whitespace":{
"type": "pattern",
"pattern":"\\\\s+"
}
}
}
}
}'
DELETE test
curl 'localhost:9200/test/_analyze?pretty=1&analyzer=whitespace' -d 'foo,bar baz'
# "foo,bar", "baz"
PUT /test
{
"settings": {
"analysis": {
"analyzer": {
"whitespace": {
"type": "pattern",
"pattern": "\\s+"
}
}
}
}
}
GET /test/_analyze?analyzer=whitespace&text=foo,bar baz
# "foo,bar", "baz"
--------------------------------------------------
// AUTOSENSE
[float]
===== Non-word character tokenizer
[source,js]
--------------------------------------------------
DELETE test
curl -XPUT 'localhost:9200/test' -d '
{
"settings":{
"analysis": {
"analyzer": {
"nonword":{
"type": "pattern",
"pattern":"[^\\\\w]+"
}
}
}
PUT /test
{
"settings": {
"analysis": {
"analyzer": {
"nonword": {
"type": "pattern",
"pattern": "[^\\w]+" <1>
}
}'
}
}
}
}
curl 'localhost:9200/test/_analyze?pretty=1&analyzer=nonword' -d 'foo,bar baz'
# "foo,bar baz" becomes "foo", "bar", "baz"
GET /test/_analyze?analyzer=nonword&text=foo,bar baz
# "foo,bar baz" becomes "foo", "bar", "baz"
curl 'localhost:9200/test/_analyze?pretty=1&analyzer=nonword' -d 'type_1-type_4'
# "type_1","type_4"
GET /test/_analyze?analyzer=nonword&text=type_1-type_4
# "type_1","type_4"
--------------------------------------------------
// AUTOSENSE
[float]
===== CamelCase tokenizer
[source,js]
--------------------------------------------------
DELETE test
curl -XPUT 'localhost:9200/test?pretty=1' -d '
{
"settings":{
"analysis": {
"analyzer": {
"camel":{
"type": "pattern",
"pattern":"([^\\\\p{L}\\\\d]+)|(?<=\\\\D)(?=\\\\d)|(?<=\\\\d)(?=\\\\D)|(?<=[\\\\p{L}&&[^\\\\p{Lu}]])(?=\\\\p{Lu})|(?<=\\\\p{Lu})(?=\\\\p{Lu}[\\\\p{L}&&[^\\\\p{Lu}]])"
}
}
}
PUT /test?pretty=1
{
"settings": {
"analysis": {
"analyzer": {
"camel": {
"type": "pattern",
"pattern": "([^\\p{L}\\d]+)|(?<=\\D)(?=\\d)|(?<=\\d)(?=\\D)|(?<=[\\p{L}&&[^\\p{Lu}]])(?=\\p{Lu})|(?<=\\p{Lu})(?=\\p{Lu}[\\p{L}&&[^\\p{Lu}]])"
}
}'
}
}
}
}
curl 'localhost:9200/test/_analyze?pretty=1&analyzer=camel' -d '
MooseX::FTPClass2_beta
'
# "moose","x","ftp","class","2","beta"
GET /test/_analyze?analyzer=camel&text=MooseX::FTPClass2_beta
# "moose","x","ftp","class","2","beta"
--------------------------------------------------
// AUTOSENSE
The regex above is easier to understand as:
[source,js]
--------------------------------------------------
([^\\p{L}\\d]+) # swallow non letters and numbers,
| (?<=\\D)(?=\\d) # or non-number followed by number,
| (?<=\\d)(?=\\D) # or number followed by non-number,
| (?<=[ \\p{L} && [^\\p{Lu}]]) # or lower case
(?=\\p{Lu}) # followed by upper case,
| (?<=\\p{Lu}) # or upper case
(?=\\p{Lu} # followed by upper case
[\\p{L}&&[^\\p{Lu}]] # then lower case
)
([^\p{L}\d]+) # swallow non letters and numbers,
| (?<=\D)(?=\d) # or non-number followed by number,
| (?<=\d)(?=\D) # or number followed by non-number,
| (?<=[ \p{L} && [^\p{Lu}]]) # or lower case
(?=\p{Lu}) # followed by upper case,
| (?<=\p{Lu}) # or upper case
(?=\p{Lu} # followed by upper case
[\p{L}&&[^\p{Lu}]] # then lower case
)
--------------------------------------------------

View File

@@ -219,8 +219,8 @@ the maximum allowed Levenshtein Edit Distance (or number of edits)
--
generates an edit distance based on the length of the term. For lengths:
`0..1`:: must match exactly
`1..5`:: one edit allowed
`0..2`:: must match exactly
`3..5`:: one edit allowed
`>5`:: two edits allowed
`AUTO` should generally be the preferred value for `fuzziness`.
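For example, a `match` query can request this behaviour explicitly. A minimal
sketch (the index and field names are illustrative):

[source,js]
--------------------------------------------------
GET /my_index/_search
{
    "query": {
        "match": {
            "title": {
                "query": "quikc brwn",
                "fuzziness": "AUTO"
            }
        }
    }
}
--------------------------------------------------

With `AUTO`, the five-character `quikc` and the four-character `brwn` each
tolerate a single edit, so documents containing `quick brown` still match.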

View File

@@ -498,7 +498,8 @@ systems and the provided start/stop scripts.
=== Analyze API
The Analyze API returns 0 as the first token's position instead of 1.
* The Analyze API returns 0 as the first token's position instead of 1.
* The `text()` method on `AnalyzeRequest` now returns `String[]` instead of `String`.
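A minimal sketch of the new contract (the `AnalyzeRequest` constructor shown is
an assumption for illustration):

[source,java]
--------------------------------------------------
AnalyzeRequest request = new AnalyzeRequest("my_index").text("first value", "second value");
String[] texts = request.text(); // now returns every value, not a single String
--------------------------------------------------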
=== Multiple data.path striping
@@ -586,3 +587,22 @@ Before version `1.1.0` the parent/child had its own in-memory data structures fo
Removed `id_cache` option from the clear cache apis. The `fielddata` option should be used to clear `_parent` field
from fielddata.
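For example, a sketch of the replacement call (the index name is illustrative):

[source,js]
--------------------------------------------------
POST /my_index/_cache/clear?fielddata=true
--------------------------------------------------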
[float]
=== Highlighting
The default value for the `require_field_match` option is `true` rather than
`false`, meaning that the highlighters will take the fields that were queried
into account by default. That means for instance that highlighting any field
when querying the `_all` field will produce no highlighted snippets by default,
given that the match was on the `_all` field only. Querying the same fields
that need to be highlighted is the cleaner solution to get highlighted snippets
back. Otherwise, the `require_field_match` option can be set to `false` to ignore
field names completely when highlighting.
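For example, the old behaviour can be restored on the other highlighters with a
request along these lines (a sketch; index and field names are illustrative):

[source,js]
--------------------------------------------------
GET /my_index/_search
{
    "query": { "match": { "title": "test" } },
    "highlight": {
        "require_field_match": false,
        "fields": { "title": {}, "body": {} }
    }
}
--------------------------------------------------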
The postings highlighter doesn't support the `require_field_match` option
anymore; it will only highlight fields that were queried.
The `match` query with type set to `match_phrase_prefix` is not supported by the
postings highlighter. No highlighted snippets will be returned.

View File

@@ -72,11 +72,9 @@ that the query is composed of, regardless of whether they are actually part of
a query match, effectively ignoring their positions.
[WARNING]
The postings highlighter does support highlighting of multi term queries, like
prefix queries, wildcard queries and so on. On the other hand, this requires
the queries to be rewritten using a proper
<<query-dsl-multi-term-rewrite,rewrite method>> that supports multi term
extraction, which is a potentially expensive operation.
The postings highlighter doesn't support highlighting some complex queries,
like a `match` query with `type` set to `match_phrase_prefix`. No highlighted
snippets will be returned in that case.
[[fast-vector-highlighter]]
==== Fast vector highlighter
@@ -406,10 +404,10 @@ at the field level.
[[field-match]]
==== Require Field Match
`require_field_match` can be set to `true` which will cause a field to
be highlighted only if a query matched that field. `false` means that
terms are highlighted on all requested fields regardless of whether the query
matches specifically on them.
`require_field_match` can be set to `false`, which will cause any field to
be highlighted regardless of whether the query matched that specific field.
The default behaviour is `true`, meaning that only fields that hold a query
match will be highlighted.
[[boundary-characters]]
==== Boundary Characters

View File

@@ -787,8 +787,6 @@
<!-- unit tests for test framework classes-->
<exclude>org/elasticsearch/test/test/**/*</exclude>
</excludes>
<!-- Resources are large and not really helpful as "test sources". -->
<excludeResources>true</excludeResources>
</configuration>
</execution>
</executions>
@@ -1519,6 +1517,7 @@
<signaturesFile>dev-tools/forbidden/all-signatures.txt</signaturesFile>
</signaturesFiles>
<signatures>${forbidden.test.signatures}</signatures>
<suppressAnnotations><annotation>**.SuppressForbidden</annotation></suppressAnnotations>
</configuration>
<phase>test-compile</phase>
<goals>
@@ -1539,6 +1538,7 @@
</goals>
<configuration>
<includes>
<include>rest-api-spec/**/*</include>
<include>org/elasticsearch/test/**/*</include>
<include>org/elasticsearch/bootstrap/BootstrapForTesting.class</include>
<include>org/elasticsearch/common/cli/CliToolTestCase.class</include>

View File

@@ -65,6 +65,8 @@ public class CustomPassageFormatter extends PassageFormatter {
//we remove the paragraph separator if present at the end of the snippet (we used it as separator between values)
if (sb.charAt(sb.length() - 1) == HighlightUtils.PARAGRAPH_SEPARATOR) {
sb.deleteCharAt(sb.length() - 1);
} else if (sb.charAt(sb.length() - 1) == HighlightUtils.NULL_SEPARATOR) {
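//likewise strip a trailing null separator, which can also be used as the separator between values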
sb.deleteCharAt(sb.length() - 1);
}
//and we trim the snippets too
snippets[j] = new Snippet(sb.toString().trim(), passage.score, passage.numMatches > 0);

View File

@@ -18,121 +18,85 @@
package org.apache.lucene.search.postingshighlight;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.Strings;
import org.elasticsearch.search.highlight.HighlightUtils;
import org.apache.lucene.search.Query;
import java.io.IOException;
import java.text.BreakIterator;
import java.util.List;
import java.util.Map;
/**
* Subclass of the {@link XPostingsHighlighter} that works for a single field in a single document.
It receives the field values as input and performs discrete highlighting on each single value,
calling the highlightDoc method multiple times.
It allows passing in the query terms to avoid extracting terms multiple times.
*
* The use that we make of the postings highlighter is not optimal. It would be much better to
* highlight multiple docs in a single call, as we actually lose its sequential IO. But that would require:
1) making our fork more complex and harder to maintain in order to perform discrete highlighting (needed to return
* a different snippet per value when number_of_fragments=0 and the field has multiple values)
* 2) refactoring of the elasticsearch highlight api which currently works per hit
* Subclass of the {@link PostingsHighlighter} that works for a single field in a single document.
* Uses a custom {@link PassageFormatter}. Accepts field content as a constructor argument, given that loading
is custom and can be done by reading from the _source field. Supports using a different {@link BreakIterator} to break
* the text into fragments. Considers every distinct field value as a discrete passage for highlighting (unless
* the whole content needs to be highlighted). Supports both returning empty snippets and non highlighted snippets
* when no highlighting can be performed.
*
* The use that we make of the postings highlighter is not optimal. It would be much better to highlight
multiple docs in a single call, as we actually lose its sequential IO. That would require
refactoring the elasticsearch highlight api, which currently works per hit.
*/
public final class CustomPostingsHighlighter extends XPostingsHighlighter {
public final class CustomPostingsHighlighter extends PostingsHighlighter {
private static final Snippet[] EMPTY_SNIPPET = new Snippet[0];
private static final Passage[] EMPTY_PASSAGE = new Passage[0];
private final Analyzer analyzer;
private final CustomPassageFormatter passageFormatter;
private final int noMatchSize;
private final int totalContentLength;
private final String[] fieldValues;
private final int[] fieldValuesOffsets;
private int currentValueIndex = 0;
private final BreakIterator breakIterator;
private final boolean returnNonHighlightedSnippets;
private final String fieldValue;
private BreakIterator breakIterator;
public CustomPostingsHighlighter(CustomPassageFormatter passageFormatter, List<Object> fieldValues, boolean mergeValues, int maxLength, int noMatchSize) {
super(maxLength);
this.passageFormatter = passageFormatter;
this.noMatchSize = noMatchSize;
if (mergeValues) {
String rawValue = Strings.collectionToDelimitedString(fieldValues, String.valueOf(getMultiValuedSeparator("")));
String fieldValue = rawValue.substring(0, Math.min(rawValue.length(), maxLength));
this.fieldValues = new String[]{fieldValue};
this.fieldValuesOffsets = new int[]{0};
this.totalContentLength = fieldValue.length();
} else {
this.fieldValues = new String[fieldValues.size()];
this.fieldValuesOffsets = new int[fieldValues.size()];
int contentLength = 0;
int offset = 0;
int previousLength = -1;
for (int i = 0; i < fieldValues.size(); i++) {
String rawValue = fieldValues.get(i).toString();
String fieldValue = rawValue.substring(0, Math.min(rawValue.length(), maxLength));
this.fieldValues[i] = fieldValue;
contentLength += fieldValue.length();
offset += previousLength + 1;
this.fieldValuesOffsets[i] = offset;
previousLength = fieldValue.length();
}
this.totalContentLength = contentLength;
}
/**
* Creates a new instance of {@link CustomPostingsHighlighter}
*
* @param analyzer the analyzer used for the field at index time, used for multi term queries internally
* @param passageFormatter our own {@link PassageFormatter} which generates snippets in forms of {@link Snippet} objects
* @param fieldValue the original field values as a constructor argument, loaded from the _source field or the relevant stored field.
* @param returnNonHighlightedSnippets whether non highlighted snippets should be returned rather than empty snippets when
* no highlighting can be performed
*/
public CustomPostingsHighlighter(Analyzer analyzer, CustomPassageFormatter passageFormatter, String fieldValue, boolean returnNonHighlightedSnippets) {
this(analyzer, passageFormatter, null, fieldValue, returnNonHighlightedSnippets);
}
/*
Our own api to highlight a single document field, passing in the query terms, and get back our own Snippet object
/**
* Creates a new instance of {@link CustomPostingsHighlighter}
*
* @param analyzer the analyzer used for the field at index time, used for multi term queries internally
* @param passageFormatter our own {@link PassageFormatter} which generates snippets in forms of {@link Snippet} objects
* @param breakIterator an instance {@link BreakIterator} selected depending on the highlighting options
* @param fieldValue the original field values as a constructor argument, loaded from the _source field or the relevant stored field.
* @param returnNonHighlightedSnippets whether non highlighted snippets should be returned rather than empty snippets when
* no highlighting can be performed
*/
public Snippet[] highlightDoc(String field, BytesRef[] terms, IndexReader reader, int docId, int maxPassages) throws IOException {
IndexReaderContext readerContext = reader.getContext();
List<LeafReaderContext> leaves = readerContext.leaves();
public CustomPostingsHighlighter(Analyzer analyzer, CustomPassageFormatter passageFormatter, BreakIterator breakIterator, String fieldValue, boolean returnNonHighlightedSnippets) {
this.analyzer = analyzer;
this.passageFormatter = passageFormatter;
this.breakIterator = breakIterator;
this.returnNonHighlightedSnippets = returnNonHighlightedSnippets;
this.fieldValue = fieldValue;
}
String[] contents = new String[]{loadCurrentFieldValue()};
Map<Integer, Object> snippetsMap = highlightField(field, contents, getBreakIterator(field), terms, new int[]{docId}, leaves, maxPassages);
//increment the current value index so that next time we'll highlight the next value if available
currentValueIndex++;
Object snippetObject = snippetsMap.get(docId);
if (snippetObject != null && snippetObject instanceof Snippet[]) {
return (Snippet[]) snippetObject;
/**
* Highlights terms extracted from the provided query within the content of the provided field name
*/
public Snippet[] highlightField(String field, Query query, IndexSearcher searcher, int docId, int maxPassages) throws IOException {
Map<String, Object[]> fieldsAsObjects = super.highlightFieldsAsObjects(new String[]{field}, query, searcher, new int[]{docId}, new int[]{maxPassages});
Object[] snippetObjects = fieldsAsObjects.get(field);
if (snippetObjects != null) {
//one single document at a time
assert snippetObjects.length == 1;
Object snippetObject = snippetObjects[0];
if (snippetObject != null && snippetObject instanceof Snippet[]) {
return (Snippet[]) snippetObject;
}
}
return EMPTY_SNIPPET;
}
/*
Method provided through our own fork: allows proper scoring when doing per-value discrete highlighting.
Used to provide the total length of the field (all values) for proper scoring.
*/
@Override
protected int getContentLength(String field, int docId) {
return totalContentLength;
}
/*
Method provided through our own fork: allows performing proper per-value discrete highlighting.
Used to provide the offset for the current value.
*/
@Override
protected int getOffsetForCurrentValue(String field, int docId) {
if (currentValueIndex < fieldValuesOffsets.length) {
return fieldValuesOffsets[currentValueIndex];
}
throw new IllegalArgumentException("No more values offsets to return");
}
public void setBreakIterator(BreakIterator breakIterator) {
this.breakIterator = breakIterator;
}
@Override
protected PassageFormatter getFormatter(String field) {
return passageFormatter;
@@ -146,41 +110,27 @@ public final class CustomPostingsHighlighter extends XPostingsHighlighter {
return breakIterator;
}
@Override
protected char getMultiValuedSeparator(String field) {
//U+2029 PARAGRAPH SEPARATOR (PS): each value holds a discrete passage for highlighting
return HighlightUtils.PARAGRAPH_SEPARATOR;
}
/*
By default the postings highlighter returns non highlighted snippet when there are no matches.
We want to return no snippets by default, unless no_match_size is greater than 0
*/
@Override
protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) {
if (noMatchSize > 0) {
if (returnNonHighlightedSnippets) {
//we want to return the first sentence of the first snippet only
return super.getEmptyHighlight(fieldName, bi, 1);
}
return EMPTY_PASSAGE;
}
/*
Not needed since we call our own loadCurrentFieldValue explicitly, but we override it anyway for consistency.
*/
@Override
protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
return new String[][]{new String[]{loadCurrentFieldValue()}};
protected Analyzer getIndexAnalyzer(String field) {
return analyzer;
}
/*
Our own method that returns the field values, which relies on the content that was provided when creating the highlighter.
Supports per-value discrete highlighting by calling the highlightDoc method multiple times, once per value.
*/
protected String loadCurrentFieldValue() {
if (currentValueIndex < fieldValues.length) {
return fieldValues[currentValueIndex];
}
throw new IllegalArgumentException("No more values to return");
@Override
protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
//we only highlight one field, one document at a time
return new String[][]{new String[]{fieldValue}};
}
}
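For reference, a minimal usage sketch of the simplified API above, not part of the commit: it assumes an open IndexSearcher over a field indexed with offsets, and the CustomPassageFormatter constructor arguments shown are assumptions for illustration.
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.DefaultEncoder;
//a sketch: highlight the "body" field of a single document, two passages at most
static Snippet[] highlightBody(Analyzer analyzer, Query query, IndexSearcher searcher,
                               int docId, String fieldValue) throws IOException {
    CustomPassageFormatter formatter = new CustomPassageFormatter("<em>", "</em>", new DefaultEncoder());
    CustomPostingsHighlighter highlighter =
            new CustomPostingsHighlighter(analyzer, formatter, fieldValue, false);
    return highlighter.highlightField("body", query, searcher, docId, 2);
}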

View File

@@ -0,0 +1,153 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.lucene.search.postingshighlight;
import java.text.BreakIterator;
import java.text.CharacterIterator;
/**
* A {@link BreakIterator} that breaks the text whenever a certain separator, provided as a constructor argument, is found.
*/
public class CustomSeparatorBreakIterator extends BreakIterator {
private final char separator;
private CharacterIterator text;
private int current;
public CustomSeparatorBreakIterator(char separator) {
this.separator = separator;
}
@Override
public int current() {
return current;
}
@Override
public int first() {
text.setIndex(text.getBeginIndex());
return current = text.getIndex();
}
@Override
public int last() {
text.setIndex(text.getEndIndex());
return current = text.getIndex();
}
@Override
public int next() {
if (text.getIndex() == text.getEndIndex()) {
return DONE;
} else {
return advanceForward();
}
}
private int advanceForward() {
char c;
while( (c = text.next()) != CharacterIterator.DONE) {
if (c == separator) {
return current = text.getIndex() + 1;
}
}
assert text.getIndex() == text.getEndIndex();
return current = text.getIndex();
}
@Override
public int following(int pos) {
if (pos < text.getBeginIndex() || pos > text.getEndIndex()) {
throw new IllegalArgumentException("offset out of bounds");
} else if (pos == text.getEndIndex()) {
// this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=9000909
text.setIndex(text.getEndIndex());
current = text.getIndex();
return DONE;
} else {
text.setIndex(pos);
current = text.getIndex();
return advanceForward();
}
}
@Override
public int previous() {
if (text.getIndex() == text.getBeginIndex()) {
return DONE;
} else {
return advanceBackward();
}
}
private int advanceBackward() {
char c;
while( (c = text.previous()) != CharacterIterator.DONE) {
if (c == separator) {
return current = text.getIndex() + 1;
}
}
assert text.getIndex() == text.getBeginIndex();
return current = text.getIndex();
}
@Override
public int preceding(int pos) {
if (pos < text.getBeginIndex() || pos > text.getEndIndex()) {
throw new IllegalArgumentException("offset out of bounds");
} else if (pos == text.getBeginIndex()) {
// this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
// http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=9000909
text.setIndex(text.getBeginIndex());
current = text.getIndex();
return DONE;
} else {
text.setIndex(pos);
current = text.getIndex();
return advanceBackward();
}
}
@Override
public int next(int n) {
if (n < 0) {
for (int i = 0; i < -n; i++) {
previous();
}
} else {
for (int i = 0; i < n; i++) {
next();
}
}
return current();
}
@Override
public CharacterIterator getText() {
return text;
}
@Override
public void setText(CharacterIterator newText) {
text = newText;
current = text.getBeginIndex();
}
}
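For reference, a minimal usage sketch, not part of the commit; '|' stands in for a real separator such as HighlightUtils.PARAGRAPH_SEPARATOR, and the demo class name is illustrative.
import java.text.BreakIterator;
import java.text.StringCharacterIterator;
public class CustomSeparatorBreakIteratorDemo {
    public static void main(String[] args) {
        String text = "foo|bar baz|qux";
        BreakIterator bi = new CustomSeparatorBreakIterator('|');
        bi.setText(new StringCharacterIterator(text));
        int start = bi.first();
        for (int end = bi.next(); end != BreakIterator.DONE; start = end, end = bi.next()) {
            System.out.println(text.substring(start, end));
        }
        //prints "foo|", "bar baz|" and "qux": each separated value closes its own segment
    }
}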

View File

@@ -22,7 +22,7 @@ package org.apache.lucene.search.postingshighlight;
* Represents a scored highlighted snippet.
* It's our own arbitrary object that we get back from the postings highlighter when highlighting a document.
* Every snippet contains its formatted text and its score.
* The score is needed since we highlight every single value separately and we might want to return snippets sorted by score.
The score is needed in case we want to sort snippets by score; by default they are sorted by position in the text.
*/
public class Snippet {

View File

@@ -1,772 +0,0 @@
/*
* Licensed to Elasticsearch under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elasticsearch licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.apache.lucene.search.postingshighlight;
import org.apache.lucene.index.*;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.InPlaceMergeSorter;
import org.apache.lucene.util.UnicodeUtil;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.text.BreakIterator;
import java.util.*;
/*
FORKED from Lucene 4.5 to be able to:
1) support discrete highlighting for multiple values, so that we can return a different snippet per value when highlighting the whole text
2) call the highlightField method directly from subclasses and provide the terms by ourselves
3) Applied LUCENE-4906 to allow PassageFormatter to return arbitrary objects (LUCENE 4.6)
All our changes start with //BEGIN EDIT
*/
public class XPostingsHighlighter {
//BEGIN EDIT added method to override offset for current value (default 0)
//we need this to perform discrete highlighting per field
protected int getOffsetForCurrentValue(String field, int docId) {
return 0;
}
//END EDIT
//BEGIN EDIT
//we need this to fix scoring when highlighting every single value separately, since the score depends on the total length of the field (all values rather than only the current one)
protected int getContentLength(String field, int docId) {
return -1;
}
//END EDIT
// TODO: maybe allow re-analysis for tiny fields? currently we require offsets,
// but if the analyzer is really fast and the field is tiny, this might really be
// unnecessary.
/** for rewriting: we don't want slow processing from MTQs */
private static final IndexSearcher EMPTY_INDEXSEARCHER;
static {
try {
IndexReader emptyReader = new MultiReader();
EMPTY_INDEXSEARCHER = new IndexSearcher(emptyReader);
EMPTY_INDEXSEARCHER.setQueryCache(null);
} catch (IOException bogus) {
throw new RuntimeException(bogus);
}
}
/** Default maximum content size to process. Typically snippets
* closer to the beginning of the document better summarize its content */
public static final int DEFAULT_MAX_LENGTH = 10000;
private final int maxLength;
/** Set the first time {@link #getFormatter} is called,
* and then reused. */
private PassageFormatter defaultFormatter;
/** Set the first time {@link #getScorer} is called,
* and then reused. */
private PassageScorer defaultScorer;
/**
* Creates a new highlighter with default parameters.
*/
public XPostingsHighlighter() {
this(DEFAULT_MAX_LENGTH);
}
/**
* Creates a new highlighter, specifying maximum content length.
* @param maxLength maximum content size to process.
* @throws IllegalArgumentException if <code>maxLength</code> is negative or <code>Integer.MAX_VALUE</code>
*/
public XPostingsHighlighter(int maxLength) {
if (maxLength < 0 || maxLength == Integer.MAX_VALUE) {
// two reasons: no overflow problems in BreakIterator.preceding(offset+1),
// our sentinel in the offsets queue uses this value to terminate.
throw new IllegalArgumentException("maxLength must be < Integer.MAX_VALUE");
}
this.maxLength = maxLength;
}
/** Returns the {@link java.text.BreakIterator} to use for
* dividing text into passages. This returns
* {@link java.text.BreakIterator#getSentenceInstance(java.util.Locale)} by default;
* subclasses can override to customize. */
protected BreakIterator getBreakIterator(String field) {
return BreakIterator.getSentenceInstance(Locale.ROOT);
}
/** Returns the {@link PassageFormatter} to use for
* formatting passages into highlighted snippets. This
* returns a new {@code PassageFormatter} by default;
* subclasses can override to customize. */
protected PassageFormatter getFormatter(String field) {
if (defaultFormatter == null) {
defaultFormatter = new DefaultPassageFormatter();
}
return defaultFormatter;
}
/** Returns the {@link PassageScorer} to use for
* ranking passages. This
* returns a new {@code PassageScorer} by default;
* subclasses can override to customize. */
protected PassageScorer getScorer(String field) {
if (defaultScorer == null) {
defaultScorer = new PassageScorer();
}
return defaultScorer;
}
/**
* Highlights the top passages from a single field.
*
* @param field field name to highlight.
* Must have a stored string value and also be indexed with offsets.
* @param query query to highlight.
* @param searcher searcher that was previously used to execute the query.
* @param topDocs TopDocs containing the summary result documents to highlight.
* @return Array of formatted snippets corresponding to the documents in <code>topDocs</code>.
* If no highlights were found for a document, the
* first sentence for the field will be returned.
* @throws java.io.IOException if an I/O error occurred during processing
* @throws IllegalArgumentException if <code>field</code> was indexed without
* {@link org.apache.lucene.index.FieldInfo.IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
*/
public String[] highlight(String field, Query query, IndexSearcher searcher, TopDocs topDocs) throws IOException {
return highlight(field, query, searcher, topDocs, 1);
}
/**
* Highlights the top-N passages from a single field.
*
* @param field field name to highlight.
* Must have a stored string value and also be indexed with offsets.
* @param query query to highlight.
* @param searcher searcher that was previously used to execute the query.
* @param topDocs TopDocs containing the summary result documents to highlight.
* @param maxPassages The maximum number of top-N ranked passages used to
* form the highlighted snippets.
* @return Array of formatted snippets corresponding to the documents in <code>topDocs</code>.
* If no highlights were found for a document, the
* first {@code maxPassages} sentences from the
* field will be returned.
* @throws IOException if an I/O error occurred during processing
* @throws IllegalArgumentException if <code>field</code> was indexed without
* {@link org.apache.lucene.index.FieldInfo.IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
*/
public String[] highlight(String field, Query query, IndexSearcher searcher, TopDocs topDocs, int maxPassages) throws IOException {
Map<String,String[]> res = highlightFields(new String[] { field }, query, searcher, topDocs, new int[] { maxPassages });
return res.get(field);
}
/**
* Highlights the top passages from multiple fields.
* <p>
* Conceptually, this behaves as a more efficient form of:
* <pre class="prettyprint">
* Map m = new HashMap();
* for (String field : fields) {
* m.put(field, highlight(field, query, searcher, topDocs));
* }
* return m;
* </pre>
*
* @param fields field names to highlight.
* Must have a stored string value and also be indexed with offsets.
* @param query query to highlight.
* @param searcher searcher that was previously used to execute the query.
* @param topDocs TopDocs containing the summary result documents to highlight.
* @return Map keyed on field name, containing the array of formatted snippets
* corresponding to the documents in <code>topDocs</code>.
* If no highlights were found for a document, the
* first sentence from the field will be returned.
* @throws IOException if an I/O error occurred during processing
* @throws IllegalArgumentException if <code>field</code> was indexed without
* {@link org.apache.lucene.index.FieldInfo.IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
*/
public Map<String,String[]> highlightFields(String fields[], Query query, IndexSearcher searcher, TopDocs topDocs) throws IOException {
int maxPassages[] = new int[fields.length];
Arrays.fill(maxPassages, 1);
return highlightFields(fields, query, searcher, topDocs, maxPassages);
}
/**
* Highlights the top-N passages from multiple fields.
* <p>
* Conceptually, this behaves as a more efficient form of:
* <pre class="prettyprint">
* Map m = new HashMap();
* for (String field : fields) {
* m.put(field, highlight(field, query, searcher, topDocs, maxPassages));
* }
* return m;
* </pre>
*
* @param fields field names to highlight.
* Must have a stored string value and also be indexed with offsets.
* @param query query to highlight.
* @param searcher searcher that was previously used to execute the query.
* @param topDocs TopDocs containing the summary result documents to highlight.
* @param maxPassages The maximum number of top-N ranked passages per-field used to
* form the highlighted snippets.
* @return Map keyed on field name, containing the array of formatted snippets
* corresponding to the documents in <code>topDocs</code>.
* If no highlights were found for a document, the
* first {@code maxPassages} sentences from the
* field will be returned.
* @throws IOException if an I/O error occurred during processing
* @throws IllegalArgumentException if <code>field</code> was indexed without
* {@link org.apache.lucene.index.FieldInfo.IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
*/
public Map<String,String[]> highlightFields(String fields[], Query query, IndexSearcher searcher, TopDocs topDocs, int maxPassages[]) throws IOException {
final ScoreDoc scoreDocs[] = topDocs.scoreDocs;
int docids[] = new int[scoreDocs.length];
for (int i = 0; i < docids.length; i++) {
docids[i] = scoreDocs[i].doc;
}
return highlightFields(fields, query, searcher, docids, maxPassages);
}
/**
* Highlights the top-N passages from multiple fields,
* for the provided int[] docids.
*
* @param fieldsIn field names to highlight.
* Must have a stored string value and also be indexed with offsets.
* @param query query to highlight.
* @param searcher searcher that was previously used to execute the query.
* @param docidsIn containing the document IDs to highlight.
* @param maxPassagesIn The maximum number of top-N ranked passages per-field used to
* form the highlighted snippets.
* @return Map keyed on field name, containing the array of formatted snippets
* corresponding to the documents in <code>topDocs</code>.
* If no highlights were found for a document, the
* first {@code maxPassages} from the field will
* be returned.
* @throws IOException if an I/O error occurred during processing
* @throws IllegalArgumentException if <code>field</code> was indexed without
* {@link org.apache.lucene.index.FieldInfo.IndexOptions#DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}
*/
public Map<String,String[]> highlightFields(String fieldsIn[], Query query, IndexSearcher searcher, int[] docidsIn, int maxPassagesIn[]) throws IOException {
Map<String,String[]> snippets = new HashMap<>();
for(Map.Entry<String,Object[]> ent : highlightFieldsAsObjects(fieldsIn, query, searcher, docidsIn, maxPassagesIn).entrySet()) {
Object[] snippetObjects = ent.getValue();
String[] snippetStrings = new String[snippetObjects.length];
snippets.put(ent.getKey(), snippetStrings);
for(int i=0;i<snippetObjects.length;i++) {
Object snippet = snippetObjects[i];
if (snippet != null) {
snippetStrings[i] = snippet.toString();
}
}
}
return snippets;
}
public Map<String,Object[]> highlightFieldsAsObjects(String fieldsIn[], Query query, IndexSearcher searcher, int[] docidsIn, int maxPassagesIn[]) throws IOException {
if (fieldsIn.length < 1) {
throw new IllegalArgumentException("fieldsIn must not be empty");
}
if (fieldsIn.length != maxPassagesIn.length) {
throw new IllegalArgumentException("invalid number of maxPassagesIn");
}
SortedSet<Term> queryTerms = new TreeSet<>();
EMPTY_INDEXSEARCHER.createNormalizedWeight(query, false).extractTerms(queryTerms);
IndexReaderContext readerContext = searcher.getIndexReader().getContext();
List<LeafReaderContext> leaves = readerContext.leaves();
// Make our own copies because we sort in-place:
int[] docids = new int[docidsIn.length];
System.arraycopy(docidsIn, 0, docids, 0, docidsIn.length);
final String fields[] = new String[fieldsIn.length];
System.arraycopy(fieldsIn, 0, fields, 0, fieldsIn.length);
final int maxPassages[] = new int[maxPassagesIn.length];
System.arraycopy(maxPassagesIn, 0, maxPassages, 0, maxPassagesIn.length);
// sort for sequential io
Arrays.sort(docids);
new InPlaceMergeSorter() {
@Override
protected void swap(int i, int j) {
String tmp = fields[i];
fields[i] = fields[j];
fields[j] = tmp;
int tmp2 = maxPassages[i];
maxPassages[i] = maxPassages[j];
maxPassages[j] = tmp2;
}
@Override
protected int compare(int i, int j) {
return fields[i].compareTo(fields[j]);
}
}.sort(0, fields.length);
// pull stored data:
String[][] contents = loadFieldValues(searcher, fields, docids, maxLength);
Map<String,Object[]> highlights = new HashMap<>();
for (int i = 0; i < fields.length; i++) {
String field = fields[i];
int numPassages = maxPassages[i];
Term floor = new Term(field, "");
Term ceiling = new Term(field, UnicodeUtil.BIG_TERM);
SortedSet<Term> fieldTerms = queryTerms.subSet(floor, ceiling);
// TODO: should we have some reasonable defaults for term pruning? (e.g. stopwords)
// Strip off the redundant field:
BytesRef terms[] = new BytesRef[fieldTerms.size()];
int termUpto = 0;
for(Term term : fieldTerms) {
terms[termUpto++] = term.bytes();
}
Map<Integer,Object> fieldHighlights = highlightField(field, contents[i], getBreakIterator(field), terms, docids, leaves, numPassages);
Object[] result = new Object[docids.length];
for (int j = 0; j < docidsIn.length; j++) {
result[j] = fieldHighlights.get(docidsIn[j]);
}
highlights.put(field, result);
}
return highlights;
}
/** Loads the String values for each field X docID to be
* highlighted. By default this loads from stored
* fields, but a subclass can change the source. This
* method should allocate the String[fields.length][docids.length]
* and fill all values. The returned Strings must be
* identical to what was indexed. */
protected String[][] loadFieldValues(IndexSearcher searcher, String[] fields, int[] docids, int maxLength) throws IOException {
String contents[][] = new String[fields.length][docids.length];
char valueSeparators[] = new char[fields.length];
for (int i = 0; i < fields.length; i++) {
valueSeparators[i] = getMultiValuedSeparator(fields[i]);
}
LimitedStoredFieldVisitor visitor = new LimitedStoredFieldVisitor(fields, valueSeparators, maxLength);
for (int i = 0; i < docids.length; i++) {
searcher.doc(docids[i], visitor);
for (int j = 0; j < fields.length; j++) {
contents[j][i] = visitor.getValue(j);
}
visitor.reset();
}
return contents;
}
/**
* Returns the logical separator between values for multi-valued fields.
* The default value is a space character, which means passages can span across values,
* but a subclass can override, for example with {@code U+2029 PARAGRAPH SEPARATOR (PS)}
* if each value holds a discrete passage for highlighting.
*/
protected char getMultiValuedSeparator(String field) {
return ' ';
}
//BEGIN EDIT: made protected so that we can call from our subclass and pass in the terms by ourselves
protected Map<Integer,Object> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<LeafReaderContext> leaves, int maxPassages) throws IOException {
//private Map<Integer,Object> highlightField(String field, String contents[], BreakIterator bi, BytesRef terms[], int[] docids, List<LeafReaderContext > leaves, int maxPassages) throws IOException {
//END EDIT
Map<Integer,Object> highlights = new HashMap<>();
// reuse in the real sense... for docs in same segment we just advance our old enum
PostingsEnum postings[] = null;
TermsEnum termsEnum = null;
int lastLeaf = -1;
PassageFormatter fieldFormatter = getFormatter(field);
if (fieldFormatter == null) {
throw new NullPointerException("PassageFormatter cannot be null");
}
for (int i = 0; i < docids.length; i++) {
String content = contents[i];
if (content.length() == 0) {
continue; // nothing to do
}
bi.setText(content);
int doc = docids[i];
int leaf = ReaderUtil.subIndex(doc, leaves);
LeafReaderContext subContext = leaves.get(leaf);
LeafReader r = subContext.reader();
Terms t = r.terms(field);
if (t == null) {
continue; // nothing to do
}
if (!t.hasOffsets()) {
// no offsets available
throw new IllegalArgumentException("field '" + field + "' was indexed without offsets, cannot highlight");
}
if (leaf != lastLeaf) {
termsEnum = t.iterator();
postings = new PostingsEnum[terms.length];
}
Passage passages[] = highlightDoc(field, terms, content.length(), bi, doc - subContext.docBase, termsEnum, postings, maxPassages);
if (passages.length == 0) {
passages = getEmptyHighlight(field, bi, maxPassages);
}
if (passages.length > 0) {
// otherwise a null snippet (eg if field is missing
// entirely from the doc)
highlights.put(doc, fieldFormatter.format(passages, content));
}
lastLeaf = leaf;
}
return highlights;
}
// algorithm: treat sentence snippets as miniature documents
// we can intersect these with the postings lists via BreakIterator.preceding(offset)
// score each sentence as norm(sentenceStartOffset) * sum(weight * tf(freq))
private Passage[] highlightDoc(String field, BytesRef terms[], int contentLength, BreakIterator bi, int doc,
TermsEnum termsEnum, PostingsEnum[] postings, int n) throws IOException {
//BEGIN EDIT added call to method that returns the offset for the current value (discrete highlighting)
int valueOffset = getOffsetForCurrentValue(field, doc);
//END EDIT
PassageScorer scorer = getScorer(field);
if (scorer == null) {
throw new NullPointerException("PassageScorer cannot be null");
}
//BEGIN EDIT discrete highlighting
// the scoring needs to be based on the length of the whole field (all values rather than only the current one)
int totalContentLength = getContentLength(field, doc);
if (totalContentLength == -1) {
totalContentLength = contentLength;
}
//END EDIT
PriorityQueue<OffsetsEnum> pq = new PriorityQueue<>();
float weights[] = new float[terms.length];
// initialize postings
for (int i = 0; i < terms.length; i++) {
PostingsEnum de = postings[i];
int pDoc;
if (de == EMPTY) {
continue;
} else if (de == null) {
postings[i] = EMPTY; // initially
if (!termsEnum.seekExact(terms[i])) {
continue; // term not found
}
de = postings[i] = termsEnum.postings(null, null, PostingsEnum.OFFSETS);
assert de != null;
pDoc = de.advance(doc);
} else {
pDoc = de.docID();
if (pDoc < doc) {
pDoc = de.advance(doc);
}
}
if (doc == pDoc) {
//BEGIN EDIT we take into account the length of the whole field (all values) to properly score the snippets
weights[i] = scorer.weight(totalContentLength, de.freq());
//weights[i] = scorer.weight(contentLength, de.freq());
//END EDIT
de.nextPosition();
pq.add(new OffsetsEnum(de, i));
}
}
pq.add(new OffsetsEnum(EMPTY, Integer.MAX_VALUE)); // a sentinel for termination
PriorityQueue<Passage> passageQueue = new PriorityQueue<>(n, new Comparator<Passage>() {
@Override
public int compare(Passage left, Passage right) {
if (left.score < right.score) {
return -1;
} else if (left.score > right.score) {
return 1;
} else {
return left.startOffset - right.startOffset;
}
}
});
Passage current = new Passage();
OffsetsEnum off;
while ((off = pq.poll()) != null) {
final PostingsEnum dp = off.dp;
int start = dp.startOffset();
assert start >= 0;
int end = dp.endOffset();
// LUCENE-5166: this hit would span the content limit... however more valid
// hits may exist (they are sorted by start). so we pretend like we never
// saw this term, it won't cause a passage to be added to passageQueue or anything.
assert EMPTY.startOffset() == Integer.MAX_VALUE;
if (start < contentLength && end > contentLength) {
continue;
}
//BEGIN EDIT support for discrete highlighting (added block code)
//switch to the first match in the current value if there is one
boolean seenEnough = false;
while (start < valueOffset) {
if (off.pos == dp.freq()) {
seenEnough = true;
break;
} else {
off.pos++;
dp.nextPosition();
start = dp.startOffset();
end = dp.endOffset();
}
}
//continue with next term if we've already seen the current one all the times it appears
//that means that the current value doesn't hold matches for the current term
if (seenEnough) {
continue;
}
//we now subtract the offset of the current value to both start and end
start -= valueOffset;
end -= valueOffset;
//END EDIT
if (start >= current.endOffset) {
if (current.startOffset >= 0) {
// finalize current
//BEGIN EDIT we take into account the value offset when scoring the snippet based on its position
current.score *= scorer.norm(current.startOffset + valueOffset);
//current.score *= scorer.norm(current.startOffset);
//END EDIT
// new sentence: first add 'current' to queue
if (passageQueue.size() == n && current.score < passageQueue.peek().score) {
current.reset(); // can't compete, just reset it
} else {
passageQueue.offer(current);
if (passageQueue.size() > n) {
current = passageQueue.poll();
current.reset();
} else {
current = new Passage();
}
}
}
// if we exceed limit, we are done
if (start >= contentLength) {
Passage passages[] = new Passage[passageQueue.size()];
passageQueue.toArray(passages);
for (Passage p : passages) {
p.sort();
}
// sort in ascending order
Arrays.sort(passages, new Comparator<Passage>() {
@Override
public int compare(Passage left, Passage right) {
return left.startOffset - right.startOffset;
}
});
return passages;
}
// advance breakiterator
assert BreakIterator.DONE < 0;
current.startOffset = Math.max(bi.preceding(start+1), 0);
current.endOffset = Math.min(bi.next(), contentLength);
}
int tf = 0;
while (true) {
tf++;
current.addMatch(start, end, terms[off.id]);
if (off.pos == dp.freq()) {
break; // removed from pq
} else {
off.pos++;
dp.nextPosition();
//BEGIN EDIT support for discrete highlighting
start = dp.startOffset() - valueOffset;
end = dp.endOffset() - valueOffset;
//start = dp.startOffset();
//end = dp.endOffset();
//END EDIT
}
if (start >= current.endOffset || end > contentLength) {
pq.offer(off);
break;
}
}
current.score += weights[off.id] * scorer.tf(tf, current.endOffset - current.startOffset);
}
// Dead code but compiler disagrees:
assert false;
return null;
}
/** Called to summarize a document when no hits were
* found. By default this just returns the first
* {@code maxPassages} sentences; subclasses can override
* to customize. */
protected Passage[] getEmptyHighlight(String fieldName, BreakIterator bi, int maxPassages) {
// BreakIterator should be un-next'd:
List<Passage> passages = new ArrayList<>();
int pos = bi.current();
assert pos == 0;
while (passages.size() < maxPassages) {
int next = bi.next();
if (next == BreakIterator.DONE) {
break;
}
Passage passage = new Passage();
passage.score = Float.NaN;
passage.startOffset = pos;
passage.endOffset = next;
passages.add(passage);
pos = next;
}
return passages.toArray(new Passage[passages.size()]);
}
private static class OffsetsEnum implements Comparable<OffsetsEnum> {
PostingsEnum dp;
int pos;
int id;
OffsetsEnum(PostingsEnum dp, int id) throws IOException {
this.dp = dp;
this.id = id;
this.pos = 1;
}
@Override
public int compareTo(OffsetsEnum other) {
try {
int off = dp.startOffset();
int otherOff = other.dp.startOffset();
if (off == otherOff) {
return id - other.id;
} else {
return Long.signum(((long)off) - otherOff);
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
private static final PostingsEnum EMPTY = new PostingsEnum() {
@Override
public int nextPosition() throws IOException { return 0; }
@Override
public int startOffset() throws IOException { return Integer.MAX_VALUE; }
@Override
public int endOffset() throws IOException { return Integer.MAX_VALUE; }
@Override
public BytesRef getPayload() throws IOException { return null; }
@Override
public int freq() throws IOException { return 0; }
@Override
public int docID() { return NO_MORE_DOCS; }
@Override
public int nextDoc() throws IOException { return NO_MORE_DOCS; }
@Override
public int advance(int target) throws IOException { return NO_MORE_DOCS; }
@Override
public long cost() { return 0; }
};
private static class LimitedStoredFieldVisitor extends StoredFieldVisitor {
private final String fields[];
private final char valueSeparators[];
private final int maxLength;
private final StringBuilder builders[];
private int currentField = -1;
public LimitedStoredFieldVisitor(String fields[], char valueSeparators[], int maxLength) {
assert fields.length == valueSeparators.length;
this.fields = fields;
this.valueSeparators = valueSeparators;
this.maxLength = maxLength;
builders = new StringBuilder[fields.length];
for (int i = 0; i < builders.length; i++) {
builders[i] = new StringBuilder();
}
}
@Override
public void stringField(FieldInfo fieldInfo, byte[] bytes) throws IOException {
String value = new String(bytes, StandardCharsets.UTF_8);
assert currentField >= 0;
StringBuilder builder = builders[currentField];
if (builder.length() > 0 && builder.length() < maxLength) {
builder.append(valueSeparators[currentField]);
}
if (builder.length() + value.length() > maxLength) {
builder.append(value, 0, maxLength - builder.length());
} else {
builder.append(value);
}
}
@Override
public Status needsField(FieldInfo fieldInfo) throws IOException {
currentField = Arrays.binarySearch(fields, fieldInfo.name);
if (currentField < 0) {
return Status.NO;
} else if (builders[currentField].length() > maxLength) {
return fields.length == 1 ? Status.STOP : Status.NO;
}
return Status.YES;
}
String getValue(int i) {
return builders[i].toString();
}
void reset() {
currentField = -1;
for (int i = 0; i < fields.length; i++) {
builders[i].setLength(0);
}
}
}
}

View File

@@ -66,7 +66,7 @@ public class TransportCreateSnapshotAction extends TransportMasterNodeOperationA
@Override
protected void masterOperation(final CreateSnapshotRequest request, ClusterState state, final ActionListener<CreateSnapshotResponse> listener) {
SnapshotsService.SnapshotRequest snapshotRequest =
new SnapshotsService.SnapshotRequest("create_snapshot[" + request.snapshot() + "]", request.snapshot(), request.repository())
new SnapshotsService.SnapshotRequest("create_snapshot [" + request.snapshot() + "]", request.snapshot(), request.repository())
.indices(request.indices())
.indicesOptions(request.indicesOptions())
.partial(request.partial())

View File

@@ -1,100 +0,0 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.common.lucene.docset;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.RamUsageEstimator;
import java.io.IOException;
/**
* A {@link DocIdSet} that matches all docs up to a {@code maxDoc}.
*/
public class AllDocIdSet extends DocIdSet {
private final int maxDoc;
public AllDocIdSet(int maxDoc) {
this.maxDoc = maxDoc;
}
/**
* Does not go to the reader and ask for data, so can be cached.
*/
@Override
public boolean isCacheable() {
return true;
}
@Override
public long ramBytesUsed() {
return RamUsageEstimator.NUM_BYTES_INT;
}
@Override
public DocIdSetIterator iterator() throws IOException {
return new Iterator(maxDoc);
}
@Override
public Bits bits() throws IOException {
return new Bits.MatchAllBits(maxDoc);
}
public static final class Iterator extends DocIdSetIterator {
private final int maxDoc;
private int doc = -1;
public Iterator(int maxDoc) {
this.maxDoc = maxDoc;
}
@Override
public int docID() {
return doc;
}
@Override
public int nextDoc() throws IOException {
if (++doc < maxDoc) {
return doc;
}
return doc = NO_MORE_DOCS;
}
@Override
public int advance(int target) throws IOException {
doc = target;
if (doc < maxDoc) {
return doc;
}
return doc = NO_MORE_DOCS;
}
@Override
public long cost() {
return maxDoc;
}
}
}

View File

@@ -23,8 +23,10 @@ import com.carrotsearch.hppc.ObjectLongHashMap;
import com.carrotsearch.hppc.ObjectHashSet;
import com.carrotsearch.hppc.cursors.ObjectCursor;
import com.carrotsearch.hppc.predicates.ObjectPredicate;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.ExceptionsHelper;
import org.elasticsearch.action.FailedNodeException;
import org.elasticsearch.cluster.metadata.IndexMetaData;
@@ -45,14 +47,13 @@ import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.index.settings.IndexSettings;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.store.StoreFileMetaData;
import org.elasticsearch.indices.store.TransportNodesListShardStoreMetaData;
import org.elasticsearch.transport.ConnectTransportException;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.*;
import java.util.concurrent.ConcurrentMap;
/**
@@ -101,6 +102,15 @@ public class GatewayAllocator extends AbstractComponent {
}
}
/**
* Return {@code true} if the index is configured to allow shards to be
* recovered on any node
*/
private boolean recoverOnAnyNode(@IndexSettings Settings idxSettings) {
return IndexMetaData.isOnSharedFilesystem(idxSettings) &&
idxSettings.getAsBoolean(IndexMetaData.SETTING_SHARED_FS_ALLOW_RECOVERY_ON_ANY_NODE, false);
}
public boolean allocateUnassigned(RoutingAllocation allocation) {
boolean changed = false;
DiscoveryNodes nodes = allocation.nodes();
@@ -125,11 +135,13 @@
int numberOfAllocationsFound = 0;
long highestVersion = -1;
Set<DiscoveryNode> nodesWithHighestVersion = Sets.newHashSet();
final Map<DiscoveryNode, Long> nodesWithVersion = Maps.newHashMap();
assert !nodesState.containsKey(null);
final Object[] keys = nodesState.keys;
final long[] values = nodesState.values;
IndexMetaData indexMetaData = routingNodes.metaData().index(shard.index());
Settings idxSettings = indexMetaData.settings();
for (int i = 0; i < keys.length; i++) {
if (keys[i] == null) {
continue;
@@ -141,29 +153,63 @@
if (allocation.shouldIgnoreShardForNode(shard.shardId(), node.id())) {
continue;
}
if (version != -1) {
if (recoverOnAnyNode(idxSettings)) {
numberOfAllocationsFound++;
if (highestVersion == -1) {
nodesWithHighestVersion.add(node);
if (version > highestVersion) {
highestVersion = version;
} else {
if (version > highestVersion) {
nodesWithHighestVersion.clear();
nodesWithHighestVersion.add(node);
highestVersion = version;
} else if (version == highestVersion) {
nodesWithHighestVersion.add(node);
}
}
// We always put the node without clearing the map
nodesWithVersion.put(node, version);
} else if (version != -1) {
numberOfAllocationsFound++;
// If we've found a new "best" candidate, clear the
// current candidates and add it
if (version > highestVersion) {
highestVersion = version;
nodesWithVersion.clear();
nodesWithVersion.put(node, version);
} else if (version == highestVersion) {
// If the candidate is the same, add it to the
// list, but keep the current candidate
nodesWithVersion.put(node, version);
}
}
}
// Now that we have a map of nodes to versions along with the
// number of allocations found (and not ignored), we need to sort
// it so the node with the highest version is at the beginning
List<DiscoveryNode> nodesWithHighestVersion = Lists.newArrayList();
nodesWithHighestVersion.addAll(nodesWithVersion.keySet());
CollectionUtil.timSort(nodesWithHighestVersion, new Comparator<DiscoveryNode>() {
@Override
public int compare(DiscoveryNode o1, DiscoveryNode o2) {
return Long.compare(nodesWithVersion.get(o2), nodesWithVersion.get(o1));
}
});
if (logger.isDebugEnabled()) {
logger.debug("[{}][{}] found {} allocations of {}, highest version: [{}]",
shard.index(), shard.id(), numberOfAllocationsFound, shard, highestVersion);
}
if (logger.isTraceEnabled()) {
StringBuilder sb = new StringBuilder("[");
for (DiscoveryNode n : nodesWithHighestVersion) {
sb.append("[");
sb.append(n.getName());
sb.append("]");
sb.append(" -> ");
sb.append(nodesWithVersion.get(n));
sb.append(", ");
}
sb.append("]");
logger.trace("{} candidates for allocation: {}", shard, sb.toString());
}
// check if the count meets the minimum set
int requiredAllocation = 1;
// if we restore from a repository one copy is more than enough
if (shard.restoreSource() == null) {
try {
IndexMetaData indexMetaData = routingNodes.metaData().index(shard.index());
String initialShards = indexMetaData.settings().get(INDEX_RECOVERY_INITIAL_SHARDS, settings.get(INDEX_RECOVERY_INITIAL_SHARDS, this.initialShards));
if ("quorum".equals(initialShards)) {
if (indexMetaData.numberOfReplicas() > 1) {
@ -415,13 +461,6 @@ public class GatewayAllocator extends AbstractComponent {
for (TransportNodesListGatewayStartedShards.NodeGatewayStartedShards nodeShardState : response) {
long version = nodeShardState.version();
Settings idxSettings = indexMetaData.settings();
if (IndexMetaData.isOnSharedFilesystem(idxSettings) &&
idxSettings.getAsBoolean(IndexMetaData.SETTING_SHARED_FS_ALLOW_RECOVERY_ON_ANY_NODE, false)) {
// Shared filesystems use 0 as a minimum shard state, which
// means that the shard can be allocated to any node
version = Math.max(0, version);
}
// -1 version means it does not exist, which is what the API returns, and what we expect too
logger.trace("[{}] on node [{}] has version [{}] of shard",
shard, nodeShardState.getNode(), version);

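To make the new allocation flow above concrete: instead of tracking a separate set of nodes holding the highest version, every candidate now goes into a node-to-version map that is sorted descending, so the best candidate is always at index 0 while lower-version fallbacks stay in the list. A minimal, self-contained sketch of that comparator idiom (plain strings stand in for DiscoveryNode; the class and variable names are illustrative, not the engine's API):

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class NodeVersionSortSketch {
    public static void main(String[] args) {
        final Map<String, Long> nodesWithVersion = new HashMap<>();
        nodesWithVersion.put("node1", 3L); // stale copy of the shard
        nodesWithVersion.put("node2", 7L); // most recent copy
        nodesWithVersion.put("node3", 7L); // equally recent copy

        List<String> candidates = new ArrayList<>(nodesWithVersion.keySet());
        // Sort descending by version so index 0 holds the best allocation
        // candidate, mirroring the CollectionUtil.timSort call in the diff.
        Collections.sort(candidates, new Comparator<String>() {
            @Override
            public int compare(String o1, String o2) {
                return Long.compare(nodesWithVersion.get(o2), nodesWithVersion.get(o1));
            }
        });
        System.out.println(candidates); // node2/node3 (version 7) before node1
    }
}

Ties keep both nodes as candidates, which preserves the old nodesWithHighestVersion semantics while still retaining lower-version nodes later in the list.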
View File

@ -38,6 +38,7 @@ import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.cluster.routing.DjbHashFunction;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.lease.Releasable;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.lucene.LoggerInfoStream;
import org.elasticsearch.common.lucene.Lucene;
import org.elasticsearch.common.lucene.index.ElasticsearchDirectoryReader;
@ -53,6 +54,7 @@ import org.elasticsearch.index.merge.policy.ElasticsearchMergePolicy;
import org.elasticsearch.index.merge.policy.MergePolicyProvider;
import org.elasticsearch.index.merge.scheduler.MergeSchedulerProvider;
import org.elasticsearch.index.search.nested.IncludeNestedDocsQuery;
import org.elasticsearch.index.shard.ShardId;
import org.elasticsearch.index.shard.TranslogRecoveryPerformer;
import org.elasticsearch.index.translog.Translog;
import org.elasticsearch.index.translog.TranslogConfig;
@ -128,7 +130,7 @@ public class InternalEngine extends Engine {
}
throttle = new IndexThrottle();
this.searcherFactory = new SearchFactory(engineConfig);
this.searcherFactory = new SearchFactory(logger, isClosed, engineConfig);
final Translog.TranslogGeneration translogGeneration;
try {
// TODO: would be better if ES could tell us "from above" whether this shard was already here, instead of using Lucene's API
@ -1053,10 +1055,19 @@ public class InternalEngine extends Engine {
}
/** Extended SearcherFactory that warms the segments if needed when acquiring a new searcher */
class SearchFactory extends EngineSearcherFactory {
final static class SearchFactory extends EngineSearcherFactory {
SearchFactory(EngineConfig engineConfig) {
private final IndicesWarmer warmer;
private final ShardId shardId;
private final ESLogger logger;
private final AtomicBoolean isEngineClosed;
SearchFactory(ESLogger logger, AtomicBoolean isEngineClosed, EngineConfig engineConfig) {
super(engineConfig);
warmer = engineConfig.getWarmer();
shardId = engineConfig.getShardId();
this.logger = logger;
this.isEngineClosed = isEngineClosed;
}
@Override
@ -1068,36 +1079,34 @@ public class InternalEngine extends Engine {
IndexSearcher newSearcher = null;
boolean closeNewSearcher = false;
try {
if (searcherManager == null) {
if (previousReader == null) {
// we are starting up - no writer active so we can't acquire a searcher.
newSearcher = searcher;
} else {
try (final Searcher currentSearcher = acquireSearcher("search_factory")) {
// figure out the newSearcher, with only the new readers that are relevant for us
List<IndexReader> readers = Lists.newArrayList();
for (LeafReaderContext newReaderContext : searcher.getIndexReader().leaves()) {
if (isMergedSegment(newReaderContext.reader())) {
// merged segments are already handled by IndexWriterConfig.setMergedSegmentWarmer
continue;
}
boolean found = false;
for (LeafReaderContext currentReaderContext : currentSearcher.reader().leaves()) {
if (currentReaderContext.reader().getCoreCacheKey().equals(newReaderContext.reader().getCoreCacheKey())) {
found = true;
break;
}
}
if (!found) {
readers.add(newReaderContext.reader());
// figure out the newSearcher, with only the new readers that are relevant for us
List<IndexReader> readers = Lists.newArrayList();
for (LeafReaderContext newReaderContext : reader.leaves()) {
if (isMergedSegment(newReaderContext.reader())) {
// merged segments are already handled by IndexWriterConfig.setMergedSegmentWarmer
continue;
}
boolean found = false;
for (LeafReaderContext currentReaderContext : previousReader.leaves()) {
if (currentReaderContext.reader().getCoreCacheKey().equals(newReaderContext.reader().getCoreCacheKey())) {
found = true;
break;
}
}
if (!readers.isEmpty()) {
// we don't want to close the inner readers, just increase ref on them
IndexReader newReader = new MultiReader(readers.toArray(new IndexReader[readers.size()]), false);
newSearcher = super.newSearcher(newReader, null);
closeNewSearcher = true;
if (!found) {
readers.add(newReaderContext.reader());
}
}
if (!readers.isEmpty()) {
// we don't want to close the inner readers, just increase ref on them
IndexReader newReader = new MultiReader(readers.toArray(new IndexReader[readers.size()]), false);
newSearcher = super.newSearcher(newReader, null);
closeNewSearcher = true;
}
}
if (newSearcher != null) {
@ -1106,7 +1115,7 @@ public class InternalEngine extends Engine {
}
warmer.warmTopReader(new IndicesWarmer.WarmerContext(shardId, new Searcher("warmer", searcher)));
} catch (Throwable e) {
if (isClosed.get() == false) {
if (isEngineClosed.get() == false) {
logger.warn("failed to prepare/warm", e);
}
} finally {

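The SearchFactory change above boils down to a set difference over segment core cache keys: only leaves of the new reader whose core key was not present in the previous reader get warmed (merged segments are handled separately by the merged-segment warmer). A rough, self-contained sketch of that selection, with plain objects standing in for LeafReader core cache keys (illustrative names, not Lucene's API):

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

public class NewSegmentSelectionSketch {
    // Returns only the keys that were absent from the previous reader; in the
    // engine these would be LeafReader core cache keys, compared exactly as in
    // the nested loop above.
    static <K> List<K> newSegments(List<K> previousKeys, List<K> currentKeys) {
        Set<K> seen = new HashSet<>(previousKeys);
        List<K> fresh = new ArrayList<>();
        for (K key : currentKeys) {
            if (!seen.contains(key)) {
                fresh.add(key); // only these segments need warming
            }
        }
        return fresh;
    }

    public static void main(String[] args) {
        List<String> previous = Arrays.asList("segment_1", "segment_2");
        List<String> current = Arrays.asList("segment_1", "segment_2", "segment_3");
        System.out.println(newSegments(previous, current)); // [segment_3]
    }
}

The set lookup here only keeps the sketch short; for the handful of leaves a reader typically has, the nested loop in the engine is an equivalent way to find the unseen core keys.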
View File

@ -37,6 +37,7 @@ import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.search.aggregations.support.ValuesSource;
import org.elasticsearch.search.aggregations.support.ValuesSource.Bytes.WithOrdinals;
import java.io.IOException;
import java.util.HashSet;
@ -80,33 +81,65 @@ public class IncludeExclude {
}
// Only used for the 'map' execution mode (ie. scripts)
public static class StringFilter {
public abstract static class StringFilter {
public abstract boolean accept(BytesRef value);
}
static class AutomatonBackedStringFilter extends StringFilter {
private final ByteRunAutomaton runAutomaton;
private StringFilter(Automaton automaton) {
private AutomatonBackedStringFilter(Automaton automaton) {
this.runAutomaton = new ByteRunAutomaton(automaton);
}
/**
* Returns whether the given value is accepted based on the {@code include} & {@code exclude} patterns.
*/
@Override
public boolean accept(BytesRef value) {
return runAutomaton.run(value.bytes, value.offset, value.length);
}
}
public static class OrdinalsFilter {
static class TermListBackedStringFilter extends StringFilter {
private final Set<BytesRef> valids;
private final Set<BytesRef> invalids;
public TermListBackedStringFilter(Set<BytesRef> includeValues, Set<BytesRef> excludeValues) {
this.valids = includeValues;
this.invalids = excludeValues;
}
/**
* Returns whether the given value is accepted based on the
* {@code include} & {@code exclude} sets.
*/
@Override
public boolean accept(BytesRef value) {
return ((valids == null) || (valids.contains(value))) && ((invalids == null) || (!invalids.contains(value)));
}
}
public static abstract class OrdinalsFilter {
public abstract LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) throws IOException;
}
static class AutomatonBackedOrdinalsFilter extends OrdinalsFilter {
private final CompiledAutomaton compiled;
private OrdinalsFilter(Automaton automaton) {
private AutomatonBackedOrdinalsFilter(Automaton automaton) {
this.compiled = new CompiledAutomaton(automaton);
}
/**
* Computes which global ordinals are accepted by this IncludeExclude instance.
*
*/
@Override
public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, ValuesSource.Bytes.WithOrdinals valueSource) throws IOException {
LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
TermsEnum globalTermsEnum;
@ -120,6 +153,43 @@ public class IncludeExclude {
}
}
static class TermListBackedOrdinalsFilter extends OrdinalsFilter {
private final SortedSet<BytesRef> includeValues;
private final SortedSet<BytesRef> excludeValues;
public TermListBackedOrdinalsFilter(SortedSet<BytesRef> includeValues, SortedSet<BytesRef> excludeValues) {
this.includeValues = includeValues;
this.excludeValues = excludeValues;
}
@Override
public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals, WithOrdinals valueSource) throws IOException {
LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
if (includeValues != null) {
for (BytesRef term : includeValues) {
long ord = globalOrdinals.lookupTerm(term);
if (ord >= 0) {
acceptedGlobalOrdinals.set(ord);
}
}
} else {
// default to all terms being acceptable
acceptedGlobalOrdinals.set(0, acceptedGlobalOrdinals.length());
}
if (excludeValues != null) {
for (BytesRef term : excludeValues) {
long ord = globalOrdinals.lookupTerm(term);
if (ord >= 0) {
acceptedGlobalOrdinals.clear(ord);
}
}
}
return acceptedGlobalOrdinals;
}
}
private final RegExp include, exclude;
private final SortedSet<BytesRef> includeValues, excludeValues;
@ -325,11 +395,18 @@ public class IncludeExclude {
}
public StringFilter convertToStringFilter() {
return new StringFilter(toAutomaton());
if (isRegexBased()) {
return new AutomatonBackedStringFilter(toAutomaton());
}
return new TermListBackedStringFilter(includeValues, excludeValues);
}
public OrdinalsFilter convertToOrdinalsFilter() {
return new OrdinalsFilter(toAutomaton());
if (isRegexBased()) {
return new AutomatonBackedOrdinalsFilter(toAutomaton());
}
return new TermListBackedOrdinalsFilter(includeValues, excludeValues);
}
public LongFilter convertToLongFilter() {

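The new TermListBackedOrdinalsFilter avoids compiling an automaton when include/exclude are plain term lists: it looks each term up in the global ordinals and sets or clears the corresponding bit. A small sketch of the same logic, with a sorted string list standing in for Lucene's RandomAccessOrds and java.util.BitSet in place of LongBitSet (illustrative names, not the aggregation API):

import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
import java.util.SortedSet;
import java.util.TreeSet;

public class TermListOrdinalsSketch {
    static BitSet acceptedOrdinals(List<String> globalTerms,
                                   SortedSet<String> include, SortedSet<String> exclude) {
        BitSet accepted = new BitSet(globalTerms.size());
        if (include != null) {
            for (String term : include) {
                int ord = globalTerms.indexOf(term); // stand-in for lookupTerm(term)
                if (ord >= 0) {
                    accepted.set(ord);
                }
            }
        } else {
            accepted.set(0, globalTerms.size()); // default to all terms being acceptable
        }
        if (exclude != null) {
            for (String term : exclude) {
                int ord = globalTerms.indexOf(term);
                if (ord >= 0) {
                    accepted.clear(ord);
                }
            }
        }
        return accepted;
    }

    public static void main(String[] args) {
        List<String> terms = Arrays.asList("apple", "banana", "cherry");
        SortedSet<String> exclude = new TreeSet<>(Arrays.asList("banana"));
        System.out.println(acceptedOrdinals(terms, null, exclude)); // {0, 2}
    }
}

When the include set is null, all ordinals start out accepted and the exclude list only clears bits, matching the default-accept branch above.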
View File

@ -22,7 +22,6 @@ package org.elasticsearch.search.highlight;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.lucene.index.IndexOptions;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.component.AbstractComponent;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.regex.Regex;

View File

@ -20,8 +20,6 @@ package org.elasticsearch.search.highlight;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.highlight.DefaultEncoder;
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
@ -29,7 +27,6 @@ import org.elasticsearch.index.fieldvisitor.CustomFieldsVisitor;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.fetch.FetchSubPhase;
import org.elasticsearch.search.internal.SearchContext;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.lookup.SourceLookup;
import java.io.IOException;
@ -39,6 +36,7 @@ public final class HighlightUtils {
//U+2029 PARAGRAPH SEPARATOR (PS): each value holds a discrete passage for highlighting (postings highlighter)
public static final char PARAGRAPH_SEPARATOR = 8233;
public static final char NULL_SEPARATOR = '\u0000';
private HighlightUtils() {

View File

@ -82,7 +82,7 @@ public class HighlighterParseElement implements SearchParseElement {
final SearchContextHighlight.FieldOptions.Builder globalOptionsBuilder = new SearchContextHighlight.FieldOptions.Builder()
.preTags(DEFAULT_PRE_TAGS).postTags(DEFAULT_POST_TAGS).scoreOrdered(false).highlightFilter(false)
.requireFieldMatch(false).forceSource(false).fragmentCharSize(100).numberOfFragments(5)
.requireFieldMatch(true).forceSource(false).fragmentCharSize(100).numberOfFragments(5)
.encoder("default").boundaryMaxScan(SimpleBoundaryScanner.DEFAULT_MAX_SCAN)
.boundaryChars(SimpleBoundaryScanner.DEFAULT_BOUNDARY_CHARS)
.noMatchSize(0).phraseLimit(256);

View File

@ -18,31 +18,14 @@
*/
package org.elasticsearch.search.highlight;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoringRewrite;
import org.apache.lucene.search.TopTermsRewrite;
import org.apache.lucene.search.Weight;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.highlight.Encoder;
import org.apache.lucene.search.postingshighlight.CustomPassageFormatter;
import org.apache.lucene.search.postingshighlight.CustomPostingsHighlighter;
import org.apache.lucene.search.postingshighlight.Snippet;
import org.apache.lucene.search.postingshighlight.WholeBreakIterator;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.search.postingshighlight.*;
import org.apache.lucene.util.CollectionUtil;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.text.StringText;
import org.elasticsearch.index.mapper.FieldMapper;
import org.elasticsearch.search.fetch.FetchPhaseExecutionException;
@ -51,13 +34,7 @@ import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.*;
public class PostingsHighlighter implements Highlighter {
@ -81,15 +58,7 @@ public class PostingsHighlighter implements Highlighter {
FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
if (!hitContext.cache().containsKey(CACHE_KEY)) {
//get the non rewritten query and rewrite it
Query query;
try {
query = rewrite(highlighterContext, hitContext.topLevelReader());
SortedSet<Term> queryTerms = extractTerms(context.searcher().createNormalizedWeight(query, false));
hitContext.cache().put(CACHE_KEY, new HighlighterEntry(queryTerms));
} catch (IOException e) {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
}
hitContext.cache().put(CACHE_KEY, new HighlighterEntry());
}
HighlighterEntry highlighterEntry = (HighlighterEntry) hitContext.cache().get(CACHE_KEY);
@ -98,37 +67,34 @@ public class PostingsHighlighter implements Highlighter {
if (mapperHighlighterEntry == null) {
Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
CustomPassageFormatter passageFormatter = new CustomPassageFormatter(field.fieldOptions().preTags()[0], field.fieldOptions().postTags()[0], encoder);
BytesRef[] filteredQueryTerms = filterTerms(highlighterEntry.queryTerms, fieldMapper.names().indexName(), field.fieldOptions().requireFieldMatch());
mapperHighlighterEntry = new MapperHighlighterEntry(passageFormatter, filteredQueryTerms);
mapperHighlighterEntry = new MapperHighlighterEntry(passageFormatter);
}
//we merge back multiple values into a single value using the paragraph separator, unless we have to highlight every single value separately (number_of_fragments=0).
boolean mergeValues = field.fieldOptions().numberOfFragments() != 0;
List<Snippet> snippets = new ArrayList<>();
int numberOfFragments;
try {
//we manually load the field values (from source if needed)
List<Object> textsToHighlight = HighlightUtils.loadFieldValues(field, fieldMapper, context, hitContext);
CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(mapperHighlighterEntry.passageFormatter, textsToHighlight, mergeValues, Integer.MAX_VALUE-1, field.fieldOptions().noMatchSize());
if (field.fieldOptions().numberOfFragments() == 0) {
highlighter.setBreakIterator(new WholeBreakIterator());
numberOfFragments = 1; //1 per value since we highlight per value
Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().type()).mappers().indexAnalyzer();
List<Object> fieldValues = HighlightUtils.loadFieldValues(field, fieldMapper, context, hitContext);
CustomPostingsHighlighter highlighter;
if (field.fieldOptions().numberOfFragments() == 0) {
//we use a control char to separate values, which is the only char that the custom break iterator breaks the text on,
//so we don't lose the distinction between the different values of a field and we get back a snippet per value
String fieldValue = mergeFieldValues(fieldValues, HighlightUtils.NULL_SEPARATOR);
CustomSeparatorBreakIterator breakIterator = new CustomSeparatorBreakIterator(HighlightUtils.NULL_SEPARATOR);
highlighter = new CustomPostingsHighlighter(analyzer, mapperHighlighterEntry.passageFormatter, breakIterator, fieldValue, field.fieldOptions().noMatchSize() > 0);
numberOfFragments = fieldValues.size(); //we are highlighting the whole content, one snippet per value
} else {
//using paragraph separator we make sure that each field value holds a discrete passage for highlighting
String fieldValue = mergeFieldValues(fieldValues, HighlightUtils.PARAGRAPH_SEPARATOR);
highlighter = new CustomPostingsHighlighter(analyzer, mapperHighlighterEntry.passageFormatter, fieldValue, field.fieldOptions().noMatchSize() > 0);
numberOfFragments = field.fieldOptions().numberOfFragments();
}
//we highlight every value separately, calling the highlight method multiple times, but only if we need a snippet back per value (whole value)
int values = mergeValues ? 1 : textsToHighlight.size();
for (int i = 0; i < values; i++) {
Snippet[] fieldSnippets = highlighter.highlightDoc(fieldMapper.names().indexName(), mapperHighlighterEntry.filteredQueryTerms, hitContext.reader(), hitContext.docId(), numberOfFragments);
if (fieldSnippets != null) {
for (Snippet fieldSnippet : fieldSnippets) {
if (Strings.hasText(fieldSnippet.getText())) {
snippets.add(fieldSnippet);
}
}
IndexSearcher searcher = new IndexSearcher(hitContext.reader());
Snippet[] fieldSnippets = highlighter.highlightField(fieldMapper.names().indexName(), highlighterContext.query.originalQuery(), searcher, hitContext.docId(), numberOfFragments);
for (Snippet fieldSnippet : fieldSnippets) {
if (Strings.hasText(fieldSnippet.getText())) {
snippets.add(fieldSnippet);
}
}
@ -160,97 +126,17 @@ public class PostingsHighlighter implements Highlighter {
return null;
}
private static Query rewrite(HighlighterContext highlighterContext, IndexReader reader) throws IOException {
Query original = highlighterContext.query.originalQuery();
//we walk the query tree and when we encounter multi term queries we need to make sure the rewrite method
//supports multi term extraction. If not we temporarily override it (and restore it after the rewrite).
List<Tuple<MultiTermQuery, MultiTermQuery.RewriteMethod>> modifiedMultiTermQueries = Lists.newArrayList();
overrideMultiTermRewriteMethod(original, modifiedMultiTermQueries);
//rewrite is expensive: if the query was already rewritten we try not to rewrite it again
if (highlighterContext.query.queryRewritten() && modifiedMultiTermQueries.size() == 0) {
//return the already rewritten query
return highlighterContext.query.query();
}
Query query = original;
for (Query rewrittenQuery = query.rewrite(reader); rewrittenQuery != query;
rewrittenQuery = query.rewrite(reader)) {
query = rewrittenQuery;
}
//set back the original rewrite method after the rewrite is done
for (Tuple<MultiTermQuery, MultiTermQuery.RewriteMethod> modifiedMultiTermQuery : modifiedMultiTermQueries) {
modifiedMultiTermQuery.v1().setRewriteMethod(modifiedMultiTermQuery.v2());
}
return query;
}
private static void overrideMultiTermRewriteMethod(Query query, List<Tuple<MultiTermQuery, MultiTermQuery.RewriteMethod>> modifiedMultiTermQueries) {
if (query instanceof MultiTermQuery) {
MultiTermQuery originalMultiTermQuery = (MultiTermQuery) query;
if (!allowsForTermExtraction(originalMultiTermQuery.getRewriteMethod())) {
MultiTermQuery.RewriteMethod originalRewriteMethod = originalMultiTermQuery.getRewriteMethod();
originalMultiTermQuery.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
//we need to rewrite anyway if it is a multi term query which was rewritten with the wrong rewrite method
modifiedMultiTermQueries.add(Tuple.tuple(originalMultiTermQuery, originalRewriteMethod));
}
}
if (query instanceof BooleanQuery) {
BooleanQuery booleanQuery = (BooleanQuery) query;
for (BooleanClause booleanClause : booleanQuery) {
overrideMultiTermRewriteMethod(booleanClause.getQuery(), modifiedMultiTermQueries);
}
}
if (query instanceof FilteredQuery) {
overrideMultiTermRewriteMethod(((FilteredQuery) query).getQuery(), modifiedMultiTermQueries);
}
if (query instanceof ConstantScoreQuery) {
overrideMultiTermRewriteMethod(((ConstantScoreQuery) query).getQuery(), modifiedMultiTermQueries);
}
}
private static boolean allowsForTermExtraction(MultiTermQuery.RewriteMethod rewriteMethod) {
return rewriteMethod instanceof TopTermsRewrite || rewriteMethod instanceof ScoringRewrite;
}
private static SortedSet<Term> extractTerms(Weight weight) {
SortedSet<Term> queryTerms = new TreeSet<>();
weight.extractTerms(queryTerms);
return queryTerms;
}
private static BytesRef[] filterTerms(SortedSet<Term> queryTerms, String field, boolean requireFieldMatch) {
SortedSet<Term> fieldTerms;
if (requireFieldMatch) {
Term floor = new Term(field, "");
Term ceiling = new Term(field, UnicodeUtil.BIG_TERM);
fieldTerms = queryTerms.subSet(floor, ceiling);
} else {
fieldTerms = queryTerms;
}
BytesRef terms[] = new BytesRef[fieldTerms.size()];
int termUpto = 0;
for(Term term : fieldTerms) {
terms[termUpto++] = term.bytes();
}
return terms;
private static String mergeFieldValues(List<Object> fieldValues, char valuesSeparator) {
//postings highlighter accepts all values in a single string, as offsets etc. need to match with content
//loaded from stored fields, we merge all values using a proper separator
String rawValue = Strings.collectionToDelimitedString(fieldValues, String.valueOf(valuesSeparator));
return rawValue.substring(0, Math.min(rawValue.length(), Integer.MAX_VALUE - 1));
}
private static List<Snippet> filterSnippets(List<Snippet> snippets, int numberOfFragments) {
//We need to filter the snippets as due to no_match_size we could have
//either highlighted snippets together non highlighted ones
//We don't want to mix those up
//either highlighted snippets or non highlighted ones and we don't want to mix those up
List<Snippet> filteredSnippets = new ArrayList<>(snippets.size());
for (Snippet snippet : snippets) {
if (snippet.isHighlighted()) {
@ -263,8 +149,8 @@ public class PostingsHighlighter implements Highlighter {
if (filteredSnippets.size() == 0) {
if (snippets.size() > 0) {
Snippet snippet = snippets.get(0);
//if we did discrete per value highlighting using whole break iterator (as number_of_fragments was 0)
//we need to obtain the first sentence of the first value
//if we tried highlighting the whole content using whole break iterator (as number_of_fragments was 0)
//we need to return the first sentence of the content rather than the whole content
if (numberOfFragments == 0) {
BreakIterator bi = BreakIterator.getSentenceInstance(Locale.ROOT);
String text = snippet.getText();
@ -283,21 +169,14 @@ public class PostingsHighlighter implements Highlighter {
}
private static class HighlighterEntry {
final SortedSet<Term> queryTerms;
Map<FieldMapper<?>, MapperHighlighterEntry> mappers = Maps.newHashMap();
private HighlighterEntry(SortedSet<Term> queryTerms) {
this.queryTerms = queryTerms;
}
}
private static class MapperHighlighterEntry {
final CustomPassageFormatter passageFormatter;
final BytesRef[] filteredQueryTerms;
private MapperHighlighterEntry(CustomPassageFormatter passageFormatter, BytesRef[] filteredQueryTerms) {
private MapperHighlighterEntry(CustomPassageFormatter passageFormatter) {
this.passageFormatter = passageFormatter;
this.filteredQueryTerms = filteredQueryTerms;
}
}
}

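The rewritten PostingsHighlighter no longer extracts and filters query terms itself; instead it merges all field values into one string using a separator character (NULL_SEPARATOR when one snippet per value is needed, PARAGRAPH_SEPARATOR otherwise) so that offsets from stored fields still line up. A minimal sketch of that merge, assuming nothing beyond the JDK (the helper name is illustrative):

import java.util.Arrays;
import java.util.List;

public class MergeFieldValuesSketch {
    static final char NULL_SEPARATOR = '\u0000';

    // Join all values with the separator so that per-value character offsets
    // remain valid within the merged string.
    static String merge(List<Object> values, char separator) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < values.size(); i++) {
            if (i > 0) {
                sb.append(separator);
            }
            sb.append(values.get(i));
        }
        return sb.toString();
    }

    public static void main(String[] args) {
        List<Object> values = Arrays.<Object>asList("first value", "second value");
        String merged = merge(values, NULL_SEPARATOR);
        // 11 chars + 1 separator + 12 chars = 24
        System.out.println(merged.length()); // 24
        System.out.println(merged.indexOf(NULL_SEPARATOR)); // 11: values stay addressable by offset
    }
}

With number_of_fragments=0 the CustomSeparatorBreakIterator then breaks exactly at these separators, so each original field value comes back as its own snippet.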
View File

@ -204,9 +204,7 @@ public class DefaultSearchContext extends SearchContext {
@Override
public void doClose() {
if (scanContext != null) {
scanContext.clear();
}
scanContext = null;
// release the searcher and engine searcher we hold
Releasables.close(searcher, engineSearcher);
}

View File

@ -19,59 +19,50 @@
package org.elasticsearch.search.scan;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.BitsFilteredDocIdSet;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.CollectionTerminatedException;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import org.elasticsearch.common.lucene.docset.AllDocIdSet;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.search.internal.SearchContext;
import java.io.IOException;
import java.util.ArrayList;
import java.util.concurrent.ConcurrentMap;
import java.util.List;
/**
* The scan context allows us to optimize readers we already processed during scanning. We do that by keeping track
* of the count per reader, and if we are done with it, we no longer process it by using a filter that returns
* null docIdSet for this reader.
* of the last collected doc ID and only collecting doc IDs that are greater.
*/
public class ScanContext {
private final ConcurrentMap<IndexReader, ReaderState> readerStates = ConcurrentCollections.newConcurrentMap();
public void clear() {
readerStates.clear();
}
private volatile int docUpTo;
public TopDocs execute(SearchContext context) throws IOException {
ScanCollector collector = new ScanCollector(readerStates, context.from(), context.size(), context.trackScores());
Query query = new FilteredQuery(context.query(), new ScanFilter(readerStates, collector));
try {
context.searcher().search(query, collector);
} catch (ScanCollector.StopCollectingException e) {
// all is well
}
return execute(context.searcher(), context.query(), context.size(), context.trackScores());
}
TopDocs execute(IndexSearcher searcher, Query query, int size, boolean trackScores) throws IOException {
ScanCollector collector = new ScanCollector(size, trackScores);
Query q = Queries.filtered(query, new MinDocQuery(docUpTo));
searcher.search(q, collector);
return collector.topDocs();
}
static class ScanCollector extends SimpleCollector {
private class ScanCollector extends SimpleCollector {
private final ConcurrentMap<IndexReader, ReaderState> readerStates;
private final List<ScoreDoc> docs;
private final int from;
private final int to;
private final ArrayList<ScoreDoc> docs;
private final int size;
private final boolean trackScores;
@ -79,21 +70,10 @@ public class ScanContext {
private int docBase;
private int counter;
private IndexReader currentReader;
private ReaderState readerState;
ScanCollector(ConcurrentMap<IndexReader, ReaderState> readerStates, int from, int size, boolean trackScores) {
this.readerStates = readerStates;
this.from = from;
this.to = from + size;
ScanCollector(int size, boolean trackScores) {
this.trackScores = trackScores;
this.docs = new ArrayList<>(size);
}
void incCounter(int count) {
this.counter += count;
this.size = size;
}
public TopDocs topDocs() {
@ -112,70 +92,114 @@ public class ScanContext {
@Override
public void collect(int doc) throws IOException {
if (counter >= from) {
docs.add(new ScoreDoc(docBase + doc, trackScores ? scorer.score() : 0f));
}
readerState.count++;
counter++;
if (counter >= to) {
throw StopCollectingException;
int topLevelDoc = docBase + doc;
docs.add(new ScoreDoc(topLevelDoc, trackScores ? scorer.score() : 0f));
// record that we collected up to this document
assert topLevelDoc >= docUpTo;
docUpTo = topLevelDoc + 1;
if (docs.size() >= size) {
throw new CollectionTerminatedException();
}
}
@Override
public void doSetNextReader(LeafReaderContext context) throws IOException {
// if we have a reader state, and we haven't registered one already, register it
// we need to check in readerStates since even when the filter returns null, setNextReader is still
// called for that reader (before)
if (currentReader != null && !readerStates.containsKey(currentReader)) {
assert readerState != null;
readerState.done = true;
readerStates.put(currentReader, readerState);
}
this.currentReader = context.reader();
this.docBase = context.docBase;
this.readerState = new ReaderState();
}
public static final RuntimeException StopCollectingException = new StopCollectingException();
static class StopCollectingException extends RuntimeException {
@Override
public Throwable fillInStackTrace() {
return null;
if (docs.size() >= size || context.docBase + context.reader().maxDoc() <= docUpTo) {
// no need to collect a new segment, we either already collected enough
// or the segment is not competitive
throw new CollectionTerminatedException();
}
docBase = context.docBase;
}
}
public static class ScanFilter extends Filter {
/**
* A filtering query that matches all doc IDs that are not deleted and
* greater than or equal to the configured doc ID.
*/
// pkg-private for testing
static class MinDocQuery extends Query {
private final ConcurrentMap<IndexReader, ReaderState> readerStates;
private final int minDoc;
private final ScanCollector scanCollector;
public ScanFilter(ConcurrentMap<IndexReader, ReaderState> readerStates, ScanCollector scanCollector) {
this.readerStates = readerStates;
this.scanCollector = scanCollector;
MinDocQuery(int minDoc) {
this.minDoc = minDoc;
}
@Override
public DocIdSet getDocIdSet(LeafReaderContext context, Bits acceptedDocs) throws IOException {
ReaderState readerState = readerStates.get(context.reader());
if (readerState != null && readerState.done) {
scanCollector.incCounter(readerState.count);
return null;
public int hashCode() {
return 31 * super.hashCode() + minDoc;
}
@Override
public boolean equals(Object obj) {
if (super.equals(obj) == false) {
return false;
}
return BitsFilteredDocIdSet.wrap(new AllDocIdSet(context.reader().maxDoc()), acceptedDocs);
MinDocQuery that = (MinDocQuery) obj;
return minDoc == that.minDoc;
}
@Override
public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException {
return new ConstantScoreWeight(this) {
@Override
public Scorer scorer(LeafReaderContext context, final Bits acceptDocs) throws IOException {
final int maxDoc = context.reader().maxDoc();
if (context.docBase + maxDoc <= minDoc) {
return null;
}
final int segmentMinDoc = Math.max(0, minDoc - context.docBase);
final DocIdSetIterator disi = new DocIdSetIterator() {
int doc = -1;
@Override
public int docID() {
return doc;
}
@Override
public int nextDoc() throws IOException {
return advance(doc + 1);
}
@Override
public int advance(int target) throws IOException {
assert target > doc;
if (doc == -1) {
// skip directly to minDoc
doc = Math.max(target, segmentMinDoc);
} else {
doc = target;
}
while (doc < maxDoc) {
if (acceptDocs == null || acceptDocs.get(doc)) {
break;
}
doc += 1;
}
if (doc >= maxDoc) {
doc = NO_MORE_DOCS;
}
return doc;
}
@Override
public long cost() {
return maxDoc - minDoc;
}
};
return new ConstantScoreScorer(this, score(), disi);
}
};
}
@Override
public String toString(String field) {
return "ScanFilter";
return "MinDocQuery(minDoc=" + minDoc + ")";
}
}
static class ReaderState {
public int count;
public boolean done;
}
}

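The core of MinDocQuery above is the iterator's advance(): the first advance jumps straight to the per-segment minimum doc, after which the loop walks forward past deleted documents. A stripped-down sketch of that loop over a plain boolean live-docs array (standing in for Lucene's Bits acceptDocs; this simplifies away the first-advance special case):

public class MinDocIterationSketch {
    static final int NO_MORE_DOCS = Integer.MAX_VALUE;

    static int advance(boolean[] liveDocs, int maxDoc, int segmentMinDoc, int target) {
        int doc = Math.max(target, segmentMinDoc); // skip directly to the minimum doc
        while (doc < maxDoc && !liveDocs[doc]) {
            doc++; // skip deleted documents
        }
        return doc < maxDoc ? doc : NO_MORE_DOCS;
    }

    public static void main(String[] args) {
        boolean[] liveDocs = {true, false, true, true};
        // resume a scan whose segmentMinDoc is 2, i.e. docs 0-1 were already collected
        System.out.println(advance(liveDocs, 4, 2, 0)); // 2
        System.out.println(advance(liveDocs, 4, 2, 3)); // 3
        System.out.println(advance(liveDocs, 4, 2, 4)); // 2147483647 (NO_MORE_DOCS)
    }
}

Because every collected doc advances docUpTo, the next scan round starts its iterator past everything already returned, which is what lets doSetNextReader skip whole non-competitive segments.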
View File

@ -308,7 +308,7 @@ public class SnapshotsService extends AbstractLifecycleComponent<SnapshotsServic
endSnapshot(snapshot);
return;
}
clusterService.submitStateUpdateTask("update_snapshot [" + snapshot + "]", new ProcessedClusterStateUpdateTask() {
clusterService.submitStateUpdateTask("update_snapshot [" + snapshot.snapshotId().getSnapshot() + "]", new ProcessedClusterStateUpdateTask() {
boolean accepted = false;
SnapshotMetaData.Entry updatedSnapshot;
String failure = null;

View File

@ -98,4 +98,7 @@ grant {
// needed by JDKESLoggerTests
permission java.util.logging.LoggingPermission "control";
// needed by Mockito
permission java.lang.RuntimePermission "reflectionFactoryAccess";
};

View File

@ -27,106 +27,16 @@ import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.search.highlight.DefaultEncoder;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.elasticsearch.search.highlight.HighlightUtils;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.Test;
import java.io.IOException;
import java.util.*;
import static org.hamcrest.CoreMatchers.equalTo;
import static org.hamcrest.CoreMatchers.notNullValue;
public class CustomPostingsHighlighterTests extends ElasticsearchTestCase {
@Test
public void testDiscreteHighlightingPerValue() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
Field body = new Field("body", "", offsetsType);
final String firstValue = "This is a test. Just a test highlighting from postings highlighter.";
Document doc = new Document();
doc.add(body);
body.setStringValue(firstValue);
final String secondValue = "This is the second value to perform highlighting on.";
Field body2 = new Field("body", "", offsetsType);
doc.add(body2);
body2.setStringValue(secondValue);
final String thirdValue = "This is the third value to test highlighting with postings.";
Field body3 = new Field("body", "", offsetsType);
doc.add(body3);
body3.setStringValue(thirdValue);
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
List<Object> fieldValues = new ArrayList<>();
fieldValues.add(firstValue);
fieldValues.add(secondValue);
fieldValues.add(thirdValue);
IndexSearcher searcher = newSearcher(ir);
Query query = new TermQuery(new Term("body", "highlighting"));
BytesRef[] queryTerms = filterTerms(extractTerms(searcher, query), "body", true);
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertThat(topDocs.totalHits, equalTo(1));
int docId = topDocs.scoreDocs[0].doc;
//highlighting per value, considering whole values (simulating number_of_fragments=0)
CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), fieldValues, false, Integer.MAX_VALUE - 1, 0);
highlighter.setBreakIterator(new WholeBreakIterator());
Snippet[] snippets = highlighter.highlightDoc("body", queryTerms, ir, docId, 5);
assertThat(snippets.length, equalTo(1));
assertThat(snippets[0].getText(), equalTo("This is a test. Just a test <b>highlighting</b> from postings highlighter."));
snippets = highlighter.highlightDoc("body", queryTerms, ir, docId, 5);
assertThat(snippets.length, equalTo(1));
assertThat(snippets[0].getText(), equalTo("This is the second value to perform <b>highlighting</b> on."));
snippets = highlighter.highlightDoc("body", queryTerms, ir, docId, 5);
assertThat(snippets.length, equalTo(1));
assertThat(snippets[0].getText(), equalTo("This is the third value to test <b>highlighting</b> with postings."));
//let's try without whole break iterator as well, to prove that highlighting works the same when working per value (not optimized though)
highlighter = new CustomPostingsHighlighter(new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), fieldValues, false, Integer.MAX_VALUE - 1, 0);
snippets = highlighter.highlightDoc("body", queryTerms, ir, docId, 5);
assertThat(snippets.length, equalTo(1));
assertThat(snippets[0].getText(), equalTo("Just a test <b>highlighting</b> from postings highlighter."));
snippets = highlighter.highlightDoc("body", queryTerms, ir, docId, 5);
assertThat(snippets.length, equalTo(1));
assertThat(snippets[0].getText(), equalTo("This is the second value to perform <b>highlighting</b> on."));
snippets = highlighter.highlightDoc("body", queryTerms, ir, docId, 5);
assertThat(snippets.length, equalTo(1));
assertThat(snippets[0].getText(), equalTo("This is the third value to test <b>highlighting</b> with postings."));
ir.close();
dir.close();
}
/*
Tests that scoring works properly even when using discrete per value highlighting
*/
@Test
public void testDiscreteHighlightingScoring() throws Exception {
public void testCustomPostingsHighlighter() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
@ -166,31 +76,23 @@ public class CustomPostingsHighlighterTests extends ElasticsearchTestCase {
IndexReader ir = iw.getReader();
iw.close();
String firstHlValue = "Just a test1 <b>highlighting</b> from postings highlighter.";
String secondHlValue = "This is the second <b>highlighting</b> value to perform <b>highlighting</b> on a longer text that gets scored lower.";
String thirdHlValue = "This is <b>highlighting</b> the third short <b>highlighting</b> value.";
String fourthHlValue = "Just a test4 <b>highlighting</b> from postings highlighter.";
IndexSearcher searcher = newSearcher(ir);
Query query = new TermQuery(new Term("body", "highlighting"));
BytesRef[] queryTerms = filterTerms(extractTerms(searcher, query), "body", true);
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertThat(topDocs.totalHits, equalTo(1));
int docId = topDocs.scoreDocs[0].doc;
List<Object> fieldValues = new ArrayList<>();
fieldValues.add(firstValue);
fieldValues.add(secondValue);
fieldValues.add(thirdValue);
fieldValues.add(fourthValue);
String fieldValue = firstValue + HighlightUtils.PARAGRAPH_SEPARATOR + secondValue + HighlightUtils.PARAGRAPH_SEPARATOR + thirdValue + HighlightUtils.PARAGRAPH_SEPARATOR + fourthValue;
boolean mergeValues = true;
CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), fieldValues, mergeValues, Integer.MAX_VALUE-1, 0);
Snippet[] snippets = highlighter.highlightDoc("body", queryTerms, ir, docId, 5);
CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(null, new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), fieldValue, false);
Snippet[] snippets = highlighter.highlightField("body", query, searcher, docId, 5);
assertThat(snippets.length, equalTo(4));
@ -199,211 +101,6 @@ public class CustomPostingsHighlighterTests extends ElasticsearchTestCase {
assertThat(snippets[2].getText(), equalTo(thirdHlValue));
assertThat(snippets[3].getText(), equalTo(fourthHlValue));
//Let's highlight each separate value and check how the snippets are scored
mergeValues = false;
highlighter = new CustomPostingsHighlighter(new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), fieldValues, mergeValues, Integer.MAX_VALUE-1, 0);
List<Snippet> snippets2 = new ArrayList<>();
for (int i = 0; i < fieldValues.size(); i++) {
snippets2.addAll(Arrays.asList(highlighter.highlightDoc("body", queryTerms, ir, docId, 5)));
}
assertThat(snippets2.size(), equalTo(4));
assertThat(snippets2.get(0).getText(), equalTo(firstHlValue));
assertThat(snippets2.get(1).getText(), equalTo(secondHlValue));
assertThat(snippets2.get(2).getText(), equalTo(thirdHlValue));
assertThat(snippets2.get(3).getText(), equalTo(fourthHlValue));
Comparator <Snippet> comparator = new Comparator<Snippet>() {
@Override
public int compare(Snippet o1, Snippet o2) {
return (int)Math.signum(o1.getScore() - o2.getScore());
}
};
//sorting both groups of snippets
Arrays.sort(snippets, comparator);
Collections.sort(snippets2, comparator);
//checking that the snippets are in the same order, regardless of whether we used per value discrete highlighting or not
//we can't compare the scores directly since they are slightly different due to the multiValued separator added when merging values together
//That causes slightly different lengths and start offsets, thus a slightly different score.
//Anyway, that's not an issue. What matters is that the score is computed the same way, so the produced order is always the same.
for (int i = 0; i < snippets.length; i++) {
assertThat(snippets[i].getText(), equalTo(snippets2.get(i).getText()));
}
ir.close();
dir.close();
}
/*
Tests that we produce the same snippets and scores when manually merging values in our own custom highlighter rather than using the built-in code
*/
@Test
public void testMergeValuesScoring() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
//good position but only one match
final String firstValue = "This is a test. Just a test1 highlighting from postings highlighter.";
Field body = new Field("body", "", offsetsType);
Document doc = new Document();
doc.add(body);
body.setStringValue(firstValue);
//two matches, not the best snippet due to its length though
final String secondValue = "This is the second highlighting value to perform highlighting on a longer text that gets scored lower.";
Field body2 = new Field("body", "", offsetsType);
doc.add(body2);
body2.setStringValue(secondValue);
//two matches and short, will be scored highest
final String thirdValue = "This is highlighting the third short highlighting value.";
Field body3 = new Field("body", "", offsetsType);
doc.add(body3);
body3.setStringValue(thirdValue);
//one match, same as first but at the end, will be scored lower due to its position
final String fourthValue = "Just a test4 highlighting from postings highlighter.";
Field body4 = new Field("body", "", offsetsType);
doc.add(body4);
body4.setStringValue(fourthValue);
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
String firstHlValue = "Just a test1 <b>highlighting</b> from postings highlighter.";
String secondHlValue = "This is the second <b>highlighting</b> value to perform <b>highlighting</b> on a longer text that gets scored lower.";
String thirdHlValue = "This is <b>highlighting</b> the third short <b>highlighting</b> value.";
String fourthHlValue = "Just a test4 <b>highlighting</b> from postings highlighter.";
IndexSearcher searcher = newSearcher(ir);
Query query = new TermQuery(new Term("body", "highlighting"));
BytesRef[] queryTerms = filterTerms(extractTerms(searcher, query), "body", true);
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertThat(topDocs.totalHits, equalTo(1));
int docId = topDocs.scoreDocs[0].doc;
List<Object> fieldValues = new ArrayList<>();
fieldValues.add(firstValue);
fieldValues.add(secondValue);
fieldValues.add(thirdValue);
fieldValues.add(fourthValue);
boolean mergeValues = true;
CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), fieldValues, mergeValues, Integer.MAX_VALUE-1, 0);
Snippet[] snippets = highlighter.highlightDoc("body", queryTerms, ir, docId, 5);
assertThat(snippets.length, equalTo(4));
assertThat(snippets[0].getText(), equalTo(firstHlValue));
assertThat(snippets[1].getText(), equalTo(secondHlValue));
assertThat(snippets[2].getText(), equalTo(thirdHlValue));
assertThat(snippets[3].getText(), equalTo(fourthHlValue));
//testing now our fork / normal postings highlighter, which merges multiple values together using the paragraph separator
XPostingsHighlighter highlighter2 = new XPostingsHighlighter(Integer.MAX_VALUE - 1) {
@Override
protected char getMultiValuedSeparator(String field) {
return HighlightUtils.PARAGRAPH_SEPARATOR;
}
@Override
protected PassageFormatter getFormatter(String field) {
return new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
}
};
Map<String, Object[]> highlightMap = highlighter2.highlightFieldsAsObjects(new String[]{"body"}, query, searcher, new int[]{docId}, new int[]{5});
Object[] objects = highlightMap.get("body");
assertThat(objects, notNullValue());
assertThat(objects.length, equalTo(1));
Snippet[] normalSnippets = (Snippet[])objects[0];
assertThat(normalSnippets.length, equalTo(4));
assertThat(normalSnippets[0].getText(), equalTo(firstHlValue));
assertThat(normalSnippets[1].getText(), equalTo(secondHlValue));
assertThat(normalSnippets[2].getText(), equalTo(thirdHlValue));
assertThat(normalSnippets[3].getText(), equalTo(fourthHlValue));
for (int i = 0; i < normalSnippets.length; i++) {
Snippet customSnippet = snippets[i];
Snippet normalSnippet = normalSnippets[i];
assertThat(customSnippet.getText(), equalTo(normalSnippet.getText()));
assertThat(customSnippet.getScore(), equalTo(normalSnippet.getScore()));
}
ir.close();
dir.close();
}
@Test
public void testRequireFieldMatch() throws Exception {
Directory dir = newDirectory();
IndexWriterConfig iwc = newIndexWriterConfig(new MockAnalyzer(random()));
iwc.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
FieldType offsetsType = new FieldType(TextField.TYPE_STORED);
offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
Field body = new Field("body", "", offsetsType);
Field none = new Field("none", "", offsetsType);
Document doc = new Document();
doc.add(body);
doc.add(none);
String firstValue = "This is a test. Just a test highlighting from postings. Feel free to ignore.";
body.setStringValue(firstValue);
none.setStringValue(firstValue);
iw.addDocument(doc);
IndexReader ir = iw.getReader();
iw.close();
Query query = new TermQuery(new Term("none", "highlighting"));
IndexSearcher searcher = newSearcher(ir);
SortedSet<Term> queryTerms = extractTerms(searcher, query);
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertThat(topDocs.totalHits, equalTo(1));
int docId = topDocs.scoreDocs[0].doc;
List<Object> values = new ArrayList<>();
values.add(firstValue);
CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(passageFormatter, values, true, Integer.MAX_VALUE - 1, 0);
//no snippets with simulated require field match (we filter the terms ourselves)
boolean requireFieldMatch = true;
BytesRef[] filteredQueryTerms = filterTerms(queryTerms, "body", requireFieldMatch);
Snippet[] snippets = highlighter.highlightDoc("body", filteredQueryTerms, ir, docId, 5);
assertThat(snippets.length, equalTo(0));
highlighter = new CustomPostingsHighlighter(passageFormatter, values, true, Integer.MAX_VALUE - 1, 0);
//one snippet without require field match, just passing in the query terms with no filtering on our side
requireFieldMatch = false;
filteredQueryTerms = filterTerms(queryTerms, "body", requireFieldMatch);
snippets = highlighter.highlightDoc("body", filteredQueryTerms, ir, docId, 5);
assertThat(snippets.length, equalTo(1));
assertThat(snippets[0].getText(), equalTo("Just a test <b>highlighting</b> from postings."));
ir.close();
dir.close();
}
@ -434,56 +131,22 @@ public class CustomPostingsHighlighterTests extends ElasticsearchTestCase {
Query query = new TermQuery(new Term("none", "highlighting"));
IndexSearcher searcher = newSearcher(ir);
SortedSet<Term> queryTerms = extractTerms(searcher, query);
TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
assertThat(topDocs.totalHits, equalTo(1));
int docId = topDocs.scoreDocs[0].doc;
List<Object> values = new ArrayList<>();
values.add(firstValue);
BytesRef[] filteredQueryTerms = filterTerms(queryTerms, "body", true);
CustomPassageFormatter passageFormatter = new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder());
CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(passageFormatter, values, true, Integer.MAX_VALUE - 1, 0);
Snippet[] snippets = highlighter.highlightDoc("body", filteredQueryTerms, ir, docId, 5);
CustomPostingsHighlighter highlighter = new CustomPostingsHighlighter(null, passageFormatter, firstValue, false);
Snippet[] snippets = highlighter.highlightField("body", query, searcher, docId, 5);
assertThat(snippets.length, equalTo(0));
highlighter = new CustomPostingsHighlighter(passageFormatter, values, true, Integer.MAX_VALUE - 1, scaledRandomIntBetween(1, 10));
snippets = highlighter.highlightDoc("body", filteredQueryTerms, ir, docId, 5);
highlighter = new CustomPostingsHighlighter(null, passageFormatter, firstValue, true);
snippets = highlighter.highlightField("body", query, searcher, docId, 5);
assertThat(snippets.length, equalTo(1));
assertThat(snippets[0].getText(), equalTo("This is a test."));
ir.close();
dir.close();
}
private static SortedSet<Term> extractTerms(IndexSearcher searcher, Query query) throws IOException {
return extractTerms(searcher.createNormalizedWeight(query, false));
}
private static SortedSet<Term> extractTerms(Weight weight) {
SortedSet<Term> queryTerms = new TreeSet<>();
weight.extractTerms(queryTerms);
return queryTerms;
}
private static BytesRef[] filterTerms(SortedSet<Term> queryTerms, String field, boolean requireFieldMatch) {
SortedSet<Term> fieldTerms;
if (requireFieldMatch) {
Term floor = new Term(field, "");
Term ceiling = new Term(field, UnicodeUtil.BIG_TERM);
fieldTerms = queryTerms.subSet(floor, ceiling);
} else {
fieldTerms = queryTerms;
}
BytesRef terms[] = new BytesRef[fieldTerms.size()];
int termUpto = 0;
for(Term term : fieldTerms) {
terms[termUpto++] = term.bytes();
}
return terms;
}
}

View File

@ -0,0 +1,186 @@
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.lucene.search.postingshighlight;
import org.elasticsearch.search.highlight.HighlightUtils;
import org.elasticsearch.test.ElasticsearchTestCase;
import org.junit.Test;
import java.text.BreakIterator;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.Locale;
import static org.hamcrest.CoreMatchers.equalTo;
public class CustomSeparatorBreakIteratorTests extends ElasticsearchTestCase {
@Test
public void testBreakOnCustomSeparator() throws Exception {
Character separator = randomSeparator();
BreakIterator bi = new CustomSeparatorBreakIterator(separator);
String source = "this" + separator + "is" + separator + "the" + separator + "first" + separator + "sentence";
bi.setText(source);
assertThat(bi.current(), equalTo(0));
assertThat(bi.first(), equalTo(0));
assertThat(source.substring(bi.current(), bi.next()), equalTo("this" + separator));
assertThat(source.substring(bi.current(), bi.next()), equalTo("is" + separator));
assertThat(source.substring(bi.current(), bi.next()), equalTo("the" + separator));
assertThat(source.substring(bi.current(), bi.next()), equalTo("first" + separator));
assertThat(source.substring(bi.current(), bi.next()), equalTo("sentence"));
assertThat(bi.next(), equalTo(BreakIterator.DONE));
assertThat(bi.last(), equalTo(source.length()));
int current = bi.current();
assertThat(source.substring(bi.previous(), current), equalTo("sentence"));
current = bi.current();
assertThat(source.substring(bi.previous(), current), equalTo("first" + separator));
current = bi.current();
assertThat(source.substring(bi.previous(), current), equalTo("the" + separator));
current = bi.current();
assertThat(source.substring(bi.previous(), current), equalTo("is" + separator));
current = bi.current();
assertThat(source.substring(bi.previous(), current), equalTo("this" + separator));
assertThat(bi.previous(), equalTo(BreakIterator.DONE));
assertThat(bi.current(), equalTo(0));
assertThat(source.substring(0, bi.following(9)), equalTo("this" + separator + "is" + separator + "the" + separator));
assertThat(source.substring(0, bi.preceding(9)), equalTo("this" + separator + "is" + separator));
assertThat(bi.first(), equalTo(0));
assertThat(source.substring(0, bi.next(3)), equalTo("this" + separator + "is" + separator + "the" + separator));
}
@Test
public void testSingleSentences() throws Exception {
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
assertSameBreaks("a", expected, actual);
assertSameBreaks("ab", expected, actual);
assertSameBreaks("abc", expected, actual);
assertSameBreaks("", expected, actual);
}
@Test
public void testSliceEnd() throws Exception {
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
assertSameBreaks("a000", 0, 1, expected, actual);
assertSameBreaks("ab000", 0, 1, expected, actual);
assertSameBreaks("abc000", 0, 1, expected, actual);
assertSameBreaks("000", 0, 0, expected, actual);
}
@Test
public void testSliceStart() throws Exception {
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
assertSameBreaks("000a", 3, 1, expected, actual);
assertSameBreaks("000ab", 3, 2, expected, actual);
assertSameBreaks("000abc", 3, 3, expected, actual);
assertSameBreaks("000", 3, 0, expected, actual);
}
@Test
public void testSliceMiddle() throws Exception {
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
assertSameBreaks("000a000", 3, 1, expected, actual);
assertSameBreaks("000ab000", 3, 2, expected, actual);
assertSameBreaks("000abc000", 3, 3, expected, actual);
assertSameBreaks("000000", 3, 0, expected, actual);
}
/** the current position must be ignored, initial position is always first() */
@Test
public void testFirstPosition() throws Exception {
BreakIterator expected = BreakIterator.getSentenceInstance(Locale.ROOT);
BreakIterator actual = new CustomSeparatorBreakIterator(randomSeparator());
assertSameBreaks("000ab000", 3, 2, 4, expected, actual);
}
private static char randomSeparator() {
return randomFrom(' ', HighlightUtils.NULL_SEPARATOR, HighlightUtils.PARAGRAPH_SEPARATOR);
}
private static void assertSameBreaks(String text, BreakIterator expected, BreakIterator actual) {
assertSameBreaks(new StringCharacterIterator(text),
new StringCharacterIterator(text),
expected,
actual);
}
private static void assertSameBreaks(String text, int offset, int length, BreakIterator expected, BreakIterator actual) {
assertSameBreaks(text, offset, length, offset, expected, actual);
}
private static void assertSameBreaks(String text, int offset, int length, int current, BreakIterator expected, BreakIterator actual) {
assertSameBreaks(new StringCharacterIterator(text, offset, offset + length, current),
new StringCharacterIterator(text, offset, offset + length, current),
expected,
actual);
}
/** Asserts that two BreakIterators break the text the same way */
private static void assertSameBreaks(CharacterIterator one, CharacterIterator two, BreakIterator expected, BreakIterator actual) {
expected.setText(one);
actual.setText(two);
assertEquals(expected.current(), actual.current());
// next()
int v = expected.current();
while (v != BreakIterator.DONE) {
assertEquals(v = expected.next(), actual.next());
assertEquals(expected.current(), actual.current());
}
// first()
assertEquals(expected.first(), actual.first());
assertEquals(expected.current(), actual.current());
// last()
assertEquals(expected.last(), actual.last());
assertEquals(expected.current(), actual.current());
// previous()
v = expected.current();
while (v != BreakIterator.DONE) {
assertEquals(v = expected.previous(), actual.previous());
assertEquals(expected.current(), actual.current());
}
// following()
for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) {
expected.first();
actual.first();
assertEquals(expected.following(i), actual.following(i));
assertEquals(expected.current(), actual.current());
}
// preceding()
for (int i = one.getBeginIndex(); i <= one.getEndIndex(); i++) {
expected.last();
actual.last();
assertEquals(expected.preceding(i), actual.preceding(i));
assertEquals(expected.current(), actual.current());
}
}
}

View File

@ -52,6 +52,7 @@ import java.nio.file.Path;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
@ -612,20 +613,67 @@ public class IndexWithShadowReplicasTests extends ElasticsearchIntegrationTest {
assertThat(hits[3].field("foo").getValue().toString(), equalTo("foo"));
}
/** wait until none of the given nodes have shards allocated on them */
private void assertNoShardsOn(final List<String> nodeList) throws Exception {
assertBusy(new Runnable() {
@Override
public void run() {
ClusterStateResponse resp = client().admin().cluster().prepareState().get();
RoutingNodes nodes = resp.getState().getRoutingNodes();
for (RoutingNode node : nodes) {
logger.info("--> node {} has {} shards", node.node().getName(), node.numberOfOwningShards());
if (nodeList.contains(node.node().getName())) {
assertThat("no shards on node", node.numberOfOwningShards(), equalTo(0));
}
}
}
});
}
/** wait until the node has the specified number of shards allocated on it */
private void assertShardCountOn(final String nodeName, final int shardCount) throws Exception {
assertBusy(new Runnable() {
@Override
public void run() {
ClusterStateResponse resp = client().admin().cluster().prepareState().get();
RoutingNodes nodes = resp.getState().getRoutingNodes();
for (RoutingNode node : nodes) {
logger.info("--> node {} has {} shards", node.node().getName(), node.numberOfOwningShards());
if (nodeName.equals(node.node().getName())) {
assertThat(node.numberOfOwningShards(), equalTo(shardCount));
}
}
}
});
}
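// Added note, not in the original commit: both helpers above poll the cluster state
// inside assertBusy, which retries the Runnable until it stops throwing
// AssertionErrors or a timeout elapses.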
@Test
public void testIndexOnSharedFSRecoversToAnyNode() throws Exception {
Settings nodeSettings = nodeSettings();
Settings fooSettings = ImmutableSettings.builder().put(nodeSettings).put("node.affinity", "foo").build();
Settings barSettings = ImmutableSettings.builder().put(nodeSettings).put("node.affinity", "bar").build();
internalCluster().startNode(nodeSettings);
final Future<List<String>> fooNodes = internalCluster().startNodesAsync(2, fooSettings);
final Future<List<String>> barNodes = internalCluster().startNodesAsync(2, barSettings);
fooNodes.get();
barNodes.get();
Path dataPath = createTempDir();
String IDX = "test";
Settings includeFoo = ImmutableSettings.builder()
.put("index.routing.allocation.include.affinity", "foo")
.build();
Settings includeBar = ImmutableSettings.builder()
.put("index.routing.allocation.include.affinity", "bar")
.build();
Settings idxSettings = ImmutableSettings.builder()
.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 5)
.put(IndexMetaData.SETTING_NUMBER_OF_REPLICAS, 0)
.put(IndexMetaData.SETTING_DATA_PATH, dataPath.toAbsolutePath().toString())
.put(IndexMetaData.SETTING_SHARED_FILESYSTEM, true)
.put(IndexMetaData.SETTING_SHARED_FS_ALLOW_RECOVERY_ON_ANY_NODE, true)
.put(includeFoo) // start with requiring the shards on "foo"
.build();
// only one node, so all primaries will end up on node1
@ -637,17 +685,43 @@ public class IndexWithShadowReplicasTests extends ElasticsearchIntegrationTest {
client().prepareIndex(IDX, "doc", "2").setSource("foo", "bar").get();
client().prepareIndex(IDX, "doc", "3").setSource("foo", "baz").get();
client().prepareIndex(IDX, "doc", "4").setSource("foo", "eggplant").get();
flushAndRefresh(IDX);
// start a second node
internalCluster().startNode(nodeSettings);
// put shards on "bar"
client().admin().indices().prepareUpdateSettings(IDX).setSettings(includeBar).get();
// node1 is master, stop that one, since we only have primaries,
// usually this would mean data loss, but not on shared fs!
internalCluster().stopCurrentMasterNode();
// wait for the shards to move from "foo" nodes to "bar" nodes
assertNoShardsOn(fooNodes.get());
// put shards back on "foo"
client().admin().indices().prepareUpdateSettings(IDX).setSettings(includeFoo).get();
// wait for the shards to move from "bar" nodes to "foo" nodes
assertNoShardsOn(barNodes.get());
// Stop a foo node
logger.info("--> stopping first 'foo' node");
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(fooNodes.get().get(0)));
// Ensure that the other foo node has all the shards now
assertShardCountOn(fooNodes.get().get(1), 5);
// Assert no shards on the "bar" nodes
assertNoShardsOn(barNodes.get());
// Stop the second "foo" node
logger.info("--> stopping second 'foo' node");
internalCluster().stopRandomNode(InternalTestCluster.nameFilter(fooNodes.get().get(1)));
// The index should still be able to be allocated (on the "bar" nodes),
// all the "foo" nodes are gone
ensureGreen(IDX);
refresh();
SearchResponse resp = client().prepareSearch(IDX).setQuery(matchAllQuery()).addFieldDataField("foo").addSort("foo", SortOrder.ASC).get();
assertHitCount(resp, 4);
// Start another "foo" node and make sure the index moves back
logger.info("--> starting additional 'foo' node");
String newFooNode = internalCluster().startNode(fooSettings);
assertShardCountOn(newFooNode, 5);
assertNoShardsOn(barNodes.get());
}
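/**
 * Hedged sketch, not part of the original commit: the allocation-filtering move used
 * repeatedly above, extracted for illustration only. The "affinity" node attribute
 * and the client() helper are taken from the surrounding test.
 */
private void pinIndexTo(String index, String affinity) {
Settings include = ImmutableSettings.builder()
.put("index.routing.allocation.include.affinity", affinity)
.build();
client().admin().indices().prepareUpdateSettings(index).setSettings(include).get();
}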
}

View File

@ -21,7 +21,6 @@ package org.elasticsearch.search.highlight;
import com.carrotsearch.randomizedtesting.generators.RandomPicks;
import com.google.common.base.Joiner;
import com.google.common.collect.Iterables;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.elasticsearch.Version;
import org.elasticsearch.action.index.IndexRequestBuilder;
@ -31,15 +30,9 @@ import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.ImmutableSettings.Builder;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.BoostableQueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.index.query.IdsQueryBuilder;
import org.elasticsearch.index.query.MatchQueryBuilder;
import org.elasticsearch.index.query.*;
import org.elasticsearch.index.query.MatchQueryBuilder.Operator;
import org.elasticsearch.index.query.MatchQueryBuilder.Type;
import org.elasticsearch.index.query.MultiMatchQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.rest.RestStatus;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.builder.SearchSourceBuilder;
@ -56,40 +49,12 @@ import java.util.Map;
import static org.elasticsearch.client.Requests.searchRequest;
import static org.elasticsearch.common.settings.ImmutableSettings.settingsBuilder;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.missingQuery;
import static org.elasticsearch.index.query.QueryBuilders.typeQuery;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.boostingQuery;
import static org.elasticsearch.index.query.QueryBuilders.commonTermsQuery;
import static org.elasticsearch.index.query.QueryBuilders.constantScoreQuery;
import static org.elasticsearch.index.query.QueryBuilders.filteredQuery;
import static org.elasticsearch.index.query.QueryBuilders.fuzzyQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchPhrasePrefixQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchPhraseQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.elasticsearch.index.query.QueryBuilders.multiMatchQuery;
import static org.elasticsearch.index.query.QueryBuilders.prefixQuery;
import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
import static org.elasticsearch.index.query.QueryBuilders.rangeQuery;
import static org.elasticsearch.index.query.QueryBuilders.regexpQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static org.elasticsearch.index.query.QueryBuilders.wildcardQuery;
import static org.elasticsearch.index.query.QueryBuilders.*;
import static org.elasticsearch.search.builder.SearchSourceBuilder.highlight;
import static org.elasticsearch.search.builder.SearchSourceBuilder.searchSource;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertFailures;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHighlight;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNotHighlighted;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
import static org.elasticsearch.test.hamcrest.RegexMatcher.matches;
import static org.hamcrest.Matchers.anyOf;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.hasKey;
import static org.hamcrest.Matchers.not;
import static org.hamcrest.Matchers.nullValue;
import static org.hamcrest.Matchers.startsWith;
import static org.hamcrest.Matchers.*;
@Slow
public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
@ -493,13 +458,13 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
SearchResponse search = client().prepareSearch()
.setQuery(matchQuery("title", "bug"))
.addHighlightedField("title", -1, 2)
.addHighlightedField("titleTV", -1, 2)
.addHighlightedField("titleTV", -1, 2).setHighlighterRequireFieldMatch(false)
.get();
assertHighlight(search, 0, "title", 0, equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch"));
assertHighlight(search, 0, "title", 0, equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch"));
assertHighlight(search, 0, "title", 1, 2, equalTo("The <em>bug</em> is bugging us"));
assertHighlight(search, 0, "titleTV", 0, equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch"));
assertHighlight(search, 0, "titleTV", 1, 2, equalTo("The <em>bug</em> is bugging us"));
assertHighlight(search, 0, "titleTV", 0, equalTo("This is a test on the highlighting <em>bug</em> present in elasticsearch"));
assertHighlight(search, 0, "titleTV", 1, 2, equalTo("The <em>bug</em> is bugging us"));
search = client().prepareSearch()
.setQuery(matchQuery("titleTV", "highlight"))
@ -525,7 +490,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
.query(termQuery("field1", "test"))
.highlight(highlight().order("score").preTags("<global>").postTags("</global>").fragmentSize(1).numOfFragments(1)
.field(new HighlightBuilder.Field("field1").numOfFragments(2))
.field(new HighlightBuilder.Field("field2").preTags("<field2>").postTags("</field2>").fragmentSize(50)));
.field(new HighlightBuilder.Field("field2").preTags("<field2>").postTags("</field2>").fragmentSize(50).requireFieldMatch(false)));
SearchResponse searchResponse = client().prepareSearch("test").setSource(source.buildAsBytes()).get();
@ -551,8 +516,9 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
logger.info("--> highlighting and searching on field*");
SearchSourceBuilder source = searchSource()
.query(termQuery("field-plain", "test"))
.highlight(highlight().field("field*").preTags("<xxx>").postTags("</xxx>"));
//the postings highlighter doesn't support require_field_match; its field needs to be queried directly
.query(termQuery("field-postings", "test"))
.highlight(highlight().field("field*").preTags("<xxx>").postTags("</xxx>").requireFieldMatch(false));
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
@ -586,14 +552,14 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy dog"));
assertFailures(client().prepareSearch("test")
.setQuery(termQuery("field1", "quick"))
.addHighlightedField(new Field("field1").preTags("<xxx>").postTags("</xxx>").highlighterType("plain").forceSource(true)),
.setQuery(termQuery("field1", "quick"))
.addHighlightedField(new Field("field1").preTags("<xxx>").postTags("</xxx>").highlighterType("plain").forceSource(true)),
RestStatus.BAD_REQUEST,
containsString("source is forced for fields [field1] but type [type1] has disabled _source"));
assertFailures(client().prepareSearch("test")
.setQuery(termQuery("field1", "quick"))
.addHighlightedField(new Field("field1").preTags("<xxx>").postTags("</xxx>").highlighterType("fvh").forceSource(true)),
.setQuery(termQuery("field1", "quick"))
.addHighlightedField(new Field("field1").preTags("<xxx>").postTags("</xxx>").highlighterType("fvh").forceSource(true)),
RestStatus.BAD_REQUEST,
containsString("source is forced for fields [field1] but type [type1] has disabled _source"));
@ -637,7 +603,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
logger.info("--> searching on _all, highlighting on field1");
source = searchSource()
.query(termQuery("_all", "test"))
.highlight(highlight().field("field1").order("score").preTags("<xxx>").postTags("</xxx>"));
.highlight(highlight().field("field1").order("score").preTags("<xxx>").postTags("</xxx>").requireFieldMatch(false));
searchResponse = client().prepareSearch("test").setSource(source.buildAsBytes()).get();
@ -646,7 +612,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
logger.info("--> searching on _all, highlighting on field2");
source = searchSource()
.query(termQuery("_all", "quick"))
.highlight(highlight().field("field2").order("score").preTags("<xxx>").postTags("</xxx>"));
.highlight(highlight().field("field2").order("score").preTags("<xxx>").postTags("</xxx>").requireFieldMatch(false));
searchResponse = client().prepareSearch("test").setSource(source.buildAsBytes()).get();
@ -655,7 +621,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
logger.info("--> searching on _all, highlighting on field2");
source = searchSource()
.query(prefixQuery("_all", "qui"))
.highlight(highlight().field("field2").order("score").preTags("<xxx>").postTags("</xxx>"));
.highlight(highlight().field("field2").order("score").preTags("<xxx>").postTags("</xxx>").requireFieldMatch(false));
searchResponse = client().prepareSearch("test").setSource(source.buildAsBytes()).get();
@ -664,7 +630,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
logger.info("--> searching on _all with constant score, highlighting on field2");
source = searchSource()
.query(constantScoreQuery(prefixQuery("_all", "qui")))
.highlight(highlight().field("field2").order("score").preTags("<xxx>").postTags("</xxx>"));
.highlight(highlight().field("field2").order("score").preTags("<xxx>").postTags("</xxx>").requireFieldMatch(false));
searchResponse = client().prepareSearch("test").setSource(source.buildAsBytes()).get();
@ -673,7 +639,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
logger.info("--> searching on _all with constant score, highlighting on field2");
source = searchSource()
.query(boolQuery().should(constantScoreQuery(prefixQuery("_all", "qui"))))
.highlight(highlight().field("field2").order("score").preTags("<xxx>").postTags("</xxx>"));
.highlight(highlight().field("field2").order("score").preTags("<xxx>").postTags("</xxx>").requireFieldMatch(false));
searchResponse = client().prepareSearch("test").setSource(source.buildAsBytes()).get();
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy dog"));
@ -700,7 +666,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
logger.info("--> searching on _all, highlighting on field1");
source = searchSource()
.query(termQuery("_all", "test"))
.highlight(highlight().field("field1", 100, 0).order("score").preTags("<xxx>").postTags("</xxx>"));
.highlight(highlight().field("field1", 100, 0).order("score").preTags("<xxx>").postTags("</xxx>").requireFieldMatch(false));
searchResponse = client().prepareSearch("test").setSource(source.buildAsBytes()).get();
@ -710,7 +676,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
logger.info("--> searching on _all, highlighting on field2");
source = searchSource()
.query(termQuery("_all", "quick"))
.highlight(highlight().field("field2", 100, 0).order("score").preTags("<xxx>").postTags("</xxx>"));
.highlight(highlight().field("field2", 100, 0).order("score").preTags("<xxx>").postTags("</xxx>").requireFieldMatch(false));
searchResponse = client().prepareSearch("test").setSource(source.buildAsBytes()).get();
@ -720,7 +686,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
logger.info("--> searching on _all, highlighting on field2");
source = searchSource()
.query(prefixQuery("_all", "qui"))
.highlight(highlight().field("field2", 100, 0).order("score").preTags("<xxx>").postTags("</xxx>"));
.highlight(highlight().field("field2", 100, 0).order("score").preTags("<xxx>").postTags("</xxx>").requireFieldMatch(false));
searchResponse = client().prepareSearch("test").setSource(source.buildAsBytes()).get();
@ -1450,11 +1416,11 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
assertHighlight(response, 0, "tags", 1, 2, equalTo("here is another one that is very <em>long</em> <em>tag</em> and has the tag token near the end"));
assertFailures(client().prepareSearch("test")
.setQuery(QueryBuilders.matchQuery("tags", "long tag").type(MatchQueryBuilder.Type.PHRASE))
.addHighlightedField(new HighlightBuilder.Field("tags")
.fragmentSize(-1).numOfFragments(2).fragmenter("invalid")),
RestStatus.BAD_REQUEST,
containsString("unknown fragmenter option [invalid] for the field [tags]"));
}
@Test
@ -1852,7 +1818,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
.addMapping("type1", "text", "type=string," + randomStoreField() + "term_vector=with_positions_offsets,index_options=offsets"));
ensureGreen();
String text1 = "This is the first sentence. This is the second sentence.";
String text1 = "This is the first sentence. This is the second sentence." + HighlightUtils.PARAGRAPH_SEPARATOR;
String text2 = "This is the third sentence. This is the fourth sentence.";
String text3 = "This is the fifth sentence";
index("test", "type1", "1", "text", new String[] {text1, text2, text3});
@ -1911,27 +1877,27 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("this is a <xxx>test</xxx>"));
logger.info("--> searching on _all, highlighting on field1");
logger.info("--> searching on field1, highlighting on field1");
source = searchSource()
.query(termQuery("_all", "test"))
.query(termQuery("field1", "test"))
.highlight(highlight().field("field1").preTags("<xxx>").postTags("</xxx>"));
searchResponse = client().search(searchRequest("test").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("this is a <xxx>test</xxx>"));
logger.info("--> searching on _all, highlighting on field2");
logger.info("--> searching on field2, highlighting on field2");
source = searchSource()
.query(termQuery("_all", "quick"))
.query(termQuery("field2", "quick"))
.highlight(highlight().field("field2").order("score").preTags("<xxx>").postTags("</xxx>"));
searchResponse = client().search(searchRequest("test").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <xxx>quick</xxx> brown fox jumps over the lazy <xxx>quick</xxx> dog"));
logger.info("--> searching on _all, highlighting on field2");
logger.info("--> searching on field2, highlighting on field2");
source = searchSource()
.query(matchPhraseQuery("_all", "quick brown"))
.query(matchPhraseQuery("field2", "quick brown"))
.highlight(highlight().field("field2").preTags("<xxx>").postTags("</xxx>"));
searchResponse = client().search(searchRequest("test").source(source)).actionGet();
@ -1940,10 +1906,10 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <xxx>quick</xxx> <xxx>brown</xxx> fox jumps over the lazy <xxx>quick</xxx> dog"));
//let's fall back to the plain highlighter then, which is what people would do to highlight query matches
logger.info("--> searching on _all, highlighting on field2, falling back to the plain highlighter");
logger.info("--> searching on field2, highlighting on field2, falling back to the plain highlighter");
source = searchSource()
.query(matchPhraseQuery("_all", "quick brown"))
.highlight(highlight().field("field2").preTags("<xxx>").postTags("</xxx>").highlighterType("highlighter"));
.highlight(highlight().field("field2").preTags("<xxx>").postTags("</xxx>").highlighterType("highlighter").requireFieldMatch(false));
searchResponse = client().search(searchRequest("test").source(source)).actionGet();
@ -1961,10 +1927,8 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
SearchResponse response = client().prepareSearch("test")
.setQuery(QueryBuilders.matchQuery("field1", "fox"))
.addHighlightedField(new HighlightBuilder.Field("field1").preTags("<1>").postTags("</1>").requireFieldMatch(true))
.addHighlightedField(new HighlightBuilder.Field("field2").preTags("<2>").postTags("</2>").requireFieldMatch(false))
.get();
assertHighlight(response, 0, "field1", 0, 1, equalTo("The <b>quick<b> brown <1>fox</1>."));
assertHighlight(response, 0, "field2", 0, 1, equalTo("The <b>slow<b> brown <2>fox</2>."));
}
@Test
@ -1981,8 +1945,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
SearchSourceBuilder source = searchSource()
.query(termQuery("field1", "fox"))
.highlight(highlight()
.field(new HighlightBuilder.Field("field1").numOfFragments(5).preTags("<field1>").postTags("</field1>"))
.field(new HighlightBuilder.Field("field2").numOfFragments(2).preTags("<field2>").postTags("</field2>")));
.field(new HighlightBuilder.Field("field1").numOfFragments(5).preTags("<field1>").postTags("</field1>")));
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
@ -1990,9 +1953,6 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
assertHighlight(searchResponse, 0, "field1", 1, equalTo("The lazy red <field1>fox</field1> jumps over the quick dog."));
assertHighlight(searchResponse, 0, "field1", 2, 3, equalTo("The quick brown dog jumps over the lazy <field1>fox</field1>."));
assertHighlight(searchResponse, 0, "field2", 0, equalTo("The quick brown <field2>fox</field2> jumps over the lazy dog."));
assertHighlight(searchResponse, 0, "field2", 1, 2, equalTo("The lazy red <field2>fox</field2> jumps over the quick dog."));
client().prepareIndex("test", "type1", "2")
.setSource("field1", new String[]{"The quick brown fox jumps over the lazy dog. Second sentence not finished", "The lazy red fox jumps over the quick dog.", "The quick brown dog jumps over the lazy fox."}).get();
refresh();
@ -2009,8 +1969,8 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
if ("1".equals(searchHit.id())) {
assertHighlight(searchHit, "field1", 0, 1, equalTo("The quick brown <field1>fox</field1> jumps over the lazy dog. The lazy red <field1>fox</field1> jumps over the quick dog. The quick brown dog jumps over the lazy <field1>fox</field1>."));
} else if ("2".equals(searchHit.id())) {
assertHighlight(searchHit, "field1", 0, equalTo("The quick brown <field1>fox</field1> jumps over the lazy dog. Second sentence not finished"));
assertHighlight(searchHit, "field1", 1, equalTo("The lazy red <field1>fox</field1> jumps over the quick dog."));
assertHighlight(searchHit, "field1", 0, 3, equalTo("The quick brown <field1>fox</field1> jumps over the lazy dog. Second sentence not finished"));
assertHighlight(searchHit, "field1", 1, 3, equalTo("The lazy red <field1>fox</field1> jumps over the quick dog."));
assertHighlight(searchHit, "field1", 2, 3, equalTo("The quick brown dog jumps over the lazy <field1>fox</field1>."));
} else {
fail("Only hits with id 1 and 2 are returned");
@ -2018,67 +1978,6 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
}
}
@Test
public void testPostingsHighlighterRequireFieldMatch() throws Exception {
assertAcked(prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
ensureGreen();
client().prepareIndex("test", "type1")
.setSource("field1", "The quick brown fox jumps over the lazy dog. The lazy red fox jumps over the quick dog. The quick brown dog jumps over the lazy fox.",
"field2", "The quick brown fox jumps over the lazy dog. The lazy red fox jumps over the quick dog. The quick brown dog jumps over the lazy fox.").get();
refresh();
logger.info("--> highlighting and searching on field1");
SearchSourceBuilder source = searchSource()
.query(termQuery("field1", "fox"))
.highlight(highlight()
.field(new HighlightBuilder.Field("field1").requireFieldMatch(true).preTags("<field1>").postTags("</field1>"))
.field(new HighlightBuilder.Field("field2").requireFieldMatch(true).preTags("<field2>").postTags("</field2>")));
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
//field2 is not returned highlighted because the require_field_match option is set to true
assertNotHighlighted(searchResponse, 0, "field2");
assertHighlight(searchResponse, 0, "field1", 0, equalTo("The quick brown <field1>fox</field1> jumps over the lazy dog."));
assertHighlight(searchResponse, 0, "field1", 1, equalTo("The lazy red <field1>fox</field1> jumps over the quick dog."));
assertHighlight(searchResponse, 0, "field1", 2, 3, equalTo("The quick brown dog jumps over the lazy <field1>fox</field1>."));
logger.info("--> highlighting and searching on field1 and field2 - require field match set to false");
source = searchSource()
.query(termQuery("field1", "fox"))
.highlight(highlight()
.field(new HighlightBuilder.Field("field1").requireFieldMatch(false).preTags("<field1>").postTags("</field1>"))
.field(new HighlightBuilder.Field("field2").requireFieldMatch(false).preTags("<field2>").postTags("</field2>")));
searchResponse = client().search(searchRequest("test").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field1", 0, equalTo("The quick brown <field1>fox</field1> jumps over the lazy dog."));
assertHighlight(searchResponse, 0, "field1", 1, equalTo("The lazy red <field1>fox</field1> jumps over the quick dog."));
assertHighlight(searchResponse, 0, "field1", 2, 3, equalTo("The quick brown dog jumps over the lazy <field1>fox</field1>."));
//field2 is now returned highlighted thanks to require_field_match set to false
assertHighlight(searchResponse, 0, "field2", 0, equalTo("The quick brown <field2>fox</field2> jumps over the lazy dog."));
assertHighlight(searchResponse, 0, "field2", 1, equalTo("The lazy red <field2>fox</field2> jumps over the quick dog."));
assertHighlight(searchResponse, 0, "field2", 2, 3, equalTo("The quick brown dog jumps over the lazy <field2>fox</field2>."));
logger.info("--> highlighting and searching on field1 and field2 via multi_match query");
final MultiMatchQueryBuilder mmquery = multiMatchQuery("fox", "field1", "field2").type(RandomPicks.randomFrom(getRandom(), MultiMatchQueryBuilder.Type.values()));
source = searchSource()
.query(mmquery)
.highlight(highlight().highlightQuery(randomBoolean() ? mmquery : null)
.field(new HighlightBuilder.Field("field1").requireFieldMatch(true).preTags("<field1>").postTags("</field1>"))
.field(new HighlightBuilder.Field("field2").requireFieldMatch(true).preTags("<field2>").postTags("</field2>")));
searchResponse = client().search(searchRequest("test").source(source)).actionGet();
assertHitCount(searchResponse, 1l);
assertHighlight(searchResponse, 0, "field1", 0, equalTo("The quick brown <field1>fox</field1> jumps over the lazy dog."));
assertHighlight(searchResponse, 0, "field1", 1, equalTo("The lazy red <field1>fox</field1> jumps over the quick dog."));
assertHighlight(searchResponse, 0, "field1", 2, 3, equalTo("The quick brown dog jumps over the lazy <field1>fox</field1>."));
//field2 is now returned highlighted thanks to the multi_match query on both fields
assertHighlight(searchResponse, 0, "field2", 0, equalTo("The quick brown <field2>fox</field2> jumps over the lazy dog."));
assertHighlight(searchResponse, 0, "field2", 1, equalTo("The lazy red <field2>fox</field2> jumps over the quick dog."));
assertHighlight(searchResponse, 0, "field2", 2, 3, equalTo("The quick brown dog jumps over the lazy <field2>fox</field2>."));
}
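/**
 * Hedged aside, not part of the original code: require_field_match in one line —
 * when true, only fields actually targeted by the query are highlighted; when false,
 * every requested field is highlighted against the query. Field names are reused
 * from the surrounding test for illustration only.
 */
private static SearchSourceBuilder requireFieldMatchSketch(boolean require) {
return searchSource()
.query(termQuery("field1", "fox"))
.highlight(highlight().field(new HighlightBuilder.Field("field2").requireFieldMatch(require)));
}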
@Test
public void testMultiMatchQueryHighlight() throws IOException {
String[] highlighterTypes = new String[] {"fvh", "plain", "postings"};
@ -2097,14 +1996,22 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
refresh();
final int iters = scaledRandomIntBetween(20, 30);
for (int i = 0; i < iters; i++) {
MultiMatchQueryBuilder.Type matchQueryType = rarely() ? null : RandomPicks.randomFrom(getRandom(), MultiMatchQueryBuilder.Type.values());
String highlighterType = rarely() ? null : RandomPicks.randomFrom(getRandom(), highlighterTypes);
MultiMatchQueryBuilder.Type[] supportedQueryTypes;
if ("postings".equals(highlighterType)) {
//phrase_prefix is not supported by the postings highlighter: it rewrites against an empty reader, so the prefix will never match any term
supportedQueryTypes = new MultiMatchQueryBuilder.Type[]{MultiMatchQueryBuilder.Type.BEST_FIELDS, MultiMatchQueryBuilder.Type.CROSS_FIELDS, MultiMatchQueryBuilder.Type.MOST_FIELDS, MultiMatchQueryBuilder.Type.PHRASE};
} else {
supportedQueryTypes = MultiMatchQueryBuilder.Type.values();
}
MultiMatchQueryBuilder.Type matchQueryType = rarely() ? null : RandomPicks.randomFrom(getRandom(), supportedQueryTypes);
final MultiMatchQueryBuilder multiMatchQueryBuilder = multiMatchQuery("the quick brown fox", "field1", "field2").type(matchQueryType);
String type = rarely() ? null : RandomPicks.randomFrom(getRandom(),highlighterTypes);
SearchSourceBuilder source = searchSource()
.query(multiMatchQueryBuilder)
.highlight(highlight().highlightQuery(randomBoolean() ? multiMatchQueryBuilder : null).highlighterType(type)
.highlight(highlight().highlightQuery(randomBoolean() ? multiMatchQueryBuilder : null).highlighterType(highlighterType)
.field(new Field("field1").requireFieldMatch(true).preTags("<field1>").postTags("</field1>")));
logger.info("Running multi-match type: [" + matchQueryType + "] highlight with type: [" + type + "]");
logger.info("Running multi-match type: [" + matchQueryType + "] highlight with type: [" + highlighterType + "]");
SearchResponse searchResponse = client().search(searchRequest("test").source(source)).actionGet();
assertHitCount(searchResponse, 1l);
assertHighlight(searchResponse, 0, "field1", 0, anyOf(equalTo("<field1>The quick brown fox</field1> jumps over"),
@ -2139,16 +2046,6 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
assertThat(field1.fragments()[2].string(), equalTo("This is the second value's first <em>sentence</em>."));
assertThat(field1.fragments()[3].string(), equalTo("This <em>sentence</em> contains one match, not that short."));
assertThat(field1.fragments()[4].string(), equalTo("One <em>sentence</em> match here and scored lower since the text is quite long, not that appealing."));
//let's now use number_of_fragments = 0, so that we highlight per value without breaking values into snippets, while still sorting the values by score
source = searchSource()
.query(termQuery("field1", "sentence"))
.highlight(highlight().field("field1", -1, 0).order("score"));
searchResponse = client().search(searchRequest("test").source(source)).actionGet();
assertHighlight(searchResponse, 0, "field1", 0, equalTo("This is the second value's first <em>sentence</em>. This one contains no matches. This <em>sentence</em> contains three <em>sentence</em> occurrences (<em>sentence</em>)."));
assertHighlight(searchResponse, 0, "field1", 1, equalTo("This <em>sentence</em> contains one match, not that short. This <em>sentence</em> contains two <em>sentence</em> matches. This one contains no matches."));
assertHighlight(searchResponse, 0, "field1", 2, 3, equalTo("One <em>sentence</em> match here and scored lower since the text is quite long, not that appealing. This one contains no matches."));
}
@Test
@ -2260,25 +2157,24 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
assertNoFailures(search);
assertFailures(client().prepareSearch()
.setQuery(matchQuery("title", "this is a test"))
.addHighlightedField("title")
.setHighlighterType("postings-highlighter"),
.setQuery(matchQuery("title", "this is a test"))
.addHighlightedField("title")
.setHighlighterType("postings-highlighter"),
RestStatus.BAD_REQUEST,
containsString("the field [title] should be indexed with positions and offsets in the postings list to be used with postings highlighter"));
assertFailures(client().prepareSearch()
.setQuery(matchQuery("title", "this is a test"))
.addHighlightedField("title")
.setHighlighterType("postings"),
.setQuery(matchQuery("title", "this is a test"))
.addHighlightedField("title")
.setHighlighterType("postings"),
RestStatus.BAD_REQUEST,
containsString("the field [title] should be indexed with positions and offsets in the postings list to be used with postings highlighter"));
assertFailures(client().prepareSearch()
.setQuery(matchQuery("title", "this is a test"))
.addHighlightedField("tit*")
.setHighlighterType("postings"),
.setQuery(matchQuery("title", "this is a test"))
.addHighlightedField("tit*")
.setHighlighterType("postings"),
RestStatus.BAD_REQUEST,
containsString("the field [title] should be indexed with positions and offsets in the postings list to be used with postings highlighter"));
}
@ -2316,9 +2212,8 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <x>quick</x> <x>brown</x> fox jumps over the lazy dog!"));
}
public XContentBuilder type1PostingsffsetsMapping() throws IOException {
private static XContentBuilder type1PostingsffsetsMapping() throws IOException {
return XContentFactory.jsonBuilder().startObject().startObject("type1")
.startObject("_all").field("store", "yes").field("index_options", "offsets").endObject()
.startObject("properties")
.startObject("field1").field("type", "string").field("index_options", "offsets").endObject()
.startObject("field2").field("type", "string").field("index_options", "offsets").endObject()
@ -2326,9 +2221,6 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
.endObject().endObject();
}
private static final String[] REWRITE_METHODS = new String[]{"constant_score_auto", "scoring_boolean", "constant_score_boolean",
"constant_score_filter", "top_terms_boost_50", "top_terms_50"};
@Test
public void testPostingsHighlighterPrefixQuery() throws Exception {
assertAcked(prepareCreate("test").addMapping("type1", type1PostingsffsetsMapping()));
@ -2338,7 +2230,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
refresh();
logger.info("--> highlighting and searching on field2");
SearchSourceBuilder source = searchSource().query(prefixQuery("field2", "qui").rewrite(randomFrom(REWRITE_METHODS)))
SearchSourceBuilder source = searchSource().query(prefixQuery("field2", "qui"))
.highlight(highlight().field("field2"));
SearchResponse searchResponse = client().prepareSearch("test").setSource(source.buildAsBytes()).get();
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
@ -2368,7 +2260,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
client().prepareIndex("test", "type1").setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.").get();
refresh();
logger.info("--> highlighting and searching on field2");
SearchSourceBuilder source = searchSource().query(regexpQuery("field2", "qu[a-l]+k").rewrite(randomFrom(REWRITE_METHODS)))
SearchSourceBuilder source = searchSource().query(regexpQuery("field2", "qu[a-l]+k"))
.highlight(highlight().field("field2"));
SearchResponse searchResponse = client().prepareSearch("test").setSource(source.buildAsBytes()).get();
@ -2383,13 +2275,13 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
client().prepareIndex("test", "type1").setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.").get();
refresh();
logger.info("--> highlighting and searching on field2");
SearchSourceBuilder source = searchSource().query(wildcardQuery("field2", "qui*").rewrite(randomFrom(REWRITE_METHODS)))
SearchSourceBuilder source = searchSource().query(wildcardQuery("field2", "qui*"))
.highlight(highlight().field("field2"));
SearchResponse searchResponse = client().prepareSearch("test").setSource(source.buildAsBytes()).get();
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
source = searchSource().query(wildcardQuery("field2", "qu*k").rewrite(randomFrom(REWRITE_METHODS)))
source = searchSource().query(wildcardQuery("field2", "qu*k"))
.highlight(highlight().field("field2"));
searchResponse = client().prepareSearch("test").setSource(source.buildAsBytes()).get();
assertHitCount(searchResponse, 1l);
@ -2420,7 +2312,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
client().prepareIndex("test", "type1").setSource("field1", "this is a test", "field2", "The quick brown fox jumps over the lazy dog! Second sentence.").get();
refresh();
logger.info("--> highlighting and searching on field2");
SearchSourceBuilder source = searchSource().query(queryStringQuery("qui*").defaultField("field2").rewrite(randomFrom(REWRITE_METHODS)))
SearchSourceBuilder source = searchSource().query(queryStringQuery("qui*").defaultField("field2"))
.highlight(highlight().field("field2"));
SearchResponse searchResponse = client().prepareSearch("test").setSource(source.buildAsBytes()).get();
assertHighlight(searchResponse, 0, "field2", 0, 1, equalTo("The <em>quick</em> brown fox jumps over the lazy dog!"));
@ -2436,7 +2328,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
refresh();
logger.info("--> highlighting and searching on field1");
SearchSourceBuilder source = searchSource().query(constantScoreQuery(regexpQuery("field1", "pho[a-z]+").rewrite(randomFrom(REWRITE_METHODS))))
SearchSourceBuilder source = searchSource().query(constantScoreQuery(regexpQuery("field1", "pho[a-z]+")))
.highlight(highlight().field("field1"));
SearchResponse searchResponse = client().prepareSearch("test").setSource(source.buildAsBytes()).get();
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The <em>photography</em> word will get highlighted"));
@ -2455,7 +2347,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
SearchSourceBuilder source = searchSource().query(boolQuery()
.should(constantScoreQuery(QueryBuilders.missingQuery("field1")))
.should(matchQuery("field1", "test"))
.should(filteredQuery(queryStringQuery("field1:photo*").rewrite(randomFrom(REWRITE_METHODS)), null)))
.should(filteredQuery(queryStringQuery("field1:photo*"), null)))
.highlight(highlight().field("field1"));
SearchResponse searchResponse = client().prepareSearch("test").setSource(source.buildAsBytes()).get();
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The <em>photography</em> word will get highlighted"));
@ -2471,7 +2363,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
refresh();
logger.info("--> highlighting and searching on field1");
SearchSourceBuilder source = searchSource().query(boolQuery().must(prefixQuery("field1", "photo").rewrite(randomFrom(REWRITE_METHODS))).should(matchQuery("field1", "test").minimumShouldMatch("0")))
SearchSourceBuilder source = searchSource().query(boolQuery().must(prefixQuery("field1", "photo")).should(matchQuery("field1", "test").minimumShouldMatch("0")))
.highlight(highlight().field("field1"));
SearchResponse searchResponse = client().prepareSearch("test").setSource(source.buildAsBytes()).get();
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The <em>photography</em> word will get highlighted"));
@ -2487,7 +2379,7 @@ public class HighlighterSearchTests extends ElasticsearchIntegrationTest {
refresh();
logger.info("--> highlighting and searching on field1");
SearchSourceBuilder source = searchSource().query(filteredQuery(queryStringQuery("field1:photo*").rewrite(randomFrom(REWRITE_METHODS)), missingQuery("field_null")))
SearchSourceBuilder source = searchSource().query(filteredQuery(queryStringQuery("field1:photo*"), missingQuery("field_null")))
.highlight(highlight().field("field1"));
SearchResponse searchResponse = client().prepareSearch("test").setSource(source.buildAsBytes()).get();
assertHighlight(searchResponse, 0, "field1", 0, 1, equalTo("The <em>photography</em> word will get highlighted"));

View File

@ -0,0 +1,116 @@
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.scan;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryUtils;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.elasticsearch.search.scan.ScanContext.MinDocQuery;
import org.elasticsearch.test.ElasticsearchTestCase;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
public class ScanContextTests extends ElasticsearchTestCase {
public void testMinDocQueryBasics() {
MinDocQuery query1 = new MinDocQuery(42);
MinDocQuery query2 = new MinDocQuery(42);
MinDocQuery query3 = new MinDocQuery(43);
QueryUtils.check(query1);
QueryUtils.checkEqual(query1, query2);
QueryUtils.checkUnequal(query1, query3);
}
public void testMinDocQueryRandom() throws IOException {
final int numDocs = randomIntBetween(10, 200);
final Document doc = new Document();
final Directory dir = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(getRandom(), dir);
for (int i = 0; i < numDocs; ++i) {
w.addDocument(doc);
}
final IndexReader reader = w.getReader();
final IndexSearcher searcher = newSearcher(reader);
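// MinDocQuery(i) is expected to match exactly the docs with docID >= i, so each
// step of the loop below expects one fewer hit (note inferred from the assertions)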
for (int i = 0; i <= numDocs; ++i) {
assertEquals(numDocs - i, searcher.count(new MinDocQuery(i)));
}
w.close();
reader.close();
dir.close();
}
public void testRandom() throws Exception {
final int numDocs = randomIntBetween(10, 200);
final Document doc1 = new Document();
doc1.add(new StringField("foo", "bar", Store.NO));
final Document doc2 = new Document();
final Directory dir = newDirectory();
final RandomIndexWriter w = new RandomIndexWriter(getRandom(), dir);
for (int i = 0; i < numDocs; ++i) {
w.addDocument(randomBoolean() ? doc1 : doc2);
}
final IndexReader reader = w.getReader();
final IndexSearcher searcher = newSearcher(reader);
final boolean trackScores = randomBoolean();
final int pageSize = randomIntBetween(1, numDocs / 2);
Query query = new TermQuery(new Term("foo", "bar"));
if (trackScores == false) {
query.setBoost(0f);
}
final ScoreDoc[] expected = searcher.search(query, numDocs, Sort.INDEXORDER, true, true).scoreDocs;
final List<ScoreDoc> actual = new ArrayList<>();
ScanContext context = new ScanContext();
while (true) {
final ScoreDoc[] page = context.execute(searcher, query, pageSize, trackScores).scoreDocs;
assertTrue(page.length <= pageSize);
if (page.length == 0) {
assertEquals(0, context.execute(searcher, query, pageSize, trackScores).scoreDocs.length);
break;
}
actual.addAll(Arrays.asList(page));
}
assertEquals(expected.length, actual.size());
for (int i = 0; i < expected.length; ++i) {
ScoreDoc sd1 = expected[i];
ScoreDoc sd2 = actual.get(i);
assertEquals(sd1.doc, sd2.doc);
assertEquals(sd1.score, sd2.score, 0.001f);
}
w.close();
reader.close();
dir.close();
}
}

View File

@ -23,11 +23,14 @@ import com.carrotsearch.randomizedtesting.RandomizedTest;
import com.carrotsearch.randomizedtesting.annotations.TestGroup;
import com.carrotsearch.randomizedtesting.annotations.TimeoutSuite;
import com.google.common.collect.Lists;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.lucene.util.LuceneTestCase.SuppressCodecs;
import org.apache.lucene.util.LuceneTestCase.SuppressFsync;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.TimeUnits;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.SuppressForbidden;
import org.elasticsearch.common.io.PathUtils;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
@ -48,9 +51,17 @@ import org.junit.BeforeClass;
import org.junit.Test;
import java.io.IOException;
import java.io.InputStream;
import java.lang.annotation.*;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.PathMatcher;
import java.nio.file.StandardCopyOption;
import java.util.*;
/**
@ -158,23 +169,29 @@ public abstract class ElasticsearchRestTestCase extends ElasticsearchIntegration
}
private static List<RestTestCandidate> collectTestCandidates(int id, int count) throws RestTestParseException, IOException {
String[] paths = resolvePathsProperty(REST_TESTS_SUITE, DEFAULT_TESTS_PATH);
Map<String, Set<Path>> yamlSuites = FileUtils.findYamlSuites(DEFAULT_TESTS_PATH, paths);
List<RestTestCandidate> testCandidates = Lists.newArrayList();
RestTestSuiteParser restTestSuiteParser = new RestTestSuiteParser();
//yaml suites are grouped by directory (effectively by api)
for (String api : yamlSuites.keySet()) {
List<Path> yamlFiles = Lists.newArrayList(yamlSuites.get(api));
for (Path yamlFile : yamlFiles) {
String key = api + yamlFile.getFileName().toString();
if (mustExecute(key, id, count)) {
RestTestSuite restTestSuite = restTestSuiteParser.parse(api, yamlFile);
for (TestSection testSection : restTestSuite.getTestSections()) {
testCandidates.add(new RestTestCandidate(restTestSuite, testSection));
FileSystem fileSystem = getFileSystem();
// don't use try-with-resources here: getFileSystem may return null,
// and you can't close() the default filesystem
try {
String[] paths = resolvePathsProperty(REST_TESTS_SUITE, DEFAULT_TESTS_PATH);
Map<String, Set<Path>> yamlSuites = FileUtils.findYamlSuites(fileSystem, DEFAULT_TESTS_PATH, paths);
RestTestSuiteParser restTestSuiteParser = new RestTestSuiteParser();
//yaml suites are grouped by directory (effectively by api)
for (String api : yamlSuites.keySet()) {
List<Path> yamlFiles = Lists.newArrayList(yamlSuites.get(api));
for (Path yamlFile : yamlFiles) {
String key = api + yamlFile.getFileName().toString();
if (mustExecute(key, id, count)) {
RestTestSuite restTestSuite = restTestSuiteParser.parse(api, yamlFile);
for (TestSection testSection : restTestSuite.getTestSections()) {
testCandidates.add(new RestTestCandidate(restTestSuite, testSection));
}
}
}
}
} finally {
IOUtils.close(fileSystem);
}
//sort the candidates so they will always be in the same order before being shuffled, for repeatability
@ -202,10 +219,46 @@ public abstract class ElasticsearchRestTestCase extends ElasticsearchIntegration
}
}
/**
* Returns a new FileSystem to read REST resources, or null if they
* are available from classpath.
*/
@SuppressForbidden(reason = "proper use of URL, hack around a JDK bug")
static FileSystem getFileSystem() throws IOException {
// REST suite handling is currently complicated, with lots of filtering and so on
// For now, to work embedded in a jar, return a ZipFileSystem over the jar contents.
URL codeLocation = FileUtils.class.getProtectionDomain().getCodeSource().getLocation();
if (codeLocation.getFile().endsWith(".jar")) {
try {
// hack around a bug in the zipfilesystem implementation before java 9,
// its checkWritable was incorrect and it won't work without write permissions.
// if we add the permission, it will open jars r/w, which is too scary! so copy to a safe r-w location.
Path tmp = Files.createTempFile(null, ".jar");
try (InputStream in = codeLocation.openStream()) {
Files.copy(in, tmp, StandardCopyOption.REPLACE_EXISTING);
}
return FileSystems.newFileSystem(new URI("jar:" + tmp.toUri()), Collections.<String,Object>emptyMap());
} catch (URISyntaxException e) {
throw new IOException("couldn't open zipfilesystem: ", e);
}
} else {
return null;
}
}
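/**
 * Hedged usage sketch, not part of the original change: how callers pair
 * getFileSystem() with Lucene's IOUtils.close, which tolerates null — so the
 * null (classpath) case needs no special handling in the finally block.
 */
private static void withRestResourcesSketch() throws IOException {
FileSystem fileSystem = getFileSystem();
try {
// resolve REST resources here, passing fileSystem through
// (null means: look them up on the classpath instead)
} finally {
IOUtils.close(fileSystem);
}
}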
@BeforeClass
public static void initExecutionContext() throws IOException, RestException {
String[] specPaths = resolvePathsProperty(REST_TESTS_SPEC, DEFAULT_SPEC_PATH);
RestSpec restSpec = RestSpec.parseFrom(DEFAULT_SPEC_PATH, specPaths);
RestSpec restSpec = null;
FileSystem fileSystem = getFileSystem();
// don't use try-with-resources here: getFileSystem may return null,
// and you can't close() the default filesystem
try {
restSpec = RestSpec.parseFrom(fileSystem, DEFAULT_SPEC_PATH, specPaths);
} finally {
IOUtils.close(fileSystem);
}
validateSpec(restSpec);
restTestExecutionContext = new RestTestExecutionContext(restSpec);
}

View File

@ -19,12 +19,14 @@
package org.elasticsearch.test.rest.spec;
import com.google.common.collect.Maps;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.common.xcontent.json.JsonXContent;
import org.elasticsearch.test.rest.support.FileUtils;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.FileSystem;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collection;
@ -54,10 +56,10 @@ public class RestSpec {
/**
* Parses the complete set of REST spec available under the provided directories
*/
public static RestSpec parseFrom(String optionalPathPrefix, String... paths) throws IOException {
public static RestSpec parseFrom(FileSystem fileSystem, String optionalPathPrefix, String... paths) throws IOException {
RestSpec restSpec = new RestSpec();
for (String path : paths) {
for (Path jsonFile : FileUtils.findJsonSpec(optionalPathPrefix, path)) {
for (Path jsonFile : FileUtils.findJsonSpec(fileSystem, optionalPathPrefix, path)) {
try (InputStream stream = Files.newInputStream(jsonFile)) {
XContentParser parser = JsonXContent.jsonXContent.createParser(stream);
RestApi restApi = new RestApiParser().parse(parser);

View File

@ -25,17 +25,14 @@ import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.PathUtils;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.DirectoryStream;
import java.nio.file.FileSystems;
import java.nio.file.FileSystem;
import java.nio.file.FileVisitResult;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
import java.nio.file.NotDirectoryException;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.SimpleFileVisitor;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.HashSet;
@ -53,10 +50,10 @@ public final class FileUtils {
/**
* Returns the json files found within the directory provided as argument.
* Files are looked up in the classpath first, then outside of it if not found.
* Files are looked up in the classpath, or optionally from {@code fileSystem} if it's not null.
*/
public static Set<Path> findJsonSpec(String optionalPathPrefix, String path) throws IOException {
Path dir = resolveFile(optionalPathPrefix, path, null);
public static Set<Path> findJsonSpec(FileSystem fileSystem, String optionalPathPrefix, String path) throws IOException {
Path dir = resolveFile(fileSystem, optionalPathPrefix, path, null);
if (!Files.isDirectory(dir)) {
throw new NotDirectoryException(path);
@ -81,37 +78,46 @@ public final class FileUtils {
/**
* Returns the yaml files found within the paths provided.
* Each input path can either be a single file (the .yaml suffix is optional) or a directory.
* Each path is looked up in the classpath first, then outside of it if not found yet.
* Each path is looked up in the classpath, or optionally from {@code fileSystem} if it's not null.
*/
public static Map<String, Set<Path>> findYamlSuites(final String optionalPathPrefix, final String... paths) throws IOException {
public static Map<String, Set<Path>> findYamlSuites(FileSystem fileSystem, String optionalPathPrefix, final String... paths) throws IOException {
Map<String, Set<Path>> yamlSuites = Maps.newHashMap();
for (String path : paths) {
collectFiles(resolveFile(optionalPathPrefix, path, YAML_SUFFIX), YAML_SUFFIX, yamlSuites);
collectFiles(resolveFile(fileSystem, optionalPathPrefix, path, YAML_SUFFIX), YAML_SUFFIX, yamlSuites);
}
return yamlSuites;
}
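// Hedged note, not part of the commit: classpath-only callers of findYamlSuites
// simply pass null for the FileSystem, as the updated FileUtilsTests below do, e.g.:
//   Map<String, Set<Path>> suites = findYamlSuites(null, "/rest-api-spec/test", "get");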
private static Path resolveFile(String optionalPathPrefix, String path, String optionalFileSuffix) throws IOException {
//try within classpath with and without file suffix (as it could be a single test suite)
URL resource = findResource(path, optionalFileSuffix);
if (resource == null) {
//try within classpath with optional prefix: /rest-api-spec/test (or /rest-api-spec/api) is optional
String newPath = optionalPathPrefix + "/" + path;
resource = findResource(newPath, optionalFileSuffix);
if (resource == null) {
//if it wasn't on classpath we look outside of the classpath
Path file = findFile(path, optionalFileSuffix);
if (!Files.exists(file)) {
private static Path resolveFile(FileSystem fileSystem, String optionalPathPrefix, String path, String optionalFileSuffix) throws IOException {
if (fileSystem != null) {
Path file = findFile(fileSystem, path, optionalFileSuffix);
if (!lenientExists(file)) {
// try with optional prefix: /rest-api-spec/test (or /rest-api-spec/api) is optional
String newPath = optionalPathPrefix + "/" + path;
file = findFile(fileSystem, newPath, optionalFileSuffix);
if (!lenientExists(file)) {
throw new NoSuchFileException(path);
}
return file;
}
}
try {
return PathUtils.get(resource.toURI());
} catch (URISyntaxException e) {
throw new RuntimeException(e);
return file;
} else {
//try within classpath
URL resource = findResource(path, optionalFileSuffix);
if (resource == null) {
//try within classpath with optional prefix: /rest-api-spec/test (or /rest-api-spec/api) is optional
String newPath = optionalPathPrefix + "/" + path;
resource = findResource(newPath, optionalFileSuffix);
if (resource == null) {
throw new NoSuchFileException(path);
}
}
try {
return PathUtils.get(resource.toURI());
} catch (Exception e) {
// some filesystems throw very unhelpful exceptions here;
// ZipFileSystem in particular.
throw new RuntimeException("couldn't retrieve URL: " + resource, e);
}
}
}
@ -125,11 +131,20 @@ public final class FileUtils {
}
return resource;
}
// used because the suite lookup "guesses" at paths in several different places on the filesystem
private static boolean lenientExists(Path file) {
boolean exists = false;
try {
exists = Files.exists(file);
} catch (SecurityException ok) {}
return exists;
}
private static Path findFile(String path, String optionalFileSuffix) {
Path file = PathUtils.get(path);
if (!Files.exists(file)) {
file = PathUtils.get(path + optionalFileSuffix);
private static Path findFile(FileSystem fileSystem, String path, String optionalFileSuffix) {
Path file = fileSystem.getPath(path);
if (!lenientExists(file)) {
file = fileSystem.getPath(path + optionalFileSuffix);
}
return file;
}
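
A minimal sketch (again, not from the commit) of how the two branches of resolveFile are reached through findYamlSuites; the archive path is hypothetical, and a null FileSystem selects the classpath branch:

import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Map;
import java.util.Set;

// classpath branch: resolution goes through findResource(...)
Map<String, Set<Path>> onClasspath =
        FileUtils.findYamlSuites(null, "/rest-api-spec/test", "get/10_basic");

// filesystem branch: paths resolve via fileSystem.getPath(...), guarded by
// the lenientExists(...) fallbacks shown above
try (FileSystem zipFs = FileSystems.newFileSystem(Paths.get("/tmp/suites.zip"), null)) {
    Map<String, Set<Path>> inZip =
            FileUtils.findYamlSuites(zipFs, "/rest-api-spec/test", "/get");
}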

View File

@ -35,29 +35,29 @@ public class FileUtilsTests extends ElasticsearchTestCase {
@Test
public void testLoadSingleYamlSuite() throws Exception {
Map<String,Set<Path>> yamlSuites = FileUtils.findYamlSuites("/rest-api-spec/test", "/rest-api-spec/test/get/10_basic");
Map<String,Set<Path>> yamlSuites = FileUtils.findYamlSuites(null, "/rest-api-spec/test", "/rest-api-spec/test/get/10_basic");
assertSingleFile(yamlSuites, "get", "10_basic.yaml");
//the path prefix is optional
yamlSuites = FileUtils.findYamlSuites("/rest-api-spec/test", "get/10_basic.yaml");
yamlSuites = FileUtils.findYamlSuites(null, "/rest-api-spec/test", "get/10_basic.yaml");
assertSingleFile(yamlSuites, "get", "10_basic.yaml");
//extension .yaml is optional
yamlSuites = FileUtils.findYamlSuites("/rest-api-spec/test", "get/10_basic");
yamlSuites = FileUtils.findYamlSuites(null, "/rest-api-spec/test", "get/10_basic");
assertSingleFile(yamlSuites, "get", "10_basic.yaml");
}
@Test
public void testLoadMultipleYamlSuites() throws Exception {
//single directory
Map<String,Set<Path>> yamlSuites = FileUtils.findYamlSuites("/rest-api-spec/test", "get");
Map<String,Set<Path>> yamlSuites = FileUtils.findYamlSuites(null, "/rest-api-spec/test", "get");
assertThat(yamlSuites, notNullValue());
assertThat(yamlSuites.size(), equalTo(1));
assertThat(yamlSuites.containsKey("get"), equalTo(true));
assertThat(yamlSuites.get("get").size(), greaterThan(1));
//multiple directories
yamlSuites = FileUtils.findYamlSuites("/rest-api-spec/test", "get", "index");
yamlSuites = FileUtils.findYamlSuites(null, "/rest-api-spec/test", "get", "index");
assertThat(yamlSuites, notNullValue());
assertThat(yamlSuites.size(), equalTo(2));
assertThat(yamlSuites.containsKey("get"), equalTo(true));
@ -66,7 +66,7 @@ public class FileUtilsTests extends ElasticsearchTestCase {
assertThat(yamlSuites.get("index").size(), greaterThan(1));
//multiple paths, which can be both directories or yaml test suites (with optional file extension)
yamlSuites = FileUtils.findYamlSuites("/rest-api-spec/test", "indices.optimize/10_basic", "index");
yamlSuites = FileUtils.findYamlSuites(null, "/rest-api-spec/test", "indices.optimize/10_basic", "index");
assertThat(yamlSuites, notNullValue());
assertThat(yamlSuites.size(), equalTo(2));
assertThat(yamlSuites.containsKey("indices.optimize"), equalTo(true));
@ -81,22 +81,16 @@ public class FileUtilsTests extends ElasticsearchTestCase {
Files.createFile(file);
//load from directory outside of the classpath
yamlSuites = FileUtils.findYamlSuites("/rest-api-spec/test", "get/10_basic", dir.toAbsolutePath().toString());
yamlSuites = FileUtils.findYamlSuites(dir.getFileSystem(), "/rest-api-spec/test", dir.toAbsolutePath().toString());
assertThat(yamlSuites, notNullValue());
assertThat(yamlSuites.size(), equalTo(2));
assertThat(yamlSuites.containsKey("get"), equalTo(true));
assertThat(yamlSuites.get("get").size(), equalTo(1));
assertSingleFile(yamlSuites.get("get"), "get", "10_basic.yaml");
assertThat(yamlSuites.size(), equalTo(1));
assertThat(yamlSuites.containsKey(dir.getFileName().toString()), equalTo(true));
assertSingleFile(yamlSuites.get(dir.getFileName().toString()), dir.getFileName().toString(), file.getFileName().toString());
//load from external file (optional extension)
yamlSuites = FileUtils.findYamlSuites("/rest-api-spec/test", "get/10_basic", dir.resolve("test_loading").toAbsolutePath().toString());
yamlSuites = FileUtils.findYamlSuites(dir.getFileSystem(), "/rest-api-spec/test", dir.resolve("test_loading").toAbsolutePath().toString());
assertThat(yamlSuites, notNullValue());
assertThat(yamlSuites.size(), equalTo(2));
assertThat(yamlSuites.containsKey("get"), equalTo(true));
assertThat(yamlSuites.get("get").size(), equalTo(1));
assertSingleFile(yamlSuites.get("get"), "get", "10_basic.yaml");
assertThat(yamlSuites.size(), equalTo(1));
assertThat(yamlSuites.containsKey(dir.getFileName().toString()), equalTo(true));
assertSingleFile(yamlSuites.get(dir.getFileName().toString()), dir.getFileName().toString(), file.getFileName().toString());
}
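
To make the revised expectation concrete (suites loaded through an explicit filesystem are keyed by the containing directory's own name, not by a classpath API name), a hypothetical standalone check could read:

// sketch under assumptions; this mirrors the test above rather than adding behaviour
Path dir = Files.createTempDirectory("test_loading");
Files.createFile(dir.resolve("test_loading.yaml"));

Map<String, Set<Path>> suites = FileUtils.findYamlSuites(
        dir.getFileSystem(), "/rest-api-spec/test", dir.toAbsolutePath().toString());

// the single suite is grouped under the generated directory name, e.g. "test_loading1234"
assert suites.size() == 1;
assert suites.containsKey(dir.getFileName().toString());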