adding escape html ability

This commit is contained in:
LocJayMP 2011-08-25 17:58:55 -04:00 committed by Shay Banon
parent cb7242f84e
commit c0f8223a6c
6 changed files with 151 additions and 8 deletions

View File

@ -490,6 +490,8 @@ public class SearchRequestBuilder extends BaseRequestBuilder<SearchRequest, Sear
return this;
}
/**
* Adds a field to be highlighted with a provided fragment size (in characters), and
* default number of fragments of 5.
@ -566,6 +568,15 @@ public class SearchRequestBuilder extends BaseRequestBuilder<SearchRequest, Sear
return this;
}
/**
 * Sets the name of the encoder applied to highlighted text.
 *
 * <p>The value is forwarded unchanged to the underlying highlight builder. On the
 * search side, <tt>html</tt> selects an HTML-escaping encoder and any other value
 * selects the default encoder.
 *
 * @param encoder the encoder name, e.g. <tt>html</tt> or <tt>default</tt>
 * @return this builder, to allow call chaining
 */
public SearchRequestBuilder setEncoder(String encoder) {
highlightBuilder().encoder(encoder);
return this;
}
/**
* Sets the source of the request as a json string. Note, setting anything other
* than the search type will cause this source to be overridden, consider using

View File

@ -45,9 +45,11 @@ public class HighlightBuilder implements ToXContent {
private String order;
private String encoder;
/**
* Adds a field to be highlighted with default fragment size of 100 characters, and
* default number of fragments of 5.
* default number of fragments of 5 using the default encoder
*
* @param name The field to highlight
*/
@ -59,6 +61,8 @@ public class HighlightBuilder implements ToXContent {
return this;
}
/**
* Adds a field to be highlighted with a provided fragment size (in characters), and
* default number of fragments of 5.
@ -74,6 +78,7 @@ public class HighlightBuilder implements ToXContent {
return this;
}
/**
* Adds a field to be highlighted with a provided fragment size (in characters), and
* a provided (maximum) number of fragments.
@ -90,6 +95,8 @@ public class HighlightBuilder implements ToXContent {
return this;
}
/**
* Adds a field to be highlighted with a provided fragment size (in characters), and
* a provided (maximum) number of fragments.
@ -103,10 +110,12 @@ public class HighlightBuilder implements ToXContent {
if (fields == null) {
fields = newArrayList();
}
fields.add(new Field(name).fragmentSize(fragmentSize).numOfFragments(numberOfFragments).fragmentOffset(fragmentOffset));
fields.add(new Field(name).fragmentSize(fragmentSize).numOfFragments(numberOfFragments)
.fragmentOffset(fragmentOffset));
return this;
}
/**
* Set a tag scheme that encapsulates built-in pre and post tags. The allowed schemes
* are <tt>styled</tt> and <tt>default</tt>.
@ -118,6 +127,17 @@ public class HighlightBuilder implements ToXContent {
return this;
}
/**
 * Sets the encoder used for the highlighted text. The value is written to the
 * <tt>encoder</tt> field of the generated highlight request only when it is not null.
 * The search side treats <tt>html</tt> as "escape HTML" and any other value as the
 * default encoder.
 *
 * @param encoder the encoder name, e.g. <tt>html</tt> or <tt>default</tt>
 * @return this builder, to allow call chaining
 */
public HighlightBuilder encoder(String encoder) {
this.encoder = encoder;
return this;
}
/**
* Explicitly set the pre tags that will be used for highlighting.
*/
@ -158,6 +178,9 @@ public class HighlightBuilder implements ToXContent {
if (order != null) {
builder.field("order", order);
}
if (encoder != null) {
builder.field("encoder", encoder);
}
if (fields != null) {
builder.startObject("fields");
for (Field field : fields) {
@ -171,10 +194,12 @@ public class HighlightBuilder implements ToXContent {
if (field.fragmentOffset() != -1) {
builder.field("fragment_offset", field.fragmentOffset());
}
builder.endObject();
}
builder.endObject();
}
builder.endObject();
return builder;
}
@ -185,7 +210,6 @@ public class HighlightBuilder implements ToXContent {
private int fragmentOffset = -1;
private int numOfFragments = -1;
private Field(String name) {
this.name = name;
}
@ -220,5 +244,6 @@ public class HighlightBuilder implements ToXContent {
this.numOfFragments = numOfFragments;
return this;
}
}
}

View File

@ -57,7 +57,9 @@ import static org.elasticsearch.common.collect.Maps.*;
*/
public class HighlightPhase implements SearchHitPhase {
private static final Encoder DEFAULT_ENCODER = new DefaultEncoder();
private Encoder encoder;
private FragListBuilder fraglistbuilder;
private FragmentsBuilder fragmentbuiler;
@Override public Map<String, ? extends SearchParseElement> parseElements() {
return ImmutableMap.of("highlight", new HighlighterParseElement());
@ -73,6 +75,10 @@ public class HighlightPhase implements SearchHitPhase {
Map<String, HighlightField> highlightFields = newHashMap();
for (SearchContextHighlight.Field field : context.highlight().fields()) {
// Choose the encoder for this field: "html" selects the HTML-escaping encoder, any
// other value (including a missing one) selects the shared default encoder.
// The constant-first comparison avoids a NullPointerException when no encoder name
// was set on the field.
// NOTE(review): `encoder` is an instance field of this phase; if a single phase
// instance serves concurrent searches this assignment can race -- confirm, and
// consider passing the encoder as a local value instead.
if ("html".equals(field.encoder())) {
    encoder = new SimpleHTMLEncoder();
} else {
    encoder = DEFAULT_ENCODER;
}
FieldMapper mapper = documentMapper.mappers().smartNameFieldMapper(field.field());
if (mapper == null) {
MapperService.SmartNameFieldMappers fullMapper = context.mapperService().smartName(field.field());
@ -110,7 +116,10 @@ public class HighlightPhase implements SearchHitPhase {
fragmenter = new SimpleSpanFragmenter(queryScorer, field.fragmentCharSize());
}
Formatter formatter = new SimpleHTMLFormatter(field.preTags()[0], field.postTags()[0]);
Highlighter highlighter = new Highlighter(formatter, DEFAULT_ENCODER, queryScorer);
Highlighter highlighter = new Highlighter(formatter, encoder, queryScorer);
highlighter.setTextFragmenter(fragmenter);
List<Object> textsToHighlight;
@ -186,7 +195,8 @@ public class HighlightPhase implements SearchHitPhase {
try {
// a HACK to make highlighter do highlighting, even though it's using the single frag list builder
int numberOfFragments = field.numberOfFragments() == 0 ? 1 : field.numberOfFragments();
fragments = highlighter.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), mapper.names().indexName(), field.fragmentCharSize(), numberOfFragments);
fragments = highlighter.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), mapper.names().indexName(), field.fragmentCharSize(), numberOfFragments,
this.fraglistbuilder, this.fragmentbuiler, field.preTags(), field.postTags(), encoder);
} catch (IOException e) {
throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + field.field() + "]", e);
}
@ -241,7 +251,9 @@ public class HighlightPhase implements SearchHitPhase {
}
}
}
this.fraglistbuilder = fragListBuilder;
this.fragmentbuiler = fragmentsBuilder;
return new FastVectorHighlighter(true, false, fragListBuilder, fragmentsBuilder);
}
}

View File

@ -72,6 +72,7 @@ public class HighlighterParseElement implements SearchParseElement {
boolean globalHighlightFilter = true;
int globalFragmentSize = 100;
int globalNumOfFragments = 5;
String globalEncoder = "default";
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
@ -106,6 +107,9 @@ public class HighlighterParseElement implements SearchParseElement {
} else if ("number_of_fragments".equals(topLevelFieldName) || "numberOfFragments".equals(topLevelFieldName)) {
globalNumOfFragments = parser.intValue();
}
else if ("encoder".equals(topLevelFieldName)){
globalEncoder = parser.text();
}
} else if (token == XContentParser.Token.START_OBJECT) {
if ("fields".equals(topLevelFieldName)) {
String highlightFieldName = null;
@ -176,6 +180,10 @@ public class HighlighterParseElement implements SearchParseElement {
if (field.numberOfFragments() == -1) {
field.numberOfFragments(globalNumOfFragments);
}
if (field.encoder() == null){
field.encoder(globalEncoder);
}
}
context.highlight(new SearchContextHighlight(fields));

View File

@ -19,6 +19,8 @@
package org.elasticsearch.search.highlight;
import org.elasticsearch.common.io.stream.StreamInput;
import java.util.List;
/**
@ -46,6 +48,8 @@ public class SearchContextHighlight {
private int fragmentOffset = -1;
private String encoder;
private String[] preTags;
private String[] postTags;
@ -86,7 +90,15 @@ public class SearchContextHighlight {
this.fragmentOffset = fragmentOffset;
}
public String[] preTags() {
/**
 * Returns the encoder name configured for this field, or <tt>null</tt> if none
 * has been set yet.
 */
public String encoder(){
return encoder;
}
/**
 * Sets the encoder name for this field.
 *
 * @param encoder the encoder name, e.g. <tt>html</tt> or <tt>default</tt>
 */
public void encoder(String encoder){
this.encoder = encoder;
}
public String[] preTags() {
return preTags;
}
@ -117,5 +129,6 @@ public class SearchContextHighlight {
public void highlightFilter(boolean highlightFilter) {
this.highlightFilter = highlightFilter;
}
}
}

View File

@ -437,4 +437,78 @@ public class HighlighterSearchTests extends AbstractNodesTests {
assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("hlighting <em>bug</em> present in elasticsearch "));
}
}
/**
 * Verifies that the <tt>html</tt> encoder escapes HTML special characters in the
 * highlighted fragments of a stored field mapped without term vectors, while the
 * highlight tags themselves stay unescaped.
 */
@Test public void testEscapeHtml() throws Exception {
    try {
        client.admin().indices().prepareDelete("test").execute().actionGet();
    } catch (Exception e) {
        // ignore: best-effort cleanup, presumably the index just does not exist yet
    }

    client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("number_of_shards", 2))
            .addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
                    .startObject("title").field("type", "string").field("store", "yes")
                    .endObject().endObject().endObject())
            .execute().actionGet();

    for (int i = 0; i < 5; i++) {
        client.prepareIndex("test", "type1", Integer.toString(i))
                .setSource("title", "This is a html escaping highlighting test for *&? elasticsearch").setRefresh(true).execute().actionGet();
    }

    SearchResponse search = client.prepareSearch()
            .setQuery(fieldQuery("title", "test")).setEncoder("html")
            .addHighlightedField("title", 50, 1, 10)
            .execute().actionGet();

    assertThat(search.hits().totalHits(), equalTo(5L));
    assertThat(search.hits().hits().length, equalTo(5));
    assertThat(search.getFailedShards(), equalTo(0));

    for (SearchHit hit : search.hits()) {
        // the '&' from the document must come back escaped; the <em> tags must not
        assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("This is a html escaping highlighting <em>test</em> for *&amp;? elasticsearch"));
    }
}
/**
 * Verifies that the <tt>html</tt> encoder escapes HTML special characters in the
 * highlighted fragments of a field mapped with
 * <tt>term_vector: with_positions_offsets</tt> (presumably exercising the
 * term-vector based highlighter path -- confirm against HighlightPhase).
 */
@Test public void testEscapeHtml_vector() throws Exception {
    try {
        client.admin().indices().prepareDelete("test").execute().actionGet();
    } catch (Exception e) {
        // ignore: best-effort cleanup, presumably the index just does not exist yet
    }

    client.admin().indices().prepareCreate("test").setSettings(ImmutableSettings.settingsBuilder().put("number_of_shards", 2))
            .addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties")
                    .startObject("title").field("type", "string").field("store", "yes").field("term_vector", "with_positions_offsets").endObject()
                    .endObject().endObject().endObject())
            .execute().actionGet();

    for (int i = 0; i < 5; i++) {
        client.prepareIndex("test", "type1", Integer.toString(i))
                .setSource("title", "This is a html escaping highlighting test for *&? elasticsearch").setRefresh(true).execute().actionGet();
    }

    SearchResponse search = client.prepareSearch()
            .setQuery(fieldQuery("title", "test")).setEncoder("html")
            .addHighlightedField("title", 50, 1, 10)
            .execute().actionGet();

    assertThat(search.hits().totalHits(), equalTo(5L));
    assertThat(search.hits().hits().length, equalTo(5));
    assertThat(search.getFailedShards(), equalTo(0));

    for (SearchHit hit : search.hits()) {
        // LUCENE 3.1 UPGRADE: Caused adding the space at the end...
        assertThat(hit.highlightFields().get("title").fragments()[0], equalTo("hlighting <em>test</em> for *&amp;? elasticsearch "));
    }
}
}