mirror of https://github.com/apache/lucene.git
LUCENE-9091: UnifiedHighlighter HTML escaping should only
escape essentials
This commit is contained in:
parent
403fd05646
commit
1be5b68964
|
@ -88,6 +88,8 @@ Improvements
|
|||
|
||||
* LUCENE-9102: Add maxQueryLength option to DirectSpellchecker. (Andy Webb via Bruno Roustant)
|
||||
|
||||
* LUCENE-9091: UnifiedHighlighter HTML escaping should only escape essentials (Nándor Mátravölgyi)
|
||||
|
||||
Optimizations
|
||||
---------------------
|
||||
(No changes)
|
||||
|
|
|
@ -129,15 +129,7 @@ public class DefaultPassageFormatter extends PassageFormatter {
|
|||
dest.append("&#x2F;");
|
||||
break;
|
||||
default:
|
||||
if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
|
||||
dest.append(ch);
|
||||
} else if (ch < 0xff) {
|
||||
dest.append("&#");
|
||||
dest.append((int) ch);
|
||||
dest.append(";");
|
||||
} else {
|
||||
dest.append(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
|
|
|
@ -0,0 +1,51 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.apache.lucene.search.uhighlight;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestDefaultPassageFormatter extends LuceneTestCase {
|
||||
public void testBasic() throws Exception {
|
||||
String text = "Test customization & <div class=\"xy\">"escaping"</div> of this very formatter. Unrelated part. It's not very N/A!";
|
||||
// fabricate passages with matches to format
|
||||
Passage[] passages = new Passage[2];
|
||||
passages[0] = new Passage();
|
||||
passages[0].setStartOffset(0);
|
||||
passages[0].setEndOffset(text.indexOf(".")+1);
|
||||
passages[0].addMatch(text.indexOf("very"), text.indexOf("very")+4, null, 2);
|
||||
passages[1] = new Passage();
|
||||
passages[1].setStartOffset(text.indexOf(".", passages[0].getEndOffset()+1) + 2);
|
||||
passages[1].setEndOffset(text.length());
|
||||
passages[1].addMatch(
|
||||
text.indexOf("very", passages[0].getEndOffset()),
|
||||
text.indexOf("very", passages[0].getEndOffset())+4, null, 2);
|
||||
|
||||
// test default
|
||||
DefaultPassageFormatter formatter = new DefaultPassageFormatter();
|
||||
assertEquals(
|
||||
"Test customization & <div class=\"xy\">"escaping"</div> of this <b>very</b> formatter." +
|
||||
"... It's not <b>very</b> N/A!", formatter.format(passages, text));
|
||||
|
||||
// test customization and encoding
|
||||
formatter = new DefaultPassageFormatter("<u>", "</u>", "\u2026 ", true);
|
||||
assertEquals(
|
||||
"Test customization & <div class="xy">&quot;escaping&quot;" +
|
||||
"</div> of this <u>very</u> formatter.\u2026 It's not <u>very</u> N/A!",
|
||||
formatter.format(passages, text));
|
||||
}
|
||||
}
|
|
@ -957,7 +957,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
|||
assertEquals(1, topDocs.totalHits.value);
|
||||
String snippets[] = highlighter.highlight("body", query, topDocs);
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("Just&#32;a&#32;test&#32;<b>highlighting</b>&#32;from&#32;&lt;i&gt;postings&lt;&#x2F;i&gt;&#46;&#32;", snippets[0]);
|
||||
assertEquals("Just a test <b>highlighting</b> from &lt;i&gt;postings&lt;&#x2F;i&gt;. ", snippets[0]);
|
||||
|
||||
ir.close();
|
||||
}
|
||||
|
|
|
@ -866,7 +866,7 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
|
|||
assertEquals(1, topDocs.totalHits.value);
|
||||
String snippets[] = highlighter.highlight("body", query, topDocs);
|
||||
assertEquals(1, snippets.length);
|
||||
assertEquals("Just&#32;a&#32;test&#32;<b>highlighting</b>&#32;from&#32;&lt;i&gt;postings&lt;&#x2F;i&gt;&#46;&#32;", snippets[0]);
|
||||
assertEquals("Just a test <b>highlighting</b> from &lt;i&gt;postings&lt;&#x2F;i&gt;. ", snippets[0]);
|
||||
ir.close();
|
||||
}
|
||||
|
||||
|
|
|
@ -171,7 +171,7 @@ public class TestPostingsSolrHighlighter extends SolrTestCaseJ4 {
|
|||
assertU(commit());
|
||||
assertQ("html escaped",
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.encoder", "html"),
|
||||
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em>&#32;one&#32;has&#32;a&#32;first&#32;&lt;i&gt;sentence&lt;&#x2F;i&gt;&#46;'");
|
||||
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em> one has a first &lt;i&gt;sentence&lt;&#x2F;i&gt;.'");
|
||||
}
|
||||
|
||||
public void testWildcard() {
|
||||
|
|
|
@ -274,7 +274,7 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
|
|||
assertU(commit());
|
||||
assertQ("html escaped",
|
||||
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.encoder", "html"),
|
||||
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em>&#32;one&#32;has&#32;a&#32;first&#32;&lt;i&gt;sentence&lt;&#x2F;i&gt;&#46;'");
|
||||
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em> one has a first &lt;i&gt;sentence&lt;&#x2F;i&gt;.'");
|
||||
}
|
||||
|
||||
public void testRangeQuery() {
|
||||
|
|
Loading…
Reference in New Issue