mirror of https://github.com/apache/lucene.git
LUCENE-9091: UnifiedHighlighter HTML escaping should only
escape essentials
This commit is contained in:
parent
403fd05646
commit
1be5b68964
|
@ -88,6 +88,8 @@ Improvements
|
||||||
|
|
||||||
* LUCENE-9102: Add maxQueryLength option to DirectSpellchecker. (Andy Webb via Bruno Roustant)
|
* LUCENE-9102: Add maxQueryLength option to DirectSpellchecker. (Andy Webb via Bruno Roustant)
|
||||||
|
|
||||||
|
* LUCENE-9091: UnifiedHighlighter HTML escaping should only escape essentials (Nándor Mátravölgyi)
|
||||||
|
|
||||||
Optimizations
|
Optimizations
|
||||||
---------------------
|
---------------------
|
||||||
(No changes)
|
(No changes)
|
||||||
|
|
|
@ -129,15 +129,7 @@ public class DefaultPassageFormatter extends PassageFormatter {
|
||||||
dest.append("&#x2F;");
|
dest.append("&#x2F;");
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
if (ch >= 0x30 && ch <= 0x39 || ch >= 0x41 && ch <= 0x5A || ch >= 0x61 && ch <= 0x7A) {
|
dest.append(ch);
|
||||||
dest.append(ch);
|
|
||||||
} else if (ch < 0xff) {
|
|
||||||
dest.append("&#");
|
|
||||||
dest.append((int) ch);
|
|
||||||
dest.append(";");
|
|
||||||
} else {
|
|
||||||
dest.append(ch);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
|
@ -0,0 +1,51 @@
|
||||||
|
/*
|
||||||
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||||
|
* contributor license agreements. See the NOTICE file distributed with
|
||||||
|
* this work for additional information regarding copyright ownership.
|
||||||
|
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
* (the "License"); you may not use this file except in compliance with
|
||||||
|
* the License. You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package org.apache.lucene.search.uhighlight;
|
||||||
|
|
||||||
|
import org.apache.lucene.util.LuceneTestCase;
|
||||||
|
|
||||||
|
public class TestDefaultPassageFormatter extends LuceneTestCase {
|
||||||
|
public void testBasic() throws Exception {
|
||||||
|
String text = "Test customization & <div class=\"xy\">&quot;escaping&quot;</div> of this very formatter. Unrelated part. It's not very N/A!";
|
||||||
|
// fabricate passages with matches to format
|
||||||
|
Passage[] passages = new Passage[2];
|
||||||
|
passages[0] = new Passage();
|
||||||
|
passages[0].setStartOffset(0);
|
||||||
|
passages[0].setEndOffset(text.indexOf(".")+1);
|
||||||
|
passages[0].addMatch(text.indexOf("very"), text.indexOf("very")+4, null, 2);
|
||||||
|
passages[1] = new Passage();
|
||||||
|
passages[1].setStartOffset(text.indexOf(".", passages[0].getEndOffset()+1) + 2);
|
||||||
|
passages[1].setEndOffset(text.length());
|
||||||
|
passages[1].addMatch(
|
||||||
|
text.indexOf("very", passages[0].getEndOffset()),
|
||||||
|
text.indexOf("very", passages[0].getEndOffset())+4, null, 2);
|
||||||
|
|
||||||
|
// test default
|
||||||
|
DefaultPassageFormatter formatter = new DefaultPassageFormatter();
|
||||||
|
assertEquals(
|
||||||
|
"Test customization & <div class=\"xy\">&quot;escaping&quot;</div> of this <b>very</b> formatter." +
|
||||||
|
"... It's not <b>very</b> N/A!", formatter.format(passages, text));
|
||||||
|
|
||||||
|
// test customization and encoding
|
||||||
|
formatter = new DefaultPassageFormatter("<u>", "</u>", "\u2026 ", true);
|
||||||
|
assertEquals(
|
||||||
|
"Test customization &amp; &lt;div class=&quot;xy&quot;&gt;&amp;quot;escaping&amp;quot;" +
|
||||||
|
"&lt;&#x2F;div&gt; of this <u>very</u> formatter.\u2026 It&#x27;s not <u>very</u> N&#x2F;A!",
|
||||||
|
formatter.format(passages, text));
|
||||||
|
}
|
||||||
|
}
|
|
@ -957,7 +957,7 @@ public class TestUnifiedHighlighter extends LuceneTestCase {
|
||||||
assertEquals(1, topDocs.totalHits.value);
|
assertEquals(1, topDocs.totalHits.value);
|
||||||
String snippets[] = highlighter.highlight("body", query, topDocs);
|
String snippets[] = highlighter.highlight("body", query, topDocs);
|
||||||
assertEquals(1, snippets.length);
|
assertEquals(1, snippets.length);
|
||||||
assertEquals("Just&#32;a&#32;test&#32;<b>highlighting</b>&#32;from&#32;&lt;i&gt;postings&lt;&#x2F;i&gt;&#46;&#32;", snippets[0]);
|
assertEquals("Just a test <b>highlighting</b> from &lt;i&gt;postings&lt;&#x2F;i&gt;. ", snippets[0]);
|
||||||
|
|
||||||
ir.close();
|
ir.close();
|
||||||
}
|
}
|
||||||
|
|
|
@ -866,7 +866,7 @@ public class TestUnifiedHighlighterTermIntervals extends LuceneTestCase {
|
||||||
assertEquals(1, topDocs.totalHits.value);
|
assertEquals(1, topDocs.totalHits.value);
|
||||||
String snippets[] = highlighter.highlight("body", query, topDocs);
|
String snippets[] = highlighter.highlight("body", query, topDocs);
|
||||||
assertEquals(1, snippets.length);
|
assertEquals(1, snippets.length);
|
||||||
assertEquals("Just&#32;a&#32;test&#32;<b>highlighting</b>&#32;from&#32;&lt;i&gt;postings&lt;&#x2F;i&gt;&#46;&#32;", snippets[0]);
|
assertEquals("Just a test <b>highlighting</b> from &lt;i&gt;postings&lt;&#x2F;i&gt;. ", snippets[0]);
|
||||||
ir.close();
|
ir.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -171,7 +171,7 @@ public class TestPostingsSolrHighlighter extends SolrTestCaseJ4 {
|
||||||
assertU(commit());
|
assertU(commit());
|
||||||
assertQ("html escaped",
|
assertQ("html escaped",
|
||||||
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.encoder", "html"),
|
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.encoder", "html"),
|
||||||
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em>&#32;one&#32;has&#32;a&#32;first&#32;&lt;i&gt;sentence&lt;&#x2F;i&gt;&#46;'");
|
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em> one has a first &lt;i&gt;sentence&lt;&#x2F;i&gt;.'");
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testWildcard() {
|
public void testWildcard() {
|
||||||
|
|
|
@ -274,7 +274,7 @@ public class TestUnifiedSolrHighlighter extends SolrTestCaseJ4 {
|
||||||
assertU(commit());
|
assertU(commit());
|
||||||
assertQ("html escaped",
|
assertQ("html escaped",
|
||||||
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.encoder", "html"),
|
req("q", "text:document", "sort", "id asc", "hl", "true", "hl.encoder", "html"),
|
||||||
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em>&#32;one&#32;has&#32;a&#32;first&#32;&lt;i&gt;sentence&lt;&#x2F;i&gt;&#46;'");
|
"//lst[@name='highlighting']/lst[@name='103']/arr[@name='text']/str='<em>Document</em> one has a first &lt;i&gt;sentence&lt;&#x2F;i&gt;.'");
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testRangeQuery() {
|
public void testRangeQuery() {
|
||||||
|
|
Loading…
Reference in New Issue