mirror of https://github.com/apache/lucene.git
LUCENE-590: Demo HTML parser gives incorrect summaries when title is repeated as a heading
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1031467 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
048cdb57f4
commit
c54ea4da67
|
@ -141,6 +141,9 @@ Bug fixes
|
|||
* LUCENE-2246: Fix contrib/demo for Turkish html documents.
|
||||
(Selim Nadi via Robert Muir)
|
||||
|
||||
* LUCENE-590: Demo HTML parser gives incorrect summaries when title is repeated as a heading
|
||||
(Curtis d'Entremont via Robert Muir)
|
||||
|
||||
API Changes
|
||||
|
||||
* LUCENE-2147: Spatial GeoHashUtils now always decode GeoHash strings
|
||||
|
|
|
@ -84,7 +84,7 @@ InterruptedException {
|
|||
|
||||
String sum = summary.toString().trim();
|
||||
String tit = getTitle();
|
||||
if (sum.startsWith(tit) || sum.equals(""))
|
||||
if (sum.equals(""))
|
||||
return tit;
|
||||
else
|
||||
return sum;
|
||||
|
|
|
@ -111,7 +111,7 @@ InterruptedException {
|
|||
|
||||
String sum = summary.toString().trim();
|
||||
String tit = getTitle();
|
||||
if (sum.startsWith(tit) || sum.equals(""))
|
||||
if (sum.equals(""))
|
||||
return tit;
|
||||
else
|
||||
return sum;
|
||||
|
|
|
@ -105,6 +105,13 @@ public class TestHtmlParser extends LuceneTestCase {
|
|||
assertEquals(200, parser.getSummary().length());
|
||||
}
|
||||
|
||||
// LUCENE-590
|
||||
public void testSummaryTitle() throws Exception {
|
||||
String text = "<html><head><title>Summary</title></head><body>Summary of the document</body></html>";
|
||||
HTMLParser parser = new HTMLParser(new StringReader(text));
|
||||
assertEquals("Summary of the document", parser.getSummary());
|
||||
}
|
||||
|
||||
// LUCENE-2246
|
||||
public void testTurkish() throws Exception {
|
||||
String text = "<html><body>" +
|
||||
|
|
Loading…
Reference in New Issue