Test case that exposes bug in PR #11724

This commit is contained in:
Marvin Justice 2024-03-05 10:50:44 -05:00
parent 012b959b05
commit 0b1617c52b
1 changed file with 12 additions and 0 deletions

View File

@ -33,6 +33,7 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase; import org.apache.lucene.tests.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.tests.analysis.MockTokenizer; import org.apache.lucene.tests.analysis.MockTokenizer;
import org.apache.lucene.tests.util.TestUtil; import org.apache.lucene.tests.util.TestUtil;
import org.junit.Ignore;
public class TestHTMLStripCharFilter extends BaseTokenStreamTestCase { public class TestHTMLStripCharFilter extends BaseTokenStreamTestCase {
@ -649,6 +650,17 @@ public class TestHTMLStripCharFilter extends BaseTokenStreamTestCase {
assertEquals("Test\n\n\n\nSome text.", result.toString().trim()); assertEquals("Test\n\n\n\nSome text.", result.toString().trim());
} }
/**
 * Strips a small HTML document whose first anchor has an {@code href} value ending in
 * {@code ?data=} and checks that only the title and the visible link/body text remain.
 *
 * <p>Currently disabled; see the {@code @Ignore} reason below.
 *
 * @throws IOException if reading from the filter or closing it fails
 */
@Ignore("Fails due to PR #11724")
public void testForIssue10520Regression() throws IOException {
  String test =
      "<!DOCTYPE html><html lang=\"en\"><head><title>Test</title></head><a href=\"https://www.somewhere.com?data=\">a link</a> some text <a href=\"https://www.elsewhere.com\">another link</a></html>";
  StringWriter result = new StringWriter();
  // Both readers are closed even if the transfer throws.
  try (Reader reader = new StringReader(test);
      HTMLStripCharFilter filter = new HTMLStripCharFilter(reader)) {
    filter.transferTo(result);
  }
  assertEquals("Test\n\na link some text another link", result.toString().trim());
}
public static void assertHTMLStripsTo(String input, String gold, Set<String> escapedTags) public static void assertHTMLStripsTo(String input, String gold, Set<String> escapedTags)
throws Exception { throws Exception {
assertHTMLStripsTo(new StringReader(input), gold, escapedTags); assertHTMLStripsTo(new StringReader(input), gold, escapedTags);