mirror of https://github.com/apache/lucene.git
SOLR-999 -- XPathRecordReader fails on XMLs with nodes mixed with CDATA content
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@739962 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
164c2481e2
commit
33f9318049
|
@ -106,6 +106,9 @@ Bug Fixes
|
|||
13. SOLR-985: Fix thread-safety issue with TemplateString for concurrent imports with multiple cores.
|
||||
(Ryuuichi Kumai via shalin)
|
||||
|
||||
14. SOLR-999: XPathRecordReader fails on XMLs with nodes mixed with CDATA content.
|
||||
(Fergus McMenemie, Noble Paul via shalin)
|
||||
|
||||
Documentation
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -162,19 +162,20 @@ public class XPathRecordReader {
|
|||
skipNextEvent = true;
|
||||
String text = parser.getText();
|
||||
event = parser.next();
|
||||
while (event == CDATA || event == CHARACTERS || event == SPACE) {
|
||||
text = text + parser.getText();
|
||||
|
||||
while (true) {
|
||||
if(event == CDATA || event == CHARACTERS || event == SPACE) {
|
||||
text = text + parser.getText();
|
||||
} else if(event == START_ELEMENT) {
|
||||
handleStartElement(parser, childrenFound, handler, values, stack, recordStarted);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
event = parser.next();
|
||||
}
|
||||
putText(values, text, fieldName, multiValued);
|
||||
} else if (event == START_ELEMENT) {
|
||||
Node n = getMatchingChild(parser);
|
||||
if (n != null) {
|
||||
childrenFound.add(n);
|
||||
n.parse(parser, handler, values, stack, recordStarted);
|
||||
} else {
|
||||
skipTag(parser);
|
||||
}
|
||||
handleStartElement(parser, childrenFound, handler, values, stack, recordStarted);
|
||||
}
|
||||
}
|
||||
} finally {
|
||||
|
@ -193,6 +194,19 @@ public class XPathRecordReader {
|
|||
}
|
||||
}
|
||||
|
||||
private void handleStartElement(XMLStreamReader parser, Set<Node> childrenFound,
|
||||
Handler handler, Map<String, Object> values,
|
||||
Stack<Set<String>> stack, boolean recordStarted)
|
||||
throws IOException, XMLStreamException {
|
||||
Node n = getMatchingChild(parser);
|
||||
if (n != null) {
|
||||
childrenFound.add(n);
|
||||
n.parse(parser, handler, values, stack, recordStarted);
|
||||
} else {
|
||||
skipTag(parser);
|
||||
}
|
||||
}
|
||||
|
||||
private Node getMatchingChild(XMLStreamReader parser) {
|
||||
if (childNodes == null)
|
||||
return null;
|
||||
|
|
|
@ -25,9 +25,7 @@ import java.util.List;
|
|||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Test for XPathRecordReader
|
||||
* </p>
|
||||
* <p> Test for XPathRecordReader </p>
|
||||
*
|
||||
* @version $Id$
|
||||
* @since solr 1.3
|
||||
|
@ -135,6 +133,28 @@ public class TestXPathRecordReader {
|
|||
Assert.assertNull(((List) l.get(1).get("b")).get(0));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void mixedContent() {
|
||||
String xml = "<xhtml:p xmlns:xhtml=\"http://xhtml.com/\" >This text is \n" +
|
||||
" <xhtml:b>bold</xhtml:b> and this text is \n" +
|
||||
" <xhtml:u>underlined</xhtml:u>!\n" +
|
||||
"</xhtml:p>";
|
||||
XPathRecordReader rr = new XPathRecordReader("/p");
|
||||
rr.addField("p", "/p", true);
|
||||
rr.addField("b", "/p/b", true);
|
||||
rr.addField("u", "/p/u", true);
|
||||
List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
|
||||
Map<String, Object> row = l.get(0);
|
||||
|
||||
Assert.assertEquals("bold", ((List) row.get("b")).get(0));
|
||||
Assert.assertEquals("underlined", ((List) row.get("u")).get(0));
|
||||
String p = (String) ((List) row.get("p")).get(0);
|
||||
Assert.assertTrue(p.contains("This text is"));
|
||||
Assert.assertTrue(p.contains("and this text is"));
|
||||
Assert.assertTrue(p.contains("!"));
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void elems2LevelWithAttrib() {
|
||||
String xml = "<root>\n" + "\t<a>\n" + "\t <b k=\"x\">\n"
|
||||
|
|
Loading…
Reference in New Issue