mirror of https://github.com/apache/lucene.git
SOLR-999 -- XPathRecordReader fails on XMLs with nodes mixed with CDATA content
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@739962 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
164c2481e2
commit
33f9318049
|
@ -106,6 +106,9 @@ Bug Fixes
|
||||||
13. SOLR-985: Fix thread-safety issue with TemplateString for concurrent imports with multiple cores.
|
13. SOLR-985: Fix thread-safety issue with TemplateString for concurrent imports with multiple cores.
|
||||||
(Ryuuichi Kumai via shalin)
|
(Ryuuichi Kumai via shalin)
|
||||||
|
|
||||||
|
14. SOLR-999: XPathRecordReader fails on XML documents whose elements mix character data (text/CDATA) with child elements.
|
||||||
|
(Fergus McMenemie, Noble Paul via shalin)
|
||||||
|
|
||||||
Documentation
|
Documentation
|
||||||
----------------------
|
----------------------
|
||||||
|
|
||||||
|
|
|
@ -162,19 +162,20 @@ public class XPathRecordReader {
|
||||||
skipNextEvent = true;
|
skipNextEvent = true;
|
||||||
String text = parser.getText();
|
String text = parser.getText();
|
||||||
event = parser.next();
|
event = parser.next();
|
||||||
while (event == CDATA || event == CHARACTERS || event == SPACE) {
|
|
||||||
text = text + parser.getText();
|
while (true) {
|
||||||
|
if(event == CDATA || event == CHARACTERS || event == SPACE) {
|
||||||
|
text = text + parser.getText();
|
||||||
|
} else if(event == START_ELEMENT) {
|
||||||
|
handleStartElement(parser, childrenFound, handler, values, stack, recordStarted);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
event = parser.next();
|
event = parser.next();
|
||||||
}
|
}
|
||||||
putText(values, text, fieldName, multiValued);
|
putText(values, text, fieldName, multiValued);
|
||||||
} else if (event == START_ELEMENT) {
|
} else if (event == START_ELEMENT) {
|
||||||
Node n = getMatchingChild(parser);
|
handleStartElement(parser, childrenFound, handler, values, stack, recordStarted);
|
||||||
if (n != null) {
|
|
||||||
childrenFound.add(n);
|
|
||||||
n.parse(parser, handler, values, stack, recordStarted);
|
|
||||||
} else {
|
|
||||||
skipTag(parser);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} finally {
|
} finally {
|
||||||
|
@ -193,6 +194,19 @@ public class XPathRecordReader {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void handleStartElement(XMLStreamReader parser, Set<Node> childrenFound,
|
||||||
|
Handler handler, Map<String, Object> values,
|
||||||
|
Stack<Set<String>> stack, boolean recordStarted)
|
||||||
|
throws IOException, XMLStreamException {
|
||||||
|
Node n = getMatchingChild(parser);
|
||||||
|
if (n != null) {
|
||||||
|
childrenFound.add(n);
|
||||||
|
n.parse(parser, handler, values, stack, recordStarted);
|
||||||
|
} else {
|
||||||
|
skipTag(parser);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private Node getMatchingChild(XMLStreamReader parser) {
|
private Node getMatchingChild(XMLStreamReader parser) {
|
||||||
if (childNodes == null)
|
if (childNodes == null)
|
||||||
return null;
|
return null;
|
||||||
|
|
|
@ -25,9 +25,7 @@ import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* <p>
|
* <p> Test for XPathRecordReader </p>
|
||||||
* Test for XPathRecordReader
|
|
||||||
* </p>
|
|
||||||
*
|
*
|
||||||
* @version $Id$
|
* @version $Id$
|
||||||
* @since solr 1.3
|
* @since solr 1.3
|
||||||
|
@ -135,6 +133,28 @@ public class TestXPathRecordReader {
|
||||||
Assert.assertNull(((List) l.get(1).get("b")).get(0));
|
Assert.assertNull(((List) l.get(1).get("b")).get(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void mixedContent() {
|
||||||
|
String xml = "<xhtml:p xmlns:xhtml=\"http://xhtml.com/\" >This text is \n" +
|
||||||
|
" <xhtml:b>bold</xhtml:b> and this text is \n" +
|
||||||
|
" <xhtml:u>underlined</xhtml:u>!\n" +
|
||||||
|
"</xhtml:p>";
|
||||||
|
XPathRecordReader rr = new XPathRecordReader("/p");
|
||||||
|
rr.addField("p", "/p", true);
|
||||||
|
rr.addField("b", "/p/b", true);
|
||||||
|
rr.addField("u", "/p/u", true);
|
||||||
|
List<Map<String, Object>> l = rr.getAllRecords(new StringReader(xml));
|
||||||
|
Map<String, Object> row = l.get(0);
|
||||||
|
|
||||||
|
Assert.assertEquals("bold", ((List) row.get("b")).get(0));
|
||||||
|
Assert.assertEquals("underlined", ((List) row.get("u")).get(0));
|
||||||
|
String p = (String) ((List) row.get("p")).get(0);
|
||||||
|
Assert.assertTrue(p.contains("This text is"));
|
||||||
|
Assert.assertTrue(p.contains("and this text is"));
|
||||||
|
Assert.assertTrue(p.contains("!"));
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void elems2LevelWithAttrib() {
|
public void elems2LevelWithAttrib() {
|
||||||
String xml = "<root>\n" + "\t<a>\n" + "\t <b k=\"x\">\n"
|
String xml = "<root>\n" + "\t<a>\n" + "\t <b k=\"x\">\n"
|
||||||
|
|
Loading…
Reference in New Issue