From 59cbcf51a783db1e6cccb8f1a971e02c0d44024e Mon Sep 17 00:00:00 2001 From: Shalin Shekhar Mangar Date: Thu, 26 Feb 2009 12:41:08 +0000 Subject: [PATCH] SOLR-1040 -- XPathEntityProcessor fails with an xpath like containing forward slash in a attribute selector's value git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@748117 13f79535-47bb-0310-9956-ffa450edef68 --- contrib/dataimporthandler/CHANGES.txt | 3 +++ .../handler/dataimport/XPathRecordReader.java | 26 ++++++++++++++++++- .../dataimport/TestXPathRecordReader.java | 13 ++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/contrib/dataimporthandler/CHANGES.txt b/contrib/dataimporthandler/CHANGES.txt index a2b78af4073..2ad98c6e57e 100644 --- a/contrib/dataimporthandler/CHANGES.txt +++ b/contrib/dataimporthandler/CHANGES.txt @@ -155,6 +155,9 @@ Bug Fixes 19.SOLR-1037: DIH should not add null values in a row returned by EntityProcessor to documents. (shalin) +20.SOLR-1040: XPathEntityProcessor fails with an xpath like /feed/entry/link[@type='text/html']/@href + (Noble Paul via shalin) + Documentation ---------------------- diff --git a/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java b/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java index c6794066759..026c7001683 100644 --- a/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java +++ b/contrib/dataimporthandler/src/main/java/org/apache/solr/handler/dataimport/XPathRecordReader.java @@ -67,7 +67,7 @@ public class XPathRecordReader { private void addField0(String xpath, String name, boolean multiValued, boolean isRecord, int flags) { - List paths = new LinkedList(Arrays.asList(xpath.split("/"))); + List paths = splitEscapeQuote(xpath); if ("".equals(paths.get(0).trim())) paths.remove(0); rootNode.build(paths, name, multiValued, isRecord, flags); @@ -367,6 +367,30 @@ public class XPathRecordReader { return result; } + /** + * Used for handling cases where there is a slash '/' character + * inside the attribute value e.g. x@html='text/html'. We need to split + * by '/' excluding the '/' which is a part of the attribute's value. + */ + private static List splitEscapeQuote(String str) { + List result = new LinkedList(); + String[] ss = str.split("/"); + for (int i = 0; i < ss.length; i++) { + if (ss[i].length() == 0 && result.size() == 0) continue; + StringBuilder sb = new StringBuilder(); + int quoteCount = 0; + while (true) { + sb.append(ss[i]); + for (int j = 0; j < ss[i].length(); j++) if (ss[i].charAt(j) == '\'') quoteCount++; + if ((quoteCount % 2) == 0) break; + i++; + sb.append("/"); + } + result.add(sb.toString()); + } + return result; + } + static XMLInputFactory factory = XMLInputFactory.newInstance(); static{ factory.setProperty(XMLInputFactory.IS_VALIDATING , Boolean.FALSE); diff --git a/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java b/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java index 5f3c74a48f1..84ba12bd0c0 100644 --- a/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java +++ b/contrib/dataimporthandler/src/test/java/org/apache/solr/handler/dataimport/TestXPathRecordReader.java @@ -226,6 +226,19 @@ public class TestXPathRecordReader { Assert.assertEquals(0, l.get(1).size()); } + @Test + public void attribValWithSlash() { + String xml = "\n" + + " \n" + + ""; + XPathRecordReader rr = new XPathRecordReader("/root/b"); + rr.addField("x", "/root/b/a[@x='a/b']/@h", false); + List> l = rr.getAllRecords(new StringReader(xml)); + Assert.assertEquals(1, l.size()); + Map m = l.get(0); + Assert.assertEquals("hello-A", m.get("x")); + } + @Test public void another() { String xml = "\n"