From a85eefd70a515eade6745a3a79c0baa59d10e391 Mon Sep 17 00:00:00 2001 From: Erik Hatcher Date: Tue, 3 Apr 2007 04:50:54 +0000 Subject: [PATCH] Add XPathMapper, useful for mapping XML file data into Solr git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@525033 13f79535-47bb-0310-9956-ffa450edef68 --- client/ruby/solr-ruby/lib/solr/importer.rb | 1 + .../lib/solr/importer/xpath_mapper.rb | 27 +++++++++++++++ .../solr-ruby/test/unit/xpath_mapper_test.rb | 33 +++++++++++++++++++ .../solr-ruby/test/unit/xpath_test_file.xml | 6 ++++ 4 files changed, 67 insertions(+) create mode 100755 client/ruby/solr-ruby/lib/solr/importer/xpath_mapper.rb create mode 100755 client/ruby/solr-ruby/test/unit/xpath_mapper_test.rb create mode 100644 client/ruby/solr-ruby/test/unit/xpath_test_file.xml diff --git a/client/ruby/solr-ruby/lib/solr/importer.rb b/client/ruby/solr-ruby/lib/solr/importer.rb index 3260eb465f4..129453e8524 100755 --- a/client/ruby/solr-ruby/lib/solr/importer.rb +++ b/client/ruby/solr-ruby/lib/solr/importer.rb @@ -13,3 +13,4 @@ module Solr; module Importer; end; end require 'solr/importer/mapper' require 'solr/importer/tab_delimited_file_source' +require 'solr/importer/xpath_mapper' \ No newline at end of file diff --git a/client/ruby/solr-ruby/lib/solr/importer/xpath_mapper.rb b/client/ruby/solr-ruby/lib/solr/importer/xpath_mapper.rb new file mode 100755 index 00000000000..4fad299f33d --- /dev/null +++ b/client/ruby/solr-ruby/lib/solr/importer/xpath_mapper.rb @@ -0,0 +1,27 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require 'xml/libxml' + +# For files with the first line containing field names +class Solr::Importer::XPathMapper < Solr::Importer::Mapper + def field_data(doc, xpath) + doc.find(xpath.to_s).collect do |node| + case node + when XML::Attr + node.value + when XML::Node + node.content + end + end + end +end diff --git a/client/ruby/solr-ruby/test/unit/xpath_mapper_test.rb b/client/ruby/solr-ruby/test/unit/xpath_mapper_test.rb new file mode 100755 index 00000000000..70a200c27e4 --- /dev/null +++ b/client/ruby/solr-ruby/test/unit/xpath_mapper_test.rb @@ -0,0 +1,33 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require 'solr' +require 'test/unit' + +class XPathMapperTest < Test::Unit::TestCase + + def setup + @doc = XML::Document.file(File.expand_path(File.dirname(__FILE__)) + "/xpath_test_file.xml") + end + + def test_simple_xpath + mapping = {:solr_field1 => :'/root/parent/child', + :solr_field2 => :'/root/parent/child/@attribute'} + + mapper = Solr::Importer::XPathMapper.new(mapping) + mapped_data = mapper.map(@doc) + + assert_equal ['text1', 'text2'], mapped_data[:solr_field1] + assert_equal ['attribute1', 'attribute2'], mapped_data[:solr_field2] + end + +end diff --git a/client/ruby/solr-ruby/test/unit/xpath_test_file.xml b/client/ruby/solr-ruby/test/unit/xpath_test_file.xml new file mode 100644 index 00000000000..18be68d0008 --- /dev/null +++ b/client/ruby/solr-ruby/test/unit/xpath_test_file.xml @@ -0,0 +1,6 @@ + + + text1 + text2 + + \ No newline at end of file