mirror of https://github.com/apache/lucene.git
The beginnings of a general purpose importer / data mapper in order to feed data into Solr
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@522416 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1da74f6678
commit
6016746b57
|
@ -16,4 +16,6 @@ require 'solr/request'
|
|||
require 'solr/connection'
|
||||
require 'solr/response'
|
||||
require 'solr/util'
|
||||
require 'solr/xml'
|
||||
require 'solr/xml'
|
||||
require 'solr/importer'
|
||||
require 'solr/indexer'
|
||||
|
|
|
@ -0,0 +1,15 @@
|
|||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
module Solr; module Importer; end; end
|
||||
require 'solr/importer/mapper'
|
||||
require 'solr/importer/tab_delimited_file_source'
|
|
@ -0,0 +1,48 @@
|
|||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# For files with the first line containing field names
|
||||
class Solr::Importer::Mapper
|
||||
def initialize(mapping)
|
||||
@mapping = mapping
|
||||
end
|
||||
|
||||
def field_data(orig_data, field_name)
|
||||
case field_name
|
||||
when Symbol
|
||||
orig_data[field_name]
|
||||
else
|
||||
field_name
|
||||
end
|
||||
end
|
||||
|
||||
def map(orig_data)
|
||||
mapped_data = {}
|
||||
@mapping.each do |solr_name, field_mapping|
|
||||
value = case field_mapping
|
||||
when Proc
|
||||
field_mapping.call(orig_data)
|
||||
when String, Symbol
|
||||
field_data(orig_data, field_mapping)
|
||||
when Enumerable
|
||||
field_mapping.collect {|orig_field_name| field_data(orig_data, orig_field_name)}.flatten
|
||||
else
|
||||
raise "Unknown mapping for #{solr_name}: #{field_mapping}"
|
||||
end
|
||||
mapped_data[solr_name] = value if value
|
||||
end
|
||||
|
||||
mapped_data
|
||||
end
|
||||
|
||||
|
||||
end
|
|
@ -0,0 +1,37 @@
|
|||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# For files with the first line containing field names
|
||||
# Currently not designed for enormous files, as all lines are
|
||||
# read into an array
|
||||
class Solr::Importer::TabDelimitedFileSource
|
||||
include Enumerable
|
||||
|
||||
def initialize(filename)
|
||||
@filename = filename
|
||||
end
|
||||
|
||||
def each
|
||||
lines = IO.readlines(@filename)
|
||||
headers = lines[0].split("\t").collect{|h| h.chomp}
|
||||
|
||||
lines[1..-1].each do |line|
|
||||
data = headers.zip(line.split("\t").collect{|s| s.chomp})
|
||||
def data.[](key)
|
||||
self.assoc(key.to_s)[1]
|
||||
end
|
||||
|
||||
yield(data)
|
||||
end
|
||||
end
|
||||
|
||||
end
|
|
@ -0,0 +1,29 @@
|
|||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
class Solr::Indexer
|
||||
def self.index(data_source, mapper, options={})
|
||||
solr_url = options[:solr_url] || ENV["SOLR_URL"] || "http://localhost:8983/solr"
|
||||
|
||||
solr = Solr::Connection.new(solr_url)
|
||||
data_source.each do |record|
|
||||
document = mapper.map(record)
|
||||
|
||||
# yield(document) if block_given? # TODO: does yielding add value here? possibly to allow the caller
|
||||
# to manipulate the document outside of mappings just before indexing?
|
||||
|
||||
solr.add(document) unless options[:debug]
|
||||
puts document.inspect if options[:debug]
|
||||
end
|
||||
solr.commit unless options[:debug]
|
||||
end
|
||||
end
|
|
@ -0,0 +1,75 @@
|
|||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
require 'solr'
|
||||
require 'test/unit'
|
||||
|
||||
class DataMapperTest < Test::Unit::TestCase
|
||||
|
||||
def test_static_mapping
|
||||
mapping = {:static => "value",
|
||||
:static_array => ["value1", "value2"]}
|
||||
|
||||
mapper = Solr::Importer::Mapper.new(mapping)
|
||||
mapped_data = mapper.map({})
|
||||
|
||||
assert_equal "value", mapped_data[:static]
|
||||
assert_equal ["value1", "value2"], mapped_data[:static_array]
|
||||
end
|
||||
|
||||
def test_simple_mapping
|
||||
orig_data = {:orig_field => "value",
|
||||
:multi1 => "val1", :multi2 => "val2"}
|
||||
mapping = {:solr_field => :orig_field,
|
||||
:mapped_array => [:multi1, :multi2], }
|
||||
|
||||
mapper = Solr::Importer::Mapper.new(mapping)
|
||||
mapped_data = mapper.map(orig_data)
|
||||
|
||||
assert_equal "value", mapped_data[:solr_field]
|
||||
assert_equal ["val1", "val2"], mapped_data[:mapped_array]
|
||||
end
|
||||
|
||||
def test_proc
|
||||
orig_data = {:orig_field => "value"}
|
||||
mapping = {:solr_field => Proc.new {|record| ">#{record[:orig_field]}<"}}
|
||||
|
||||
mapper = Solr::Importer::Mapper.new(mapping)
|
||||
mapped_data = mapper.map(orig_data)
|
||||
|
||||
assert_equal ">value<", mapped_data[:solr_field]
|
||||
end
|
||||
|
||||
def test_overridden_field
|
||||
mapping = {:solr_field => [:orig_field1, :orig_field2]}
|
||||
orig_data = {:orig_field1 => "value1", :orig_field2 => "value2", }
|
||||
|
||||
mapper = Solr::Importer::Mapper.new(mapping)
|
||||
def mapper.field_data(orig_data, field_name)
|
||||
["~#{super(orig_data, field_name)}~"] # array tests that the value is flattened
|
||||
end
|
||||
mapped_data = mapper.map(orig_data)
|
||||
|
||||
assert_equal ["~value1~", "~value2~"], mapped_data[:solr_field]
|
||||
end
|
||||
|
||||
def test_unknown_mapping
|
||||
mapping = {:solr_field => /foo/} # regexp currently not a valid mapping type
|
||||
|
||||
mapper = Solr::Importer::Mapper.new(mapping)
|
||||
|
||||
assert_raise(RuntimeError) do
|
||||
mapped_data = mapper.map({})
|
||||
end
|
||||
end
|
||||
|
||||
end
|
|
@ -0,0 +1,2 @@
|
|||
medium associatedURL boxHeightInInches boxLengthInInches boxWeightInPounds boxWidthInInches scannednumber upc asin country title fullTitle series numberInSeries edition aspect mediacount genre price currentValue language netrating description owner publisher published rare purchaseDate rating used signed hasExperienced notes location paid condition notowned author illustrator pages
|
||||
book 9780865681743 0865681740 us Xing Yi Nei Gong: Xing Yi Health Maintenance and Internal Strength Development Xing Yi Nei Gong: Xing Yi Health Maintenance and Internal Strength Development Paperback $21.95 $14.05 4.5 This is the most complete book on the art of xing yi (hsing Yi) available. It includes the complete xing yi history and lineage going back eight generations; manuscripts handed down from famous practitioners Dai Long Bang and Li Neng Ran; 16 health maintenance and power development exercises; qigong (chi kung) exerices; xing yi long spear power training exercises; and more. Unique Publications 1998-02-10 12:00:00 +0000 2007-02-03 02:22:25 -0500 Dan Miller/ Tim Cartmell 200
|
|
@ -0,0 +1,29 @@
|
|||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
require 'solr'
|
||||
require 'test/unit'
|
||||
|
||||
class TabDelimitedFileSourceTest < Test::Unit::TestCase
|
||||
|
||||
def test_load
|
||||
filename = File.expand_path(File.dirname(__FILE__)) + "/tab_delimited.txt"
|
||||
|
||||
source = Solr::Importer::TabDelimitedFileSource.new(filename)
|
||||
assert_equal source.to_a.size, 1
|
||||
|
||||
source.each do |data|
|
||||
assert_equal data[:asin], '0865681740'
|
||||
end
|
||||
end
|
||||
|
||||
end
|
Loading…
Reference in New Issue