mirror of https://github.com/apache/lucene.git
Add more MARC mappings. Add a -debug switch to dump documents to stdout instead of sending to Solr
git-svn-id: https://svn.apache.org/repos/asf/incubator/solr/trunk@496805 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c7d72e0e5a
commit
ebfafc6d63
|
@ -15,40 +15,73 @@ require 'solr'
|
|||
|
||||
solr_url = ENV["SOLR_URL"] || "http://localhost:8983/solr"
|
||||
marc_filename = ARGV[0]
|
||||
debug = ARGV[1] == "-debug"
|
||||
|
||||
$KCODE = 'UTF8'
|
||||
|
||||
mapping = {
|
||||
:id => Proc.new {|r| r['001'].value},
|
||||
:subject_genre_facet => ['650v', '655a'],
|
||||
:subject_era_facet => '650y',
|
||||
:title_text => '245a'
|
||||
# :solr_field_name => String
|
||||
# :solr_field_name => Array of Strings
|
||||
# :solr_field_name => Proc [Proc operates on record]
|
||||
# String = 3 digit control field number or 3 digit data field number + subfield letter
|
||||
|
||||
:id => '001',
|
||||
:subject_genre_facet => ['600v', '610v', '611v', '651v', '650v', '655a'],
|
||||
:subject_era_facet => ['650y', '651y'],
|
||||
:subject_topic_facet => ['650a', '650x'],
|
||||
:title_text => '245a',
|
||||
:author_text => '100a',
|
||||
}
|
||||
|
||||
connection = Solr::Connection.new(solr_url)
|
||||
|
||||
reader = MARC::Reader.new(marc_filename)
|
||||
count = 0
|
||||
|
||||
def extract_record_data(record, fields)
|
||||
extracted_data = []
|
||||
|
||||
fields.each do |field|
|
||||
tag = field[0,3]
|
||||
|
||||
extracted_fields = record.find_all {|f| f.tag === tag}
|
||||
|
||||
extracted_fields.each do |field_instance|
|
||||
if tag < '010' # control field
|
||||
extracted_data << field_instance.value rescue nil
|
||||
else # data field
|
||||
subfield = field[3].chr
|
||||
extracted_data << field_instance[subfield] rescue nil
|
||||
end
|
||||
end
|
||||
end
|
||||
|
||||
extracted_data.compact.uniq
|
||||
end
|
||||
|
||||
for record in reader
|
||||
doc = {}
|
||||
mapping.each do |key,value|
|
||||
if value.kind_of? Proc
|
||||
doc[key] = value.call(record)
|
||||
else
|
||||
data = []
|
||||
value.each do |v|
|
||||
field = v[0,3]
|
||||
subfield = v[3].chr
|
||||
data << record[field][subfield] rescue nil
|
||||
end
|
||||
data.compact!
|
||||
doc[key] = data if not data.empty?
|
||||
data = nil
|
||||
case value
|
||||
when Proc
|
||||
data = value.call(record)
|
||||
|
||||
when String, Array
|
||||
data = extract_record_data(record, value)
|
||||
data = nil if data.empty?
|
||||
end
|
||||
|
||||
doc[key] = data if data
|
||||
end
|
||||
|
||||
puts doc.inspect,"------" if debug
|
||||
|
||||
connection.send(Solr::Request::AddDocument.new(doc))
|
||||
connection.send(Solr::Request::AddDocument.new(doc)) unless debug
|
||||
|
||||
count += 1
|
||||
|
||||
puts count if count % 100 == 0
|
||||
end
|
||||
|
||||
connection.send(Solr::Request::Commit.new)
|
||||
connection.send(Solr::Request::Commit.new) unless debug
|
||||
|
|
Loading…
Reference in New Issue