Add gunzip capability to MARC importer

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@504065 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Erik Hatcher 2007-02-06 10:30:06 +00:00
parent 57fef31f73
commit 32280db869
1 changed files with 10 additions and 0 deletions

View File

@ -1,3 +1,4 @@
#!/usr/bin/env ruby
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
@ -37,6 +38,13 @@ mapping = {
connection = Solr::Connection.new(solr_url)
# Transparently handle gzip-compressed input: decompress a copy of the data
# file into /tmp and point marc_filename at the uncompressed copy.
# The dot is escaped and the match is anchored at the end of the string so
# that only names really ending in ".gz" take this path.
if marc_filename =~ /\.gz\z/
  puts "Unzipping data file..."
  # The argument-list form of system bypasses the shell, so a filename with
  # spaces or shell metacharacters is passed through verbatim.
  system("cp", marc_filename, "/tmp/marc_data.mrc.gz") ||
    abort("Could not copy #{marc_filename} to /tmp/marc_data.mrc.gz")
  # -f overwrites a stale /tmp/marc_data.mrc left behind by a previous run;
  # without it gunzip refuses (or prompts) when the target already exists.
  system("gunzip", "-f", "/tmp/marc_data.mrc.gz") ||
    abort("Could not gunzip /tmp/marc_data.mrc.gz")
  marc_filename = "/tmp/marc_data.mrc"
end
# Open a MARC reader on the data file (the uncompressed /tmp copy when the
# input name matched the .gz check above).
reader = MARC::Reader.new(marc_filename)
# Counter initialised to zero; presumably tracks records processed in the
# indexing loop -- confirm against the loop body.
count = 0
@ -61,6 +69,7 @@ def extract_record_data(record, fields)
extracted_data.compact.uniq
end
puts "Indexing..."
for record in reader
doc = {}
mapping.each do |key,value|
@ -87,3 +96,4 @@ for record in reader
end
# Send a Solr commit request over the connection, skipped when debug is set.
connection.send(Solr::Request::Commit.new) unless debug
puts "Done"