mirror of https://github.com/apache/lucene.git
Add year, call number, ISBN, and filename to the documents. Note that the filename_facet is not currently generalizable: it is specific to the data files I load, which are numbered. This should not be a problem.
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@508291 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ea7d510901
commit
4a33261c27
|
@ -16,6 +16,7 @@ require 'solr'
|
||||||
|
|
||||||
solr_url = ENV["SOLR_URL"] || "http://localhost:8983/solr"
|
solr_url = ENV["SOLR_URL"] || "http://localhost:8983/solr"
|
||||||
marc_filename = ARGV[0]
|
marc_filename = ARGV[0]
|
||||||
|
file_number = marc_filename.scan(/\d\d/)
|
||||||
debug = ARGV[1] == "-debug"
|
debug = ARGV[1] == "-debug"
|
||||||
|
|
||||||
$KCODE = 'UTF8'
|
$KCODE = 'UTF8'
|
||||||
|
@ -31,18 +32,24 @@ mapping = {
|
||||||
:subject_era_facet => ['650d', '650y', '651y', '655y'],
|
:subject_era_facet => ['650d', '650y', '651y', '655y'],
|
||||||
:subject_topic_facet => ['650a', '650b', '650x'],
|
:subject_topic_facet => ['650a', '650b', '650x'],
|
||||||
:subject_geographic_facet => ['650c', '650z', '651a', '651x', '651z', '655z'],
|
:subject_geographic_facet => ['650c', '650z', '651a', '651x', '651z', '655z'],
|
||||||
|
:year_facet => Proc.new do |r|
|
||||||
|
extract_record_data(r,'260c').collect {|f| f.scan(/\d\d\d\d/)}.flatten
|
||||||
|
end,
|
||||||
:title_text => '245a',
|
:title_text => '245a',
|
||||||
:author_text => '100a',
|
:author_text => '100a',
|
||||||
# :call_number => '050a',
|
:call_number_text => '050a',
|
||||||
|
:isbn_text => '010a',
|
||||||
|
:filename_facet => Proc.new {|r| file_number},
|
||||||
}
|
}
|
||||||
|
|
||||||
connection = Solr::Connection.new(solr_url)
|
connection = Solr::Connection.new(solr_url)
|
||||||
|
|
||||||
if marc_filename =~ /.gz$/
|
if marc_filename =~ /.gz$/
|
||||||
puts "Unzipping data file..."
|
puts "Unzipping data file..."
|
||||||
system("cp #{marc_filename} /tmp/marc_data.mrc.gz")
|
temp_filename = "/tmp/marc_data_#{file_number}.mrc"
|
||||||
system("gunzip /tmp/marc_data.mrc.gz")
|
system("cp #{marc_filename} #{temp_filename}.gz")
|
||||||
marc_filename = "/tmp/marc_data.mrc"
|
system("gunzip #{temp_filename}")
|
||||||
|
marc_filename = temp_filename
|
||||||
end
|
end
|
||||||
|
|
||||||
reader = MARC::Reader.new(marc_filename)
|
reader = MARC::Reader.new(marc_filename)
|
||||||
|
@ -69,7 +76,7 @@ def extract_record_data(record, fields)
|
||||||
extracted_data.compact.uniq
|
extracted_data.compact.uniq
|
||||||
end
|
end
|
||||||
|
|
||||||
puts "Indexing..."
|
puts "Indexing #{marc_filename}..."
|
||||||
for record in reader
|
for record in reader
|
||||||
doc = {}
|
doc = {}
|
||||||
mapping.each do |key,value|
|
mapping.each do |key,value|
|
||||||
|
|
Loading…
Reference in New Issue