Add buffering to allow indexer to add multiple documents at a time, determined by the :buffer_docs parameter. If not specified, individual documents are added to Solr.

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@532580 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Erik Hatcher 2007-04-26 02:54:53 +00:00
parent d42109383e
commit eaed609277
2 changed files with 91 additions and 9 deletions

View File

@ -11,19 +11,44 @@
# limitations under the License. # limitations under the License.
class Solr::Indexer class Solr::Indexer
# deprecated, use Indexer.new(ds,mapping).index instead
# Class-level convenience wrapper: builds an indexer for the given arguments
# and runs it once.
#
# data_source::       handed to Solr::Indexer.new unchanged
# mapper_or_mapping:: handed to Solr::Indexer.new unchanged
# options::           option Hash, forwarded as given. (Writing
#                     <tt>options={}</tt> at the call site reassigns the
#                     local and always passes an empty Hash, silently
#                     dropping every caller-supplied option.)
#
# A block given to this method is forwarded to the indexer's #index call;
# without the explicit forwarding it would be silently ignored.
#
# Returns whatever the indexer's #index call returns.
def self.index(data_source, mapper_or_mapping, options={}, &block)
  indexer = Solr::Indexer.new(data_source, mapper_or_mapping, options)
  indexer.index(&block)
end
# Builds an indexer from a data source, a mapper and an options Hash.
#
# data_source::       stored as-is in @data_source
# mapper_or_mapping:: a Hash is wrapped in Solr::Importer::Mapper.new; any
#                     other object is stored directly as the mapper
# options::           Hash; the keys read here are
#                     :solr_url    - URL given to Solr::Connection.new; falls
#                                    back to ENV["SOLR_URL"], then to
#                                    "http://localhost:8983/solr"
#                     :buffer_docs - stored in @buffer_docs
#                     :debug       - stored in @debug
#
# The full options Hash is also passed to Solr::Connection.new.
def initialize(data_source, mapper_or_mapping, options={})
  solr_url = options[:solr_url] || ENV["SOLR_URL"] || "http://localhost:8983/solr"
  # TODO - these options contain the solr_url and debug keys also, so tidy up what gets passed
  @solr = Solr::Connection.new(solr_url, options)
  @data_source = data_source
  @mapper = mapper_or_mapping.is_a?(Hash) ? Solr::Importer::Mapper.new(mapper_or_mapping) : mapper_or_mapping
  @buffer_docs = options[:buffer_docs]
  @debug = options[:debug]
end
# Maps every record of @data_source to a document, hands the documents to
# #add_docs, and finally commits unless @debug is set.
#
# When a block is given it is called with (record, document) right after
# mapping and before the document is queued.
#
# Batching: with @buffer_docs unset every document is handed over on its own
# (as a one-element Array); otherwise a batch is handed over each time
# @buffer_docs documents have accumulated, and any remainder is handed over
# after the loop.
#
# Returns the result of the trailing commit expression (nil when @debug is
# set).
def index
  buffer = []
  @data_source.each do |record|
    document = @mapper.map(record)
    yield(record, document) if block_given?
    buffer << document

    if !@buffer_docs || buffer.size == @buffer_docs
      add_docs(buffer)
      # Start a new Array rather than clearing the one just handed over, so a
      # receiver that keeps a reference to the batch does not see it emptied.
      buffer = []
    end
  end
  add_docs(buffer) unless buffer.empty?
  @solr.commit unless @debug
end
end
# Hands +documents+ to Solr, or prints them instead when in debug mode.
#
# documents:: passed unchanged to @solr.add
#
# With @debug set nothing is sent; the inspected form of +documents+ is
# written with +puts+. Always returns nil.
def add_docs(documents)
  if @debug
    puts documents.inspect
  else
    @solr.add(documents)
  end
  nil
end
end

View File

@ -0,0 +1,57 @@
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
require 'test/unit'
require 'solr'
# Test double: reopens Solr::Indexer so that add_docs records its argument
# instead of running the library implementation.
class Solr::Indexer
  # Arguments add_docs has been called with, in call order (nil until the
  # first call).
  attr_reader :added

  # Appends +doc+ to the @added list, creating the list on first use.
  # Returns the @added Array.
  def add_docs(doc)
    (@added ||= []) << doc
  end
end
# Tests for Solr::Indexer that count how many times add_docs is called,
# using the recording add_docs override defined in this file.
class IndexerTest < Test::Unit::TestCase
  # The second constructor argument may be a mapping Hash or a ready-made
  # Mapper; without :buffer_docs each record leads to its own add_docs call.
  def test_mapping_or_mapping
    mapping = {:field => "foo"}

    hash_indexer = Solr::Indexer.new([1,2,3], mapping, :debug => true)
    hash_indexer.index
    assert_equal 3, hash_indexer.added.size

    mapper_indexer = Solr::Indexer.new([1,2,3,4], Solr::Importer::Mapper.new(mapping), :debug => true)
    mapper_indexer.index
    assert_equal 4, mapper_indexer.added.size
  end

  # With :buffer_docs => 2, three records lead to two add_docs calls: one
  # full batch of two and one remainder of one.
  def test_batch
    indexer = Solr::Indexer.new([1,2,3], {:field => "foo"}, :debug => true, :buffer_docs => 2)
    indexer.index
    assert_equal 2, indexer.added.size
  end
end
# source = DataSource.new
#
# mapping = {
# :id => :isbn,
# :name => :author,
# :source => "BOOKS",
# :year => Proc.new {|record| record.date[0,4] },
# }
#
# Solr::Indexer.index(source, mapping) do |orig_data, solr_document|
# solr_document[:timestamp] = Time.now
# end