From eaed60927799bfc7237eff5280c8e0c58f119856 Mon Sep 17 00:00:00 2001 From: Erik Hatcher Date: Thu, 26 Apr 2007 02:54:53 +0000 Subject: [PATCH] Add buffering to allow indexer to add multiple documents at a time, determined by the :buffer_docs parameter. If not specified, individual documents are added to Solr. git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@532580 13f79535-47bb-0310-9956-ffa450edef68 --- client/ruby/solr-ruby/lib/solr/indexer.rb | 43 +++++++++++--- .../ruby/solr-ruby/test/unit/indexer_test.rb | 57 +++++++++++++++++++ 2 files changed, 91 insertions(+), 9 deletions(-) create mode 100755 client/ruby/solr-ruby/test/unit/indexer_test.rb diff --git a/client/ruby/solr-ruby/lib/solr/indexer.rb b/client/ruby/solr-ruby/lib/solr/indexer.rb index 4b8b5661b6c..82f3aaca5cc 100755 --- a/client/ruby/solr-ruby/lib/solr/indexer.rb +++ b/client/ruby/solr-ruby/lib/solr/indexer.rb @@ -11,19 +11,44 @@ # limitations under the License. class Solr::Indexer + # deprecated, use Indexer.new(ds,mapping).index instead def self.index(data_source, mapper_or_mapping, options={}) - mapper = mapper_or_mapping.is_a?(Hash) ? Solr::Importer::Mapper.new(mapper_or_mapping) : mapper_or_mapping + indexer = Solr::Indexer.new(data_source, mapper_or_mapping, options={}) + indexer.index + end + + def initialize(data_source, mapper_or_mapping, options={}) solr_url = options[:solr_url] || ENV["SOLR_URL"] || "http://localhost:8983/solr" - - solr = Solr::Connection.new(solr_url, options) #TODO - these options contain the solr_url and debug keys also, so tidy up what gets passed - data_source.each do |record| - document = mapper.map(record) + @solr = Solr::Connection.new(solr_url, options) #TODO - these options contain the solr_url and debug keys also, so tidy up what gets passed + + @data_source = data_source + @mapper = mapper_or_mapping.is_a?(Hash) ? Solr::Importer::Mapper.new(mapper_or_mapping) : mapper_or_mapping + + @buffer_docs = options[:buffer_docs] + @debug = options[:debug] + end + + def index + buffer = [] + @data_source.each do |record| + document = @mapper.map(record) yield(record, document) if block_given? - solr.add(document) unless options[:debug] - puts document.inspect if options[:debug] + buffer << document + + if !@buffer_docs || buffer.size == @buffer_docs + add_docs(buffer) + buffer.clear + end end - solr.commit unless options[:debug] + add_docs(buffer) if !buffer.empty? + + @solr.commit unless @debug end -end \ No newline at end of file + + def add_docs(documents) + @solr.add(documents) unless @debug + puts documents.inspect if @debug + end +end diff --git a/client/ruby/solr-ruby/test/unit/indexer_test.rb b/client/ruby/solr-ruby/test/unit/indexer_test.rb new file mode 100755 index 00000000000..58d1a8defd8 --- /dev/null +++ b/client/ruby/solr-ruby/test/unit/indexer_test.rb @@ -0,0 +1,57 @@ +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +require 'test/unit' +require 'solr' + +class Solr::Indexer + attr_reader :added + def add_docs(doc) + @added ||= [] + @added << doc + end +end + +class IndexerTest < Test::Unit::TestCase + def test_mapping_or_mapping + mapping = {:field => "foo"} + indexer = Solr::Indexer.new([1,2,3], mapping, :debug => true) + indexer.index + assert_equal 3, indexer.added.size + + indexer = Solr::Indexer.new([1,2,3,4], Solr::Importer::Mapper.new(mapping), :debug => true) + indexer.index + assert_equal 4, indexer.added.size + end + + def test_batch + mapping = {:field => "foo"} + indexer = Solr::Indexer.new([1,2,3], mapping, :debug => true, :buffer_docs => 2) + indexer.index + assert_equal 2, indexer.added.size + end + +end + + +# source = DataSource.new +# +# mapping = { +# :id => :isbn, +# :name => :author, +# :source => "BOOKS", +# :year => Proc.new {|record| record.date[0,4] }, +# } +# +# Solr::Indexer.index(source, mapper) do |orig_data, solr_document| +# solr_document[:timestamp] = Time.now +# end \ No newline at end of file