mirror of https://github.com/apache/lucene.git
Add buffering to allow indexer to add multiple documents at a time, determined by the :buffer_docs parameter. If not specified, individual documents are added to Solr.
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@532580 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d42109383e
commit
eaed609277
|
@ -11,19 +11,44 @@
|
|||
# limitations under the License.
|
||||
|
||||
class Solr::Indexer
|
||||
# deprecated, use Indexer.new(ds,mapping).index instead
|
||||
def self.index(data_source, mapper_or_mapping, options={})
|
||||
mapper = mapper_or_mapping.is_a?(Hash) ? Solr::Importer::Mapper.new(mapper_or_mapping) : mapper_or_mapping
|
||||
indexer = Solr::Indexer.new(data_source, mapper_or_mapping, options={})
|
||||
indexer.index
|
||||
end
|
||||
|
||||
def initialize(data_source, mapper_or_mapping, options={})
|
||||
solr_url = options[:solr_url] || ENV["SOLR_URL"] || "http://localhost:8983/solr"
|
||||
|
||||
solr = Solr::Connection.new(solr_url, options) #TODO - these options contain the solr_url and debug keys also, so tidy up what gets passed
|
||||
data_source.each do |record|
|
||||
document = mapper.map(record)
|
||||
@solr = Solr::Connection.new(solr_url, options) #TODO - these options contain the solr_url and debug keys also, so tidy up what gets passed
|
||||
|
||||
@data_source = data_source
|
||||
@mapper = mapper_or_mapping.is_a?(Hash) ? Solr::Importer::Mapper.new(mapper_or_mapping) : mapper_or_mapping
|
||||
|
||||
@buffer_docs = options[:buffer_docs]
|
||||
@debug = options[:debug]
|
||||
end
|
||||
|
||||
def index
|
||||
buffer = []
|
||||
@data_source.each do |record|
|
||||
document = @mapper.map(record)
|
||||
|
||||
yield(record, document) if block_given?
|
||||
|
||||
solr.add(document) unless options[:debug]
|
||||
puts document.inspect if options[:debug]
|
||||
buffer << document
|
||||
|
||||
if !@buffer_docs || buffer.size == @buffer_docs
|
||||
add_docs(buffer)
|
||||
buffer.clear
|
||||
end
|
||||
end
|
||||
solr.commit unless options[:debug]
|
||||
add_docs(buffer) if !buffer.empty?
|
||||
|
||||
@solr.commit unless @debug
|
||||
end
|
||||
end
|
||||
|
||||
def add_docs(documents)
|
||||
@solr.add(documents) unless @debug
|
||||
puts documents.inspect if @debug
|
||||
end
|
||||
end
|
||||
|
|
|
@ -0,0 +1,57 @@
|
|||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
# (the "License"); you may not use this file except in compliance with
|
||||
# the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
require 'test/unit'
|
||||
require 'solr'
|
||||
|
||||
class Solr::Indexer
|
||||
attr_reader :added
|
||||
def add_docs(doc)
|
||||
@added ||= []
|
||||
@added << doc
|
||||
end
|
||||
end
|
||||
|
||||
class IndexerTest < Test::Unit::TestCase
|
||||
def test_mapping_or_mapping
|
||||
mapping = {:field => "foo"}
|
||||
indexer = Solr::Indexer.new([1,2,3], mapping, :debug => true)
|
||||
indexer.index
|
||||
assert_equal 3, indexer.added.size
|
||||
|
||||
indexer = Solr::Indexer.new([1,2,3,4], Solr::Importer::Mapper.new(mapping), :debug => true)
|
||||
indexer.index
|
||||
assert_equal 4, indexer.added.size
|
||||
end
|
||||
|
||||
def test_batch
|
||||
mapping = {:field => "foo"}
|
||||
indexer = Solr::Indexer.new([1,2,3], mapping, :debug => true, :buffer_docs => 2)
|
||||
indexer.index
|
||||
assert_equal 2, indexer.added.size
|
||||
end
|
||||
|
||||
end
|
||||
|
||||
|
||||
# source = DataSource.new
|
||||
#
|
||||
# mapping = {
|
||||
# :id => :isbn,
|
||||
# :name => :author,
|
||||
# :source => "BOOKS",
|
||||
# :year => Proc.new {|record| record.date[0,4] },
|
||||
# }
|
||||
#
|
||||
# Solr::Indexer.index(source, mapper) do |orig_data, solr_document|
|
||||
# solr_document[:timestamp] = Time.now
|
||||
# end
|
Loading…
Reference in New Issue