mirror of https://github.com/apache/lucene.git
Add buffering so the indexer can add multiple documents to Solr at a time; the batch size is set by the :buffer_docs option. If :buffer_docs is not specified, each document is added to Solr individually.
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@532580 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d42109383e
commit
eaed609277
|
@ -11,19 +11,44 @@
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
class Solr::Indexer
|
class Solr::Indexer
|
||||||
|
# deprecated, use Indexer.new(ds,mapping).index instead
|
||||||
def self.index(data_source, mapper_or_mapping, options={})
|
def self.index(data_source, mapper_or_mapping, options={})
|
||||||
mapper = mapper_or_mapping.is_a?(Hash) ? Solr::Importer::Mapper.new(mapper_or_mapping) : mapper_or_mapping
|
# Pass the caller's options through unchanged. Writing `options={}` in the
# argument list would reassign the local to an empty hash and discard
# :solr_url, :debug and :buffer_docs.
indexer = Solr::Indexer.new(data_source, mapper_or_mapping, options)
|
||||||
solr_url = options[:solr_url] || ENV["SOLR_URL"] || "http://localhost:8983/solr"
|
indexer.index
|
||||||
|
end
|
||||||
|
|
||||||
solr = Solr::Connection.new(solr_url, options) #TODO - these options contain the solr_url and debug keys also, so tidy up what gets passed
|
def initialize(data_source, mapper_or_mapping, options={})
|
||||||
data_source.each do |record|
|
solr_url = options[:solr_url] || ENV["SOLR_URL"] || "http://localhost:8983/solr"
|
||||||
document = mapper.map(record)
|
@solr = Solr::Connection.new(solr_url, options) #TODO - these options contain the solr_url and debug keys also, so tidy up what gets passed
|
||||||
|
|
||||||
|
@data_source = data_source
|
||||||
|
@mapper = mapper_or_mapping.is_a?(Hash) ? Solr::Importer::Mapper.new(mapper_or_mapping) : mapper_or_mapping
|
||||||
|
|
||||||
|
@buffer_docs = options[:buffer_docs]
|
||||||
|
@debug = options[:debug]
|
||||||
|
end
|
||||||
|
|
||||||
|
def index
|
||||||
|
buffer = []
|
||||||
|
@data_source.each do |record|
|
||||||
|
document = @mapper.map(record)
|
||||||
|
|
||||||
yield(record, document) if block_given?
|
yield(record, document) if block_given?
|
||||||
|
|
||||||
solr.add(document) unless options[:debug]
|
buffer << document
|
||||||
puts document.inspect if options[:debug]
|
|
||||||
|
if !@buffer_docs || buffer.size == @buffer_docs
|
||||||
|
add_docs(buffer)
|
||||||
|
buffer.clear
|
||||||
|
end
|
||||||
end
|
end
|
||||||
solr.commit unless options[:debug]
|
add_docs(buffer) if !buffer.empty?
|
||||||
|
|
||||||
|
@solr.commit unless @debug
|
||||||
|
end
|
||||||
|
|
||||||
|
def add_docs(documents)
|
||||||
|
@solr.add(documents) unless @debug
|
||||||
|
puts documents.inspect if @debug
|
||||||
end
|
end
|
||||||
end
|
end
|
|
@ -0,0 +1,57 @@
|
||||||
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
|
# (the "License"); you may not use this file except in compliance with
|
||||||
|
# the License. You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
require 'test/unit'
|
||||||
|
require 'solr'
|
||||||
|
|
||||||
|
class Solr::Indexer
|
||||||
|
attr_reader :added
|
||||||
|
def add_docs(doc)
|
||||||
|
@added ||= []
|
||||||
|
# Record a copy of the batch: the indexer clears and reuses the same buffer
# array after each add_docs call, so storing the reference would leave every
# recorded batch empty.
@added << doc.dup
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
class IndexerTest < Test::Unit::TestCase
|
||||||
|
def test_mapping_or_mapping
|
||||||
|
mapping = {:field => "foo"}
|
||||||
|
indexer = Solr::Indexer.new([1,2,3], mapping, :debug => true)
|
||||||
|
indexer.index
|
||||||
|
assert_equal 3, indexer.added.size
|
||||||
|
|
||||||
|
indexer = Solr::Indexer.new([1,2,3,4], Solr::Importer::Mapper.new(mapping), :debug => true)
|
||||||
|
indexer.index
|
||||||
|
assert_equal 4, indexer.added.size
|
||||||
|
end
|
||||||
|
|
||||||
|
def test_batch
|
||||||
|
mapping = {:field => "foo"}
|
||||||
|
indexer = Solr::Indexer.new([1,2,3], mapping, :debug => true, :buffer_docs => 2)
|
||||||
|
indexer.index
|
||||||
|
assert_equal 2, indexer.added.size
|
||||||
|
end
|
||||||
|
|
||||||
|
end
|
||||||
|
|
||||||
|
|
||||||
|
# source = DataSource.new
|
||||||
|
#
|
||||||
|
# mapping = {
|
||||||
|
# :id => :isbn,
|
||||||
|
# :name => :author,
|
||||||
|
# :source => "BOOKS",
|
||||||
|
# :year => Proc.new {|record| record.date[0,4] },
|
||||||
|
# }
|
||||||
|
#
|
||||||
|
# Solr::Indexer.index(source, mapping) do |orig_data, solr_document|
|
||||||
|
# solr_document[:timestamp] = Time.now
|
||||||
|
# end
|
Loading…
Reference in New Issue