mirror of https://github.com/apache/lucene.git
Rename tab_delimited to just delimited and add a splitter parameter, allowing lines to be split on any regular expression
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@532565 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
1b817c008d
commit
4866d6e22f
|
@ -13,19 +13,20 @@
|
||||||
# For files with the first line containing field names
|
# For files with the first line containing field names
|
||||||
# Currently not designed for enormous files, as all lines are
|
# Currently not designed for enormous files, as all lines are
|
||||||
# read into an array
|
# read into an array
|
||||||
class Solr::Importer::TabDelimitedFileSource
|
class Solr::Importer::DelimitedFileSource
|
||||||
include Enumerable
|
include Enumerable
|
||||||
|
|
||||||
def initialize(filename)
|
def initialize(filename, splitter=/\t/)
|
||||||
@filename = filename
|
@filename = filename
|
||||||
|
@splitter = splitter
|
||||||
end
|
end
|
||||||
|
|
||||||
def each
|
def each
|
||||||
lines = IO.readlines(@filename)
|
lines = IO.readlines(@filename)
|
||||||
headers = lines[0].split("\t").collect{|h| h.chomp}
|
headers = lines[0].split(@splitter).collect{|h| h.chomp}
|
||||||
|
|
||||||
lines[1..-1].each do |line|
|
lines[1..-1].each do |line|
|
||||||
data = headers.zip(line.split("\t").collect{|s| s.chomp})
|
data = headers.zip(line.split(@splitter).collect{|s| s.chomp})
|
||||||
def data.[](key)
|
def data.[](key)
|
||||||
self.assoc(key.to_s)[1]
|
self.assoc(key.to_s)[1]
|
||||||
end
|
end
|
|
@ -13,12 +13,12 @@
|
||||||
require 'solr'
|
require 'solr'
|
||||||
require 'test/unit'
|
require 'test/unit'
|
||||||
|
|
||||||
class TabDelimitedFileSourceTest < Test::Unit::TestCase
|
class DelimitedFileSourceTest < Test::Unit::TestCase
|
||||||
|
|
||||||
def test_load
|
def test_load
|
||||||
filename = File.expand_path(File.dirname(__FILE__)) + "/tab_delimited.txt"
|
filename = File.expand_path(File.dirname(__FILE__)) + "/tab_delimited.txt"
|
||||||
|
|
||||||
source = Solr::Importer::TabDelimitedFileSource.new(filename)
|
source = Solr::Importer::DelimitedFileSource.new(filename,/\t/)
|
||||||
assert_equal source.to_a.size, 1
|
assert_equal source.to_a.size, 1
|
||||||
|
|
||||||
source.each do |data|
|
source.each do |data|
|
Loading…
Reference in New Issue