mirror of https://github.com/apache/lucene.git
Added author and page count to import. Adjusted schema to not stem or remove stop words.
git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@503377 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
c06fe80ce9
commit
32d4ad8b3b
|
@ -146,19 +146,19 @@
|
||||||
<!-- in this example, we will only use synonyms at query time
|
<!-- in this example, we will only use synonyms at query time
|
||||||
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
<filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
|
||||||
-->
|
-->
|
||||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
<!-- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/> -->
|
||||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
|
||||||
<filter class="solr.LowerCaseFilterFactory"/>
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
|
<!-- <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/> -->
|
||||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||||
</analyzer>
|
</analyzer>
|
||||||
<analyzer type="query">
|
<analyzer type="query">
|
||||||
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
|
||||||
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
|
<!-- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> -->
|
||||||
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
|
<!-- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/> -->
|
||||||
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
|
||||||
<filter class="solr.LowerCaseFilterFactory"/>
|
<filter class="solr.LowerCaseFilterFactory"/>
|
||||||
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
|
<!-- <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/> -->
|
||||||
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
|
||||||
</analyzer>
|
</analyzer>
|
||||||
</fieldtype>
|
</fieldtype>
|
||||||
|
|
|
@ -1,3 +1,4 @@
|
||||||
|
#!/usr/bin/env ruby
|
||||||
# The ASF licenses this file to You under the Apache License, Version 2.0
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
||||||
# (the "License"); you may not use this file except in compliance with
|
# (the "License"); you may not use this file except in compliance with
|
||||||
# the License. You may obtain a copy of the License at
|
# the License. You may obtain a copy of the License at
|
||||||
|
@ -22,7 +23,7 @@ debug = ARGV[1] == "-debug"
|
||||||
solr = Solr::Connection.new(solr_url)
|
solr = Solr::Connection.new(solr_url)
|
||||||
|
|
||||||
lines = IO.readlines(dl_filename)
|
lines = IO.readlines(dl_filename)
|
||||||
headers = lines[0].split("\t")
|
headers = lines[0].split("\t").collect{|h| h.chomp}
|
||||||
puts headers.join(','),"-----" if debug
|
puts headers.join(','),"-----" if debug
|
||||||
|
|
||||||
# Exported column names
|
# Exported column names
|
||||||
|
@ -41,11 +42,13 @@ mapping = {
|
||||||
:title_text => :title,
|
:title_text => :title,
|
||||||
:notes_text => :notes,
|
:notes_text => :notes,
|
||||||
:publisher_text => :publisher,
|
:publisher_text => :publisher,
|
||||||
:description_text => :description
|
:description_text => :description,
|
||||||
|
:author_text => :author,
|
||||||
|
:pages_text => :pages
|
||||||
}
|
}
|
||||||
|
|
||||||
lines[1..-1].each do |line|
|
lines[1..-1].each do |line|
|
||||||
data = headers.zip(line.split("\t"))
|
data = headers.zip(line.split("\t").collect{|s| s.chomp})
|
||||||
def data.method_missing(key)
|
def data.method_missing(key)
|
||||||
self.assoc(key.to_s)[1]
|
self.assoc(key.to_s)[1]
|
||||||
end
|
end
|
||||||
|
@ -60,6 +63,7 @@ lines[1..-1].each do |line|
|
||||||
doc[solr_name] = value if value
|
doc[solr_name] = value if value
|
||||||
end
|
end
|
||||||
|
|
||||||
|
puts data.title
|
||||||
puts doc.inspect if debug
|
puts doc.inspect if debug
|
||||||
solr.add doc unless debug
|
solr.add doc unless debug
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue