Added author and page count to import. Adjusted schema to not stem, remove stop words, or expand synonyms at query time.

git-svn-id: https://svn.apache.org/repos/asf/lucene/solr/trunk@503377 13f79535-47bb-0310-9956-ffa450edef68
2007-02-04 09:39:55 +00:00 · 2007-02-04 09:39:55 +00:00 · 32d4ad8b3b
parent c06fe80ce9
commit 32d4ad8b3b
2 changed files with 12 additions and 8 deletions
--- a/client/ruby/solrb/examples/delicious_library/conf/schema.xml
+++ b/client/ruby/solrb/examples/delicious_library/conf/schema.xml
@@ -146,19 +146,19 @@
        <!-- in this example, we will only use synonyms at query time
        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
        -->
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+        <!-- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/> -->
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+        <!-- <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/> -->
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
      <analyzer type="query">
        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <!-- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/> -->
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+        <!-- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/> -->
        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
        <filter class="solr.LowerCaseFilterFactory"/>
-        <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+        <!-- <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/> -->
        <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
      </analyzer>
    </fieldtype>
--- a/client/ruby/solrb/examples/delicious_library/dl_importer.rb
+++ b/client/ruby/solrb/examples/delicious_library/dl_importer.rb
@@ -1,3 +1,4 @@
+#!/usr/bin/env ruby
 # The ASF licenses this file to You under the Apache License, Version 2.0
 # (the "License"); you may not use this file except in compliance with
 # the License.  You may obtain a copy of the License at
@@ -22,7 +23,7 @@ debug = ARGV[1] == "-debug"
 solr = Solr::Connection.new(solr_url)
 lines = IO.readlines(dl_filename)
-headers = lines[0].split("\t")
+headers = lines[0].split("\t").collect{|h| h.chomp}
 puts headers.join(','),"-----" if debug
 # Exported column names
@@ -41,11 +42,13 @@ mapping = {
  :title_text => :title,
  :notes_text => :notes,
  :publisher_text => :publisher,
-  :description_text => :description
+  :description_text => :description,
+  :author_text => :author,
+  :pages_text => :pages
 }
 lines[1..-1].each do |line|
-  data = headers.zip(line.split("\t"))
+  data = headers.zip(line.split("\t").collect{|s| s.chomp})
  def data.method_missing(key)
    self.assoc(key.to_s)[1]
  end
@@ -60,6 +63,7 @@ lines[1..-1].each do |line|
    doc[solr_name] = value if value
  end
  puts data.title
  puts doc.inspect if debug
  solr.add doc unless debug