changes in the schema, and some sample docs

git-svn-id: https://svn.apache.org/repos/asf/incubator/solr/trunk@380201 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2006-02-23 19:18:22 +00:00
parent 8837b42d16
commit 1aec5a3da7
11 changed files with 285 additions and 14 deletions

View File

@ -66,8 +66,10 @@
matching across fields. matching across fields.
--> -->
<!-- Standard analyzer commonly used by Lucene developers
-->
<!-- Standard analyzer commonly used by Lucene developers --> <!-- Standard analyzer commonly used by Lucene developers -->
<fieldtype name="text_lu" class="solr.TextField" positionIncrementGap="10"> <fieldtype name="text_lu" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/> <tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/> <filter class="solr.StandardFilterFactory"/>
@ -76,9 +78,15 @@
<filter class="solr.EnglishPorterFilterFactory"/> <filter class="solr.EnglishPorterFilterFactory"/>
</analyzer> </analyzer>
</fieldtype> </fieldtype>
<!-- One could also specify an existing Analyzer implementation in Java
via the class attribute on the analyzer element:
<fieldtype name="text_lu" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.snowball.SnowballAnalyzer"/>
</fieldtype>
-->
<!-- A text field that only splits on whitespace for more exact matching --> <!-- A text field that only splits on whitespace for more exact matching -->
<fieldtype name="text_ws" class="solr.TextField" positionIncrementGap="10"> <fieldtype name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer> <analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer> </analyzer>
@ -86,9 +94,9 @@
<!-- A text field that uses WordDelimiterFilter to enable splitting and matching of <!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
words on case-change, alpha numeric boundaries, and non-alphanumeric chars words on case-change, alpha numeric boundaries, and non-alphanumeric chars
so that a query of "wifi" or "wi fi" could match a document containing Wi-Fi. so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
Synonyms and stopwords are customized by external files, and stemming is enabled --> Synonyms and stopwords are customized by external files, and stemming is enabled -->
<fieldtype name="text" class="solr.TextField" positionIncrementGap="10"> <fieldtype name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index"> <analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/> <tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/> <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
@ -107,6 +115,20 @@
</analyzer> </analyzer>
</fieldtype> </fieldtype>
<!-- Less flexible matching, but fewer false matches. Probably not ideal for product names,
but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
<fieldtype name="textTight" class="solr.TextField" positionIncrementGap="100">
  <!-- A single analyzer (no type attribute), so the same chain is declared once
       for this field type rather than separately for index and query. -->
  <analyzer>
    <tokenizer class="solr.WhitespaceTokenizerFactory"/>
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true"/>
    <!-- Part generation is switched off and catenation switched on: delimited
         tokens are kept only in their joined form, which is what lets a
         misplaced dash in a SKU still match. -->
    <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
  </analyzer>
</fieldtype>
</types> </types>
@ -122,15 +144,26 @@
--> -->
<field name="id" type="string" indexed="true" stored="true"/> <field name="id" type="string" indexed="true" stored="true"/>
<field name="date" type="date" indexed="true" stored="true"/> <field name="sku" type="textTight" indexed="true" stored="true"/>
<field name="title" type="text" indexed="true" stored="true"/> <field name="name" type="text" indexed="true" stored="true"/>
<field name="subject" type="text" indexed="true" stored="true"/> <field name="manu" type="text" indexed="true" stored="true"/>
<field name="body" type="text" indexed="true" stored="true"/> <field name="cat" type="text_ws" indexed="true" stored="true" multiValued="true"/>
<!-- Product attribute fields. "features" is multiValued, so a document may
repeat it; the other fields here are single-valued. -->
<field name="features" type="text" indexed="true" stored="true" multiValued="true"/>
<field name="includes" type="text" indexed="true" stored="true"/>
<field name="weight" type="sfloat" indexed="true" stored="true"/>
<field name="price" type="sfloat" indexed="true" stored="true"/>
<field name="popularity" type="sint" indexed="true" stored="true"/>
<field name="inStock" type="boolean" indexed="true" stored="true"/>
<!-- catchall field, containing all other searchable text fields (implemented <!-- catchall field, containing all other searchable text fields (implemented
via copyField further on in this schema --> via copyField further on in this schema -->
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/> <field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
<!-- non-tokenized version of manufacturer to make it easier to sort or group
results by manufacturer. copied from "manu" via copyField -->
<field name="manu_exact" type="string" indexed="true" stored="false"/>
<!-- Dynamic field definitions. If a field name is not found, dynamicFields <!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns. will be used if the name matches any of the patterns.
@ -149,8 +182,7 @@
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/> <dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
</fields> </fields>
<!-- field to use to determine document uniqueness... used when <!-- field to use to determine and enforce document uniqueness. -->
overwriting one document with another -->
<uniqueKey>id</uniqueKey> <uniqueKey>id</uniqueKey>
<!-- field for the QueryParser to use when an explicit fieldname is absent --> <!-- field for the QueryParser to use when an explicit fieldname is absent -->
@ -159,9 +191,15 @@
<!-- copyField commands copy one field to another at the time a document <!-- copyField commands copy one field to another at the time a document
is added to the index. It's used either to index the same field different is added to the index. It's used either to index the same field different
ways, or to add multiple fields to the same field for easier/faster searching. --> ways, or to add multiple fields to the same field for easier/faster searching. -->
<copyField source="title" dest="text"/> <copyField source="id" dest="sku"/>
<copyField source="subject" dest="text"/>
<copyField source="body" dest="text"/> <copyField source="cat" dest="text"/>
<!-- text fields copied into the catchall "text" field -->
<copyField source="name" dest="text"/>
<copyField source="manu" dest="text"/>
<copyField source="features" dest="text"/>
<copyField source="includes" dest="text"/>
<!-- non-tokenized copy of the manufacturer, for sorting or grouping -->
<copyField source="manu" dest="manu_exact"/>
<!-- Similarity is the scoring routine for each document vs a query. <!-- Similarity is the scoring routine for each document vs a query.
A custom similarity may be specified here, but the default is fine A custom similarity may be specified here, but the default is fine

29
example/exampledocs/hd.xml Executable file
View File

@ -0,0 +1,29 @@
<add>
  <!-- Sample update message: two hard drives, each filed under the
       "electronics" and "hard drive" categories. -->
  <doc>
    <field name="id">SP2514N</field>
    <field name="name">Samsung SpinPoint P120 SP2514N - hard drive - 250 GB - ATA-133</field>
    <field name="manu">Samsung Electronics Co. Ltd.</field>
    <field name="cat">electronics</field>
    <field name="cat">hard drive</field>
    <field name="features">7200RPM, 8MB cache, IDE Ultra ATA-133</field>
    <field name="features">NoiseGuard, SilentSeek technology, Fluid Dynamic Bearing (FDB) motor</field>
    <field name="price">92</field>
    <field name="popularity">6</field>
    <field name="inStock">true</field>
  </doc>

  <doc>
    <field name="id">6H500F0</field>
    <field name="name">Maxtor DiamondMax 11 - hard drive - 500 GB - SATA-300</field>
    <field name="manu">Maxtor Corp.</field>
    <field name="cat">electronics</field>
    <field name="cat">hard drive</field>
    <field name="features">SATA 3.0Gb/s, NCQ</field>
    <field name="features">8.5ms seek</field>
    <field name="features">16MB cache</field>
    <field name="price">350</field>
    <field name="popularity">6</field>
    <field name="inStock">true</field>
  </doc>
</add>

View File

@ -0,0 +1,33 @@
<add>
  <!-- Sample update message: two Belkin iPod accessories in the
       "electronics" and "connector" categories; both are out of stock. -->
  <doc>
    <field name="id">F8V7067-APL-KIT</field>
    <field name="name">Belkin Mobile Power Cord for iPod w/ Dock</field>
    <field name="manu">Belkin</field>
    <field name="cat">electronics</field>
    <field name="cat">connector</field>
    <field name="features">car power adapter, white</field>
    <field name="weight">4</field>
    <field name="price">19.95</field>
    <field name="popularity">1</field>
    <field name="inStock">false</field>
  </doc>

  <doc>
    <field name="id">IW-02</field>
    <!-- the ampersand in the product name has to be written as an entity -->
    <field name="name">iPod &amp; iPod Mini USB 2.0 Cable</field>
    <field name="manu">Belkin</field>
    <field name="cat">electronics</field>
    <field name="cat">connector</field>
    <field name="features">car power adapter for iPod, white</field>
    <field name="weight">2</field>
    <field name="price">11.50</field>
    <field name="popularity">1</field>
    <field name="inStock">false</field>
  </doc>
</add>

View File

@ -0,0 +1,18 @@
<add>
  <!-- Sample update message: one music player with many repeated
       "features" values plus an "includes" value. -->
  <doc>
    <field name="id">MA147LL/A</field>
    <field name="name">Apple 60 GB iPod with Video Playback Black</field>
    <field name="manu">Apple Computer Inc.</field>
    <field name="cat">electronics</field>
    <field name="cat">music</field>
    <field name="features">iTunes, Podcasts, Audiobooks</field>
    <field name="features">Stores up to 15,000 songs, 25,000 photos, or 150 hours of video</field>
    <field name="features">2.5-inch, 320x240 color TFT LCD display with LED backlight</field>
    <field name="features">Up to 20 hours of battery life</field>
    <field name="features">Plays AAC, MP3, WAV, AIFF, Audible, Apple Lossless, H.264 video</field>
    <field name="features">Notes, Calendar, Phone book, Hold button, Date display, Photo wallet, Built-in games, JPEG photo playback, Upgradeable firmware, USB 2.0 compatibility, Playback speed control, Rechargeable capability, Battery level indication</field>
    <field name="includes">earbud headphones, USB cable</field>
    <field name="weight">5.5</field>
    <field name="price">399.00</field>
    <field name="popularity">10</field>
    <field name="inStock">true</field>
  </doc>
</add>

41
example/exampledocs/mem.xml Executable file
View File

@ -0,0 +1,41 @@
<add>
  <!-- Sample update message: three memory modules in the "electronics"
       and "memory" categories. -->
  <doc>
    <field name="id">TWINX2048-3200PRO</field>
    <field name="name">CORSAIR XMS 2GB (2 x 1GB) 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) Dual Channel Kit System Memory - Retail</field>
    <field name="manu">Corsair Microsystems Inc.</field>
    <field name="cat">electronics</field>
    <field name="cat">memory</field>
    <field name="features">CAS latency 2, 2-3-3-6 timing, 2.75v, unbuffered, heat-spreader</field>
    <field name="price">185</field>
    <field name="popularity">5</field>
    <field name="inStock">true</field>
  </doc>

  <doc>
    <field name="id">VS1GB400C3</field>
    <field name="name">CORSAIR ValueSelect 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - Retail</field>
    <field name="manu">Corsair Microsystems Inc.</field>
    <field name="cat">electronics</field>
    <field name="cat">memory</field>
    <field name="price">74.99</field>
    <field name="popularity">7</field>
    <field name="inStock">true</field>
  </doc>

  <doc>
    <field name="id">VDBDB1A16</field>
    <field name="name">A-DATA V-Series 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - OEM</field>
    <field name="manu">A-DATA Technology Inc.</field>
    <field name="cat">electronics</field>
    <field name="cat">memory</field>
    <field name="features">CAS latency 3, 2.7v</field>
    <!-- this document has no price field -->
    <field name="popularity">5</field>
    <field name="inStock">true</field>
  </doc>
</add>

14
example/exampledocs/monitor.xml Executable file
View File

@ -0,0 +1,14 @@
<add>
  <!-- Sample update message: one Dell monitor in the "electronics" and
       "monitor" categories. -->
  <doc>
    <field name="id">3007WFP</field>
    <field name="name">Dell Widescreen UltraSharp 3007WFP</field>
    <field name="manu">Dell, Inc.</field>
    <field name="cat">electronics</field>
    <field name="cat">monitor</field>
    <field name="features">30" TFT active matrix LCD, 2560 x 1600, .25mm dot pitch, 700:1 contrast</field>
    <field name="includes">USB cable</field>
    <field name="weight">401.6</field>
    <field name="price">2199</field>
    <field name="popularity">6</field>
    <field name="inStock">true</field>
  </doc>
</add>

View File

@ -0,0 +1,13 @@
<add>
  <!-- Sample update message: one ViewSonic monitor in the "electronics"
       and "monitor" categories. -->
  <doc>
    <field name="id">VA902B</field>
    <field name="name">ViewSonic VA902B - flat panel display - TFT - 19"</field>
    <field name="manu">ViewSonic Corp.</field>
    <field name="cat">electronics</field>
    <field name="cat">monitor</field>
    <field name="features">19" TFT active matrix LCD, 8ms response time, 1280 x 1024 native resolution</field>
    <field name="weight">190.4</field>
    <field name="price">279.95</field>
    <field name="popularity">6</field>
    <field name="inStock">true</field>
  </doc>
</add>

22
example/exampledocs/mp500.xml Executable file
View File

@ -0,0 +1,22 @@
<add>
  <!-- Sample update message: one multifunction printer that is filed
       under five "cat" values. -->
  <doc>
    <field name="id">0579B002</field>
    <field name="name">Canon PIXMA MP500 All-In-One Photo Printer</field>
    <field name="manu">Canon Inc.</field>
    <field name="cat">electronics</field>
    <field name="cat">multifunction printer</field>
    <field name="cat">printer</field>
    <field name="cat">scanner</field>
    <field name="cat">copier</field>
    <field name="features">Multifunction ink-jet color photo printer</field>
    <field name="features">Flatbed scanner, optical scan resolution of 1,200 x 2,400 dpi</field>
    <field name="features">2.5" color LCD preview screen</field>
    <field name="features">Duplex Copying</field>
    <field name="features">Printing speed up to 29ppm black, 19ppm color</field>
    <field name="features">Hi-Speed USB</field>
    <field name="features">memory card: CompactFlash, Micro Drive, SmartMedia, Memory Stick, Memory Stick Pro, SD Card, and MultiMediaCard</field>
    <field name="weight">352</field>
    <field name="price">179.99</field>
    <field name="popularity">6</field>
    <field name="inStock">true</field>
  </doc>
</add>

12
example/exampledocs/post.sh Executable file
View File

@ -0,0 +1,12 @@
#!/bin/sh
# Post every file named on the command line to the update URL below, then
# send a commit so the changes become visible.
#
# Usage: post.sh file1.xml [file2.xml ...]

URL=http://localhost:8983/solr/update

# Iterate over "$@" (not an unquoted copy of $*) and quote each expansion so
# that file names containing spaces or glob characters reach curl unchanged.
for f in "$@"; do
  echo Posting file "$f" to "$URL"
  curl "$URL" --data-binary "@$f"
done

#send the commit command to make sure all the changes are flushed and visible
curl "$URL" --data-binary '<commit/>'

16
example/exampledocs/sd500.xml Executable file
View File

@ -0,0 +1,16 @@
<add>
  <!-- Sample update message: one Canon digital camera in the "electronics"
       and "camera" categories. -->
  <doc>
    <field name="id">9885A004</field>
    <field name="name">Canon Powershot SD500</field>
    <field name="manu">Canon Inc.</field>
    <field name="cat">electronics</field>
    <field name="cat">camera</field>
    <!-- "zoom" was misspelled "zoop", which kept a search for "zoom" from matching -->
    <field name="features">3x zoom, 7.1 megapixel Digital ELPH</field>
    <field name="features">movie clips up to 640x480 @30 fps</field>
    <field name="features">2.0" TFT LCD, 118,000 pixels</field>
    <field name="features">built in flash, red-eye reduction</field>
    <field name="includes">32MB SD card, USB cable, AV cable, battery</field>
    <field name="weight">6.4</field>
    <field name="price">329.95</field>
    <field name="popularity">7</field>
    <field name="inStock">true</field>
  </doc>
</add>

35
example/exampledocs/vidcard.xml Executable file
View File

@ -0,0 +1,35 @@
<add>
  <!-- Sample update message: two graphics cards in the "electronics" and
       "graphics card" categories; both are out of stock. -->
  <doc>
    <field name="id">EN7800GTX/2DHTV/256M</field>
    <field name="name">ASUS Extreme N7800GTX/2DHTV (256 MB)</field>
    <field name="manu">ASUS Computer Inc.</field>
    <field name="cat">electronics</field>
    <field name="cat">graphics card</field>
    <field name="features">NVIDIA GeForce 7800 GTX GPU/VPU clocked at 486MHz</field>
    <field name="features">256MB GDDR3 Memory clocked at 1.35GHz</field>
    <field name="features">PCI Express x16</field>
    <field name="features">Dual DVI connectors, HDTV out, video input</field>
    <field name="features">OpenGL 2.0, DirectX 9.0</field>
    <field name="weight">16</field>
    <field name="price">479.95</field>
    <field name="popularity">7</field>
    <field name="inStock">false</field>
  </doc>

  <!-- a single add message may carry more than one document -->
  <doc>
    <field name="id">100-435805</field>
    <field name="name">ATI Radeon X1900 XTX 512 MB PCIE Video Card</field>
    <field name="manu">ATI Technologies</field>
    <field name="cat">electronics</field>
    <field name="cat">graphics card</field>
    <field name="features">ATI RADEON X1900 GPU/VPU clocked at 650MHz</field>
    <field name="features">512MB GDDR3 SDRAM clocked at 1.55GHz</field>
    <field name="features">PCI Express x16</field>
    <field name="features">dual DVI, HDTV, svideo, composite out</field>
    <field name="features">OpenGL 2.0, DirectX 9.0</field>
    <field name="weight">48</field>
    <field name="price">649.99</field>
    <field name="popularity">7</field>
    <field name="inStock">false</field>
  </doc>
</add>