changes in the schema, and some sample docs

git-svn-id: https://svn.apache.org/repos/asf/incubator/solr/trunk@380201 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Yonik Seeley 2006-02-23 19:18:22 +00:00
parent 8837b42d16
commit 1aec5a3da7
11 changed files with 285 additions and 14 deletions

View File

@ -66,8 +66,10 @@
matching across fields.
-->
<!-- Standard analyzer commonly used by Lucene developers
-->
<!-- Standard analyzer commonly used by Lucene developers -->
<fieldtype name="text_lu" class="solr.TextField" positionIncrementGap="10">
<fieldtype name="text_lu" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StandardFilterFactory"/>
@ -76,9 +78,15 @@
<filter class="solr.EnglishPorterFilterFactory"/>
</analyzer>
</fieldtype>
<!-- One could also specify an existing Analyzer implementation in Java
via the class attribute on the analyzer element:
<fieldtype name="text_lu" class="solr.TextField">
<analyzer class="org.apache.lucene.analysis.snowball.SnowballAnalyzer"/>
</fieldType>
-->
<!-- A text field that only splits on whitespace for more exact matching -->
<fieldtype name="text_ws" class="solr.TextField" positionIncrementGap="10">
<fieldtype name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
@ -86,9 +94,9 @@
<!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
words on case-change, alpha numeric boundaries, and non-alphanumeric chars
so that a query of "wifi" or "wi fi" could match a document containing Wi-Fi.
so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
Synonyms and stopwords are customized by external files, and stemming is enabled -->
<fieldtype name="text" class="solr.TextField" positionIncrementGap="10">
<fieldtype name="text" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
@ -107,6 +115,20 @@
</analyzer>
</fieldtype>
<!-- Less flexible matching, but less false matches. Probably not ideal for product names
i but may be good for SKUs. Can insert dashes in the wrong place and still match. -->
<fieldtype name="textTight" class="solar.TextField" positionIncrementGap="100" >
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
</analyzer>
</fieldtype>
</types>
@ -122,15 +144,26 @@
-->
<field name="id" type="string" indexed="true" stored="true"/>
<field name="date" type="date" indexed="true" stored="true"/>
<field name="title" type="text" indexed="true" stored="true"/>
<field name="subject" type="text" indexed="true" stored="true"/>
<field name="body" type="text" indexed="true" stored="true"/>
<field name="sku" type="textTight" indexed="true" stored="true"/>
<field name="name" type="text" indexed="true" stored="true"/>
<field name="manu" type="text" indexed="true" stored="true"/>
<field name="cat" type="text_ws" indexed="true" stored="true" multiValued="true"/>
<field name="features" type="text" indexed="true" stored="true" multiValued="true"/>
<field name="includes" type="text" indexed="true" stored="true"/>
<field name="weight" type="sfloat" indexed="true" stored="true"/>
<field name="price" type="sfloat" indexed="true" stored="true"/>
<field name="popularity" type="sint" indexed="true" stored="true"/>
<field name="inStock" type="boolean" indexed="true" stored="true"/>
<!-- catchall field, containing all other searchable text fields (implemented
via copyField further on in this schema -->
<field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
<!-- non-tokenized version of manufacturer to make it easier to sort or group
results by manufacturer. copied from "manu" via copyField -->
<field name="manu_exact" type="string" indexed="true" stored="false"/>
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns.
@ -149,8 +182,7 @@
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
</fields>
<!-- field to use to determine document uniqueness... used when
overwriting one document with another -->
<!-- field to use to determine and enforce document uniqueness. -->
<uniqueKey>id</uniqueKey>
<!-- field for the QueryParser to use when an explicit fieldname is absent -->
@ -159,9 +191,15 @@
<!-- copyField commands copy one field to another at the time a document
is added to the index. It's used either to index the same field different
ways, or to add multiple fields to the same field for easier/faster searching. -->
<copyField source="title" dest="text"/>
<copyField source="subject" dest="text"/>
<copyField source="body" dest="text"/>
<copyField source="id" dest="sku"/>
<copyField source="cat" dest="text"/>
<copyField source="name" dest="text"/>
<copyField source="manu" dest="text"/>
<copyField source="features" dest="text"/>
<copyField source="includes" dest="text"/>
<copyField source="manu" dest="manu_exact"/>
<!-- Similarity is the scoring routine for each document vs a query.
A custom similarity may be specified here, but the default is fine

29
example/exampledocs/hd.xml Executable file
View File

@ -0,0 +1,29 @@
<add>
<doc>
<field name="id">SP2514N</field>
<field name="name">Samsung SpinPoint P120 SP2514N - hard drive - 250 GB - ATA-133</field>
<field name="manu">Samsung Electronics Co. Ltd.</field>
<field name="cat">electronics</field>
<field name="cat">hard drive</field>
<field name="features">7200RPM, 8MB cache, IDE Ultra ATA-133</field>
<field name="features">NoiseGuard, SilentSeek technology, Fluid Dynamic Bearing (FDB) motor</field>
<field name="price">92</field>
<field name="popularity">6</field>
<field name="inStock">true</field>
</doc>
<doc>
<field name="id">6H500F0</field>
<field name="name">Maxtor DiamondMax 11 - hard drive - 500 GB - SATA-300</field>
<field name="manu">Maxtor Corp.</field>
<field name="cat">electronics</field>
<field name="cat">hard drive</field>
<field name="features">SATA 3.0Gb/s, NCQ</field>
<field name="features">8.5ms seek</field>
<field name="features">16MB cache</field>
<field name="price">350</field>
<field name="popularity">6</field>
<field name="inStock">true</field>
</doc>
</add>

View File

@ -0,0 +1,33 @@
<add>
<doc>
<field name="id">F8V7067-APL-KIT</field>
<field name="name">Belkin Mobile Power Cord for iPod w/ Dock</field>
<field name="manu">Belkin</field>
<field name="cat">electronics</field>
<field name="cat">connector</field>
<field name="features">car power adapter, white</field>
<field name="weight">4</field>
<field name="price">19.95</field>
<field name="popularity">1</field>
<field name="inStock">false</field>
</doc>
<doc>
<field name="id">IW-02</field>
<field name="name">iPod &amp; iPod Mini USB 2.0 Cable</field>
<field name="manu">Belkin</field>
<field name="cat">electronics</field>
<field name="cat">connector</field>
<field name="features">car power adapter for iPod, white</field>
<field name="weight">2</field>
<field name="price">11.50</field>
<field name="popularity">1</field>
<field name="inStock">false</field>
</doc>
</add>

View File

@ -0,0 +1,18 @@
<add><doc>
<field name="id">MA147LL/A</field>
<field name="name">Apple 60 GB iPod with Video Playback Black</field>
<field name="manu">Apple Computer Inc.</field>
<field name="cat">electronics</field>
<field name="cat">music</field>
<field name="features">iTunes, Podcasts, Audiobooks</field>
<field name="features">Stores up to 15,000 songs, 25,000 photos, or 150 hours of video</field>
<field name="features">2.5-inch, 320x240 color TFT LCD display with LED backlight</field>
<field name="features">Up to 20 hours of battery life</field>
<field name="features">Plays AAC, MP3, WAV, AIFF, Audible, Apple Lossless, H.264 video</field>
<field name="features">Notes, Calendar, Phone book, Hold button, Date display, Photo wallet, Built-in games, JPEG photo playback, Upgradeable firmware, USB 2.0 compatibility, Playback speed control, Rechargeable capability, Battery level indication</field>
<field name="includes">earbud headphones, USB cable</field>
<field name="weight">5.5</field>
<field name="price">399.00</field>
<field name="popularity">10</field>
<field name="inStock">true</field>
</doc></add>

41
example/exampledocs/mem.xml Executable file
View File

@ -0,0 +1,41 @@
<add>
<doc>
<field name="id">TWINX2048-3200PRO</field>
<field name="name">CORSAIR XMS 2GB (2 x 1GB) 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) Dual Channel Kit System Memory - Retail</field>
<field name="manu">Corsair Microsystems Inc.</field>
<field name="cat">electronics</field>
<field name="cat">memory</field>
<field name="features">CAS latency 2, 2-3-3-6 timing, 2.75v, unbuffered, heat-spreader</field>
<field name="price">185</field>
<field name="popularity">5</field>
<field name="inStock">true</field>
</doc>
<doc>
<field name="id">VS1GB400C3</field>
<field name="name">CORSAIR ValueSelect 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - Retail</field>
<field name="manu">Corsair Microsystems Inc.</field>
<field name="cat">electronics</field>
<field name="cat">memory</field>
<field name="price">74.99</field>
<field name="popularity">7</field>
<field name="inStock">true</field>
</doc>
<doc>
<field name="id">VDBDB1A16</field>
<field name="name">A-DATA V-Series 1GB 184-Pin DDR SDRAM Unbuffered DDR 400 (PC 3200) System Memory - OEM</field>
<field name="manu">A-DATA Technology Inc.</field>
<field name="cat">electronics</field>
<field name="cat">memory</field>
<field name="features">CAS latency 3, 2.7v</field>
<!-- note: price is missing on this one -->
<field name="popularity">5</field>
<field name="inStock">true</field>
</doc>
</add>

14
example/exampledocs/monitor.xml Executable file
View File

@ -0,0 +1,14 @@
<add><doc>
<field name="id">3007WFP</field>
<field name="name">Dell Widescreen UltraSharp 3007WFP</field>
<field name="manu">Dell, Inc.</field>
<field name="cat">electronics</field>
<field name="cat">monitor</field>
<field name="features">30" TFT active matrix LCD, 2560 x 1600, .25mm dot pitch, 700:1 contrast</field>
<field name="includes">USB cable</field>
<field name="weight">401.6</field>
<field name="price">2199</field>
<field name="popularity">6</field>
<field name="inStock">true</field>
</doc></add>

View File

@ -0,0 +1,13 @@
<add><doc>
<field name="id">VA902B</field>
<field name="name">ViewSonic VA902B - flat panel display - TFT - 19"</field>
<field name="manu">ViewSonic Corp.</field>
<field name="cat">electronics</field>
<field name="cat">monitor</field>
<field name="features">19" TFT active matrix LCD, 8ms response time, 1280 x 1024 native resolution</field>
<field name="weight">190.4</field>
<field name="price">279.95</field>
<field name="popularity">6</field>
<field name="inStock">true</field>
</doc></add>

22
example/exampledocs/mp500.xml Executable file
View File

@ -0,0 +1,22 @@
<add><doc>
<field name="id">0579B002</field>
<field name="name">Canon PIXMA MP500 All-In-One Photo Printer</field>
<field name="manu">Canon Inc.</field>
<field name="cat">electronics</field>
<field name="cat">multifunction printer</field>
<field name="cat">printer</field>
<field name="cat">scanner</field>
<field name="cat">copier</field>
<field name="features">Multifunction ink-jet color photo printer</field>
<field name="features">Flatbed scanner, optical scan resolution of 1,200 x 2,400 dpi</field>
<field name="features">2.5" color LCD preview screen</field>
<field name="features">Duplex Copying</field>
<field name="features">Printing speed up to 29ppm black, 19ppm color</field>
<field name="features">Hi-Speed USB</field>
<field name="features">memory card: CompactFlash, Micro Drive, SmartMedia, Memory Stick, Memory Stick Pro, SD Card, and MultiMediaCard</field>
<field name="weight">352</field>
<field name="price">179.99</field>
<field name="popularity">6</field>
<field name="inStock">true</field>
</doc></add>

12
example/exampledocs/post.sh Executable file
View File

@ -0,0 +1,12 @@
#!/bin/sh
FILES=$*
URL=http://localhost:8983/solr/update
for f in $FILES; do
echo Posting file $f to $URL
curl $URL --data-binary @$f
done
#send the commit command to make sure all the changes are flushed and visible
curl $URL --data-binary '<commit/>'

16
example/exampledocs/sd500.xml Executable file
View File

@ -0,0 +1,16 @@
<add><doc>
<field name="id">9885A004</field>
<field name="name">Canon Powershot SD500</field>
<field name="manu">Canon Inc.</field>
<field name="cat">electronics</field>
<field name="cat">camera</field>
<field name="features">3x zoop, 7.1 megapixel Digital ELPH</field>
<field name="features">movie clips up to 640x480 @30 fps</field>
<field name="features">2.0" TFT LCD, 118,000 pixels</field>
<field name="features">built in flash, red-eye reduction</field>
<field name="includes">32MB SD card, USB cable, AV cable, battery</field>
<field name="weight">6.4</field>
<field name="price">329.95</field>
<field name="popularity">7</field>
<field name="inStock">true</field>
</doc></add>

35
example/exampledocs/vidcard.xml Executable file
View File

@ -0,0 +1,35 @@
<add>
<doc>
<field name="id">EN7800GTX/2DHTV/256M</field>
<field name="name">ASUS Extreme N7800GTX/2DHTV (256 MB)</field>
<field name="manu">ASUS Computer Inc.</field>
<field name="cat">electronics</field>
<field name="cat">graphics card</field>
<field name="features">NVIDIA GeForce 7800 GTX GPU/VPU clocked at 486MHz</field>
<field name="features">256MB GDDR3 Memory clocked at 1.35GHz</field>
<field name="features">PCI Express x16</field>
<field name="features">Dual DVI connectors, HDTV out, video input</field>
<field name="features">OpenGL 2.0, DirectX 9.0</field>
<field name="weight">16</field>
<field name="price">479.95</field>
<field name="popularity">7</field>
<field name="inStock">false</field>
</doc>
<!-- yes, you can add more than one document at a time -->
<doc>
<field name="id">100-435805</field>
<field name="name">ATI Radeon X1900 XTX 512 MB PCIE Video Card</field>
<field name="manu">ATI Technologies</field>
<field name="cat">electronics</field>
<field name="cat">graphics card</field>
<field name="features">ATI RADEON X1900 GPU/VPU clocked at 650MHz</field>
<field name="features">512MB GDDR3 SDRAM clocked at 1.55GHz</field>
<field name="features">PCI Express x16</field>
<field name="features">dual DVI, HDTV, svideo, composite out</field>
<field name="features">OpenGL 2.0, DirectX 9.0</field>
<field name="weight">48</field>
<field name="price">649.99</field>
<field name="popularity">7</field>
<field name="inStock">false</field>
</doc>
</add>