initial version

git-svn-id: https://svn.apache.org/repos/asf/incubator/solr/trunk@372455 13f79535-47bb-0310-9956-ffa450edef68
Yonik Seeley 2006-01-26 05:37:29 +00:00
parent 77e6e7313c
commit 148b4dbc92
175 changed files with 24183 additions and 0 deletions

View File

@@ -0,0 +1,24 @@
#change the query cache size to 3 and the autowarm size to 2 for this test
<commit/>
val_s:A
val_s:B
val_s:C
val_s:D
#B,C,D should be in cache
val_s:A
#miss, now C,D,A should be in cache
<commit/>
#should see old{lookups=5, hits=0, size=3}, new{size=2}
#now D,A should be autowarmed in new
val_s:C
#miss, now cache=D,A,C
<commit/>
#should see old{lookups=1, hits=0, size=3}, new{size=2}
#now A,C should be autowarmed in new
val_s:A
val_s:C
<commit/>
#should see old{lookups=2, hits=2, size=0}
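
The assertions above describe an LRU query cache of size 3 that autowarms its 2 most-recently-used entries into the replacement cache at commit time. Below is a minimal Java sketch of that eviction and warming behavior; the class and method names are hypothetical, and the real cache regenerates warmed entries against the new searcher rather than copying cached values.

import java.util.LinkedHashMap;
import java.util.Map;

// Hypothetical size-bounded LRU cache with lookup/hit statistics and a
// copy-the-N-most-recently-used "autowarm" step, mirroring the test above.
class LruQueryCache<K, V> {
    private final int size;
    private final LinkedHashMap<K, V> map;
    long lookups = 0, hits = 0;

    LruQueryCache(int size) {
        this.size = size;
        // accessOrder=true: iteration runs least- to most-recently-used
        this.map = new LinkedHashMap<K, V>(size, 0.75f, true) {
            protected boolean removeEldestEntry(Map.Entry<K, V> eldest) {
                return size() > LruQueryCache.this.size;
            }
        };
    }

    synchronized V get(K key) {
        lookups++;
        V v = map.get(key);
        if (v != null) hits++;
        return v;
    }

    synchronized void put(K key, V val) { map.put(key, val); }

    // On commit: seed the fresh cache with the n most-recently-used entries.
    synchronized LruQueryCache<K, V> autowarm(int n) {
        LruQueryCache<K, V> fresh = new LruQueryCache<K, V>(size);
        int skip = Math.max(0, map.size() - n), i = 0;
        for (Map.Entry<K, V> e : map.entrySet())
            if (i++ >= skip) fresh.map.put(e.getKey(), e.getValue());
        return fresh;
    }
}

With size=3 and autowarm=2, the lookups A,B,C,D leave B,C,D cached, the second A is a miss that evicts B, and the commit carries the two most-recently-used entries (D, A) into the new cache, matching the old{...}/new{...} statistics the comments expect.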

View File

@@ -0,0 +1,4 @@
PATH=c:/cygwin/bin
c:/cygwin/bin/bash.exe -c "echo handler called... cwd=`pwd` MYVAR=%MYVAR% > commit.outfile"
exit 33

View File

@@ -0,0 +1 @@
userName:Alex;startDate top 2;

View File

@@ -0,0 +1,540 @@
#compact the index, keep things from getting out of hand
<optimize/>
#test query
qlkciyopsbgzyvkylsjhchghjrdf %//result[@numFound="0"]
#test escaping of ";"
<delete><id>42</id></delete>
<add><doc><field name="id">42</field><field name="val_s">aa;bb</field></doc></add>
<commit/>
id:42 AND val_s:aa\;bb %//*[@numFound="1"]
id:42 AND val_s:"aa;bb" %//*[@numFound="1"]
id:42 AND val_s:"aa" %//*[@numFound="0"]
#test allowDups default of false
<delete><id>42</id></delete>
<add><doc><field name="id">42</field><field name="val_s">AAA</field></doc></add>
<add><doc><field name="id">42</field><field name="val_s">BBB</field></doc></add>
<commit/>
id:42 %//*[@numFound="1"] %//str[.="BBB"]
<add><doc><field name="id">42</field><field name="val_s">CCC</field></doc></add>
<add><doc><field name="id">42</field><field name="val_s">DDD</field></doc></add>
<commit/>
id:42 %//*[@numFound="1"] %//str[.="DDD"]
<delete><id>42</id></delete>
#test deletes
<delete><query>id:[100 TO 110]</query></delete>
<add allowDups="false"><doc><field name="id">101</field></doc></add>
<add allowDups="false"><doc><field name="id">101</field></doc></add>
<add allowDups="true"><doc><field name="id">105</field></doc></add>
<add allowDups="false"><doc><field name="id">102</field></doc></add>
<add allowDups="true"><doc><field name="id">103</field></doc></add>
<add allowDups="false"><doc><field name="id">101</field></doc></add>
<commit/>
id:[100 TO 110] %//*[@numFound="4"]
<delete><id>102</id></delete>
<commit/>
id:[100 TO 110] %//*[@numFound="3"]
<delete><query>id:105</query></delete>
<commit/>
id:[100 TO 110] %//*[@numFound="2"]
<delete><query>id:[100 TO 110]</query></delete>
<commit/>
id:[100 TO 110] %//*[@numFound="0"]
#test range
<delete><id>44</id></delete>
<add allowDups="true"><doc><field name="id">44</field><field name="val_s">apple</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="val_s">banana</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="val_s">pear</field></doc></add>
<commit/>
val_s:[a TO z] %//*[@numFound="3"] %*[count(//doc)=3] %//*[@start="0"]
val_s:[a TO z] %%start=2&limit=5 %//*[@numFound="3"] %*[count(//doc)=1] %*//doc[1]/str[.="pear"] %//*[@start="2"]
val_s:[a TO z] %%start=3&limit=5 %//*[@numFound="3"] %*[count(//doc)=0]
val_s:[a TO z] %%start=4&limit=5 %//*[@numFound="3"] %*[count(//doc)=0]
val_s:[a TO z] %%start=25&limit=5 %//*[@numFound="3"] %*[count(//doc)=0]
val_s:[a TO z] %%start=0&limit=1 %//*[@numFound="3"] %*[count(//doc)=1] %*//doc[1]/str[.="apple"]
val_s:[a TO z] %%start=0&limit=2 %//*[@numFound="3"] %*[count(//doc)=2] %*//doc[2]/str[.="banana"]
val_s:[a TO z] %%start=1&limit=1 %//*[@numFound="3"] %*[count(//doc)=1] %*//doc[1]/str[.="banana"]
val_s:[a TO z] %%start=3&limit=1 %//*[@numFound="3"] %*[count(//doc)=0]
val_s:[a TO z] %%start=4&limit=1 %//*[@numFound="3"] %*[count(//doc)=0]
val_s:[a TO z] %%start=1&limit=0 %//*[@numFound="3"] %*[count(//doc)=0]
val_s:[a TO z] %%start=0&limit=0 %//*[@numFound="3"] %*[count(//doc)=0]
val_s:[a TO z];val_s asc %%start=0&limit=0 %//*[@numFound="3"] %*[count(//doc)=0]
val_s:[a TO z];val_s desc %%start=0&limit=0 %//*[@numFound="3"] %*[count(//doc)=0]
val_s:[a TO b] %//*[@numFound="1"]
val_s:[a TO cat] %//*[@numFound="2"]
val_s:[a TO *] %//*[@numFound="3"]
val_s:[* TO z] %//*[@numFound="3"]
val_s:[* TO *] %//*[@numFound="3"]
val_s:[apple TO pear] %//*[@numFound="3"]
val_s:[bear TO boar] %//*[@numFound="0"]
val_s:[a TO a] %//*[@numFound="0"]
val_s:[apple TO apple] %//*[@numFound="1"]
val_s:{apple TO pear} %//*[@numFound="1"]
val_s:{a TO z} %//*[@numFound="3"]
val_s:{* TO *} %//*[@numFound="3"]
#test rangequery within a boolean query
id:44 AND val_s:[a TO z] %//*[@numFound="3"]
id:44 OR val_s:[a TO z] %//*[@numFound="3"]
val_s:[a TO b] OR val_s:[b TO z] %//*[@numFound="3"]
+val_s:[a TO b] -val_s:[b TO z] %//*[@numFound="1"]
-val_s:[a TO b] +val_s:[b TO z] %//*[@numFound="2"]
val_s:[a TO c] AND val_s:[apple TO z] %//*[@numFound="2"]
val_s:[a TO c] AND val_s:[a TO apple] %//*[@numFound="1"]
id:44 AND (val_s:[a TO c] AND val_s:[a TO apple]) %//*[@numFound="1"]
(val_s:[apple TO apple] OR val_s:[a TO c]) AND (val_s:[b TO c] OR val_s:[b TO b]) %//*[@numFound="1"] %//str[.="banana"]
(val_s:[apple TO apple] AND val_s:[a TO c]) OR (val_s:[p TO z] AND val_s:[a TO z]) %//*[@numFound="2"] %//str[.="apple"] %//str[.="pear"]
#check for docs that appear more than once in a range
<add allowDups="true"><doc><field name="id">44</field><field name="val_s">apple</field><field name="val_s">banana</field></doc></add>
<commit/>
val_s:[* TO *] OR val_s:[* TO *] %//*[@numFound="4"]
val_s:[* TO *] AND val_s:[* TO *] %//*[@numFound="4"]
val_s:[* TO *] %//*[@numFound="4"]
#<delete><id>44</id></delete>
<add overwritePending="true" overwriteCommitted="true"><doc><field name="id">44</field><field name="text">red riding hood</field></doc></add>
<commit/>
id:44 AND red %//@numFound[.="1"] %*[count(//doc)=1]
id:44 AND ride %//@numFound[.="1"]
id:44 AND blue %//@numFound[.="0"]
#allow duplicates
<delete><id>44</id></delete>
<add allowDups="true" overwriteCommitted="false" overwritePending="false"><doc><field name="id">44</field><field name="text">red riding hood</field></doc></add>
<add allowDups="true" overwriteCommitted="false" overwritePending="false"><doc><field name="id">44</field><field name="text">big bad wolf</field></doc></add>
<commit/>
id:44 %//@numFound[.="2"]
id:44 AND red %//@numFound[.="1"] %*[count(//doc)=1]
id:44 AND wolf %//@numFound[.="1"] %*[count(//doc)=1]
+id:44 red wolf %//@numFound[.="2"]
#test removal of multiples w/o adding anything else
<delete><id>44</id></delete>
<commit/>
id:44 %//@numFound[.="0"]
#untokenized string type
<delete><id>44</id></delete>
<add><doc><field name="id">44</field><field name="ssto">and a 10.4 ?</field></doc></add>
<commit/>
id:44 %//str[.="and a 10.4 ?"]
<delete><id>44</id></delete>
<add><doc><field name="id">44</field><field name="sind">abc123</field></doc></add>
<commit/>
#TODO: how to search for something with spaces....
sind:abc123 %//@numFound[.="1"] %*[count(//@name[.="sind"])=0] %*[count(//@name[.="id"])=1]
<delete><id>44</id></delete>
<delete><id>44</id></delete>
<add><doc><field name="id">44</field><field name="sindsto">abc123</field></doc></add>
<commit/>
#TODO: how to search for something with spaces....
sindsto:abc123 %//str[.="abc123"]
#test output of multivalued fields
<delete><id>44</id></delete>
<add><doc><field name="id">44</field><field name="title">yonik3</field><field name="title" boost="2">yonik4</field></doc></add>
<commit></commit>
id:44 %//arr[@name="title"][./str="yonik3" and ./str="yonik4"] %*[count(//@name[.="title"])=1]
title:yonik3 %//@numFound[.>"0"]
title:yonik4 %//@numFound[.>"0"]
title:yonik5 %//@numFound[.="0"]
<delete><query>title:yonik4</query></delete>
<commit/>
id:44 %//@numFound[.="0"]
#not visible until commit
<delete><id>44</id></delete>
<commit/>
<add><doc><field name="id">44</field></doc></add>
id:44 %//@numFound[.="0"]
<commit/>
id:44 %//@numFound[.="1"]
#test configurable stop words
<delete><id>44</id></delete>
<add><doc><field name="id">44</field><field name="teststop">world stopworda view</field></doc></add>
<commit/>
+id:44 +teststop:world %//@numFound[.="1"]
teststop:stopworda %//@numFound[.="0"]
#test ignoreCase stop words
<delete><id>44</id></delete>
<add><doc><field name="id">44</field><field name="stopfilt">world AnD view</field></doc></add>
<commit/>
+id:44 +stopfilt:world %//@numFound[.="1"]
stopfilt:"and" %//@numFound[.="0"]
stopfilt:"AND" %//@numFound[.="0"]
stopfilt:"AnD" %//@numFound[.="0"]
#test dynamic field types
<delete fromPending="true" fromCommitted="true"><id>44</id></delete>
<add><doc><field name="id">44</field><field name="gack_i">51778</field><field name="t_name">cats</field></doc></add>
<commit/>
#test if the dyn fields got added
id:44 %*[count(//doc/*)>=3] %//int[@name="gack_i"][.="51778"] %//str[@name="t_name"][.="cats"]
#now test if we can query by a dynamic field (requires analyzer support)
t_name:cat %//str[@name="t_name" and .="cats"]
#check that deleteByQuery works for dynamic fields
<delete><query>t_name:cat</query></delete>
<commit/>
t_name:cat %//@numFound[.="0"]
#test that longest dynamic field match happens first
<add><doc><field name="id">44</field><field name="xaa">mystr</field><field name="xaaa">12321</field></doc></add>
<commit/>
id:44 %//str[@name="xaa"][.="mystr"] %//int[@name="xaaa"][.="12321"]
#test integer ranges and sorting
<delete><id>44</id></delete>
<add allowDups="true"><doc><field name="id">44</field><field name="num_i">1234567890</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_i">10</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_i">1</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_i">2</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_i">15</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_i">-1</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_i">-987654321</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_i">2147483647</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_i">-2147483648</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_i">0</field></doc></add>
<commit/>
id:44 %*[count(//doc)=10]
num_i:2147483647 %//@numFound[.="1"] %//int[.="2147483647"]
num_i:"-2147483648" %//@numFound[.="1"] %//int[.="-2147483648"]
id:44;num_i asc; %//doc[1]/int[.="-2147483648"] %//doc[last()]/int[.="2147483647"]
id:44;num_i desc; %//doc[1]/int[.="2147483647"] %//doc[last()]/int[.="-2147483648"]
num_i:[0 TO 9] %*[count(//doc)=3]
num_i:[-2147483648 TO 2147483647] %*[count(//doc)=10]
num_i:[-10 TO -1] %*[count(//doc)=1]
#test long ranges and sorting
<delete><id>44</id></delete>
<add allowDups="true"><doc><field name="id">44</field><field name="num_l">1234567890</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_l">10</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_l">1</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_l">2</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_l">15</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_l">-1</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_l">-987654321</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_l">9223372036854775807</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_l">-9223372036854775808</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_l">0</field></doc></add>
<commit/>
id:44 %*[count(//doc)=10]
num_l:9223372036854775807 %//@numFound[.="1"] %//long[.="9223372036854775807"]
num_l:"-9223372036854775808" %//@numFound[.="1"] %//long[.="-9223372036854775808"]
id:44;num_l asc; %//doc[1]/long[.="-9223372036854775808"] %//doc[last()]/long[.="9223372036854775807"]
id:44;num_l desc; %//doc[1]/long[.="9223372036854775807"] %//doc[last()]/long[.="-9223372036854775808"]
num_l:[-1 TO 9] %*[count(//doc)=4]
num_l:[-9223372036854775808 TO 9223372036854775807] %*[count(//doc)=10]
num_l:[-10 TO -1] %*[count(//doc)=1]
#test binary float ranges and sorting
<delete><id>44</id></delete>
<add allowDups="true"><doc><field name="id">44</field><field name="num_f">1.4142135</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_f">Infinity</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_f">-Infinity</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_f">NaN</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_f">2</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_f">-1</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_f">-987654321</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_f">-999999.99</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_f">-1e20</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_f">0</field></doc></add>
<commit/>
id:44 %*[count(//doc)=10]
num_f:Infinity %//@numFound[.="1"] %//float[.="Infinity"]
num_f:"-Infinity" %//@numFound[.="1"] %//float[.="-Infinity"]
num_f:"NaN" %//@numFound[.="1"] %//float[.="NaN"]
num_f:"-1e20" %//@numFound[.="1"]
id:44;num_f asc; %//doc[1]/float[.="-Infinity"] %//doc[last()]/float[.="NaN"]
id:44;num_f desc; %//doc[1]/float[.="NaN"] %//doc[last()]/float[.="-Infinity"]
num_f:[-1 TO 2] %*[count(//doc)=4]
num_f:[-Infinity TO Infinity] %*[count(//doc)=9]
#test binary double ranges and sorting
<delete><id>44</id></delete>
<add allowDups="true"><doc><field name="id">44</field><field name="num_d">1.4142135</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_d">Infinity</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_d">-Infinity</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_d">NaN</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_d">2</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_d">-1</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_d">1e-100</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_d">-999999.99</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_d">-1e100</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="num_d">0</field></doc></add>
<commit/>
id:44 %*[count(//doc)=10]
num_d:Infinity %//@numFound[.="1"] %//double[.="Infinity"]
num_d:"-Infinity" %//@numFound[.="1"] %//double[.="-Infinity"]
num_d:"NaN" %//@numFound[.="1"] %//double[.="NaN"]
num_d:"-1e100" %//@numFound[.="1"]
num_d:"1e-100" %//@numFound[.="1"]
id:44;num_d asc; %//doc[1]/double[.="-Infinity"] %//doc[last()]/double[.="NaN"]
id:44;num_d desc; %//doc[1]/double[.="NaN"] %//doc[last()]/double[.="-Infinity"]
num_d:[-1 TO 2] %*[count(//doc)=5]
num_d:[-Infinity TO Infinity] %*[count(//doc)=9]
#test sorting on multiple fields
<delete><id>44</id></delete>
<add allowDups="true"><doc><field name="id">44</field><field name="a_i">10</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="a_i">1</field><field name="b_i">100</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="a_i">-1</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="a_i">15</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="a_i">1</field><field name="b_i">50</field></doc></add>
<add allowDups="true"><doc><field name="id">44</field><field name="a_i">0</field></doc></add>
<commit/>
id:44 %*[count(//doc)=6]
id:44; a_i asc,b_i desc %*[count(//doc)=6] %//doc[3]/int[.="100"] %//doc[4]/int[.="50"]
id:44;a_i asc , b_i asc; %*[count(//doc)=6] %//doc[3]/int[.="50"] %//doc[4]/int[.="100"]
id:44;a_i asc; %*[count(//doc)=6] %//doc[1]/int[.="-1"] %//doc[last()]/int[.="15"]
id:44;a_i asc , score top; %*[count(//doc)=6] %//doc[1]/int[.="-1"] %//doc[last()]/int[.="15"]
id:44; score top , a_i top, b_i bottom ; %*[count(//doc)=6] %//doc[last()]/int[.="-1"] %//doc[1]/int[.="15"] %//doc[3]/int[.="50"] %//doc[4]/int[.="100"]
#test sorting with some docs missing the sort field
<delete><query>id_i:[1000 TO 1010]</query></delete>
<add allowDups="true"><doc><field name="id_i">1000</field><field name="a_i">1</field></doc></add>
<add allowDups="true"><doc><field name="id_i">1001</field><field name="a_i">10</field></doc></add>
<add allowDups="true"><doc><field name="id_i">1002</field><field name="a_i">1</field><field name="b_i">100</field></doc></add>
<add allowDups="true"><doc><field name="id_i">1003</field><field name="a_i">-1</field></doc></add>
<add allowDups="true"><doc><field name="id_i">1004</field><field name="a_i">15</field></doc></add>
<add allowDups="true"><doc><field name="id_i">1005</field><field name="a_i">1</field><field name="b_i">50</field></doc></add>
<add allowDups="true"><doc><field name="id_i">1006</field><field name="a_i">0</field></doc></add>
<commit/>
id_i:[1000 TO 1010] %*[count(//doc)=7]
id_i:[1000 TO 1010]; b_i asc %*[count(//doc)=7] %//doc[1]/int[.="50"] %//doc[2]/int[.="100"]
id_i:[1000 TO 1010]; b_i desc %*[count(//doc)=7] %//doc[1]/int[.="100"] %//doc[2]/int[.="50"]
id_i:[1000 TO 1010]; a_i asc,b_i desc %*[count(//doc)=7] %//doc[3]/int[.="100"] %//doc[4]/int[.="50"] %//doc[5]/int[.="1000"]
id_i:[1000 TO 1010]; a_i asc,b_i asc %*[count(//doc)=7] %//doc[3]/int[.="50"] %//doc[4]/int[.="100"] %//doc[5]/int[.="1000"]
#test prefix query
<delete><query>val_s:[* TO *]</query></delete>
<add><doc><field name="id">100</field><field name="val_s">apple</field></doc></add>
<add><doc><field name="id">101</field><field name="val_s">banana</field></doc></add>
<add><doc><field name="id">102</field><field name="val_s">apple</field></doc></add>
<add><doc><field name="id">103</field><field name="val_s">pearing</field></doc></add>
<add><doc><field name="id">104</field><field name="val_s">pear</field></doc></add>
<add><doc><field name="id">105</field><field name="val_s">appalling</field></doc></add>
<add><doc><field name="id">106</field><field name="val_s">pearson</field></doc></add>
<add><doc><field name="id">107</field><field name="val_s">port</field></doc></add>
<commit/>
val_s:a* %//*[@numFound="3"]
val_s:p* %//*[@numFound="4"]
#val_s:* %//*[@numFound="8"]
<delete><query>id:[100 TO 110]</query></delete>
#test copyField functionality
<add><doc><field name="id">42</field><field name="title">How Now4 brown Cows</field></doc></add>
<commit/>
id:42 AND title:Now %*[count(//doc)=0]
id:42 AND title_lettertok:Now %*[count(//doc)=1]
id:42 AND title:cow %*[count(//doc)=0]
id:42 AND title_stemmed:cow %*[count(//doc)=1]
id:42 AND text:cow %*[count(//doc)=1]
#test slop
<add><doc><field name="id">42</field><field name="text">foo bar</field></doc></add>
<commit/>
id:42 AND text:"foo bar" %*[count(//doc)=1]
id:42 AND text:"foo" %*[count(//doc)=1]
id:42 AND text:"bar" %*[count(//doc)=1]
id:42 AND text:"bar foo" %*[count(//doc)=0]
id:42 AND text:"bar foo"~2 %*[count(//doc)=1]
#intra-word delimiter testing (WordDelimiterFilter)
<add><doc><field name="id">42</field><field name="subword">foo bar</field></doc></add>
<commit/>
id:42 AND subword:"foo bar" %*[count(//doc)=1]
id:42 AND subword:"foo" %*[count(//doc)=1]
id:42 AND subword:"bar" %*[count(//doc)=1]
id:42 AND subword:"bar foo" %*[count(//doc)=0]
id:42 AND subword:"bar foo"~2 %*[count(//doc)=1]
id:42 AND subword:"foo/bar" %*[count(//doc)=1]
id:42 AND subword:"foobar" %*[count(//doc)=0]
<add><doc><field name="id">42</field><field name="subword">foo-bar</field></doc></add>
<commit/>
id:42 AND subword:"foo bar" %*[count(//doc)=1]
id:42 AND subword:"foo" %*[count(//doc)=1]
id:42 AND subword:"bar" %*[count(//doc)=1]
id:42 AND subword:"bar foo" %*[count(//doc)=0]
id:42 AND subword:"bar foo"~2 %*[count(//doc)=1]
id:42 AND subword:"foo/bar" %*[count(//doc)=1]
id:42 AND subword:foobar %*[count(//doc)=1]
<add><doc><field name="id">42</field><field name="subword">Canon PowerShot SD500 7MP</field></doc></add>
<commit/>
id:42 AND subword:"power-shot" %*[count(//doc)=1]
id:42 AND subword:"power shot sd 500" %*[count(//doc)=1]
id:42 AND subword:"powershot" %*[count(//doc)=1]
id:42 AND subword:"SD-500" %*[count(//doc)=1]
id:42 AND subword:"SD500" %*[count(//doc)=1]
id:42 AND subword:"SD500-7MP" %*[count(//doc)=1]
id:42 AND subword:"PowerShotSD500-7MP" %*[count(//doc)=1]
<add><doc><field name="id">42</field><field name="subword">Wi-Fi</field></doc></add>
<commit/>
id:42 AND subword:wifi %*[count(//doc)=1]
id:42 AND subword:wi+=fi %*[count(//doc)=1]
id:42 AND subword:wi+=fi %*[count(//doc)=1]
id:42 AND subword:WiFi %*[count(//doc)=1]
id:42 AND subword:"wi fi" %*[count(//doc)=1]
<add><doc><field name="id">42</field><field name="subword">'I.B.M' A's,B's,C's</field></doc></add>
<commit/>
id:42 AND subword:"'I.B.M.'" %*[count(//doc)=1]
id:42 AND subword:I.B.M %*[count(//doc)=1]
id:42 AND subword:IBM %*[count(//doc)=1]
id:42 AND subword:I--B--M %*[count(//doc)=1]
id:42 AND subword:"I B M" %*[count(//doc)=1]
id:42 AND subword:IBM's %*[count(//doc)=1]
id:42 AND subword:IBM'sx %*[count(//doc)=0]
#this one fails since IBM and ABC are separated by two tokens
#id:42 AND subword:IBM's-ABC's %*[count(//doc)=1]
id:42 AND subword:"IBM's-ABC's"~2 %*[count(//doc)=1]
id:42 AND subword:"A's B's-C's" %*[count(//doc)=1]
<add><doc><field name="id">42</field><field name="subword">Sony KDF-E50A10</field></doc></add>
<commit/>
#check for exact match:
# Sony KDF E/KDFE 50 A 10 (this is how it's indexed)
# Sony KDF E 50 A 10 (and how it's queried)
id:42 AND subword:"Sony KDF-E50A10" %*[count(//doc)=1]
id:42 AND subword:10 %*[count(//doc)=1]
id:42 AND subword:Sony %*[count(//doc)=1]
#this one fails without slop since Sony and KDFE have a token in between
#id:42 AND subword:SonyKDFE50A10 %*[count(//doc)=1]
id:42 AND subword:"SonyKDFE50A10"~10 %*[count(//doc)=1]
id:42 AND subword:"Sony KDF E-50-A-10" %*[count(//doc)=1]
<add><doc><field name="id">42</field><field name="subword">http://www.yahoo.com</field></doc></add>
<commit/>
id:42 AND subword:yahoo %*[count(//doc)=1]
id:42 AND subword:www.yahoo.com %*[count(//doc)=1]
id:42 AND subword:http://www.yahoo.com %*[count(//doc)=1]
<add><doc><field name="id">42</field><field name="subword">--Q 1-- W2 E-3 Ok xY 4R 5-T *6-Y- 7-8-- 10A-B</field></doc></add>
<commit/>
id:42 AND subword:Q %*[count(//doc)=1]
id:42 AND subword:1 %*[count(//doc)=1]
id:42 AND subword:"w 2" %*[count(//doc)=1]
id:42 AND subword:"e 3" %*[count(//doc)=1]
id:42 AND subword:"o k" %*[count(//doc)=0]
id:42 AND subword:"ok" %*[count(//doc)=1]
id:42 AND subword:"x y" %*[count(//doc)=1]
id:42 AND subword:"xy" %*[count(//doc)=1]
id:42 AND subword:"4 r" %*[count(//doc)=1]
id:42 AND subword:"5 t" %*[count(//doc)=1]
id:42 AND subword:"5 t" %*[count(//doc)=1]
id:42 AND subword:"6 y" %*[count(//doc)=1]
id:42 AND subword:"7 8" %*[count(//doc)=1]
id:42 AND subword:"78" %*[count(//doc)=1]
id:42 AND subword:"10 A+B" %*[count(//doc)=1]
<add><doc><field name="id">42</field><field name="subword">FooBarBaz</field></doc></add>
<add><doc><field name="id">42</field><field name="subword">FooBar10</field></doc></add>
<add><doc><field name="id">42</field><field name="subword">10FooBar</field></doc></add>
<add><doc><field name="id">42</field><field name="subword">BAZ</field></doc></add>
<add><doc><field name="id">42</field><field name="subword">10</field></doc></add>
<add><doc><field name="id">42</field><field name="subword">Mark, I found what's the problem! It turns to be from the latest schema. I found tons of exceptions in the resin.stdout that prevented the builder from performing. It's all coming from the WordDelimiterFilter which was just added to the latest schema: [2005-08-29 15:11:38.375] java.lang.IndexOutOfBoundsException: Index: 3, Size: 3 673804 [2005-08-29 15:11:38.375] at java.util.ArrayList.RangeCheck(ArrayList.java:547) 673805 [2005-08-29 15:11:38.375] at java.util.ArrayList.get(ArrayList.java:322) 673806 [2005-08-29 15:11:38.375] at solar.analysis.WordDelimiterFilter.addCombos(WordDelimiterFilter.java:349) 673807 [2005-08-29 15:11:38.375] at solar.analysis.WordDelimiterFilter.next(WordDelimiterFilter.java:325) 673808 [2005-08-29 15:11:38.375] at org.apache.lucene.analysis.LowerCaseFilter.next(LowerCaseFilter.java:32) 673809 [2005-08-29 15:11:38.375] at org.apache.lucene.analysis.StopFilter.next(StopFilter.java:98) 673810 [2005-08-29 15:11:38.375] at solar.EnglishPorterFilter.next(TokenizerFactory.java:163) 673811 [2005-08-29 15:11:38.375] at org.apache.lucene.index.DocumentWriter.invertDocument(DocumentWriter.java:143) 673812 [2005-08-29 15:11:38.375] at org.apache.lucene.index.DocumentWriter.addDocument(DocumentWriter.java:81) 673813 [2005-08-29 15:11:38.375] at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:307) 673814 [2005-08-29 15:11:38.375] at org.apache.lucene.index.IndexWriter.addDocument(IndexWriter.java:294) 673815 [2005-08-29 15:11:38.375] at solar.DirectUpdateHandler2.doAdd(DirectUpdateHandler2.java:170) 673816 [2005-08-29 15:11:38.375] at solar.DirectUpdateHandler2.overwriteBoth(DirectUpdateHandler2.java:317) 673817 [2005-08-29 15:11:38.375] at solar.DirectUpdateHandler2.addDoc(DirectUpdateHandler2.java:191) 673818 [2005-08-29 15:11:38.375] at solar.SolarCore.update(SolarCore.java:795) 673819 [2005-08-29 15:11:38.375] at solarserver.SolarServlet.doPost(SolarServlet.java:71) 673820 [2005-08-29 15:11:38.375] at javax.servlet.http.HttpServlet.service(HttpServlet.java:154) 673821 [2005-08-29 15:11:38.375] at javax.servlet.http.HttpServlet.service(HttpServlet.java:92) 673822 [2005-08-29 15:11:38.375] at com.caucho.server.dispatch.ServletFilterChain.doFilter(ServletFilterChain.java:99) 673823 [2005-08-29 15:11:38.375] at com.caucho.server.cache.CacheFilterChain.doFilter(CacheFilterChain.java:188) 673824 [2005-08-29 15:11:38.375] at com.caucho.server.webapp.WebAppFilterChain.doFilter(WebAppFilterChain.java:163) 673825 [2005-08-29 15:11:38.375] at com.caucho.server.dispatch.ServletInvocation.service(ServletInvocation.java:208) 673826 [2005-08-29 15:11:38.375] at com.caucho.server.http.HttpRequest.handleRequest(HttpRequest.java:259) 673827 [2005-08-29 15:11:38.375] at com.caucho.server.port.TcpConnection.run(TcpConnection.java:363) 673828 [2005-08-29 15:11:38.375] at com.caucho.util.ThreadPool.runTasks(ThreadPool.java:490) 673829 [2005-08-29 15:11:38.375] at com.caucho.util.ThreadPool.run(ThreadPool.java:423) 673830 [2005-08-29 15:11:38.375] at java.lang.Thread.run(Thread.java:595) With the previous schema I'm able to perform a successful full build: http://c12-ssa-dev40-so-mas1.cnet.com:5078/select/?stylesheet=q=docTypeversion=2.0start=0rows=10indent=on Do you want to rollback to the previous schema version</field></doc></add>
#
<delete fromPending="true" fromCommitted="true"><id>44</id></delete>
<add><doc><field name="id">44</field><field name="fname_s">Yonik</field><field name="here_b">true</field><field name="iq_l">10000000000</field><field name="description_t">software engineer</field><field name="ego_d">1e100</field><field name="pi_f">3.1415962</field><field name="when_dt">2005-03-18T01:14:34Z</field><field name="arr_f">1.414213562</field><field name="arr_f">.999</field></doc></add>
<commit/>
id:44
id:44 %%fl=fname_s,arr_f %//str[.="Yonik"] %//float[.="1.4142135"]
id:44 %%fl= %//str[.="Yonik"] %//float[.="1.4142135"]
#test addition of score field
id:44 %%fl=score %//str[.="Yonik"] %//float[.="1.4142135"] %//float[@name="score"] %*[count(//doc/*)=10]
id:44 %%fl=*,score %//str[.="Yonik"] %//float[.="1.4142135"] %//float[@name="score"] %*[count(//doc/*)=10]
id:44 %%fl=* %//str[.="Yonik"] %//float[.="1.4142135"] %*[count(//doc/*)>=9]
#test maxScore
id:44 %%fl=score %//result[@maxScore>0]
id:44;id desc; %%fl=score %//result[@maxScore>0]
id:44; %%fl=score %//@maxScore = //doc/float[@name="score"]
id:44;id desc; %%fl=score %//@maxScore = //doc/float[@name="score"]
id:44;id desc; %%fl=score&limit=0 %//result[@maxScore>0]
# test schema field attribute inheritance and overriding
<delete><id>44</id></delete>
<add><doc><field name="id">44</field><field name="shouldbestored">hi</field></doc></add>
<commit/>
id:44 %//*[@name="shouldbestored"]
+id:44 +shouldbestored:hi %//*[@numFound="1"]
<delete><id>44</id></delete>
<add><doc><field name="id">44</field><field name="shouldbeunstored">hi</field></doc></add>
<commit/>
id:44 %not(//*[@name="shouldbeunstored"])
+id:44 +shouldbeunstored:hi %//*[@numFound="1"]
<delete><id>44</id></delete>
<add><doc><field name="id">44</field><field name="shouldbeunindexed">hi</field></doc></add>
<commit/>
id:44 %//*[@name="shouldbeunindexed"]
# this should result in an error... how to check for that?
#+id:44 +shouldbeunindexed:hi %//*[@numFound="0"]
#test spaces between XML elements because that can introduce extra XML events that
#can mess up parsing (and it has in the past)
<delete> <id>44</id> </delete>
<add> <doc> <field name="id">44</field> <field name="shouldbestored">hi</field> </doc> </add>
<commit />
#test adding multiple docs per add command
<delete><query>id:[0 TO 99]</query></delete>
<add><doc><field name="id">1</field></doc><doc><field name="id">2</field></doc></add>
<commit/>
id:[0 TO 99] %//*[@numFound="2"]
#test synonym filter
<delete><query>id:[10 TO 100]</query></delete>
<add><doc><field name="id">10</field><field name="syn">a</field></doc></add>
<add><doc><field name="id">11</field><field name="syn">b</field></doc></add>
<add><doc><field name="id">12</field><field name="syn">c</field></doc></add>
<add><doc><field name="id">13</field><field name="syn">foo</field></doc></add>
<commit/>
id:10 AND syn:a %//*[@numFound="1"]
id:10 AND syn:aa %//*[@numFound="1"]
id:11 AND syn:b %//*[@numFound="1"]
id:11 AND syn:b1 %//*[@numFound="1"]
id:11 AND syn:b2 %//*[@numFound="1"]
id:12 AND syn:c %//*[@numFound="1"]
id:12 AND syn:c1 %//*[@numFound="1"]
id:12 AND syn:c2 %//*[@numFound="1"]
id:13 AND syn:foo %//*[@numFound="1"]
id:13 AND syn:bar %//*[@numFound="1"]
id:13 AND syn:baz %//*[@numFound="1"]
#trigger output of custom value test
values %%qt=test
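
Each non-comment line in the file above is either a raw XML update command (it begins with "<") or a query, optionally followed by %%-prefixed request parameters and one or more %-separated XPath tests that must all evaluate to true against the XML response; the SolrTest driver later in this commit parses exactly this format. A self-contained sketch of checking a single assertion (the response string here is fabricated for illustration):

import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;
import java.io.ByteArrayInputStream;
import org.w3c.dom.Document;

// Evaluates the XPath test from a line like:
//   id:42 AND val_s:"aa;bb" %//*[@numFound="1"]
// against a (fabricated) query response.
public class AssertionCheck {
    public static void main(String[] args) throws Exception {
        String resp = "<response><result numFound=\"1\"></result></response>";
        Document doc = DocumentBuilderFactory.newInstance()
                .newDocumentBuilder()
                .parse(new ByteArrayInputStream(resp.getBytes("UTF-8")));
        Boolean ok = (Boolean) XPathFactory.newInstance().newXPath()
                .evaluate("//*[@numFound=\"1\"]", doc, XPathConstants.BOOLEAN);
        System.out.println(ok ? "PASS" : "FAIL");
    }
}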

View File

@@ -0,0 +1,5 @@
#use a protected word file to avoid stemming two
#unrelated words to the same base word.
#to test, we will use words that would normally obviously be stemmed.
cats
ridding
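
As the comments note, words listed here are protected from stemming so that unrelated words are not conflated: "cats" stays "cats" rather than stemming to "cat". A hypothetical sketch of the check such a filter performs against this list (porterStem below is a crude stand-in, not the real Porter algorithm):

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

// Tokens found in the protected-word list bypass the stemmer entirely.
class ProtectedStemmer {
    private final Set<String> protectedWords =
            new HashSet<String>(Arrays.asList("cats", "ridding"));

    String stem(String token) {
        if (protectedWords.contains(token)) return token; // pass through unstemmed
        return porterStem(token);
    }

    private String porterStem(String t) {
        // demo-only suffix strip; the real filter applies Porter stemming
        return t.endsWith("s") ? t.substring(0, t.length() - 1) : t;
    }
}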

src/apps/SolarTest/run Executable file
View File

@@ -0,0 +1 @@
java -cp "../solar/classes;classes;../../lucene/lucene-1.4.3.jar" SolarPerf -schema test_schema.xml -index F:/root/index -verbose -test newtest.txt

View File

@@ -0,0 +1,334 @@
<?xml version="1.0" ?>
<!-- The Solar schema file. This file should be named "schema.xml" and
should be located where the classloader for the Solar webapp can find it.
$Id: schema.xml,v 1.1 2005/06/09 03:01:13 yonik Exp $
$Source: /cvs/main/searching/solar-configs/test/WEB-INF/classes/schema.xml,v $
$Name: $
-->
<schema name="test" version="1.0">
<types>
<!-- field type definitions... note that the "name" attribute is
just a label to be used by field definitions. The "class"
attribute and any other attributes determine the real type and
behavior of the fieldtype.
-->
<!-- numeric field types that store and index the text
value verbatim (and hence don't sort correctly or support range queries.)
These are provided more for backward compatibility, allowing one
to create a schema that matches an existing lucene index.
-->
<fieldtype name="integer" class="solar.IntField"/>
<fieldtype name="long" class="solar.LongField"/>
<fieldtype name="float" class="solar.FloatField"/>
<fieldtype name="double" class="solar.DoubleField"/>
<!-- numeric field types that manipulate the value into
a string value that isn't human-readable in its internal form,
but sorts correctly and supports range queries.
If sortMissingLast="true" then a sort on this field will cause documents
without the field to come after documents with the field,
regardless of the requested sort order.
If sortMissingFirst="true" then a sort on this field will cause documents
without the field to come before documents with the field,
regardless of the requested sort order.
If sortMissingLast="false" and sortMissingFirst="false" (the default),
then default lucene sorting will be used which places docs without the field
first in an ascending sort and last in a descending sort.
-->
<fieldtype name="sint" class="solar.SortableIntField" sortMissingLast="true"/>
<fieldtype name="slong" class="solar.SortableLongField" sortMissingLast="true"/>
<fieldtype name="sfloat" class="solar.SortableFloatField" sortMissingLast="true"/>
<fieldtype name="sdouble" class="solar.SortableDoubleField" sortMissingLast="true"/>
<!-- bcd versions of sortable numeric type may provide smaller
storage space and support very large numbers.
-->
<fieldtype name="bcdint" class="solar.BCDIntField" sortMissingLast="true"/>
<fieldtype name="bcdlong" class="solar.BCDLongField" sortMissingLast="true"/>
<fieldtype name="bcdstr" class="solar.BCDStrField" sortMissingLast="true"/>
<fieldtype name="boolean" class="solar.BoolField" sortMissingLast="true"/>
<fieldtype name="string" class="solar.StrField" sortMissingLast="true"/>
<!-- format for date is 1995-12-31T23:59:59.999Z and only the fractional
seconds part (.999) is optional.
-->
<fieldtype name="date" class="solar.DateField" sortMissingLast="true"/>
<!-- solar.TextField allows the specification of custom
text analyzers specified as a tokenizer and a list
of token filters.
-->
<fieldtype name="text" class="solar.TextField">
<analyzer>
<tokenizer class="solar.StandardTokenizerFactory"/>
<filter class="solar.StandardFilterFactory"/>
<filter class="solar.LowerCaseFilterFactory"/>
<filter class="solar.StopFilterFactory"/>
<!-- lucene PorterStemFilterFactory deprecated
<filter class="solar.PorterStemFilterFactory"/>
-->
<filter class="solar.EnglishPorterFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="nametext" class="solar.TextField">
<analyzer class="org.apache.lucene.analysis.WhitespaceAnalyzer"/>
</fieldtype>
<fieldtype name="teststop" class="solar.TextField">
<analyzer>
<tokenizer class="solar.LowerCaseTokenizerFactory"/>
<filter class="solar.StandardFilterFactory"/>
<filter class="solar.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
</fieldtype>
<!-- fieldtypes in this section isolate tokenizers and tokenfilters for testing -->
<fieldtype name="lowertok" class="solar.TextField">
<analyzer><tokenizer class="solar.LowerCaseTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="standardtok" class="solar.TextField">
<analyzer><tokenizer class="solar.StandardTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="lettertok" class="solar.TextField">
<analyzer><tokenizer class="solar.LetterTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="whitetok" class="solar.TextField">
<analyzer><tokenizer class="solar.WhitespaceTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="HTMLstandardtok" class="solar.TextField">
<analyzer><tokenizer class="solar.HTMLStripStandardTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="HTMLwhitetok" class="solar.TextField">
<analyzer><tokenizer class="solar.HTMLStripWhitespaceTokenizerFactory"/></analyzer>
</fieldtype>
<fieldtype name="standardtokfilt" class="solar.TextField">
<analyzer>
<tokenizer class="solar.StandardTokenizerFactory"/>
<filter class="solar.StandardFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="standardfilt" class="solar.TextField">
<analyzer>
<tokenizer class="solar.WhitespaceTokenizerFactory"/>
<filter class="solar.StandardFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="lowerfilt" class="solar.TextField">
<analyzer>
<tokenizer class="solar.WhitespaceTokenizerFactory"/>
<filter class="solar.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="porterfilt" class="solar.TextField">
<analyzer>
<tokenizer class="solar.WhitespaceTokenizerFactory"/>
<filter class="solar.PorterStemFilterFactory"/>
</analyzer>
</fieldtype>
<!-- fieldtype name="snowballfilt" class="solar.TextField">
<analyzer>
<tokenizer class="solar.WhitespaceTokenizerFactory"/>
<filter class="solar.SnowballPorterFilterFactory"/>
</analyzer>
</fieldtype -->
<fieldtype name="engporterfilt" class="solar.TextField">
<analyzer>
<tokenizer class="solar.WhitespaceTokenizerFactory"/>
<filter class="solar.EnglishPorterFilterFactory"/>
</analyzer>
</fieldtype>
<fieldtype name="custengporterfilt" class="solar.TextField">
<analyzer>
<tokenizer class="solar.WhitespaceTokenizerFactory"/>
<filter class="solar.EnglishPorterFilterFactory" protected="protwords.txt"/>
</analyzer>
</fieldtype>
<fieldtype name="stopfilt" class="solar.TextField">
<analyzer>
<tokenizer class="solar.WhitespaceTokenizerFactory"/>
<filter class="solar.StopFilterFactory" ignoreCase="true"/>
</analyzer>
</fieldtype>
<fieldtype name="custstopfilt" class="solar.TextField">
<analyzer>
<tokenizer class="solar.WhitespaceTokenizerFactory"/>
<filter class="solar.StopFilterFactory" words="stopwords.txt"/>
</analyzer>
</fieldtype>
<fieldtype name="lengthfilt" class="solar.TextField">
<analyzer>
<tokenizer class="solar.WhitespaceTokenizerFactory"/>
<filter class="solar.LengthFilterFactory" min="2" max="5"/>
</analyzer>
</fieldtype>
<fieldtype name="subword" class="solar.TextField">
<analyzer type="index">
<tokenizer class="solar.WhitespaceTokenizerFactory"/>
<filter class="solar.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solar.LowerCaseFilterFactory"/>
<filter class="solar.StopFilterFactory"/>
<filter class="solar.EnglishPorterFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solar.WhitespaceTokenizerFactory"/>
<filter class="solar.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
<filter class="solar.LowerCaseFilterFactory"/>
<filter class="solar.StopFilterFactory"/>
<filter class="solar.EnglishPorterFilterFactory"/>
</analyzer>
</fieldtype>
<!-- more flexible in matching skus, but more chance of a false match -->
<fieldtype name="skutype1" class="solar.TextField">
<analyzer type="index">
<tokenizer class="solar.WhitespaceTokenizerFactory"/>
<filter class="solar.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solar.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solar.WhitespaceTokenizerFactory"/>
<filter class="solar.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solar.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<!-- less flexible in matching skus, but less chance of a false match -->
<fieldtype name="skutype2" class="solar.TextField">
<analyzer type="index">
<tokenizer class="solar.WhitespaceTokenizerFactory"/>
<filter class="solar.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solar.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solar.WhitespaceTokenizerFactory"/>
<filter class="solar.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solar.LowerCaseFilterFactory"/>
</analyzer>
</fieldtype>
<!-- less flexible in matching skus, but less chance of a false match -->
<fieldtype name="syn" class="solar.TextField">
<analyzer>
<tokenizer class="solar.WhitespaceTokenizerFactory"/>
<filter name="syn" class="solar.SynonymFilterFactory" synonyms="synonyms.txt"/>
</analyzer>
</fieldtype>
<fieldtype name="unstored" class="solar.StrField" indexed="true" stored="false"/>
</types>
<fields>
<field name="id" type="integer" indexed="true" stored="true"/>
<field name="name" type="nametext" indexed="true" stored="true"/>
<field name="text" type="text" indexed="true" stored="false"/>
<field name="subject" type="text" indexed="true" stored="true"/>
<field name="title" type="nametext" indexed="true" stored="true"/>
<field name="weight" type="float" indexed="true" stored="true"/>
<field name="bday" type="date" indexed="true" stored="true"/>
<field name="title_stemmed" type="text" indexed="true" stored="false"/>
<field name="title_lettertok" type="lettertok" indexed="true" stored="false"/>
<field name="syn" type="syn" indexed="true" stored="true"/>
<!-- to test property inheritance and overriding -->
<field name="shouldbeunstored" type="unstored" />
<field name="shouldbestored" type="unstored" stored="true"/>
<field name="shouldbeunindexed" type="unstored" indexed="false" stored="true"/>
<!-- test different combinations of indexed and stored -->
<field name="bind" type="boolean" indexed="true" stored="false"/>
<field name="bsto" type="boolean" indexed="false" stored="true"/>
<field name="bindsto" type="boolean" indexed="true" stored="true"/>
<field name="isto" type="integer" indexed="false" stored="true"/>
<field name="iind" type="integer" indexed="true" stored="false"/>
<field name="ssto" type="string" indexed="false" stored="true"/>
<field name="sind" type="string" indexed="true" stored="false"/>
<field name="sindsto" type="string" indexed="true" stored="true"/>
<!-- fields to test individual tokenizers and tokenfilters -->
<field name="teststop" type="teststop" indexed="true" stored="true"/>
<field name="lowertok" type="lowertok" indexed="true" stored="true"/>
<field name="standardtok" type="standardtok" indexed="true" stored="true"/>
<field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
<field name="lettertok" type="lettertok" indexed="true" stored="true"/>
<field name="whitetok" type="whitetok" indexed="true" stored="true"/>
<field name="HTMLwhitetok" type="HTMLwhitetok" indexed="true" stored="true"/>
<field name="standardtokfilt" type="standardtokfilt" indexed="true" stored="true"/>
<field name="standardfilt" type="standardfilt" indexed="true" stored="true"/>
<field name="lowerfilt" type="lowerfilt" indexed="true" stored="true"/>
<field name="porterfilt" type="porterfilt" indexed="true" stored="true"/>
<field name="engporterfilt" type="engporterfilt" indexed="true" stored="true"/>
<field name="custengporterfilt" type="custengporterfilt" indexed="true" stored="true"/>
<field name="stopfilt" type="stopfilt" indexed="true" stored="true"/>
<field name="custstopfilt" type="custstopfilt" indexed="true" stored="true"/>
<field name="lengthfilt" type="lengthfilt" indexed="true" stored="true"/>
<field name="subword" type="subword" indexed="true" stored="true"/>
<field name="sku1" type="skutype1" indexed="true" stored="true"/>
<field name="sku2" type="skutype2" indexed="true" stored="true"/>
<!-- Dynamic field definitions. If a field name is not found, dynamicFields
will be used if the name matches any of the patterns.
RESTRICTION: the glob-like pattern in the name attribute must have
a "*" only at the start or the end.
EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
Longer patterns will be matched first. If equal-size patterns
both match, the first appearing in the schema will be used.
-->
<dynamicField name="*_i" type="sint" indexed="true" stored="true"/>
<dynamicField name="*_s" type="string" indexed="true" stored="true"/>
<dynamicField name="*_l" type="slong" indexed="true" stored="true"/>
<dynamicField name="*_t" type="text" indexed="true" stored="true"/>
<dynamicField name="*_b" type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_f" type="sfloat" indexed="true" stored="true"/>
<dynamicField name="*_d" type="sdouble" indexed="true" stored="true"/>
<dynamicField name="*_dt" type="date" indexed="true" stored="true"/>
<dynamicField name="*_bcd" type="bcdstr" indexed="true" stored="true"/>
<dynamicField name="*_sI" type="string" indexed="true" stored="false"/>
<dynamicField name="*_sS" type="string" indexed="false" stored="true"/>
<dynamicField name="t_*" type="text" indexed="true" stored="true"/>
<!-- for testing to ensure that longer patterns are matched first -->
<dynamicField name="*aa" type="string" indexed="true" stored="true"/>
<dynamicField name="*aaa" type="integer" indexed="false" stored="true"/>
</fields>
<defaultSearchField>text</defaultSearchField>
<uniqueKey>id</uniqueKey>
<!-- copyField commands copy one field to another at the time a document
is added to the index. It's used either to index the same field in different
ways, or to copy multiple fields into one field for easier/faster searching.
-->
<copyField source="title" dest="title_stemmed"/>
<copyField source="title" dest="title_lettertok"/>
<copyField source="title" dest="text"/>
<copyField source="subject" dest="text"/>
<!-- Similarity is the scoring routine for each document vs a query.
A custom similarity may be specified here, but the default is fine
for most applications.
-->
<!-- <similarity class="org.apache.lucene.search.DefaultSimilarity"/> -->
</schema>
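
The dynamicField comment above is worth pinning down: a pattern may have "*" only at the start or the end, longer patterns are tried first, and ties go to schema order, so a field named xaaa resolves to *aaa (integer) rather than *aa (string). A small Java sketch of that lookup rule, with hypothetical names:

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

// Longest-pattern-first dynamic field resolution.
class DynamicFieldTable {
    static class Glob {
        final String pattern, type;
        Glob(String pattern, String type) { this.pattern = pattern; this.type = type; }
        boolean matches(String name) {
            if (pattern.startsWith("*")) return name.endsWith(pattern.substring(1));
            if (pattern.endsWith("*")) return name.startsWith(pattern.substring(0, pattern.length() - 1));
            return pattern.equals(name);
        }
    }

    private final List<Glob> globs = new ArrayList<Glob>();

    void add(String pattern, String type) {
        globs.add(new Glob(pattern, type));
        // stable sort: longest first, equal lengths keep schema order
        Collections.sort(globs, new Comparator<Glob>() {
            public int compare(Glob a, Glob b) { return b.pattern.length() - a.pattern.length(); }
        });
    }

    String typeFor(String field) {
        for (Glob g : globs) if (g.matches(field)) return g.type;
        return null; // not a dynamic field
    }
}

With add("*aa", "string") and add("*aaa", "integer"), typeFor("xaa") returns "string" and typeFor("xaaa") returns "integer", which is exactly what the "longest dynamic field match" queries in the test file assert.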

View File

@@ -0,0 +1,191 @@
<?xml version="1.0" ?>
<!-- $Id$
$Source$
$Name$
-->
<config>
<!-- Used to specify an alternate directory to hold all index data.
It defaults to "index" if not present, and should probably
not be changed if replication is in use. -->
<!--
<indexDir>index</indexDir>
-->
<indexDefaults>
<!-- Values here affect all index writers and act as a default
unless overridden. -->
<useCompoundFile>false</useCompoundFile>
<mergeFactor>10</mergeFactor>
<maxBufferedDocs>1000</maxBufferedDocs>
<maxMergeDocs>2147483647</maxMergeDocs>
<maxFieldLength>10000</maxFieldLength>
<!-- these are global... can't currently override per index -->
<writeLockTimeout>1000</writeLockTimeout>
<commitLockTimeout>10000</commitLockTimeout>
</indexDefaults>
<mainIndex>
<!-- lucene options specific to the main on-disk lucene index -->
<useCompoundFile>false</useCompoundFile>
<mergeFactor>10</mergeFactor>
<maxBufferedDocs>1000</maxBufferedDocs>
<maxMergeDocs>2147483647</maxMergeDocs>
<maxFieldLength>10000</maxFieldLength>
<unlockOnStartup>true</unlockOnStartup>
</mainIndex>
<updateHandler class="solar.DirectUpdateHandler2">
<!-- autocommit pending docs if certain criteria are met -->
<autocommit> <!-- NOTE: autocommit not implemented yet -->
<maxDocs>10000</maxDocs>
<maxSec>3600</maxSec>
</autocommit>
<!-- sets a lower bound on the interval between commits (in seconds).
NOTE: not yet implemented
-->
<commitIntervalLowerBound>0</commitIntervalLowerBound>
<!-- The RunExecutableListener executes an external command.
exe - the name of the executable to run
dir - dir to use as the current working directory. default="."
wait - the calling thread waits until the executable returns. default="true"
args - the arguments to pass to the program. default=nothing
env - environment variables to set. default=nothing
-->
<!-- A postCommit event is fired after every commit
<listener event="postCommit" class="solar.RunExecutableListener">
<str name="exe">/var/opt/resin3/__PORT__/scripts/solar/snapshooter</str>
<str name="dir">/var/opt/resin3/__PORT__</str>
<bool name="wait">true</bool>
<arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
<arr name="env"> <str>MYVAR=val1</str> </arr>
</listener>
-->
</updateHandler>
<query>
<!-- Maximum number of clauses in a boolean query... can affect
range or wildcard queries that expand to big boolean
queries. An exception is thrown if exceeded.
-->
<maxBooleanClauses>1024</maxBooleanClauses>
<!-- Cache specification for Filters or DocSets - unordered set of *all* documents
that match a particular query.
-->
<filterCache
class="solar.search.LRUCache"
size="512"
initialSize="512"
autowarmCount="256"/>
<queryResultCache
class="solar.search.LRUCache"
size="512"
initialSize="512"
autowarmCount="1024"/>
<documentCache
class="solar.search.LRUCache"
size="512"
initialSize="512"
autowarmCount="0"/>
<!--
<cache name="myUserCache"
class="solar.search.LRUCache"
size="4096"
initialSize="1024"
autowarmCount="1024"
regenerator="MyRegenerator"
/>
-->
<useFilterForSortedQuery>true</useFilterForSortedQuery>
<queryResultWindowSize>10</queryResultWindowSize>
<HashDocSet maxSize="3000" loadFactor="0.75"/>
<!-- boolToFilterOptimizer converts boolean clauses with zero boost
into cached filters if the number of docs selected by the clause exceeds
the threshold (represented as a fraction of the total index)
-->
<boolTofilterOptimizer enabled="true" cacheSize="32" threshold=".05"/>
<!-- a newSearcher event is fired whenever a new searcher is being prepared
and there is a current searcher handling requests (aka registered). -->
<!-- QuerySenderListener takes an array of NamedList and executes a
local query request for each NamedList in sequence. -->
<!--
<listener event="newSearcher" class="solar.QuerySenderListener">
<arr name="queries">
<lst> <str name="q">solar</str> <str name="start">0</str> <str name="rows">10</str> </lst>
<lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
</arr>
</listener>
-->
<!-- a firstSearcher event is fired whenever a new searcher is being
prepared but there is no current registered searcher to handle
requests or to gain prewarming data from. -->
<!--
<listener event="firstSearcher" class="solar.QuerySenderListener">
<arr name="queries">
<lst> <str name="q">fast_warm</str> <str name="start">0</str> <str name="rows">10</str> </lst>
</arr>
</listener>
-->
</query>
<!-- An alternate set representation that uses an integer hash to store filters (sets of docids).
If the set cardinality is <= maxSize elements, then HashDocSet will be used instead of the
bitset-based HashBitset. -->
<!-- requestHandler plugins... incoming queries will be dispatched to the
correct handler based on the qt (query type) param matching the
name of registered handlers.
The "standard" request handler is the default and will be used if qt
is not specified in the request.
-->
<requestHandler name="standard" class="solar.StandardRequestHandler" />
<requestHandler name="old" class="solar.tst.OldRequestHandler" >
<int name="myparam">1000</int>
<float name="ratio">1.4142135</float>
<arr name="myarr"><int>1</int><int>2</int></arr>
<str>foo</str>
</requestHandler>
<requestHandler name="oldagain" class="solar.tst.OldRequestHandler" >
<lst name="lst1"> <str name="op">sqrt</str> <int name="val">2</int> </lst>
<lst name="lst2"> <str name="op">log</str> <float name="val">10</float> </lst>
</requestHandler>
<requestHandler name="test" class="solar.tst.TestRequestHandler" />
<admin>
<defaultQuery>solar</defaultQuery>
<gettableFiles>solarconfig.xml conf/solar/WEB-INF/web.external.xml conf/resin.conf </gettableFiles>
</admin>
</config>
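
The requestHandler section above registers handlers by name and dispatches on the qt request parameter, defaulting to "standard". A minimal sketch of that dispatch, using hypothetical stand-in interfaces rather than the actual SolrCore API:

import java.util.HashMap;
import java.util.Map;

// Name-based handler registry: qt selects the handler; an absent qt
// falls back to "standard".
class HandlerRegistry {
    interface Handler { void handle(Map<String, String> params); }

    private final Map<String, Handler> handlers = new HashMap<String, Handler>();

    void register(String name, Handler h) { handlers.put(name, h); }

    Handler lookup(Map<String, String> params) {
        String qt = params.get("qt");
        return handlers.get(qt == null ? "standard" : qt);
    }
}

This matches the last line of the test file, "values %%qt=test", which routes a request to the handler registered as "test".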

View File

@@ -0,0 +1,367 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.solr.core.SolrCore;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.request.*;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathFactory;
import javax.xml.xpath.XPathConstants;
import java.io.*;
import java.util.*;
import java.util.logging.Logger;
import java.util.logging.Level;
import java.util.logging.Handler;
import java.util.logging.ConsoleHandler;
import org.w3c.dom.Document;
/**
* User: Yonik Seeley
* Date: Aug 16, 2004
*/
public class SolrTest extends Thread {
static SolrCore core;
static String[] requestDict;
static String[] updateDict;
static String[] testDict;
static List<Integer> testDictLineno;
static List<Integer> lineno;
public static String[] readDict(String filename) throws IOException {
BufferedReader br = new BufferedReader(new FileReader(filename));
ArrayList<String> lst = new ArrayList<String>(1024);
lineno = new ArrayList<Integer>(1024);
String line;
int lineNum=0;
while ((line = br.readLine())!=null) {
lineNum++;
if (line.length() <= 1) continue;
lst.add(line);
lineno.add(lineNum);
}
br.close();
return lst.toArray(new String[lst.size()]);
}
public static boolean verbose=false;
static boolean doValidate=true;
static int countdown;
static synchronized boolean runAgain() {
return countdown-- > 0;
}
// statistics per client
int numReq=0;
int numErr=0;
int numBodyChars=0;
boolean isWriter=false;
boolean sequenceTest=false;
public void run() {
if (sequenceTest) {
for (int i=0; i<testDict.length; i++) {
String s = testDict[i];
int lineno = testDictLineno.get(i);
String req;
String test=null;
String params=null;
char[] resp;
if (s.length()<2 || s.startsWith("#")) continue; // comment
System.out.println("LINE=" + lineno + " EXECUTING " + s);
int endQuery = s.length();
int startParams = s.indexOf("%%");
int endParams = s.length();
int endTests = s.length();
if (startParams > 0) {
endQuery = startParams;
endParams = s.length();
}
int startTests = s.indexOf('%', startParams+2);
if (startTests > 0) {
if (endQuery == s.length()) endQuery = startTests;
endParams = startTests;
}
req = s.substring(0,endQuery).trim();
if (startParams > 0) params = s.substring(startParams+2,endParams).trim();
if (startTests > 0) test = s.substring(startTests+1,endTests).trim();
System.out.println("###req=" + req);
System.out.println("###params=" + params);
System.out.println("###tests=" + test);
if (req.startsWith("<")) {
resp = doUpdate(req);
} else {
resp = doReq(req,params);
}
if (doValidate) {
validate(req,test,resp);
} else {
System.out.println("#### no validation performed");
}
}
System.out.println(">>>>>>>>>>>>>>>>>>>>>>>> SUCCESS <<<<<<<<<<<<<<<<<<<<<<<<<<");
}
else {
while(runAgain()) {
if (isWriter) doUpdate(updateDict[(int)(Math.random()*updateDict.length)]);
else doReq(requestDict[(int)(Math.random()*requestDict.length)], null);
}
}
}
private DocumentBuilder builder;
private XPath xpath = XPathFactory.newInstance().newXPath();
{
try {
builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
} catch (ParserConfigurationException e) {
e.printStackTrace();
}
}
private void validate(String req, String test, char[] resp) {
if (test==null || test.length()==0) return;
Document document=null;
try {
// the resp[] chars carry an XML declaration claiming UTF-8, so re-encode
// the string as UTF-8 bytes before handing it to the XML parser.
document = builder.parse(new ByteArrayInputStream(new String(resp).getBytes("UTF-8")));
// document = builder.parse(new String(resp));
} catch (Exception e) {
System.out.println("ERROR parsing '" + new String(resp) + "'");
throw new RuntimeException(e);
}
String[] tests = test.split("%");
for (String xp : tests) {
Boolean bool=false;
xp=xp.trim();
try {
bool = (Boolean) xpath.evaluate(xp, document, XPathConstants.BOOLEAN);
} catch (Exception e) {
System.out.println("##################ERROR EVALUATING XPATH '" + xp + "'");
throw new RuntimeException(e);
}
if (!bool) {
System.out.println("##################ERROR");
System.out.println("req="+req);
System.out.println("xp="+xp);
throw new RuntimeException("test failed.");
}
}
}
public char[] doUpdate(String req) {
try {
// String lucene=updateDict[(int)(Math.random()*updateDict.length)];
String lucene=req;
StringReader ureq = new StringReader(lucene);
CharArrayWriter writer = new CharArrayWriter(32000);
core.update(ureq, writer);
if (verbose) System.out.println("UPDATE RESPONSE:'" + writer + "'");
// if (verbose) System.out.println("BODY chars read:" + writer.size());
this.numBodyChars+=writer.size();
this.numReq++;
return writer.toCharArray();
} catch (Exception e) {
this.numErr++;
e.printStackTrace();
}
return null;
}
static XMLResponseWriter xmlwriter = new XMLResponseWriter();
static SolrRequestHandler handler =
// new OldRequestHandler();
new StandardRequestHandler();
public char[] doReq(String req, String params) {
int start=0;
int limit=10;
String handler="standard";
//handler="test";
Map args = new HashMap();
args.put("indent", "on");
args.put("debugQuery", "on");
args.put("fl", "score");
args.put("version", "2.0");
if (params != null) {
String[] plist = params.split("&");
for (String decl : plist) {
String[] nv = decl.split("=");
if (nv.length==1) {
nv = new String[] { nv[0], "" };
}
if (nv[0].equals("start")) {
start=Integer.parseInt(nv[1]);
}
else if (nv[0].equals("limit")) {
limit=Integer.parseInt(nv[1]);
}
else if (nv[0].equals("qt")) {
handler = nv[1];
} else {
args.put(nv[0], nv[1]);
}
}
}
try {
// String lucene=requestDict[(int)(Math.random()*requestDict.length)];
String lucene=req;
CharArrayWriter writer = new CharArrayWriter(32000);
System.out.println("start="+start+" limit="+limit+" handler="+handler);
LocalSolrQueryRequest qreq = new LocalSolrQueryRequest(core,lucene,handler,start,limit,args);
SolrQueryResponse qrsp = new SolrQueryResponse();
try {
core.execute(qreq,qrsp);
if (qrsp.getException() != null) throw qrsp.getException();
// handler.handleRequest(qreq,qrsp);
xmlwriter.write(writer,qreq,qrsp);
} finally {
qreq.close();
}
if (verbose) System.out.println("GOT:'" + writer + "'");
if (verbose) System.out.println("BODY chars read:" + writer.size());
this.numBodyChars+=writer.size();
this.numReq++;
return writer.toCharArray();
} catch (Exception e) {
this.numErr++;
e.printStackTrace();
}
return null;
}
public static void main(String[] args) throws Exception {
int readers=1;
int requests=1;
int writers=0;
Logger log = Logger.getLogger("solar");
log.setUseParentHandlers(false);
log.setLevel(Level.FINEST);
Handler handler = new ConsoleHandler();
handler.setLevel(Level.FINEST);
log.addHandler(handler);
String filename="dict.txt";
String updateFilename="update_dict.txt";
String luceneDir=null;
String schemaFile="schema.xml";
String testFile=null;
boolean b_numUpdates=false; boolean b_writers=false;
int i=0; String arg;
while (i < args.length && args[i].startsWith("-")) {
arg = args[i++];
if (arg.equals("-verbose")) {
verbose=true;
} else if (arg.equals("-dict")) {
filename=args[i++];
} else if (arg.equals("-index")) {
luceneDir=args[i++];
} else if (arg.equals("-readers")) {
readers=Integer.parseInt(args[i++]);
} else if (arg.equals("-numRequests")) {
requests=Integer.parseInt(args[i++]);
} else if (arg.equals("-writers")) {
writers=Integer.parseInt(args[i++]);
b_writers=true;
} else if (arg.equals("-schema")) {
schemaFile=args[i++];
} else if (arg.equals("-test")) {
testFile=args[i++];
} else if (arg.equals("-noValidate")) {
doValidate=false;
} else {
System.out.println("Unknown option: " + arg);
return;
}
}
try {
IndexSchema schema = new IndexSchema(schemaFile);
countdown = requests;
core=new SolrCore(luceneDir,schema);
try {
if (readers > 0) requestDict = readDict(filename);
if (writers > 0) updateDict = readDict(updateFilename);
if (testFile != null) {
testDict = readDict(testFile);
testDictLineno = lineno;
}
} catch (IOException e) {
e.printStackTrace();
System.out.println("Can't read "+filename);
return;
}
SolrTest[] clients = new SolrTest[readers+writers];
for (i=0; i<readers; i++) {
clients[i] = new SolrTest();
if (testFile != null) clients[i].sequenceTest=true;
clients[i].start();
}
for (i=readers; i<readers+writers; i++) {
clients[i] = new SolrTest();
clients[i].isWriter = true;
clients[i].start();
}
for (i=0; i<readers; i++) {
clients[i].join();
}
for (i=readers; i<readers+writers; i++) {
clients[i].join();
}
} finally {
if (core != null) core.close();
}
}
}
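A sample invocation of the test driver above, with hypothetical file names (SolrTest and its dictionaries are assumed to be on the classpath): java SolrTest -schema schema.xml -test newtest.txt -verbose. Each line of the test file is then executed in order and validated against its embedded XPath tests unless -noValidate is given.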

View File

@ -0,0 +1,2 @@
stopworda
stopwordb

View File

@ -0,0 +1,6 @@
a => aa
b => b1 b2
c => c1,c2
a\=>a => b\=>b
a\,a => b\,b
foo,bar,baz

View File

@ -0,0 +1,51 @@
<delete><query>id:[* TO *]</query></delete>
<optimize/>
<delete><query>id:[0 TO 9]</query></delete>
<commit/>
<add><doc><field name="id">3</field></doc></add>
<add><doc><field name="id">1</field></doc></add>
<add><doc><field name="id">7</field></doc></add>
<add><doc><field name="id">0</field></doc></add>
<add><doc><field name="id">5</field></doc></add>
<commit/>
_val_:"linear(id,2,3)"
+id:[ 0 TO 5 ] +_val_:"linear(id,2,3)"^0.1
+id:[ 0 TO 5 ] +_val_:"linear(rord(id),2,3)"^0.1
+id:[ 0 TO 5 ] +_val_:"recip(rord(id),2,3,4)"^0.1
+id:[ 0 TO 5 ] +_val_:"linear(linear(rord(id),6,5),2,3)"^0.1
#<delete><query>id:[0 TO 9]</query></delete>
#<commit/>
<delete><query>weight:[* TO *]</query></delete>
<commit/>
<add><doc><field name="id">10</field><field name="weight">3</field></doc></add>
<add><doc><field name="id">11</field><field name="weight">1</field></doc></add>
<add><doc><field name="id">12</field><field name="weight">7</field></doc></add>
<add><doc><field name="id">13</field><field name="weight">0</field></doc></add>
<add><doc><field name="id">14</field><field name="weight">5</field></doc></add>
<commit/>
+id:[10 TO 14] +_val_:weight^2
+id:[10 TO 14] +_val_:"ord(weight)"^2
+id:[10 TO 14] +_val_:"rord(weight)"^2
#+id:[10 TO 14] +weight:_int_^2
#+id:[10 TO 14] +weight:_ord_^2
#+id:[10 TO 14] +weight:_rord_^2
<add><doc><field name="id">10</field><field name="q_i">2</field></doc></add>
<add><doc><field name="id">11</field><field name="q_f">3.14159</field></doc></add>
<add><doc><field name="id">12</field><field name="q_l">900</field></doc></add>
<add><doc><field name="id">13</field><field name="q_d">.1</field></doc></add>
<add><doc><field name="id">14</field><field name="q_dt">2005-01-01T01:01:01Z</field></doc></add>
<commit/>
_val_:q_i %%fl=score %//@maxScore = //doc/float[@name="score"] %//doc/float[@name="score"] = "2.0"
_val_:q_f %%fl=score %//@maxScore = //doc/float[@name="score"] %//doc/float[@name="score"] = "3.14159"
_val_:q_l %%fl=score %//@maxScore = //doc/float[@name="score"] %//doc/float[@name="score"] = "900.0"
_val_:q_d %%fl=score %//@maxScore = //doc/float[@name="score"] %//doc/float[@name="score"] = "0.1"
_val_:q_dt %%fl=score %//@maxScore = //doc/float[@name="score"] %//doc/float[@name="score"] = "1.0"

View File

@ -0,0 +1,68 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import java.util.Map;
/**
* @author yonik
* @version $Id: BaseTokenFilterFactory.java,v 1.5 2005/12/06 04:16:16 yonik Exp $
*/
public abstract class BaseTokenFilterFactory implements TokenFilterFactory {
protected Map<String,String> args;
public void init(Map<String,String> args) {
this.args=args;
}
public Map<String,String> getArgs() {
return args;
}
// TODO: move these somewhere that tokenizers and others
// can also use them...
protected int getInt(String name) {
return getInt(name,-1,false);
}
protected int getInt(String name, int defaultVal) {
return getInt(name,defaultVal,true);
}
protected int getInt(String name, int defaultVal, boolean useDefault) {
String s = args.get(name);
if (s==null) {
if (useDefault) return defaultVal;
throw new RuntimeException("Configuration Error: missing parameter '" + name + "'");
}
return Integer.parseInt(s);
}
protected boolean getBoolean(String name, boolean defaultVal) {
return getBoolean(name,defaultVal,true);
}
protected boolean getBoolean(String name, boolean defaultVal, boolean useDefault) {
String s = args.get(name);
if (s==null) {
if (useDefault) return defaultVal;
throw new RuntimeException("Configuration Error: missing parameter '" + name + "'");
}
return Boolean.parseBoolean(s);
}
}
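A minimal sketch of a subclass using the argument helpers above; MyFilterFactory and its parameters are illustrative, not part of this commit:
package org.apache.solr.analysis;
import java.util.Map;
import org.apache.lucene.analysis.TokenStream;
public class MyFilterFactory extends BaseTokenFilterFactory {
  private int min;
  private boolean verbose;
  public void init(Map<String,String> args) {
    super.init(args);
    min = getInt("min", 1);                // returns 1 if "min" is absent
    verbose = getBoolean("verbose", false);
    // getInt("min") with no default would throw a RuntimeException if absent
  }
  public TokenStream create(TokenStream input) {
    return input; // pass-through; a real factory would wrap input in a TokenFilter
  }
}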

View File

@ -0,0 +1,34 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import java.util.Map;
/**
* @author yonik
* @version $Id: BaseTokenizerFactory.java,v 1.3 2005/09/20 04:57:50 yonik Exp $
*/
public abstract class BaseTokenizerFactory implements TokenizerFactory {
protected Map<String,String> args;
public void init(Map<String,String> args) {
this.args=args;
}
public Map<String,String> getArgs() {
return args;
}
}

View File

@ -0,0 +1,111 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.solr.core.Config;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.Token;
import java.util.Map;
import java.util.List;
import java.util.Set;
import java.io.IOException;
/**
* @author yonik
* @version $Id$
*/
public class EnglishPorterFilterFactory extends BaseTokenFilterFactory {
public void init(Map<String, String> args) {
super.init(args);
String wordFile = args.get("protected");
if (wordFile != null) {
try {
List<String> wlist = Config.getLines(wordFile);
protectedWords = StopFilter.makeStopSet((String[])wlist.toArray(new String[0]));
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
private Set protectedWords = null;
public TokenStream create(TokenStream input) {
return new EnglishPorterFilter(input,protectedWords);
}
}
/** English Porter2 filter that doesn't use reflection to
 * adapt lucene to the snowball stemmer code.
 */
class EnglishPorterFilter extends TokenFilter {
private final Set protWords;
private net.sf.snowball.ext.EnglishStemmer stemmer;
public EnglishPorterFilter(TokenStream source, Set protWords) {
super(source);
this.protWords=protWords;
stemmer = new net.sf.snowball.ext.EnglishStemmer();
}
/** the original code from lucene sandbox
public final Token next() throws IOException {
Token token = input.next();
if (token == null)
return null;
stemmer.setCurrent(token.termText());
try {
stemMethod.invoke(stemmer, EMPTY_ARGS);
} catch (Exception e) {
throw new RuntimeException(e.toString());
}
return new Token(stemmer.getCurrent(),
token.startOffset(), token.endOffset(), token.type());
}
**/
public Token next() throws IOException {
Token tok = input.next();
if (tok==null) return null;
String tokstr = tok.termText();
// if protected, don't stem. use this to avoid stemming collisions.
if (protWords != null && protWords.contains(tokstr)) {
return tok;
}
stemmer.setCurrent(tokstr);
stemmer.stem();
String newstr = stemmer.getCurrent();
if (tokstr.equals(newstr)) {
return tok;
} else {
// TODO: it would be nice if I could just set termText directly like
// lucene packages can.
Token newtok = new Token(newstr, tok.startOffset(), tok.endOffset(), tok.type());
newtok.setPositionIncrement(tok.getPositionIncrement());
return newtok;
}
}
}
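A fragment sketching the protected-words behavior, assuming a classpath resource protwords.txt with one word per line (the file name and sample text are illustrative; java.io, java.util and Lucene analysis imports assumed):
Map<String,String> args = new HashMap<String,String>();
args.put("protected", "protwords.txt");  // words listed here bypass the stemmer
EnglishPorterFilterFactory factory = new EnglishPorterFilterFactory();
factory.init(args);
TokenStream ts = factory.create(
    new WhitespaceTokenizer(new StringReader("running runs")));
// "running" and "runs" both stem to "run" unless listed in protwords.txt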

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,32 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import java.io.Reader;
/**
* @author yonik
* @version $Id$
*/
public class HTMLStripStandardTokenizerFactory extends BaseTokenizerFactory {
public TokenStream create(Reader input) {
return new StandardTokenizer(new HTMLStripReader(input));
}
}

View File

@ -0,0 +1,32 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import java.io.Reader;
/**
* @author yonik
* @version $Id$
*/
public class HTMLStripWhitespaceTokenizerFactory extends BaseTokenizerFactory {
public TokenStream create(Reader input) {
return new WhitespaceTokenizer(new HTMLStripReader(input));
}
}

View File

@ -0,0 +1,47 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import java.io.IOException;
/**
* @author yonik
* @version $Id: LengthFilter.java,v 1.2 2005/04/24 02:53:35 yonik Exp $
*/
public final class LengthFilter extends TokenFilter {
final int min,max;
public LengthFilter(TokenStream in, int min, int max) {
super(in);
this.min=min;
this.max=max;
//System.out.println("min="+min+" max="+max);
}
public final Token next() throws IOException {
for (Token token=input.next(); token!=null; token=input.next()) {
final int len = token.endOffset() - token.startOffset();
if (len<min || len>max) continue;
return token;
}
return null;
}
}

View File

@ -0,0 +1,38 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.LengthFilter;
import java.util.Map;
/**
* @author yonik
* @version $Id$
*/
public class LengthFilterFactory extends BaseTokenFilterFactory {
int min,max;
public void init(Map<String, String> args) {
super.init(args);
min=Integer.parseInt(args.get("min"));
max=Integer.parseInt(args.get("max"));
}
public TokenStream create(TokenStream input) {
return new LengthFilter(input,min,max);
}
}
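A configuration fragment for the factory above; in practice the schema supplies these args, and the values here are illustrative:
Map<String,String> args = new HashMap<String,String>();
args.put("min", "3");
args.put("max", "7");
LengthFilterFactory factory = new LengthFilterFactory();
factory.init(args);
// factory.create(input) now drops tokens whose offset length is outside [3,7]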

View File

@ -0,0 +1,32 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.LetterTokenizer;
import java.io.Reader;
/**
* @author yonik
* @version $Id$
*/
public class LetterTokenizerFactory extends BaseTokenizerFactory {
public TokenStream create(Reader input) {
return new LetterTokenizer(input);
}
}

View File

@ -0,0 +1,30 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.LowerCaseFilter;
/**
* @author yonik
* @version $Id$
*/
public class LowerCaseFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new LowerCaseFilter(input);
}
}

View File

@ -0,0 +1,32 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.LowerCaseTokenizer;
import java.io.Reader;
/**
* @author yonik
* @version $Id$
*/
public class LowerCaseTokenizerFactory extends BaseTokenizerFactory {
public TokenStream create(Reader input) {
return new LowerCaseTokenizer(input);
}
}

View File

@ -0,0 +1,30 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.PorterStemFilter;
/**
* @author yonik
* @version $Id$
*/
public class PorterStemFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new PorterStemFilter(input);
}
}

View File

@ -0,0 +1,34 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.snowball.SnowballFilter;
/**
* @author yonik
* @version $Id$
*/
public class SnowballPorterFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
// Browsing the code, SnowballFilter uses reflection to adapt to Lucene...
// don't use this if you are concerned about speed. Use EnglishPorterFilterFactory.
// TODO: make language configurable
return new SnowballFilter(input,"English");
}
}

View File

@ -0,0 +1,30 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardFilter;
/**
* @author yonik
* @version $Id$
*/
public class StandardFilterFactory extends BaseTokenFilterFactory {
public TokenStream create(TokenStream input) {
return new StandardFilter(input);
}
}

View File

@ -0,0 +1,33 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import java.io.Reader;
/**
* @author yonik
* @version $Id$
*/
public class StandardTokenizerFactory extends BaseTokenizerFactory {
public TokenStream create(Reader input) {
return new StandardTokenizer(input);
}
}

View File

@ -0,0 +1,55 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.solr.core.Config;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import java.util.Map;
import java.util.List;
import java.util.Set;
import java.io.IOException;
/**
* @author yonik
* @version $Id$
*/
public class StopFilterFactory extends BaseTokenFilterFactory {
public void init(Map<String, String> args) {
super.init(args);
String stopWordFile = args.get("words");
ignoreCase = getBoolean("ignoreCase",false);
if (stopWordFile != null) {
try {
List<String> wlist = Config.getLines(stopWordFile);
stopWords = StopFilter.makeStopSet((String[])wlist.toArray(new String[0]), ignoreCase);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
private Set stopWords = StopFilter.makeStopSet(StopAnalyzer.ENGLISH_STOP_WORDS);
private boolean ignoreCase;
public TokenStream create(TokenStream input) {
return new StopFilter(input,stopWords,ignoreCase);
}
}
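A configuration fragment for the factory above (the resource name is illustrative):
Map<String,String> args = new HashMap<String,String>();
args.put("words", "stopwords.txt");  // optional; defaults to StopAnalyzer's English list
args.put("ignoreCase", "true");
StopFilterFactory factory = new StopFilterFactory();
factory.init(args);
// factory.create(input) now removes the listed stop words, case-insensitively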

View File

@ -0,0 +1,125 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.SynonymFilter;
import org.apache.lucene.analysis.SynonymMap;
import java.util.Map;
import java.util.ArrayList;
import java.util.List;
import java.io.IOException;
import org.apache.solr.util.StrUtils;
import org.apache.solr.analysis.BaseTokenFilterFactory;
import org.apache.solr.core.Config;
import org.apache.solr.core.SolrCore;
/**
* @author yonik
* @version $Id$
*/
public class SynonymFilterFactory extends BaseTokenFilterFactory {
public void init(Map<String, String> args) {
super.init(args);
String synonyms = args.get("synonyms");
ignoreCase = getBoolean("ignoreCase",false);
expand = getBoolean("expand",true);
if (synonyms != null) {
List<String> wlist=null;
try {
wlist = Config.getLines(synonyms);
} catch (IOException e) {
throw new RuntimeException(e);
}
synMap = new SynonymMap();
parseRules(wlist, synMap, "=>", ",", ignoreCase,expand);
if (wlist.size()<=20) {
SolrCore.log.fine("SynonymMap "+synonyms +":"+synMap);
}
}
}
private SynonymMap synMap;
private boolean ignoreCase;
private boolean expand;
private static void parseRules(List<String> rules, SynonymMap map, String mappingSep, String synSep, boolean ignoreCase, boolean expansion) {
int count=0;
for (String rule : rules) {
// To use regexes, we need an expression that specifies an odd number of chars.
// This can't really be done with string.split(), and since we need to
// do unescaping at some point anyway, we wouldn't be saving any effort
// by using regexes.
List<String> mapping = StrUtils.splitSmart(rule, mappingSep, false);
List<List<String>> source;
List<List<String>> target;
if (mapping.size() > 2) {
throw new RuntimeException("Invalid Synonym Rule:" + rule);
} else if (mapping.size()==2) {
source = getSynList(mapping.get(0), synSep);
target = getSynList(mapping.get(1), synSep);
} else {
source = getSynList(mapping.get(0), synSep);
if (expansion) {
// expand to all arguments
target = source;
} else {
// reduce to first argument
target = new ArrayList<List<String>>(1);
target.add(source.get(0));
}
}
boolean includeOrig=false;
for (List<String> fromToks : source) {
count++;
for (List<String> toToks : target) {
map.add(ignoreCase ? StrUtils.toLower(fromToks) : fromToks,
SynonymMap.makeTokens(toToks),
includeOrig,
true);
}
}
}
}
// a , b c , d e f => [[a],[b,c],[d,e,f]]
private static List<List<String>> getSynList(String str, String separator) {
List<String> strList = StrUtils.splitSmart(str, separator, false);
// now split on whitespace to get a list of token strings
List<List<String>> synList = new ArrayList<List<String>>();
for (String toks : strList) {
List<String> tokList = StrUtils.splitWS(toks, true);
synList.add(tokList);
}
return synList;
}
public TokenStream create(TokenStream input) {
return new SynonymFilter(input,synMap,ignoreCase);
}
}
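To make the rule grammar concrete, the rules in the synonyms test file earlier in this commit parse as follows with expand=true: "a => aa" maps a to aa; "b => b1 b2" maps b to the two-token sequence b1 b2; "c => c1,c2" maps c to either c1 or c2; "foo,bar,baz" (no "=>") expands each of the three terms to all three. A configuration fragment (resource name illustrative):
Map<String,String> args = new HashMap<String,String>();
args.put("synonyms", "synonyms.txt");
args.put("ignoreCase", "true");
args.put("expand", "true");   // "false" would reduce comma lists to their first entry
SynonymFilterFactory factory = new SynonymFilterFactory();
factory.init(args);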

View File

@ -0,0 +1,34 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import java.util.Map;
/**
* Factory to create a token filter that transforms one TokenStream to another.
*
* @author yonik
* @version $Id: TokenFilterFactory.java,v 1.3 2005/09/20 04:58:28 yonik Exp $
*/
public interface TokenFilterFactory {
public void init(Map<String,String> args);
public Map<String,String> getArgs();
public TokenStream create(TokenStream input);
}

View File

@ -0,0 +1,65 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.solr.analysis.TokenizerFactory;
import java.io.Reader;
/**
* @author yonik
* @version $Id: TokenizerChain.java,v 1.3 2005/08/26 05:21:08 yonik Exp $
*/
//
// An analyzer that uses a tokenizer and a list of token filters to
// create a TokenStream.
//
public class TokenizerChain extends Analyzer {
final private TokenizerFactory tokenizer;
final private TokenFilterFactory[] filters;
public TokenizerChain(TokenizerFactory tokenizer, TokenFilterFactory[] filters) {
this.tokenizer = tokenizer;
this.filters = filters;
}
public TokenizerFactory getTokenizerFactory() { return tokenizer; }
public TokenFilterFactory[] getTokenFilterFactories() { return filters; }
public TokenStream tokenStream(String fieldName, Reader reader) {
TokenStream ts = tokenizer.create(reader);
for (int i=0; i<filters.length; i++) {
ts = filters[i].create(ts);
}
return ts;
}
public String toString() {
StringBuilder sb = new StringBuilder("TokenizerChain(");
sb.append(tokenizer);
for (TokenFilterFactory filter: filters) {
sb.append(", ");
sb.append(filter);
}
sb.append(')');
return sb.toString();
}
}
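A minimal sketch of assembling an analyzer from the factories in this package (the field name and text are illustrative; imports assumed, and factories would normally be init()'ed with their config maps first):
TokenizerFactory tokenizer = new WhitespaceTokenizerFactory();
TokenFilterFactory[] filters = new TokenFilterFactory[] {
    new LowerCaseFilterFactory(),
    new PorterStemFilterFactory()
};
Analyzer analyzer = new TokenizerChain(tokenizer, filters);
TokenStream ts = analyzer.tokenStream("text", new StringReader("Riding BIKES"));
// tokens come out whitespace-split, lowercased, then Porter-stemmed: "ride", "bike"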

View File

@ -0,0 +1,37 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import java.io.*;
import java.util.Map;
import org.apache.lucene.analysis.*;
/**
* A <code>TokenizerFactory</code> creates a <code>Tokenizer</code> on demand
* that breaks up a stream of characters into tokens.
*
* @author yonik
* @version $Id: TokenizerFactory.java,v 1.10 2005/12/13 05:16:03 yonik Exp $
*/
public interface TokenizerFactory {
public void init(Map<String,String> args);
public Map<String,String> getArgs();
public TokenStream create(Reader input);
}

View File

@ -0,0 +1,32 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
import java.io.Reader;
/**
* @author yonik
* @version $Id$
*/
public class WhitespaceTokenizerFactory extends BaseTokenizerFactory {
public TokenStream create(Reader input) {
return new WhitespaceTokenizer(input);
}
}

View File

@ -0,0 +1,444 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Token;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* Splits words into subwords and performs optional transformations on subword groups.
* Words are split into subwords with the following rules:
* - split on intra-word delimiters (by default, all non alpha-numeric characters).
* - "Wi-Fi" -> "Wi", "Fi"
* - split on case transitions
* - "PowerShot" -> "Power", "Shot"
* - split on letter-number transitions
* - "SD500" -> "SD", "500"
* - leading and trailing intra-word delimiters on each subword are ignored
* - "//hello---there, 'dude'" -> "hello", "there", "dude"
* - trailing "'s" are removed for each subword
* - "O'Neil's" -> "O", "Neil"
* - Note: this step isn't performed in a separate filter because of possible subword combinations.
*
* The <b>combinations</b> parameter affects how subwords are combined:
* - combinations="0" causes no subword combinations.
* - "PowerShot" -> 0:"Power", 1:"Shot" (0 and 1 are the token positions)
* - combinations="1" means that in addition to the subwords, maximum runs of non-numeric subwords are catenated and produced at the same position of the last subword in the run.
* - "PowerShot" -> 0:"Power", 1:"Shot" 1:"PowerShot"
* - "A's+B's&C's" -> 0:"A", 1:"B", 2:"C", 2:"ABC"
* - "Super-Duper-XL500-42-AutoCoder!" -> 0:"Super", 1:"Duper", 2:"XL", 2:"SuperDuperXL", 3:"500" 4:"42", 5:"Auto", 6:"Coder", 6:"AutoCoder"
*
* One use for WordDelimiterFilter is to help match words with different subword delimiters.
* For example, if the source text contained "wi-fi" one may want "wifi" "WiFi" "wi-fi" "wi+fi"
* queries to all match.
* One way of doing so is to specify combinations="1" in the analyzer
* used for indexing, and combinations="0" (the default) in the analyzer
* used for querying. Given that the current StandardTokenizer
* immediately removes many intra-word delimiters, it is recommended that
* this filter be used after a tokenizer that does not do this
* (such as WhitespaceTokenizer).
*
* @author yonik
* @version $Id: WordDelimiterFilter.java,v 1.6 2005/09/20 03:54:05 yonik Exp $
*/
final class WordDelimiterFilter extends TokenFilter {
private final byte[] charTypeTable;
public static final int LOWER=0x01;
public static final int UPPER=0x02;
public static final int DIGIT=0x04;
public static final int SUBWORD_DELIM=0x08;
// combinations: for testing, not for setting bits
public static final int ALPHA=0x03;
public static final int ALPHANUM=0x07;
// TODO: should there be a WORD_DELIM category for
// chars that only separate words (no catenation of subwords
// will be done if separated by these chars?)
// "," would be an obvious candidate...
static byte[] defaultWordDelimTable;
static {
byte[] tab = new byte[256];
for (int i=0; i<256; i++) {
byte code = 0;
if (Character.isLowerCase(i)) code |= LOWER;
else if (Character.isUpperCase(i)) code |= UPPER;
else if (Character.isDigit(i)) code |= DIGIT;
if (code==0) code=SUBWORD_DELIM;
tab[i]=code;
}
defaultWordDelimTable = tab;
}
final int generateWordParts;
final int generateNumberParts;
final int catenateWords;
final int catenateNumbers;
final int catenateAll;
public WordDelimiterFilter(TokenStream in, byte[] charTypeTable, int generateWordParts, int generateNumberParts, int catenateWords, int catenateNumbers, int catenateAll) {
super(in);
this.generateWordParts = generateWordParts;
this.generateNumberParts = generateNumberParts;
this.catenateWords = catenateWords;
this.catenateNumbers = catenateNumbers;
this.catenateAll = catenateAll;
this.charTypeTable = charTypeTable;
}
public WordDelimiterFilter(TokenStream in, int generateWordParts, int generateNumberParts, int catenateWords, int catenateNumbers, int catenateAll) {
this(in, defaultWordDelimTable, generateWordParts, generateNumberParts, catenateWords, catenateNumbers, catenateAll);
}
int charType(int ch) {
if (ch<charTypeTable.length) {
return charTypeTable[ch];
} else if (Character.isLowerCase(ch)) {
return LOWER;
} else if (Character.isLetter(ch)) {
return UPPER;
} else {
return SUBWORD_DELIM;
}
}
private int charType(String s, int pos) {
return charType(s.charAt(pos));
}
// use the type of the first char as the type
// of the token.
private int tokType(Token t) {
return charType(t.termText().charAt(0));
}
// There isn't really an efficient queue class, so we will
// just use an array for now.
private ArrayList<Token> queue = new ArrayList<Token>(4);
private int queuePos=0;
// temporary working queue
private ArrayList<Token> tlist = new ArrayList<Token>(4);
private Token newTok(Token orig, int start, int end) {
return new Token(orig.termText().substring(start,end),
orig.startOffset() + start,
orig.startOffset() + end,
orig.type());
}
public final Token next() throws IOException {
// check the queue first
if (queuePos<queue.size()) {
return queue.get(queuePos++);
}
// reset the queue if it had been previously used
if (queuePos!=0) {
queuePos=0;
queue.clear();
}
// optimize for the common case: assume there will be
// no subwords (just a simple word)
//
// Would it actually be faster to check for the common form
// of isLetter() isLower()*, and then backtrack if it doesn't match?
while(true) {
Token t = input.next();
if (t == null) return null;
String s = t.termText();
int off=t.startOffset();
int start=0;
int end=s.length();
if (end==0) continue;
// Avoid calling charType more than once for each char (basically
// avoid any backtracking).
// makes code slightly more difficult, but faster.
int ch=s.charAt(start);
int type=charType(ch);
int numWords=0;
while (start<end) {
// first eat delimiters at the start of this subword
while ((type & SUBWORD_DELIM)!=0 && ++start<end) {
ch=s.charAt(start);
type=charType(ch);
}
int pos=start;
// save the type of the first char of the subword
// as a way to tell what type of subword token this is (number, word, etc)
int firstType=type;
int lastType=type; // type of the previously read char
while (pos<end) {
if (type!=lastType) {
// check and remove "'s" from the end of a token.
// the pattern to check for is
// ALPHA "'" ("s"|"S") (SUBWORD_DELIM | END)
if ((lastType & ALPHA)!=0) {
if (ch=='\'' && pos+1<end
&& (s.charAt(pos+1)=='s' || s.charAt(pos+1)=='S'))
{
int subWordEnd=pos;
if (pos+2>=end) {
// end of string detected after "'s"
pos+=2;
} else {
// make sure that a delimiter follows "'s"
int ch2 = s.charAt(pos+2);
int type2 = charType(ch2);
if ((type2 & SUBWORD_DELIM)!=0) {
// if delimiter, move position pointer
// to it (skipping over "'s"
ch=ch2;
type=type2;
pos+=2;
}
}
queue.add(newTok(t,start,subWordEnd));
if ((firstType & ALPHA)!=0) numWords++;
break;
}
}
// For case changes, only split on a transition from
// lower to upper case, not vice-versa.
// That will correctly handle the
// case of a word starting with a capital (won't split).
// It will also handle pluralization of
// an uppercase word such as FOOs (won't split).
if ((lastType & UPPER)!=0 && (type & LOWER)!=0) {
// UPPER->LOWER: Don't split
} else {
// NOTE: this code currently assumes that only one flag
// is set for each character now, so we don't have
// to explicitly check for all the classes of transitions
// listed below.
// LOWER->UPPER
// ALPHA->NUMERIC
// NUMERIC->ALPHA
// *->DELIMITER
queue.add(newTok(t,start,pos));
if ((firstType & ALPHA)!=0) numWords++;
break;
}
}
if (++pos >= end) {
if (start==0) {
// the subword is the whole original token, so
// return it unchanged.
return t;
}
Token newtok = newTok(t,start,pos);
// optimization... if this is the only token,
// return it immediately.
if (queue.size()==0) {
return newtok;
}
queue.add(newtok);
if ((firstType & ALPHA)!=0) numWords++;
break;
}
lastType = type;
ch = s.charAt(pos);
type = charType(ch);
}
// start of the next subword is the current position
start=pos;
}
// System.out.println("##########TOKEN=" + s + " ######### WORD DELIMITER QUEUE=" + str(queue));
final int numtok = queue.size();
// We reached the end of the current token.
// If the queue is empty, we should continue by reading
// the next token
if (numtok==0) {
continue;
}
// if number of tokens is 1, always return the single tok
if (numtok==1) {
break;
}
final int numNumbers = numtok - numWords;
// check conditions under which the current token
// queue may be used as-is (no catenations needed)
if (catenateAll==0 // no "everything" to catenate
&& (catenateWords==0 || numWords<=1) // no words to catenate
&& (catenateNumbers==0 || numNumbers<=1) // no numbers to catenate
&& (generateWordParts!=0 || numWords==0) // word generation is on
&& (generateNumberParts!=0 || numNumbers==0)) // number generation is on
{
break;
}
// swap queue and the temporary working list, then clear the
// queue in preparation for adding all combinations back to it.
ArrayList<Token> tmp=tlist;
tlist=queue;
queue=tmp;
queue.clear();
if (numWords==0) {
// all numbers
addCombos(tlist,0,numtok,generateNumberParts!=0,catenateNumbers!=0 || catenateAll!=0, 1);
break;
} else if (numNumbers==0) {
// all words
addCombos(tlist,0,numtok,generateWordParts!=0,catenateWords!=0 || catenateAll!=0, 1);
break;
} else if (generateNumberParts==0 && generateWordParts==0 && catenateNumbers==0 && catenateWords==0) {
// catenate all *only*
// OPT:could be optimized to add to current queue...
addCombos(tlist,0,numtok,false,catenateAll!=0, 1);
break;
}
//
// Find all adjacent tokens of the same type.
//
Token tok = tlist.get(0);
boolean isWord = (tokType(tok) & ALPHA) != 0;
boolean wasWord=isWord;
for(int i=0; i<numtok;) {
int j;
for (j=i+1; j<numtok; j++) {
wasWord=isWord;
tok = tlist.get(j);
isWord = (tokType(tok) & ALPHA) != 0;
if (isWord != wasWord) break;
}
if (wasWord) {
addCombos(tlist,i,j,generateWordParts!=0,catenateWords!=0,1);
} else {
addCombos(tlist,i,j,generateNumberParts!=0,catenateNumbers!=0,1);
}
i=j;
}
// take care catenating all subwords
if (catenateAll!=0) {
addCombos(tlist,0,numtok,false,true,0);
}
break;
}
// System.out.println("##########AFTER COMBINATIONS:"+ str(queue));
queuePos=1;
return queue.get(0);
}
// index "a","b","c" as pos0="a", pos1="b", pos2="c", pos2="abc"
private void addCombos(List<Token> lst, int start, int end, boolean generateSubwords, boolean catenateSubwords, int posOffset) {
if (end-start==1) {
// always generate a word alone, even if generateSubwords==false, because
// the catenation of all the subwords *is* the subword.
queue.add(lst.get(start));
return;
}
StringBuilder sb = null;
if (catenateSubwords) sb=new StringBuilder();
Token firstTok=null;
Token tok=null;
for (int i=start; i<end; i++) {
tok = lst.get(i);
if (catenateSubwords) {
if (i==start) firstTok=tok;
sb.append(tok.termText());
}
if (generateSubwords) {
queue.add(tok);
}
}
if (catenateSubwords) {
Token concatTok = new Token(sb.toString(),
firstTok.startOffset(),
tok.endOffset(),
firstTok.type());
// if we indexed some other tokens, then overlap concatTok with the last.
// Otherwise, use the value passed in as the position offset.
concatTok.setPositionIncrement(generateSubwords ? 0 : posOffset);
queue.add(concatTok);
}
}
private String str(List<Token> lst) {
StringBuilder sb = new StringBuilder();
sb.append('{');
for (Token t : lst) {
sb.append('(');
sb.append('"');
sb.append(t.termText());
sb.append("\",increment=");
sb.append(Integer.toString(t.getPositionIncrement()));
sb.append(')');
sb.append(',');
}
sb.append('}');
return sb.toString();
}
// questions:
// negative numbers? -42 indexed as just 42?
// dollar sign? $42
// percent sign? 33%
// downsides: if source text is "powershot" then a query of "PowerShot" won't match!
}
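A sketch of the splitting rules above in action (the filter is package-private, so it is normally reached through WordDelimiterFilterFactory; the sample text is illustrative):
TokenStream ts = new WordDelimiterFilter(
    new WhitespaceTokenizer(new StringReader("PowerShot SD500")),
    1, 1,   // generateWordParts, generateNumberParts
    1, 0,   // catenateWords, catenateNumbers
    0);     // catenateAll
// "PowerShot" -> "Power", "Shot", plus "PowerShot" at the same position as "Shot";
// "SD500"     -> "SD", "500" (no catenation: each run has only one subword)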

View File

@ -0,0 +1,48 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.analysis;
import org.apache.lucene.analysis.TokenStream;
import java.util.Map;
/**
* @author yonik
* @version $Id$
*/
public class WordDelimiterFilterFactory extends BaseTokenFilterFactory {
int generateWordParts=0;
int generateNumberParts=0;
int catenateWords=0;
int catenateNumbers=0;
int catenateAll=0;
public void init(Map<String, String> args) {
super.init(args);
generateWordParts = getInt("generateWordParts",1);
generateNumberParts = getInt("generateNumberParts",1);
catenateWords = getInt("catenateWords",0);
catenateNumbers = getInt("catenateNumbers",0);
catenateAll = getInt("catenateAll",0);
}
public TokenStream create(TokenStream input) {
return new WordDelimiterFilter(input,
generateWordParts, generateNumberParts,
catenateWords, catenateNumbers, catenateAll);
}
}

View File

@ -0,0 +1,43 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.NamedList;
/**
* @author yonik
*/
class AbstractSolrEventListener implements SolrEventListener {
protected NamedList args;
public void init(NamedList args) {
this.args = args;
}
public void postCommit() {
throw new UnsupportedOperationException();
}
public void newSearcher(SolrIndexSearcher newSearcher, SolrIndexSearcher currentSearcher) {
throw new UnsupportedOperationException();
}
public String toString() {
return getClass().getName() + args;
}
}

View File

@ -0,0 +1,260 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrException;
import javax.xml.parsers.*;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathFactory;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.namespace.QName;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Logger;
/**
* @author yonik
* @version $Id: Config.java,v 1.10 2005/12/20 16:05:46 yonik Exp $
*/
public class Config {
public static final Logger log = Logger.getLogger(SolrCore.class.getName());
static final XPathFactory xpathFactory = XPathFactory.newInstance();
private Document doc;
private String prefix;
private String name;
public Config(String name, InputStream is, String prefix) throws ParserConfigurationException, IOException, SAXException {
this.name = name;
// normalize the prefix before storing it so the trailing slash isn't lost
if (prefix!=null && !prefix.endsWith("/")) prefix += '/';
this.prefix = prefix;
javax.xml.parsers.DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
doc = builder.parse(is);
}
public Document getDocument() {
return doc;
}
public XPath getXPath() {
return xpathFactory.newXPath();
}
private String normalize(String path) {
return (prefix==null || path.startsWith("/")) ? path : prefix+path;
}
public Object evaluate(String path, QName type) {
XPath xpath = xpathFactory.newXPath();
try {
String xstr=normalize(path);
// TODO: instead of prepending /prefix/, we could do the search rooted at /prefix...
Object o = xpath.evaluate(xstr, doc, type);
return o;
} catch (XPathExpressionException e) {
throw new SolrException(500,"Error in xpath:" + path +" for " + name,e,false);
}
}
public Node getNode(String path, boolean errIfMissing) {
XPath xpath = xpathFactory.newXPath();
Node nd = null;
String xstr = normalize(path);
try {
nd = (Node)xpath.evaluate(xstr, doc, XPathConstants.NODE);
if (nd==null) {
if (errIfMissing) {
throw new RuntimeException(name + " missing "+path);
} else {
log.fine(name + " missing optional " + path);
return null;
}
}
log.finest(name + ":" + path + "=" + nd);
return nd;
} catch (XPathExpressionException e) {
SolrException.log(log,"Error in xpath",e);
throw new SolrException(500,"Error in xpath:" + xstr + " for " + name,e,false);
} catch (SolrException e) {
throw(e);
} catch (Throwable e) {
SolrException.log(log,"Error in xpath",e);
throw new SolrException(500,"Error in xpath:" + xstr+ " for " + name,e,false);
}
}
public String getVal(String path, boolean errIfMissing) {
Node nd = getNode(path,errIfMissing);
if (nd==null) return null;
// should do the right thing for both attributes and elements.
// Oops, when running in Resin, I get an unsupported operation
// exception... need to use Sun default (apache)
String txt = nd.getTextContent();
log.fine(name + ' '+path+'='+txt);
return txt;
/******
short typ = nd.getNodeType();
if (typ==Node.ATTRIBUTE_NODE || typ==Node.TEXT_NODE) {
return nd.getNodeValue();
}
return nd.getTextContent();
******/
}
public String get(String path) {
return getVal(path,true);
}
public String get(String path, String def) {
String val = getVal(path, false);
return val!=null ? val : def;
}
public int getInt(String path) {
return Integer.parseInt(getVal(path, true));
}
public int getInt(String path, int def) {
String val = getVal(path, false);
return val!=null ? Integer.parseInt(val) : def;
}
public boolean getBool(String path) {
return Boolean.parseBoolean(getVal(path, true));
}
public boolean getBool(String path, boolean def) {
String val = getVal(path, false);
return val!=null ? Boolean.parseBoolean(val) : def;
}
public float getFloat(String path) {
return Float.parseFloat(getVal(path, true));
}
public float getFloat(String path, float def) {
String val = getVal(path, false);
return val!=null ? Float.parseFloat(val) : def;
}
//
// classloader related functions
//
private static final String project = "solr";
private static final String base = "org.apache" + "." + project;
private static final String[] packages = {"","analysis.","schema.","search.","update.","core.","request.","util."};
public static Class findClass(String cname, String... subpackages) {
ClassLoader loader = Thread.currentThread().getContextClassLoader();
if (subpackages.length==0) subpackages = packages;
// first try cname == full name
try {
return Class.forName(cname, true, loader);
} catch (ClassNotFoundException e) {
String newName=cname;
if (newName.startsWith("solar.")) {
// handle legacy package names
newName = cname.substring("solar.".length());
} else if (cname.startsWith(project+".")) {
newName = cname.substring(project.length()+1);
}
for (String subpackage : subpackages) {
try {
String name = base + '.' + subpackage + newName;
log.finest("Trying class name " + name);
return Class.forName(name, true, loader);
} catch (ClassNotFoundException e1) {
// ignore... assume first exception is best.
}
}
throw new SolrException(500, "Error loading class '" + cname + "'", e, false);
}
}
public static Object newInstance(String cname, String... subpackages) {
Class clazz = findClass(cname,subpackages);
try {
return clazz.newInstance();
} catch (Exception e) {
throw new SolrException(500,"Error instantiating class " + clazz, e, false);
}
}
public static InputStream openResource(String resource) {
ClassLoader loader = Thread.currentThread().getContextClassLoader();
InputStream is = loader.getResourceAsStream(resource);
if (is==null) {
throw new SolrException(500,"Can't open " + resource);
}
return is;
}
/**
* Returns a list of non-blank non-comment lines with whitespace trimmed from front and back.
* @param resource the name of the resource to load from the context classloader
* @return a list of the trimmed non-blank, non-comment lines
* @throws IOException if the resource cannot be read
*/
public static List<String> getLines(String resource) throws IOException {
BufferedReader input = null;
try {
// todo - allow configurable charset?
input = new BufferedReader(new InputStreamReader(openResource(resource), "UTF-8"));
} catch (UnsupportedEncodingException e) {
throw new RuntimeException(e);
}
ArrayList<String> lines = new ArrayList<String>();
for (String word=null; (word=input.readLine())!=null;) {
// skip comments
if (word.startsWith("#")) continue;
word=word.trim();
// skip blank lines
if (word.length()==0) continue;
lines.add(word);
}
return lines;
}
}
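Two usage fragments for the class above; the resource name and xpath are illustrative, while the short-name resolution follows the subpackage list in findClass:
Config config = new Config("solrconfig.xml",
    Config.openResource("solrconfig.xml"), "/config/");
int cacheSize = config.getInt("query/queryResultCache/@size", 512);
// short class names resolve through the org.apache.solr.* subpackages:
Class c = Config.findClass("solr.StandardRequestHandler");
// -> org.apache.solr.request.StandardRequestHandler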

View File

@ -0,0 +1,51 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.util.NamedList;
import java.util.List;
/**
* @author yonik
* @version $Id$
*/
class QuerySenderListener extends AbstractSolrEventListener {
public void newSearcher(SolrIndexSearcher newSearcher, SolrIndexSearcher currentSearcher) {
SolrCore core = SolrCore.getSolrCore();
log.info("QuerySenderListener sending requests to " + newSearcher);
for (NamedList nlst : (List<NamedList>)args.get("queries")) {
try {
LocalSolrQueryRequest req = new LocalSolrQueryRequest(core, nlst);
SolrQueryResponse rsp = new SolrQueryResponse();
core.execute(req,rsp);
} catch (Exception e) {
// do nothing... we want to continue with the other requests.
// the failure should have already been logged.
}
log.info("QuerySenderListener done.");
}
}
}
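A hedged sketch of the argument shape this listener consumes; it assumes AbstractSolrEventListener.init (not shown here) stores the NamedList in the args field read above, and the query values are illustrative:

// Build a "queries" list: each entry is a NamedList of request params.
NamedList q = new NamedList();
q.add("q", "fast_warm");   // hypothetical warm-up query
q.add("start", "0");
q.add("rows", "10");
List<NamedList> queries = new ArrayList<NamedList>();
queries.add(q);
NamedList args = new NamedList();
args.add("queries", queries);
QuerySenderListener listener = new QuerySenderListener();
listener.init(args);                  // assumed to stash args in the base class
listener.newSearcher(searcher, null); // searcher: some SolrIndexSearcher in scope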

View File

@@ -0,0 +1,91 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core;
import org.apache.solr.util.DOMUtil;
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.request.StandardRequestHandler;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import javax.xml.xpath.XPathConstants;
import java.util.logging.Logger;
import java.util.HashMap;
/**
* @author yonik
*/
final class RequestHandlers {
public static Logger log = Logger.getLogger(org.apache.solr.core.RequestHandlers.class.getName());
public static final String DEFAULT_HANDLER_NAME="standard";
final HashMap<String, SolrRequestHandler> map = new HashMap<String,SolrRequestHandler>();
public RequestHandlers(Config config) {
NodeList nodes = (NodeList)config.evaluate("requestHandler", XPathConstants.NODESET);
if (nodes!=null) {
for (int i=0; i<nodes.getLength(); i++) {
Node node = nodes.item(i);
// We can tolerate an error in some request handlers, still load the
// others, and have a working system.
try {
String name = DOMUtil.getAttr(node,"name","requestHandler config");
String className = DOMUtil.getAttr(node,"class","requestHandler config");
log.info("adding requestHandler " + name + "=" + className);
SolrRequestHandler handler = (SolrRequestHandler) Config.newInstance(className);
handler.init(DOMUtil.childNodesToNamedList(node));
put(name, handler);
} catch (Exception e) {
SolrException.logOnce(log,null,e);
}
}
}
//
// Get the default handler and add it in the map under null and empty
// to act as the default.
//
SolrRequestHandler handler = get(DEFAULT_HANDLER_NAME);
if (handler == null) {
handler = new StandardRequestHandler();
put(DEFAULT_HANDLER_NAME, handler);
}
put(null, handler);
put("", handler);
}
public SolrRequestHandler get(String handlerName) {
return map.get(handlerName);
}
public void put(String handlerName, SolrRequestHandler handler) {
map.put(handlerName, handler);
if (handlerName != null && handlerName != "") {
if (handler instanceof SolrInfoMBean) {
SolrInfoRegistry.getRegistry().put(handlerName, (SolrInfoMBean)handler);
}
}
}
}
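To make the fallback behavior concrete, a small sketch: lookups by null or the empty string resolve to the same instance registered under "standard", as arranged in the constructor above.

RequestHandlers handlers = new RequestHandlers(SolrConfig.config);
SolrRequestHandler std = handlers.get("standard");
// null and "" were mapped to the default handler in the constructor:
assert handlers.get(null) == std;
assert handlers.get("") == std;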

View File

@@ -0,0 +1,103 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core;
import org.apache.solr.util.NamedList;
import org.apache.solr.search.SolrIndexSearcher;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.ArrayList;
import java.util.logging.Level;
/**
* @author yonik
*/
class RunExecutableListener extends AbstractSolrEventListener {
protected String[] cmd;
protected File dir;
protected String[] envp;
protected boolean wait=true;
public void init(NamedList args) {
super.init(args);
List cmdlist = new ArrayList();
cmdlist.add(args.get("exe"));
List lst = (List)args.get("args");
if (lst != null) cmdlist.addAll(lst);
cmd = (String[])cmdlist.toArray(new String[cmdlist.size()]);
lst = (List)args.get("env");
if (lst != null) {
envp = (String[])lst.toArray(new String[lst.size()]);
}
String str = (String)args.get("dir");
if (str==null || str.equals("") || str.equals(".") || str.equals("./")) {
dir = null;
} else {
dir = new File(str);
}
if ("false".equals(args.get("wait"))) wait=false;
}
protected int exec(String callback) {
int ret = 0;
try {
boolean doLog = log.isLoggable(Level.FINE);
if (doLog) {
log.fine("About to exec " + cmd[0]);
}
Process proc = Runtime.getRuntime().exec(cmd, envp ,dir);
if (wait) {
try {
ret = proc.waitFor();
} catch (InterruptedException e) {
SolrException.log(log,e);
}
}
if (wait && doLog) {
log.fine("Executable " + cmd[0] + " returned " + ret);
}
} catch (IOException e) {
// don't throw exception, just log it...
SolrException.log(log,e);
}
return ret;
}
public void postCommit() {
// anything generic need to be passed to the external program?
// the directory of the index? the command that caused it to be
// invoked? the version of the index?
exec("postCommit");
}
public void newSearcher(SolrIndexSearcher newSearcher, SolrIndexSearcher currentSearcher) {
exec("newSearcher");
}
}
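A hedged sketch of the NamedList arguments init() expects; the executable name and flag below are illustrative, not part of any shipped configuration:

NamedList args = new NamedList();
args.add("exe", "commit-hook.sh");   // required: the program to run (hypothetical)
List<String> argv = new ArrayList<String>();
argv.add("--snapshot");              // hypothetical extra argument
args.add("args", argv);              // optional argv tail
args.add("dir", ".");                // ".", "", "./" or null inherit the cwd
args.add("wait", "false");           // don't block on the exit code
RunExecutableListener listener = new RunExecutableListener();
listener.init(args);
listener.postCommit();               // runs commit-hook.sh --snapshot after a commit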

View File

@@ -0,0 +1,51 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core;
import java.io.InputStream;
/**
* @author yonik
* @version $Id: SolrConfig.java,v 1.3 2005/12/02 04:31:06 yonik Exp $
*/
public class SolrConfig {
public static Config config;
static {
Exception e=null;
String file="solrconfig.xml";
InputStream is;
try {
is = Config.openResource(file);
} catch (Exception ee) {
e=ee;
file = "solarconfig.xml"; // backward compat
is = Config.openResource(file);
}
if (is!=null) {
try {
config=new Config(file, is, "/config/");
is.close();
} catch (Exception ee) {
throw new RuntimeException(ee);
}
Config.log.info("Loaded Config solarconfig.xml");
} else {
throw new RuntimeException(e);
}
}
}
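Once the static block has run, other classes read settings through the shared Config instance. The three lookups below appear verbatim in SolrCore further down; the literal defaults here are illustrative:

int maxClauses = SolrConfig.config.getInt("query/maxBooleanClauses", 1024);
boolean unlock = SolrConfig.config.getBool("mainIndex/unlockOnStartup", false);
String indexDir = SolrConfig.config.get("indexDir", "index");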

View File

@@ -0,0 +1,970 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.update.*;
import org.apache.solr.util.DOMUtil;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.StrUtils;
import org.apache.solr.util.XML;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
import org.xmlpull.v1.XmlPullParserFactory;
import javax.xml.xpath.XPathConstants;
import java.io.File;
import java.io.IOException;
import java.io.Reader;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.logging.Logger;
/**
* @author yonik
* @version $Id: SolrCore.java,v 1.47 2006/01/10 05:04:44 yonik Exp $
*/
public final class SolrCore {
public static final String cvsId="$Id: SolrCore.java,v 1.47 2006/01/10 05:04:44 yonik Exp $";
public static final String cvsSource="$Source: /cvs/main/searching/solr/solarcore/src/solr/SolrCore.java,v $";
public static final String cvsTag="$Name: $";
public static final String version="1.0";
public static Logger log = Logger.getLogger(SolrCore.class.getName());
private final IndexSchema schema;
private final String index_path;
private final UpdateHandler updateHandler;
public static SolrIndexConfig mainIndexConfig = new SolrIndexConfig("mainIndex");
static {
BooleanQuery.setMaxClauseCount(SolrConfig.config.getInt("query/maxBooleanClauses",BooleanQuery.getMaxClauseCount()));
}
public static List<SolrEventListener> parseListener(String path) {
List<SolrEventListener> lst = new ArrayList<SolrEventListener>();
log.info("Searching for listeners: " +path);
NodeList nodes = (NodeList)SolrConfig.config.evaluate(path, XPathConstants.NODESET);
if (nodes!=null) {
for (int i=0; i<nodes.getLength(); i++) {
Node node = nodes.item(i);
String className = DOMUtil.getAttr(node,"class");
SolrEventListener listener = (SolrEventListener)Config.newInstance(className);
listener.init(DOMUtil.childNodesToNamedList(node));
lst.add(listener);
log.info("added SolrEventListener: " + listener);
}
}
return lst;
}
List<SolrEventListener> firstSearcherListeners;
List<SolrEventListener> newSearcherListeners;
private void parseListeners() {
firstSearcherListeners = parseListener("//listener[@event=\"firstSearcher\"]");
newSearcherListeners = parseListener("//listener[@event=\"newSearcher\"]");
}
public IndexSchema getSchema() { return schema; }
public String getDir() { return index_path; }
private final RequestHandlers reqHandlers = new RequestHandlers(SolrConfig.config);
public SolrRequestHandler getRequestHandler(String handlerName) {
return reqHandlers.get(handlerName);
}
// TODO - what about a master that might not normally have a searcher open?
@Deprecated
public int maxDoc() {
RefCounted<SolrIndexSearcher> holder=null;
int num=0;
try {
holder = getSearcher();
SolrIndexSearcher searcher = holder.get();
num = searcher.maxDoc();
} catch (IOException e) {
log(e);
} finally {
if (holder != null) holder.decref();
}
return num;
}
// gets a non-caching searcher
public SolrIndexSearcher newSearcher(String name) throws IOException {
return new SolrIndexSearcher(schema, name,getDir(),false);
}
void initIndex() {
try {
File dirFile = new File(getDir());
boolean indexExists = dirFile.canRead();
boolean removeLocks = SolrConfig.config.getBool("mainIndex/unlockOnStartup", false);
if (removeLocks) {
// to remove locks, the directory must already exist... so we create it
// if it didn't exist already...
Directory dir = FSDirectory.getDirectory(dirFile, !indexExists);
if (IndexReader.isLocked(dir)) {
log.warning("WARNING: Solr index directory '" + getDir() + "' is locked. Unlocking...");
IndexReader.unlock(dir);
}
}
// Create the index if it doesn't exist. Note that indexExists was tested *before*
// lock removal, since that will result in the creation of the directory.
if(!indexExists) {
log.warning("Solr index directory '" + dirFile + "' doesn't exist."
+ " Creating new index...");
SolrIndexWriter writer = new SolrIndexWriter("SolrCore.initIndex",getDir(), true, schema, mainIndexConfig);
writer.close();
}
} catch (IOException e) {
throw new RuntimeException(e);
}
}
private UpdateHandler createUpdateHandler(String className) {
try {
Class handlerClass = Config.findClass(className);
java.lang.reflect.Constructor cons = handlerClass.getConstructor(new Class[]{SolrCore.class});
return (UpdateHandler)cons.newInstance(new Object[]{this});
} catch (SolrException e) {
throw e;
} catch (Exception e) {
throw new SolrException(500,"Error Instantiating Update Handler "+className, e);
}
}
// Singleton for now...
private static SolrCore core;
public static SolrCore getSolrCore() {
synchronized (SolrCore.class) {
if (core==null) core = new SolrCore(null,null);
return core;
}
}
public SolrCore(String index_path, IndexSchema schema) {
synchronized (SolrCore.class) {
// this is for backward compatibility (and also the reason
// the sync block is needed)
core = this; // set singleton
try {
if (index_path==null) {
index_path=SolrConfig.config.get("indexDir","index");
}
log.info("Opening new SolrCore at " + index_path);
if (schema==null) {
schema = new IndexSchema("schema.xml");
}
this.schema = schema;
this.index_path = index_path;
parseListeners();
initIndex();
try {
// Open the searcher *before* the handler so we don't end up opening
// one in the middle.
getSearcher(false,false,null);
updateHandler = createUpdateHandler(
SolrConfig.config.get("updateHandler/@class", DirectUpdateHandler.class.getName())
);
} catch (IOException e) {
throw new RuntimeException(e);
}
} finally {
}
}
}
public void close() {
log.info("CLOSING SolrCore!");
try {
closeSearcher();
} catch (Exception e) {
SolrException.log(log,e);
}
try {
searcherExecutor.shutdown();
} catch (Exception e) {
SolrException.log(log,e);
}
try {
updateHandler.close();
} catch (Exception e) {
SolrException.log(log,e);
}
}
void finalizer() { close(); }
////////////////////////////////////////////////////////////////////////////////
// Searcher Control
////////////////////////////////////////////////////////////////////////////////
// The current searcher used to service queries.
// Don't access this directly!!!! use getSearcher() to
// get it (and it will increment the ref count at the same time)
private RefCounted<SolrIndexSearcher> _searcher;
final ExecutorService searcherExecutor = Executors.newSingleThreadExecutor();
private int onDeckSearchers; // number of searchers preparing
private Object searcherLock = new Object(); // the sync object for the searcher
public RefCounted<SolrIndexSearcher> getSearcher() {
try {
return getSearcher(false,true,null);
} catch (IOException e) {
SolrException.log(log,null,e);
return null;
}
}
/**
* Get a {@link SolrIndexSearcher} or start the process of creating a new one.
* <p>
* The registered searcher is the default searcher used to service queries.
* A searcher will normally be registered after all of the warming
* and event handlers (newSearcher or firstSearcher events) have run.
* In the case where there is no registered searcher, the newly created searcher will
* be registered before running the event handlers (a slow searcher is better than no searcher).
*
* <p>
* If <tt>forceNew==true</tt> then a new searcher will be opened and registered
* regardless of whether there is already a registered searcher or other searchers
* in the process of being created.
* <p>
* If <tt>forceNew==false</tt> then:<ul>
* <li>If a searcher is already registered, that searcher will be returned</li>
* <li>If no searcher is currently registered, but at least one is in the process of being created, then
* this call will block until the first searcher is registered</li>
* <li>If no searcher is currently registered, and none are in the process of being created, a new
* searcher will be created.</li>
* </ul>
* <p>
* If <tt>returnSearcher==true</tt> then a {@link RefCounted}&lt;{@link SolrIndexSearcher}&gt; will be returned with
* the reference count incremented. It <b>must</b> be decremented when no longer needed.
* <p>
* If <tt>waitSearcher!=null</tt> and a new {@link SolrIndexSearcher} was created,
* then it is filled in with a Future that will return after the searcher is registered. The Future may be set to
* <tt>null</tt> in which case the SolrIndexSearcher created has already been registered at the time
* this method returned.
* <p>
* @param forceNew if true, force the open of a new index searcher regardless if there is already one open.
* @param returnSearcher if true, returns a {@link RefCounted}&lt;{@link SolrIndexSearcher}&gt; holder with the refcount already incremented.
* @param waitSearcher if non-null, will be filled in with a {@link Future} that will return after the new searcher is registered.
* @return a searcher holder if returnSearcher==true, otherwise null
* @throws IOException if a new searcher cannot be opened
*/
public RefCounted<SolrIndexSearcher> getSearcher(boolean forceNew, boolean returnSearcher, final Future[] waitSearcher) throws IOException {
// it may take some time to open an index.... we may need to make
// sure that two threads aren't trying to open one at the same time
// if it isn't necessary.
synchronized (searcherLock) {
// see if we can return the current searcher
if (_searcher!=null && !forceNew) {
if (returnSearcher) {
_searcher.incref();
return _searcher;
} else {
return null;
}
}
// check to see if we can wait for someone else's searcher to be set
if (onDeckSearchers>0 && !forceNew && _searcher==null) {
try {
searcherLock.wait();
} catch (InterruptedException e) {
log.info(SolrException.toStr(e));
}
}
// check again: see if we can return right now
if (_searcher!=null && !forceNew) {
if (returnSearcher) {
_searcher.incref();
return _searcher;
} else {
return null;
}
}
// At this point, we know we need to open a new searcher...
// first: increment count to signal other threads that we are
// opening a new searcher.
onDeckSearchers++;
}
// open the index synchronously
// if this fails, we need to decrement onDeckSearchers again.
SolrIndexSearcher tmp;
try {
if (onDeckSearchers < 1) {
// should never happen... just a sanity check
log.severe("ERROR!!! onDeckSearchers is " + onDeckSearchers);
// reset to 1 (don't bother synchronizing)
onDeckSearchers=1;
} else if (onDeckSearchers > 1) {
log.info("PERFORMANCE WARNING: Overlapping onDeckSearchers=" + onDeckSearchers);
}
tmp = new SolrIndexSearcher(schema, "main", index_path, true);
} catch (Throwable th) {
synchronized(searcherLock) {
onDeckSearchers--;
// notify another waiter to continue... it may succeed
// and wake any others.
searcherLock.notify();
}
// need to close the searcher here??? we shouldn't have to.
throw new RuntimeException(th);
}
final SolrIndexSearcher newSearcher=tmp;
RefCounted<SolrIndexSearcher> currSearcherHolder=null;
final RefCounted<SolrIndexSearcher> newSearchHolder=newHolder(newSearcher);
if (returnSearcher) newSearchHolder.incref();
// a signal to decrement onDeckSearchers if something goes wrong.
final boolean[] decrementOnDeckCount=new boolean[1];
decrementOnDeckCount[0]=true;
try {
synchronized (searcherLock) {
if (_searcher == null) {
// if there isn't a current searcher then register this one
// before warming is complete instead of waiting.
registerSearcher(newSearchHolder);
decrementOnDeckCount[0]=false;
} else {
// get a reference to the current searcher for purposes of autowarming.
currSearcherHolder=_searcher;
currSearcherHolder.incref();
}
}
final SolrIndexSearcher currSearcher = currSearcherHolder==null ? null : currSearcherHolder.get();
//
// Note! if we registered the new searcher (but didn't increment its
// reference count because returnSearcher==false), it's possible for
// someone else to register another searcher, and thus cause newSearcher
// to close while we are warming.
//
// Should we protect against that by incrementing the reference count?
// Maybe we should just let it fail? After all, if returnSearcher==false
// and newSearcher has been de-registered, what's the point of continuing?
//
Future future=null;
// warm the new searcher based on the current searcher.
// should this go before the other event handlers or after?
if (currSearcher != null) {
future = searcherExecutor.submit(
new Callable() {
public Object call() throws Exception {
try {
newSearcher.warm(currSearcher);
} catch (Throwable e) {
SolrException.logOnce(log,null,e);
}
return null;
}
}
);
}
if (currSearcher==null && firstSearcherListeners.size() > 0) {
future = searcherExecutor.submit(
new Callable() {
public Object call() throws Exception {
try {
for (SolrEventListener listener : firstSearcherListeners) {
listener.newSearcher(newSearcher,null);
}
} catch (Throwable e) {
SolrException.logOnce(log,null,e);
}
return null;
}
}
);
}
if (currSearcher!=null && newSearcherListeners.size() > 0) {
future = searcherExecutor.submit(
new Callable() {
public Object call() throws Exception {
try {
for (SolrEventListener listener : newSearcherListeners) {
listener.newSearcher(newSearcher,currSearcher);
}
} catch (Throwable e) {
SolrException.logOnce(log,null,e);
}
return null;
}
}
);
}
// WARNING: this code assumes a single threaded executor (that all tasks
// queued will finish first).
final RefCounted<SolrIndexSearcher> currSearcherHolderF = currSearcherHolder;
Future finalFuture=null;
if (currSearcherHolder != null) {
finalFuture = searcherExecutor.submit(
new Callable() {
public Object call() throws Exception {
try {
// signal that we no longer need to decrement
// the count *before* registering the searcher since
// registerSearcher will decrement even if it errors.
decrementOnDeckCount[0]=false;
registerSearcher(newSearchHolder);
} catch (Throwable e) {
SolrException.logOnce(log,null,e);
} finally {
// we are all done with the old searcher we used
// for warming...
currSearcherHolderF.decref();
}
return null;
}
}
);
}
if (waitSearcher != null) {
waitSearcher[0] = finalFuture;
}
// Return the searcher even though the warming tasks may still be running;
// callers may wait on the waitSearcher future returned.
return returnSearcher ? newSearchHolder : null;
}
catch (Exception e) {
SolrException.logOnce(log,null,e);
if (currSearcherHolder != null) currSearcherHolder.decref();
synchronized (searcherLock) {
if (decrementOnDeckCount[0]) {
onDeckSearchers--;
}
if (onDeckSearchers < 0) {
// sanity check... should never happen
log.severe("ERROR!!! onDeckSearchers after decrement=" + onDeckSearchers);
onDeckSearchers=0; // try and recover
}
// if we failed, we need to wake up at least one waiter to continue the process
searcherLock.notify();
}
// since the indexreader was already opened, assume we can continue on
// even though we got an exception.
return returnSearcher ? newSearchHolder : null;
}
}
private RefCounted<SolrIndexSearcher> newHolder(SolrIndexSearcher newSearcher) {
RefCounted<SolrIndexSearcher> holder = new RefCounted<SolrIndexSearcher>(newSearcher)
{
public void close() {
try {
resource.close();
} catch (IOException e) {
log.severe("Error closing searcher:" + SolrException.toStr(e));
}
}
};
holder.incref(); // set ref count to 1 to account for this._searcher
return holder;
}
// Take control of newSearcherHolder (which should have a reference count of at
// least 1 already).  If the caller wishes to use the newSearcherHolder directly
// after registering it, then they should increment the reference count *before*
// calling this method.
//
// onDeckSearchers will also be decremented (it should have been incremented
// as a result of opening a new searcher).
private void registerSearcher(RefCounted<SolrIndexSearcher> newSearcherHolder) throws IOException {
synchronized (searcherLock) {
try {
if (_searcher != null) {
_searcher.decref(); // dec refcount for this._searcher
_searcher=null;
}
_searcher = newSearcherHolder;
SolrIndexSearcher newSearcher = newSearcherHolder.get();
SolrInfoRegistry.getRegistry().put("currentSearcher", newSearcher);
newSearcher.register(); // register subitems (caches)
log.info("Registered new searcher " + newSearcher);
} catch (Throwable e) {
log(e);
} finally {
// wake up anyone waiting for a searcher
// even in the face of errors.
onDeckSearchers--;
searcherLock.notifyAll();
}
}
}
public void closeSearcher() {
log.info("Closing main searcher on request.");
synchronized (searcherLock) {
if (_searcher != null) {
_searcher.decref(); // dec refcount for this._searcher
_searcher=null;
SolrInfoRegistry.getRegistry().remove("currentSearcher");
}
}
}
public void execute(SolrQueryRequest req, SolrQueryResponse rsp) {
SolrRequestHandler handler = getRequestHandler(req.getQueryType());
if (handler==null) {
log.warning("Unknown Request Handler '" + req.getQueryType() +"' :" + req);
throw new SolrException(400,"Unknown Request Handler '" + req.getQueryType() + "'", true);
}
handler.handleRequest(req,rsp);
log.info(req.getParamString()+ " 0 "+
(int)(rsp.getEndTime() - req.getStartTime()));
}
XmlPullParserFactory factory;
{
try {
factory = XmlPullParserFactory.newInstance();
} catch (XmlPullParserException e) {
throw new RuntimeException(e);
}
factory.setNamespaceAware(false);
}
private int findNextTag(XmlPullParser xpp, String tag) throws XmlPullParserException, IOException {
int eventType;
while((eventType=xpp.next()) != XmlPullParser.END_DOCUMENT) {
if(eventType == XmlPullParser.START_TAG) {
if (tag.equals(xpp.getName())) break;
}
}
return eventType;
}
public void update(Reader reader, Writer writer) {
// TODO: add param to specify maximum time to commit?
// todo - might be nice to separate command parsing w/ a factory
// then new commands could be added w/o risk to old ones
XmlPullParser xpp = null;
try {
xpp = factory.newPullParser();
} catch (XmlPullParserException e) {
throw new RuntimeException(e);
}
long startTime=System.currentTimeMillis();
try {
xpp.setInput(reader);
xpp.nextTag();
String currTag = xpp.getName();
if ("add".equals(currTag)) {
log.finest("SolrCore.update(add)");
AddUpdateCommand cmd = new AddUpdateCommand();
cmd.allowDups=false; // the default
int status=0;
boolean pendingAttr=false, committedAttr=false;
int attrcount = xpp.getAttributeCount();
for (int i=0; i<attrcount; i++) {
String attrName = xpp.getAttributeName(i);
String attrVal = xpp.getAttributeValue(i);
if ("allowDups".equals(attrName)) {
cmd.allowDups = StrUtils.parseBoolean(attrVal);
} else if ("overwritePending".equals(attrName)) {
cmd.overwritePending = StrUtils.parseBoolean(attrVal);
pendingAttr=true;
} else if ("overwriteCommitted".equals(attrName)) {
cmd.overwriteCommitted = StrUtils.parseBoolean(attrVal);
committedAttr=true;
} else {
log.warning("Unknown attribute id in add:" + attrName);
}
}
//set defaults for committed and pending based on allowDups value
if (!pendingAttr) cmd.overwritePending=!cmd.allowDups;
if (!committedAttr) cmd.overwriteCommitted=!cmd.allowDups;
DocumentBuilder builder = new DocumentBuilder(schema);
int eventType=0;
while(true) {
// this may be our second time through the loop in the case
// that there are multiple docs in the add... so make sure that
// objects can handle that.
cmd.id = null; // reset the id for this add
if (eventType !=0) {
eventType=xpp.getEventType();
if (eventType==XmlPullParser.END_DOCUMENT) break;
}
// eventType = xpp.next();
eventType = xpp.nextTag();
if (eventType == XmlPullParser.END_TAG || eventType == XmlPullParser.END_DOCUMENT) break; // should match </add>
try {
readDoc(builder,xpp);
builder.endDoc();
cmd.doc = builder.getDoc();
log.finest("adding doc...");
updateHandler.addDoc(cmd);
log.info("add "+status+" "+(System.currentTimeMillis()-startTime));
writer.write("<result status=\"" + status + "\"></result>");
} catch (SolrException e) {
log(e);
log.info("add "+e.code+" "+(System.currentTimeMillis()-startTime));
writeResult(writer,e);
// we may not have finished reading the XML for this cmd,
// so eat any unused input up till "</add>"
eventType = xpp.getEventType();
while (true) {
if ( eventType == XmlPullParser.END_DOCUMENT
|| (eventType == XmlPullParser.END_TAG && "add".equals(xpp.getName())))
{
break;
}
eventType = xpp.next();
}
}
}
/***
while (findNextTag(xpp,"doc") != XmlPullParser.END_DOCUMENT) {
readDoc(builder,xpp);
Document doc = builder.endDoc();
indexWriter.addDocument(doc);
docsAdded++;
}
***/
} // end add
else if ("commit".equals(currTag) || "optimize".equals(currTag)) {
log.finest("parsing "+currTag);
try {
CommitUpdateCommand cmd = new CommitUpdateCommand("optimize".equals(currTag));
boolean sawWaitSearcher=false, sawWaitFlush=false;
int attrcount = xpp.getAttributeCount();
for (int i=0; i<attrcount; i++) {
String attrName = xpp.getAttributeName(i);
String attrVal = xpp.getAttributeValue(i);
if ("waitFlush".equals(attrName)) {
cmd.waitFlush = StrUtils.parseBoolean(attrVal);
sawWaitFlush=true;
} else if ("waitSearcher".equals(attrName)) {
cmd.waitSearcher = StrUtils.parseBoolean(attrVal);
sawWaitSearcher=true;
} else {
log.warning("unexpected attribute commit/@" + attrName);
}
}
// If waitFlush is specified and waitSearcher wasn't, then
// clear waitSearcher.
if (sawWaitFlush && !sawWaitSearcher) {
cmd.waitSearcher=false;
}
updateHandler.commit(cmd);
if ("optimize".equals(currTag)) {
log.info("optimize 0 "+(System.currentTimeMillis()-startTime));
}
else {
log.info("commit 0 "+(System.currentTimeMillis()-startTime));
}
while (true) {
int eventType = xpp.nextTag();
if (eventType == XmlPullParser.END_TAG) break; // match </commit>
}
writer.write("<result status=\"0\"></result>");
} catch (SolrException e) {
log(e);
if ("optimize".equals(currTag)) {
log.info("optimize "+e.code+" "+
(System.currentTimeMillis()-startTime));
}
else {
log.info("commit "+e.code+" "+
(System.currentTimeMillis()-startTime));
}
writeResult(writer,e);
} catch (Exception e) {
SolrException.log(log, "Exception during commit/optimize",e);
writeResult(writer,e);
}
} // end commit
else if ("delete".equals(currTag)) {
log.finest("parsing delete");
try {
DeleteUpdateCommand cmd = new DeleteUpdateCommand();
cmd.fromPending=true;
cmd.fromCommitted=true;
int attrcount = xpp.getAttributeCount();
for (int i=0; i<attrcount; i++) {
String attrName = xpp.getAttributeName(i);
String attrVal = xpp.getAttributeValue(i);
if ("fromPending".equals(attrName)) {
cmd.fromPending = StrUtils.parseBoolean(attrVal);
} else if ("fromCommitted".equals(attrName)) {
cmd.fromCommitted = StrUtils.parseBoolean(attrVal);
} else {
log.warning("unexpected attribute delete/@" + attrName);
}
}
int eventType = xpp.nextTag();
currTag = xpp.getName();
String val = xpp.nextText();
if ("id".equals(currTag)) {
cmd.id = val;
updateHandler.delete(cmd);
log.info("delete(id " + val + ") 0 " +
(System.currentTimeMillis()-startTime));
} else if ("query".equals(currTag)) {
cmd.query = val;
updateHandler.deleteByQuery(cmd);
log.info("deleteByQuery(query " + val + ") 0 " +
(System.currentTimeMillis()-startTime));
} else {
log.warning("unexpected XML tag /delete/"+currTag);
throw new SolrException(400,"unexpected XML tag /delete/"+currTag);
}
writer.write("<result status=\"0\"></result>");
while (xpp.nextTag() != XmlPullParser.END_TAG);
} catch (SolrException e) {
log(e);
log.info("delete "+e.code+" "+(System.currentTimeMillis()-startTime));
writeResult(writer,e);
} catch (Exception e) {
log(e);
writeResult(writer,e);
}
} // end delete
} catch (XmlPullParserException e) {
log(e);
writeResult(writer,e);
} catch (IOException e) {
log(e);
writeResult(writer,e);
} catch (SolrException e) {
log(e);
log.info("update "+e.code+" "+(System.currentTimeMillis()-startTime));
writeResult(writer,e);
} catch (Throwable e) {
log(e);
writeResult(writer,e);
}
}
private void readDoc(DocumentBuilder builder, XmlPullParser xpp) throws IOException, XmlPullParserException {
// xpp should be at <doc> at this point
builder.startDoc();
int attrcount = xpp.getAttributeCount();
float docBoost = 1.0f;
for (int i=0; i<attrcount; i++) {
String attrName = xpp.getAttributeName(i);
String attrVal = xpp.getAttributeValue(i);
if ("boost".equals(attrName)) {
docBoost = Float.parseFloat(attrVal);
} else {
log.warning("Unknown attribute doc/@" + attrName);
}
}
if (docBoost != 1.0f) builder.setBoost(docBoost);
// while (findNextTag(xpp,"field") != XmlPullParser.END_DOCUMENT) {
while(true) {
int eventType = xpp.nextTag();
if (eventType == XmlPullParser.END_TAG) break; // </doc>
String tname=xpp.getName();
// System.out.println("FIELD READER AT TAG " + tname);
if (!"field".equals(tname)) {
log.warning("unexpected XML tag doc/"+tname);
throw new SolrException(400,"unexpected XML tag doc/"+tname);
}
//
// get field name and parse field attributes
//
attrcount = xpp.getAttributeCount();
String name=null;
float boost=1.0f;
boolean isNull=false;
for (int i=0; i<attrcount; i++) {
String attrName = xpp.getAttributeName(i);
String attrVal = xpp.getAttributeValue(i);
if ("name".equals(attrName)) {
name=attrVal;
} else if ("boost".equals(attrName)) {
boost=Float.parseFloat(attrVal);
} else if ("null".equals(attrName)) {
isNull=StrUtils.parseBoolean(attrVal);
} else {
log.warning("Unknown attribute doc/field/@" + attrName);
}
}
// now get the field value
String val = xpp.nextText(); // todo... text event for <field></field>???
// need this line for isNull???
// Don't add fields marked as null (for now at least)
if (!isNull) {
// use the field-level boost parsed above; the doc-level boost was
// already applied via builder.setBoost
if (boost != 1.0f) {
builder.addField(name,val,boost);
} else {
builder.addField(name,val);
}
}
// do I have to do a nextTag here to read the end_tag?
} // end field loop
}
final public static void log(Throwable e) {
SolrException.logOnce(log,null,e);
}
final static void writeResult(Writer out, SolrException e) {
try {
XML.writeXML(out,"result",e.getMessage(),"status",e.code());
} catch (Exception ee) {
log.severe("Error writing to putput stream: "+ee);
}
}
final static void writeResult(Writer out, Throwable e) {
try {
XML.writeXML(out,"result",SolrException.toStr(e),"status","1");
} catch (Exception ee) {
log.severe("Error writing to putput stream: "+ee);
}
}
}
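Given the getSearcher() contract documented above, here is a caller-side sketch of the reference-counting discipline, mirroring what maxDoc() does internally:

RefCounted<SolrIndexSearcher> holder = SolrCore.getSolrCore().getSearcher();
try {
  SolrIndexSearcher searcher = holder.get();
  // ... run searches against searcher ...
} finally {
  holder.decref(); // required: the refcount was incremented before return
}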

View File

@@ -0,0 +1,37 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core;
import org.apache.solr.util.NamedList;
import org.apache.solr.search.SolrIndexSearcher;
import java.util.logging.Logger;
/**
* @author yonik
* @version $Id: SolrEventListener.java,v 1.4 2005/05/25 04:26:47 yonik Exp $
*/
public interface SolrEventListener {
static final Logger log = Logger.getLogger(SolrCore.class.getName());
public void init(NamedList args);
public void postCommit();
public void newSearcher(SolrIndexSearcher newSearcher, SolrIndexSearcher currentSearcher);
}

View File

@@ -0,0 +1,108 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core;
import java.util.logging.Logger;
import java.io.CharArrayWriter;
import java.io.PrintWriter;
/**
* @author yonik
* @version $Id: SolrException.java,v 1.6 2005/06/14 20:42:26 yonik Exp $
*/
public class SolrException extends RuntimeException {
public boolean logged=false;
public SolrException(int code, String msg) {
super(msg);
this.code=code;
}
public SolrException(int code, String msg, boolean alreadyLogged) {
super(msg);
this.code=code;
this.logged=alreadyLogged;
}
public SolrException(int code, String msg, Throwable th, boolean alreadyLogged) {
super(msg,th);
this.code=code;
logged=alreadyLogged;
}
public SolrException(int code, String msg, Throwable th) {
this(code,msg,th,true);
}
public SolrException(int code, Throwable th) {
super(th);
this.code=code;
logged=true;
}
int code=0;
public int code() { return code; }
public void log(Logger log) { log(log,this); }
public static void log(Logger log, Throwable e) {
log.severe(toStr(e));
if (e instanceof SolrException) {
((SolrException)e).logged = true;
}
}
public static void log(Logger log, String msg, Throwable e) {
log.severe(msg + ':' + toStr(e));
if (e instanceof SolrException) {
((SolrException)e).logged = true;
}
}
public static void logOnce(Logger log, String msg, Throwable e) {
if (e instanceof SolrException) {
if(((SolrException)e).logged) return;
}
if (msg!=null) log(log,msg,e);
else log(log,e);
}
// public String toString() { return toStr(this); } // oops, inf loop
public String toString() { return super.toString(); }
public static String toStr(Throwable e) {
CharArrayWriter cw = new CharArrayWriter();
PrintWriter pw = new PrintWriter(cw);
e.printStackTrace(pw);
pw.flush();
return cw.toString();
/** This doesn't work for some reason!!!!!
StringWriter sw = new StringWriter();
PrintWriter pw = new PrintWriter(sw);
e.printStackTrace(pw);
pw.flush();
System.out.println("The STRING:" + sw.toString());
return sw.toString();
**/
}
}
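A small sketch of the logOnce() discipline used throughout SolrCore above: the logged flag makes repeated logging of the same exception a no-op as it propagates up the stack (assume log is a java.util.logging.Logger in scope):

try {
  throw new SolrException(400, "bad request"); // illustrative error
} catch (SolrException e) {
  SolrException.logOnce(log, null, e); // logs, and sets e.logged = true
  SolrException.logOnce(log, null, e); // silently skipped: already logged
}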

View File

@@ -0,0 +1,104 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core;
import java.net.URL;
import org.apache.solr.util.*;
/**
* @author ronp
* @version $Id: SolrInfo.java,v 1.3 2005/05/02 19:04:59 ronp Exp $
*/
// MBean pattern for holding various ui friendly strings and URLs
// for use by objects which are 'pluggable' to make administering
// production use easier
// name - simple common usage name, e.g. BasicQueryHandler
// version - simple common usage version, e.g. 2.0
// description - simple one or two line description
// cvsId - yes, really the CVS Id (type 'man co')
// cvsName - yes, really the CVS Name (type 'man co')
// cvsSource - yes, really the CVS Source (type 'man co')
// docs - URL list: TWIKI, Faq, Design doc, something! :)
abstract class SolrInfo implements SolrInfoMBean {
public static String _cvsId="$Id: SolrInfo.java,v 1.3 2005/05/02 19:04:59 ronp Exp $";
public static String _cvsSource="$Source: /cvs/main/searching/solr/solarcore/src/solr/SolrInfo.java,v $";
public static String _cvsName="$Name: $";
public String getName() { return this.name; }
public String getVersion() { return this.version; }
public String getDescription() { return this.description; }
public Category getCategory() { return SolrInfoMBean.Category.QUERYHANDLER; }
public String getCvsId() { return this.cvsId; }
public String getCvsName() { return this.cvsName; }
public String getCvsSource() { return this.cvsSource; }
public URL[] getDocs() { return this.docs; }
public NamedList getStatistics() { return null; }
public void setName(String name ) { this.name = name; }
public void setVersion(String vers) { this.version = vers; }
public void setDescription(String desc) { this.description = desc; }
public void setCvsId(String cvsId) { this.cvsId = cvsId; }
public void setCvsName(String cvsName) { this.cvsName = cvsName; }
public void setCvsSource(String cvsSource) { this.cvsSource = cvsSource; }
public void setDocs(URL[] docs) { this.docs = docs; }
public void addDoc(URL doc)
{
if (doc == null) {
// should throw runtime exception
return;
}
if (docs != null) {
URL[] newDocs = new URL[docs.length+1];
int i;
for (i = 0; i < docs.length; i++) {
newDocs[i] = docs[i];
}
newDocs[i] = doc;
docs = newDocs;
} else {
docs = new URL[1];
docs[0] = doc;
}
}
public void addDoc(String doc)
{
if (doc == null) {
// should throw runtime exception
return;
}
try {
URL docURL = new URL(doc);
addDoc(docURL);
} catch (Exception e) {
// ignore for now
}
}
private String name;
private String version;
private String description;
public String cvsId;
public String cvsSource;
public String cvsName;
private URL[] docs;
}

View File

@@ -0,0 +1,52 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core;
import java.net.URL;
import org.apache.solr.util.*;
/**
* @author ronp
* @version $Id: SolrInfoMBean.java,v 1.3 2005/05/04 19:15:23 ronp Exp $
*/
// MBean interface for getting various ui friendly strings and URLs
// for use by objects which are 'pluggable' to make administering
// production use easier
// name - simple common usage name, e.g. BasicQueryHandler
// version - simple common usage version, e.g. 2.0
// description - simple one or two line description
// cvsId - yes, really the CVS Id (type 'man co')
// cvsName - yes, really the CVS Name (type 'man co')
// cvsSource - yes, really the CVS Source (type 'man co')
// docs - URL list: TWIKI, Faq, Design doc, something! :)
public interface SolrInfoMBean {
public enum Category { CORE, QUERYHANDLER, UPDATEHANDLER, CACHE, OTHER };
public String getName();
public String getVersion();
public String getDescription();
public Category getCategory();
public String getCvsId();
public String getCvsName();
public String getCvsSource();
public URL[] getDocs();
public NamedList getStatistics();
}

View File

@@ -0,0 +1,42 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.core;
import org.apache.solr.core.SolrInfoMBean;
import java.util.*;
/**
* @author ronp
* @version $Id: SolrInfoRegistry.java,v 1.5 2005/05/14 03:34:39 yonik Exp $
*/
// A Registry to hold a collection of SolrInfo objects
public class SolrInfoRegistry {
public static final String cvsId="$Id: SolrInfoRegistry.java,v 1.5 2005/05/14 03:34:39 yonik Exp $";
public static final String cvsSource="$Source: /cvs/main/searching/solr/solarcore/src/solr/SolrInfoRegistry.java,v $";
public static final String cvsName="$Name: $";
private static final Map<String,SolrInfoMBean> inst = Collections.synchronizedMap(new LinkedHashMap<String,SolrInfoMBean>());
public static Map<String, SolrInfoMBean> getRegistry()
{
return inst;
}
}
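Usage is a plain put/get on the shared map keyed by component name, as RequestHandlers.put and SolrCore.registerSearcher do above; the key below is illustrative:

SolrInfoMBean mbean = new StandardRequestHandler(); // handlers are SolrInfoMBeans
SolrInfoRegistry.getRegistry().put("standard", mbean);
SolrInfoMBean found = SolrInfoRegistry.getRegistry().get("standard");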

View File

@@ -0,0 +1,198 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.request;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.apache.solr.util.StrUtils;
import org.apache.solr.util.NamedList;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.core.SolrCore;
import java.util.Map;
import java.util.HashMap;
/**
* @author yonik
* @version $Id: LocalSolrQueryRequest.java,v 1.6 2005/06/02 22:03:38 yonik Exp $
*/
public class LocalSolrQueryRequest extends SolrQueryRequestBase {
private final NamedList args;
private final String query;
private final String qtype;
private final int start;
private final int limit;
public final static Map emptyArgs = new HashMap(0,1);
public LocalSolrQueryRequest(SolrCore core, String query, String qtype, int start, int limit, Map args) {
super(core);
this.query=query;
this.qtype=qtype;
this.start=start;
this.limit=limit;
this.args = new NamedList();
if (query!=null) this.args.add(SolrQueryRequestBase.QUERY_NAME, query);
if (qtype!=null) this.args.add(SolrQueryRequestBase.QUERYTYPE_NAME, qtype);
this.args.add(SolrQueryRequestBase.START_NAME, Integer.toString(start));
this.args.add(SolrQueryRequestBase.ROWS_NAME, Integer.toString(limit));
if (args!=null) this.args.addAll(args);
}
public LocalSolrQueryRequest(SolrCore core, NamedList args) {
super(core);
this.args=args;
this.query=getStrParam(QUERY_NAME,null);
this.qtype=getStrParam(QUERYTYPE_NAME,null);
this.start=getIntParam(START_NAME,0);
this.limit=getIntParam(ROWS_NAME,10);
}
public String getParam(String name) {
return (String)args.get(name);
}
public String getQueryString() {
return query;
}
// signifies the syntax and the handler that should be used
// to execute this query.
public String getQueryType() {
return qtype;
}
// starting position in matches to return to client
public int getStart() {
return start;
}
// number of matching documents to return
public int getLimit() {
return limit;
}
final long startTime=System.currentTimeMillis();
// Get the start time of this request in milliseconds
public long getStartTime() {
return startTime;
}
// The index searcher associated with this request
RefCounted<SolrIndexSearcher> searcherHolder;
public SolrIndexSearcher getSearcher() {
// should this reach out and get a searcher from the core singleton, or
// should the core populate one in a factory method to create requests?
// or there could be a setSearcher() method that Solr calls
if (searcherHolder==null) {
searcherHolder = core.getSearcher();
}
return searcherHolder.get();
}
// The solr core (coordinator, etc) associated with this request
public SolrCore getCore() {
return core;
}
// The index schema associated with this request
public IndexSchema getSchema() {
return core.getSchema();
}
public String getParamString() {
StringBuilder sb = new StringBuilder(128);
try {
boolean first=true;
if (query!=null) {
if (!first) {
sb.append('&');
}
first=false;
sb.append("q=");
StrUtils.partialURLEncodeVal(sb,query);
}
// null, "", and "standard" are all the default query handler.
if (qtype!=null && !(qtype.equals("") || qtype.equals("standard"))) {
if (!first) {
sb.append('&');
}
first=false;
sb.append("qt=");
sb.append(qtype);
}
if (start!=0) {
if (!first) {
sb.append('&');
}
first=false;
sb.append("start=");
sb.append(start);
}
if (!first) {
sb.append('&');
}
first=false;
sb.append("rows=");
sb.append(limit);
if (args != null && args.size() > 0) {
for (int i=0; i<args.size(); i++) {
if (!first) {
sb.append('&');
}
first=false;
sb.append(args.getName(i));
sb.append('=');
StrUtils.partialURLEncodeVal(sb,args.getVal(i).toString());
}
}
} catch (Exception e) {
// should never happen... we only needed this because
// partialURLEncodeVal can throw an IOException, but it
// never will when adding to a StringBuilder.
throw new RuntimeException(e);
}
return sb.toString();
}
public void close() {
if (searcherHolder!=null) {
searcherHolder.decref();
}
}
}
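Putting the pieces together, a hedged sketch of the request lifecycle: construct a local request, hand it to the core, and close it so the lazily acquired searcher reference is released. The query string is illustrative.

SolrCore core = SolrCore.getSolrCore();
LocalSolrQueryRequest req = new LocalSolrQueryRequest(
    core, "id:42", "standard", 0, 10, LocalSolrQueryRequest.emptyArgs);
SolrQueryResponse rsp = new SolrQueryResponse();
try {
  core.execute(req, rsp);
  if (rsp.getException() != null) {
    SolrException.log(SolrCore.log, rsp.getException());
  }
} finally {
  req.close(); // decrefs the searcher acquired in getSearcher()
}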

View File

@@ -0,0 +1,29 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.request;
import java.io.Writer;
import java.io.IOException;
/**
* @author yonik
* @version $Id: QueryResponseWriter.java,v 1.2 2005/04/24 02:53:35 yonik Exp $
*/
public interface QueryResponseWriter {
public void write(Writer writer, SolrQueryRequest request, SolrQueryResponse response) throws IOException;
}

View File

@@ -0,0 +1,65 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.request;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.core.SolrCore;
/**
* @author yonik
* @version $Id: SolrQueryRequest.java,v 1.3 2005/05/10 19:40:12 yonik Exp $
*/
public interface SolrQueryRequest {
public String getParam(String name);
public String getQueryString();
// signifies the syntax and the handler that should be used
// to execute this query.
public String getQueryType();
// starting position in matches to return to client
public int getStart();
// number of matching documents to return
public int getLimit();
// Get the start time of this request in milliseconds
public long getStartTime();
// The index searcher associated with this request
public SolrIndexSearcher getSearcher();
// The solr core (coordinator, etc) associated with this request
public SolrCore getCore();
// The index schema associated with this request
public IndexSchema getSchema();
/**
* Returns a string representing all the important parameters.
* Suitable for logging.
*/
public String getParamString();
/******
// Get the current elapsed time in milliseconds
public long getElapsedTime();
******/
}

View File

@@ -0,0 +1,130 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.request;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.RefCounted;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrException;
/**
* @author yonik
* @version $Id: SolrQueryRequestBase.java,v 1.6 2005/06/12 02:36:09 yonik Exp $
*/
public abstract class SolrQueryRequestBase implements SolrQueryRequest {
// some standard query argument names
public static final String QUERY_NAME="q";
public static final String START_NAME="start";
public static final String ROWS_NAME="rows";
public static final String XSL_NAME="xsl";
public static final String QUERYTYPE_NAME="qt";
protected final SolrCore core;
public SolrQueryRequestBase(SolrCore core) {
this.core=core;
}
public int getIntParam(String name) {
String s = getParam(name);
if (s==null) {
throw new SolrException(500,"Missing required parameter '"+name+"' from " + this);
}
return Integer.parseInt(s);
}
public int getIntParam(String name, int defval) {
String s = getParam(name);
return s==null ? defval : Integer.parseInt(s);
}
public String getStrParam(String name) {
String s = getParam(name);
if (s==null) {
throw new SolrException(500,"Missing required parameter '"+name+"' from " + this);
}
return s;
}
public String getStrParam(String name, String defval) {
String s = getParam(name);
return s==null ? defval : s;
}
public String getQueryString() {
return getParam(QUERY_NAME);
}
public String getQueryType() {
return getParam(QUERYTYPE_NAME);
}
// starting position in matches to return to client
public int getStart() {
return getIntParam(START_NAME, 0);
}
// number of matching documents to return
public int getLimit() {
return getIntParam(ROWS_NAME, 10);
}
protected final long startTime=System.currentTimeMillis();
// Get the start time of this request in milliseconds
public long getStartTime() {
return startTime;
}
// The index searcher associated with this request
protected RefCounted<SolrIndexSearcher> searcherHolder;
public SolrIndexSearcher getSearcher() {
// should this reach out and get a searcher from the core singleton, or
// should the core populate one in a factory method to create requests?
// or there could be a setSearcher() method that Solr calls
if (searcherHolder==null) {
searcherHolder = core.getSearcher();
}
return searcherHolder.get();
}
// The solr core (coordinator, etc) associated with this request
public SolrCore getCore() {
return core;
}
// The index schema associated with this request
public IndexSchema getSchema() {
return core.getSchema();
}
public void close() {
if (searcherHolder!=null) {
searcherHolder.decref();
}
}
public String toString() {
return this.getClass().getSimpleName() + '{' + getParamString() + '}';
}
}
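For completeness, a small sketch of the two lookup styles the helpers above provide (assume req is some SolrQueryRequestBase subclass in scope):

int rows = req.getIntParam("rows", 10); // absent param -> default value
String q = req.getStrParam("q");        // absent param -> SolrException(500)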

View File

@@ -0,0 +1,125 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.request;
import org.apache.solr.util.NamedList;
import java.util.*;
/**
* <code>SolrQueryResponse</code> is used by a query handler to return
* the response to a query.
* @author yonik
* @version $Id: SolrQueryResponse.java,v 1.5 2005/08/10 04:27:04 yonik Exp $
* @since solr 0.9
*/
public class SolrQueryResponse {
protected NamedList values = new NamedList();
// current holder for user defined values
protected Set<String> defaultReturnFields;
// error if this is set...
protected Exception err;
/***
// another way of returning an error
int errCode;
String errMsg;
***/
public NamedList getValues() { return values; }
/**
* Sets a list of all the named values to return.
*/
public void setAllValues(NamedList nameValuePairs) {
values=nameValuePairs;
}
/**
* Sets the document field names of fields to return by default.
*/
public void setReturnFields(Set<String> fields) {
defaultReturnFields=fields;
}
// TODO: should this be represented as a String[] such
// that order can be maintained if needed?
/**
* The document field names to return by default.
*/
public Set<String> getReturnFields() {
return defaultReturnFields;
}
/**
* Appends a named value to the list of named values to be returned.
* @param name the name of the value - may be null if unnamed
* @param val the value to add - also may be null since null is a legal value
*/
public void add(String name, Object val) {
values.add(name,val);
}
/**
* Causes an error to be returned instead of the results.
*/
public void setException(Exception e) {
err=e;
}
/**
* Returns an Exception if there was a fatal error in processing the request.
* Returns null if the request succeeded.
*/
public Exception getException() {
return err;
}
// Get and Set the endtime in milliseconds... used
// to calculate query time.
protected long endtime;
/** Time in milliseconds when the response officially finished.
*/
public long getEndTime() {
if (endtime==0) {
setEndTime();
}
return endtime;
}
/**
* Stop the timer for how long this query took.
*/
public long setEndTime() {
return setEndTime(System.currentTimeMillis());
}
public long setEndTime(long endtime) {
if (endtime!=0) {
this.endtime=endtime;
}
return this.endtime;
}
}
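
A producer-side sketch (hypothetical values; only methods defined above are used, with NamedList imported as in this file). A handler fills the response with named values, then stops the timer:

  static void buildResponse() {
    SolrQueryResponse rsp = new SolrQueryResponse();
    rsp.add("greeting", "hello");           // named value
    rsp.add(null, "unnamed is also legal"); // name may be null
    rsp.setEndTime();                       // freeze the QTime endpoint
    NamedList all = rsp.getValues();        // entries in insertion order
  }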

@@ -0,0 +1,63 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.request;
import org.apache.solr.util.NamedList;
import org.apache.solr.core.SolrInfoMBean;
/**
* Implementations of <code>SolrRequestHandler</code> are called to handle query requests.
*
* Different <code>SolrRequestHandler</code>s are registered with the <code>SolrCore</code>.
 * One way to register a SolrRequestHandler with the core is through the <code>solrconfig.xml</code> file.
 * <p>
 * Example <code>solrconfig.xml</code> entry to register a <code>SolrRequestHandler</code> implementation to
* handle all queries with a query type of "test":
* <p>
* <code>
* &lt;requestHandler name="test" class="solr.tst.TestRequestHandler" /&gt;
* </code>
* <p>
* A single instance of any registered SolrRequestHandler is created
* via the default constructor and is reused for all relevant queries.
*
* @author yonik
* @version $Id: SolrRequestHandler.java,v 1.7 2005/12/02 04:31:06 yonik Exp $
*/
public interface SolrRequestHandler extends SolrInfoMBean {
/** <code>init</code> will be called just once, immediately after creation.
* <p>The args are user-level initialization parameters that
* may be specified when declaring a request handler in
 * solrconfig.xml
*/
public void init(NamedList args);
/**
* Handles a query request. This method must be thread safe.
* <p>
* Information about the request may be obtained from <code>req</code> and
* response information may be set using <code>rsp</code>.
* <p>
* There are no mandatory actions that handleRequest must perform.
* An empty handleRequest implementation would fulfill
* all interface obligations.
*/
public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp);
}
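
A minimal implementation sketch matching the "test" registration example in the javadoc above (the class body is hypothetical; the SolrInfoMBean methods mirror the set StandardRequestHandler provides):

package solr.tst;

import java.net.URL;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.util.NamedList;

public class TestRequestHandler implements SolrRequestHandler, SolrInfoMBean {
  public void init(NamedList args) { /* no init params used */ }

  public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
    // echo the raw query string; an empty body would also satisfy the contract
    rsp.add("echo", req.getQueryString());
  }

  // SolrInfoMBean methods (same set StandardRequestHandler implements)
  public String getName() { return TestRequestHandler.class.getName(); }
  public String getVersion() { return SolrCore.version; }
  public String getDescription() { return "echo handler sketch"; }
  public Category getCategory() { return Category.QUERYHANDLER; }
  public String getCvsId() { return "$Id$"; }
  public String getCvsName() { return "$Name$"; }
  public String getCvsSource() { return "$Source$"; }
  public URL[] getDocs() { return null; }
  public NamedList getStatistics() { return new NamedList(); }
}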

@@ -0,0 +1,225 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.request;
import org.apache.lucene.search.*;
import org.apache.lucene.document.Document;
import java.util.List;
import java.util.Set;
import java.util.HashSet;
import java.util.logging.Level;
import java.util.regex.Pattern;
import java.io.IOException;
import java.net.URL;
import org.apache.solr.util.StrUtils;
import org.apache.solr.util.NamedList;
import org.apache.solr.search.*;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrInfoMBean;
import org.apache.solr.core.SolrException;
/**
* @author yonik
* @version $Id: StandardRequestHandler.java,v 1.17 2005/12/02 04:31:06 yonik Exp $
*/
public class StandardRequestHandler implements SolrRequestHandler, SolrInfoMBean {
// statistics
// TODO: should we bother synchronizing these, or is an off-by-one error
// acceptable every million requests or so?
long numRequests;
long numErrors;
public void init(NamedList args) {
SolrCore.log.log(Level.INFO, "Unused request handler arguments:" + args);
}
private final Pattern splitList=Pattern.compile(",| ");
public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
numRequests++;
// TODO: test if lucene will accept an escaped ';', otherwise
// we need to un-escape them before we pass to QueryParser
try {
String sreq = req.getQueryString();
String debug = req.getParam("debugQuery");
// find fieldnames to return (fieldlist)
String fl = req.getParam("fl");
int flags=0;
if (fl != null) {
// TODO - this could become more efficient if widely used.
// TODO - should field order be maintained?
String[] flst = splitList.split(fl,0);
if (flst.length > 0 && !(flst.length==1 && flst[0].length()==0)) {
Set<String> set = new HashSet<String>();
for (String fname : flst) {
if ("score".equals(fname)) flags |= SolrIndexSearcher.GET_SCORES;
set.add(fname);
}
rsp.setReturnFields(set);
}
}
if (sreq==null) throw new SolrException(400,"Missing queryString");
List<String> commands = StrUtils.splitSmart(sreq,';');
String qs = commands.size() >= 1 ? commands.get(0) : "";
Query query = QueryParsing.parseQuery(qs, req.getSchema());
// If the first non-query, non-filter command is a simple sort on an indexed field, then
// we can use the Lucene sort ability.
Sort sort = null;
if (commands.size() >= 2) {
QueryParsing.SortSpec sortSpec = QueryParsing.parseSort(commands.get(1), req.getSchema());
if (sortSpec != null) {
sort = sortSpec.getSort();
// ignore the count for now... it's currently only controlled by start & limit on req
// count = sortSpec.getCount();
}
}
DocList results = req.getSearcher().getDocList(query, null, sort, req.getStart(), req.getLimit(), flags);
rsp.add(null,results);
if (debug!=null) {
NamedList dbg = new NamedList();
try {
dbg.add("querystring",qs);
dbg.add("parsedquery",QueryParsing.toString(query,req.getSchema()));
dbg.add("explain", getExplainList(query, results, req.getSearcher(), req.getSchema()));
String otherQueryS = req.getParam("explainOther");
if (otherQueryS != null && otherQueryS.length() > 0) {
DocList otherResults = doQuery(otherQueryS,req.getSearcher(), req.getSchema(),0,10);
dbg.add("otherQuery",otherQueryS);
dbg.add("explainOther", getExplainList(query, otherResults, req.getSearcher(), req.getSchema()));
}
} catch (Exception e) {
SolrException.logOnce(SolrCore.log,"Exception during debug:",e);
dbg.add("exception_during_debug", SolrException.toStr(e));
}
rsp.add("debug",dbg);
}
} catch (SolrException e) {
rsp.setException(e);
numErrors++;
return;
} catch (Exception e) {
SolrException.log(SolrCore.log,e);
rsp.setException(e);
numErrors++;
return;
}
}
private NamedList getExplainList(Query query, DocList results, SolrIndexSearcher searcher, IndexSchema schema) throws IOException {
NamedList explainList = new NamedList();
DocIterator iterator = results.iterator();
for (int i=0; i<results.size(); i++) {
int id = iterator.nextDoc();
Explanation explain = searcher.explain(query, id);
//explainList.add(Integer.toString(id), explain.toString().split("\n"));
Document doc = searcher.doc(id);
String strid = schema.printableUniqueKey(doc);
String docname = "";
if (strid != null) docname="id="+strid+",";
docname = docname + "internal_docid="+id;
explainList.add(docname, "\n" +explain.toString());
}
return explainList;
}
private DocList doQuery(String sreq, SolrIndexSearcher searcher, IndexSchema schema, int start, int limit) throws IOException {
List<String> commands = StrUtils.splitSmart(sreq,';');
String qs = commands.size() >= 1 ? commands.get(0) : "";
Query query = QueryParsing.parseQuery(qs, schema);
// If the first non-query, non-filter command is a simple sort on an indexed field, then
// we can use the Lucene sort ability.
Sort sort = null;
if (commands.size() >= 2) {
QueryParsing.SortSpec sortSpec = QueryParsing.parseSort(commands.get(1), schema);
if (sortSpec != null) {
sort = sortSpec.getSort();
if (sortSpec.getCount() >= 0) {
limit = sortSpec.getCount();
}
}
}
DocList results = searcher.getDocList(query,(DocSet)null, sort, start, limit);
return results;
}
//////////////////////// SolrInfoMBeans methods //////////////////////
public String getName() {
return StandardRequestHandler.class.getName();
}
public String getVersion() {
return SolrCore.version;
}
public String getDescription() {
return "The standard Solr request handler";
}
public Category getCategory() {
return Category.QUERYHANDLER;
}
public String getCvsId() {
return "$Id: StandardRequestHandler.java,v 1.17 2005/12/02 04:31:06 yonik Exp $";
}
public String getCvsName() {
return "$Name: $";
}
public String getCvsSource() {
return "$Source: /cvs/main/searching/solr/solarcore/src/solr/StandardRequestHandler.java,v $";
}
public URL[] getDocs() {
return null;
}
public NamedList getStatistics() {
NamedList lst = new NamedList();
lst.add("requests", numRequests);
lst.add("errors", numErrors);
return lst;
}
}
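
How a raw request string decomposes (sketch; the sample string follows the "query;sort" form used in the tests, splitSmart is assumed to honor the '\;' escaping noted above, and imports of java.util.List and org.apache.solr.util.StrUtils are assumed):

  static void splitDemo() {
    List<String> commands = StrUtils.splitSmart("userName:Alex;startDate top 2", ';');
    String qs   = commands.get(0); // "userName:Alex"   -> QueryParsing.parseQuery
    String sort = commands.get(1); // "startDate top 2" -> QueryParsing.parseSort
  }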

@@ -0,0 +1,33 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.request;
import java.io.Writer;
import java.io.IOException;
/**
* @author yonik
* @version $Id: XMLResponseWriter.java,v 1.6 2005/04/24 02:53:35 yonik Exp $
*/
public class XMLResponseWriter implements QueryResponseWriter {
public void write(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
XMLWriter.writeResponse(writer,req,rsp);
}
}

@@ -0,0 +1,620 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.request;
import org.apache.solr.util.NamedList;
import org.apache.solr.util.XML;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.search.DocList;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocSet;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import java.io.Writer;
import java.io.IOException;
import java.util.*;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Document;
/**
* @author yonik
* @version $Id: XMLWriter.java,v 1.16 2005/12/02 04:31:06 yonik Exp $
*/
final public class XMLWriter {
//
// static thread safe part
//
private static final char[] XML_START1="<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n".toCharArray();
private static final char[] XML_STYLESHEET="<?xml-stylesheet type=\"text/xsl\" href=\"/admin/".toCharArray();
private static final char[] XML_STYLESHEET_END=".xsl\"?>\n".toCharArray();
private static final char[] XML_START2_SCHEMA=(
"<response xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"
+" xsi:noNamespaceSchemaLocation=\"http://pi.cnet.com/cnet-search/response.xsd\">\n"
).toCharArray();
private static final char[] XML_START2_NOSCHEMA=(
"<response>\n"
).toCharArray();
public static void writeResponse(Writer writer, SolrQueryRequest req, SolrQueryResponse rsp) throws IOException {
// get total time up until now
int qtime=(int)(rsp.getEndTime() - req.getStartTime());
String ver = req.getParam("version");
writer.write(XML_START1);
String stylesheet = req.getParam("stylesheet");
if (stylesheet != null && stylesheet.length() > 0) {
writer.write(XML_STYLESHEET);
writer.write(stylesheet);
writer.write(XML_STYLESHEET_END);
}
String noSchema = req.getParam("noSchema");
// todo - change when schema becomes available?
if (false && noSchema == null)
writer.write(XML_START2_SCHEMA);
else
writer.write(XML_START2_NOSCHEMA);
writer.write("<responseHeader><status>");
writer.write('0'); // it's 0 (success) if we got this far...
writer.write("</status><QTime>");
writer.write(Integer.toString(qtime));
writer.write("</QTime></responseHeader>\n");
//
// create an instance for each request to handle
// non-thread safe stuff (indentation levels, etc)
// and to encapsulate writer, schema, and searcher so
// they don't have to be passed around in every function.
//
XMLWriter xw = new XMLWriter(writer, req.getSchema(), req.getSearcher(), ver);
xw.defaultFieldList = rsp.getReturnFields();
String indent = req.getParam("indent");
if (indent != null) {
if ("".equals(indent) || "off".equals(indent)) {
xw.setIndent(false);
} else {
xw.setIndent(true);
}
}
NamedList lst = rsp.getValues();
int sz = lst.size();
for (int i=0; i<sz; i++) {
xw.writeVal(lst.getName(i),lst.getVal(i));
}
writer.write("\n</response>\n");
}
////////////////////////////////////////////////////////////
// request instance specific (non-static, not shared between threads)
////////////////////////////////////////////////////////////
private final Writer writer;
private final IndexSchema schema; // needed to write fields of docs
private final SolrIndexSearcher searcher; // needed to retrieve docs
private int level;
private boolean defaultIndent=false;
private boolean doIndent=false;
// fieldList... the set of fields to return for each document
private Set<String> defaultFieldList;
// if a list smaller than this threshold is encountered, elements
// will be written on the same line.
// maybe constructed types should always indent first?
private final int indentThreshold=0;
private final int version;
// temporary working objects...
// be careful not to use these recursively...
private final ArrayList tlst = new ArrayList();
private final Calendar cal = Calendar.getInstance(TimeZone.getTimeZone("GMT"));
private final StringBuilder sb = new StringBuilder();
XMLWriter(Writer writer, IndexSchema schema, SolrIndexSearcher searcher, String version) {
this.writer = writer;
this.schema = schema;
this.searcher = searcher;
float ver = version==null? 2.1f : Float.parseFloat(version);
this.version = (int)(ver*1000);
}
//
// Functions to manipulate the current logical nesting level.
// Any indentation will be partially based on level.
//
public void setLevel(int level) { this.level = level; }
public int level() { return level; }
public int incLevel() { return ++level; }
public int decLevel() { return --level; }
public void setIndent(boolean doIndent) {
this.doIndent = doIndent;
defaultIndent = doIndent;
}
public void writeAttr(String name, String val) throws IOException {
if (val != null) {
writer.write(' ');
writer.write(name);
writer.write("=\"");
writer.write(val);
writer.write('"');
}
}
public void startTag(String tag, String name, boolean closeTag) throws IOException {
if (doIndent) indent();
writer.write('<');
writer.write(tag);
if (name!=null) {
writer.write(" name=\"");
writer.write(name);
if (closeTag) {
writer.write("\"/>");
} else {
writer.write("\">");
}
} else {
if (closeTag) {
writer.write("/>");
} else {
writer.write('>');
}
}
}
private static final String[] indentArr = new String[] {
"\n",
"\n ",
"\n ",
"\n\t",
"\n\t ",
"\n\t ", // could skip this one (the only 3 char seq)
"\n\t\t" };
public void indent() throws IOException {
indent(level);
}
public void indent(int lev) throws IOException {
int arrsz = indentArr.length-1;
// another option would be lev % arrsz (wrap around)
String istr = indentArr[ lev > arrsz ? arrsz : lev ];
writer.write(istr);
}
private static final Comparator fieldnameComparator = new Comparator() {
public int compare(Object o, Object o1) {
Field f1 = (Field)o; Field f2 = (Field)o1;
int cmp = f1.name().compareTo(f2.name());
return cmp;
// note - the sort is stable, so it does not change the relative ordering
// of fields with the same name.
}
};
public final void writeDoc(String name, Document doc, Set<String> returnFields, float score, boolean includeScore) throws IOException {
startTag("doc", name, false);
incLevel();
if (includeScore) {
writeFloat("score", score);
}
// Lucene Documents have multivalued types as multiple fields
// with the same name.
// The XML needs to represent these as
// an array. The fastest way to detect multiple fields
// with the same name is to sort them first.
Enumeration ee = doc.fields();
// using global tlst here, so we shouldn't call any other
// function that uses it until we are done.
tlst.clear();
while (ee.hasMoreElements()) {
Field ff = (Field) ee.nextElement();
// skip this field if it is not a field to be returned.
if (returnFields!=null && !returnFields.contains(ff.name())) {
continue;
}
tlst.add(ff);
}
Collections.sort(tlst, fieldnameComparator);
int sz = tlst.size();
int fidx1 = 0, fidx2 = 0;
while (fidx1 < sz) {
Field f1 = (Field)tlst.get(fidx1);
String fname = f1.name();
// find the end of fields with this name
fidx2 = fidx1+1;
while (fidx2 < sz && fname.equals(((Field)tlst.get(fidx2)).name()) ) {
fidx2++;
}
/***
// more efficient to use getFieldType instead of
// getField since that way dynamic fields won't have
// to create a SchemaField on the fly.
FieldType ft = schema.getFieldType(fname);
***/
SchemaField sf = schema.getField(fname);
if (fidx1+1 == fidx2) {
// single field value
if (version>=2100 && sf.multiValued()) {
startTag("arr",fname,false);
doIndent=false;
sf.write(this, null, f1);
writer.write("</arr>");
doIndent=defaultIndent;
} else {
sf.write(this, f1.name(), f1);
}
} else {
// multiple fields with same name detected
startTag("arr",fname,false);
incLevel();
doIndent=false;
int cnt=0;
for (int i=fidx1; i<fidx2; i++) {
if (defaultIndent && ++cnt==4) { // only indent every 4th item
indent();
cnt=0;
}
sf.write(this, null, (Field)tlst.get(i));
}
decLevel();
// if (doIndent) indent();
writer.write("</arr>");
// doIndent=true;
doIndent=defaultIndent;
}
fidx1 = fidx2;
}
decLevel();
if (doIndent) indent();
writer.write("</doc>");
}
public final void writeDocList(String name, DocList ids, Set<String> fields) throws IOException {
boolean includeScore=false;
if (fields!=null) {
includeScore = fields.contains("score");
if (fields.size()==0 || (fields.size()==1 && includeScore) || fields.contains("*")) {
fields=null; // null means return all stored fields
}
}
int sz=ids.size();
if (doIndent) indent();
writer.write("<result");
writeAttr("name",name);
writeAttr("numFound",Integer.toString(ids.matches()));
writeAttr("start",Integer.toString(ids.offset()));
if (includeScore) {
writeAttr("maxScore",Float.toString(ids.maxScore()));
}
if (sz==0) {
writer.write("/>");
return;
} else {
writer.write('>');
}
incLevel();
DocIterator iterator = ids.iterator();
for (int i=0; i<sz; i++) {
int id = iterator.nextDoc();
Document doc = searcher.doc(id);
writeDoc(null, doc, fields, (includeScore ? iterator.score() : 0.0f), includeScore);
}
decLevel();
if (doIndent) indent();
writer.write("</result>");
}
public void writeVal(String name, Object val) throws IOException {
// if there get to be enough types, perhaps hashing on the type
// to get a handler might be faster (but types must be exact to do that...)
// go in order of most common to least common
if (val==null) {
writeNull(name);
} else if (val instanceof String) {
writeStr(name, (String)val);
} else if (val instanceof Integer) {
// it would be slower to pass the int ((Integer)val).intValue()
writeInt(name, val.toString());
} else if (val instanceof Boolean) {
// could be optimized... only two vals
writeBool(name, val.toString());
} else if (val instanceof Long) {
writeLong(name, val.toString());
} else if (val instanceof Date) {
writeDate(name,(Date)val);
} else if (val instanceof Float) {
// we pass the float instead of using toString() because
// it may need special formatting. same for double.
writeFloat(name, ((Float)val).floatValue());
} else if (val instanceof Double) {
writeDouble(name, ((Double)val).doubleValue());
} else if (val instanceof Document) {
writeDoc(name, (Document)val, null, 0.0f, false);
} else if (val instanceof DocList) {
// requires access to IndexReader
writeDocList(name, (DocList)val, defaultFieldList);
} else if (val instanceof DocSet) {
// how do we know what fields to read?
// todo: have a DocList/DocSet wrapper that
// restricts the fields to write...?
} else if (val instanceof Map) {
writeMap(name, (Map)val);
} else if (val instanceof NamedList) {
writeNamedList(name, (NamedList)val);
} else if (val instanceof Collection) {
writeArray(name,(Collection)val);
} else if (val instanceof Object[]) {
writeArray(name,(Object[])val);
} else {
// default...
writeStr(name, val.getClass().getName() + ':' + val.toString());
}
}
//
// Generic compound types
//
public void writeNamedList(String name, NamedList val) throws IOException {
int sz = val.size();
startTag("lst", name, sz<=0);
if (sz<indentThreshold) {
doIndent=false;
}
incLevel();
for (int i=0; i<sz; i++) {
writeVal(val.getName(i),val.getVal(i));
}
decLevel();
if (sz > 0) {
if (doIndent) indent();
writer.write("</lst>");
}
}
//A map is currently represented as a named list
public void writeMap(String name, Map val) throws IOException {
Map map = val;
int sz = map.size();
startTag("lst", name, sz<=0);
incLevel();
for (Map.Entry entry : (Set<Map.Entry>)map.entrySet()) {
// possible class-cast exception here...
String k = (String)entry.getKey();
Object v = entry.getValue();
// if (sz<indentThreshold) indent();
writeVal(k,v);
}
decLevel();
if (sz > 0) {
if (doIndent) indent();
writer.write("</lst>");
}
}
public void writeArray(String name, Object[] val) throws IOException {
writeArray(name, Arrays.asList(val));
}
public void writeArray(String name, Collection val) throws IOException {
int sz = val.size();
startTag("arr", name, sz<=0);
incLevel();
for (Object o : val) {
// if (sz<indentThreshold) indent();
writeVal(null, o);
}
decLevel();
if (sz > 0) {
if (doIndent) indent();
writer.write("</arr>");
}
}
//
// Primitive types
//
public void writeNull(String name) throws IOException {
writePrim("null",name,"",false);
}
public void writeStr(String name, String val) throws IOException {
writePrim("str",name,val,true);
}
public void writeInt(String name, String val) throws IOException {
writePrim("int",name,val,false);
}
public void writeInt(String name, int val) throws IOException {
writeInt(name,Integer.toString(val));
}
public void writeLong(String name, String val) throws IOException {
writePrim("long",name,val,false);
}
public void writeLong(String name, long val) throws IOException {
writeLong(name,Long.toString(val));
}
public void writeBool(String name, String val) throws IOException {
writePrim("bool",name,val,false);
}
public void writeBool(String name, boolean val) throws IOException {
writeBool(name,Boolean.toString(val));
}
public void writeFloat(String name, String val) throws IOException {
writePrim("float",name,val,false);
}
public void writeFloat(String name, float val) throws IOException {
writeFloat(name,Float.toString(val));
}
public void writeDouble(String name, String val) throws IOException {
writePrim("double",name,val,false);
}
public void writeDouble(String name, double val) throws IOException {
writeDouble(name,Double.toString(val));
}
public void writeDate(String name, Date val) throws IOException {
// using a StringBuilder for numbers can be nice since
// no temporary string is created (digits are appended
// directly to the builder's buffer).
cal.setTime(val);
sb.setLength(0);
int i = cal.get(Calendar.YEAR);
sb.append(i);
sb.append('-');
i = cal.get(Calendar.MONTH) + 1; // 0 based, so add 1
if (i<10) sb.append('0');
sb.append(i);
sb.append('-');
i=cal.get(Calendar.DAY_OF_MONTH);
if (i<10) sb.append('0');
sb.append(i);
sb.append('T');
i=cal.get(Calendar.HOUR_OF_DAY); // 24 hour time format
if (i<10) sb.append('0');
sb.append(i);
sb.append(':');
i=cal.get(Calendar.MINUTE);
if (i<10) sb.append('0');
sb.append(i);
sb.append(':');
i=cal.get(Calendar.SECOND);
if (i<10) sb.append('0');
sb.append(i);
i=cal.get(Calendar.MILLISECOND);
if (i != 0) {
sb.append('.');
if (i<100) sb.append('0');
if (i<10) sb.append('0');
sb.append(i);
// handle canonical format specifying fractional
// seconds shall not end in '0'. Given the slowness of
// integer div/mod, simply checking the last character
// is probably the fastest way to check.
int lastIdx = sb.length()-1;
if (sb.charAt(lastIdx)=='0') {
lastIdx--;
if (sb.charAt(lastIdx)=='0') {
lastIdx--;
}
sb.setLength(lastIdx+1);
}
}
sb.append('Z');
writeDate(name, sb.toString());
}
public void writeDate(String name, String val) throws IOException {
writePrim("date",name,val,false);
}
//
// OPT - specific writeInt, writeFloat, methods might be faster since
// there would be less write calls (write("<int name=\"" + name + ... + </int>)
//
public void writePrim(String tag, String name, String val, boolean escape) throws IOException {
// OPT - we could use a temp char[] (or a StringBuilder) and if the
// size was small enough to fit (if escape==false we can calc exact size)
// then we could put things directly in the temp buf.
// need to see what percent of CPU this takes up first though...
// Could test a reusable StringBuilder...
// is this needed here???
// Only if a fieldtype calls writeStr or something
// with a null val instead of calling writeNull
/***
if (val==null) {
if (name==null) writer.write("<null/>");
else writer.write("<null name=\"" + name + "/>");
}
***/
int contentLen=val.length();
startTag(tag, name, contentLen==0);
if (contentLen==0) return;
if (escape) {
XML.escapeCharData(val,writer);
} else {
writer.write(val,0,contentLen);
}
writer.write("</");
writer.write(tag);
writer.write('>');
}
}
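
A standalone sketch of the fractional-second rule writeDate implements above: pad milliseconds to three digits, then trim trailing zeros so the canonical form never ends in '0' (millisSuffix is a hypothetical helper, not part of this class):

  static String millisSuffix(int millis) {
    if (millis == 0) return "";                 // whole seconds: no fractional part
    StringBuilder sb = new StringBuilder(".");
    if (millis < 100) sb.append('0');
    if (millis < 10)  sb.append('0');
    sb.append(millis);                          // now ".mmm"
    while (sb.charAt(sb.length() - 1) == '0') { // ".500" -> ".5", ".050" -> ".05"
      sb.setLength(sb.length() - 1);
    }
    return sb.toString();
  }
  // millisSuffix(0)   -> ""      e.g. 1995-12-31T23:59:59Z
  // millisSuffix(500) -> ".5"    e.g. 1995-12-31T23:59:59.5Z
  // millisSuffix(999) -> ".999"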

@@ -0,0 +1,60 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.function.ValueSource;
import org.apache.lucene.document.Field;
import org.apache.solr.util.BCDUtils;
import org.apache.solr.request.XMLWriter;
import java.util.Map;
import java.io.IOException;
/**
* @author yonik
* @version $Id$
*/
public class BCDIntField extends FieldType {
protected void init(IndexSchema schema, Map<String,String> args) {
}
public SortField getSortField(SchemaField field,boolean reverse) {
return getStringSort(field,reverse);
}
public ValueSource getValueSource(SchemaField field) {
throw new UnsupportedOperationException("ValueSource not implemented");
}
public String toInternal(String val) {
return BCDUtils.base10toBase10kSortableInt(val);
}
public String toExternal(Field f) {
return indexedToReadable(f.stringValue());
}
public String indexedToReadable(String indexedForm) {
return BCDUtils.base10kSortableIntToBase10(indexedForm);
}
public void write(XMLWriter xmlWriter, String name, Field f) throws IOException {
xmlWriter.writeInt(name,toExternal(f));
}
}
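
Round-trip sketch (assumes BCDUtils converts losslessly, as the calls above imply; the demo method is hypothetical):

  static void bcdDemo() {
    BCDIntField t = new BCDIntField();
    String internal = t.toInternal("12345");         // sortable base-10000 indexed form
    String readable = t.indexedToReadable(internal); // back to "12345"
  }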

@@ -0,0 +1,31 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.solr.request.XMLWriter;
import org.apache.lucene.document.Field;
import java.io.IOException;
/**
* @author yonik
* @version $Id$
*/
public class BCDLongField extends BCDIntField {
public void write(XMLWriter xmlWriter, String name, Field f) throws IOException {
xmlWriter.writeLong(name,toExternal(f));
}
}

@@ -0,0 +1,31 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.solr.request.XMLWriter;
import org.apache.lucene.document.Field;
import java.io.IOException;
/**
* @author yonik
* @version $Id$
*/
public class BCDStrField extends BCDIntField {
public void write(XMLWriter xmlWriter, String name, Field f) throws IOException {
xmlWriter.writeStr(name,toExternal(f));
}
}

@@ -0,0 +1,97 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.function.ValueSource;
import org.apache.lucene.search.function.OrdFieldSource;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.document.Field;
import org.apache.solr.request.XMLWriter;
import java.util.Map;
import java.io.Reader;
import java.io.IOException;
/**
* @author yonik
* @version $Id$
*/
public class BoolField extends FieldType {
protected void init(IndexSchema schema, Map<String,String> args) {
}
public SortField getSortField(SchemaField field,boolean reverse) {
return getStringSort(field,reverse);
}
public ValueSource getValueSource(SchemaField field) {
return new OrdFieldSource(field.name);
}
// avoid instantiating every time...
protected final static Token TRUE_TOKEN = new Token("T",0,1);
protected final static Token FALSE_TOKEN = new Token("F",0,1);
////////////////////////////////////////////////////////////////////////
// TODO: look into creating my own queryParser that can more efficiently
// handle single valued non-text fields (int,bool,etc) if needed.
protected final static Analyzer boolAnalyzer = new Analyzer() {
public TokenStream tokenStream(String fieldName, Reader reader) {
return new Tokenizer(reader) {
boolean done=false;
public Token next() throws IOException {
if (done) return null;
done=true;
int ch = input.read();
if (ch==-1) return null;
return (ch=='t' || ch=='T' || ch=='1') ? TRUE_TOKEN : FALSE_TOKEN;
}
};
}
};
public Analyzer getAnalyzer() {
return boolAnalyzer;
}
public Analyzer getQueryAnalyzer() {
return boolAnalyzer;
}
public String toInternal(String val) {
char ch = (val!=null && val.length()>0) ? val.charAt(0) : 0;
return (ch=='1' || ch=='t' || ch=='T') ? "T" : "F";
}
public String toExternal(Field f) {
return indexedToReadable(f.stringValue());
}
public String indexedToReadable(String indexedForm) {
char ch = indexedForm.charAt(0);
return ch=='T' ? "true" : "false";
}
public void write(XMLWriter xmlWriter, String name, Field f) throws IOException {
xmlWriter.writeBool(name, f.stringValue().charAt(0) =='T');
}
}
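
Normalization sketch (values follow directly from toInternal/indexedToReadable above; the demo method is hypothetical):

  static void boolDemo() {
    BoolField b = new BoolField();
    String t = b.toInternal("true");     // first char 't' -> "T"
    String f = b.toInternal("0");        // anything else  -> "F"
    String e = b.indexedToReadable("T"); // -> "true"
  }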

@@ -0,0 +1,94 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.solr.core.SolrException;
import org.apache.solr.request.XMLWriter;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.function.ValueSource;
import org.apache.lucene.search.function.OrdFieldSource;
import java.util.Map;
import java.io.IOException;
/***
Date Format for the XML, incoming and outgoing:
A date field shall be of the form 1995-12-31T23:59:59Z
The trailing "Z" designates UTC time and is mandatory.
Optional fractional seconds are allowed: 1995-12-31T23:59:59.999Z
All other parts are mandatory.
This format was derived to be standards compliant (ISO 8601) and is a more
restricted form of the canonical representation of dateTime from XML schema part 2.
http://www.w3.org/TR/xmlschema-2/#dateTime
"In 1970 the Coordinated Universal Time system was devised by an international
advisory group of technical experts within the International Telecommunication
Union (ITU). The ITU felt it was best to designate a single abbreviation for
use in all languages in order to minimize confusion. Since unanimous agreement
could not be achieved on using either the English word order, CUT, or the
French word order, TUC, the acronym UTC was chosen as a compromise."
***/
// The XML (external) date format will sort correctly, except if
// fractions of seconds are present (because '.' is lower than 'Z').
// The easiest fix is to simply remove the 'Z' for the internal
// format.
// TODO: make a FlexibleDateField that can accept dates in multiple
// formats, better for human entered dates.
// TODO: make a DayField that only stores the day?
/**
* @author yonik
* @version $Id$
*/
public class DateField extends FieldType {
protected void init(IndexSchema schema, Map<String,String> args) {
}
public String toInternal(String val) {
int len=val.length();
if (val.charAt(len-1)=='Z') {
return val.substring(0,len-1);
}
throw new SolrException(1,"Invalid Date String:'" +val+'\'');
}
public String indexedToReadable(String indexedForm) {
return indexedForm + 'Z';
}
public String toExternal(Field f) {
return indexedToReadable(f.stringValue());
}
public SortField getSortField(SchemaField field,boolean reverse) {
return getStringSort(field,reverse);
}
public ValueSource getValueSource(SchemaField field) {
return new OrdFieldSource(field.name);
}
public void write(XMLWriter xmlWriter, String name, Field f) throws IOException {
xmlWriter.writeDate(name, toExternal(f));
}
}
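
Round-trip sketch (behavior read off toInternal/indexedToReadable above; the demo method is hypothetical):

  static void dateDemo() {
    DateField d = new DateField();
    String internal = d.toInternal("1995-12-31T23:59:59Z"); // -> "1995-12-31T23:59:59"
    String external = d.indexedToReadable(internal);        // -> "1995-12-31T23:59:59Z"
    // d.toInternal("1995-12-31") throws SolrException: the trailing 'Z' is mandatory
  }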

@@ -0,0 +1,50 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.function.ValueSource;
import org.apache.lucene.search.function.FloatFieldSource;
import org.apache.lucene.document.Field;
import org.apache.solr.request.XMLWriter;
import java.util.Map;
import java.io.IOException;
/**
* @author yonik
* @version $Id$
*/
public class DoubleField extends FieldType {
protected void init(IndexSchema schema, Map<String,String> args) {
restrictProps(SORT_MISSING_FIRST | SORT_MISSING_LAST);
}
/////////////////////////////////////////////////////////////
// TODO: ACK.. there is currently no SortField.DOUBLE!
public SortField getSortField(SchemaField field,boolean reverse) {
return new SortField(field.name,SortField.FLOAT, reverse);
}
public ValueSource getValueSource(SchemaField field) {
// fieldCache doesn't support double
return new FloatFieldSource(field.name);
}
public void write(XMLWriter xmlWriter, String name, Field f) throws IOException {
xmlWriter.writeDouble(name, f.stringValue());
}
}

@@ -0,0 +1,144 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import java.util.Map;
import java.util.HashMap;
/**
* @author yonik
* @version $Id$
*/
abstract class FieldProperties {
// use a bitfield instead of many different boolean variables since
// many of the variables are independent or semi-independent.
// bit values for boolean field properties.
final static int INDEXED = 0x00000001;
final static int TOKENIZED = 0x00000002;
final static int STORED = 0x00000004;
final static int BINARY = 0x00000008;
final static int COMPRESSED = 0x00000010;
final static int OMIT_NORMS = 0x00000020;
final static int STORE_TERMVECTORS = 0x00000040;
final static int STORE_TERMPOSITIONS = 0x00000080;
final static int STORE_TERMOFFSETS = 0x00000100;
final static int MULTIVALUED = 0x00000200;
final static int SORT_MISSING_FIRST = 0x00000400;
final static int SORT_MISSING_LAST = 0x00000800;
static final String[] propertyNames = {
"indexed", "tokenized", "stored",
"binary", "compressed", "omitNorms",
"termVectors", "termPositions", "termOffsets",
"multiValued",
"sortMissingFirst","sortMissingLast"
};
static final Map<String,Integer> propertyMap = new HashMap<String,Integer>();
static {
for (String prop : propertyNames) {
propertyMap.put(prop, propertyNameToInt(prop));
}
}
/** Returns the symbolic name for the property. */
static String getPropertyName(int property) {
return propertyNames[ Integer.numberOfTrailingZeros(property) ];
}
static int propertyNameToInt(String name) {
for (int i=0; i<propertyNames.length; i++) {
if (propertyNames[i].equals(name)) {
return 1 << i;
}
}
return 0;
}
static String propertiesToString(int properties) {
StringBuilder sb = new StringBuilder();
boolean first=true;
while (properties != 0) {
if (!first) sb.append(',');
first=false;
int bitpos = Integer.numberOfTrailingZeros(properties);
sb.append(getPropertyName(1 << bitpos));
properties &= ~(1<<bitpos); // clear that bit position
}
return sb.toString();
}
static boolean on(int bitfield, int props) {
return (bitfield & props) != 0;
}
static boolean off(int bitfield, int props) {
return (bitfield & props) == 0;
}
/***
static int normalize(int properties) {
int p = properties;
if (on(p,TOKENIZED) && off(p,INDEXED)) {
throw new RuntimeException("field must be indexed to be tokenized.");
}
if (on(p,STORE_TERMPOSITIONS)) p|=STORE_TERMVECTORS;
if (on(p,STORE_TERMOFFSETS)) p|=STORE_TERMVECTORS;
if (on(p,STORE_TERMOFFSETS) && off(p,INDEXED)) {
throw new RuntimeException("field must be indexed to store term vectors.");
}
if (on(p,OMIT_NORMS) && off(p,INDEXED)) {
throw new RuntimeException("field must be indexed for norms to be omitted.");
}
if (on(p,SORT_MISSING_FIRST) && on(p,SORT_MISSING_LAST)) {
throw new RuntimeException("conflicting options sortMissingFirst,sortMissingLast.");
}
if ((on(p,SORT_MISSING_FIRST) || on(p,SORT_MISSING_LAST)) && off(p,INDEXED)) {
throw new RuntimeException("field must be indexed to be sorted.");
}
if ((on(p,BINARY) || on(p,COMPRESSED)) && off(p,STORED)) {
throw new RuntimeException("field must be stored for compressed or binary options.");
}
return p;
}
***/
static int parseProperties(Map<String,String> properties, boolean which) {
int props = 0;
for (String prop : properties.keySet()) {
if (propertyMap.get(prop)==null) continue;
String val = properties.get(prop);
if (Boolean.parseBoolean(val) == which) {
props |= propertyNameToInt(prop);
}
}
return props;
}
}
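
Bitfield helper sketch (the demo method is hypothetical and would need to live in org.apache.solr.schema, since the class is package-private; expected values derive from the bit layout above):

  static void propsDemo() {
    int props = FieldProperties.INDEXED | FieldProperties.STORED | FieldProperties.MULTIVALUED;
    String s  = FieldProperties.propertiesToString(props);      // "indexed,stored,multiValued"
    int omit  = FieldProperties.propertyNameToInt("omitNorms"); // == OMIT_NORMS (0x00000020)
    boolean m = FieldProperties.on(props, FieldProperties.MULTIVALUED); // true
  }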

@@ -0,0 +1,271 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.lucene.document.Field;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.function.ValueSource;
import org.apache.lucene.search.function.OrdFieldSource;
import org.apache.solr.search.Sorting;
import org.apache.solr.request.XMLWriter;
import java.util.logging.Logger;
import java.util.Map;
import java.util.HashMap;
import java.io.Reader;
import java.io.IOException;
/**
* Base class for all field types used by an index schema.
*
* @author yonik
* @version $Id: FieldType.java,v 1.14 2006/01/06 04:23:15 yonik Exp $
*/
public abstract class FieldType extends FieldProperties {
public static final Logger log = Logger.getLogger(FieldType.class.getName());
protected String typeName; // the name of the type, not the name of the field
protected Map<String,String> args; // additional arguments
protected int trueProperties; // properties explicitly set to true
protected int falseProperties; // properties explicitly set to false
int properties;
// these options are common enough to be handled in the base class;
// not all subclasses will be able to support them.
protected int positionIncrementGap;
protected boolean isTokenized() {
return (properties & TOKENIZED) != 0;
}
/** subclasses should initialize themselves with the args provided
* and remove valid arguments. leftover arguments will cause an exception.
* Common boolean properties have already been handled.
*
*/
protected void init(IndexSchema schema, Map<String,String> args) {
}
// Handle additional arguments...
void setArgs(IndexSchema schema, Map<String,String> args) {
// default to STORED and INDEXED, and MULTIVALUED depending on schema version
properties = (STORED | INDEXED);
if (schema.getVersion()< 1.1f) properties |= MULTIVALUED;
this.args=args;
Map<String,String> initArgs = new HashMap<String,String>(args);
String str;
str = initArgs.get("positionIncrementGap");
if (str!=null) positionIncrementGap = Integer.parseInt(str);
initArgs.remove("positionIncrementGap");
trueProperties = FieldProperties.parseProperties(initArgs,true);
falseProperties = FieldProperties.parseProperties(initArgs,false);
properties &= ~falseProperties;
properties |= trueProperties;
for (String prop : FieldProperties.propertyNames) initArgs.remove(prop);
init(schema, initArgs);
if (initArgs.size() > 0) {
throw new RuntimeException("schema fieldtype " + typeName
+ "("+ this.getClass().getName() + ")"
+ " invalid arguments:" + initArgs);
}
}
protected void restrictProps(int props) {
if ((properties & props) != 0) {
throw new RuntimeException("schema fieldtype " + typeName
+ "("+ this.getClass().getName() + ")"
+ " invalid properties:" + propertiesToString(properties & props));
}
}
public String getTypeName() {
return typeName;
}
void setTypeName(String typeName) {
this.typeName = typeName;
}
public String toString() {
return typeName + "{class=" + this.getClass().getName()
// + propertiesToString(properties)
+ (analyzer != null ? ",analyzer=" + analyzer.getClass().getName() : "")
+ ",args=" + args
+"}";
}
// used for adding a document when a field needs to be created from a type and a string
// by default, the indexed value is the same as the stored value (taken from toInternal())
// Having a different representation for external, internal, and indexed would present quite
// a few problems given the current Lucene architecture. An analyzer for adding docs would
// need to translate internal->indexed while an analyzer for querying would need to
// translate external->indexed.
//
// The only other alternative to having internal==indexed would be to have
// internal==external.
// In this case, toInternal should convert to the indexed representation,
// toExternal() should do nothing, and createField() should *not* call toInternal,
// but use the external value and set tokenized=true to get Lucene to convert
// to the internal(indexed) form.
public Field createField(SchemaField field, String externalVal, float boost) {
String val = toInternal(externalVal);
if (val==null) return null;
Field f = new Field(field.getName(), val, field.stored(), field.indexed(), isTokenized());
f.setOmitNorms(field.omitNorms());
f.setBoost(boost);
return f;
}
// Convert an external value (from XML update command or from query string)
// into the internal format.
// - used in delete when a Term needs to be created.
// - used by the default getTokenizer() and createField()
public String toInternal(String val) {
return val;
}
// Convert the stored-field format to an external (string, human readable) value
// currently used in writing XML of the search result (but perhaps
// a more efficient toXML(Field f, Writer w) should be used
// in the future.
public String toExternal(Field f) {
return f.stringValue();
}
public String indexedToReadable(String indexedForm) {
return indexedForm;
}
/*********
// default analyzer for non-text fields.
// Only reads 80 bytes, but that should be plenty for a single value.
public Analyzer getAnalyzer() {
if (analyzer != null) return analyzer;
// the default analyzer...
return new Analyzer() {
public TokenStream tokenStream(String fieldName, Reader reader) {
return new Tokenizer(reader) {
final char[] cbuf = new char[80];
public Token next() throws IOException {
int n = input.read(cbuf,0,80);
if (n<=0) return null;
String s = toInternal(new String(cbuf,0,n));
return new Token(s,0,n);
};
};
}
};
}
**********/
//
// Default analyzer for types that only produce 1 verbatim token...
// A maximum size of chars to be read must be specified
//
protected final class DefaultAnalyzer extends Analyzer {
final int maxChars;
DefaultAnalyzer(int maxChars) {
this.maxChars=maxChars;
}
public TokenStream tokenStream(String fieldName, Reader reader) {
return new Tokenizer(reader) {
char[] cbuf = new char[maxChars];
public Token next() throws IOException {
int n = input.read(cbuf,0,maxChars);
if (n<=0) return null;
String s = toInternal(new String(cbuf,0,n)); // virtual func on parent
return new Token(s,0,n);
};
};
}
public int getPositionIncrementGap(String fieldName) {
return positionIncrementGap;
}
}
//analyzer set by schema for text types.
//subclasses can set analyzer themselves or override getAnalyzer()
protected Analyzer analyzer=new DefaultAnalyzer(256);
protected Analyzer queryAnalyzer=analyzer;
// getAnalyzer() should be fast to call... since the addition of dynamic fields,
// it can be called all the time instead of just once at startup.
// The analyzer will only be used in the following scenarios:
// - during a document add for any field that has "tokenized" set (typically
// only Text fields)
// - during query parsing
public Analyzer getAnalyzer() {
return analyzer;
}
public Analyzer getQueryAnalyzer() {
return queryAnalyzer;
}
// This is called by the schema parser if a custom analyzer is defined
public void setAnalyzer(Analyzer analyzer) {
this.analyzer = analyzer;
log.finest("FieldType: " + typeName + ".setAnalyzer(" + analyzer.getClass().getName() + ")" );
}
// This is called by the schema parser if a custom analyzer is defined
public void setQueryAnalyzer(Analyzer analyzer) {
this.queryAnalyzer = analyzer;
log.finest("FieldType: " + typeName + ".setQueryAnalyzer(" + analyzer.getClass().getName() + ")" );
}
public abstract void write(XMLWriter xmlWriter, String name, Field f) throws IOException;
public abstract SortField getSortField(SchemaField field, boolean top);
protected SortField getStringSort(SchemaField field, boolean reverse) {
return Sorting.getStringSortField(field.name, reverse, field.sortMissingLast(),field.sortMissingFirst());
}
/** called to get the default value source (normally, from the
* Lucene FieldCache.)
*/
public ValueSource getValueSource(SchemaField field) {
return new OrdFieldSource(field.name);
}
}
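
A hypothetical minimal subclass (the name and behavior are illustrative, assuming the same package and imports as the field types above): it stores values verbatim, sorts as a string, and writes them out as <str> elements:

public class VerbatimField extends FieldType {
  protected void init(IndexSchema schema, Map<String,String> args) {
    // no extra args; the toInternal/toExternal defaults pass values through
  }
  public SortField getSortField(SchemaField field, boolean reverse) {
    return getStringSort(field, reverse); // honors sortMissingFirst/Last
  }
  public void write(XMLWriter xmlWriter, String name, Field f) throws IOException {
    xmlWriter.writeStr(name, toExternal(f));
  }
}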

@@ -0,0 +1,48 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.function.ValueSource;
import org.apache.lucene.search.function.FloatFieldSource;
import org.apache.lucene.document.Field;
import org.apache.solr.request.XMLWriter;
import java.util.Map;
import java.io.IOException;
/**
* @author yonik
* @version $Id$
*/
public class FloatField extends FieldType {
protected void init(IndexSchema schema, Map<String,String> args) {
restrictProps(SORT_MISSING_FIRST | SORT_MISSING_LAST);
}
public SortField getSortField(SchemaField field,boolean reverse) {
return new SortField(field.name,SortField.FLOAT, reverse);
}
public ValueSource getValueSource(SchemaField field) {
return new FloatFieldSource(field.name);
}
public void write(XMLWriter xmlWriter, String name, Field f) throws IOException {
xmlWriter.writeFloat(name, f.stringValue());
}
}

@@ -0,0 +1,542 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.search.Similarity;
import org.apache.solr.core.SolrException;
import org.apache.solr.core.Config;
import org.apache.solr.analysis.TokenFilterFactory;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.analysis.TokenizerFactory;
import org.apache.solr.util.DOMUtil;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;
import java.io.InputStream;
import java.io.Reader;
import java.util.*;
import java.util.logging.Logger;
/**
* <code>IndexSchema</code> contains information about the valid fields in an index
* and the types of those fields.
*
* @author yonik
* @version $Id: IndexSchema.java,v 1.21 2005/12/20 16:05:46 yonik Exp $
*/
public final class IndexSchema {
final static Logger log = Logger.getLogger(IndexSchema.class.getName());
private final String schemaFile;
private String name;
private float version;
public IndexSchema(String schemaFile) {
this.schemaFile=schemaFile;
readConfig();
}
public InputStream getInputStream() {
return Config.openResource(schemaFile);
}
float getVersion() {
return version;
}
public String getName() { return name; }
private final HashMap<String, SchemaField> fields = new HashMap<String,SchemaField>();
private final HashMap<String, FieldType> fieldTypes = new HashMap<String,FieldType>();
public Map<String,SchemaField> getFields() { return fields; }
public Map<String,FieldType> getFieldTypes() { return fieldTypes; }
private Similarity similarity;
public Similarity getSimilarity() { return similarity; }
private Analyzer analyzer;
public Analyzer getAnalyzer() { return analyzer; }
private Analyzer queryAnalyzer;
public Analyzer getQueryAnalyzer() { return queryAnalyzer; }
private String defaultSearchFieldName=null;
public String getDefaultSearchFieldName() {
return defaultSearchFieldName;
}
private SchemaField uniqueKeyField;
public SchemaField getUniqueKeyField() { return uniqueKeyField; }
private String uniqueKeyFieldName;
private FieldType uniqueKeyFieldType;
public Field getUniqueKeyField(org.apache.lucene.document.Document doc) {
return doc.getField(uniqueKeyFieldName); // this should return null if name is null
}
public String printableUniqueKey(org.apache.lucene.document.Document doc) {
Field f = doc.getField(uniqueKeyFieldName);
return f==null ? null : uniqueKeyFieldType.toExternal(f);
}
private SchemaField getIndexedField(String fname) {
SchemaField f = getFields().get(fname);
if (f==null) {
throw new RuntimeException("unknown field '" + fname + "'");
}
if (!f.indexed()) {
throw new RuntimeException("'"+fname+"' is not an indexed field:" + f);
}
return f;
}
private class SolrAnalyzer extends Analyzer {
protected final HashMap<String,Analyzer> analyzers;
SolrAnalyzer() {
analyzers = analyzerCache();
}
protected HashMap<String,Analyzer> analyzerCache() {
HashMap<String,Analyzer> cache = new HashMap<String,Analyzer>();
for (SchemaField f : getFields().values()) {
Analyzer analyzer = f.getType().getAnalyzer();
cache.put(f.getName(), analyzer);
}
return cache;
}
protected Analyzer getAnalyzer(String fieldName)
{
Analyzer analyzer = analyzers.get(fieldName);
return analyzer!=null ? analyzer : getDynamicFieldType(fieldName).getAnalyzer();
}
public TokenStream tokenStream(String fieldName, Reader reader)
{
return getAnalyzer(fieldName).tokenStream(fieldName,reader);
}
public int getPositionIncrementGap(String fieldName) {
return getAnalyzer(fieldName).getPositionIncrementGap(fieldName);
}
}
private class SolrQueryAnalyzer extends SolrAnalyzer {
protected HashMap<String,Analyzer> analyzerCache() {
HashMap<String,Analyzer> cache = new HashMap<String,Analyzer>();
for (SchemaField f : getFields().values()) {
Analyzer analyzer = f.getType().getQueryAnalyzer();
cache.put(f.getName(), analyzer);
}
return cache;
}
protected Analyzer getAnalyzer(String fieldName)
{
Analyzer analyzer = analyzers.get(fieldName);
return analyzer!=null ? analyzer : getDynamicFieldType(fieldName).getQueryAnalyzer();
}
}
private void readConfig() {
log.info("Reading Solr Schema");
try {
/***
DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
Document document = builder.parse(getInputStream());
***/
Config config = new Config("schema", getInputStream(), "/schema/");
Document document = config.getDocument();
XPath xpath = config.getXPath();
Node nd = (Node) xpath.evaluate("/schema/@name", document, XPathConstants.NODE);
if (nd==null) {
log.warning("schema has no name!");
} else {
name = nd.getNodeValue();
log.info("Schema name=" + name);
}
version = config.getFloat("/schema/@version", 1.0f);
String expression = "/schema/types/fieldtype";
NodeList nodes = (NodeList) xpath.evaluate(expression, document, XPathConstants.NODESET);
for (int i=0; i<nodes.getLength(); i++) {
Node node = nodes.item(i);
NamedNodeMap attrs = node.getAttributes();
String name = DOMUtil.getAttr(attrs,"name","fieldtype error");
log.finest("reading fieldtype "+name);
String clsName = DOMUtil.getAttr(attrs,"class", "fieldtype error");
FieldType ft = (FieldType)Config.newInstance(clsName);
ft.setTypeName(name);
expression = "./analyzer[@type='query']";
Node anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
Analyzer queryAnalyzer = readAnalyzer(anode);
// An analyzer without a type specified, or with type="index"
expression = "./analyzer[not(@type)] | ./analyzer[@type='index']";
anode = (Node)xpath.evaluate(expression, node, XPathConstants.NODE);
Analyzer analyzer = readAnalyzer(anode);
if (queryAnalyzer==null) queryAnalyzer=analyzer;
if (analyzer==null) analyzer=queryAnalyzer;
if (analyzer!=null) {
ft.setAnalyzer(analyzer);
ft.setQueryAnalyzer(queryAnalyzer);
}
ft.setArgs(this, DOMUtil.toMapExcept(attrs,"name","class"));
fieldTypes.put(ft.typeName,ft);
log.finest("fieldtype defined: " + ft);
}
ArrayList<DynamicField> dFields = new ArrayList<DynamicField>();
expression = "/schema/fields/field | /schema/fields/dynamicField";
nodes = (NodeList) xpath.evaluate(expression, document, XPathConstants.NODESET);
for (int i=0; i<nodes.getLength(); i++) {
Node node = nodes.item(i);
NamedNodeMap attrs = node.getAttributes();
String name = DOMUtil.getAttr(attrs,"name","field definition");
log.finest("reading field def "+name);
String type = DOMUtil.getAttr(attrs,"type","field " + name);
String val;
FieldType ft = fieldTypes.get(type);
if (ft==null) {
throw new SolrException(400,"Unknown fieldtype '" + type + "'",false);
}
Map<String,String> args = DOMUtil.toMapExcept(attrs, "name", "type");
SchemaField f = SchemaField.create(name,ft,args);
if (node.getNodeName().equals("field")) {
fields.put(f.getName(),f);
log.fine("field defined: " + f);
} else if (node.getNodeName().equals("dynamicField")) {
dFields.add(new DynamicField(f));
log.fine("dynamic field defined: " + f);
} else {
// we should never get here
throw new RuntimeException("Unknown field type");
}
}
// OK, now sort the dynamic fields largest to smallest size so we don't get
// any false matches. We want to act like a compiler tool and try to match
// the largest string possible.
Collections.sort(dFields, new Comparator<DynamicField>() {
public int compare(DynamicField a, DynamicField b) {
// swap natural ordering to get biggest first.
// The sort is stable, so elements of the same size should
// be kept in their original order.
if (a.regex.length() < b.regex.length()) return 1;
else if (a.regex.length() > b.regex.length()) return -1;
return 0;
}
}
);
log.finest("Dynamic Field Ordering:" + dFields);
// stuff it in a normal array for faster access
dynamicFields = (DynamicField[])dFields.toArray(new DynamicField[dFields.size()]);
Node node = (Node) xpath.evaluate("/schema/similarity/@class", document, XPathConstants.NODE);
if (node==null) {
similarity = new DefaultSimilarity();
log.fine("using default similarity");
} else {
similarity = (Similarity)Config.newInstance(node.getNodeValue().trim());
log.fine("using similarity " + similarity.getClass().getName());
}
node = (Node) xpath.evaluate("/schema/defaultSearchField/text()", document, XPathConstants.NODE);
if (node==null) {
log.warning("no default search field specified in schema.");
} else {
String defName=node.getNodeValue().trim();
defaultSearchFieldName = getIndexedField(defName)!=null ? defName : null;
log.info("default search field is "+defName);
}
node = (Node) xpath.evaluate("/schema/uniqueKey/text()", document, XPathConstants.NODE);
if (node==null) {
log.warning("no uniqueKey specified in schema.");
} else {
uniqueKeyField=getIndexedField(node.getNodeValue().trim());
uniqueKeyFieldName=uniqueKeyField.getName();
uniqueKeyFieldType=uniqueKeyField.getType();
log.info("unique key field: "+uniqueKeyFieldName);
}
/////////////// parse out copyField commands ///////////////
// Map<String,ArrayList<SchemaField>> cfields = new HashMap<String,ArrayList<SchemaField>>();
// expression = "/schema/copyField";
expression = "//copyField";
nodes = (NodeList) xpath.evaluate(expression, document, XPathConstants.NODESET);
for (int i=0; i<nodes.getLength(); i++) {
node = nodes.item(i);
NamedNodeMap attrs = node.getAttributes();
String source = DOMUtil.getAttr(attrs,"source","copyField definition");
String dest = DOMUtil.getAttr(attrs,"dest","copyField definition");
log.fine("copyField source='"+source+"' dest='"+dest+"'");
SchemaField f = getField(source);
SchemaField d = getField(dest);
SchemaField[] destArr = copyFields.get(source);
if (destArr==null) {
destArr=new SchemaField[]{d};
} else {
destArr = (SchemaField[])append(destArr,d);
}
copyFields.put(source,destArr);
}
} catch (SolrException e) {
throw e;
} catch(Exception e) {
// unexpected exception...
throw new SolrException(1,"Schema Parsing Failed",e,false);
}
analyzer = new SolrAnalyzer();
queryAnalyzer = new SolrQueryAnalyzer();
}
private static Object[] append(Object[] orig, Object item) {
Object[] newArr = (Object[])java.lang.reflect.Array.newInstance(orig.getClass().getComponentType(), orig.length+1);
System.arraycopy(orig, 0, newArr, 0, orig.length);
newArr[orig.length] = item;
return newArr;
}
//
// <analyzer><tokenizer class="...."/><tokenizer class="...." arg="....">
//
//
private Analyzer readAnalyzer(Node node) throws XPathExpressionException {
// parent node used to be passed in as "fieldtype"
// if (!fieldtype.hasChildNodes()) return null;
// Node node = DOMUtil.getChild(fieldtype,"analyzer");
if (node == null) return null;
NamedNodeMap attrs = node.getAttributes();
String analyzerName = DOMUtil.getAttr(attrs,"class");
if (analyzerName != null) {
return (Analyzer)Config.newInstance(analyzerName);
}
XPath xpath = XPathFactory.newInstance().newXPath();
Node tokNode = (Node)xpath.evaluate("./tokenizer", node, XPathConstants.NODE);
NodeList nList = (NodeList)xpath.evaluate("./filter", node, XPathConstants.NODESET);
if (tokNode==null){
throw new SolrException(1,"analyzer without class or tokenizer & filter list");
}
TokenizerFactory tfac = readTokenizerFactory(tokNode);
/******
// oops, getChildNodes() includes text (newlines, etc) in addition
// to the actual child elements
NodeList nList = node.getChildNodes();
TokenizerFactory tfac = readTokenizerFactory(nList.item(0));
if (tfac==null) {
throw new SolrException(1,"TokenizerFactory must be specified first in analyzer");
}
******/
ArrayList<TokenFilterFactory> filters = new ArrayList<TokenFilterFactory>();
for (int i=0; i<nList.getLength(); i++) {
TokenFilterFactory filt = readTokenFilterFactory(nList.item(i));
if (filt != null) filters.add(filt);
}
return new TokenizerChain(tfac, filters.toArray(new TokenFilterFactory[filters.size()]));
}
// <tokenizer class="solr.StandardTokenizerFactory"/>
private TokenizerFactory readTokenizerFactory(Node node) {
// if (node.getNodeName() != "tokenizer") return null;
NamedNodeMap attrs = node.getAttributes();
String className = DOMUtil.getAttr(attrs,"class","tokenizer");
TokenizerFactory tfac = (TokenizerFactory)Config.newInstance(className);
tfac.init(DOMUtil.toMapExcept(attrs,"class"));
return tfac;
}
// <filter class="solr.StandardFilterFactory"/>
private TokenFilterFactory readTokenFilterFactory(Node node) {
// if (node.getNodeName() != "filter") return null;
NamedNodeMap attrs = node.getAttributes();
String className = DOMUtil.getAttr(attrs,"class","token filter");
TokenFilterFactory tfac = (TokenFilterFactory)Config.newInstance(className);
tfac.init(DOMUtil.toMapExcept(attrs,"class"));
return tfac;
}
//
// Instead of storing a type, this could be implemented as a hierarchy
// with a virtual matches().
// Given how often a search will be done, however, speed is the overriding
// concern and I'm not sure which is faster.
//
final static class DynamicField {
final static int STARTS_WITH=1;
final static int ENDS_WITH=2;
final String regex;
final int type;
final SchemaField prototype;
final String str;
DynamicField(SchemaField prototype) {
this.regex=prototype.name;
if (regex.startsWith("*")) {
type=ENDS_WITH;
str=regex.substring(1);
}
else if (regex.endsWith("*")) {
type=STARTS_WITH;
str=regex.substring(0,regex.length()-1);
}
else {
throw new RuntimeException("dynamic field name must start or end with *");
}
this.prototype=prototype;
}
boolean matches(String name) {
if (type==STARTS_WITH && name.startsWith(str)) return true;
else if (type==ENDS_WITH && name.endsWith(str)) return true;
else return false;
}
SchemaField makeSchemaField(String name) {
// could have a cache instead of returning a new one each time, but it might
// not be worth it.
// Actually, a higher level cache could be worth it to avoid too many
// .startsWith() and .endsWith() comparisons. it depends on how many
// dynamic fields there are.
return new SchemaField(prototype, name);
}
public String toString() {
return prototype.toString();
}
}
private DynamicField[] dynamicFields;
// get a field, and if not statically defined, check dynamic fields.
public SchemaField getField(String fieldName) {
SchemaField f = fields.get(fieldName);
if (f != null) return f;
for (DynamicField df : dynamicFields) {
if (df.matches(fieldName)) return df.makeSchemaField(fieldName);
}
// Hmmm, default field could also be implemented with a dynamic field of "*".
// It would have to be special-cased and only used if nothing else matched.
/*** REMOVED -YCS
if (defaultFieldType != null) return new SchemaField(fieldName,defaultFieldType);
***/
throw new SolrException(1,"undefined field "+fieldName);
}
// This method exists because it can be more efficient for dynamic fields
// if a full SchemaField isn't needed.
public FieldType getFieldType(String fieldName) {
SchemaField f = fields.get(fieldName);
if (f != null) return f.getType();
return getDynamicFieldType(fieldName);
}
/**
* return null instead of throwing an exception if
* the field is undefined.
*/
public FieldType getFieldTypeNoEx(String fieldName) {
SchemaField f = fields.get(fieldName);
if (f != null) return f.getType();
return dynFieldType(fieldName);
}
public FieldType getDynamicFieldType(String fieldName) {
for (DynamicField df : dynamicFields) {
if (df.matches(fieldName)) return df.prototype.getType();
}
throw new SolrException(400,"undefined field "+fieldName);
}
private FieldType dynFieldType(String fieldName) {
for (DynamicField df : dynamicFields) {
if (df.matches(fieldName)) return df.prototype.getType();
}
return null;
}
private final Map<String, SchemaField[]> copyFields = new HashMap<String,SchemaField[]>();
public SchemaField[] getCopyFields(String sourceField) {
return copyFields.get(sourceField);
}
}
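// --- Hypothetical usage sketch (not part of the original file). Shows how a
// schema might be loaded and how field lookup falls back to dynamic fields.
// Assumes a "schema.xml" on the classpath defining an "id" field and a "*_s"
// dynamicField; adjust the names to your schema.
class IndexSchemaExample {
  public static void main(String[] args) {
    IndexSchema schema = new IndexSchema("schema.xml");
    SchemaField id = schema.getField("id");        // statically defined field
    SchemaField dyn = schema.getField("title_s");  // matched by dynamic field "*_s"
    System.out.println(id);
    System.out.println(dyn);
  }
}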

View File

@@ -0,0 +1,47 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.function.ValueSource;
import org.apache.lucene.search.function.IntFieldSource;
import org.apache.lucene.document.Field;
import org.apache.solr.request.XMLWriter;
import java.util.Map;
import java.io.IOException;
/**
* @author yonik
* @version $Id$
*/
public class IntField extends FieldType {
protected void init(IndexSchema schema, Map<String,String> args) {
restrictProps(SORT_MISSING_FIRST | SORT_MISSING_LAST);
}
public SortField getSortField(SchemaField field,boolean reverse) {
return new SortField(field.name,SortField.INT, reverse);
}
public ValueSource getValueSource(SchemaField field) {
return new IntFieldSource(field.name);
}
public void write(XMLWriter xmlWriter, String name, Field f) throws IOException {
xmlWriter.writeInt(name, f.stringValue());
}
}

View File

@@ -0,0 +1,52 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.function.ValueSource;
import org.apache.lucene.search.function.IntFieldSource;
import org.apache.lucene.document.Field;
import org.apache.solr.request.XMLWriter;
import java.util.Map;
import java.io.IOException;
/**
* @author yonik
* @version $Id$
*/
public class LongField extends FieldType {
protected void init(IndexSchema schema, Map<String,String> args) {
restrictProps(SORT_MISSING_FIRST | SORT_MISSING_LAST);
}
/////////////////////////////////////////////////////////////
// TODO: ACK.. there is no SortField.LONG!
public SortField getSortField(SchemaField field,boolean reverse) {
// todo - log warning
return new SortField(field.name,SortField.INT, reverse);
}
public ValueSource getValueSource(SchemaField field) {
// todo - log warning
return new IntFieldSource(field.name);
}
public void write(XMLWriter xmlWriter, String name, Field f) throws IOException {
xmlWriter.writeLong(name, f.stringValue());
}
}

View File

@@ -0,0 +1,159 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.lucene.document.Field;
import org.apache.lucene.search.SortField;
import org.apache.solr.request.XMLWriter;
import java.util.Map;
import java.io.IOException;
/**
* @author yonik
* @version $Id: SchemaField.java,v 1.8 2005/11/28 06:03:19 yonik Exp $
*/
public final class SchemaField extends FieldProperties {
final String name;
final FieldType type;
final int properties;
/** Create a new SchemaField with the given name and type,
* using all the default properties from the type.
*/
public SchemaField(String name, FieldType type) {
this(name, type, type.properties);
}
/** Create a new SchemaField from an existing one by using all
* of the properties of the prototype except the field name.
*/
public SchemaField(SchemaField prototype, String name) {
this(name, prototype.type, prototype.properties);
}
/** Create a new SchemaField with the given name and type,
* and with the specified properties. Properties are *not*
* inherited from the type in this case, so users of this
* constructor should derive the properties from type.getProperties()
* if the defaults for the type are desired.
*/
public SchemaField(String name, FieldType type, int properties) {
this.name = name;
this.type = type;
this.properties = properties;
}
public String getName() { return name; }
public FieldType getType() { return type; }
int getProperties() { return properties; }
public boolean indexed() { return (properties & INDEXED)!=0; }
public boolean stored() { return (properties & STORED)!=0; }
public boolean storeTermVector() { return (properties & STORE_TERMVECTORS)!=0; }
public boolean storeTermPositions() { return (properties & STORE_TERMPOSITIONS)!=0; }
public boolean storeTermOffsets() { return (properties & STORE_TERMOFFSETS)!=0; }
public boolean omitNorms() { return (properties & OMIT_NORMS)!=0; }
public boolean multiValued() { return (properties & MULTIVALUED)!=0; }
public boolean sortMissingFirst() { return (properties & SORT_MISSING_FIRST)!=0; }
public boolean sortMissingLast() { return (properties & SORT_MISSING_LAST)!=0; }
// things that should be determined by field type, not set as options
boolean isTokenized() { return (properties & TOKENIZED)!=0; }
boolean isBinary() { return (properties & BINARY)!=0; }
boolean isCompressed() { return (properties & COMPRESSED)!=0; }
public Field createField(String val, float boost) {
return type.createField(this,val,boost);
}
public String toString() {
return name + "{type="+type.getTypeName()
+ ",properties=" + propertiesToString(properties)
+ "}";
}
public void write(XMLWriter writer, String name, Field val) throws IOException {
// name is passed in because it may be null if name should not be used.
type.write(writer,name,val);
}
public SortField getSortField(boolean top) {
return type.getSortField(this, top);
}
static SchemaField create(String name, FieldType ft, Map props) {
int trueProps = parseProperties(props,true);
int falseProps = parseProperties(props,false);
int p = ft.properties;
//
// If any properties were explicitly turned off, then turn off other properties
// that depend on that.
//
if (on(falseProps,STORED)) {
int pp = STORED | BINARY | COMPRESSED;
if (on(pp,trueProps)) {
throw new RuntimeException("SchemaField: " + name + " conflicting stored field options:" + props);
}
p &= ~pp;
}
if (on(falseProps,INDEXED)) {
int pp = (INDEXED | OMIT_NORMS
| STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS
| SORT_MISSING_FIRST | SORT_MISSING_LAST);
if (on(pp,trueProps)) {
throw new RuntimeException("SchemaField: " + name + " conflicting indexed field options:" + props);
}
p &= ~pp;
}
if (on(falseProps,STORE_TERMVECTORS)) {
int pp = (STORE_TERMVECTORS | STORE_TERMPOSITIONS | STORE_TERMOFFSETS);
if (on(pp,trueProps)) {
throw new RuntimeException("SchemaField: " + name + " conflicting termvector field options:" + props);
}
p &= ~pp;
}
// override sort flags
if (on(trueProps,SORT_MISSING_FIRST)) {
p &= ~SORT_MISSING_LAST;
}
if (on(trueProps,SORT_MISSING_LAST)) {
p &= ~SORT_MISSING_FIRST;
}
p &= ~falseProps;
p |= trueProps;
return new SchemaField(name, ft, p);
}
}
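// --- Hypothetical sketch (not part of the original file). Illustrates the
// prototype constructor used by dynamic fields: the derived field copies the
// prototype's type and property bits, changing only the name.
class SchemaFieldExample {
  static SchemaField concrete(SchemaField prototype, String name) {
    return new SchemaField(prototype, name); // same type + properties, new name
  }
}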

View File

@@ -0,0 +1,132 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.function.ValueSource;
import org.apache.lucene.search.function.FieldCacheSource;
import org.apache.lucene.search.function.DocValues;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.solr.util.NumberUtils;
import org.apache.solr.request.XMLWriter;
import java.util.Map;
import java.io.IOException;
/**
* @author yonik
* @version $Id$
*/
public class SortableDoubleField extends FieldType {
protected void init(IndexSchema schema, Map<String,String> args) {
}
public SortField getSortField(SchemaField field,boolean reverse) {
return getStringSort(field,reverse);
}
public ValueSource getValueSource(SchemaField field) {
return new SortableDoubleFieldSource(field.name);
}
public String toInternal(String val) {
return NumberUtils.double2sortableStr(val);
}
public String toExternal(Field f) {
return indexedToReadable(f.stringValue());
}
public String indexedToReadable(String indexedForm) {
return NumberUtils.SortableStr2doubleStr(indexedForm);
}
public void write(XMLWriter xmlWriter, String name, Field f) throws IOException {
String sval = f.stringValue();
xmlWriter.writeDouble(name, NumberUtils.SortableStr2double(sval));
}
}
class SortableDoubleFieldSource extends FieldCacheSource {
protected double defVal;
public SortableDoubleFieldSource(String field) {
this(field, 0.0);
}
public SortableDoubleFieldSource(String field, double defVal) {
super(field);
this.defVal = defVal;
}
public String description() {
return "sdouble(" + field + ')';
}
public DocValues getValues(IndexReader reader) throws IOException {
final FieldCache.StringIndex index = cache.getStringIndex(reader, field);
final int[] order = index.order;
final String[] lookup = index.lookup;
final double def = defVal;
return new DocValues() {
public float floatVal(int doc) {
return (float)doubleVal(doc);
}
public int intVal(int doc) {
return (int)doubleVal(doc);
}
public long longVal(int doc) {
return (long)doubleVal(doc);
}
public double doubleVal(int doc) {
int ord=order[doc];
return ord==0 ? def : NumberUtils.SortableStr2double(lookup[ord]);
}
public String strVal(int doc) {
return Double.toString(doubleVal(doc));
}
public String toString(int doc) {
return description() + '=' + doubleVal(doc);
}
};
}
public boolean equals(Object o) {
return o instanceof SortableDoubleFieldSource
&& super.equals(o)
&& defVal == ((SortableDoubleFieldSource)o).defVal;
}
private static int hcode = SortableDoubleFieldSource.class.hashCode();
public int hashCode() {
long bits = Double.doubleToLongBits(defVal);
int ibits = (int)(bits ^ (bits>>>32)); // mix upper bits into lower.
return hcode + super.hashCode() + ibits;
}
}

View File

@@ -0,0 +1,129 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.function.ValueSource;
import org.apache.lucene.search.function.FieldCacheSource;
import org.apache.lucene.search.function.DocValues;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.solr.util.NumberUtils;
import org.apache.solr.request.XMLWriter;
import java.util.Map;
import java.io.IOException;
/**
* @author yonik
* @version $Id$
*/
public class SortableFloatField extends FieldType {
protected void init(IndexSchema schema, Map<String,String> args) {
}
public SortField getSortField(SchemaField field,boolean reverse) {
return getStringSort(field,reverse);
}
public ValueSource getValueSource(SchemaField field) {
return new SortableFloatFieldSource(field.name);
}
public String toInternal(String val) {
return NumberUtils.float2sortableStr(val);
}
public String toExternal(Field f) {
return indexedToReadable(f.stringValue());
}
public String indexedToReadable(String indexedForm) {
return NumberUtils.SortableStr2floatStr(indexedForm);
}
public void write(XMLWriter xmlWriter, String name, Field f) throws IOException {
String sval = f.stringValue();
xmlWriter.writeFloat(name, NumberUtils.SortableStr2float(sval));
}
}
class SortableFloatFieldSource extends FieldCacheSource {
protected float defVal;
public SortableFloatFieldSource(String field) {
this(field, 0.0f);
}
public SortableFloatFieldSource(String field, float defVal) {
super(field);
this.defVal = defVal;
}
public String description() {
return "sfloat(" + field + ')';
}
public DocValues getValues(IndexReader reader) throws IOException {
final FieldCache.StringIndex index = cache.getStringIndex(reader, field);
final int[] order = index.order;
final String[] lookup = index.lookup;
final float def = defVal;
return new DocValues() {
public float floatVal(int doc) {
int ord=order[doc];
return ord==0 ? def : NumberUtils.SortableStr2float(lookup[ord]);
}
public int intVal(int doc) {
return (int)floatVal(doc);
}
public long longVal(int doc) {
return (long)floatVal(doc);
}
public double doubleVal(int doc) {
return (double)floatVal(doc);
}
public String strVal(int doc) {
return Float.toString(floatVal(doc));
}
public String toString(int doc) {
return description() + '=' + floatVal(doc);
}
};
}
public boolean equals(Object o) {
return o instanceof SortableFloatFieldSource
&& super.equals(o)
&& defVal == ((SortableFloatFieldSource)o).defVal;
}
private static int hcode = SortableFloatFieldSource.class.hashCode();
public int hashCode() {
return hcode + super.hashCode() + Float.floatToIntBits(defVal);
}
}

View File

@@ -0,0 +1,132 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.function.ValueSource;
import org.apache.lucene.search.function.FieldCacheSource;
import org.apache.lucene.search.function.DocValues;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.solr.util.NumberUtils;
import org.apache.solr.request.XMLWriter;
import java.util.Map;
import java.io.IOException;
/**
* @author yonik
* @version $Id$
*/
public class SortableIntField extends FieldType {
protected void init(IndexSchema schema, Map<String,String> args) {
}
public SortField getSortField(SchemaField field,boolean reverse) {
return getStringSort(field,reverse);
}
public ValueSource getValueSource(SchemaField field) {
return new SortableIntFieldSource(field.name);
}
public String toInternal(String val) {
// special case single digits? years?, etc
// stringCache? general stringCache on a
// global field level?
return NumberUtils.int2sortableStr(val);
}
public String toExternal(Field f) {
return indexedToReadable(f.stringValue());
}
public String indexedToReadable(String indexedForm) {
return NumberUtils.SortableStr2int(indexedForm);
}
public void write(XMLWriter xmlWriter, String name, Field f) throws IOException {
String sval = f.stringValue();
// write an int instead of a String since that may be more efficient
// in the future (saves the construction of one String)
xmlWriter.writeInt(name, NumberUtils.SortableStr2int(sval,0,sval.length()));
}
}
class SortableIntFieldSource extends FieldCacheSource {
protected int defVal;
public SortableIntFieldSource(String field) {
this(field, 0);
}
public SortableIntFieldSource(String field, int defVal) {
super(field);
this.defVal = defVal;
}
public String description() {
return "sint(" + field + ')';
}
public DocValues getValues(IndexReader reader) throws IOException {
final FieldCache.StringIndex index = cache.getStringIndex(reader, field);
final int[] order = index.order;
final String[] lookup = index.lookup;
final int def = defVal;
return new DocValues() {
public float floatVal(int doc) {
return (float)intVal(doc);
}
public int intVal(int doc) {
int ord=order[doc];
return ord==0 ? def : NumberUtils.SortableStr2int(lookup[ord],0,3);
}
public long longVal(int doc) {
return (long)intVal(doc);
}
public double doubleVal(int doc) {
return (double)intVal(doc);
}
public String strVal(int doc) {
return Integer.toString(intVal(doc));
}
public String toString(int doc) {
return description() + '=' + intVal(doc);
}
};
}
public boolean equals(Object o) {
return o instanceof SortableIntFieldSource
&& super.equals(o)
&& defVal == ((SortableIntFieldSource)o).defVal;
}
private static int hcode = SortableIntFieldSource.class.hashCode();
public int hashCode() {
return hcode + super.hashCode() + defVal;
}
}
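// --- Hypothetical sketch (not part of the original file). The "sortable"
// internal form is a string whose lexicographic order matches numeric order,
// so an ordinary string sort of the indexed terms yields a numeric sort.
class SortableIntExample {
  public static void main(String[] args) {
    String a = NumberUtils.int2sortableStr("-5");
    String b = NumberUtils.int2sortableStr("10");
    System.out.println(a.compareTo(b) < 0);            // true: -5 sorts before 10
    System.out.println(NumberUtils.SortableStr2int(b)); // round-trips to "10"
  }
}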

View File

@@ -0,0 +1,129 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.function.ValueSource;
import org.apache.lucene.search.function.FieldCacheSource;
import org.apache.lucene.search.function.DocValues;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.solr.util.NumberUtils;
import org.apache.solr.request.XMLWriter;
import java.util.Map;
import java.io.IOException;
/**
* @author yonik
* @version $Id$
*/
public class SortableLongField extends FieldType {
protected void init(IndexSchema schema, Map<String,String> args) {
}
public SortField getSortField(SchemaField field,boolean reverse) {
return getStringSort(field,reverse);
}
public ValueSource getValueSource(SchemaField field) {
return new SortableLongFieldSource(field.name);
}
public String toInternal(String val) {
return NumberUtils.long2sortableStr(val);
}
public String indexedToReadable(String indexedForm) {
return NumberUtils.SortableStr2long(indexedForm);
}
public String toExternal(Field f) {
return indexedToReadable(f.stringValue());
}
public void write(XMLWriter xmlWriter, String name, Field f) throws IOException {
String sval = f.stringValue();
xmlWriter.writeLong(name, NumberUtils.SortableStr2long(sval,0,sval.length()));
}
}
class SortableLongFieldSource extends FieldCacheSource {
protected long defVal;
public SortableLongFieldSource(String field) {
this(field, 0);
}
public SortableLongFieldSource(String field, long defVal) {
super(field);
this.defVal = defVal;
}
public String description() {
return "slong(" + field + ')';
}
public DocValues getValues(IndexReader reader) throws IOException {
final FieldCache.StringIndex index = cache.getStringIndex(reader, field);
final int[] order = index.order;
final String[] lookup = index.lookup;
final long def = defVal;
return new DocValues() {
public float floatVal(int doc) {
return (float)longVal(doc);
}
public int intVal(int doc) {
return (int)longVal(doc);
}
public long longVal(int doc) {
int ord=order[doc];
return ord==0 ? def : NumberUtils.SortableStr2long(lookup[ord],0,5);
}
public double doubleVal(int doc) {
return (double)longVal(doc);
}
public String strVal(int doc) {
return Long.toString(longVal(doc));
}
public String toString(int doc) {
return description() + '=' + longVal(doc);
}
};
}
public boolean equals(Object o) {
return o instanceof SortableLongFieldSource
&& super.equals(o)
&& defVal == ((SortableLongFieldSource)o).defVal;
}
private static int hcode = SortableLongFieldSource.class.hashCode();
public int hashCode() {
return hcode + super.hashCode() + (int)defVal;
}
}

View File

@@ -0,0 +1,41 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.lucene.search.SortField;
import org.apache.lucene.document.Field;
import org.apache.solr.request.XMLWriter;
import java.util.Map;
import java.io.IOException;
/**
* @author yonik
* @version $Id$
*/
//TODO: allow specification of max string size?
public class StrField extends FieldType {
protected void init(IndexSchema schema, Map<String,String> args) {
}
public SortField getSortField(SchemaField field,boolean reverse) {
return getStringSort(field,reverse);
}
public void write(XMLWriter xmlWriter, String name, Field f) throws IOException {
xmlWriter.writeStr(name, f.stringValue());
}
}

View File

@@ -0,0 +1,43 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.schema;
import org.apache.lucene.search.SortField;
import org.apache.lucene.document.Field;
import org.apache.solr.request.XMLWriter;
import java.util.Map;
import java.io.IOException;
/** <code>TextField</code> is the basic type for configurable text analysis.
* Analyzers for field types using this implementation should be defined in the schema.
* @author yonik
* @version $Id$
*/
public class TextField extends FieldType {
protected void init(IndexSchema schema, Map<String,String> args) {
properties |= TOKENIZED;
}
public SortField getSortField(SchemaField field, boolean reverse) {
return new SortField(field.name,SortField.STRING, reverse);
}
public void write(XMLWriter xmlWriter, String name, Field f) throws IOException {
xmlWriter.writeStr(name, f.stringValue());
}
}

View File

@@ -0,0 +1,112 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.util.BitSet;
/**
* <code>BitDocSet</code> represents an unordered set of Lucene Document Ids
* using a BitSet. A set bit represents inclusion in the set for that document.
*
* @author yonik
* @version $Id: BitDocSet.java,v 1.4 2005/10/27 04:14:49 yonik Exp $
* @since solr 0.9
*/
public class BitDocSet extends DocSetBase {
final BitSet bits;
int size; // number of docs in the set (cached for perf)
public BitDocSet() {
bits = new BitSet();
}
public BitDocSet(BitSet bits) {
this.bits = bits;
size=-1;
}
public BitDocSet(BitSet bits, int size) {
this.bits = bits;
this.size = size;
}
public DocIterator iterator() {
return new DocIterator() {
int pos=bits.nextSetBit(0);
public boolean hasNext() {
return pos>=0;
}
public Integer next() {
return nextDoc();
}
public void remove() {
bits.clear(pos);
}
public int nextDoc() {
int old=pos;
pos=bits.nextSetBit(old+1);
return old;
}
public float score() {
return 0.0f;
}
};
}
/**
*
* @return the <b>internal</b> BitSet that should <b>not</b> be modified.
*/
public BitSet getBits() {
return bits;
}
public void add(int doc) {
bits.set(doc);
size=-1; // invalidate size
}
public void addUnique(int doc) {
  if (size>=0) size++; // only maintain the cached size while it is still valid
  bits.set(doc);
}
public int size() {
if (size!=-1) return size;
return size=bits.cardinality();
}
/**
* The number of set bits - size - is cached. If the bitset is changed externally,
* this method should be used to invalidate the previously cached size.
*/
public void invalidateSize() {
size=-1;
}
public boolean exists(int doc) {
return bits.get(doc);
}
public long memSize() {
return (bits.size() >> 3) + 16;
}
}
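// --- Hypothetical usage sketch (not part of the original file): building a
// small set and walking it with the primitive-int iterator to avoid boxing.
class BitDocSetExample {
  public static void main(String[] args) {
    BitDocSet set = new BitDocSet();
    set.add(5);
    set.add(42);
    for (DocIterator it = set.iterator(); it.hasNext(); ) {
      System.out.println(it.nextDoc()); // prints 5, then 42
    }
    System.out.println(set.size()); // 2
  }
}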

View File

@@ -0,0 +1,110 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import java.util.Map;
import org.apache.solr.util.DOMUtil;
import org.apache.solr.core.SolrException;
import org.apache.solr.core.SolrConfig;
import org.apache.solr.core.Config;
import javax.xml.xpath.XPathConstants;
/**
* Contains the knowledge of how cache config is
* stored in the solrconfig.xml file, and implements a
* factory to create caches.
*
* @author yonik
* @version $Id: CacheConfig.java,v 1.2 2005/09/07 20:37:57 yonik Exp $
*/
class CacheConfig {
private String nodeName;
private Map args;
private String cacheImpl;
private Class clazz;
private Object[] persistence = new Object[1];
private String regenImpl;
private CacheRegenerator regenerator;
public CacheRegenerator getRegenerator() {
return regenerator;
}
public void setRegenerator(CacheRegenerator regenerator) {
this.regenerator = regenerator;
}
public static CacheConfig[] getMultipleConfigs(String configPath) {
NodeList nodes = (NodeList)SolrConfig.config.evaluate(configPath, XPathConstants.NODESET);
if (nodes==null || nodes.getLength()==0) return null;
CacheConfig[] configs = new CacheConfig[nodes.getLength()];
for (int i=0; i<nodes.getLength(); i++) {
configs[i] = getConfig(nodes.item(i));
}
return configs;
}
public static CacheConfig getConfig(String xpath) {
Node node = (Node)SolrConfig.config.getNode(xpath, false);
return getConfig(node);
}
public static CacheConfig getConfig(Node node) {
if (node==null) return null;
CacheConfig config = new CacheConfig();
config.nodeName = node.getNodeName();
config.args = DOMUtil.toMap(node.getAttributes());
String nameAttr = (String)config.args.get("name"); // OPTIONAL
if (nameAttr==null) {
config.args.put("name",config.nodeName);
}
config.cacheImpl = (String)config.args.get("class");
config.regenImpl = (String)config.args.get("regenerator");
config.clazz = Config.findClass(config.cacheImpl);
if (config.regenImpl != null) {
config.regenerator = (CacheRegenerator) Config.newInstance(config.regenImpl);
}
return config;
}
public SolrCache newInstance() {
try {
SolrCache cache = (SolrCache)clazz.newInstance();
persistence[0] = cache.init(args, persistence[0], regenerator);
return cache;
} catch (Exception e) {
SolrException.log(SolrCache.log,"Error instantiating cache",e);
// we can carry on without a cache... but should we?
// in some cases (like an OOM) we probably should try to continue.
return null;
}
}
}
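// --- Hypothetical sketch (not part of the original file). Shows how a cache
// section might be read and instantiated; the "//filterCache" xpath and the
// element name are assumptions about the solrconfig.xml layout.
class CacheConfigExample {
  public static void main(String[] args) {
    CacheConfig cfg = CacheConfig.getConfig("//filterCache");
    if (cfg != null) {
      SolrCache cache = cfg.newInstance(); // init() is called with the XML attrs
      System.out.println("created: " + cache);
    }
  }
}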

View File

@@ -0,0 +1,43 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.io.IOException;
/**
* Implementations of <code>CacheRegenerator</code> are used in autowarming to populate a new cache
* based on an old cache. <code>regenerateItem</code> is called for each item that should be inserted into the new cache.
* <p>
* Implementations should have a noarg constructor and be thread safe (a single instance will be
* used for all cache autowarmings).
*
* @author yonik
* @version $Id: CacheRegenerator.java,v 1.2 2005/09/07 20:37:57 yonik Exp $
*/
public interface CacheRegenerator {
/**
* Regenerate an old cache item and insert it into <code>newCache</code>
*
* @param newSearcher the new searcher whose caches are being autowarmed
* @param newCache where regenerated cache items should be stored. the target of the autowarming
* @param oldCache the old cache being used as a source for autowarming
* @param oldKey the key of the old cache item to regenerate in the new cache
* @param oldVal the old value of the cache item
* @return true to continue with autowarming, false to stop
*/
public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache, SolrCache oldCache, Object oldKey, Object oldVal) throws IOException;
}
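// --- Minimal hypothetical implementation sketch (not part of the original
// file): re-insert old entries unchanged. A real regenerator would typically
// re-execute the cached query/filter against newSearcher instead. Assumes
// SolrCache exposes a put(Object,Object) method.
class IdentityRegenerator implements CacheRegenerator {
  public boolean regenerateItem(SolrIndexSearcher newSearcher, SolrCache newCache,
                                SolrCache oldCache, Object oldKey, Object oldVal)
      throws IOException {
    newCache.put(oldKey, oldVal); // assumed put method; see note above
    return true; // keep autowarming subsequent items
  }
}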

View File

@@ -0,0 +1,37 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.util.Iterator;
/**
* @author yonik
* @version $Id$
*/
public interface DocIterator extends Iterator<Integer> {
public boolean hasNext();
/**
* returns the next document id if hasNext()==true
*/
public int nextDoc();
/**
* returns the score for the document just returned by nextDoc()
*/
public float score();
}

View File

@@ -0,0 +1,128 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
/**
* <code>DocList</code> represents the result of a query: an ordered list of document ids with optional score.
* This list contains a subset of the complete list of documents actually matched: <code>size()</code>
* document ids starting at <code>offset()</code>.
*
* @author yonik
* @version $Id: DocList.java,v 1.6 2005/11/11 21:57:56 yonik Exp $
* @since solr 0.9
*/
public interface DocList extends DocSet {
/**
* Returns the zero based offset of this list within the total ordered list of matches to the query.
*/
public int offset();
/**
* Returns the number of ids in this list.
*/
public int size();
/**
* Returns the total number of matches for the search
* (as opposed to just the number collected according
* to <code>offset()</code> and <code>size()</code>).
* Hence it's always true that matches() >= size()
* @return number of matches for the search (query & filter)
*/
public int matches();
/***
public int getDoc(int pos);
***/
// hmmm, what if a different slice could be generated from an existing DocSet
// (and was before)...
// how to distinguish cached values from logical values?
// docSet could represent docs 10-20, but actually contain 0-100
// should the big slice be cached independently, and a new class called
// DocListSubset be created to refer to a range within the DocList?
/**
* Get a subset of an existing DocList.
* Returns null if not possible.
*/
public DocList subset(int offset, int len);
/** True if scores were retained */
public boolean hasScores();
/** The maximum score for the search... only valid if
* scores were retained (if hasScores()==true)
*/
public float maxScore();
}
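// --- Hypothetical paging sketch (not part of the original file): a DocList is
// a window of size() hits starting at offset() out of matches() total.
class DocListExample {
  static void printPage(DocList page) {
    System.out.println("hits " + page.offset() + ".." + (page.offset() + page.size())
        + " of " + page.matches());
    for (DocIterator it = page.iterator(); it.hasNext(); ) {
      int id = it.nextDoc();
      if (page.hasScores()) {
        System.out.println(id + " score=" + it.score());
      } else {
        System.out.println(id);
      }
    }
  }
}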
/**** Maybe do this at a higher level (more efficient)
class SmartDocSet implements DocSet {
static int INITIAL_SIZE=10;
static int TRANSITION_SIZE=10;
protected BitSet bits;
int size;
protected int[] arr; // keep small set as an array, or as a hash?
protected int arrsize;
public SmartDocSet() {
if (INITIAL_SIZE>0) {
arr=new int[INITIAL_SIZE];
} else {
bits=new BitSet();
}
}
public void addUnique(int doc) {
size++;
if (bits != null) {
bits.set(doc);
}
else {
if (arrsize<10) {
arr[arrsize++]=doc;
} else {
// TODO: transition to bit set
}
}
};
public int size() {
return size;
}
public boolean exists(int docid) {
return false;
}
public DocSet intersection(DocSet other) {
return null;
}
public DocSet union(DocSet other) {
return null;
}
}
***/

View File

@@ -0,0 +1,37 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
/**
* A struct whose only purpose is to hold both a DocList and a DocSet so that both
* may be returned from a single method.
* <p>
* The DocList and DocSet returned should <b>not</b> be modified as they may
* have been retrieved or inserted into a cache and should be considered shared.
* <p>
* Oh, if only java had "out" parameters or multiple return args...
* <p>
*
* @author yonik
* @version $Id: DocListAndSet.java,v 1.3 2005/04/08 05:38:05 yonik Exp $
* @since solr 0.9
*/
public final class DocListAndSet {
public DocList docList;
public DocSet docSet;
}

View File

@@ -0,0 +1,182 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.solr.core.SolrException;
import java.util.BitSet;
/**
* <code>DocSet</code> represents an unordered set of Lucene Document Ids.
* <p>
* WARNING: Any DocSet returned from SolrIndexSearcher should <b>not</b> be modified as it may have been retrieved from
* a cache and could be shared.
* @author yonik
* @version $Id: DocSet.java,v 1.6 2005/05/13 21:20:15 yonik Exp $
* @since solr 0.9
*/
public interface DocSet /* extends Collection<Integer> */ {
public void add(int doc);
public void addUnique(int doc);
/**
* @return The number of document ids in the set.
*/
public int size();
/**
*
* @param docid the document id to check
* @return true if the docid is in the set
*/
public boolean exists(int docid);
/**
*
* @return an iterator that may be used to iterate over all of the documents in the set.
*/
public DocIterator iterator();
/**
* Returns a BitSet view of the DocSet. Any changes to this BitSet <b>may</b>
* be reflected in the DocSet, hence if the DocSet is shared or was returned from
* a SolrIndexSearcher method, it's not safe to modify the BitSet.
*
* @return
* A BitSet with the bit number of every docid set in the set.
*/
@Deprecated
public BitSet getBits();
/**
* Returns the approximate amount of memory taken by this DocSet.
* This is only an approximation and doesn't take into account java object overhead.
*
* @return
* the approximate memory consumption in bytes
*/
public long memSize();
/**
* Returns the intersection of this set with another set. Neither set is modified - a new DocSet is
* created and returned.
* @param other
* @return a DocSet representing the intersection
*/
public DocSet intersection(DocSet other);
/**
* Returns the number of documents of the intersection of this set with another set.
* May be more efficient than actually creating the intersection and then getting its size.
*/
public int intersectionSize(DocSet other);
/**
* Returns the union of this set with another set. Neither set is modified - a new DocSet is
* created and returned.
* @param other
* @return a DocSet representing the union
*/
public DocSet union(DocSet other);
/**
* Returns the number of documents of the union of this set with another set.
* May be more efficient than actually creating the union and then getting its size.
*/
public int unionSize(DocSet other);
}
abstract class DocSetBase implements DocSet {
// Not implemented efficiently... for testing purposes only
public boolean equals(Object obj) {
if (!(obj instanceof DocSet)) return false;
DocSet other = (DocSet)obj;
if (this.size() != other.size()) return false;
if (this instanceof DocList && other instanceof DocList) {
// compare ordering
DocIterator i1=this.iterator();
DocIterator i2=other.iterator();
while(i1.hasNext() && i2.hasNext()) {
if (i1.nextDoc() != i2.nextDoc()) return false;
}
return true;
// don't compare matches
}
// if (this.size() != other.size()) return false;
return this.getBits().equals(other.getBits());
}
public void add(int doc) {
throw new SolrException(500,"Unsupported Operation");
}
public void addUnique(int doc) {
throw new SolrException(500,"Unsupported Operation");
}
// Only the inefficient base implementation. DocSets based on
// BitSets will return the actual BitSet without making a copy.
public BitSet getBits() {
BitSet bits = new BitSet();
for (DocIterator iter = iterator(); iter.hasNext();) {
bits.set(iter.nextDoc());
}
return bits;
}
public DocSet intersection(DocSet other) {
// intersection is overloaded in HashDocSet to be more
// efficient, so if "other" is a HashDocSet, dispatch off
// of it instead.
if (other instanceof HashDocSet) {
return other.intersection(this);
}
// Default... handle with bitsets.
BitSet newbits = (BitSet)(this.getBits().clone());
newbits.and(other.getBits());
return new BitDocSet(newbits);
}
public DocSet union(DocSet other) {
BitSet newbits = (BitSet)(this.getBits().clone());
newbits.or(other.getBits());
return new BitDocSet(newbits);
}
// TODO: more efficient implementations
public int intersectionSize(DocSet other) {
return intersection(other).size();
}
// TODO: more efficient implementations
public int unionSize(DocSet other) {
return union(other).size();
}
}
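// --- Hypothetical sketch (not part of the original file): set algebra on two
// DocSets; the inputs are never modified, a new set is returned.
class DocSetExample {
  public static void main(String[] args) {
    BitDocSet a = new BitDocSet();
    a.add(1); a.add(2); a.add(3);
    BitDocSet b = new BitDocSet();
    b.add(2); b.add(3); b.add(4);
    System.out.println(a.intersection(b).size()); // 2 (docs 2 and 3)
    System.out.println(a.unionSize(b));           // 4 (docs 1,2,3,4)
  }
}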

View File

@@ -0,0 +1,119 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
/**
* <code>DocSlice</code> implements DocList as an array of docids and optional scores.
*
* @author yonik
* @version $Id: DocSlice.java,v 1.9 2005/11/11 21:57:56 yonik Exp $
* @since solr 0.9
*/
public class DocSlice extends DocSetBase implements DocList {
final int offset; // starting position of the docs (zero based)
final int len; // number of positions used in arrays
final int[] docs; // a slice of documents (docs 0-100 of the query)
final float[] scores; // optional score list
final int matches;
final float maxScore;
/**
*
* @param offset starting offset for this range of docs
* @param len length of results
* @param docs array of docids starting at position 0
* @param scores array of scores parallel to docs, or null if scores were not retained
* @param matches total number of matches for the query
* @param maxScore highest score for the query, only valid if scores were retained
*/
public DocSlice(int offset, int len, int[] docs, float[] scores, int matches, float maxScore) {
this.offset=offset;
this.len=len;
this.docs=docs;
this.scores=scores;
this.matches=matches;
this.maxScore=maxScore;
}
public DocList subset(int offset, int len) {
if (this.offset == offset && this.len==len) return this;
// if we didn't store enough (and there was more to store)
// then we can't take a subset.
int requestedEnd = offset + len;
if (requestedEnd > docs.length && this.matches > docs.length) return null;
int realEndDoc = Math.min(requestedEnd, docs.length);
int realLen = Math.max(realEndDoc-offset,0);
if (this.offset == offset && this.len == realLen) return this;
return new DocSlice(offset, realLen, docs, scores, matches, maxScore);
}
public boolean hasScores() {
return scores!=null;
}
public float maxScore() {
return maxScore;
}
public int offset() { return offset; }
public int size() { return len; }
public int matches() { return matches; }
public long memSize() {
return (docs.length<<2)
+ (scores==null ? 0 : (scores.length<<2))
+ 24;
}
public boolean exists(int doc) {
for (int i: docs) {
if (i==doc) return true;
}
return false;
}
// Hmmm, maybe I could have reused the scorer interface here...
// except that it carries Similarity baggage...
public DocIterator iterator() {
return new DocIterator() {
int pos=offset;
final int end=offset+len;
public boolean hasNext() {
return pos < end;
}
public Integer next() {
return nextDoc();
}
public void remove() {
}
public int nextDoc() {
return docs[pos++];
}
public float score() {
return scores[pos-1];
}
};
}
}
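// --- Hypothetical sketch (not part of the original file): a cached slice can
// often serve a smaller page via subset() without re-running the query.
class DocSliceExample {
  public static void main(String[] args) {
    int[] docs = {7, 3, 9, 12};
    DocSlice slice = new DocSlice(0, 4, docs, null, 20, 0.0f); // 20 total matches
    DocList page = slice.subset(1, 2); // window over docs 3 and 9
    System.out.println(page.size());    // 2
    System.out.println(page.matches()); // still 20
  }
}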

View File

@@ -0,0 +1,280 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.solr.core.SolrConfig;
/**
* <code>HashDocSet</code> represents an unordered set of Lucene Document Ids
* using a primitive int hash table. It can be a better choice if there are few docs
* in the set because it takes up less memory and is faster to iterate and take
* set intersections.
*
* @author yonik
* @version $Id: HashDocSet.java,v 1.7 2005/11/22 17:16:19 yonik Exp $
* @since solr 0.9
*/
public final class HashDocSet extends DocSetBase {
// keep track of the inverse of the Loadfactor since
// multiplication is so much faster than division.
final static float inverseLoadfactor = 1.0f / SolrConfig.config.getFloat("//HashDocSet/@loadFactor",0.75f);
public final static int MAX_SIZE = SolrConfig.config.getInt("//HashDocSet/@maxSize",-1);
// lucene docs are numbered from 0, so a negative number must be used to mark
// a missing (empty) slot; the table is filled with EMPTY before any ids are put in.
private final static int EMPTY=-1;
private final int tablesize;
private final int[] table;
private final int size;
private final int mask;
public HashDocSet(int[] docs, int offset, int len) {
int tsize = Math.max(nextHighestPowerOfTwo(len), 1);
if (tsize < len * inverseLoadfactor) {
tsize <<= 1;
}
tablesize = tsize;
mask=tablesize-1;
table = new int[tablesize];
for (int i=0; i<tablesize; i++) table[i]=EMPTY;
for (int i=offset; i<offset+len; i++) { // the len entries starting at offset
put(docs[i]);
}
size = len;
}
static int nextHighestPowerOfTwo(int v) {
v--;
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
v++;
return v;
}
void put(int doc) {
table[getSlot(doc)]=doc;
}
private int getSlot(int val) {
int s,v;
s=val & mask;
v=table[s];
// check for EMPTY first since that value is more likely
if (v==EMPTY || v==val) return s;
s=rehash(val);
return s;
}
// As the size of this int hashtable is expected to be small
// (thousands at most), I did not try to keep the rehash function
// reversible (important to avoid collisions in large hash tables).
private int rehash(int val) {
int h,s,v;
final int comp=~val;
// don't left shift too far... the only bits
// that count in the answer are the ones on the right.
// We want to put more of the bits on the left
// into the answer.
// Keep small tables in mind. We may be only using
// the first 5 or 6 bits.
// on the first rehash, use complement instead of val to shift
// so we don't end up with 0 again if val==0.
h = val ^ (comp>>8);
s = h & mask;
v = table[s];
if (v==EMPTY || v==val) return s;
h ^= (v << 17) | (comp >>> 16); // this is reversible
s = h & mask;
v = table[s];
if (v==EMPTY || v==val) return s;
h ^= (h << 8) | (comp >>> 25); // this is reversible
s = h & mask;
v = table[s];
if (v==EMPTY || v==val) return s;
/**********************
// Knuth, Thomas Wang, http://www.concentric.net/~Ttwang/tech/inthash.htm
// This magic number has no common factors with 2^32, and magic/(2^32) approximates
// the golden ratio.
private static final int magic = (int)2654435761L;
h = magic*val;
s = h & mask;
v=table[s];
if (v==EMPTY || v==val) return s;
// the mult with magic should have thoroughly mixed the bits.
// add entropy to the right half from the left half.
h ^= h>>>16;
s = h & mask;
v=table[s];
if (v==EMPTY || v==val) return s;
*************************/
// linear scan now... ug.
final int start=s;
while (++s<tablesize) {
v=table[s];
if (v==EMPTY || v==val) return s;
}
s=start;
while (--s>=0) {
v=table[s];
if (v==EMPTY || v==val) return s;
}
return s;
}
/**
*
* @return The number of document ids in the set.
*/
public int size() {
return size;
}
public boolean exists(int docid) {
int v = table[docid & mask];
if (v==EMPTY) return false;
else if (v==docid) return true;
else {
v = table[rehash(docid)];
if (v==docid) return true;
else return false;
}
}
public DocIterator iterator() {
return new DocIterator() {
int pos=0;
int doc;
{ goNext(); }
public boolean hasNext() {
return pos < tablesize;
}
public Integer next() {
return nextDoc();
}
public void remove() {
}
void goNext() {
while (pos<tablesize && table[pos]==EMPTY) pos++;
}
// modify to return -1 at end of iteration?
public int nextDoc() {
int doc = table[pos];
pos++;
goNext();
return doc;
}
public float score() {
return 0.0f;
}
};
}
public long memSize() {
return (tablesize<<2) + 20;
}
public DocSet intersection(DocSet other) {
if (other instanceof HashDocSet) {
// set "a" to the smallest doc set for the most efficient
// intersection.
final HashDocSet a = size()<=other.size() ? this : (HashDocSet)other;
final HashDocSet b = size()<=other.size() ? (HashDocSet)other : this;
int[] result = new int[a.size()];
int resultCount=0;
for (int i=0; i<a.table.length; i++) {
int id=a.table[i];
if (id >= 0 && b.exists(id)) {
result[resultCount++]=id;
}
}
return new HashDocSet(result,0,resultCount);
} else {
int[] result = new int[size()];
int resultCount=0;
for (int i=0; i<table.length; i++) {
int id=table[i];
if (id >= 0 && other.exists(id)) {
result[resultCount++]=id;
}
}
return new HashDocSet(result,0,resultCount);
}
}
public int intersectionSize(DocSet other) {
if (other instanceof HashDocSet) {
// set "a" to the smallest doc set for the most efficient
// intersection.
final HashDocSet a = size()<=other.size() ? this : (HashDocSet)other;
final HashDocSet b = size()<=other.size() ? (HashDocSet)other : this;
int resultCount=0;
for (int i=0; i<a.table.length; i++) {
int id=a.table[i];
if (id >= 0 && b.exists(id)) {
resultCount++;
}
}
return resultCount;
} else {
int resultCount=0;
for (int i=0; i<table.length; i++) {
int id=table[i];
if (id >= 0 && other.exists(id)) {
resultCount++;
}
}
return resultCount;
}
}
}
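
A small sketch of membership and intersection with made-up docids; note the class reads its load factor and max size from the static SolrConfig, so a config must already be loaded for the constructor to work.

int[] a = {1, 5, 9, 42};
int[] b = {5, 42, 100};
HashDocSet setA = new HashDocSet(a, 0, a.length);
HashDocSet setB = new HashDocSet(b, 0, b.length);

boolean hit = setA.exists(42);             // true
int common = setA.intersectionSize(setB);  // 2 (docs 5 and 42)
DocSet both = setA.intersection(setB);     // HashDocSet holding {5, 42}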

View File

@ -0,0 +1,274 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrException;
import org.apache.solr.util.NamedList;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;
import java.io.IOException;
import java.net.URL;
/**
* @author yonik
* @version $Id: LRUCache.java,v 1.12 2005/11/30 06:12:55 yonik Exp $
*/
public class LRUCache implements SolrCache {
/* An instance of this class will be shared across multiple instances
* of an LRUCache at the same time. Make sure everything is thread safe.
*/
private static class CumulativeStats {
AtomicLong lookups = new AtomicLong();
AtomicLong hits = new AtomicLong();
AtomicLong inserts = new AtomicLong();
AtomicLong evictions = new AtomicLong();
}
private CumulativeStats stats;
// per instance stats. The synchronization used for the map will also be
// used for updating these statistics (and hence they are not AtomicLongs).
private long lookups;
private long hits;
private long inserts;
private long evictions;
private Map map;
private String name;
private int autowarmCount;
private State state;
private CacheRegenerator regenerator;
public Object init(Map args, Object persistence, CacheRegenerator regenerator) {
state=State.CREATED;
this.regenerator = regenerator;
name = (String)args.get("name");
String str = (String)args.get("size");
final int limit = str==null ? 1024 : Integer.parseInt(str);
str = (String)args.get("initialSize");
final int initialSize = Math.min(str==null ? 1024 : Integer.parseInt(str), limit);
str = (String)args.get("autowarmCount");
autowarmCount = str==null ? 0 : Integer.parseInt(str);
map = new LinkedHashMap(initialSize, 0.75f, true) {
protected boolean removeEldestEntry(Map.Entry eldest) {
if (size() > limit) {
// increment evictions regardless of state.
// this doesn't need to be synchronized because it will
// only be called in the context of a higher level synchronized block.
evictions++;
stats.evictions.incrementAndGet();
return true;
}
return false;
}
};
if (persistence==null) {
// must be the first time a cache of this type is being created
persistence = new CumulativeStats();
}
stats = (CumulativeStats)persistence;
return persistence;
}
public String name() {
return name;
}
public int size() {
synchronized(map) {
return map.size();
}
}
public synchronized Object put(Object key, Object value) {
if (state == State.LIVE) {
stats.inserts.incrementAndGet();
}
synchronized (map) {
// increment local inserts regardless of state???
// it does make it more consistent with the current size...
inserts++;
return map.put(key,value);
}
}
public Object get(Object key) {
synchronized (map) {
Object val = map.get(key);
if (state == State.LIVE) {
// only increment lookups and hits if we are live.
lookups++;
stats.lookups.incrementAndGet();
if (val!=null) {
hits++;
stats.hits.incrementAndGet();
}
}
return val;
}
}
public void clear() {
synchronized(map) {
map.clear();
}
}
public void setState(State state) {
this.state = state;
}
public State getState() {
return state;
}
public void warm(SolrIndexSearcher searcher, SolrCache old) throws IOException {
if (regenerator==null) return;
LRUCache other = (LRUCache)old;
// warm entries
if (autowarmCount != 0) {
Object[] keys,vals = null;
// Don't do the autowarming in the synchronized block, just pull out the keys and values.
synchronized (other.map) {
int sz = other.map.size();
if (autowarmCount!=-1) sz = Math.min(sz,autowarmCount);
keys = new Object[sz];
vals = new Object[sz];
Iterator iter = other.map.entrySet().iterator();
// iteration goes from oldest (least recently used) to most recently used,
// so we need to skip over the oldest entries.
int skip = other.map.size() - sz;
for (int i=0; i<skip; i++) iter.next();
for (int i=0; i<sz; i++) {
Map.Entry entry = (Map.Entry)iter.next();
keys[i]=entry.getKey();
vals[i]=entry.getValue();
}
}
// autowarm from the oldest to the newest entries so that the ordering will be
// correct in the new cache.
for (int i=0; i<keys.length; i++) {
try {
boolean continueRegen = regenerator.regenerateItem(searcher, this, old, keys[i], vals[i]);
if (!continueRegen) break;
}
catch (Throwable e) {
SolrException.log(log,"Error during auto-warming of key:" + keys[i], e);
}
}
}
}
public void close() {
}
//////////////////////// SolrInfoMBeans methods //////////////////////
public String getName() {
return LRUCache.class.getName();
}
public String getVersion() {
return SolrCore.version;
}
public String getDescription() {
return "LRU Cache";
}
public Category getCategory() {
return Category.CACHE;
}
public String getCvsId() {
return "$Id: LRUCache.java,v 1.12 2005/11/30 06:12:55 yonik Exp $";
}
public String getCvsName() {
return "$Name: $";
}
public String getCvsSource() {
return "$Source: /cvs/main/searching/solr/solarcore/src/solr/search/LRUCache.java,v $";
}
public URL[] getDocs() {
return null;
}
// returns a ratio, not a percent.
private static String calcHitRatio(long lookups, long hits) {
if (lookups==0) return "0.00";
if (lookups==hits) return "1.00";
int hundredths = (int)(hits*100/lookups); // rounded down
if (hundredths < 10) return "0.0" + hundredths;
return "0." + hundredths;
/*** code to produce a percent, if we want it...
int ones = (int)(hits*100 / lookups);
int tenths = (int)(hits*1000 / lookups) - ones*10;
return Integer.toString(ones) + '.' + tenths;
***/
}
public NamedList getStatistics() {
NamedList lst = new NamedList();
synchronized (map) {
lst.add("lookups", lookups);
lst.add("hits", hits);
lst.add("hitratio", calcHitRatio(lookups,hits));
lst.add("inserts", inserts);
lst.add("evictions", evictions);
lst.add("size", map.size());
}
long clookups = stats.lookups.get();
long chits = stats.hits.get();
lst.add("cumulative_lookups", clookups);
lst.add("cumulative_hits", chits);
lst.add("cumulative_hitratio", calcHitRatio(clookups,chits));
lst.add("cumulative_inserts", stats.inserts.get());
lst.add("cumulative_evictions", stats.evictions.get());
return lst;
}
public String toString() {
return name + getStatistics().toString();
}
}
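
A stand-alone sketch of the cache lifecycle using hypothetical keys and values; with a null regenerator, warm() would be a no-op.

import java.util.HashMap;
import java.util.Map;
import org.apache.solr.search.LRUCache;
import org.apache.solr.search.SolrCache;

class LRUCacheSketch {
  public static void main(String[] argv) {
    Map args = new HashMap();
    args.put("name", "demo");
    args.put("size", "2");           // capacity limit
    args.put("autowarmCount", "1");  // entries to carry into the next cache

    LRUCache cache = new LRUCache();
    cache.init(args, null, null);    // null persistence: first generation creates the stats holder
    cache.setState(SolrCache.State.LIVE);

    cache.put("q1", "r1");
    cache.put("q2", "r2");
    cache.put("q3", "r3");           // evicts q1, the least recently used entry
    Object v = cache.get("q2");      // "r2"; counted as one lookup and one hit
    System.out.println(cache);       // name + statistics
  }
}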

View File

@ -0,0 +1,116 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */
/* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */
import org.apache.lucene.search.*;
import java.util.LinkedHashMap;
import java.util.Map;
import java.io.IOException;
/** Utility which converts certain query clauses into {@link QueryFilter}s and
* caches these. Only required {@link TermQuery}s whose boost is zero and
* whose term occurs in at least a certain fraction of documents are converted
* to cached filters. This accelerates query constraints like language,
* document format, etc., which do not affect ranking but might otherwise slow
* search considerably. */
// Taken from Nutch and modified - YCS
class LuceneQueryOptimizer {
private LinkedHashMap cache; // an LRU cache of QueryFilter
private float threshold;
/** Construct an optimizer that caches and uses filters for required {@link
* TermQuery}s whose boost is zero.
* @param cacheSize the number of QueryFilters to cache
* @param threshold the fraction of documents which must contain term
*/
public LuceneQueryOptimizer(final int cacheSize, float threshold) {
this.cache = new LinkedHashMap(cacheSize, 0.75f, true) {
protected boolean removeEldestEntry(Map.Entry eldest) {
return size() > cacheSize; // limit size of cache
}
};
this.threshold = threshold;
}
public TopDocs optimize(BooleanQuery original,
Searcher searcher,
int numHits,
Query[] queryOut,
Filter[] filterOut
)
throws IOException {
BooleanQuery query = new BooleanQuery();
BooleanQuery filterQuery = null;
BooleanClause[] clauses = original.getClauses();
for (int i = 0; i < clauses.length; i++) {
BooleanClause c = clauses[i];
/***
System.out.println("required="+c.required);
System.out.println("boost="+c.query.getBoost());
System.out.println("isTermQuery="+(c.query instanceof TermQuery));
if (c.query instanceof TermQuery) {
System.out.println("term="+((TermQuery)c.query).getTerm());
System.out.println("docFreq="+searcher.docFreq(((TermQuery)c.query).getTerm()));
}
***/
if (c.required // required
&& c.query.getBoost() == 0.0f // boost is zero
&& c.query instanceof TermQuery // TermQuery
&& (searcher.docFreq(((TermQuery)c.query).getTerm())
/ (float)searcher.maxDoc()) >= threshold) { // check threshold
if (filterQuery == null)
filterQuery = new BooleanQuery();
filterQuery.add(c.query, true, false); // filter it
//System.out.println("WooHoo... qualified to be hoisted to a filter!");
} else {
query.add(c); // query it
}
}
Filter filter = null;
if (filterQuery != null) {
synchronized (cache) { // check cache
filter = (Filter)cache.get(filterQuery);
}
if (filter == null) { // miss
filter = new QueryFilter(filterQuery); // construct new entry
synchronized (cache) {
cache.put(filterQuery, filter); // cache it
}
}
}
// YCS: added code to pass out optimized query and filter
// so they can be used with Hits
if (queryOut != null && filterOut != null) {
queryOut[0] = query; filterOut[0] = filter;
return null;
} else {
return searcher.search(query, filter, numHits);
}
}
}
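
A sketch of the intended calling pattern, assuming a Searcher over an index where lang:en matches at least the threshold fraction of documents; since the class is package-private, this would have to live in org.apache.solr.search. The two-boolean BooleanQuery.add() is the Lucene 1.x API used above.

package org.apache.solr.search;

import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;

class OptimizerSketch {
  static TopDocs search(Searcher searcher) throws IOException {
    BooleanQuery bq = new BooleanQuery();
    Query lang = new TermQuery(new Term("lang", "en"));
    lang.setBoost(0);           // zero boost marks the clause as ranking-neutral
    bq.add(lang, true, false);  // required, not prohibited
    bq.add(new TermQuery(new Term("title", "solr")), true, false);

    // cache up to 32 filters; hoist terms occurring in >= 5% of all docs
    LuceneQueryOptimizer opt = new LuceneQueryOptimizer(32, 0.05f);
    return opt.optimize(bq, searcher, 10, null, null);
  }
}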

View File

@ -0,0 +1,115 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.search.*;
import org.apache.lucene.index.IndexReader;
import java.io.IOException;
/**
* A {@link SortComparatorSource} for strings that orders null values after non-null values.
* Based on FieldSortedHitQueue.comparatorString
* <p>
*
* @author Chris Hostetter
* @author yonik
* @version $Id: MissingStringLastComparatorSource.java,v 1.1 2005/06/02 04:43:06 yonik Exp $
*
*/
// move to apache package and make public if it is accepted as a patch
class MissingStringLastComparatorSource implements SortComparatorSource {
public static final String bigString="\uffff\uffff\uffff\uffff\uffff\uffff\uffff\uffffNULL_VAL";
private final String missingValueProxy;
public MissingStringLastComparatorSource() {
this(bigString);
}
/** Creates a {@link SortComparatorSource} that uses <tt>missingValueProxy</tt> as the value to return from
* ScoreDocComparator.sortValue(), which is only used by multisearchers to determine how to collate
* results from their searchers (the object returned must implement java.io.Serializable).
* @see FieldDoc
*
* @param missingValueProxy The value returned when sortValue() is called for a document missing the
* sort field. This value is *not* normally used for sorting, only for collating across searchers.
*/
public MissingStringLastComparatorSource(String missingValueProxy) {
this.missingValueProxy=missingValueProxy;
}
public ScoreDocComparator newComparator(final IndexReader reader,
final String fieldname)
throws IOException {
final String field = fieldname.intern();
final FieldCache.StringIndex index =
FieldCache.DEFAULT.getStringIndex (reader, field);
// :HACK:
// final String lastString =
// (index.lookup[index.lookup.length-1]+"X").intern();
//
// Note: basing lastStringValue on the StringIndex won't work
// with a multisearcher.
return new ScoreDocComparator () {
public final int compare (final ScoreDoc i, final ScoreDoc j) {
final int fi = index.order[i.doc];
final int fj = index.order[j.doc];
// 0 is the magic position of null
/**** alternate logic
if (fi < fj && fi != 0) return -1;
if (fj < fi && fj != 0) return 1;
if (fi==fj) return 0;
return fi==0 ? 1 : -1;
****/
if (fi==fj) return 0;
if (fi==0) return 1;
if (fj==0) return -1;
return fi < fj ? -1 : 1;
}
public Comparable sortValue (final ScoreDoc i) {
int f = index.order[i.doc];
return (0 == f) ? missingValueProxy : index.lookup[f];
}
public int sortType() {
return SortField.CUSTOM;
}
};
}
}
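
A sketch of plugging the comparator into a Lucene 1.x SortField (same-package code, since the class is package-private); the second form supplies a hypothetical proxy value for collation.

// ascending sort on "title" that pushes documents missing the field to the end
Sort nullsLast = new Sort(new SortField("title",
    new MissingStringLastComparatorSource()));

// the same, with an explicit (hypothetical) proxy value returned by sortValue()
Sort nullsLast2 = new Sort(new SortField("title",
    new MissingStringLastComparatorSource("~missing~")));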

View File

@ -0,0 +1,479 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.search.*;
import org.apache.lucene.search.function.*;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.Term;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrException;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.FieldType;
import java.util.ArrayList;
import java.util.regex.Pattern;
import java.util.logging.Level;
import java.io.IOException;
/**
* @author yonik
* @version $Id: QueryParsing.java,v 1.10 2005/12/20 21:34:44 yonik Exp $
*/
public class QueryParsing {
public static Query parseQuery(String qs, IndexSchema schema) {
try {
Query query = new SolrQueryParser(schema).parse(qs);
if (SolrCore.log.isLoggable(Level.FINEST)) {
SolrCore.log.finest("After QueryParser:" + query);
}
return query;
} catch (ParseException e) {
SolrCore.log(e);
throw new SolrException(400,"Error parsing Lucene query",e);
}
}
/***
* SortSpec encapsulates a Lucene Sort and a count of the number of documents
* to return.
*/
public static class SortSpec {
private final Sort sort;
private final int num;
SortSpec(Sort sort, int num) {
this.sort=sort;
this.num=num;
}
/**
* Gets the Lucene Sort object, or null for the default sort
* by score descending.
*/
public Sort getSort() { return sort; }
/**
* Gets the number of documents to return after sorting.
* -1 means there is no cutoff (only do the sort).
* @return the maximum number of documents to return, or -1 for no limit
*/
public int getCount() { return num; }
}
private static Pattern sortSeparator = Pattern.compile("[\\s,]+");
/**
* Returns null if the sortSpec string doesn't look like a sort specification,
* or if the sort specification couldn't be converted into a Lucene Sort
* (because of a field not being indexed or undefined, etc).
*
* The form of the sort specification string currently parsed is:
* SortSpec ::= SingleSort [, SingleSort]* <number>?
* SingleSort ::= <fieldname> SortDirection
* SortDirection ::= top | desc | bottom | asc
*
* Examples:
* top 10 #take the top 10 by score
* desc 10 #take the top 10 by score
* score desc 10 #take the top 10 by score
* weight bottom 10 #sort by weight ascending and take the first 10
* weight desc #sort by weight descending
* height desc,weight desc #sort by height descending, and use weight descending to break any ties
* height desc,weight asc top 20 #sort by height descending, using weight ascending as a tiebreaker
*
*/
public static SortSpec parseSort(String sortSpec, IndexSchema schema) {
if (sortSpec==null || sortSpec.length()==0) return null;
// I wonder how fast the regex is??? at least we cache the pattern.
String[] parts = sortSeparator.split(sortSpec.trim(),0);
if (parts.length == 0) return null;
ArrayList<SortField> lst = new ArrayList<SortField>();
int num=-1;
int pos=0;
String fn;
boolean top=true;
boolean normalSortOnScore=false;
while (pos < parts.length) {
String str=parts[pos];
if ("top".equals(str) || "bottom".equals(str) || "asc".equals(str) || "desc".equals(str)) {
// if the field name seems to be missing, default to "score".
// note that this will mess up a field name that has the same name
// as a sort direction specifier.
fn="score";
} else {
fn=str;
pos++;
// a bare field name with no sort direction following it isn't a valid sort spec
if (pos == parts.length) return null;
}
// get the direction of the sort
str=parts[pos];
if ("top".equals(str) || "desc".equals(str)) {
top=true;
} else if ("bottom".equals(str) || "asc".equals(str)) {
top=false;
} else {
return null; // must not be a sort command
}
// get the field to sort on
// hmmm - should there be a fake/pseudo-field named "score" in the schema?
if ("score".equals(fn)) {
if (top) {
normalSortOnScore=true;
lst.add(SortField.FIELD_SCORE);
} else {
lst.add(new SortField(null, SortField.SCORE, true));
}
} else {
// getField could throw an exception if the name isn't found
try {
SchemaField f = schema.getField(fn);
if (f == null || !f.indexed()) return null;
lst.add(f.getType().getSortField(f,top));
} catch (Exception e) {
return null;
}
}
pos++;
// If there is a leftover part, assume it is a count
if (pos+1 == parts.length) {
try {
num = Integer.parseInt(parts[pos]);
} catch (Exception e) {
return null;
}
pos++;
}
}
Sort sort;
if (normalSortOnScore && lst.size() == 1) {
// Normalize the default sort on score descending to sort=null
sort=null;
} else {
sort = new Sort((SortField[]) lst.toArray(new SortField[lst.size()]));
}
return new SortSpec(sort,num);
}
///////////////////////////
///////////////////////////
///////////////////////////
static FieldType writeFieldName(String name, IndexSchema schema, Appendable out, int flags) throws IOException {
FieldType ft = null;
ft = schema.getFieldTypeNoEx(name);
out.append(name);
if (ft==null) {
out.append("(UNKNOWN FIELD "+name+')');
}
out.append(':');
return ft;
}
static void writeFieldVal(String val, FieldType ft, Appendable out, int flags) throws IOException {
if (ft!=null) {
out.append(ft.toExternal(new Field("",val,true,true,false)));
} else {
out.append(val);
}
}
public static void toString(Query query, IndexSchema schema, Appendable out, int flags) throws IOException {
boolean writeBoost=true;
if (query instanceof TermQuery) {
TermQuery q = (TermQuery)query;
Term t = q.getTerm();
FieldType ft = writeFieldName(t.field(), schema, out, flags);
writeFieldVal(t.text(), ft, out, flags);
} else if (query instanceof RangeQuery) {
RangeQuery q = (RangeQuery)query;
String fname = q.getField();
FieldType ft = writeFieldName(fname, schema, out, flags);
out.append( q.isInclusive() ? '[' : '{' );
Term lt = q.getLowerTerm();
Term ut = q.getUpperTerm();
if (lt==null) {
out.append('*');
} else {
writeFieldVal(lt.text(), ft, out, flags);
}
out.append(" TO ");
if (ut==null) {
out.append('*');
} else {
writeFieldVal(ut.text(), ft, out, flags);
}
out.append( q.isInclusive() ? ']' : '}' );
} else if (query instanceof ConstantScoreRangeQuery) {
ConstantScoreRangeQuery q = (ConstantScoreRangeQuery)query;
String fname = q.getField();
FieldType ft = writeFieldName(fname, schema, out, flags);
out.append( q.includesLower() ? '[' : '{' );
String lt = q.getLowerVal();
String ut = q.getUpperVal();
if (lt==null) {
out.append('*');
} else {
writeFieldVal(lt, ft, out, flags);
}
out.append(" TO ");
if (ut==null) {
out.append('*');
} else {
writeFieldVal(ut, ft, out, flags);
}
out.append( q.includesUpper() ? ']' : '}' );
} else if (query instanceof BooleanQuery) {
BooleanQuery q = (BooleanQuery)query;
boolean needParens=false;
if (q.getBoost() != 1.0 || q.getMinimumNumberShouldMatch() != 0) {
needParens=true;
}
if (needParens) {
out.append('(');
}
BooleanClause[] clauses = q.getClauses();
boolean first=true;
for (BooleanClause c : clauses) {
if (!first) {
out.append(' ');
} else {
first=false;
}
if (c.prohibited) {
out.append('-');
} else if (c.required) {
out.append('+');
}
Query subQuery = c.query;
boolean wrapQuery=false;
// TODO: may need to put parens around other types
// of queries too, depending on future syntax.
if (subQuery instanceof BooleanQuery) {
wrapQuery=true;
}
if (wrapQuery) {
out.append('(');
}
toString(subQuery, schema, out, flags);
if (wrapQuery) {
out.append(')');
}
}
if (needParens) {
out.append(')');
}
if (q.getMinimumNumberShouldMatch()>0) {
out.append('~');
out.append(Integer.toString(q.getMinimumNumberShouldMatch()));
}
} else if (query instanceof PrefixQuery) {
PrefixQuery q = (PrefixQuery)query;
Term prefix = q.getPrefix();
FieldType ft = writeFieldName(prefix.field(), schema, out, flags);
out.append(prefix.text());
out.append('*');
} else if (query instanceof ConstantScorePrefixQuery) {
ConstantScorePrefixQuery q = (ConstantScorePrefixQuery)query;
Term prefix = q.getPrefix();
FieldType ft = writeFieldName(prefix.field(), schema, out, flags);
out.append(prefix.text());
out.append('*');
} else if (query instanceof WildcardQuery) {
out.append(query.toString());
writeBoost=false;
} else if (query instanceof FuzzyQuery) {
out.append(query.toString());
writeBoost=false;
} else if (query instanceof ConstantScoreQuery) {
out.append(query.toString());
writeBoost=false;
} else {
out.append(query.getClass().getSimpleName()
+ '(' + query.toString() + ')' );
writeBoost=false;
}
if (writeBoost && query.getBoost() != 1.0f) {
out.append("^");
out.append(Float.toString(query.getBoost()));
}
}
public static String toString(Query query, IndexSchema schema) {
try {
StringBuilder sb = new StringBuilder();
toString(query, schema, sb, 0);
return sb.toString();
} catch (Exception e) {
throw new RuntimeException(e);
}
}
// simple class to help with parsing a string
private static class StrParser {
String val;
int pos;
int end;
StrParser(String val) {this.val = val; end=val.length(); }
void eatws() {
while (pos<end && Character.isWhitespace(val.charAt(pos))) pos++;
}
boolean opt(String s) {
eatws();
int slen=s.length();
if (val.regionMatches(pos, s, 0, slen)) {
pos+=slen;
return true;
}
return false;
}
void expect(String s) throws ParseException {
eatws();
int slen=s.length();
if (val.regionMatches(pos, s, 0, slen)) {
pos+=slen;
} else {
throw new ParseException("Expected '"+s+"' at position " + pos + " in '"+val+"'");
}
}
float getFloat() throws ParseException {
eatws();
char[] arr = new char[end-pos];
int i;
for (i=0; i<arr.length; i++) {
char ch = val.charAt(pos);
if ( (ch>='0' && ch<='9')
|| ch=='+' || ch=='-'
|| ch=='.' || ch=='e' || ch=='E'
) {
pos++;
arr[i]=ch;
} else {
break;
}
}
return Float.parseFloat(new String(arr,0,i));
}
String getId() throws ParseException {
eatws();
int id_start=pos;
while (pos<end && Character.isJavaIdentifierPart(val.charAt(pos))) pos++;
return val.substring(id_start, pos);
}
char peek() {
eatws();
return pos<end ? val.charAt(pos) : 0;
}
public String toString() {
return "'" + val + "'" + ", pos=" + pos;
}
}
private static ValueSource parseValSource(StrParser sp, IndexSchema schema) throws ParseException {
String id = sp.getId();
if (sp.opt("(")) {
// a function: could contain a fieldname or another function.
ValueSource vs=null;
if (id.equals("ord")) {
String field = sp.getId();
vs = new OrdFieldSource(field);
} else if (id.equals("rord")) {
String field = sp.getId();
vs = new ReverseOrdFieldSource(field);
} else if (id.equals("linear")) {
ValueSource source = parseValSource(sp, schema);
sp.expect(",");
float slope = sp.getFloat();
sp.expect(",");
float intercept = sp.getFloat();
vs = new LinearFloatFunction(source,slope,intercept);
} else if (id.equals("recip")) {
ValueSource source = parseValSource(sp,schema);
sp.expect(",");
float m = sp.getFloat();
sp.expect(",");
float a = sp.getFloat();
sp.expect(",");
float b = sp.getFloat();
vs = new ReciprocalFloatFunction(source,m,a,b);
} else {
throw new ParseException("Unknown function " + id + " in FunctionQuery(" + sp + ")");
}
sp.expect(")");
return vs;
}
SchemaField f = schema.getField(id);
return f.getType().getValueSource(f);
}
/** Parse a function, returning a FunctionQuery
*/
public static FunctionQuery parseFunction(String func, IndexSchema schema) throws ParseException {
return new FunctionQuery(parseValSource(new StrParser(func), schema));
}
}
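
A sketch exercising the two public entry points against a hypothetical schema with indexed height, weight, and myfield fields; as the javadoc above notes, a trailing "top 20" adds a score-descending sort and sets the count.

package org.apache.solr.search;

import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.function.*;
import org.apache.solr.schema.IndexSchema;

class QueryParsingSketch {
  static void demo(IndexSchema schema) throws ParseException {
    QueryParsing.SortSpec spec =
        QueryParsing.parseSort("height desc, weight asc top 20", schema);
    Sort sort = spec.getSort();   // height desc, weight asc, then score desc
    int count = spec.getCount();  // 20

    // linear(myfield,2,1) scores each document as 2*myfield + 1
    FunctionQuery fq = QueryParsing.parseFunction("linear(myfield,2,1)", schema);
  }
}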

View File

@ -0,0 +1,107 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import java.util.List;
/** A hash key encapsulating a query, a list of filters, and a sort
* @author yonik
* @version $Id$
*/
public final class QueryResultKey {
final Query query;
final Sort sort; // change to normal Sort after Lucene 1.4.3
final SortField[] sfields;
final List<Query> filters;
final int nc_flags; // non-comparable flags... ignored by hashCode and equals
private final int hc; // cached hashCode
private static SortField[] defaultSort = new SortField[0];
public QueryResultKey(Query query, List<Query> filters, Sort sort, int nc_flags) {
this.query = query;
this.sort = sort;
this.filters = filters;
this.nc_flags = nc_flags;
int h = query.hashCode();
if (filters != null) h ^= filters.hashCode();
sfields = (this.sort !=null) ? this.sort.getSort() : defaultSort;
for (SortField sf : sfields) {
// mix the bits so that sortFields are position dependent
// so that a,b won't hash to the same value as b,a
h ^= (h << 8) | (h >>> 25); // reversible hash
if (sf.getField() != null) h += sf.getField().hashCode();
h += sf.getType();
if (sf.getReverse()) h=~h;
if (sf.getLocale()!=null) h+=sf.getLocale().hashCode();
if (sf.getFactory()!=null) h+=sf.getFactory().hashCode();
}
hc = h;
}
public int hashCode() {
return hc;
}
public boolean equals(Object o) {
if (o==this) return true;
if (!(o instanceof QueryResultKey)) return false;
QueryResultKey other = (QueryResultKey)o;
// fast check of the whole hash code... most hash tables will only use
// some of the bits, so if this is a hash collision, it's still likely
// that the full cached hash code will be different.
if (this.hc != other.hc) return false;
// check the things most likely to be different (and the fastest to compare)
// first.
if (this.sfields.length != other.sfields.length) return false;
if (!this.query.equals(other.query)) return false;
if (!isEqual(this.filters, other.filters)) return false;
for (int i=0; i<sfields.length; i++) {
SortField sf1 = this.sfields[i];
SortField sf2 = other.sfields[i];
if (sf1.getType() != sf2.getType()) return false;
if (sf1.getReverse() != sf2.getReverse()) return false;
if (!isEqual(sf1.getField(),sf2.getField())) return false;
if (!isEqual(sf1.getLocale(), sf2.getLocale())) return false;
if (!isEqual(sf1.getFactory(), sf2.getFactory())) return false;
// NOTE: the factory must be identical!!! use singletons!
}
return true;
}
private static boolean isEqual(Object o1, Object o2) {
if (o1==o2) return true; // takes care of identity and null cases
if (o1==null || o2==null) return false;
return o1.equals(o2);
}
}
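
A sketch of the key semantics with hypothetical queries: keys built from equal queries, filters, and sorts compare equal and share a hash code, so they hit the same cache slot.

Query q1 = new TermQuery(new Term("val_s", "apple"));
Query q2 = new TermQuery(new Term("val_s", "apple"));
Sort sort = new Sort(new SortField("id", SortField.STRING));
List<Query> filters = null;  // no filter queries in this sketch

QueryResultKey k1 = new QueryResultKey(q1, filters, sort, 0);
QueryResultKey k2 = new QueryResultKey(q2, filters, sort, 0);
boolean same = k1.equals(k2) && k1.hashCode() == k2.hashCode();  // true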

View File

@ -0,0 +1,102 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.solr.core.SolrInfoMBean;
import java.util.Map;
import java.util.logging.Logger;
import java.io.IOException;
/**
*
* @author yonik
* @version $Id: SolrCache.java,v 1.6 2005/06/21 05:26:43 yonik Exp $
*/
public interface SolrCache extends SolrInfoMBean {
public final static Logger log = Logger.getLogger(SolrCache.class.getName());
/**
* The initialization routine. Instance specific arguments are passed in
* the <code>args</code> map.
* <p>
* The persistence object will exist across different lifetimes of similar caches.
* For example, all filter caches will share the same persistence object, sometimes
* at the same time (it must be threadsafe). If null is passed, then the cache
* implementation should create and return a new persistence object. If not null,
* the passed in object should be returned again.
* <p>
* Since it will exist across the lifetime of many caches, care should be taken
* not to reference any particular cache instance, as that would prevent it from
* being garbage collected (don't use inner classes unless they are static).
* <p>
* The persistence object is designed to be used as a way for statistics
* to accumulate across all instances of the same type of cache; however, the
* object may be of any type desired by the cache implementation.
* <p>
* The {@link CacheRegenerator} is what the cache uses during auto-warming to
* regenerate an item in the new cache from an entry in the old cache.
*
*/
public Object init(Map args, Object persistence, CacheRegenerator regenerator);
// I don't think we need a factory for faster creation given that these
// will be associated with slow-to-create SolrIndexSearchers.
// change to NamedList when other plugins do?
// symbolic name for this cache
public String name();
// Should SolrCache just extend the java.util.Map interface?
// Following the conventions of the java.util.Map interface in any case.
public int size();
public Object put(Object key, Object value);
public Object get(Object key);
public void clear();
/**
* Set different cache states.
* The state a cache is in can have an effect on how statistics are kept.
* The cache user (SolrIndexSearcher) will take care of switching
* cache states.
*/
public enum State { CREATED, STATICWARMING, AUTOWARMING, LIVE }
public void setState(State state);
public State getState();
/**
* Warm this cache associated with <code>searcher</code> using the <code>old</code>
* cache object. <code>this</code> and <code>old</code> will have the same concrete type.
*/
void warm(SolrIndexSearcher searcher, SolrCache old) throws IOException;
// Q: an alternative to passing the searcher here would be to pass it in
// init and have the cache implementation save it.
/** Frees any non-memory resources */
public void close();
}
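
A sketch of the generation hand-off this contract implies, with every collaborator passed in as an assumption (and placed in org.apache.solr.search alongside these classes): the non-null persistence object from the old generation keeps cumulative statistics accumulating.

package org.apache.solr.search;

import java.io.IOException;
import java.util.Map;

class CacheHandoffSketch {
  static SolrCache nextGeneration(Map args, Object persist, CacheRegenerator regen,
                                  SolrIndexSearcher newSearcher, SolrCache old)
      throws IOException {
    SolrCache cache = new LRUCache();
    cache.init(args, persist, regen);  // non-null persist: stats carry over from the old generation
    cache.setState(SolrCache.State.AUTOWARMING);
    cache.warm(newSearcher, old);      // regenerate selected entries from the old cache
    cache.setState(SolrCache.State.LIVE);
    return cache;
  }
}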

File diff suppressed because it is too large

View File

@ -0,0 +1,81 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.*;
import org.apache.lucene.index.Term;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.FieldType;
// TODO: implement the analysis of simple fields with
// FieldType.toInternal() instead of going through the
// analyzer. Should lead to faster query parsing.
/**
* @author yonik
*/
public class SolrQueryParser extends QueryParser {
protected final IndexSchema schema;
public SolrQueryParser(IndexSchema schema) {
super(schema.getDefaultSearchFieldName(), schema.getQueryAnalyzer());
this.schema = schema;
setLowercaseExpandedTerms(false);
}
protected Query getFieldQuery(String field, String queryText) throws ParseException {
// intercept the magic field name "_val_" to use as a hook for our
// own functions.
if (field.equals("_val_")) {
return QueryParsing.parseFunction(queryText, schema);
}
// default to a normal field query
return super.getFieldQuery(field, queryText);
}
protected Query getRangeQuery(String field, String part1, String part2, boolean inclusive) throws ParseException {
FieldType ft = schema.getFieldType(field);
return new ConstantScoreRangeQuery(
field,
"*".equals(part1) ? null : ft.toInternal(part1),
"*".equals(part2) ? null : ft.toInternal(part2),
inclusive, inclusive);
}
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
if (getLowercaseExpandedTerms()) {
termStr = termStr.toLowerCase();
}
// TODO: toInternal() won't necessarily work on partial
// values, so it looks like I need a getPrefix() function
// on fieldtype? Or at the minimum, a method on fieldType
// that can tell me if I should lowercase or not...
// Schema could tell if lowercase filter is in the chain,
// but a more sure way would be to run something through
// the first time and check if it got lowercased.
// TODO: throw exception if the field type doesn't support prefixes?
// (sortable numeric types don't do prefixes, but can do range queries)
Term t = new Term(field, termStr);
return new ConstantScorePrefixQuery(t);
}
}
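
A sketch of the three query shapes this parser special-cases, assuming a schema that defines myfield, price, and name.

package org.apache.solr.search;

import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Query;
import org.apache.solr.schema.IndexSchema;

class ParserSketch {
  static void demo(IndexSchema schema) throws ParseException {
    SolrQueryParser p = new SolrQueryParser(schema);
    Query func = p.parse("_val_:\"linear(myfield,2,1)\"");  // the function-query hook
    Query range = p.parse("price:[10 TO *]");  // ConstantScoreRangeQuery with an open upper bound
    Query prefix = p.parse("name:app*");       // ConstantScorePrefixQuery
  }
}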

View File

@ -0,0 +1,37 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.search.DefaultSimilarity;
import java.util.HashMap;
/**
* @author yonik
*/
// don't make it public for now... easier to change later.
// This class is currently unused.
class SolrSimilarity extends DefaultSimilarity {
private final HashMap<String,Float> lengthNormConfig = new HashMap<String,Float>();
public float lengthNorm(String fieldName, int numTerms) {
// Float f = lengthNormConfig.
// if (lengthNormDisabled.)
return super.lengthNorm(fieldName, numTerms);
}
}

View File

@ -0,0 +1,57 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.search.*;
/**
* Extra lucene sorting utilities & convenience methods
*
* @author yonik
* @version $Id: Sorting.java,v 1.1 2005/06/02 04:43:06 yonik Exp $
*
*/
public class Sorting {
/** Returns a {@link SortField} for a string field.
* If nullLast and nullFirst are both false, then default lucene string sorting is used where
* null strings sort first in an ascending sort, and last in a descending sort.
*
* @param fieldName the name of the field to sort on
* @param reverse true for a reverse (desc) sort
* @param nullLast true if null should come last, regardless of sort order
* @param nullFirst true if null should come first, regardless of sort order
* @return SortField
*/
public static SortField getStringSortField(String fieldName, boolean reverse, boolean nullLast, boolean nullFirst) {
if (nullLast) {
if (!reverse) return new SortField(fieldName, nullStringLastComparatorSource);
else return new SortField(fieldName, SortField.STRING, true);
} else if (nullFirst) {
if (reverse) return new SortField(fieldName, nullStringLastComparatorSource);
else return new SortField(fieldName, SortField.STRING, false);
} else {
return new SortField(fieldName, SortField.STRING, reverse);
}
}
static final SortComparatorSource nullStringLastComparatorSource = new MissingStringLastComparatorSource();
}
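
A sketch of the decision table encoded above; only two of the combinations need the custom comparator, the rest reduce to plain STRING sorts.

// ascending with nulls last: needs the custom comparator
SortField f1 = Sorting.getStringSortField("name", false, true, false);
// descending with nulls last: a plain reverse STRING sort already puts nulls last
SortField f2 = Sorting.getStringSortField("name", true, true, false);
// descending with nulls first: needs the custom comparator
SortField f3 = Sorting.getStringSortField("name", true, false, true);
Sort sort = new Sort(new SortField[] { f1 });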

View File

@ -0,0 +1,180 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.test;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.HashDocSet;
import org.apache.solr.search.DocSet;
import java.util.Random;
import java.util.BitSet;
/**
* @author yonik
*/
public class TestDocSet {
// use test instead of assert since asserts may be turned off
public static void test(boolean condition) {
if (!condition) {
throw new RuntimeException("test requestHandler: assertion failed!");
}
}
static Random rand = new Random();
static BitSet bs;
static BitDocSet bds;
static HashDocSet hds;
static int[] ids; // sized to bitsToSet; only the first count entries from generate() are valid
static void generate(int maxSize, int bitsToSet) {
bs = new BitSet(maxSize);
ids = new int[bitsToSet];
int count=0;
if (maxSize>0) {
for (int i=0; i<bitsToSet; i++) {
int id=rand.nextInt(maxSize);
if (!bs.get(id)) {
bs.set(id);
ids[count++]=id;
}
}
}
bds = new BitDocSet(bs,bitsToSet);
hds = new HashDocSet(ids,0,count);
}
public static void main(String[] args) {
String bsSize=args[0];
boolean randSize=false;
if (bsSize.endsWith("-")) {
bsSize=bsSize.substring(0,bsSize.length()-1);
randSize=true;
}
int bitSetSize = Integer.parseInt(bsSize);
int numSets = Integer.parseInt(args[1]);
int numBitsSet = Integer.parseInt(args[2]);
String test = args[3].intern();
int iter = Integer.parseInt(args[4]);
int ret=0;
BitSet[] sets = new BitSet[numSets];
DocSet[] bset = new DocSet[numSets];
DocSet[] hset = new DocSet[numSets];
BitSet scratch=new BitSet();
for (int i=0; i<numSets; i++) {
generate(randSize ? rand.nextInt(bitSetSize) : bitSetSize, numBitsSet);
sets[i] = bs;
bset[i] = bds;
hset[i] = hds;
}
long start = System.currentTimeMillis();
if ("test".equals(test)) {
for (int it=0; it<iter; it++) {
generate(randSize ? rand.nextInt(bitSetSize) : bitSetSize, numBitsSet);
BitSet bs1=bs;
BitDocSet bds1=bds;
HashDocSet hds1=hds;
generate(randSize ? rand.nextInt(bitSetSize) : bitSetSize, numBitsSet);
BitSet res = ((BitSet)bs1.clone());
res.and(bs);
int icount = res.cardinality();
test(bds1.intersection(bds).size() == icount);
test(bds1.intersectionSize(bds) == icount);
if (bds1.intersection(hds).size() != icount) {
DocSet ds = bds1.intersection(hds);
System.out.println("STOP");
}
test(bds1.intersection(hds).size() == icount);
test(bds1.intersectionSize(hds) == icount);
test(hds1.intersection(bds).size() == icount);
test(hds1.intersectionSize(bds) == icount);
test(hds1.intersection(hds).size() == icount);
test(hds1.intersectionSize(hds) == icount);
ret += icount;
}
}
String type=null;
String oper=null;
if (test.endsWith("B")) { type="B"; }
if (test.endsWith("H")) { type="H"; }
if (test.endsWith("M")) { type="M"; }
if (test.startsWith("intersect")) oper="intersect";
if (test.startsWith("intersectSize")) oper="intersectSize";
if (test.startsWith("intersectAndSize")) oper="intersectSize";
if (oper!=null) {
for (int it=0; it<iter; it++) {
int idx1 = rand.nextInt(numSets);
int idx2 = rand.nextInt(numSets);
DocSet a=null,b=null;
if (type=="B") {
a=bset[idx1]; b=bset[idx2];
} else if (type=="H") {
a=hset[idx1]; b=bset[idx2];
} else if (type=="M") {
if (idx1 < idx2) {
a=bset[idx1];
b=hset[idx2];
} else {
a=hset[idx1];
b=bset[idx2];
}
}
if (oper=="intersect") {
DocSet res = a.intersection(b);
ret += res.memSize();
} else if (oper=="intersectSize") {
ret += a.intersectionSize(b);
} else if (oper=="intersectAndSize") {
DocSet res = a.intersection(b);
ret += res.size();
}
}
}
long end = System.currentTimeMillis();
System.out.println("TIME="+(end-start));
// System.out.println("ret="+ret + " scratchsize="+scratch.size());
System.out.println("ret="+ret);
}
}
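
A hypothetical invocation of the self-check mode; a trailing '-' on the first argument randomizes each set's size up to that bound.

// 1000 iterations of the "test" mode over random-size sets of up to
// 10000 docs, ~500 bits set per set, 20 pre-generated sets
TestDocSet.main(new String[] { "10000-", "20", "500", "test", "1000" });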

View File

@ -0,0 +1,149 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.tst;
import org.apache.lucene.search.*;
import org.apache.lucene.document.Document;
import java.util.List;
import java.util.logging.Level;
import java.io.IOException;
import java.net.URL;
import org.apache.solr.util.StrUtils;
import org.apache.solr.util.NamedList;
import org.apache.solr.search.DocSlice;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.request.SolrQueryResponse;
import org.apache.solr.request.SolrQueryRequest;
/**
* @author yonik
* @version $Id: OldRequestHandler.java,v 1.7 2005/12/02 04:31:05 yonik Exp $
*/
public class OldRequestHandler implements SolrRequestHandler {
long numRequests;
long numErrors;
public void init(NamedList args) {
SolrCore.log.log(Level.INFO, "Unused request handler arguments:" + args);
}
public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
numRequests++;
Query query = null;
Filter filter = null;
List<String> commands = StrUtils.splitSmart(req.getQueryString(),';');
String qs = commands.size() >= 1 ? commands.get(0) : "";
query = QueryParsing.parseQuery(qs, req.getSchema());
// If the first non-query, non-filter command is a simple sort on an indexed field, then
// we can use the Lucene sort ability.
Sort sort = null;
if (commands.size() >= 2) {
QueryParsing.SortSpec sortSpec = QueryParsing.parseSort(commands.get(1), req.getSchema());
if (sortSpec != null) {
sort = sortSpec.getSort();
// ignore the count for now... it's currently only controlled by start & limit on req
// count = sortSpec.getCount();
}
}
Hits hits=null;
try {
hits = req.getSearcher().search(query,filter,sort);
int numHits = hits.length();
int startRow = Math.min(numHits, req.getStart());
int endRow = Math.min(numHits,req.getStart()+req.getLimit());
int numRows = endRow-startRow;
int[] ids = new int[numRows];
Document[] data = new Document[numRows];
for (int i=startRow; i<endRow; i++) {
// ids/data are sized numRows, so index them relative to startRow
ids[i-startRow] = hits.id(i);
data[i-startRow] = hits.doc(i);
}
rsp.add(null, new DocSlice(0,numRows,ids,null,numHits,0.0f));
/***********************
rsp.setResults(new DocSlice(0,numRows,ids,null,numHits));
// Setting the actual document objects is optional
rsp.setResults(data);
************************/
} catch (IOException e) {
rsp.setException(e);
numErrors++;
return;
}
}
public String getName() {
return OldRequestHandler.class.getName();
}
public String getVersion() {
return SolrCore.version;
}
public String getDescription() {
return "The original Hits based request handler";
}
public Category getCategory() {
return Category.QUERYHANDLER;
}
public String getCvsId() {
return "$Id: OldRequestHandler.java,v 1.7 2005/12/02 04:31:05 yonik Exp $";
}
public String getCvsName() {
return "$Name: $";
}
public String getCvsSource() {
return "$Source: /cvs/main/searching/solr/solarcore/src/solr/tst/OldRequestHandler.java,v $";
}
public URL[] getDocs() {
return null;
}
public NamedList getStatistics() {
NamedList lst = new NamedList();
lst.add("requests", numRequests);
lst.add("errors", numErrors);
return lst;
}
}

View File

@ -0,0 +1,300 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.tst;
import org.apache.lucene.search.*;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import java.util.*;
import java.util.regex.Pattern;
import java.util.logging.Logger;
import java.util.logging.Level;
import java.net.URL;
import org.apache.solr.util.StrUtils;
import org.apache.solr.util.NamedList;
import org.apache.solr.search.*;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrException;
import org.apache.solr.request.SolrRequestHandler;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrQueryResponse;
/**
* @author yonik
* @version $Id: TestRequestHandler.java,v 1.19 2005/12/02 04:31:05 yonik Exp $
*/
public class TestRequestHandler implements SolrRequestHandler {
private static Logger log = Logger.getLogger(SolrIndexSearcher.class.getName());
public void init(NamedList args) {
SolrCore.log.log(Level.INFO, "Unused request handler arguments:" + args);
}
// use test instead of assert since asserts may be turned off
public void test(boolean condition) {
try {
if (!condition) {
throw new RuntimeException("test requestHandler: assertion failed!");
}
} catch (RuntimeException e) {
SolrException.log(log,e);
throw(e);
}
}
private long numRequests;
private long numErrors;
private final Pattern splitList=Pattern.compile(",| ");
public void handleRequest(SolrQueryRequest req, SolrQueryResponse rsp) {
numRequests++;
// TODO: test if lucene will accept an escaped ';', otherwise
// we need to un-escape them before we pass to QueryParser
try {
String sreq = req.getQueryString();
if (sreq==null) throw new SolrException(400,"Missing queryString");
List<String> commands = StrUtils.splitSmart(sreq,';');
String qs = commands.size() >= 1 ? commands.get(0) : "";
Query query = QueryParsing.parseQuery(qs, req.getSchema());
// find fieldnames to return (fieldlist)
String fl = req.getParam("fl");
int flags=0;
if (fl != null) {
// TODO - this could become more efficient if widely used.
// TODO - should field order be maintained?
String[] flst = splitList.split(fl,0);
if (flst.length > 0 && !(flst.length==1 && flst[0].length()==0)) {
Set<String> set = new HashSet<String>();
for (String fname : flst) {
if ("score".equals(fname)) flags |= SolrIndexSearcher.GET_SCORES;
set.add(fname);
}
rsp.setReturnFields(set);
}
}
// If the first non-query, non-filter command is a simple sort on an indexed field, then
// we can use the Lucene sort ability.
Sort sort = null;
if (commands.size() >= 2) {
QueryParsing.SortSpec sortSpec = QueryParsing.parseSort(commands.get(1), req.getSchema());
if (sortSpec != null) {
sort = sortSpec.getSort();
// ignore the count for now... it's currently only controlled by start & limit on req
// count = sortSpec.getCount();
}
}
SolrIndexSearcher searcher = req.getSearcher();
/***
Object o = searcher.cacheLookup("dfllNode", query);
if (o == null) {
searcher.cacheInsert("dfllNode",query,"Hello Bob");
} else {
System.out.println("User Cache Hit On " + o);
}
***/
int start=req.getStart();
int limit=req.getLimit();
Query filterQuery=null;
DocSet filter=null;
Filter lfilter=null;
DocList results = req.getSearcher().getDocList(query, null, sort, req.getStart(), req.getLimit(), flags);
rsp.add(null, results);
if (qs.startsWith("values")) {
rsp.add("testname1","testval1");
rsp.add("testarr1",new String[]{"my val 1","my val 2"});
NamedList nl = new NamedList();
nl.add("myInt", 333);
nl.add("myNullVal", null);
nl.add("myFloat",1.414213562f);
nl.add("myDouble", 1e100d);
nl.add("myBool", false);
nl.add("myLong",999999999999L);
Document doc = new Document();
doc.add(new Field("id","55",true,true,false));
nl.add("myDoc",doc);
nl.add("myResult",results);
nl.add("myStr","&wow! test escaping: a&b<c&");
nl.add(null, "this value had a null name...");
nl.add("myIntArray", new Integer[] { 100, 5, -10, 42 });
nl.add("epoch", new Date(0));
nl.add("currDate", new Date(System.currentTimeMillis()));
rsp.add("myNamedList", nl);
} else if (qs.startsWith("fields")) {
NamedList nl = new NamedList();
Collection flst;
flst = searcher.getReader().getFieldNames(IndexReader.FieldOption.INDEXED);
nl.add("indexed",flst);
flst = searcher.getReader().getFieldNames(IndexReader.FieldOption.UNINDEXED);
nl.add("unindexed",flst);
rsp.add("fields", nl);
}
test(results.size() <= limit);
test(results.size() <= results.matches());
// System.out.println("limit="+limit+" results.size()="+results.size()+" matches="+results.matches());
test((start==0 && limit>=results.matches()) ? results.size()==results.matches() : true );
//
// test against hits
//
Hits hits = searcher.search(query, lfilter, sort);
test(hits.length() == results.matches());
DocList rrr2 = results.subset(start,limit);
test(rrr2 == results);
DocIterator iter=results.iterator();
/***
for (int i=0; i<hits.length(); i++) {
System.out.println("doc="+hits.id(i) + " score="+hits.score(i));
}
***/
for (int i=0; i<results.size(); i++) {
test( iter.nextDoc() == hits.id(i+results.offset()) );
// Document doesn't implement equals()
// test( searcher.document(i).equals(hits.doc(i)));
}
DocList results2 = req.getSearcher().getDocList(query,query,sort,start,limit);
test(results2.size()==results.size() && results2.matches()==results.matches());
DocList results3 = req.getSearcher().getDocList(query,query,null,start,limit);
test(results3.size()==results.size() && results3.matches()==results.matches());
//
// getting both the list and set
//
DocListAndSet both = searcher.getDocListAndSet(query,filter,sort,start, limit);
test( both.docList.equals(results) );
test( both.docList.matches() == both.docSet.size() );
test( (start==0 && both.docSet.size() <= limit) ? both.docSet.equals(both.docList) : true);
// use the result set as a filter itself...
DocListAndSet both2 = searcher.getDocListAndSet(query,both.docSet,sort,start, limit);
test( both2.docList.equals(both.docList) );
test( both2.docSet.equals(both.docSet) );
BitSet bits = both.docSet.getBits();
BitSet neg = ((BitSet)bits.clone());
neg.flip(0, bits.length());
// use the negative as a filter (should result in 0 matches)
// TODO: fix if filter is not null (the zero-match expectation below assumes no extra filter was applied)
both2 = searcher.getDocListAndSet(query,new BitDocSet(neg),sort, start, limit);
test( both2.docList.size() == 0 );
test( both2.docList.matches() == 0 );
test( both2.docSet.size() == 0 );
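// The raw DocSet for the query should equal the set computed alongside the list.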
DocSet allResults=searcher.getDocSet(query,filter);
test( allResults.equals(both.docSet) );
if (filter != null) {
DocSet res=searcher.getDocSet(query);
test( res.size() >= results.size() );
test( res.intersection(filter).equals(both.docSet));
test( res.intersectionSize(filter) == both.docSet.size() );
if (filterQuery != null) {
test( searcher.numDocs(filterQuery,res) == both.docSet.size() );
}
}
} catch (Exception e) {
rsp.setException(e);
numErrors++;
return;
}
}
//////////////////////// SolrInfoMBeans methods //////////////////////
public String getName() {
return TestRequestHandler.class.getName();
}
public String getVersion() {
return SolrCore.version;
}
public String getDescription() {
return "A test handler that runs some sanity checks on results";
}
public Category getCategory() {
return Category.QUERYHANDLER;
}
public String getCvsId() {
return "$Id: TestRequestHandler.java,v 1.19 2005/12/02 04:31:05 yonik Exp $";
}
public String getCvsName() {
return "$Name: $";
}
public String getCvsSource() {
return "$Source: /cvs/main/searching/solr/solarcore/src/solr/tst/TestRequestHandler.java,v $";
}
public URL[] getDocs() {
return null;
}
public NamedList getStatistics() {
NamedList lst = new NamedList();
lst.add("requests", numRequests);
lst.add("errors", numErrors);
return lst;
}
}


@@ -0,0 +1,44 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update;
import org.apache.lucene.document.Document;
/**
* @author yonik
* @version $Id$
*/
public class AddUpdateCommand extends UpdateCommand {
public String id;
public Document doc;
public boolean allowDups;
public boolean overwritePending;
public boolean overwriteCommitted;
public AddUpdateCommand() {
super("add");
}
public String toString() {
StringBuilder sb = new StringBuilder(commandName);
sb.append(':');
if (id!=null) sb.append("id=").append(id);
sb.append(",allowDups=").append(allowDups);
sb.append(",overwritePending=").append(overwritePending);
sb.append(",overwriteCommitted=").append(overwriteCommitted);
return sb.toString();
}
}
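A minimal usage sketch (how the command is handed to an update handler is not shown in this file; the field values below are illustrative assumptions):

// Sketch only: an add command for a document keyed by its unique id.
Document doc = new Document();
doc.add(new Field("id", "42", true, true, false)); // store, index, don't tokenize
AddUpdateCommand cmd = new AddUpdateCommand();
cmd.id = "42";
cmd.doc = doc;
cmd.allowDups = false;         // a doc with the same id should be replaced
cmd.overwritePending = true;   // overwrite uncommitted docs with this id
cmd.overwriteCommitted = true; // overwrite committed docs with this id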


@@ -0,0 +1,37 @@
/**
* Copyright 2006 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.update;
/**
* @author yonik
* @version $Id$
*/
public class CommitUpdateCommand extends UpdateCommand {
public boolean optimize;
public boolean waitFlush;
public boolean waitSearcher=true;
public CommitUpdateCommand(boolean optimize) {
super("commit");
this.optimize=optimize;
}
public String toString() {
return "commit(optimize="+optimize
+",waitFlush="+waitFlush
+",waitSearcher="+waitSearcher
+')';
}
}
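A minimal usage sketch (field semantics inferred from the names; treat as an assumption):

// Sketch only: an optimizing commit that doesn't block on the new searcher.
CommitUpdateCommand cmd = new CommitUpdateCommand(true); // true => optimize the index
cmd.waitFlush = true;       // wait until changes are flushed to disk
cmd.waitSearcher = false;   // don't wait for a new searcher to be registered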

Some files were not shown because too many files have changed in this diff.