mirror of https://github.com/apache/lucene.git
LUCENE-4055: merge trunk (1338960:1341010)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4055@1341015 13f79535-47bb-0310-9956-ffa450edef68
Commit: d5f7dbdde7
@@ -266,6 +266,11 @@ Changes in backwards compatibility policy
* LUCENE-3970: Rename Fields.getUniqueFieldCount -> .size() and
  Terms.getUniqueTermCount -> .size(). (Iulius Curt via Mike McCandless)

+* LUCENE-3514: IndexSearcher.setDefaultFieldSortScoring was removed
+  and replaced with per-search control via new expert search methods
+  that take two booleans indicating whether hit scores and max
+  score should be computed. (Mike McCandless)
+
Changes in Runtime Behavior

* LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you

@@ -508,6 +513,9 @@ API Changes
  immutable instances of NormalizeCharMap. (Dawid Weiss, Mike
  McCandless)

+* LUCENE-4063: FrenchLightStemmer no longer deletes repeated digits.
+  (Tanguy Moal via Steve Rowe)
+
New features

* LUCENE-2604: Added RegexpQuery support to QueryParser. Regular expressions

@@ -857,6 +865,10 @@ New features
* LUCENE-4039: Add AddIndexesTask to benchmark, which uses IW.addIndexes.
  (Shai Erera)

+* LUCENE-3514: Added IndexSearcher.searchAfter when Sort is used,
+  returning results after a specified FieldDoc for deep
+  paging. (Mike McCandless)
+
Optimizations

* LUCENE-2588: Don't store unnecessary suffixes when writing the terms

@@ -905,7 +917,11 @@ Optimizations
  FST under the hood, which requires less RAM. NormalizeCharMap no
  longer accepts empty string match (it did previously, but ignored
  it). (Dawid Weiss, Mike McCandless)

+* LUCENE-4061: improve synchronization in DirectoryTaxonomyWriter.addCategory
+  and few general improvements to DirectoryTaxonomyWriter.
+  (Shai Erera, Gilad Barkai)
+
Bug fixes

* LUCENE-2803: The FieldCache can miss values if an entry for a reader

@@ -953,6 +969,11 @@ Bug fixes
  offset calculation in PathHierarchyTokenizer.
  (Mike McCandless, Uwe Schindler, Robert Muir)

+* LUCENE-4060: Fix a synchronization bug in
+  DirectoryTaxonomyWriter.addTaxonomies(). Also, the method has been renamed to
+  addTaxonomy and now takes only one Directory and one OrdinalMap.
+  (Shai Erera, Gilad Barkai)
+
Documentation

* LUCENE-3958: Javadocs corrections for IndexWriter.

@@ -990,6 +1011,10 @@ Build
* LUCENE-3286: Moved remainder of contrib/xml-query-parser to lucene/queryparser.
  Classes now found at org.apache.lucene.queryparser.xml.*

+* LUCENE-4059: Improve ANT task prepare-webpages (used by documentation
+  tasks) to correctly encode build file names as URIs for later processing by
+  XSL. (Greg Bowyer, Uwe Schindler)
+
======================= Lucene 3.6.0 =======================

Changes in backwards compatibility policy
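The LUCENE-3514 entries above describe the move from IndexSearcher.setDefaultFieldSortScoring to per-call control. A minimal sketch of what that looks like for a caller, assuming an already-open IndexSearcher and Query; the "price" field and its sort type are purely illustrative, and the exact SortField construction on this development branch may differ:

import java.io.IOException;

import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;

public class SortedSearchWithScores {
  /**
   * Replaces the removed searcher.setDefaultFieldSortScoring(true, true):
   * scoring is now requested per call via the two trailing booleans of the
   * expert search method added in this commit.
   */
  public static TopFieldDocs searchWithScores(IndexSearcher searcher, Query query) throws IOException {
    Sort sort = new Sort(new SortField("price", SortField.Type.LONG)); // "price" is a hypothetical field
    return searcher.search(query, null, 10, sort,
                           true,   // doDocScores: compute a score for each hit
                           true);  // doMaxScore: also compute the maximum score over all hits
  }
}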
|
|
|
@@ -62,8 +62,16 @@ CharacterEntities = ( "AElig" | "Aacute" | "Acirc" | "Agrave" | "Alpha"
     | "weierp" | "xi" | "yacute" | "yen" | "yuml" | "zeta"
     | "zwj" | "zwnj" )
%{
-  private static final Set<String> upperCaseVariantsAccepted
-    = new HashSet<String>(Arrays.asList("quot","copy","gt","lt","reg","amp"));
+  private static final Map<String,String> upperCaseVariantsAccepted
+    = new HashMap<String,String>();
+  static {
+    upperCaseVariantsAccepted.put("quot", "QUOT");
+    upperCaseVariantsAccepted.put("copy", "COPY");
+    upperCaseVariantsAccepted.put("gt", "GT");
+    upperCaseVariantsAccepted.put("lt", "LT");
+    upperCaseVariantsAccepted.put("reg", "REG");
+    upperCaseVariantsAccepted.put("amp", "AMP");
+  }
  private static final CharArrayMap<Character> entityValues
    = new CharArrayMap<Character>(Version.LUCENE_40, 253, false);
  static {

@@ -145,8 +153,9 @@ CharacterEntities = ( "AElig" | "Aacute" | "Acirc" | "Agrave" | "Alpha"
    for (int i = 0 ; i < entities.length ; i += 2) {
      Character value = entities[i + 1].charAt(0);
      entityValues.put(entities[i], value);
-      if (upperCaseVariantsAccepted.contains(entities[i])) {
-        entityValues.put(entities[i].toUpperCase(), value);
+      String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);
+      if (upperCaseVariant != null) {
+        entityValues.put(upperCaseVariant, value);
      }
    }
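The hunks above replace a Set of entity names (previously upper-cased blindly via String.toUpperCase) with an explicit lowercase-to-uppercase map, so only the uppercase variants HTML actually permits are registered. A standalone illustration of that lookup pattern (plain Java, not Lucene code; the two sample entities are arbitrary):

import java.util.HashMap;
import java.util.Map;

public class UpperCaseEntityVariants {
  private static final Map<String,String> UPPER_VARIANTS = new HashMap<String,String>();
  static {
    UPPER_VARIANTS.put("quot", "QUOT");
    UPPER_VARIANTS.put("copy", "COPY");
    UPPER_VARIANTS.put("gt", "GT");
    UPPER_VARIANTS.put("lt", "LT");
    UPPER_VARIANTS.put("reg", "REG");
    UPPER_VARIANTS.put("amp", "AMP");
  }

  public static void main(String[] args) {
    String[][] entities = { {"amp", "&"}, {"eacute", "\u00E9"} };
    Map<String,Character> entityValues = new HashMap<String,Character>();
    for (String[] entity : entities) {
      Character value = Character.valueOf(entity[1].charAt(0));
      entityValues.put(entity[0], value);
      String upperCaseVariant = UPPER_VARIANTS.get(entity[0]);
      if (upperCaseVariant != null) {
        entityValues.put(upperCaseVariant, value);   // "AMP" is registered, "EACUTE" is not
      }
    }
    System.out.println(entityValues);  // e.g. {AMP=&, amp=&, eacute=é} (order not guaranteed)
  }
}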
|
@ -1,4 +1,4 @@
|
|||
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 3/24/12 4:50 PM */
|
||||
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 5/18/12 12:24 PM */
|
||||
|
||||
package org.apache.lucene.analysis.charfilter;
|
||||
|
||||
|
@ -21,7 +21,8 @@ package org.apache.lucene.analysis.charfilter;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
|
@ -39,8 +40,8 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
|
|||
/**
|
||||
* This class is a scanner generated by
|
||||
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
|
||||
* on 3/24/12 4:50 PM from the specification file
|
||||
* <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
|
||||
* on 5/18/12 12:24 PM from the specification file
|
||||
* <tt>C:/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
|
||||
*/
|
||||
public final class HTMLStripCharFilter extends BaseCharFilter {
|
||||
|
||||
|
@ -30522,8 +30523,16 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
|
|||
private boolean zzEOFDone;
|
||||
|
||||
/* user code: */
|
||||
private static final Set<String> upperCaseVariantsAccepted
|
||||
= new HashSet<String>(Arrays.asList("quot","copy","gt","lt","reg","amp"));
|
||||
private static final Map<String,String> upperCaseVariantsAccepted
|
||||
= new HashMap<String,String>();
|
||||
static {
|
||||
upperCaseVariantsAccepted.put("quot", "QUOT");
|
||||
upperCaseVariantsAccepted.put("copy", "COPY");
|
||||
upperCaseVariantsAccepted.put("gt", "GT");
|
||||
upperCaseVariantsAccepted.put("lt", "LT");
|
||||
upperCaseVariantsAccepted.put("reg", "REG");
|
||||
upperCaseVariantsAccepted.put("amp", "AMP");
|
||||
}
|
||||
private static final CharArrayMap<Character> entityValues
|
||||
= new CharArrayMap<Character>(Version.LUCENE_40, 253, false);
|
||||
static {
|
||||
|
@ -30605,8 +30614,9 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
|
|||
for (int i = 0 ; i < entities.length ; i += 2) {
|
||||
Character value = entities[i + 1].charAt(0);
|
||||
entityValues.put(entities[i], value);
|
||||
if (upperCaseVariantsAccepted.contains(entities[i])) {
|
||||
entityValues.put(entities[i].toUpperCase(), value);
|
||||
String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);
|
||||
if (upperCaseVariant != null) {
|
||||
entityValues.put(upperCaseVariant, value);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -19,7 +19,8 @@ package org.apache.lucene.analysis.charfilter;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.util.Version;
|
||||
|
|
|
@ -50,8 +50,16 @@ def main():
|
|||
print output_line, ')'
|
||||
|
||||
print '%{'
|
||||
print ' private static final Set<String> upperCaseVariantsAccepted'
|
||||
print ' = new HashSet<String>(Arrays.asList("quot","copy","gt","lt","reg","amp"));'
|
||||
print ' private static final Map<String,String> upperCaseVariantsAccepted'
|
||||
print ' = new HashMap<String,String>();'
|
||||
print ' static {'
|
||||
print ' upperCaseVariantsAccepted.put("quot", "QUOT");'
|
||||
print ' upperCaseVariantsAccepted.put("copy", "COPY");'
|
||||
print ' upperCaseVariantsAccepted.put("gt", "GT");'
|
||||
print ' upperCaseVariantsAccepted.put("lt", "LT");'
|
||||
print ' upperCaseVariantsAccepted.put("reg", "REG");'
|
||||
print ' upperCaseVariantsAccepted.put("amp", "AMP");'
|
||||
print ' }'
|
||||
print ' private static final CharArrayMap<Character> entityValues'
|
||||
print ' = new CharArrayMap<Character>(Version.LUCENE_40, %i, false);' % len(keys)
|
||||
print ' static {'
|
||||
|
@ -68,8 +76,9 @@ def main():
|
|||
print ' for (int i = 0 ; i < entities.length ; i += 2) {'
|
||||
print ' Character value = entities[i + 1].charAt(0);'
|
||||
print ' entityValues.put(entities[i], value);'
|
||||
print ' if (upperCaseVariantsAccepted.contains(entities[i])) {'
|
||||
print ' entityValues.put(entities[i].toUpperCase(), value);'
|
||||
print ' String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);'
|
||||
print ' if (upperCaseVariant != null) {'
|
||||
print ' entityValues.put(upperCaseVariant, value);'
|
||||
print ' }'
|
||||
print ' }'
|
||||
print " }"
|
||||
|
|
|
@@ -246,7 +246,7 @@ public class FrenchLightStemmer {

    char ch = s[0];
    for (int i = 1; i < len; i++) {
-      if (s[i] == ch)
+      if (s[i] == ch && Character.isLetter(ch))
        len = delete(s, i--, len);
      else
        ch = s[i];

@@ -260,7 +260,7 @@ public class FrenchLightStemmer {
      if (s[len-1] == 'r') len--;
      if (s[len-1] == 'e') len--;
      if (s[len-1] == 'e') len--;
-      if (s[len-1] == s[len-2]) len--;
+      if (s[len-1] == s[len-2] && Character.isLetter(s[len-1])) len--;
    }
    return len;
  }
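The LUCENE-4063 change above guards the repeated-character collapse with Character.isLetter, so digit runs are no longer shortened. A standalone sketch of the idea (not the Lucene stemmer itself, which works on a char[] in place):

public class CollapseRepeatedLetters {
  static String collapse(String s) {
    StringBuilder out = new StringBuilder();
    char prev = 0;
    for (int i = 0; i < s.length(); i++) {
      char ch = s.charAt(i);
      // only drop a repeat when it is a letter, mirroring the Character.isLetter(ch) guard above
      if (i > 0 && ch == prev && Character.isLetter(ch)) {
        continue;
      }
      out.append(ch);
      prev = ch;
    }
    return out.toString();
  }

  public static void main(String[] args) {
    System.out.println(collapse("1234555"));  // 1234555 (digit runs untouched)
    System.out.println(collapse("abcdeff"));  // abcdef  (doubled letters still collapsed)
  }
}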
|
|
@@ -153,6 +153,22 @@ public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {

    checkOneTerm(analyzer, "disposition", "dispos");
    checkOneTerm(analyzer, "dispose", "dispos");
+
+    // SOLR-3463 : abusive compression of repeated characters in numbers
+    // Trailing repeated char elision :
+    checkOneTerm(analyzer, "1234555", "1234555");
+    // Repeated char within numbers with more than 4 characters :
+    checkOneTerm(analyzer, "12333345", "12333345");
+    // Short numbers weren't affected already:
+    checkOneTerm(analyzer, "1234", "1234");
+    // Ensure behaviour is preserved for words!
+    // Trailing repeated char elision :
+    checkOneTerm(analyzer, "abcdeff", "abcdef");
+    // Repeated char within words with more than 4 characters :
+    checkOneTerm(analyzer, "abcccddeef", "abcdef");
+    checkOneTerm(analyzer, "créées", "cre");
+    // Combined letter and digit repetition
+    checkOneTerm(analyzer, "22hh00", "22h00"); // 10:00pm
  }

  /** Test against a vocabulary from the reference impl */
|
|
@ -252,6 +252,8 @@ public class ToStringUtil {
|
|||
/**
|
||||
* Romanize katakana with modified hepburn
|
||||
*/
|
||||
// TODO: now that this is used by readingsfilter and not just for
|
||||
// debugging, fix this to really be a scheme that works best with IMEs
|
||||
public static void getRomanization(Appendable builder, CharSequence s) throws IOException {
|
||||
final int len = s.length();
|
||||
for (int i = 0; i < len; i++) {
|
||||
|
@ -522,6 +524,9 @@ public class ToStringUtil {
|
|||
if (ch2 == 'ウ') {
|
||||
builder.append("tō");
|
||||
i++;
|
||||
} else if (ch2 == 'ゥ') {
|
||||
builder.append("tu");
|
||||
i++;
|
||||
} else {
|
||||
builder.append("to");
|
||||
}
|
||||
|
@ -665,7 +670,7 @@ public class ToStringUtil {
|
|||
builder.append("mu");
|
||||
break;
|
||||
case 'メ':
|
||||
builder.append("mi");
|
||||
builder.append("me");
|
||||
break;
|
||||
case 'モ':
|
||||
if (ch2 == 'ウ') {
|
||||
|
@ -690,7 +695,12 @@ public class ToStringUtil {
|
|||
}
|
||||
break;
|
||||
case 'ラ':
|
||||
builder.append("ra");
|
||||
if (ch2 == '゜') {
|
||||
builder.append("la");
|
||||
i++;
|
||||
} else {
|
||||
builder.append("ra");
|
||||
}
|
||||
break;
|
||||
case 'リ':
|
||||
if (ch2 == 'ョ' && ch3 == 'ウ') {
|
||||
|
@ -711,20 +721,36 @@ public class ToStringUtil {
|
|||
} else if (ch2 == 'ェ') {
|
||||
builder.append("rye");
|
||||
i++;
|
||||
} else if (ch2 == '゜') {
|
||||
builder.append("li");
|
||||
i++;
|
||||
} else {
|
||||
builder.append("ri");
|
||||
}
|
||||
break;
|
||||
case 'ル':
|
||||
builder.append("ru");
|
||||
if (ch2 == '゜') {
|
||||
builder.append("lu");
|
||||
i++;
|
||||
} else {
|
||||
builder.append("ru");
|
||||
}
|
||||
break;
|
||||
case 'レ':
|
||||
builder.append("re");
|
||||
if (ch2 == '゜') {
|
||||
builder.append("le");
|
||||
i++;
|
||||
} else {
|
||||
builder.append("re");
|
||||
}
|
||||
break;
|
||||
case 'ロ':
|
||||
if (ch2 == 'ウ') {
|
||||
builder.append("rō");
|
||||
i++;
|
||||
} else if (ch2 == '゜') {
|
||||
builder.append("lo");
|
||||
i++;
|
||||
} else {
|
||||
builder.append("ro");
|
||||
}
|
||||
|
@ -887,7 +913,28 @@ public class ToStringUtil {
|
|||
builder.append("da");
|
||||
break;
|
||||
case 'ヂ':
|
||||
builder.append("ji");
|
||||
// TODO: investigate all this
|
||||
if (ch2 == 'ョ' && ch3 == 'ウ') {
|
||||
builder.append("jō");
|
||||
i += 2;
|
||||
} else if (ch2 == 'ュ' && ch3 == 'ウ') {
|
||||
builder.append("jū");
|
||||
i += 2;
|
||||
} else if (ch2 == 'ャ') {
|
||||
builder.append("ja");
|
||||
i++;
|
||||
} else if (ch2 == 'ョ') {
|
||||
builder.append("jo");
|
||||
i++;
|
||||
} else if (ch2 == 'ュ') {
|
||||
builder.append("ju");
|
||||
i++;
|
||||
} else if (ch2 == 'ェ') {
|
||||
builder.append("je");
|
||||
i++;
|
||||
} else {
|
||||
builder.append("ji");
|
||||
}
|
||||
break;
|
||||
case 'ヅ':
|
||||
builder.append("zu");
|
||||
|
@ -994,6 +1041,18 @@ public class ToStringUtil {
|
|||
builder.append("po");
|
||||
}
|
||||
break;
|
||||
case 'ヷ':
|
||||
builder.append("va");
|
||||
break;
|
||||
case 'ヸ':
|
||||
builder.append("vi");
|
||||
break;
|
||||
case 'ヹ':
|
||||
builder.append("ve");
|
||||
break;
|
||||
case 'ヺ':
|
||||
builder.append("vo");
|
||||
break;
|
||||
case 'ヴ':
|
||||
if (ch2 == 'ィ' && ch3 == 'ェ') {
|
||||
builder.append("vye");
|
||||
|
|
|
@ -17,6 +17,9 @@ package org.apache.lucene.analysis.ja.util;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
public class TestToStringUtil extends LuceneTestCase {
|
||||
|
@ -31,4 +34,79 @@ public class TestToStringUtil extends LuceneTestCase {
|
|||
assertEquals("chashu", ToStringUtil.getRomanization("チャーシュー"));
|
||||
assertEquals("shumai", ToStringUtil.getRomanization("シューマイ"));
|
||||
}
|
||||
|
||||
// see http://en.wikipedia.org/wiki/Hepburn_romanization,
|
||||
// but this isnt even thorough or really probably what we want!
|
||||
public void testHepburnTable() {
|
||||
Map<String,String> table = new HashMap<String,String>() {{
|
||||
put("ア", "a"); put("イ", "i"); put("ウ", "u"); put("エ", "e"); put("オ", "o");
|
||||
put("カ", "ka"); put("キ", "ki"); put("ク", "ku"); put("ケ", "ke"); put("コ", "ko");
|
||||
put("サ", "sa"); put("シ", "shi"); put("ス", "su"); put("セ", "se"); put("ソ", "so");
|
||||
put("タ", "ta"); put("チ", "chi"); put("ツ", "tsu"); put("テ", "te"); put("ト", "to");
|
||||
put("ナ", "na"); put("ニ", "ni"); put("ヌ", "nu"); put("ネ", "ne"); put("ノ", "no");
|
||||
put("ハ", "ha"); put("ヒ", "hi"); put("フ", "fu"); put("ヘ", "he"); put("ホ", "ho");
|
||||
put("マ", "ma"); put("ミ", "mi"); put("ム", "mu"); put("メ", "me"); put("モ", "mo");
|
||||
put("ヤ", "ya"); put("ユ", "yu"); put("ヨ", "yo");
|
||||
put("ラ", "ra"); put("リ", "ri"); put("ル", "ru"); put("レ", "re"); put("ロ", "ro");
|
||||
put("ワ", "wa"); put("ヰ", "i"); put("ヱ", "e"); put("ヲ", "o");
|
||||
put("ン", "n");
|
||||
put("ガ", "ga"); put("ギ", "gi"); put("グ", "gu"); put("ゲ", "ge"); put("ゴ", "go");
|
||||
put("ザ", "za"); put("ジ", "ji"); put("ズ", "zu"); put("ゼ", "ze"); put("ゾ", "zo");
|
||||
put("ダ", "da"); put("ヂ", "ji"); put("ヅ", "zu"); put("デ", "de"); put("ド", "do");
|
||||
put("バ", "ba"); put("ビ", "bi"); put("ブ", "bu"); put("ベ", "be"); put("ボ", "bo");
|
||||
put("パ", "pa"); put("ピ", "pi"); put("プ", "pu"); put("ペ", "pe"); put("ポ", "po");
|
||||
|
||||
put("キャ", "kya"); put("キュ", "kyu"); put("キョ", "kyo");
|
||||
put("シャ", "sha"); put("シュ", "shu"); put("ショ", "sho");
|
||||
put("チャ", "cha"); put("チュ", "chu"); put("チョ", "cho");
|
||||
put("ニャ", "nya"); put("ニュ", "nyu"); put("ニョ", "nyo");
|
||||
put("ヒャ", "hya"); put("ヒュ", "hyu"); put("ヒョ", "hyo");
|
||||
put("ミャ", "mya"); put("ミュ", "myu"); put("ミョ", "myo");
|
||||
put("リャ", "rya"); put("リュ", "ryu"); put("リョ", "ryo");
|
||||
put("ギャ", "gya"); put("ギュ", "gyu"); put("ギョ", "gyo");
|
||||
put("ジャ", "ja"); put("ジュ", "ju"); put("ジョ", "jo");
|
||||
put("ヂャ", "ja"); put("ヂュ", "ju"); put("ヂョ", "jo");
|
||||
put("ビャ", "bya"); put("ビュ", "byu"); put("ビョ", "byo");
|
||||
put("ピャ", "pya"); put("ピュ", "pyu"); put("ピョ", "pyo");
|
||||
|
||||
put("イィ", "yi"); put("イェ", "ye");
|
||||
put("ウァ", "wa"); put("ウィ", "wi"); put("ウゥ", "wu"); put("ウェ", "we"); put("ウォ", "wo");
|
||||
put("ウュ", "wyu");
|
||||
// TODO: really should be vu
|
||||
put("ヴァ", "va"); put("ヴィ", "vi"); put("ヴ", "v"); put("ヴェ", "ve"); put("ヴォ", "vo");
|
||||
put("ヴャ", "vya"); put("ヴュ", "vyu"); put("ヴィェ", "vye"); put("ヴョ", "vyo");
|
||||
put("キェ", "kye");
|
||||
put("ギェ", "gye");
|
||||
put("クァ", "kwa"); put("クィ", "kwi"); put("クェ", "kwe"); put("クォ", "kwo");
|
||||
put("クヮ", "kwa");
|
||||
put("グァ", "gwa"); put("グィ", "gwi"); put("グェ", "gwe"); put("グォ", "gwo");
|
||||
put("グヮ", "gwa");
|
||||
put("シェ", "she");
|
||||
put("ジェ", "je");
|
||||
put("スィ", "si");
|
||||
put("ズィ", "zi");
|
||||
put("チェ", "che");
|
||||
put("ツァ", "tsa"); put("ツィ", "tsi"); put("ツェ", "tse"); put("ツォ", "tso");
|
||||
put("ツュ", "tsyu");
|
||||
put("ティ", "ti"); put("トゥ", "tu");
|
||||
put("テュ", "tyu");
|
||||
put("ディ", "di"); put("ドゥ", "du");
|
||||
put("デュ", "dyu");
|
||||
put("ニェ", "nye");
|
||||
put("ヒェ", "hye");
|
||||
put("ビェ", "bye");
|
||||
put("ピェ", "pye");
|
||||
put("ファ", "fa"); put("フィ", "fi"); put("フェ", "fe"); put("フォ", "fo");
|
||||
put("フャ", "fya"); put("フュ", "fyu"); put("フィェ", "fye"); put("フョ", "fyo");
|
||||
put("ホゥ", "hu");
|
||||
put("ミェ", "mye");
|
||||
put("リェ", "rye");
|
||||
put("ラ゜", "la"); put("リ゜", "li"); put("ル゜", "lu"); put("レ゜", "le"); put("ロ゜", "lo");
|
||||
put("ヷ", "va"); put("ヸ", "vi"); put("ヹ", "ve"); put("ヺ", "vo");
|
||||
}};
|
||||
|
||||
for (String s : table.keySet()) {
|
||||
assertEquals(s, table.get(s), ToStringUtil.getRomanization(s));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -25,20 +25,20 @@ import java.util.HashMap;
|
|||
import java.util.Locale;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.FacetSource;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
|
||||
import org.apache.lucene.benchmark.byTask.stats.Points;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.SearchTask;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
import org.apache.lucene.benchmark.byTask.utils.FileUtils;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -80,6 +80,7 @@ public class PerfRunData implements Closeable {
|
|||
private Directory directory;
|
||||
private Analyzer analyzer;
|
||||
private DocMaker docMaker;
|
||||
private ContentSource contentSource;
|
||||
private FacetSource facetSource;
|
||||
private Locale locale;
|
||||
|
||||
|
@ -105,10 +106,16 @@ public class PerfRunData implements Closeable {
|
|||
// analyzer (default is standard analyzer)
|
||||
analyzer = NewAnalyzerTask.createAnalyzer(config.get("analyzer",
|
||||
"org.apache.lucene.analysis.standard.StandardAnalyzer"));
|
||||
|
||||
// content source
|
||||
String sourceClass = config.get("content.source", "org.apache.lucene.benchmark.byTask.feeds.SingleDocSource");
|
||||
contentSource = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
|
||||
contentSource.setConfig(config);
|
||||
|
||||
// doc maker
|
||||
docMaker = Class.forName(config.get("doc.maker",
|
||||
"org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance();
|
||||
docMaker.setConfig(config);
|
||||
docMaker.setConfig(config, contentSource);
|
||||
// facet source
|
||||
facetSource = Class.forName(config.get("facet.source",
|
||||
"org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance();
|
||||
|
@ -129,10 +136,11 @@ public class PerfRunData implements Closeable {
|
|||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
IOUtils.close(indexWriter, indexReader, directory,
|
||||
taxonomyWriter, taxonomyReader, taxonomyDir,
|
||||
docMaker, facetSource);
|
||||
docMaker, facetSource, contentSource);
|
||||
|
||||
// close all perf objects that are closeable.
|
||||
ArrayList<Closeable> perfObjectsToClose = new ArrayList<Closeable>();
|
||||
|
@ -361,7 +369,12 @@ public class PerfRunData implements Closeable {
|
|||
this.analyzer = analyzer;
|
||||
}
|
||||
|
||||
/** Returns the docMaker. */
|
||||
/** Returns the ContentSource. */
|
||||
public ContentSource getContentSource() {
|
||||
return contentSource;
|
||||
}
|
||||
|
||||
/** Returns the DocMaker. */
|
||||
public DocMaker getDocMaker() {
|
||||
return docMaker;
|
||||
}
|
||||
|
@ -393,6 +406,7 @@ public class PerfRunData implements Closeable {
|
|||
}
|
||||
|
||||
public void resetInputs() throws IOException {
|
||||
contentSource.resetInputs();
|
||||
docMaker.resetInputs();
|
||||
facetSource.resetInputs();
|
||||
for (final QueryMaker queryMaker : readTaskQueryMaker.values()) {
|
||||
|
|
|
@ -131,7 +131,6 @@ public abstract class ContentItemsSource implements Closeable {
|
|||
* items generated since the last reset, so it's important to call
|
||||
* super.resetInputs in case you override this method.
|
||||
*/
|
||||
@SuppressWarnings("unused")
|
||||
public void resetInputs() throws IOException {
|
||||
bytesCount = 0;
|
||||
itemCount = 0;
|
||||
|
|
|
@ -355,26 +355,11 @@ public class DocMaker implements Closeable {
|
|||
* {@link ContentSource}, and it can be overridden to do more work (but make
|
||||
* sure to call super.close()).
|
||||
*/
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
source.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of bytes generated by the content source since last
|
||||
* reset.
|
||||
*/
|
||||
public synchronized long getBytesCount() {
|
||||
return source.getBytesCount();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the total number of bytes that were generated by the content source
|
||||
* defined to that doc maker.
|
||||
*/
|
||||
public long getTotalBytesCount() {
|
||||
return source.getTotalBytesCount();
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a {@link Document} object ready for indexing. This method uses the
|
||||
* {@link ContentSource} to get the next document from the source, and creates
|
||||
|
@ -426,26 +411,16 @@ public class DocMaker implements Closeable {
|
|||
public synchronized void resetInputs() throws IOException {
|
||||
source.printStatistics("docs");
|
||||
// re-initiate since properties by round may have changed.
|
||||
setConfig(config);
|
||||
setConfig(config, source);
|
||||
source.resetInputs();
|
||||
numDocsCreated.set(0);
|
||||
resetLeftovers();
|
||||
}
|
||||
|
||||
/** Set the configuration parameters of this doc maker. */
|
||||
public void setConfig(Config config) {
|
||||
public void setConfig(Config config, ContentSource source) {
|
||||
this.config = config;
|
||||
try {
|
||||
if (source != null) {
|
||||
source.close();
|
||||
}
|
||||
String sourceClass = config.get("content.source", "org.apache.lucene.benchmark.byTask.feeds.SingleDocSource");
|
||||
source = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
|
||||
source.setConfig(config);
|
||||
} catch (Exception e) {
|
||||
// Should not get here. Throw runtime exception.
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
this.source = source;
|
||||
|
||||
boolean stored = config.get("doc.stored", false);
|
||||
boolean bodyStored = config.get("doc.body.stored", stored);
|
||||
|
|
|
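The DocMaker hunks above move ContentSource construction out of setConfig: callers now build and configure the source themselves and pass it in. A sketch of the new wiring, modeled on the DocMakerTest and ExtractWikipedia changes elsewhere in this commit; the property names come from the diff, while the docs-file path and the choice of LineDocSource are just one possible setup:

import java.util.Properties;

import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.feeds.LineDocSource;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.document.Document;

public class DocMakerWiring {
  public static Document firstDocument(String docsFile) throws Exception {
    Properties props = new Properties();
    props.setProperty("docs.file", docsFile);            // caller-supplied path
    props.setProperty("content.source.forever", "false");
    Config config = new Config(props);

    ContentSource source = new LineDocSource();          // now created by the caller
    source.setConfig(config);

    DocMaker docMaker = new DocMaker();
    docMaker.setConfig(config, source);                   // new two-argument setConfig
    docMaker.resetInputs();
    return docMaker.makeDocument();
  }
}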
@ -20,34 +20,16 @@ package org.apache.lucene.benchmark.byTask.tasks;
|
|||
import org.apache.lucene.benchmark.byTask.PerfRunData;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.DocData;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
|
||||
/**
|
||||
* Consumes a {@link org.apache.lucene.benchmark.byTask.feeds.ContentSource}.
|
||||
* Supports the following parameters:
|
||||
* <ul>
|
||||
* <li>content.source - the content source to use. (mandatory)
|
||||
* </ul>
|
||||
*/
|
||||
/** Consumes a {@link org.apache.lucene.benchmark.byTask.feeds.ContentSource}. */
|
||||
public class ConsumeContentSourceTask extends PerfTask {
|
||||
|
||||
private ContentSource source;
|
||||
private DocData dd = new DocData();
|
||||
private final ContentSource source;
|
||||
private ThreadLocal<DocData> dd = new ThreadLocal<DocData>();
|
||||
|
||||
public ConsumeContentSourceTask(PerfRunData runData) {
|
||||
super(runData);
|
||||
Config config = runData.getConfig();
|
||||
String sourceClass = config.get("content.source", null);
|
||||
if (sourceClass == null) {
|
||||
throw new IllegalArgumentException("content.source must be defined");
|
||||
}
|
||||
try {
|
||||
source = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
|
||||
source.setConfig(config);
|
||||
source.resetInputs();
|
||||
} catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
source = runData.getContentSource();
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -55,15 +37,9 @@ public class ConsumeContentSourceTask extends PerfTask {
|
|||
return "read " + recsCount + " documents from the content source";
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws Exception {
|
||||
source.close();
|
||||
super.close();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int doLogic() throws Exception {
|
||||
dd = source.getNextDocData(dd);
|
||||
dd.set(source.getNextDocData(dd.get()));
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
|
|
@ -22,7 +22,9 @@ import java.io.FileWriter;
|
|||
import java.io.IOException;
|
||||
import java.util.Properties;
|
||||
|
||||
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource;
|
||||
import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -122,15 +124,19 @@ public class ExtractWikipedia {
|
|||
} else if (arg.equals("--discardImageOnlyDocs") || arg.equals("-d")) {
|
||||
keepImageOnlyDocs = false;
|
||||
}
|
||||
|
||||
}
|
||||
DocMaker docMaker = new DocMaker();
|
||||
|
||||
Properties properties = new Properties();
|
||||
properties.setProperty("content.source", "org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource");
|
||||
properties.setProperty("docs.file", wikipedia.getAbsolutePath());
|
||||
properties.setProperty("content.source.forever", "false");
|
||||
properties.setProperty("keep.image.only.docs", String.valueOf(keepImageOnlyDocs));
|
||||
docMaker.setConfig(new Config(properties));
|
||||
Config config = new Config(properties);
|
||||
|
||||
ContentSource source = new EnwikiContentSource();
|
||||
source.setConfig(config);
|
||||
|
||||
DocMaker docMaker = new DocMaker();
|
||||
docMaker.setConfig(config, source);
|
||||
docMaker.resetInputs();
|
||||
if (wikipedia.exists()) {
|
||||
System.out.println("Extracting Wikipedia to: " + outputDir + " using EnwikiContentSource");
|
||||
|
|
|
@ -28,7 +28,6 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
|
|||
import org.apache.lucene.benchmark.byTask.tasks.AddDocTask;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.CloseIndexTask;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.CreateIndexTask;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.ResetInputsTask;
|
||||
import org.apache.lucene.benchmark.byTask.tasks.TaskSequence;
|
||||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
import org.apache.lucene.document.Document;
|
||||
|
@ -42,7 +41,7 @@ import org.apache.lucene.search.TopDocs;
|
|||
/** Tests the functionality of {@link DocMaker}. */
|
||||
public class DocMakerTest extends BenchmarkTestCase {
|
||||
|
||||
static final class OneDocSource extends ContentSource {
|
||||
public static final class OneDocSource extends ContentSource {
|
||||
|
||||
private boolean finish = false;
|
||||
|
||||
|
@ -106,7 +105,6 @@ public class DocMakerTest extends BenchmarkTestCase {
|
|||
|
||||
// Indexing configuration.
|
||||
props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
|
||||
props.setProperty("content.source", OneDocSource.class.getName());
|
||||
props.setProperty("directory", "RAMDirectory");
|
||||
if (setNormsProp) {
|
||||
props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal));
|
||||
|
@ -119,7 +117,7 @@ public class DocMakerTest extends BenchmarkTestCase {
|
|||
Config config = new Config(props);
|
||||
|
||||
DocMaker dm = new DocMaker();
|
||||
dm.setConfig(config);
|
||||
dm.setConfig(config, new OneDocSource());
|
||||
return dm.makeDocument();
|
||||
}
|
||||
|
||||
|
@ -175,12 +173,15 @@ public class DocMakerTest extends BenchmarkTestCase {
|
|||
ps.close();
|
||||
|
||||
Properties props = new Properties();
|
||||
props.setProperty("content.source", "org.apache.lucene.benchmark.byTask.feeds.LineDocSource");
|
||||
props.setProperty("docs.file", f.getAbsolutePath());
|
||||
props.setProperty("content.source.forever", "false");
|
||||
Config config = new Config(props);
|
||||
|
||||
ContentSource source = new LineDocSource();
|
||||
source.setConfig(config);
|
||||
|
||||
DocMaker dm = new DocMaker();
|
||||
dm.setConfig(config);
|
||||
dm.setConfig(config, source);
|
||||
dm.resetInputs();
|
||||
dm.resetInputs();
|
||||
dm.close();
|
||||
|
|
|
@@ -225,8 +225,13 @@
  </target>

  <target name="process-webpages" depends="resolve-pegdown">
-    <pathconvert pathsep="|" dirsep="/" property="buildfiles">
+    <pathconvert pathsep="|" property="buildfiles">
      <fileset dir="." includes="**/build.xml" excludes="build.xml,analysis/*,build/**,tools/**,backwards/**,site/**"/>
+      <mapper>
+        <scriptmapper language="javascript">
+          self.addMappedName((new java.io.File(source)).toURI());
+        </scriptmapper>
+      </mapper>
    </pathconvert>
    <!--
      The XSL input file is ignored completely, but XSL expects one to be given,
|
@ -53,7 +53,7 @@ public final class MappingMultiDocsAndPositionsEnum extends DocsAndPositionsEnum
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return current.freq();
|
||||
}
|
||||
|
||||
|
|
|
@ -52,7 +52,7 @@ public final class MappingMultiDocsEnum extends DocsEnum {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return current.freq();
|
||||
}
|
||||
|
||||
|
|
|
@ -1022,7 +1022,7 @@ class Lucene3xFields extends FieldsProducer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return docs.freq();
|
||||
}
|
||||
|
||||
|
@ -1069,7 +1069,7 @@ class Lucene3xFields extends FieldsProducer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return pos.freq();
|
||||
}
|
||||
|
||||
|
|
|
@ -528,7 +528,7 @@ class Lucene3xTermVectorsReader extends TermVectorsReader {
|
|||
private Bits liveDocs;
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return freq;
|
||||
}
|
||||
|
||||
|
@ -574,7 +574,7 @@ class Lucene3xTermVectorsReader extends TermVectorsReader {
|
|||
private int[] endOffsets;
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
if (positions != null) {
|
||||
return positions.length;
|
||||
} else {
|
||||
|
|
|
@ -353,7 +353,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public final int freq() {
|
||||
public final int freq() throws IOException {
|
||||
assert !indexOmitsTF;
|
||||
return freq;
|
||||
}
|
||||
|
@ -772,7 +772,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return freq;
|
||||
}
|
||||
|
||||
|
@ -991,7 +991,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return freq;
|
||||
}
|
||||
|
||||
|
|
|
@ -549,7 +549,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
|||
private Bits liveDocs;
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return freq;
|
||||
}
|
||||
|
||||
|
@ -595,7 +595,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
|
|||
private int[] endOffsets;
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
if (positions != null) {
|
||||
return positions.length;
|
||||
} else {
|
||||
|
|
|
@ -424,7 +424,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
assert indexOptions != IndexOptions.DOCS_ONLY;
|
||||
return freq;
|
||||
}
|
||||
|
@ -624,7 +624,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return freq;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -356,7 +356,7 @@ public class PulsingPostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
assert indexOptions != IndexOptions.DOCS_ONLY;
|
||||
return freq;
|
||||
}
|
||||
|
@ -462,7 +462,7 @@ public class PulsingPostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return freq;
|
||||
}
|
||||
|
||||
|
|
|
@ -420,7 +420,7 @@ public class SepPostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
assert !omitTF;
|
||||
return freq;
|
||||
}
|
||||
|
@ -598,7 +598,7 @@ public class SepPostingsReader extends PostingsReaderBase {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return freq;
|
||||
}
|
||||
|
||||
|
|
|
@ -269,7 +269,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
assert !omitTF;
|
||||
return tf;
|
||||
}
|
||||
|
@ -370,7 +370,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return tf;
|
||||
}
|
||||
|
||||
|
|
|
@ -398,7 +398,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
|||
private Bits liveDocs;
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
assert freq != -1;
|
||||
return freq;
|
||||
}
|
||||
|
@ -445,7 +445,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
|
|||
private int[] endOffsets;
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
if (positions != null) {
|
||||
return positions.length;
|
||||
} else {
|
||||
|
|
|
@@ -17,6 +17,8 @@ package org.apache.lucene.index;
  * limitations under the License.
  */

+import java.io.IOException;
+
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.util.AttributeSource;

@@ -29,8 +31,9 @@ public abstract class DocsEnum extends DocIdSetIterator {

  /** Returns term frequency in the current document. Do
   *  not call this before {@link #nextDoc} is first called,
-   *  nor after {@link #nextDoc} returns NO_MORE_DOCS. */
-  public abstract int freq();
+   *  nor after {@link #nextDoc} returns NO_MORE_DOCS.
+   **/
+  public abstract int freq() throws IOException;

  /** Returns the related attributes. */
  public AttributeSource attributes() {
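With freq() now declaring IOException on DocsEnum, callers that already loop over postings need no structural change, since nextDoc() throws IOException as well. A small sketch of such a caller (the helper name is illustrative):

import java.io.IOException;

import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.DocIdSetIterator;

public class PostingsCounter {
  /** Sums term frequencies from a postings iterator. */
  public static long totalFreq(DocsEnum postings) throws IOException {
    long total = 0;
    while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
      total += postings.freq();   // may now throw IOException
    }
    return total;
  }
}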
|
|
@ -26,6 +26,7 @@ import java.util.Queue;
|
|||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
|
||||
import org.apache.lucene.index.DocumentsWriterStallControl.MemoryController;
|
||||
import org.apache.lucene.util.ThreadInterruptedException;
|
||||
|
||||
/**
|
||||
|
@ -40,7 +41,7 @@ import org.apache.lucene.util.ThreadInterruptedException;
|
|||
* {@link IndexWriterConfig#getRAMPerThreadHardLimitMB()} to prevent address
|
||||
* space exhaustion.
|
||||
*/
|
||||
final class DocumentsWriterFlushControl {
|
||||
final class DocumentsWriterFlushControl implements MemoryController {
|
||||
|
||||
private final long hardMaxBytesPerDWPT;
|
||||
private long activeBytes = 0;
|
||||
|
@ -88,7 +89,7 @@ final class DocumentsWriterFlushControl {
|
|||
return flushBytes + activeBytes;
|
||||
}
|
||||
|
||||
long stallLimitBytes() {
|
||||
public long stallLimitBytes() {
|
||||
final double maxRamMB = config.getRAMBufferSizeMB();
|
||||
return maxRamMB != IndexWriterConfig.DISABLE_AUTO_FLUSH ? (long)(2 * (maxRamMB * 1024 * 1024)) : Long.MAX_VALUE;
|
||||
}
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
/**
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
|
@ -19,6 +19,7 @@ package org.apache.lucene.index;
|
|||
import java.util.concurrent.locks.AbstractQueuedSynchronizer;
|
||||
|
||||
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
|
||||
import org.apache.lucene.util.ThreadInterruptedException;
|
||||
|
||||
/**
|
||||
* Controls the health status of a {@link DocumentsWriter} sessions. This class
|
||||
|
@@ -55,11 +56,11 @@ final class DocumentsWriterStallControl {

  boolean tryReset() {
    final int oldState = getState();
-    if (oldState == 0)
+    if (oldState == 0) {
      return true;
+    }
    if (compareAndSetState(oldState, 0)) {
-      releaseShared(0);
-      return true;
+      return releaseShared(0);
    }
    return false;
  }

@@ -97,11 +98,11 @@ final class DocumentsWriterStallControl {
   * {@link DocumentsWriterStallControl} to healthy and release all threads waiting on
   * {@link #waitIfStalled()}
   */
-  void updateStalled(DocumentsWriterFlushControl flushControl) {
+  void updateStalled(MemoryController controller) {
    do {
      // if we have more flushing / blocked DWPT than numActiveDWPT we stall!
      // don't stall if we have queued flushes - threads should be hijacked instead
-      while (flushControl.netBytes() > flushControl.stallLimitBytes()) {
+      while (controller.netBytes() > controller.stallLimitBytes()) {
        if (sync.trySetStalled()) {
          assert wasStalled = true;
          return;

@@ -111,10 +112,19 @@ final class DocumentsWriterStallControl {
  }

  void waitIfStalled() {
-    sync.acquireShared(0);
+    try {
+      sync.acquireSharedInterruptibly(0);
+    } catch (InterruptedException e) {
+      throw new ThreadInterruptedException(e);
+    }
  }

  boolean hasBlocked() { // for tests
    return sync.hasBlockedThreads;
  }
-}
+
+  static interface MemoryController {
+    long netBytes();
+    long stallLimitBytes();
+  }
+}
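The new package-private MemoryController interface gives updateStalled a narrow seam, so a test can feed it arbitrary byte counts instead of constructing a full DocumentsWriterFlushControl. A test-only sketch under that assumption; it must live in org.apache.lucene.index because both types are package-private, and the class name and values here are made up:

package org.apache.lucene.index;

class FakeMemoryController implements DocumentsWriterStallControl.MemoryController {
  private final long netBytes;
  private final long stallLimitBytes;

  FakeMemoryController(long netBytes, long stallLimitBytes) {
    this.netBytes = netBytes;
    this.stallLimitBytes = stallLimitBytes;
  }

  public long netBytes() { return netBytes; }              // pretend in-flight bytes
  public long stallLimitBytes() { return stallLimitBytes; } // pretend stall threshold
}

Passed to updateStalled, a fake reporting netBytes above stallLimitBytes should trip the stalled state; one reporting a value below it should leave writers unstalled.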
|
|
|
@ -225,7 +225,7 @@ public class FilterAtomicReader extends AtomicReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return in.freq();
|
||||
}
|
||||
|
||||
|
@ -259,7 +259,7 @@ public class FilterAtomicReader extends AtomicReader {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return in.freq();
|
||||
}
|
||||
|
||||
|
|
|
@@ -1092,12 +1092,19 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
   *
   * <p><b>WARNING</b>: the index does not currently record
   * which documents were added as a block. Today this is
-   * fine, because merging will preserve the block (as long
-   * as none them were deleted). But it's possible in the
-   * future that Lucene may more aggressively re-order
-   * documents (for example, perhaps to obtain better index
-   * compression), in which case you may need to fully
-   * re-index your documents at that time.
+   * fine, because merging will preserve a block. The order of
+   * documents within a segment will be preserved, even when child
+   * documents within a block are deleted. Most search features
+   * (like result grouping and block joining) require you to
+   * mark documents; when these documents are deleted these
+   * search features will not work as expected. Obviously adding
+   * documents to an existing block will require you the reindex
+   * the entire block.
+   *
+   * <p>However it's possible that in the future Lucene may
+   * merge more aggressively re-order documents (for example,
+   * perhaps to obtain better index compression), in which case
+   * you may need to fully re-index your documents at that time.
   *
   * <p>See {@link #addDocument(Iterable)} for details on
   * index and IndexWriter state after an Exception, and
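The reworked addDocuments javadoc above stresses that a block stays contiguous within a segment but that applications must mark parent and child documents themselves. A hedged sketch of block indexing under the usual block-join convention (parent added last); the helper and its name are illustrative, not part of this commit:

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;

public class BlockIndexing {
  /**
   * Indexes child documents and their parent as one atomic block; how the
   * documents are marked as parent vs. child is left to the application,
   * as the warning above explains.
   */
  public static void addFamily(IndexWriter writer, List<Document> children, Document parent)
      throws IOException {
    List<Document> block = new ArrayList<Document>(children);
    block.add(parent);            // parent last; the whole list is one block
    writer.addDocuments(block);   // kept adjacent within a segment by merging
  }
}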
|
|
@ -69,7 +69,7 @@ public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return current.freq();
|
||||
}
|
||||
|
||||
|
|
|
@ -69,7 +69,7 @@ public final class MultiDocsEnum extends DocsEnum {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return current.freq();
|
||||
}
|
||||
|
||||
|
|
|
@@ -190,6 +190,10 @@ public abstract class FieldComparator<T> {
    }
  }

+  /** Returns negative result if the doc's value is less
+   *  than the provided value. */
+  public abstract int compareDocToValue(int doc, T value) throws IOException;
+
  public static abstract class NumericComparator<T extends Number> extends FieldComparator<T> {
    protected final T missingValue;
    protected final String field;
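compareDocToValue, added above, must return a negative result when the document's value sorts below the supplied value. A standalone sketch of that contract, mirroring the missing-value substitution the numeric comparators later in this hunk use (not a Lucene class; the array and Bits parameters stand in for the comparator's per-segment state):

import org.apache.lucene.util.Bits;

public class CompareDocToValueSketch {
  /** Negative if the doc's value is less than the given value, positive if greater, else zero. */
  public static int compareDocToValue(int doc, int value,
                                      int[] currentReaderValues, Bits docsWithField,
                                      int missingValue) {
    int docValue = currentReaderValues[doc];
    // the docValue == 0 test saves a Bits.get() call in the common non-zero case
    if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
      docValue = missingValue;
    }
    if (docValue < value) {
      return -1;
    } else if (docValue > value) {
      return 1;
    } else {
      return 0;
    }
  }
}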
@ -274,9 +278,19 @@ public abstract class FieldComparator<T> {
|
|||
public Byte value(int slot) {
|
||||
return Byte.valueOf(values[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, Byte value) {
|
||||
byte docValue = currentReaderValues[doc];
|
||||
// Test for docValue == 0 to save Bits.get method call for
|
||||
// the common case (doc has value and value is non-zero):
|
||||
if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
|
||||
docValue = missingValue;
|
||||
}
|
||||
return docValue - value.byteValue();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Parses field's values as double (using {@link
|
||||
* FieldCache#getDoubles} and sorts by ascending value */
|
||||
public static final class DoubleComparator extends NumericComparator<Double> {
|
||||
|
@ -351,6 +365,24 @@ public abstract class FieldComparator<T> {
|
|||
public Double value(int slot) {
|
||||
return Double.valueOf(values[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, Double valueObj) {
|
||||
final double value = valueObj.doubleValue();
|
||||
double docValue = currentReaderValues[doc];
|
||||
// Test for docValue == 0 to save Bits.get method call for
|
||||
// the common case (doc has value and value is non-zero):
|
||||
if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
|
||||
docValue = missingValue;
|
||||
}
|
||||
if (docValue < value) {
|
||||
return -1;
|
||||
} else if (docValue > value) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Uses float index values to sort by ascending value */
|
||||
|
@ -415,6 +447,19 @@ public abstract class FieldComparator<T> {
|
|||
public Double value(int slot) {
|
||||
return Double.valueOf(values[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, Double valueObj) {
|
||||
final double value = valueObj.doubleValue();
|
||||
final double docValue = currentReaderValues.getFloat(doc);
|
||||
if (docValue < value) {
|
||||
return -1;
|
||||
} else if (docValue > value) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Parses field's values as float (using {@link
|
||||
|
@ -494,6 +539,24 @@ public abstract class FieldComparator<T> {
|
|||
public Float value(int slot) {
|
||||
return Float.valueOf(values[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, Float valueObj) {
|
||||
final float value = valueObj.floatValue();
|
||||
float docValue = currentReaderValues[doc];
|
||||
// Test for docValue == 0 to save Bits.get method call for
|
||||
// the common case (doc has value and value is non-zero):
|
||||
if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
|
||||
docValue = missingValue;
|
||||
}
|
||||
if (docValue < value) {
|
||||
return -1;
|
||||
} else if (docValue > value) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Parses field's values as short (using {@link
|
||||
|
@ -556,6 +619,18 @@ public abstract class FieldComparator<T> {
|
|||
public Short value(int slot) {
|
||||
return Short.valueOf(values[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, Short valueObj) {
|
||||
final short value = valueObj.shortValue();
|
||||
short docValue = currentReaderValues[doc];
|
||||
// Test for docValue == 0 to save Bits.get method call for
|
||||
// the common case (doc has value and value is non-zero):
|
||||
if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
|
||||
docValue = missingValue;
|
||||
}
|
||||
return docValue - value;
|
||||
}
|
||||
}
|
||||
|
||||
/** Parses field's values as int (using {@link
|
||||
|
@ -640,6 +715,24 @@ public abstract class FieldComparator<T> {
|
|||
public Integer value(int slot) {
|
||||
return Integer.valueOf(values[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, Integer valueObj) {
|
||||
final int value = valueObj.intValue();
|
||||
int docValue = currentReaderValues[doc];
|
||||
// Test for docValue == 0 to save Bits.get method call for
|
||||
// the common case (doc has value and value is non-zero):
|
||||
if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
|
||||
docValue = missingValue;
|
||||
}
|
||||
if (docValue < value) {
|
||||
return -1;
|
||||
} else if (docValue > value) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Loads int index values and sorts by ascending value. */
|
||||
|
@ -708,6 +801,19 @@ public abstract class FieldComparator<T> {
|
|||
public Long value(int slot) {
|
||||
return Long.valueOf(values[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, Long valueObj) {
|
||||
final long value = valueObj.longValue();
|
||||
final long docValue = currentReaderValues.getInt(doc);
|
||||
if (docValue < value) {
|
||||
return -1;
|
||||
} else if (docValue > value) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Parses field's values as long (using {@link
|
||||
|
@ -788,6 +894,24 @@ public abstract class FieldComparator<T> {
|
|||
public Long value(int slot) {
|
||||
return Long.valueOf(values[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, Long valueObj) {
|
||||
final long value = valueObj.longValue();
|
||||
long docValue = currentReaderValues[doc];
|
||||
// Test for docValue == 0 to save Bits.get method call for
|
||||
// the common case (doc has value and value is non-zero):
|
||||
if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
|
||||
docValue = missingValue;
|
||||
}
|
||||
if (docValue < value) {
|
||||
return -1;
|
||||
} else if (docValue > value) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Sorts by descending relevance. NOTE: if you are
|
||||
|
@ -815,12 +939,14 @@ public abstract class FieldComparator<T> {
|
|||
@Override
|
||||
public int compareBottom(int doc) throws IOException {
|
||||
float score = scorer.score();
|
||||
assert !Float.isNaN(score);
|
||||
return bottom > score ? -1 : (bottom < score ? 1 : 0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void copy(int slot, int doc) throws IOException {
|
||||
scores[slot] = scorer.score();
|
||||
assert !Float.isNaN(scores[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -857,6 +983,22 @@ public abstract class FieldComparator<T> {
|
|||
// sorts descending:
|
||||
return second.compareTo(first);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, Float valueObj) throws IOException {
|
||||
final float value = valueObj.floatValue();
|
||||
float docValue = scorer.score();
|
||||
assert !Float.isNaN(docValue);
|
||||
if (docValue < value) {
|
||||
// reverse of FloatComparator
|
||||
return 1;
|
||||
} else if (docValue > value) {
|
||||
// reverse of FloatComparator
|
||||
return -1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Sorts by ascending docID */
|
||||
|
@ -904,6 +1046,19 @@ public abstract class FieldComparator<T> {
|
|||
public Integer value(int slot) {
|
||||
return Integer.valueOf(docIDs[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, Integer valueObj) {
|
||||
final int value = valueObj.intValue();
|
||||
int docValue = docBase + doc;
|
||||
if (docValue < value) {
|
||||
return -1;
|
||||
} else if (docValue > value) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/** Sorts by field's natural Term sort order, using
|
||||
|
@ -998,6 +1153,20 @@ public abstract class FieldComparator<T> {
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, BytesRef value) {
|
||||
BytesRef docValue = termsIndex.getTerm(doc, tempBR);
|
||||
if (docValue == null) {
|
||||
if (value == null) {
|
||||
return 0;
|
||||
}
|
||||
return -1;
|
||||
} else if (value == null) {
|
||||
return 1;
|
||||
}
|
||||
return docValue.compareTo(value);
|
||||
}
|
||||
|
||||
/** Base class for specialized (per bit width of the
|
||||
* ords) per-segment comparator. NOTE: this is messy;
|
||||
* we do this only because hotspot can't reliably inline
|
||||
|
@ -1038,6 +1207,11 @@ public abstract class FieldComparator<T> {
|
|||
}
|
||||
return val1.compareTo(val2);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, BytesRef value) {
|
||||
return TermOrdValComparator.this.compareDocToValue(doc, value);
|
||||
}
|
||||
}
|
||||
|
||||
// Used per-segment when bit width of doc->ord is 8:
|
||||
|
@ -1385,6 +1559,11 @@ public abstract class FieldComparator<T> {
|
|||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, BytesRef value) {
|
||||
return termsIndex.getBytes(doc, tempBR).compareTo(value);
|
||||
}
|
||||
|
||||
// TODO: would be nice to share these specialized impls
|
||||
// w/ TermOrdValComparator
|
||||
|
||||
|
@ -1422,6 +1601,11 @@ public abstract class FieldComparator<T> {
|
|||
assert val2 != null;
|
||||
return comp.compare(val1, val2);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, BytesRef value) {
|
||||
return TermOrdValDocValuesComparator.this.compareDocToValue(doc, value);
|
||||
}
|
||||
}
|
||||
|
||||
// Used per-segment when bit width of doc->ord is 8:
|
||||
|
@ -1801,6 +1985,11 @@ public abstract class FieldComparator<T> {
|
|||
}
|
||||
return val1.compareTo(val2);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, BytesRef value) {
|
||||
return docTerms.getTerm(doc, tempBR).compareTo(value);
|
||||
}
|
||||
}
|
||||
|
||||
/** Sorts by field's natural Term sort order. All
|
||||
|
@ -1869,6 +2058,11 @@ public abstract class FieldComparator<T> {
|
|||
assert val2 != null;
|
||||
return val1.compareTo(val2);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, BytesRef value) {
|
||||
return docTerms.getBytes(doc, tempBR).compareTo(value);
|
||||
}
|
||||
}
|
||||
|
||||
final protected static int binarySearch(BytesRef br, DocTermsIndex a, BytesRef key) {
|
||||
|
|
|
@ -304,14 +304,51 @@ public class IndexSearcher {
|
|||
* <code>sort</code>.
|
||||
*
|
||||
* <p>NOTE: this does not compute scores by default; use
|
||||
* {@link IndexSearcher#setDefaultFieldSortScoring} to
|
||||
* enable scoring.
|
||||
* {@link IndexSearcher#search(Query,Filter,int,Sort,boolean,boolean)} to
|
||||
* control scoring.
|
||||
*
|
||||
* @throws BooleanQuery.TooManyClauses
|
||||
*/
|
||||
public TopFieldDocs search(Query query, Filter filter, int n,
|
||||
Sort sort) throws IOException {
|
||||
return search(createNormalizedWeight(wrapFilter(query, filter)), n, sort);
|
||||
return search(createNormalizedWeight(wrapFilter(query, filter)), n, sort, false, false);
|
||||
}
|
||||
|
||||
/** Search implementation with arbitrary sorting, plus
|
||||
* control over whether hit scores and max score
|
||||
* should be computed. Finds
|
||||
* the top <code>n</code> hits for <code>query</code>, applying
|
||||
* <code>filter</code> if non-null, and sorting the hits by the criteria in
|
||||
* <code>sort</code>. If <code>doDocScores</code> is <code>true</code>
|
||||
* then the score of each hit will be computed and
|
||||
* returned. If <code>doMaxScore</code> is
|
||||
* <code>true</code> then the maximum score over all
|
||||
* collected hits will be computed.
|
||||
*
|
||||
* @throws BooleanQuery.TooManyClauses
|
||||
*/
|
||||
public TopFieldDocs search(Query query, Filter filter, int n,
|
||||
Sort sort, boolean doDocScores, boolean doMaxScore) throws IOException {
|
||||
return search(createNormalizedWeight(wrapFilter(query, filter)), n, sort, doDocScores, doMaxScore);
|
||||
}
|
||||
|
||||
/** Finds the top <code>n</code>
|
||||
* hits for <code>query</code>, applying <code>filter</code> if non-null,
|
||||
* where all results are after a previous result (<code>after</code>).
|
||||
* <p>
|
||||
* By passing the bottom result from a previous page as <code>after</code>,
|
||||
* this method can be used for efficient 'deep-paging' across potentially
|
||||
* large result sets.
|
||||
*
|
||||
* @throws BooleanQuery.TooManyClauses
|
||||
*/
|
||||
public TopDocs searchAfter(ScoreDoc after, Query query, Filter filter, int n, Sort sort) throws IOException {
|
||||
if (after != null && !(after instanceof FieldDoc)) {
|
||||
// TODO: if we fix type safety of TopFieldDocs we can
|
||||
// remove this
|
||||
throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
|
||||
}
|
||||
return search(createNormalizedWeight(wrapFilter(query, filter)), (FieldDoc) after, n, sort, true, false, false);
|
||||
}
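A deep-paging sketch built on the method above (illustrative only: the reader, the query and the "title" sort field are placeholders; each page's last hit seeds the next call, which is the same pattern TestSearchAfter exercises later in this commit):

IndexSearcher searcher = new IndexSearcher(reader);
Query query = new TermQuery(new Term("body", "text"));
Sort sort = new Sort(new SortField("title", SortField.Type.STRING));
ScoreDoc after = null;                       // null requests the first page
while (true) {
  TopDocs page = searcher.searchAfter(after, query, null, 20, sort);
  if (page.scoreDocs.length == 0) {
    break;                                   // ran out of hits
  }
  for (ScoreDoc sd : page.scoreDocs) {
    // process the hit; sd is a FieldDoc because a Sort was used
  }
  after = page.scoreDocs[page.scoreDocs.length - 1];
}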
|
||||
|
||||
/**
|
||||
|
@ -324,7 +361,52 @@ public class IndexSearcher {
|
|||
*/
|
||||
public TopFieldDocs search(Query query, int n,
|
||||
Sort sort) throws IOException {
|
||||
return search(createNormalizedWeight(query), n, sort);
|
||||
return search(createNormalizedWeight(query), n, sort, false, false);
|
||||
}
|
||||
|
||||
/** Finds the top <code>n</code>
|
||||
* hits for <code>query</code> where all results are after a previous
|
||||
* result (<code>after</code>).
|
||||
* <p>
|
||||
* By passing the bottom result from a previous page as <code>after</code>,
|
||||
* this method can be used for efficient 'deep-paging' across potentially
|
||||
* large result sets.
|
||||
*
|
||||
* @throws BooleanQuery.TooManyClauses
|
||||
*/
|
||||
public TopDocs searchAfter(ScoreDoc after, Query query, int n, Sort sort) throws IOException {
|
||||
if (after != null && !(after instanceof FieldDoc)) {
|
||||
// TODO: if we fix type safety of TopFieldDocs we can
|
||||
// remove this
|
||||
throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
|
||||
}
|
||||
return search(createNormalizedWeight(query), (FieldDoc) after, n, sort, true, false, false);
|
||||
}
|
||||
|
||||
/** Finds the top <code>n</code>
|
||||
* hits for <code>query</code> where all results are after a previous
|
||||
* result (<code>after</code>), allowing control over
|
||||
* whether hit scores and max score should be computed.
|
||||
* <p>
|
||||
* By passing the bottom result from a previous page as <code>after</code>,
|
||||
* this method can be used for efficient 'deep-paging' across potentially
|
||||
* large result sets. If <code>doDocScores</code> is <code>true</code>
|
||||
* then the score of each hit will be computed and
|
||||
* returned. If <code>doMaxScore</code> is
|
||||
* <code>true</code> then the maximum score over all
|
||||
* collected hits will be computed.
|
||||
*
|
||||
* @throws BooleanQuery.TooManyClauses
|
||||
*/
|
||||
public TopDocs searchAfter(ScoreDoc after, Query query, Filter filter, int n, Sort sort,
|
||||
boolean doDocScores, boolean doMaxScore) throws IOException {
|
||||
if (after != null && !(after instanceof FieldDoc)) {
|
||||
// TODO: if we fix type safety of TopFieldDocs we can
|
||||
// remove this
|
||||
throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
|
||||
}
|
||||
return search(createNormalizedWeight(wrapFilter(query, filter)), (FieldDoc) after, n, sort, true,
|
||||
doDocScores, doMaxScore);
|
||||
}
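The same paging pattern with the optional scoring work switched on (a sketch with placeholder names, assuming the first page returns at least one hit):

Sort sort = new Sort(new SortField("date", SortField.Type.LONG, /*reverse=*/true));
TopDocs firstPage = searcher.search(query, null, 20, sort, /*doDocScores=*/true, /*doMaxScore=*/true);
ScoreDoc bottom = firstPage.scoreDocs[firstPage.scoreDocs.length - 1];
TopDocs secondPage = searcher.searchAfter(bottom, query, null, 20, sort,
                                          /*doDocScores=*/true, /*doMaxScore=*/true);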
|
||||
|
||||
/** Expert: Low-level search implementation. Finds the top <code>n</code>
|
||||
|
@ -383,7 +465,9 @@ public class IndexSearcher {
|
|||
return collector.topDocs();
|
||||
}
|
||||
|
||||
/** Expert: Low-level search implementation with arbitrary sorting. Finds
|
||||
/** Expert: Low-level search implementation with arbitrary
|
||||
* sorting and control over whether hit scores and max
|
||||
* score should be computed. Finds
|
||||
* the top <code>n</code> hits for <code>query</code> and sorting the hits
|
||||
* by the criteria in <code>sort</code>.
|
||||
*
|
||||
|
@ -393,12 +477,13 @@ public class IndexSearcher {
|
|||
* @throws BooleanQuery.TooManyClauses
|
||||
*/
|
||||
protected TopFieldDocs search(Weight weight,
|
||||
final int nDocs, Sort sort) throws IOException {
|
||||
return search(weight, nDocs, sort, true);
|
||||
final int nDocs, Sort sort,
|
||||
boolean doDocScores, boolean doMaxScore) throws IOException {
|
||||
return search(weight, null, nDocs, sort, true, doDocScores, doMaxScore);
|
||||
}
|
||||
|
||||
/**
|
||||
* Just like {@link #search(Weight, int, Sort)}, but you choose
|
||||
* Just like {@link #search(Weight, int, Sort, boolean, boolean)}, but you choose
|
||||
* whether or not the fields in the returned {@link FieldDoc} instances should
|
||||
* be set by specifying fillFields.
|
||||
*
|
||||
|
@ -408,27 +493,29 @@ public class IndexSearcher {
|
|||
* then pass that to {@link #search(AtomicReaderContext[], Weight,
|
||||
* Collector)}.</p>
|
||||
*/
|
||||
protected TopFieldDocs search(Weight weight, int nDocs,
|
||||
Sort sort, boolean fillFields)
|
||||
protected TopFieldDocs search(Weight weight, FieldDoc after, int nDocs,
|
||||
Sort sort, boolean fillFields,
|
||||
boolean doDocScores, boolean doMaxScore)
|
||||
throws IOException {
|
||||
|
||||
if (sort == null) throw new NullPointerException();
|
||||
|
||||
if (executor == null) {
|
||||
// use all leaves here!
|
||||
return search (leafContexts, weight, nDocs, sort, fillFields);
|
||||
return search(leafContexts, weight, after, nDocs, sort, fillFields, doDocScores, doMaxScore);
|
||||
} else {
|
||||
final TopFieldCollector topCollector = TopFieldCollector.create(sort, nDocs,
|
||||
after,
|
||||
fillFields,
|
||||
fieldSortDoTrackScores,
|
||||
fieldSortDoMaxScore,
|
||||
doDocScores,
|
||||
doMaxScore,
|
||||
false);
|
||||
|
||||
final Lock lock = new ReentrantLock();
|
||||
final ExecutionHelper<TopFieldDocs> runner = new ExecutionHelper<TopFieldDocs>(executor);
|
||||
for (int i = 0; i < leafSlices.length; i++) { // search each leaf slice
|
||||
runner.submit(
|
||||
new SearcherCallableWithSort(lock, this, leafSlices[i], weight, nDocs, topCollector, sort));
|
||||
new SearcherCallableWithSort(lock, this, leafSlices[i], weight, after, nDocs, topCollector, sort, doDocScores, doMaxScore));
|
||||
}
|
||||
int totalHits = 0;
|
||||
float maxScore = Float.NEGATIVE_INFINITY;
|
||||
|
@ -447,18 +534,12 @@ public class IndexSearcher {
|
|||
|
||||
|
||||
/**
|
||||
* Just like {@link #search(Weight, int, Sort)}, but you choose
|
||||
* Just like {@link #search(Weight, int, Sort, boolean, boolean)}, but you choose
|
||||
* whether or not the fields in the returned {@link FieldDoc} instances should
|
||||
* be set by specifying fillFields.
|
||||
*
|
||||
* <p>NOTE: this does not compute scores by default. If you
|
||||
* need scores, create a {@link TopFieldCollector}
|
||||
* instance by calling {@link TopFieldCollector#create} and
|
||||
* then pass that to {@link #search(AtomicReaderContext[], Weight,
|
||||
* Collector)}.</p>
|
||||
*/
|
||||
protected TopFieldDocs search(AtomicReaderContext[] leaves, Weight weight, int nDocs,
|
||||
Sort sort, boolean fillFields) throws IOException {
|
||||
protected TopFieldDocs search(AtomicReaderContext[] leaves, Weight weight, FieldDoc after, int nDocs,
|
||||
Sort sort, boolean fillFields, boolean doDocScores, boolean doMaxScore) throws IOException {
|
||||
// single thread
|
||||
int limit = reader.maxDoc();
|
||||
if (limit == 0) {
|
||||
|
@ -466,8 +547,9 @@ public class IndexSearcher {
|
|||
}
|
||||
nDocs = Math.min(nDocs, limit);
|
||||
|
||||
TopFieldCollector collector = TopFieldCollector.create(sort, nDocs,
|
||||
fillFields, fieldSortDoTrackScores, fieldSortDoMaxScore, !weight.scoresDocsOutOfOrder());
|
||||
TopFieldCollector collector = TopFieldCollector.create(sort, nDocs, after,
|
||||
fillFields, doDocScores,
|
||||
doMaxScore, !weight.scoresDocsOutOfOrder());
|
||||
search(leaves, weight, collector);
|
||||
return (TopFieldDocs) collector.topDocs();
|
||||
}
|
||||
|
@ -553,26 +635,6 @@ public class IndexSearcher {
|
|||
return weight.explain(leafContexts[n], deBasedDoc);
|
||||
}
|
||||
|
||||
private boolean fieldSortDoTrackScores;
|
||||
private boolean fieldSortDoMaxScore;
|
||||
|
||||
/** By default, no scores are computed when sorting by
|
||||
* field (using {@link #search(Query,Filter,int,Sort)}).
|
||||
* You can change that, per IndexSearcher instance, by
|
||||
* calling this method. Note that this will incur a CPU
|
||||
* cost.
|
||||
*
|
||||
* @param doTrackScores If true, then scores are
|
||||
* returned for every matching document in {@link
|
||||
* TopFieldDocs}.
|
||||
*
|
||||
* @param doMaxScore If true, then the max score for all
|
||||
* matching docs is computed. */
|
||||
public void setDefaultFieldSortScoring(boolean doTrackScores, boolean doMaxScore) {
|
||||
fieldSortDoTrackScores = doTrackScores;
|
||||
fieldSortDoMaxScore = doMaxScore;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a normalized weight for a top-level {@link Query}.
|
||||
* The query is rewritten by this method and {@link Query#createWeight} called,
|
||||
|
@ -626,7 +688,7 @@ public class IndexSearcher {
|
|||
}
|
||||
|
||||
public TopDocs call() throws IOException {
|
||||
final TopDocs docs = searcher.search (slice.leaves, weight, after, nDocs);
|
||||
final TopDocs docs = searcher.search(slice.leaves, weight, after, nDocs);
|
||||
final ScoreDoc[] scoreDocs = docs.scoreDocs;
|
||||
//it would be so nice if we had a thread-safe insert
|
||||
lock.lock();
|
||||
|
@ -657,9 +719,13 @@ public class IndexSearcher {
|
|||
private final TopFieldCollector hq;
|
||||
private final Sort sort;
|
||||
private final LeafSlice slice;
|
||||
private final FieldDoc after;
|
||||
private final boolean doDocScores;
|
||||
private final boolean doMaxScore;
|
||||
|
||||
public SearcherCallableWithSort(Lock lock, IndexSearcher searcher, LeafSlice slice, Weight weight,
|
||||
int nDocs, TopFieldCollector hq, Sort sort) {
|
||||
FieldDoc after, int nDocs, TopFieldCollector hq, Sort sort,
|
||||
boolean doDocScores, boolean doMaxScore) {
|
||||
this.lock = lock;
|
||||
this.searcher = searcher;
|
||||
this.weight = weight;
|
||||
|
@ -667,6 +733,9 @@ public class IndexSearcher {
|
|||
this.hq = hq;
|
||||
this.sort = sort;
|
||||
this.slice = slice;
|
||||
this.after = after;
|
||||
this.doDocScores = doDocScores;
|
||||
this.doMaxScore = doMaxScore;
|
||||
}
|
||||
|
||||
private final class FakeScorer extends Scorer {
|
||||
|
@ -707,7 +776,7 @@ public class IndexSearcher {
|
|||
|
||||
public TopFieldDocs call() throws IOException {
|
||||
assert slice.leaves.length == 1;
|
||||
final TopFieldDocs docs = searcher.search (slice.leaves, weight, nDocs, sort, true);
|
||||
final TopFieldDocs docs = searcher.search(slice.leaves, weight, after, nDocs, sort, true, doDocScores, doMaxScore);
|
||||
lock.lock();
|
||||
try {
|
||||
final int base = slice.leaves[0].docBase;
|
||||
|
@ -718,6 +787,11 @@ public class IndexSearcher {
|
|||
fakeScorer.score = scoreDoc.score;
|
||||
hq.collect(scoreDoc.doc-base);
|
||||
}
|
||||
|
||||
// Carry over maxScore from sub:
|
||||
if (doMaxScore && docs.getMaxScore() > hq.maxScore) {
|
||||
hq.maxScore = docs.getMaxScore();
|
||||
}
|
||||
} finally {
|
||||
lock.unlock();
|
||||
}
|
||||
|
|
|
@ -568,7 +568,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
|
|||
}
|
||||
|
||||
@Override
|
||||
public final int freq() {
|
||||
public final int freq() throws IOException {
|
||||
return _freq;
|
||||
}
|
||||
|
||||
|
|
|
@ -51,7 +51,7 @@ final class TermScorer extends Scorer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public float freq() {
|
||||
public float freq() throws IOException {
|
||||
return docsEnum.freq();
|
||||
}
|
||||
|
||||
|
@ -66,7 +66,7 @@ final class TermScorer extends Scorer {
|
|||
}
|
||||
|
||||
@Override
|
||||
public float score() {
|
||||
public float score() throws IOException {
|
||||
assert docID() != NO_MORE_DOCS;
|
||||
return docScorer.score(docsEnum.docID(), docsEnum.freq());
|
||||
}
|
||||
|
|
|
@ -45,7 +45,7 @@ public class TopDocs {
|
|||
|
||||
/** Sets the maximum score value encountered. */
|
||||
public void setMaxScore(float maxScore) {
|
||||
this.maxScore=maxScore;
|
||||
this.maxScore = maxScore;
|
||||
}
|
||||
|
||||
/** Constructs a TopDocs with a default maxScore=Float.NaN. */
|
||||
|
|
|
@ -843,6 +843,166 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
|
||||
}
|
||||
|
||||
/*
|
||||
* Implements a TopFieldCollector when after != null.
|
||||
*/
|
||||
private final static class PagingFieldCollector extends TopFieldCollector {
|
||||
|
||||
Scorer scorer;
|
||||
int collectedHits;
|
||||
final FieldComparator<?>[] comparators;
|
||||
final int[] reverseMul;
|
||||
final FieldValueHitQueue<Entry> queue;
|
||||
final boolean trackDocScores;
|
||||
final boolean trackMaxScore;
|
||||
final FieldDoc after;
|
||||
int afterDoc;
|
||||
|
||||
public PagingFieldCollector(
|
||||
FieldValueHitQueue<Entry> queue, FieldDoc after, int numHits, boolean fillFields,
|
||||
boolean trackDocScores, boolean trackMaxScore)
|
||||
throws IOException {
|
||||
super(queue, numHits, fillFields);
|
||||
this.queue = queue;
|
||||
this.trackDocScores = trackDocScores;
|
||||
this.trackMaxScore = trackMaxScore;
|
||||
this.after = after;
|
||||
comparators = queue.getComparators();
|
||||
reverseMul = queue.getReverseMul();
|
||||
|
||||
// Must set maxScore to NEG_INF, or otherwise Math.max always returns NaN.
|
||||
maxScore = Float.NEGATIVE_INFINITY;
|
||||
}
|
||||
|
||||
void updateBottom(int doc, float score) {
|
||||
bottom.doc = docBase + doc;
|
||||
bottom.score = score;
|
||||
bottom = pq.updateTop();
|
||||
}
|
||||
|
||||
@SuppressWarnings({"unchecked", "rawtypes"})
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
totalHits++;
|
||||
|
||||
//System.out.println(" collect doc=" + doc);
|
||||
|
||||
// Check if this hit was already collected on a
|
||||
// previous page:
|
||||
boolean sameValues = true;
|
||||
for(int compIDX=0;compIDX<comparators.length;compIDX++) {
|
||||
final FieldComparator comp = comparators[compIDX];
|
||||
|
||||
final int cmp = reverseMul[compIDX] * comp.compareDocToValue(doc, after.fields[compIDX]);
|
||||
if (cmp < 0) {
|
||||
// Already collected on a previous page
|
||||
//System.out.println(" skip: before");
|
||||
return;
|
||||
} else if (cmp > 0) {
|
||||
// Not yet collected
|
||||
sameValues = false;
|
||||
//System.out.println(" keep: after");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Tie-break by docID:
|
||||
if (sameValues && doc <= afterDoc) {
|
||||
// Already collected on a previous page
|
||||
//System.out.println(" skip: tie-break");
|
||||
return;
|
||||
}
|
||||
|
||||
collectedHits++;
|
||||
|
||||
float score = Float.NaN;
|
||||
if (trackMaxScore) {
|
||||
score = scorer.score();
|
||||
if (score > maxScore) {
|
||||
maxScore = score;
|
||||
}
|
||||
}
|
||||
|
||||
if (queueFull) {
|
||||
// Fastmatch: return if this hit is not competitive
|
||||
for (int i = 0;; i++) {
|
||||
final int c = reverseMul[i] * comparators[i].compareBottom(doc);
|
||||
if (c < 0) {
|
||||
// Definitely not competitive.
|
||||
return;
|
||||
} else if (c > 0) {
|
||||
// Definitely competitive.
|
||||
break;
|
||||
} else if (i == comparators.length - 1) {
|
||||
// This is the equals case.
|
||||
if (doc + docBase > bottom.doc) {
|
||||
// Definitely not competitive
|
||||
return;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// This hit is competitive - replace bottom element in queue & adjustTop
|
||||
for (int i = 0; i < comparators.length; i++) {
|
||||
comparators[i].copy(bottom.slot, doc);
|
||||
}
|
||||
|
||||
// Compute score only if it is competitive.
|
||||
if (trackDocScores && !trackMaxScore) {
|
||||
score = scorer.score();
|
||||
}
|
||||
updateBottom(doc, score);
|
||||
|
||||
for (int i = 0; i < comparators.length; i++) {
|
||||
comparators[i].setBottom(bottom.slot);
|
||||
}
|
||||
} else {
|
||||
// Startup transient: queue hasn't gathered numHits yet
|
||||
final int slot = collectedHits - 1;
|
||||
//System.out.println(" slot=" + slot);
|
||||
// Copy hit into queue
|
||||
for (int i = 0; i < comparators.length; i++) {
|
||||
comparators[i].copy(slot, doc);
|
||||
}
|
||||
|
||||
// Compute score only if it is competitive.
|
||||
if (trackDocScores && !trackMaxScore) {
|
||||
score = scorer.score();
|
||||
}
|
||||
bottom = pq.add(new Entry(slot, docBase + doc, score));
|
||||
queueFull = collectedHits == numHits;
|
||||
if (queueFull) {
|
||||
for (int i = 0; i < comparators.length; i++) {
|
||||
comparators[i].setBottom(bottom.slot);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {
|
||||
this.scorer = scorer;
|
||||
for (int i = 0; i < comparators.length; i++) {
|
||||
comparators[i].setScorer(scorer);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(AtomicReaderContext context) throws IOException {
|
||||
docBase = context.docBase;
|
||||
afterDoc = after.doc - docBase;
|
||||
for (int i = 0; i < comparators.length; i++) {
|
||||
queue.setComparator(i, comparators[i].setNextReader(context));
|
||||
}
|
||||
}
|
||||
}
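A condensed sketch of the page test that collect() above performs (the helper name is invented; comparators, reverseMul, after and afterDoc are assumed to be set up exactly as in PagingFieldCollector): a hit belongs to the current page only if it sorts strictly after the previous page's bottom, with the segment-local docID as the tie-breaker.

@SuppressWarnings({"unchecked", "rawtypes"})
static boolean collectedOnPreviousPage(FieldComparator[] comparators, int[] reverseMul,
                                       FieldDoc after, int doc, int afterDoc) throws IOException {
  boolean sameValues = true;
  for (int i = 0; i < comparators.length; i++) {
    final int cmp = reverseMul[i] * comparators[i].compareDocToValue(doc, after.fields[i]);
    if (cmp < 0) {
      return true;          // sorts before 'after': already returned on an earlier page
    } else if (cmp > 0) {
      sameValues = false;   // sorts after 'after': definitely new
      break;
    }
  }
  // every sort value tied with 'after': fall back to docID order
  return sameValues && doc <= afterDoc;
}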
|
||||
|
||||
private static final ScoreDoc[] EMPTY_SCOREDOCS = new ScoreDoc[0];
|
||||
|
||||
private final boolean fillFields;
|
||||
|
@ -909,6 +1069,52 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
boolean fillFields, boolean trackDocScores, boolean trackMaxScore,
|
||||
boolean docsScoredInOrder)
|
||||
throws IOException {
|
||||
return create(sort, numHits, null, fillFields, trackDocScores, trackMaxScore, docsScoredInOrder);
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates a new {@link TopFieldCollector} from the given
|
||||
* arguments.
|
||||
*
|
||||
* <p><b>NOTE</b>: The instances returned by this method
|
||||
* pre-allocate a full array of length
|
||||
* <code>numHits</code>.
|
||||
*
|
||||
* @param sort
|
||||
* the sort criteria (SortFields).
|
||||
* @param numHits
|
||||
* the number of results to collect.
|
||||
* @param after
|
||||
* only hits after this FieldDoc will be collected
|
||||
* @param fillFields
|
||||
* specifies whether the actual field values should be returned on
|
||||
* the results (FieldDoc).
|
||||
* @param trackDocScores
|
||||
* specifies whether document scores should be tracked and set on the
|
||||
* results. Note that if set to false, then the results' scores will
|
||||
* be set to Float.NaN. Setting this to true affects performance, as
|
||||
* it incurs the score computation on each competitive result.
|
||||
* Therefore if document scores are not required by the application,
|
||||
* it is recommended to set it to false.
|
||||
* @param trackMaxScore
|
||||
* specifies whether the query's maxScore should be tracked and set
|
||||
* on the resulting {@link TopDocs}. Note that if set to false,
|
||||
* {@link TopDocs#getMaxScore()} returns Float.NaN. Setting this to
|
||||
* true affects performance as it incurs the score computation on
|
||||
* each result. Also, setting this true automatically sets
|
||||
* <code>trackDocScores</code> to true as well.
|
||||
* @param docsScoredInOrder
|
||||
* specifies whether documents are scored in doc Id order or not by
|
||||
* the given {@link Scorer} in {@link #setScorer(Scorer)}.
|
||||
* @return a {@link TopFieldCollector} instance which will sort the results by
|
||||
* the sort criteria.
|
||||
* @throws IOException
|
||||
*/
|
||||
public static TopFieldCollector create(Sort sort, int numHits, FieldDoc after,
|
||||
boolean fillFields, boolean trackDocScores, boolean trackMaxScore,
|
||||
boolean docsScoredInOrder)
|
||||
throws IOException {
|
||||
|
||||
if (sort.fields.length == 0) {
|
||||
throw new IllegalArgumentException("Sort must contain at least one field");
|
||||
}
|
||||
|
@ -918,43 +1124,56 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
|
|||
}
|
||||
|
||||
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
|
||||
if (queue.getComparators().length == 1) {
|
||||
|
||||
if (after == null) {
|
||||
if (queue.getComparators().length == 1) {
|
||||
if (docsScoredInOrder) {
|
||||
if (trackMaxScore) {
|
||||
return new OneComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
|
||||
} else if (trackDocScores) {
|
||||
return new OneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
|
||||
} else {
|
||||
return new OneComparatorNonScoringCollector(queue, numHits, fillFields);
|
||||
}
|
||||
} else {
|
||||
if (trackMaxScore) {
|
||||
return new OutOfOrderOneComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
|
||||
} else if (trackDocScores) {
|
||||
return new OutOfOrderOneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
|
||||
} else {
|
||||
return new OutOfOrderOneComparatorNonScoringCollector(queue, numHits, fillFields);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// multiple comparators.
|
||||
if (docsScoredInOrder) {
|
||||
if (trackMaxScore) {
|
||||
return new OneComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
|
||||
return new MultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
|
||||
} else if (trackDocScores) {
|
||||
return new OneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
|
||||
return new MultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
|
||||
} else {
|
||||
return new OneComparatorNonScoringCollector(queue, numHits, fillFields);
|
||||
return new MultiComparatorNonScoringCollector(queue, numHits, fillFields);
|
||||
}
|
||||
} else {
|
||||
if (trackMaxScore) {
|
||||
return new OutOfOrderOneComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
|
||||
return new OutOfOrderMultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
|
||||
} else if (trackDocScores) {
|
||||
return new OutOfOrderOneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
|
||||
return new OutOfOrderMultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
|
||||
} else {
|
||||
return new OutOfOrderOneComparatorNonScoringCollector(queue, numHits, fillFields);
|
||||
return new OutOfOrderMultiComparatorNonScoringCollector(queue, numHits, fillFields);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// multiple comparators.
|
||||
if (docsScoredInOrder) {
|
||||
if (trackMaxScore) {
|
||||
return new MultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
|
||||
} else if (trackDocScores) {
|
||||
return new MultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
|
||||
} else {
|
||||
return new MultiComparatorNonScoringCollector(queue, numHits, fillFields);
|
||||
}
|
||||
} else {
|
||||
if (trackMaxScore) {
|
||||
return new OutOfOrderMultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
|
||||
} else if (trackDocScores) {
|
||||
return new OutOfOrderMultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
|
||||
} else {
|
||||
return new OutOfOrderMultiComparatorNonScoringCollector(queue, numHits, fillFields);
|
||||
if (after.fields == null) {
|
||||
throw new IllegalArgumentException("after.fields wasn't set; you must pass fillFields=true for the previous search");
|
||||
}
|
||||
|
||||
if (after.fields.length != sort.getSort().length) {
|
||||
throw new IllegalArgumentException("after.fields has " + after.fields.length + " values but sort has " + sort.getSort().length);
|
||||
}
|
||||
|
||||
return new PagingFieldCollector(queue, after, numHits, fillFields, trackDocScores, trackMaxScore);
|
||||
}
|
||||
}
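A sketch of driving the same paging through this expert factory instead of IndexSearcher.searchAfter() (illustrative; searcher, query and lastPageBottom are placeholders, lastPageBottom being a FieldDoc taken from a page collected with fillFields=true, and the snippet is assumed to live in a method that declares IOException):

Sort sort = new Sort(new SortField("id", SortField.Type.INT));
TopFieldCollector collector = TopFieldCollector.create(
    sort, 10, lastPageBottom,  // FieldDoc from the previous page, or null for page one
    true,                      // fillFields, so the next page can be seeded from the results
    false,                     // trackDocScores
    false,                     // trackMaxScore
    true);                     // docsScoredInOrder
searcher.search(query, collector);
TopDocs nextPage = collector.topDocs();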
|
||||
|
||||
|
|
|
@ -21,20 +21,19 @@ package org.apache.lucene.search;
|
|||
/** Represents hits returned by {@link
|
||||
* IndexSearcher#search(Query,Filter,int,Sort)}.
|
||||
*/
|
||||
public class TopFieldDocs
|
||||
extends TopDocs {
|
||||
public class TopFieldDocs extends TopDocs {
|
||||
|
||||
/** The fields which were used to sort results by. */
|
||||
public SortField[] fields;
|
||||
/** The fields which were used to sort results by. */
|
||||
public SortField[] fields;
|
||||
|
||||
/** Creates one of these objects.
|
||||
* @param totalHits Total number of hits for the query.
|
||||
* @param scoreDocs The top hits for the query.
|
||||
* @param fields The sort criteria used to find the top hits.
|
||||
* @param maxScore The maximum score encountered.
|
||||
*/
|
||||
public TopFieldDocs (int totalHits, ScoreDoc[] scoreDocs, SortField[] fields, float maxScore) {
|
||||
super (totalHits, scoreDocs, maxScore);
|
||||
this.fields = fields;
|
||||
}
|
||||
/** Creates one of these objects.
|
||||
* @param totalHits Total number of hits for the query.
|
||||
* @param scoreDocs The top hits for the query.
|
||||
* @param fields The sort criteria used to find the top hits.
|
||||
* @param maxScore The maximum score encountered.
|
||||
*/
|
||||
public TopFieldDocs (int totalHits, ScoreDoc[] scoreDocs, SortField[] fields, float maxScore) {
|
||||
super (totalHits, scoreDocs, maxScore);
|
||||
this.fields = fields;
|
||||
}
|
||||
}
|
|
@ -118,16 +118,14 @@ final class CompoundFileWriter implements Closeable{
|
|||
|
||||
private synchronized IndexOutput getOutput() throws IOException {
|
||||
if (dataOut == null) {
|
||||
IndexOutput dataOutput = null;
|
||||
boolean success = false;
|
||||
try {
|
||||
dataOutput = directory.createOutput(dataFileName, IOContext.DEFAULT);
|
||||
dataOutput.writeVInt(FORMAT_CURRENT);
|
||||
dataOut = dataOutput;
|
||||
dataOut = directory.createOutput(dataFileName, IOContext.DEFAULT);
|
||||
dataOut.writeVInt(FORMAT_CURRENT);
|
||||
success = true;
|
||||
} finally {
|
||||
if (!success) {
|
||||
IOUtils.closeWhileHandlingException(dataOutput);
|
||||
IOUtils.closeWhileHandlingException(dataOut);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -68,7 +68,7 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
|
|||
assertAllBetween(last2, j, bd2, ids);
|
||||
last2 = j + 1;
|
||||
}
|
||||
assertEquals(uniqueValues.size(), queue.numGlobalTermDeletes());
|
||||
assertEquals(j+1, queue.numGlobalTermDeletes());
|
||||
}
|
||||
assertEquals(uniqueValues, bd1.terms.keySet());
|
||||
assertEquals(uniqueValues, bd2.terms.keySet());
|
||||
|
|
|
@ -0,0 +1,353 @@
|
|||
package org.apache.lucene.index;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with this
|
||||
* work for additional information regarding copyright ownership. The ASF
|
||||
* licenses this file to You under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||
* License for the specific language governing permissions and limitations under
|
||||
* the License.
|
||||
*/
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.Collections;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.CountDownLatch;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import org.apache.lucene.index.DocumentsWriterStallControl.MemoryController;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.annotations.ThreadLeaks;
|
||||
|
||||
/**
|
||||
* Tests for {@link DocumentsWriterStallControl}
|
||||
*/
|
||||
@ThreadLeaks(failTestIfLeaking = true)
|
||||
public class TestDocumentsWriterStallControl extends LuceneTestCase {
|
||||
|
||||
public void testSimpleStall() throws InterruptedException {
|
||||
DocumentsWriterStallControl ctrl = new DocumentsWriterStallControl();
|
||||
SimpleMemCtrl memCtrl = new SimpleMemCtrl();
|
||||
memCtrl.limit = 1000;
|
||||
memCtrl.netBytes = 1000;
|
||||
ctrl.updateStalled(memCtrl);
|
||||
Thread[] waitThreads = waitThreads(atLeast(1), ctrl);
|
||||
start(waitThreads);
|
||||
assertFalse(ctrl.hasBlocked());
|
||||
assertFalse(ctrl.anyStalledThreads());
|
||||
join(waitThreads, 10);
|
||||
|
||||
// now stall threads and wake them up again
|
||||
memCtrl.netBytes = 1001;
|
||||
ctrl.updateStalled(memCtrl);
|
||||
waitThreads = waitThreads(atLeast(1), ctrl);
|
||||
start(waitThreads);
|
||||
awaitState(100, Thread.State.WAITING, waitThreads);
|
||||
assertTrue(ctrl.hasBlocked());
|
||||
assertTrue(ctrl.anyStalledThreads());
|
||||
memCtrl.netBytes = 50;
|
||||
ctrl.updateStalled(memCtrl);
|
||||
assertFalse(ctrl.anyStalledThreads());
|
||||
join(waitThreads, 500);
|
||||
}
|
||||
|
||||
public void testRandom() throws InterruptedException {
|
||||
final DocumentsWriterStallControl ctrl = new DocumentsWriterStallControl();
|
||||
SimpleMemCtrl memCtrl = new SimpleMemCtrl();
|
||||
memCtrl.limit = 1000;
|
||||
memCtrl.netBytes = 1;
|
||||
ctrl.updateStalled(memCtrl);
|
||||
Thread[] stallThreads = new Thread[atLeast(3)];
|
||||
for (int i = 0; i < stallThreads.length; i++) {
|
||||
final int threadId = i;
|
||||
stallThreads[i] = new Thread() {
|
||||
public void run() {
|
||||
int baseBytes = threadId % 2 == 0 ? 500 : 700;
|
||||
SimpleMemCtrl memCtrl = new SimpleMemCtrl();
|
||||
memCtrl.limit = 1000;
|
||||
memCtrl.netBytes = 1;
|
||||
int iters = atLeast(1000);
|
||||
for (int j = 0; j < iters; j++) {
|
||||
memCtrl.netBytes = baseBytes + random().nextInt(1000);
|
||||
ctrl.updateStalled(memCtrl);
|
||||
if (random().nextInt(5) == 0) { // thread 0 only updates
|
||||
ctrl.waitIfStalled();
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
start(stallThreads);
|
||||
long time = System.currentTimeMillis();
|
||||
/*
|
||||
* use a 100 sec timeout to make sure we do not hang forever. join will fail in
|
||||
* that case
|
||||
*/
|
||||
while ((System.currentTimeMillis() - time) < 100 * 1000
|
||||
&& !terminated(stallThreads)) {
|
||||
ctrl.updateStalled(memCtrl);
|
||||
if (random().nextBoolean()) {
|
||||
Thread.yield();
|
||||
} else {
|
||||
Thread.sleep(1);
|
||||
}
|
||||
|
||||
}
|
||||
join(stallThreads, 100);
|
||||
|
||||
}
|
||||
|
||||
public void testAccquireReleaseRace() throws InterruptedException {
|
||||
final DocumentsWriterStallControl ctrl = new DocumentsWriterStallControl();
|
||||
SimpleMemCtrl memCtrl = new SimpleMemCtrl();
|
||||
memCtrl.limit = 1000;
|
||||
memCtrl.netBytes = 1;
|
||||
ctrl.updateStalled(memCtrl);
|
||||
final AtomicBoolean stop = new AtomicBoolean(false);
|
||||
final AtomicBoolean checkPoint = new AtomicBoolean(true);
|
||||
|
||||
int numStallers = atLeast(1);
|
||||
int numReleasers = atLeast(1);
|
||||
int numWaiters = atLeast(1);
|
||||
|
||||
final CountDownLatch[] latches = new CountDownLatch[] {
|
||||
new CountDownLatch(numStallers + numReleasers), new CountDownLatch(1),
|
||||
new CountDownLatch(numWaiters)};
|
||||
Thread[] threads = new Thread[numReleasers + numStallers + numWaiters];
|
||||
List<Throwable> exceptions = Collections.synchronizedList(new ArrayList<Throwable>());
|
||||
for (int i = 0; i < numReleasers; i++) {
|
||||
threads[i] = new Updater(stop, checkPoint, ctrl, latches, true, exceptions);
|
||||
}
|
||||
for (int i = numReleasers; i < numReleasers + numStallers; i++) {
|
||||
threads[i] = new Updater(stop, checkPoint, ctrl, latches, false, exceptions);
|
||||
|
||||
}
|
||||
for (int i = numReleasers + numStallers; i < numReleasers + numStallers
|
||||
+ numWaiters; i++) {
|
||||
threads[i] = new Waiter(stop, checkPoint, ctrl, latches, exceptions);
|
||||
|
||||
}
|
||||
|
||||
start(threads);
|
||||
int iters = atLeast(20000);
|
||||
for (int i = 0; i < iters; i++) {
|
||||
if (checkPoint.get()) {
|
||||
|
||||
latches[0].await(5, TimeUnit.SECONDS);
|
||||
if (!exceptions.isEmpty()) {
|
||||
for (Throwable throwable : exceptions) {
|
||||
throwable.printStackTrace();
|
||||
}
|
||||
fail("got exceptions in threads");
|
||||
}
|
||||
|
||||
if (!ctrl.anyStalledThreads()) {
|
||||
assertTrue(
|
||||
"control claims no stalled threads but waiter seems to be blocked",
|
||||
latches[2].await(3, TimeUnit.SECONDS));
|
||||
}
|
||||
checkPoint.set(false);
|
||||
|
||||
latches[1].countDown();
|
||||
}
|
||||
assertFalse(checkPoint.get());
|
||||
if (random().nextInt(2) == 0) {
|
||||
latches[0] = new CountDownLatch(numStallers + numReleasers);
|
||||
latches[1] = new CountDownLatch(1);
|
||||
latches[2] = new CountDownLatch(numWaiters);
|
||||
checkPoint.set(true);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
stop.set(true);
|
||||
memCtrl.limit = 1000;
|
||||
memCtrl.netBytes = 1;
|
||||
ctrl.updateStalled(memCtrl);
|
||||
if (checkPoint.get()) {
|
||||
latches[1].countDown();
|
||||
}
|
||||
|
||||
for (int i = 0; i < threads.length; i++) {
|
||||
threads[i].join(2000);
|
||||
if (threads[i].isAlive() && threads[i] instanceof Waiter) {
|
||||
if (threads[i].getState() == Thread.State.WAITING) {
|
||||
fail("waiter is not released - anyThreadsStalled: "
|
||||
+ ctrl.anyStalledThreads());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class Waiter extends Thread {
|
||||
private CountDownLatch[] latches;
|
||||
private DocumentsWriterStallControl ctrl;
|
||||
private AtomicBoolean checkPoint;
|
||||
private AtomicBoolean stop;
|
||||
private List<Throwable> exceptions;
|
||||
|
||||
public Waiter(AtomicBoolean stop, AtomicBoolean checkPoint,
|
||||
DocumentsWriterStallControl ctrl, CountDownLatch[] latches,
|
||||
List<Throwable> exceptions) {
|
||||
this.stop = stop;
|
||||
this.checkPoint = checkPoint;
|
||||
this.ctrl = ctrl;
|
||||
this.latches = latches;
|
||||
this.exceptions = exceptions;
|
||||
}
|
||||
|
||||
public void run() {
|
||||
try {
|
||||
while (!stop.get()) {
|
||||
ctrl.waitIfStalled();
|
||||
if (checkPoint.get()) {
|
||||
CountDownLatch join = latches[2];
|
||||
CountDownLatch wait = latches[1];
|
||||
join.countDown();
|
||||
try {
|
||||
wait.await();
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (Throwable e) {
|
||||
e.printStackTrace();
|
||||
exceptions.add(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public static class Updater extends Thread {
|
||||
|
||||
private CountDownLatch[] latches;
|
||||
private DocumentsWriterStallControl ctrl;
|
||||
private AtomicBoolean checkPoint;
|
||||
private AtomicBoolean stop;
|
||||
private boolean release;
|
||||
private List<Throwable> exceptions;
|
||||
|
||||
public Updater(AtomicBoolean stop, AtomicBoolean checkPoint,
|
||||
DocumentsWriterStallControl ctrl, CountDownLatch[] latches,
|
||||
boolean release, List<Throwable> exceptions) {
|
||||
this.stop = stop;
|
||||
this.checkPoint = checkPoint;
|
||||
this.ctrl = ctrl;
|
||||
this.latches = latches;
|
||||
this.release = release;
|
||||
this.exceptions = exceptions;
|
||||
}
|
||||
|
||||
public void run() {
|
||||
try {
|
||||
SimpleMemCtrl memCtrl = new SimpleMemCtrl();
|
||||
memCtrl.limit = 1000;
|
||||
memCtrl.netBytes = release ? 1 : 2000;
|
||||
while (!stop.get()) {
|
||||
int internalIters = release && random().nextBoolean() ? atLeast(5) : 1;
|
||||
for (int i = 0; i < internalIters; i++) {
|
||||
ctrl.updateStalled(memCtrl);
|
||||
}
|
||||
if (checkPoint.get()) {
|
||||
CountDownLatch join = latches[0];
|
||||
CountDownLatch wait = latches[1];
|
||||
join.countDown();
|
||||
try {
|
||||
wait.await();
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
Thread.yield();
|
||||
}
|
||||
} catch (Throwable e) {
|
||||
e.printStackTrace();
|
||||
exceptions.add(e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public static boolean terminated(Thread[] threads) {
|
||||
for (Thread thread : threads) {
|
||||
if (Thread.State.TERMINATED != thread.getState()) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
public static void start(Thread[] tostart) throws InterruptedException {
|
||||
for (Thread thread : tostart) {
|
||||
thread.start();
|
||||
}
|
||||
Thread.sleep(1); // let them start
|
||||
}
|
||||
|
||||
public static void join(Thread[] toJoin, long timeout)
|
||||
throws InterruptedException {
|
||||
for (Thread thread : toJoin) {
|
||||
thread.join(timeout);
|
||||
}
|
||||
}
|
||||
|
||||
public static Thread[] waitThreads(int num,
|
||||
final DocumentsWriterStallControl ctrl) {
|
||||
Thread[] array = new Thread[num];
|
||||
for (int i = 0; i < array.length; i++) {
|
||||
array[i] = new Thread() {
|
||||
public void run() {
|
||||
ctrl.waitIfStalled();
|
||||
}
|
||||
};
|
||||
}
|
||||
return array;
|
||||
}
|
||||
|
||||
public static void awaitState(long timeout, Thread.State state,
|
||||
Thread... threads) throws InterruptedException {
|
||||
long t = System.currentTimeMillis();
|
||||
while (System.currentTimeMillis() - t <= timeout) {
|
||||
boolean done = true;
|
||||
for (Thread thread : threads) {
|
||||
if (thread.getState() != state) {
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
if (done) {
|
||||
return;
|
||||
}
|
||||
if (random().nextBoolean()) {
|
||||
Thread.yield();
|
||||
} else {
|
||||
Thread.sleep(1);
|
||||
}
|
||||
}
|
||||
fail("timed out waiting for state: " + state + " timeout: " + timeout
|
||||
+ " ms");
|
||||
}
|
||||
|
||||
private static class SimpleMemCtrl implements MemoryController {
|
||||
long netBytes;
|
||||
long limit;
|
||||
|
||||
@Override
|
||||
public long netBytes() {
|
||||
return netBytes;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long stallLimitBytes() {
|
||||
return limit;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -139,6 +139,10 @@ final class JustCompileSearch {
|
|||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, Object value) {
|
||||
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
|
||||
}
|
||||
}
|
||||
|
||||
static final class JustCompileFieldComparatorSource extends FieldComparatorSource {
|
||||
|
|
|
@ -188,6 +188,14 @@ class ElevationComparatorSource extends FieldComparatorSource {
|
|||
public Integer value(int slot) {
|
||||
return Integer.valueOf(values[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, Integer valueObj) throws IOException {
|
||||
final int value = valueObj.intValue();
|
||||
final int docValue = docVal(doc);
|
||||
// values will be small enough that there is no overflow concern
|
||||
return value - docValue;
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
||||
|
|
|
@ -17,12 +17,25 @@ package org.apache.lucene.search;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import org.apache.lucene.codecs.Codec;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.DoubleField;
|
||||
import org.apache.lucene.document.FloatDocValuesField;
|
||||
import org.apache.lucene.document.FloatField;
|
||||
import org.apache.lucene.document.IntDocValuesField;
|
||||
import org.apache.lucene.document.IntField;
|
||||
import org.apache.lucene.document.LongField;
|
||||
import org.apache.lucene.document.SortedBytesDocValuesField;
|
||||
import org.apache.lucene.document.StraightBytesDocValuesField;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.English;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
@ -30,11 +43,19 @@ import org.apache.lucene.util._TestUtil;
|
|||
/**
|
||||
* Tests IndexSearcher's searchAfter() method
|
||||
*/
|
||||
|
||||
public class TestSearchAfter extends LuceneTestCase {
|
||||
private Directory dir;
|
||||
private IndexReader reader;
|
||||
private IndexSearcher searcher;
|
||||
|
||||
boolean supportsDocValues = Codec.getDefault().getName().equals("Lucene3x") == false;
|
||||
|
||||
private static SortField useDocValues(SortField field) {
|
||||
field.setUseIndexValues(true);
|
||||
return field;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setUp() throws Exception {
|
||||
super.setUp();
|
||||
|
@ -45,6 +66,25 @@ public class TestSearchAfter extends LuceneTestCase {
|
|||
Document document = new Document();
|
||||
document.add(newField("english", English.intToEnglish(i), TextField.TYPE_UNSTORED));
|
||||
document.add(newField("oddeven", (i % 2 == 0) ? "even" : "odd", TextField.TYPE_UNSTORED));
|
||||
document.add(newField("byte", "" + ((byte) random().nextInt()), StringField.TYPE_UNSTORED));
|
||||
document.add(newField("short", "" + ((short) random().nextInt()), StringField.TYPE_UNSTORED));
|
||||
document.add(new IntField("int", random().nextInt()));
|
||||
document.add(new LongField("long", random().nextLong()));
|
||||
|
||||
document.add(new FloatField("float", random().nextFloat()));
|
||||
document.add(new DoubleField("double", random().nextDouble()));
|
||||
document.add(newField("bytes", _TestUtil.randomRealisticUnicodeString(random()), StringField.TYPE_UNSTORED));
|
||||
document.add(newField("bytesval", _TestUtil.randomRealisticUnicodeString(random()), StringField.TYPE_UNSTORED));
|
||||
document.add(new DoubleField("double", random().nextDouble()));
|
||||
|
||||
if (supportsDocValues) {
|
||||
document.add(new IntDocValuesField("intdocvalues", random().nextInt()));
|
||||
document.add(new FloatDocValuesField("floatdocvalues", random().nextFloat()));
|
||||
document.add(new SortedBytesDocValuesField("sortedbytesdocvalues", new BytesRef(_TestUtil.randomRealisticUnicodeString(random()))));
|
||||
document.add(new SortedBytesDocValuesField("sortedbytesdocvaluesval", new BytesRef(_TestUtil.randomRealisticUnicodeString(random()))));
|
||||
document.add(new StraightBytesDocValuesField("straightbytesdocvalues", new BytesRef(_TestUtil.randomRealisticUnicodeString(random()))));
|
||||
}
|
||||
|
||||
iw.addDocument(document);
|
||||
}
|
||||
reader = iw.getReader();
|
||||
|
@ -63,7 +103,7 @@ public class TestSearchAfter extends LuceneTestCase {
|
|||
// because the first page has a null 'after', we get a normal collector.
|
||||
// so we need to run the test a few times to ensure we will collect multiple
|
||||
// pages.
|
||||
int n = atLeast(10);
|
||||
int n = atLeast(20);
|
||||
for (int i = 0; i < n; i++) {
|
||||
Filter odd = new QueryWrapperFilter(new TermQuery(new Term("oddeven", "odd")));
|
||||
assertQuery(new MatchAllDocsQuery(), null);
|
||||
|
@ -78,13 +118,67 @@ public class TestSearchAfter extends LuceneTestCase {
|
|||
}
|
||||
|
||||
void assertQuery(Query query, Filter filter) throws Exception {
|
||||
assertQuery(query, filter, null);
|
||||
assertQuery(query, filter, Sort.RELEVANCE);
|
||||
assertQuery(query, filter, Sort.INDEXORDER);
|
||||
for(int rev=0;rev<2;rev++) {
|
||||
boolean reversed = rev == 1;
|
||||
assertQuery(query, filter, new Sort(new SortField[] {new SortField("byte", SortField.Type.BYTE, reversed)}));
|
||||
assertQuery(query, filter, new Sort(new SortField[] {new SortField("short", SortField.Type.SHORT, reversed)}));
|
||||
assertQuery(query, filter, new Sort(new SortField[] {new SortField("int", SortField.Type.INT, reversed)}));
|
||||
assertQuery(query, filter, new Sort(new SortField[] {new SortField("long", SortField.Type.LONG, reversed)}));
|
||||
assertQuery(query, filter, new Sort(new SortField[] {new SortField("float", SortField.Type.FLOAT, reversed)}));
|
||||
assertQuery(query, filter, new Sort(new SortField[] {new SortField("double", SortField.Type.DOUBLE, reversed)}));
|
||||
assertQuery(query, filter, new Sort(new SortField[] {new SortField("bytes", SortField.Type.STRING, reversed)}));
|
||||
assertQuery(query, filter, new Sort(new SortField[] {new SortField("bytesval", SortField.Type.STRING_VAL, reversed)}));
|
||||
if (supportsDocValues) {
|
||||
assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("intdocvalues", SortField.Type.INT, reversed))}));
|
||||
assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("floatdocvalues", SortField.Type.FLOAT, reversed))}));
|
||||
assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("sortedbytesdocvalues", SortField.Type.STRING, reversed))}));
|
||||
assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("sortedbytesdocvaluesval", SortField.Type.STRING_VAL, reversed))}));
|
||||
assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("straightbytesdocvalues", SortField.Type.STRING_VAL, reversed))}));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void assertQuery(Query query, Filter filter, Sort sort) throws Exception {
|
||||
int maxDoc = searcher.getIndexReader().maxDoc();
|
||||
TopDocs all = searcher.search(query, filter, maxDoc);
|
||||
TopDocs all;
|
||||
int pageSize = _TestUtil.nextInt(random(), 1, maxDoc*2);
|
||||
if (VERBOSE) {
|
||||
System.out.println("\nassertQuery: query=" + query + " filter=" + filter + " sort=" + sort + " pageSize=" + pageSize);
|
||||
}
|
||||
final boolean doMaxScore = random().nextBoolean();
|
||||
if (sort == null) {
|
||||
all = searcher.search(query, filter, maxDoc);
|
||||
} else if (sort == Sort.RELEVANCE) {
|
||||
all = searcher.search(query, filter, maxDoc, sort, true, doMaxScore);
|
||||
} else {
|
||||
all = searcher.search(query, filter, maxDoc, sort);
|
||||
}
|
||||
if (VERBOSE) {
|
||||
System.out.println(" all.totalHits=" + all.totalHits);
|
||||
}
|
||||
int pageStart = 0;
|
||||
ScoreDoc lastBottom = null;
|
||||
while (pageStart < all.totalHits) {
|
||||
TopDocs paged = searcher.searchAfter(lastBottom, query, filter, pageSize);
|
||||
TopDocs paged;
|
||||
if (sort == null) {
|
||||
if (VERBOSE) {
|
||||
System.out.println(" iter lastBottom=" + lastBottom);
|
||||
}
|
||||
paged = searcher.searchAfter(lastBottom, query, filter, pageSize);
|
||||
} else {
|
||||
if (VERBOSE) {
|
||||
System.out.println(" iter lastBottom=" + lastBottom + (lastBottom == null ? "" : " fields=" + Arrays.toString(((FieldDoc) lastBottom).fields)));
|
||||
}
|
||||
if (sort == Sort.RELEVANCE) {
|
||||
paged = searcher.searchAfter(lastBottom, query, filter, pageSize, sort, true, doMaxScore);
|
||||
} else {
|
||||
paged = searcher.searchAfter(lastBottom, query, filter, pageSize, sort);
|
||||
}
|
||||
}
|
||||
|
||||
if (paged.scoreDocs.length == 0) {
|
||||
break;
|
||||
}
|
||||
|
@ -98,8 +192,14 @@ public class TestSearchAfter extends LuceneTestCase {
|
|||
static void assertPage(int pageStart, TopDocs all, TopDocs paged) {
|
||||
assertEquals(all.totalHits, paged.totalHits);
|
||||
for (int i = 0; i < paged.scoreDocs.length; i++) {
|
||||
assertEquals(all.scoreDocs[pageStart + i].doc, paged.scoreDocs[i].doc);
|
||||
assertEquals(all.scoreDocs[pageStart + i].score, paged.scoreDocs[i].score, 0f);
|
||||
ScoreDoc sd1 = all.scoreDocs[pageStart + i];
|
||||
ScoreDoc sd2 = paged.scoreDocs[i];
|
||||
assertEquals(sd1.doc, sd2.doc);
|
||||
assertEquals(sd1.score, sd2.score, 0f);
|
||||
if (sd1 instanceof FieldDoc) {
|
||||
assertTrue(sd2 instanceof FieldDoc);
|
||||
assertEquals(((FieldDoc) sd1).fields, ((FieldDoc) sd2).fields);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -44,9 +44,11 @@ import org.apache.lucene.document.StringField;
|
|||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.index.AtomicReaderContext;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.DocValues;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.index.MultiReader;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
|
@ -218,7 +220,6 @@ public class TestSort extends LuceneTestCase {
|
|||
IndexReader reader = writer.getReader();
|
||||
writer.close ();
|
||||
IndexSearcher s = newSearcher(reader);
|
||||
s.setDefaultFieldSortScoring(true, true);
|
||||
return s;
|
||||
}
|
||||
|
||||
|
@ -734,6 +735,15 @@ public class TestSort extends LuceneTestCase {
|
|||
public Integer value(int slot) {
|
||||
return Integer.valueOf(slotValues[slot]);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, Integer valueObj) {
|
||||
final int value = valueObj.intValue();
|
||||
final int docValue = docValues[doc];
|
||||
|
||||
// values are small enough that overflow won't happen
|
||||
return docValue - value;
|
||||
}
|
||||
}
|
||||
|
||||
static class MyFieldComparatorSource extends FieldComparatorSource {
|
||||
|
@ -889,7 +899,7 @@ public class TestSort extends LuceneTestCase {
|
|||
|
||||
// try to pick a query that will result in an unnormalized
|
||||
// score greater than 1 to test for correct normalization
|
||||
final TopDocs docs1 = full.search(queryE,null,nDocs,sort);
|
||||
final TopDocs docs1 = full.search(queryE,null,nDocs,sort,true,true);
|
||||
|
||||
// a filter that only allows through the first hit
|
||||
Filter filt = new Filter() {
|
||||
|
@ -903,7 +913,7 @@ public class TestSort extends LuceneTestCase {
|
|||
}
|
||||
};
|
||||
|
||||
TopDocs docs2 = full.search(queryE, filt, nDocs, sort);
|
||||
TopDocs docs2 = full.search(queryE, filt, nDocs, sort,true,true);
|
||||
|
||||
assertEquals(docs1.scoreDocs[0].score, docs2.scoreDocs[0].score, 1e-6);
|
||||
}
|
||||
|
@ -1244,7 +1254,7 @@ public class TestSort extends LuceneTestCase {
|
|||
String expectedResult) throws IOException {
|
||||
|
||||
//ScoreDoc[] result = searcher.search (query, null, 1000, sort).scoreDocs;
|
||||
TopDocs hits = searcher.search(query, null, Math.max(1, expectedResult.length()), sort);
|
||||
TopDocs hits = searcher.search(query, null, Math.max(1, expectedResult.length()), sort, true, true);
|
||||
ScoreDoc[] result = hits.scoreDocs;
|
||||
assertEquals(expectedResult.length(),hits.totalHits);
|
||||
StringBuilder buff = new StringBuilder(10);
|
||||
|
@ -1478,4 +1488,38 @@ public class TestSort extends LuceneTestCase {
|
|||
r.close();
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testMaxScore() throws Exception {
|
||||
Directory d = newDirectory();
|
||||
// Not RandomIndexWriter because we need exactly 2 segments:
|
||||
IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
|
||||
int id = 0;
|
||||
for(int seg=0;seg<2;seg++) {
|
||||
for(int docIDX=0;docIDX<10;docIDX++) {
|
||||
Document doc = new Document();
|
||||
doc.add(newField("id", ""+docIDX, StringField.TYPE_STORED));
|
||||
StringBuilder sb = new StringBuilder();
|
||||
for(int i=0;i<id;i++) {
|
||||
sb.append(' ');
|
||||
sb.append("text");
|
||||
}
|
||||
doc.add(newField("body", sb.toString(), TextField.TYPE_UNSTORED));
|
||||
w.addDocument(doc);
|
||||
id++;
|
||||
}
|
||||
w.commit();
|
||||
}
|
||||
|
||||
IndexReader r = DirectoryReader.open(w, true);
|
||||
w.close();
|
||||
Query q = new TermQuery(new Term("body", "text"));
|
||||
IndexSearcher s = newSearcher(r);
|
||||
float maxScore = s.search(q , 10).getMaxScore();
|
||||
assertEquals(maxScore, s.search(q, null, 3, Sort.INDEXORDER, random().nextBoolean(), true).getMaxScore(), 0.0);
|
||||
assertEquals(maxScore, s.search(q, null, 3, Sort.RELEVANCE, random().nextBoolean(), true).getMaxScore(), 0.0);
|
||||
assertEquals(maxScore, s.search(q, null, 3, new Sort(new SortField[] {new SortField("id", SortField.Type.INT, false)}), random().nextBoolean(), true).getMaxScore(), 0.0);
|
||||
assertEquals(maxScore, s.search(q, null, 3, new Sort(new SortField[] {new SortField("id", SortField.Type.INT, true)}), random().nextBoolean(), true).getMaxScore(), 0.0);
|
||||
r.close();
|
||||
d.close();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -53,9 +53,7 @@ public class TestReproduceMessage extends WithNestedTests {
|
|||
public Statement apply(final Statement base, Description description) {
|
||||
return new Statement() {
|
||||
public void evaluate() throws Throwable {
|
||||
if (isRunningNested()) {
|
||||
triggerOn(SorePoint.RULE);
|
||||
}
|
||||
triggerOn(SorePoint.RULE);
|
||||
base.evaluate();
|
||||
}
|
||||
};
|
||||
|
@ -69,9 +67,7 @@ public class TestReproduceMessage extends WithNestedTests {
|
|||
|
||||
@Before
|
||||
public void before() {
|
||||
if (isRunningNested()) {
|
||||
triggerOn(SorePoint.BEFORE);
|
||||
}
|
||||
triggerOn(SorePoint.BEFORE);
|
||||
}
|
||||
|
||||
@Test
|
||||
|
@ -81,9 +77,7 @@ public class TestReproduceMessage extends WithNestedTests {
|
|||
|
||||
@After
|
||||
public void after() {
|
||||
if (isRunningNested()) {
|
||||
triggerOn(SorePoint.AFTER);
|
||||
}
|
||||
triggerOn(SorePoint.AFTER);
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
|
|
|
@ -22,15 +22,18 @@ import java.io.PrintStream;
|
|||
import java.io.UnsupportedEncodingException;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util.TestRuleIgnoreTestSuites;
|
||||
import org.junit.After;
|
||||
import org.junit.Assert;
|
||||
import org.junit.Before;
|
||||
import org.junit.ClassRule;
|
||||
import org.junit.Rule;
|
||||
import org.junit.rules.TestRule;
|
||||
import org.junit.runner.Description;
|
||||
import org.junit.runners.model.Statement;
|
||||
|
||||
import com.carrotsearch.randomizedtesting.RandomizedRunner;
|
||||
import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
|
||||
|
||||
/**
|
||||
* An abstract test class that prepares nested test classes to run.
|
||||
|
@ -45,28 +48,11 @@ import com.carrotsearch.randomizedtesting.RandomizedRunner;
|
|||
* cause havoc (static fields).
|
||||
*/
|
||||
public abstract class WithNestedTests {
|
||||
/**
|
||||
* This can no longer be thread local because {@link RandomizedRunner} runs
|
||||
* suites in an isolated threadgroup/thread.
|
||||
*/
|
||||
public static volatile boolean runsAsNested;
|
||||
|
||||
public static abstract class AbstractNestedTest extends LuceneTestCase {
|
||||
@ClassRule
|
||||
public static TestRule ignoreIfRunAsStandalone = new TestRule() {
|
||||
public Statement apply(final Statement s, Description arg1) {
|
||||
return new Statement() {
|
||||
public void evaluate() throws Throwable {
|
||||
if (isRunningNested()) {
|
||||
s.evaluate();
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
};
|
||||
|
||||
public static abstract class AbstractNestedTest extends LuceneTestCase
|
||||
implements TestRuleIgnoreTestSuites.NestedTestSuite {
|
||||
protected static boolean isRunningNested() {
|
||||
return runsAsNested;
|
||||
return TestRuleIgnoreTestSuites.isRunningNested();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -81,6 +67,12 @@ public abstract class WithNestedTests {
|
|||
private ByteArrayOutputStream sysout;
|
||||
private ByteArrayOutputStream syserr;
|
||||
|
||||
/**
|
||||
* Restore properties after test.
|
||||
*/
|
||||
@Rule
|
||||
public SystemPropertiesRestoreRule restoreProperties = new SystemPropertiesRestoreRule();
|
||||
|
||||
@Before
|
||||
public final void before() {
|
||||
if (suppressOutputStreams) {
|
||||
|
@ -97,13 +89,11 @@ public abstract class WithNestedTests {
|
|||
}
|
||||
}
|
||||
|
||||
runsAsNested = true;
|
||||
System.setProperty(TestRuleIgnoreTestSuites.PROPERTY_RUN_NESTED, "true");
|
||||
}
|
||||
|
||||
@After
|
||||
public final void after() {
|
||||
runsAsNested = false;
|
||||
|
||||
if (suppressOutputStreams) {
|
||||
System.out.flush();
|
||||
System.err.flush();
|
||||
|
|
|
@ -81,7 +81,7 @@ public class TaxonomyMergeUtils {
|
|||
OrdinalMap map, IndexWriter destIndexWriter,
|
||||
DirectoryTaxonomyWriter destTaxWriter) throws IOException {
|
||||
// merge the taxonomies
|
||||
destTaxWriter.addTaxonomies(new Directory[] { srcTaxDir }, new OrdinalMap[] { map });
|
||||
destTaxWriter.addTaxonomy(srcTaxDir, map);
|
||||
|
||||
PayloadProcessorProvider payloadProcessor = new FacetsPayloadProcessorProvider(
|
||||
srcIndexDir, map.getMap(), new DefaultFacetIndexingParams());
|
||||
|
|
|
@ -4,8 +4,6 @@ import java.io.IOException;
|
|||
|
||||
import org.apache.lucene.index.FieldInfo;
|
||||
import org.apache.lucene.index.StoredFieldVisitor;
|
||||
import org.apache.lucene.index.StoredFieldVisitor.Status;
|
||||
import org.apache.lucene.store.IndexInput;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
|
@ -42,6 +40,7 @@ abstract class Consts {
|
|||
public static final class LoadFullPathOnly extends StoredFieldVisitor {
|
||||
private String fullPath;
|
||||
|
||||
@Override
|
||||
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
|
||||
fullPath = value;
|
||||
}
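For context, LoadFullPathOnly above is a StoredFieldVisitor that captures a single stored string field. A hedged sketch of the same idiom, assuming the Lucene 4.x visitor API in which needsField returns a Status; the field name is illustrative:

import java.io.IOException;

import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.StoredFieldVisitor;

// Capture one stored string field and skip everything else.
class OneStringFieldVisitor extends StoredFieldVisitor {
  private final String fieldName;
  private String value;

  OneStringFieldVisitor(String fieldName) {
    this.fieldName = fieldName;
  }

  @Override
  public void stringField(FieldInfo fieldInfo, String v) throws IOException {
    value = v;
  }

  @Override
  public Status needsField(FieldInfo fieldInfo) throws IOException {
    return fieldName.equals(fieldInfo.name) ? Status.YES : Status.NO;
  }

  String getValue() {
    return value;
  }
}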
|
||||
|
|
|
@ -12,15 +12,22 @@ import java.io.IOException;
|
|||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.analysis.core.KeywordAnalyzer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.FieldType;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.document.TextField;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
|
||||
import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache;
|
||||
import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
|
||||
import org.apache.lucene.index.AtomicReader;
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.DocsEnum;
|
||||
|
@ -29,28 +36,18 @@ import org.apache.lucene.index.IndexWriter;
|
|||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.index.LogByteSizeMergePolicy;
|
||||
import org.apache.lucene.index.MultiFields;
|
||||
import org.apache.lucene.index.SegmentInfos;
|
||||
import org.apache.lucene.index.Terms;
|
||||
import org.apache.lucene.index.TermsEnum;
|
||||
import org.apache.lucene.index.SegmentInfos;
|
||||
import org.apache.lucene.index.TieredMergePolicy;
|
||||
import org.apache.lucene.search.DocIdSetIterator;
|
||||
import org.apache.lucene.store.AlreadyClosedException;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.LockObtainFailedException;
|
||||
import org.apache.lucene.store.NativeFSLockFactory;
|
||||
import org.apache.lucene.store.SimpleFSLockFactory;
|
||||
import org.apache.lucene.util.Bits;
|
||||
import org.apache.lucene.util.BytesRef;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
|
||||
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
|
||||
import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache;
|
||||
import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
|
@ -233,7 +230,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
// Make sure that the taxonomy always contain the root category
|
||||
// with category id 0.
|
||||
addCategory(new CategoryPath());
|
||||
refreshReader();
|
||||
refreshInternalReader();
|
||||
} else {
|
||||
// There are some categories on the disk, which we have not yet
|
||||
// read into the cache, and therefore the cache is incomplete.
|
||||
|
@ -289,15 +286,15 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
new KeywordAnalyzer()).setOpenMode(openMode).setMergePolicy(
|
||||
new LogByteSizeMergePolicy());
|
||||
}
|
||||
|
||||
// Currently overridden by a unit test that verifies that every index we open is close()ed.
|
||||
/**
|
||||
* Open an {@link IndexReader} from the internal {@link IndexWriter}, by
|
||||
* calling {@link IndexReader#open(IndexWriter, boolean)}. Extending classes can override
|
||||
* this method to return their own {@link IndexReader}.
|
||||
*/
|
||||
protected DirectoryReader openReader() throws IOException {
|
||||
return DirectoryReader.open(indexWriter, true);
|
||||
|
||||
/** Opens a {@link DirectoryReader} from the internal {@link IndexWriter}. */
|
||||
private synchronized void openInternalReader() throws IOException {
|
||||
// verify that the taxo-writer hasn't been closed on us. the method is
|
||||
// synchronized since it may be called from a non sync'ed block, and it
|
||||
// needs to protect against close() happening concurrently.
|
||||
ensureOpen();
|
||||
assert reader == null : "a reader is already open !";
|
||||
reader = DirectoryReader.open(indexWriter, false);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -348,18 +345,6 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
closeResources();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the number of memory bytes used by the cache.
|
||||
* @return Number of cache bytes in memory, for CL2O only; zero otherwise.
|
||||
*/
|
||||
public int getCacheMemoryUsage() {
|
||||
ensureOpen();
|
||||
if (this.cache == null || !(this.cache instanceof Cl2oTaxonomyWriterCache)) {
|
||||
return 0;
|
||||
}
|
||||
return ((Cl2oTaxonomyWriterCache)this.cache).getMemoryUsage();
|
||||
}
|
||||
|
||||
/**
|
||||
* A hook for extending classes to close additional resources that were used.
|
||||
* The default implementation closes the {@link IndexReader} as well as the
|
||||
|
@@ -411,24 +396,29 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
    // We need to get an answer from the on-disk index. If a reader
    // is not yet open, do it now:
    if (reader == null) {
      reader = openReader();
      openInternalReader();
    }

    // TODO (Facet): avoid Multi*?
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    DocsEnum docs = MultiFields.getTermDocsEnum(reader, liveDocs, Consts.FULL,
        new BytesRef(categoryPath.toString(delimiter)),
        false);
    if (docs == null || docs.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
      return -1; // category does not exist in taxonomy
    int base = 0;
    int doc = -1;
    for (AtomicReader r : reader.getSequentialSubReaders()) {
      DocsEnum docs = r.termDocsEnum(null, Consts.FULL,
          new BytesRef(categoryPath.toString(delimiter)), false);
      if (docs != null) {
        doc = docs.nextDoc() + base;
        break;
      }
      base += r.maxDoc(); // we don't have deletions, so it's ok to call maxDoc
    }
    // Note: we do NOT add to the cache the fact that the category
    // does not exist. The reason is that our only use for this
    // method is just before we actually add this category. If
    // in the future this usage changes, we should consider caching
    // the fact that the category is not in the taxonomy.
    addToCache(categoryPath, docs.docID());
    return docs.docID();
    if (doc > 0) {
      addToCache(categoryPath, doc);
    }
    return doc;
  }
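In the replacement code the MultiFields lookup becomes a walk over the sequential sub-readers, translating the segment-local doc id into a taxonomy-wide ordinal via a running doc base. A compact sketch of that pattern, assuming the same trunk APIs the hunk uses (AtomicReader.termDocsEnum, DirectoryReader.getSequentialSubReaders); the field name is a placeholder:

import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.BytesRef;

class PerSegmentLookup {
  /** Returns the global doc id of the single document containing the term, or -1. */
  static int findGlobalDoc(DirectoryReader reader, String field, BytesRef term) throws IOException {
    int base = 0;
    for (AtomicReader r : reader.getSequentialSubReaders()) {
      DocsEnum docs = r.termDocsEnum(null, field, term, false);
      if (docs != null && docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
        return base + docs.docID(); // segment-local id + doc base = global id
      }
      base += r.maxDoc(); // a taxonomy index has no deletions, so maxDoc is safe
    }
    return -1; // term (category) not found
  }
}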
|
||||
|
||||
/**
|
||||
|
@ -437,7 +427,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
* case the category does not yet exist in the taxonomy.
|
||||
*/
|
||||
private int findCategory(CategoryPath categoryPath, int prefixLen)
|
||||
throws IOException {
|
||||
throws IOException {
|
||||
int res = cache.get(categoryPath, prefixLen);
|
||||
if (res >= 0) {
|
||||
return res;
|
||||
|
@ -450,38 +440,48 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
return cache.get(categoryPath, prefixLen);
|
||||
}
|
||||
if (reader == null) {
|
||||
reader = openReader();
|
||||
openInternalReader();
|
||||
}
|
||||
Bits liveDocs = MultiFields.getLiveDocs(reader);
|
||||
DocsEnum docs = MultiFields.getTermDocsEnum(reader, liveDocs, Consts.FULL,
|
||||
new BytesRef(categoryPath.toString(delimiter, prefixLen)),
|
||||
false);
|
||||
if (docs == null || docs.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
|
||||
return -1; // category does not exist in taxonomy
|
||||
|
||||
int base = 0;
|
||||
int doc = -1;
|
||||
for (AtomicReader r : reader.getSequentialSubReaders()) {
|
||||
DocsEnum docs = r.termDocsEnum(null, Consts.FULL,
|
||||
new BytesRef(categoryPath.toString(delimiter, prefixLen)), false);
|
||||
if (docs != null) {
|
||||
doc = docs.nextDoc() + base;
|
||||
break;
|
||||
}
|
||||
base += r.maxDoc(); // we don't have deletions, so it's ok to call maxDoc
|
||||
}
|
||||
addToCache(categoryPath, prefixLen, docs.docID());
|
||||
return docs.docID();
|
||||
|
||||
if (doc > 0) {
|
||||
addToCache(categoryPath, prefixLen, doc);
|
||||
}
|
||||
return doc;
|
||||
}
|
||||
|
||||
// TODO (Facet): addCategory() is synchronized. This means that if indexing is
|
||||
// multi-threaded, a new category that needs to be written to disk (and
|
||||
// potentially even trigger a lengthy merge) locks out other addCategory()
|
||||
// calls - even those which could immediately return a cached value.
|
||||
// We definitely need to fix this situation!
|
||||
@Override
|
||||
public synchronized int addCategory(CategoryPath categoryPath) throws IOException {
|
||||
public int addCategory(CategoryPath categoryPath) throws IOException {
|
||||
ensureOpen();
|
||||
// If the category is already in the cache and/or the taxonomy, we
|
||||
// should return its existing ordinal:
|
||||
// should return its existing ordinal
|
||||
int res = findCategory(categoryPath);
|
||||
if (res < 0) {
|
||||
// This is a new category, and we need to insert it into the index
|
||||
// (and the cache). Actually, we might also need to add some of
|
||||
// the category's ancestors before we can add the category itself
|
||||
// (while keeping the invariant that a parent is always added to
|
||||
// the taxonomy before its child). internalAddCategory() does all
|
||||
// this recursively:
|
||||
res = internalAddCategory(categoryPath, categoryPath.length());
|
||||
// the category is neither in the cache nor in the index - following code
|
||||
// cannot be executed in parallel.
|
||||
synchronized (this) {
|
||||
res = findCategory(categoryPath);
|
||||
if (res < 0) {
|
||||
// This is a new category, and we need to insert it into the index
|
||||
// (and the cache). Actually, we might also need to add some of
|
||||
// the category's ancestors before we can add the category itself
|
||||
// (while keeping the invariant that a parent is always added to
|
||||
// the taxonomy before its child). internalAddCategory() does all
|
||||
// this recursively
|
||||
res = internalAddCategory(categoryPath, categoryPath.length());
|
||||
}
|
||||
}
|
||||
}
|
||||
return res;
|
||||
|
||||
|
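The net effect of the addCategory change above is the classic check, lock, re-check shape: the unsynchronized findCategory fast path serves cache hits, and only a genuinely new category takes the writer lock, where the lookup is repeated before inserting. A generic sketch of that pattern (the names are illustrative, not the Lucene API):

import java.io.IOException;

// Check / lock / re-check: the cheap read path stays unsynchronized, and the
// lookup is repeated under the lock in case another thread inserted the entry
// between the first check and lock acquisition.
abstract class CheckThenLock {
  protected abstract int find(String key) throws IOException;   // -1 if absent
  protected abstract int insert(String key) throws IOException; // caller holds lock

  public int getOrAdd(String key) throws IOException {
    int ord = find(key);            // fast path, no lock
    if (ord < 0) {
      synchronized (this) {
        ord = find(key);            // re-check under the lock
        if (ord < 0) {
          ord = insert(key);        // only one thread inserts
        }
      }
    }
    return ord;
  }
}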
@ -497,7 +497,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
* recursion.
|
||||
*/
|
||||
private int internalAddCategory(CategoryPath categoryPath, int length)
|
||||
throws CorruptIndexException, IOException {
|
||||
throws IOException {
|
||||
|
||||
// Find our parent's ordinal (recursively adding the parent category
|
||||
// to the taxonomy if it's not already there). Then add the parent
|
||||
|
@ -529,13 +529,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
}
|
||||
}
|
||||
|
||||
// Note that the methods calling addCategoryDocument() are synchronized,
|
||||
// so this method is effectively synchronized as well, but we'll add
|
||||
// synchronized to be on the safe side, and we can reuse class-local objects
|
||||
// instead of allocating them every time
|
||||
protected synchronized int addCategoryDocument(CategoryPath categoryPath,
|
||||
int length, int parent)
|
||||
throws CorruptIndexException, IOException {
|
||||
/**
|
||||
* Note that the methods calling addCategoryDocument() are synchronized, so
|
||||
* this method is effectively synchronized as well.
|
||||
*/
|
||||
private int addCategoryDocument(CategoryPath categoryPath, int length,
|
||||
int parent) throws IOException {
|
||||
// Before Lucene 2.9, position increments >=0 were supported, so we
|
||||
// added 1 to parent to allow the parent -1 (the parent of the root).
|
||||
// Unfortunately, starting with Lucene 2.9, after LUCENE-1542, this is
|
||||
|
@ -545,7 +544,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
// we write here (e.g., to write parent+2), and need to do a workaround
|
||||
// in the reader (which knows that anyway only category 0 has a parent
|
||||
// -1).
|
||||
parentStream.set(parent+1);
|
||||
parentStream.set(parent + 1);
|
||||
Document d = new Document();
|
||||
d.add(parentStreamField);
|
||||
|
||||
|
@ -602,8 +601,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
}
|
||||
}
|
||||
|
||||
private void addToCache(CategoryPath categoryPath, int id)
|
||||
throws CorruptIndexException, IOException {
|
||||
private void addToCache(CategoryPath categoryPath, int id) throws IOException {
|
||||
if (cache.put(categoryPath, id)) {
|
||||
// If cache.put() returned true, it means the cache was limited in
|
||||
// size, became full, so parts of it had to be cleared.
|
||||
|
@ -615,20 +613,20 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
// Because this is a slow operation, cache implementations are
|
||||
// expected not to delete entries one-by-one but rather in bulk
|
||||
// (LruTaxonomyWriterCache removes the 2/3rd oldest entries).
|
||||
refreshReader();
|
||||
refreshInternalReader();
|
||||
cacheIsComplete = false;
|
||||
}
|
||||
}
|
||||
|
||||
private void addToCache(CategoryPath categoryPath, int prefixLen, int id)
|
||||
throws CorruptIndexException, IOException {
|
||||
throws IOException {
|
||||
if (cache.put(categoryPath, prefixLen, id)) {
|
||||
refreshReader();
|
||||
refreshInternalReader();
|
||||
cacheIsComplete = false;
|
||||
}
|
||||
}
|
||||
|
||||
protected synchronized void refreshReader() throws IOException {
|
||||
private synchronized void refreshInternalReader() throws IOException {
|
||||
if (reader != null) {
|
||||
DirectoryReader r2 = DirectoryReader.openIfChanged(reader);
|
||||
if (r2 != null) {
|
||||
|
@ -648,7 +646,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
public synchronized void commit() throws CorruptIndexException, IOException {
|
||||
ensureOpen();
|
||||
indexWriter.commit(combinedCommitData(null));
|
||||
refreshReader();
|
||||
refreshInternalReader();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -674,7 +672,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
public synchronized void commit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
|
||||
ensureOpen();
|
||||
indexWriter.commit(combinedCommitData(commitUserData));
|
||||
refreshReader();
|
||||
refreshInternalReader();
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -759,7 +757,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
// TODO (Facet): we should probably completely clear the cache before starting
|
||||
// to read it?
|
||||
if (reader == null) {
|
||||
reader = openReader();
|
||||
openInternalReader();
|
||||
}
|
||||
|
||||
if (!cache.hasRoom(reader.numDocs())) {
|
||||
|
@ -767,7 +765,29 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
}
|
||||
|
||||
CategoryPath cp = new CategoryPath();
|
||||
Terms terms = MultiFields.getTerms(reader, Consts.FULL);
|
||||
TermsEnum termsEnum = null;
|
||||
DocsEnum docsEnum = null;
|
||||
int base = 0;
|
||||
for (AtomicReader r : reader.getSequentialSubReaders()) {
|
||||
Terms terms = r.terms(Consts.FULL);
|
||||
if (terms != null) { // cannot really happen, but be on the safe side
|
||||
termsEnum = terms.iterator(termsEnum);
|
||||
while (termsEnum.next() != null) {
|
||||
BytesRef t = termsEnum.term();
|
||||
// Since we guarantee uniqueness of categories, each term has exactly
|
||||
// one document. Also, since we do not allow removing categories (and
|
||||
// hence documents), there are no deletions in the index. Therefore, it
|
||||
// is sufficient to call next(), and then doc(), exactly once with no
|
||||
// 'validation' checks.
|
||||
cp.clear();
|
||||
cp.add(t.utf8ToString(), delimiter);
|
||||
docsEnum = termsEnum.docs(null, docsEnum, false);
|
||||
cache.put(cp, docsEnum.nextDoc() + base);
|
||||
}
|
||||
}
|
||||
base += r.maxDoc(); // we don't have any deletions, so we're ok
|
||||
}
|
||||
/*Terms terms = MultiFields.getTerms(reader, Consts.FULL);
|
||||
// The check is done here to avoid checking it on every iteration of the
|
||||
// below loop. A null term will be returned if there are no terms in the
|
||||
// lexicon, or after the Consts.FULL term. However while the loop is
|
||||
|
@ -787,11 +807,10 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
docsEnum = termsEnum.docs(liveDocs, docsEnum, false);
|
||||
docsEnum.nextDoc();
|
||||
cp.clear();
|
||||
// TODO (Facet): avoid String creation/use bytes?
|
||||
cp.add(t.utf8ToString(), delimiter);
|
||||
cache.put(cp, docsEnum.docID());
|
||||
}
|
||||
}
|
||||
}*/
|
||||
|
||||
cacheIsComplete = true;
|
||||
// No sense to keep the reader open - we will not need to read from it
|
||||
|
@ -805,13 +824,14 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
private synchronized ParentArray getParentArray() throws IOException {
|
||||
if (parentArray==null) {
|
||||
if (reader == null) {
|
||||
reader = openReader();
|
||||
openInternalReader();
|
||||
}
|
||||
parentArray = new ParentArray();
|
||||
parentArray.refresh(reader);
|
||||
}
|
||||
return parentArray;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getParent(int ordinal) throws IOException {
|
||||
ensureOpen();
|
||||
|
@ -823,171 +843,47 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
}
|
||||
return getParentArray().getArray()[ordinal];
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Take all the categories of one or more given taxonomies, and add them to
|
||||
* the main taxonomy (this), if they are not already there.
|
||||
* <P>
|
||||
* Additionally, fill a <I>mapping</I> for each of the added taxonomies,
|
||||
* mapping its ordinals to the ordinals in the enlarged main taxonomy.
|
||||
* These mapping are saved into an array of OrdinalMap objects given by the
|
||||
* user, one for each of the given taxonomies (not including "this", the main
|
||||
* taxonomy). Often the first of these will be a MemoryOrdinalMap and the
|
||||
* others will be a DiskOrdinalMap - see discussion in {OrdinalMap}.
|
||||
* <P>
|
||||
* Note that the taxonomies to be added are given as Directory objects,
|
||||
* not opened TaxonomyReader/TaxonomyWriter objects, so if any of them are
|
||||
* currently managed by an open TaxonomyWriter, make sure to commit() (or
|
||||
* close()) it first. The main taxonomy (this) is an open TaxonomyWriter,
|
||||
* and does not need to be commit()ed before this call.
|
||||
* Takes the categories from the given taxonomy directory, and adds the
|
||||
* missing ones to this taxonomy. Additionally, it fills the given
|
||||
* {@link OrdinalMap} with a mapping from the original ordinal to the new
|
||||
* ordinal.
|
||||
*/
|
||||
public void addTaxonomies(Directory[] taxonomies, OrdinalMap[] ordinalMaps) throws IOException {
|
||||
public void addTaxonomy(Directory taxoDir, OrdinalMap map) throws IOException {
|
||||
ensureOpen();
|
||||
// To prevent us stepping on the rest of this class's decisions on when
|
||||
// to open a reader, and when not, we'll be opening a new reader instead
|
||||
// of using the existing "reader" object:
|
||||
IndexReader mainreader = openReader();
|
||||
// TODO (Facet): can this then go segment-by-segment and avoid MultiDocsEnum etc?
|
||||
Terms terms = MultiFields.getTerms(mainreader, Consts.FULL);
|
||||
assert terms != null; // TODO (Facet): explicit check / throw exception?
|
||||
TermsEnum mainte = terms.iterator(null);
|
||||
DocsEnum mainde = null;
|
||||
|
||||
IndexReader[] otherreaders = new IndexReader[taxonomies.length];
|
||||
TermsEnum[] othertes = new TermsEnum[taxonomies.length];
|
||||
DocsEnum[] otherdocsEnum = new DocsEnum[taxonomies.length]; // just for reuse
|
||||
for (int i=0; i<taxonomies.length; i++) {
|
||||
otherreaders[i] = DirectoryReader.open(taxonomies[i]);
|
||||
terms = MultiFields.getTerms(otherreaders[i], Consts.FULL);
|
||||
assert terms != null; // TODO (Facet): explicit check / throw exception?
|
||||
othertes[i] = terms.iterator(null);
|
||||
// Also tell the ordinal maps their expected sizes:
|
||||
ordinalMaps[i].setSize(otherreaders[i].numDocs());
|
||||
}
|
||||
|
||||
CategoryPath cp = new CategoryPath();
|
||||
|
||||
// We keep a "current" cursor over the alphabetically-ordered list of
|
||||
// categories in each taxonomy. We start the cursor on the first
|
||||
// (alphabetically) category of each taxonomy:
|
||||
|
||||
String currentMain;
|
||||
String[] currentOthers = new String[taxonomies.length];
|
||||
currentMain = nextTE(mainte);
|
||||
int otherTaxonomiesLeft = 0;
|
||||
for (int i=0; i<taxonomies.length; i++) {
|
||||
currentOthers[i] = nextTE(othertes[i]);
|
||||
if (currentOthers[i]!=null) {
|
||||
otherTaxonomiesLeft++;
|
||||
}
|
||||
}
|
||||
|
||||
// And then, at each step look at the first (alphabetically) of the
|
||||
// current taxonomies.
|
||||
// NOTE: The most efficient way we could have done this is using a
|
||||
// PriorityQueue. But for simplicity, and assuming that usually we'll
|
||||
// have a very small number of other taxonomies (often just 1), we use
|
||||
// a more naive algorithm (o(ntaxonomies) instead of o(ln ntaxonomies)
|
||||
// per step)
|
||||
|
||||
while (otherTaxonomiesLeft>0) {
|
||||
// TODO: use a pq here
|
||||
String first=null;
|
||||
for (int i=0; i<taxonomies.length; i++) {
|
||||
if (currentOthers[i]==null) continue;
|
||||
if (first==null || first.compareTo(currentOthers[i])>0) {
|
||||
first = currentOthers[i];
|
||||
}
|
||||
}
|
||||
int comp = 0;
|
||||
if (currentMain==null || (comp = currentMain.compareTo(first))>0) {
|
||||
// If 'first' is before currentMain, or currentMain is null,
|
||||
// then 'first' is a new category and we need to add it to the
|
||||
// main taxonomy. Then for all taxonomies with this 'first'
|
||||
// category, we need to add the new category number to their
|
||||
// map, and move to the next category in all of them.
|
||||
cp.clear();
|
||||
cp.add(first, delimiter);
|
||||
// We can call internalAddCategory() instead of addCategory()
|
||||
// because we know the category hasn't been seen yet.
|
||||
int newordinal = internalAddCategory(cp, cp.length());
|
||||
// TODO (Facet): we already had this term in our hands before, in nextTE...
|
||||
// // TODO (Facet): no need to make this term?
|
||||
for (int i=0; i<taxonomies.length; i++) {
|
||||
if (first.equals(currentOthers[i])) {
|
||||
// remember the remapping of this ordinal. Note how
|
||||
// this requires reading a posting list from the index -
|
||||
// but since we do this in lexical order of terms, just
|
||||
// like Lucene's merge works, we hope there are few seeks.
|
||||
// TODO (Facet): is there a quicker way? E.g., not specifying the
|
||||
// next term by name every time?
|
||||
otherdocsEnum[i] = othertes[i].docs(MultiFields.getLiveDocs(otherreaders[i]), otherdocsEnum[i], false);
|
||||
otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
|
||||
int origordinal = otherdocsEnum[i].docID();
|
||||
ordinalMaps[i].addMapping(origordinal, newordinal);
|
||||
// and move to the next category in the i'th taxonomy
|
||||
currentOthers[i] = nextTE(othertes[i]);
|
||||
if (currentOthers[i]==null) {
|
||||
otherTaxonomiesLeft--;
|
||||
}
|
||||
DirectoryReader r = DirectoryReader.open(taxoDir);
|
||||
try {
|
||||
final int size = r.numDocs();
|
||||
final OrdinalMap ordinalMap = map;
|
||||
ordinalMap.setSize(size);
|
||||
CategoryPath cp = new CategoryPath();
|
||||
int base = 0;
|
||||
TermsEnum te = null;
|
||||
DocsEnum docs = null;
|
||||
for (AtomicReader ar : r.getSequentialSubReaders()) {
|
||||
Terms terms = ar.terms(Consts.FULL);
|
||||
te = terms.iterator(te);
|
||||
while (te.next() != null) {
|
||||
String value = te.term().utf8ToString();
|
||||
cp.clear();
|
||||
cp.add(value, Consts.DEFAULT_DELIMITER);
|
||||
int ordinal = findCategory(cp);
|
||||
if (ordinal < 0) {
|
||||
// NOTE: call addCategory so that it works well in a multi-threaded
|
||||
// environment, in case e.g. a thread just added the category, after
|
||||
// the findCategory() call above failed to find it.
|
||||
ordinal = addCategory(cp);
|
||||
}
|
||||
docs = te.docs(null, docs, false);
|
||||
ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
|
||||
}
|
||||
} else if (comp==0) {
|
||||
// 'first' and currentMain are the same, so both the main and some
|
||||
// other taxonomies need to be moved, but a category doesn't need
|
||||
// to be added because it already existed in the main taxonomy.
|
||||
|
||||
// TODO (Facet): Again, is there a quicker way?
|
||||
mainde = mainte.docs(MultiFields.getLiveDocs(mainreader), mainde, false);
|
||||
mainde.nextDoc(); // TODO (Facet): check?
|
||||
int newordinal = mainde.docID();
|
||||
|
||||
currentMain = nextTE(mainte);
|
||||
for (int i=0; i<taxonomies.length; i++) {
|
||||
if (first.equals(currentOthers[i])) {
|
||||
// TODO (Facet): again, is there a quicker way?
|
||||
otherdocsEnum[i] = othertes[i].docs(MultiFields.getLiveDocs(otherreaders[i]), otherdocsEnum[i], false);
|
||||
otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
|
||||
int origordinal = otherdocsEnum[i].docID();
|
||||
ordinalMaps[i].addMapping(origordinal, newordinal);
|
||||
|
||||
// and move to the next category
|
||||
currentOthers[i] = nextTE(othertes[i]);
|
||||
if (currentOthers[i]==null) {
|
||||
otherTaxonomiesLeft--;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else /* comp > 0 */ {
|
||||
// The currentMain doesn't appear in any of the other taxonomies -
|
||||
// we don't need to do anything, just continue to the next one
|
||||
currentMain = nextTE(mainte);
|
||||
base += ar.maxDoc(); // no deletions, so we're ok
|
||||
}
|
||||
ordinalMap.addDone();
|
||||
} finally {
|
||||
r.close();
|
||||
}
|
||||
|
||||
// Close all the readers we've opened, and also tell the ordinal maps
|
||||
// we're done adding to them
|
||||
mainreader.close();
|
||||
for (int i=0; i<taxonomies.length; i++) {
|
||||
otherreaders[i].close();
|
||||
// We never actually added a mapping for the root ordinal - let's do
|
||||
// it now, just so that the map is complete (every ordinal between 0
|
||||
// and size-1 is remapped)
|
||||
ordinalMaps[i].addMapping(0, 0);
|
||||
ordinalMaps[i].addDone();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Expert: This method is only for expert use.
|
||||
* Note also that any call to refresh() will invalidate the returned reader,
|
||||
* so the caller needs to take care of appropriate locking.
|
||||
*
|
||||
* @return lucene indexReader
|
||||
*/
|
||||
DirectoryReader getInternalIndexReader() {
|
||||
ensureOpen();
|
||||
return this.reader;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1113,13 +1009,6 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
|
|||
}
|
||||
}
|
||||
|
||||
private static final String nextTE(TermsEnum te) throws IOException {
|
||||
if (te.next() != null) {
|
||||
return te.term().utf8ToString(); // TODO (Facet): avoid String creation/use Bytes?
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Rollback changes to the taxonomy writer and closes the instance. Following
|
||||
* this method the instance becomes unusable (calling any of its API methods
|
||||
|
|
|
@ -1,5 +1,8 @@
|
|||
package org.apache.lucene.facet.taxonomy.writercache.cl2o;
|
||||
|
||||
import java.util.concurrent.locks.ReadWriteLock;
|
||||
import java.util.concurrent.locks.ReentrantReadWriteLock;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
|
||||
|
||||
|
@ -30,44 +33,71 @@ import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
|
|||
*/
|
||||
public class Cl2oTaxonomyWriterCache implements TaxonomyWriterCache {
|
||||
|
||||
private final ReadWriteLock lock = new ReentrantReadWriteLock();
|
||||
private CompactLabelToOrdinal cache;
|
||||
|
||||
public Cl2oTaxonomyWriterCache(int initialCapcity, float loadFactor, int numHashArrays) {
|
||||
this.cache = new CompactLabelToOrdinal(initialCapcity, loadFactor, numHashArrays);
|
||||
}
|
||||
|
||||
public void close() {
|
||||
cache=null;
|
||||
@Override
|
||||
public synchronized void close() {
|
||||
cache = null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean hasRoom(int n) {
|
||||
// This cache is unlimited, so we always have room for remembering more:
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int get(CategoryPath categoryPath) {
|
||||
return cache.getOrdinal(categoryPath);
|
||||
lock.readLock().lock();
|
||||
try {
|
||||
return cache.getOrdinal(categoryPath);
|
||||
} finally {
|
||||
lock.readLock().unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int get(CategoryPath categoryPath, int length) {
|
||||
if (length<0 || length>categoryPath.length()) {
|
||||
if (length < 0 || length > categoryPath.length()) {
|
||||
length = categoryPath.length();
|
||||
}
|
||||
return cache.getOrdinal(categoryPath, length);
|
||||
lock.readLock().lock();
|
||||
try {
|
||||
return cache.getOrdinal(categoryPath, length);
|
||||
} finally {
|
||||
lock.readLock().unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean put(CategoryPath categoryPath, int ordinal) {
|
||||
cache.addLabel(categoryPath, ordinal);
|
||||
// Tell the caller we didn't clear part of the cache, so it doesn't
|
||||
// have to flush its on-disk index now
|
||||
return false;
|
||||
lock.writeLock().lock();
|
||||
try {
|
||||
cache.addLabel(categoryPath, ordinal);
|
||||
// Tell the caller we didn't clear part of the cache, so it doesn't
|
||||
// have to flush its on-disk index now
|
||||
return false;
|
||||
} finally {
|
||||
lock.writeLock().unlock();
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) {
|
||||
cache.addLabel(categoryPath, prefixLen, ordinal);
|
||||
// Tell the caller we didn't clear part of the cache, so it doesn't
|
||||
// have to flush its on-disk index now
|
||||
return false;
|
||||
lock.writeLock().lock();
|
||||
try {
|
||||
cache.addLabel(categoryPath, prefixLen, ordinal);
|
||||
// Tell the caller we didn't clear part of the cache, so it doesn't
|
||||
// have to flush its on-disk index now
|
||||
return false;
|
||||
} finally {
|
||||
lock.writeLock().unlock();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -75,8 +105,7 @@ public class Cl2oTaxonomyWriterCache implements TaxonomyWriterCache {
|
|||
* @return Number of bytes in memory used by this object.
|
||||
*/
|
||||
public int getMemoryUsage() {
|
||||
int memoryUsage = (this.cache == null) ? 0 : this.cache.getMemoryUsage();
|
||||
return memoryUsage;
|
||||
return cache == null ? 0 : cache.getMemoryUsage();
|
||||
}
|
||||
|
||||
}
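The Cl2oTaxonomyWriterCache changes wrap every lookup in the read lock and every mutation in the write lock of a ReentrantReadWriteLock. A self-contained sketch of that discipline over an ordinary map (the map and method names are illustrative):

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

// Readers share the read lock, writers take the exclusive write lock, and
// every lock/unlock is paired in try/finally so an exception cannot leave
// the lock held.
class RwLockedCache {
  private final ReadWriteLock lock = new ReentrantReadWriteLock();
  private final Map<String,Integer> map = new HashMap<String,Integer>();

  int get(String key) {
    lock.readLock().lock();
    try {
      Integer v = map.get(key);
      return v == null ? -1 : v.intValue();
    } finally {
      lock.readLock().unlock();
    }
  }

  void put(String key, int ordinal) {
    lock.writeLock().lock();
    try {
      map.put(key, ordinal);
    } finally {
      lock.writeLock().unlock();
    }
  }
}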
|
||||
|
|
|
@ -60,16 +60,19 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
|
|||
}
|
||||
}
|
||||
|
||||
public boolean hasRoom(int n) {
|
||||
return n<=(cache.getMaxSize()-cache.getSize());
|
||||
@Override
|
||||
public synchronized boolean hasRoom(int n) {
|
||||
return n <= (cache.getMaxSize() - cache.getSize());
|
||||
}
|
||||
|
||||
public void close() {
|
||||
@Override
|
||||
public synchronized void close() {
|
||||
cache.clear();
|
||||
cache=null;
|
||||
cache = null;
|
||||
}
|
||||
|
||||
public int get(CategoryPath categoryPath) {
|
||||
@Override
|
||||
public synchronized int get(CategoryPath categoryPath) {
|
||||
Integer res = cache.get(categoryPath);
|
||||
if (res == null) {
|
||||
return -1;
|
||||
|
@ -78,7 +81,8 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
|
|||
return res.intValue();
|
||||
}
|
||||
|
||||
public int get(CategoryPath categoryPath, int length) {
|
||||
@Override
|
||||
public synchronized int get(CategoryPath categoryPath, int length) {
|
||||
if (length<0 || length>categoryPath.length()) {
|
||||
length = categoryPath.length();
|
||||
}
|
||||
|
@ -94,7 +98,8 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
|
|||
return res.intValue();
|
||||
}
|
||||
|
||||
public boolean put(CategoryPath categoryPath, int ordinal) {
|
||||
@Override
|
||||
public synchronized boolean put(CategoryPath categoryPath, int ordinal) {
|
||||
boolean ret = cache.put(categoryPath, new Integer(ordinal));
|
||||
// If the cache is full, we need to clear one or more old entries
|
||||
// from the cache. However, if we delete from the cache a recent
|
||||
|
@ -109,7 +114,8 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
|
|||
return ret;
|
||||
}
|
||||
|
||||
public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) {
|
||||
@Override
|
||||
public synchronized boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) {
|
||||
boolean ret = cache.put(categoryPath, prefixLen, new Integer(ordinal));
|
||||
// If the cache is full, we need to clear one or more old entries
|
||||
// from the cache. However, if we delete from the cache a recent
|
||||
|
@ -125,4 +131,3 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
|
|||
}
|
||||
|
||||
}
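LruTaxonomyWriterCache, by contrast, is guarded with plain synchronized methods, a simple fit for an LRU structure that reorders entries even on reads. A generic sketch of a coarsely synchronized LRU cache, assuming java.util.LinkedHashMap in access order rather than the project's own LRU implementation:

import java.util.LinkedHashMap;
import java.util.Map;

// Every public method is synchronized because the underlying map is not
// thread safe; access order plus removeEldestEntry gives LRU eviction.
class SynchronizedLru {
  private final int maxSize;
  private final LinkedHashMap<String,Integer> map;

  SynchronizedLru(final int maxSize) {
    this.maxSize = maxSize;
    this.map = new LinkedHashMap<String,Integer>(16, 0.75f, true) {
      @Override
      protected boolean removeEldestEntry(Map.Entry<String,Integer> eldest) {
        return size() > SynchronizedLru.this.maxSize;
      }
    };
  }

  synchronized int get(String key) {
    Integer v = map.get(key);
    return v == null ? -1 : v.intValue();
  }

  synchronized void put(String key, int ordinal) {
    map.put(key, ordinal);
  }
}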
|
||||
|
||||
|
|
|
@ -1,254 +0,0 @@
|
|||
package org.apache.lucene.facet.taxonomy.directory;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.junit.Test;
|
||||
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.DiskOrdinalMap;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
public class TestAddTaxonomies extends LuceneTestCase {
|
||||
|
||||
@Test
|
||||
public void test1() throws Exception {
|
||||
Directory dir1 = newDirectory();
|
||||
DirectoryTaxonomyWriter tw1 = new DirectoryTaxonomyWriter(dir1);
|
||||
tw1.addCategory(new CategoryPath("Author", "Mark Twain"));
|
||||
tw1.addCategory(new CategoryPath("Animals", "Dog"));
|
||||
Directory dir2 = newDirectory();
|
||||
DirectoryTaxonomyWriter tw2 = new DirectoryTaxonomyWriter(dir2);
|
||||
tw2.addCategory(new CategoryPath("Author", "Rob Pike"));
|
||||
tw2.addCategory(new CategoryPath("Aardvarks", "Bob"));
|
||||
tw2.close();
|
||||
Directory dir3 = newDirectory();
|
||||
DirectoryTaxonomyWriter tw3 = new DirectoryTaxonomyWriter(dir3);
|
||||
tw3.addCategory(new CategoryPath("Author", "Zebra Smith"));
|
||||
tw3.addCategory(new CategoryPath("Aardvarks", "Bob"));
|
||||
tw3.addCategory(new CategoryPath("Aardvarks", "Aaron"));
|
||||
tw3.close();
|
||||
|
||||
MemoryOrdinalMap[] maps = new MemoryOrdinalMap[2];
|
||||
maps[0] = new MemoryOrdinalMap();
|
||||
maps[1] = new MemoryOrdinalMap();
|
||||
|
||||
tw1.addTaxonomies(new Directory[] { dir2, dir3 }, maps);
|
||||
tw1.close();
|
||||
|
||||
TaxonomyReader tr = new DirectoryTaxonomyReader(dir1);
|
||||
|
||||
// Test that the merged taxonomy now contains what we expect:
|
||||
// First all the categories of the original taxonomy, in their original order:
|
||||
assertEquals(tr.getPath(0).toString(), "");
|
||||
assertEquals(tr.getPath(1).toString(), "Author");
|
||||
assertEquals(tr.getPath(2).toString(), "Author/Mark Twain");
|
||||
assertEquals(tr.getPath(3).toString(), "Animals");
|
||||
assertEquals(tr.getPath(4).toString(), "Animals/Dog");
|
||||
// Then the categories new in the new taxonomy, in alphabetical order:
|
||||
assertEquals(tr.getPath(5).toString(), "Aardvarks");
|
||||
assertEquals(tr.getPath(6).toString(), "Aardvarks/Aaron");
|
||||
assertEquals(tr.getPath(7).toString(), "Aardvarks/Bob");
|
||||
assertEquals(tr.getPath(8).toString(), "Author/Rob Pike");
|
||||
assertEquals(tr.getPath(9).toString(), "Author/Zebra Smith");
|
||||
assertEquals(tr.getSize(), 10);
|
||||
|
||||
// Test that the maps contain what we expect
|
||||
int[] map0 = maps[0].getMap();
|
||||
assertEquals(5, map0.length);
|
||||
assertEquals(0, map0[0]);
|
||||
assertEquals(1, map0[1]);
|
||||
assertEquals(8, map0[2]);
|
||||
assertEquals(5, map0[3]);
|
||||
assertEquals(7, map0[4]);
|
||||
|
||||
int[] map1 = maps[1].getMap();
|
||||
assertEquals(6, map1.length);
|
||||
assertEquals(0, map1[0]);
|
||||
assertEquals(1, map1[1]);
|
||||
assertEquals(9, map1[2]);
|
||||
assertEquals(5, map1[3]);
|
||||
assertEquals(7, map1[4]);
|
||||
assertEquals(6, map1[5]);
|
||||
|
||||
tr.close();
|
||||
dir1.close();
|
||||
dir2.close();
|
||||
dir3.close();
|
||||
}
|
||||
|
||||
// a reasonable random test
|
||||
public void testmedium() throws Exception {
|
||||
int numTests = atLeast(3);
|
||||
for (int i = 0; i < numTests; i++) {
|
||||
dotest(_TestUtil.nextInt(random(), 1, 10),
|
||||
_TestUtil.nextInt(random(), 1, 100),
|
||||
_TestUtil.nextInt(random(), 100, 1000),
|
||||
random().nextBoolean());
|
||||
}
|
||||
}
|
||||
|
||||
// A more comprehensive and big random test.
|
||||
@Test @Nightly
|
||||
public void testbig() throws Exception {
|
||||
dotest(2, 1000, 5000, false);
|
||||
dotest(10, 10000, 100, false);
|
||||
dotest(50, 20, 100, false);
|
||||
dotest(10, 1000, 10000, false);
|
||||
dotest(50, 20, 10000, false);
|
||||
dotest(1, 20, 10000, false);
|
||||
dotest(10, 1, 10000, false);
|
||||
dotest(10, 1000, 20000, true);
|
||||
}
|
||||
|
||||
private void dotest(int ntaxonomies, int ncats, int range, boolean disk) throws Exception {
|
||||
Directory dirs[] = new Directory[ntaxonomies];
|
||||
Directory copydirs[] = new Directory[ntaxonomies];
|
||||
|
||||
for (int i=0; i<ntaxonomies; i++) {
|
||||
dirs[i] = newDirectory();
|
||||
copydirs[i] = newDirectory();
|
||||
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[i]);
|
||||
DirectoryTaxonomyWriter copytw = new DirectoryTaxonomyWriter(copydirs[i]);
|
||||
for (int j=0; j<ncats; j++) {
|
||||
String cat = Integer.toString(random().nextInt(range));
|
||||
tw.addCategory(new CategoryPath("a",cat));
|
||||
copytw.addCategory(new CategoryPath("a",cat));
|
||||
}
|
||||
// System.err.println("Taxonomy "+i+": "+tw.getSize());
|
||||
tw.close();
|
||||
copytw.close();
|
||||
}
|
||||
|
||||
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0]);
|
||||
Directory otherdirs[] = new Directory[ntaxonomies-1];
|
||||
System.arraycopy(dirs, 1, otherdirs, 0, ntaxonomies-1);
|
||||
|
||||
OrdinalMap[] maps = new OrdinalMap[ntaxonomies-1];
|
||||
if (ntaxonomies>1) {
|
||||
for (int i=0; i<ntaxonomies-1; i++) {
|
||||
if (disk) {
|
||||
// TODO: use a LTC tempfile
|
||||
maps[i] = new DiskOrdinalMap(new File(System.getProperty("java.io.tmpdir"),
|
||||
"tmpmap"+i));
|
||||
} else {
|
||||
maps[i] = new MemoryOrdinalMap();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
tw.addTaxonomies(otherdirs, maps);
|
||||
// System.err.println("Merged axonomy: "+tw.getSize());
|
||||
tw.close();
|
||||
|
||||
// Check that all original categories in the main taxonomy remain in
|
||||
// unchanged, and the rest of the taxonomies are completely unchanged.
|
||||
for (int i=0; i<ntaxonomies; i++) {
|
||||
TaxonomyReader tr = new DirectoryTaxonomyReader(dirs[i]);
|
||||
TaxonomyReader copytr = new DirectoryTaxonomyReader(copydirs[i]);
|
||||
if (i==0) {
|
||||
assertTrue(tr.getSize() >= copytr.getSize());
|
||||
} else {
|
||||
assertEquals(copytr.getSize(), tr.getSize());
|
||||
}
|
||||
for (int j=0; j<copytr.getSize(); j++) {
|
||||
String expected = copytr.getPath(j).toString();
|
||||
String got = tr.getPath(j).toString();
|
||||
assertTrue("Comparing category "+j+" of taxonomy "+i+": expected "+expected+", got "+got,
|
||||
expected.equals(got));
|
||||
}
|
||||
tr.close();
|
||||
copytr.close();
|
||||
}
|
||||
|
||||
// Check that all the new categories in the main taxonomy are in
|
||||
// lexicographic order. This isn't a requirement of our API, but happens
|
||||
// this way in our current implementation.
|
||||
TaxonomyReader tr = new DirectoryTaxonomyReader(dirs[0]);
|
||||
TaxonomyReader copytr = new DirectoryTaxonomyReader(copydirs[0]);
|
||||
if (tr.getSize() > copytr.getSize()) {
|
||||
String prev = tr.getPath(copytr.getSize()).toString();
|
||||
for (int j=copytr.getSize()+1; j<tr.getSize(); j++) {
|
||||
String n = tr.getPath(j).toString();
|
||||
assertTrue(prev.compareTo(n)<0);
|
||||
prev=n;
|
||||
}
|
||||
}
|
||||
int oldsize = copytr.getSize(); // remember for later
|
||||
tr.close();
|
||||
copytr.close();
|
||||
|
||||
// Check that all the categories from other taxonomies exist in the new
|
||||
// taxonomy.
|
||||
TaxonomyReader main = new DirectoryTaxonomyReader(dirs[0]);
|
||||
for (int i=1; i<ntaxonomies; i++) {
|
||||
TaxonomyReader other = new DirectoryTaxonomyReader(dirs[i]);
|
||||
for (int j=0; j<other.getSize(); j++) {
|
||||
int otherord = main.getOrdinal(other.getPath(j));
|
||||
assertTrue(otherord != TaxonomyReader.INVALID_ORDINAL);
|
||||
}
|
||||
other.close();
|
||||
}
|
||||
|
||||
// Check that all the new categories in the merged taxonomy exist in
|
||||
// one of the added taxonomies.
|
||||
TaxonomyReader[] others = new TaxonomyReader[ntaxonomies-1];
|
||||
for (int i=1; i<ntaxonomies; i++) {
|
||||
others[i-1] = new DirectoryTaxonomyReader(dirs[i]);
|
||||
}
|
||||
for (int j=oldsize; j<main.getSize(); j++) {
|
||||
boolean found=false;
|
||||
CategoryPath path = main.getPath(j);
|
||||
for (int i=1; i<ntaxonomies; i++) {
|
||||
if (others[i-1].getOrdinal(path) != TaxonomyReader.INVALID_ORDINAL) {
|
||||
found=true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!found) {
|
||||
fail("Found category "+j+" ("+path+") in merged taxonomy not in any of the separate ones");
|
||||
}
|
||||
}
|
||||
|
||||
// Check that all the maps are correct
|
||||
for (int i=0; i<ntaxonomies-1; i++) {
|
||||
int[] map = maps[i].getMap();
|
||||
for (int j=0; j<map.length; j++) {
|
||||
assertEquals(map[j], main.getOrdinal(others[i].getPath(j)));
|
||||
}
|
||||
}
|
||||
|
||||
for (int i=1; i<ntaxonomies; i++) {
|
||||
others[i-1].close();
|
||||
}
|
||||
|
||||
main.close();
|
||||
IOUtils.close(dirs);
|
||||
IOUtils.close(copydirs);
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,243 @@
|
|||
package org.apache.lucene.facet.taxonomy.directory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.HashSet;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.DiskOrdinalMap;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.IOUtils;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.util._TestUtil;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
public class TestAddTaxonomy extends LuceneTestCase {
|
||||
|
||||
private void dotest(int ncats, final int range) throws Exception {
|
||||
final AtomicInteger numCats = new AtomicInteger(ncats);
|
||||
Directory dirs[] = new Directory[2];
|
||||
for (int i = 0; i < dirs.length; i++) {
|
||||
dirs[i] = newDirectory();
|
||||
final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[i]);
|
||||
Thread[] addThreads = new Thread[4];
|
||||
for (int j = 0; j < addThreads.length; j++) {
|
||||
addThreads[j] = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
Random random = random();
|
||||
while (numCats.decrementAndGet() > 0) {
|
||||
String cat = Integer.toString(random.nextInt(range));
|
||||
try {
|
||||
tw.addCategory(new CategoryPath("a", cat));
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
for (Thread t : addThreads) t.start();
|
||||
for (Thread t : addThreads) t.join();
|
||||
tw.close();
|
||||
}
|
||||
|
||||
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0]);
|
||||
OrdinalMap map = randomOrdinalMap();
|
||||
tw.addTaxonomy(dirs[1], map);
|
||||
tw.close();
|
||||
|
||||
validate(dirs[0], dirs[1], map);
|
||||
|
||||
IOUtils.close(dirs);
|
||||
}
|
||||
|
||||
private OrdinalMap randomOrdinalMap() throws IOException {
|
||||
if (random().nextBoolean()) {
|
||||
return new DiskOrdinalMap(_TestUtil.createTempFile("taxoMap", "", TEMP_DIR));
|
||||
} else {
|
||||
return new MemoryOrdinalMap();
|
||||
}
|
||||
}
|
||||
|
||||
private void validate(Directory dest, Directory src, OrdinalMap ordMap) throws Exception {
|
||||
CategoryPath cp = new CategoryPath();
|
||||
DirectoryTaxonomyReader destTR = new DirectoryTaxonomyReader(dest);
|
||||
try {
|
||||
final int destSize = destTR.getSize();
|
||||
DirectoryTaxonomyReader srcTR = new DirectoryTaxonomyReader(src);
|
||||
try {
|
||||
int[] map = ordMap.getMap();
|
||||
|
||||
// validate taxo sizes
|
||||
int srcSize = srcTR.getSize();
|
||||
assertTrue("destination taxonomy expected to be larger than source; dest="
|
||||
+ destSize + " src=" + srcSize,
|
||||
destSize >= srcSize);
|
||||
|
||||
// validate that all source categories exist in destination, and their
|
||||
// ordinals are as expected.
|
||||
for (int j = 1; j < srcSize; j++) {
|
||||
srcTR.getPath(j, cp);
|
||||
int destOrdinal = destTR.getOrdinal(cp);
|
||||
assertTrue(cp + " not found in destination", destOrdinal > 0);
|
||||
assertEquals(destOrdinal, map[j]);
|
||||
}
|
||||
} finally {
|
||||
srcTR.close();
|
||||
}
|
||||
} finally {
|
||||
destTR.close();
|
||||
}
|
||||
}
|
||||
|
||||
public void testAddEmpty() throws Exception {
|
||||
Directory dest = newDirectory();
|
||||
DirectoryTaxonomyWriter destTW = new DirectoryTaxonomyWriter(dest);
|
||||
destTW.addCategory(new CategoryPath("Author", "Rob Pike"));
|
||||
destTW.addCategory(new CategoryPath("Aardvarks", "Bob"));
|
||||
destTW.commit();
|
||||
|
||||
Directory src = newDirectory();
|
||||
new DirectoryTaxonomyWriter(src).close(); // create an empty taxonomy
|
||||
|
||||
OrdinalMap map = randomOrdinalMap();
|
||||
destTW.addTaxonomy(src, map);
|
||||
destTW.close();
|
||||
|
||||
validate(dest, src, map);
|
||||
|
||||
IOUtils.close(dest, src);
|
||||
}
|
||||
|
||||
public void testAddToEmpty() throws Exception {
|
||||
Directory dest = newDirectory();
|
||||
|
||||
Directory src = newDirectory();
|
||||
DirectoryTaxonomyWriter srcTW = new DirectoryTaxonomyWriter(src);
|
||||
srcTW.addCategory(new CategoryPath("Author", "Rob Pike"));
|
||||
srcTW.addCategory(new CategoryPath("Aardvarks", "Bob"));
|
||||
srcTW.close();
|
||||
|
||||
DirectoryTaxonomyWriter destTW = new DirectoryTaxonomyWriter(dest);
|
||||
OrdinalMap map = randomOrdinalMap();
|
||||
destTW.addTaxonomy(src, map);
|
||||
destTW.close();
|
||||
|
||||
validate(dest, src, map);
|
||||
|
||||
IOUtils.close(dest, src);
|
||||
}
|
||||
|
||||
// A more comprehensive and big random test.
|
||||
public void testBig() throws Exception {
|
||||
dotest(200, 10000);
|
||||
dotest(1000, 20000);
|
||||
dotest(400000, 1000000);
|
||||
}
|
||||
|
||||
// a reasonable random test
|
||||
public void testMedium() throws Exception {
|
||||
Random random = random();
|
||||
int numTests = atLeast(3);
|
||||
for (int i = 0; i < numTests; i++) {
|
||||
dotest(_TestUtil.nextInt(random, 2, 100),
|
||||
_TestUtil.nextInt(random, 100, 1000));
|
||||
}
|
||||
}
|
||||
|
||||
public void testSimple() throws Exception {
|
||||
Directory dest = newDirectory();
|
||||
DirectoryTaxonomyWriter tw1 = new DirectoryTaxonomyWriter(dest);
|
||||
tw1.addCategory(new CategoryPath("Author", "Mark Twain"));
|
||||
tw1.addCategory(new CategoryPath("Animals", "Dog"));
|
||||
tw1.addCategory(new CategoryPath("Author", "Rob Pike"));
|
||||
|
||||
Directory src = newDirectory();
|
||||
DirectoryTaxonomyWriter tw2 = new DirectoryTaxonomyWriter(src);
|
||||
tw2.addCategory(new CategoryPath("Author", "Rob Pike"));
|
||||
tw2.addCategory(new CategoryPath("Aardvarks", "Bob"));
|
||||
tw2.close();
|
||||
|
||||
OrdinalMap map = randomOrdinalMap();
|
||||
|
||||
tw1.addTaxonomy(src, map);
|
||||
tw1.close();
|
||||
|
||||
validate(dest, src, map);
|
||||
|
||||
IOUtils.close(dest, src);
|
||||
}
|
||||
|
||||
public void testConcurrency() throws Exception {
|
||||
// tests that addTaxonomy and addCategory work in parallel
|
||||
final int numCategories = atLeast(5000);
|
||||
|
||||
// build an input taxonomy index
|
||||
Directory src = newDirectory();
|
||||
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(src);
|
||||
for (int i = 0; i < numCategories; i++) {
|
||||
tw.addCategory(new CategoryPath("a", Integer.toString(i)));
|
||||
}
|
||||
tw.close();
|
||||
|
||||
// now add the taxonomy to an empty taxonomy, while adding the categories
|
||||
// again, in parallel -- in the end, no duplicate categories should exist.
|
||||
Directory dest = newDirectory();
|
||||
final DirectoryTaxonomyWriter destTW = new DirectoryTaxonomyWriter(dest);
|
||||
Thread t = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
for (int i = 0; i < numCategories; i++) {
|
||||
try {
|
||||
destTW.addCategory(new CategoryPath("a", Integer.toString(i)));
|
||||
} catch (IOException e) {
|
||||
// shouldn't happen - if it does, let the test fail on uncaught exception.
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
t.start();
|
||||
|
||||
OrdinalMap map = new MemoryOrdinalMap();
|
||||
destTW.addTaxonomy(src, map);
|
||||
t.join();
|
||||
destTW.close();
|
||||
|
||||
// now validate
|
||||
|
||||
DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dest);
|
||||
// +2 to account for the root category + "a"
|
||||
assertEquals(numCategories + 2, dtr.getSize());
|
||||
HashSet<CategoryPath> categories = new HashSet<CategoryPath>();
|
||||
for (int i = 1; i < dtr.getSize(); i++) {
|
||||
CategoryPath cat = dtr.getPath(i);
|
||||
assertTrue("category " + cat + " already existed", categories.add(cat));
|
||||
}
|
||||
dtr.close();
|
||||
|
||||
IOUtils.close(src, dest);
|
||||
}
|
||||
|
||||
}
|
|
@ -3,11 +3,16 @@ package org.apache.lucene.facet.taxonomy.directory;
|
|||
import java.io.IOException;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Random;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
|
||||
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
|
||||
import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache;
|
||||
import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
|
@ -42,11 +47,17 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
|
|||
|
||||
NoOpCache() { }
|
||||
|
||||
@Override
|
||||
public void close() {}
|
||||
@Override
|
||||
public int get(CategoryPath categoryPath) { return -1; }
|
||||
@Override
|
||||
public int get(CategoryPath categoryPath, int length) { return get(categoryPath); }
|
||||
@Override
|
||||
public boolean put(CategoryPath categoryPath, int ordinal) { return true; }
|
||||
@Override
|
||||
public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) { return true; }
|
||||
@Override
|
||||
public boolean hasRoom(int numberOfEntries) { return false; }
|
||||
|
||||
}
|
||||
|
@ -201,5 +212,48 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
|
|||
|
||||
dir.close();
|
||||
}
|
||||
|
||||
public void testConcurrency() throws Exception {
|
||||
int ncats = atLeast(100000); // add many categories
|
||||
final int range = ncats * 3; // affects the categories selection
|
||||
final AtomicInteger numCats = new AtomicInteger(ncats);
|
||||
Directory dir = newDirectory();
|
||||
final ConcurrentHashMap<Integer,Integer> values = new ConcurrentHashMap<Integer,Integer>();
|
||||
TaxonomyWriterCache cache = random().nextBoolean()
|
||||
? new Cl2oTaxonomyWriterCache(1024, 0.15f, 3)
|
||||
: new LruTaxonomyWriterCache(ncats / 10);
|
||||
final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, cache);
|
||||
Thread[] addThreads = new Thread[atLeast(4)];
|
||||
for (int z = 0; z < addThreads.length; z++) {
|
||||
addThreads[z] = new Thread() {
|
||||
@Override
|
||||
public void run() {
|
||||
Random random = random();
|
||||
while (numCats.decrementAndGet() > 0) {
|
||||
try {
|
||||
int value = random.nextInt(range);
|
||||
tw.addCategory(new CategoryPath("a", Integer.toString(value)));
|
||||
values.put(value, value);
|
||||
} catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
for (Thread t : addThreads) t.start();
|
||||
for (Thread t : addThreads) t.join();
|
||||
tw.close();
|
||||
|
||||
DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dir);
|
||||
assertEquals(values.size() + 2, dtr.getSize()); // +2 for root category + "a"
|
||||
for (Integer value : values.keySet()) {
|
||||
assertTrue("category not found a/" + value, dtr.getOrdinal(new CategoryPath("a", value.toString())) > 0);
|
||||
}
|
||||
dtr.close();
|
||||
|
||||
dir.close();
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -1,193 +0,0 @@
|
|||
package org.apache.lucene.facet.taxonomy.directory;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Collections;
|
||||
import java.util.HashSet;
|
||||
import java.util.IdentityHashMap;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.CorruptIndexException;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.LockObtainFailedException;
|
||||
import org.junit.Test;
|
||||
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.analysis.MockTokenizer;
|
||||
import org.apache.lucene.facet.taxonomy.CategoryPath;
|
||||
import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
|
||||
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* This test case attempts to catch index "leaks" in LuceneTaxonomyReader/Writer,
|
||||
* i.e., cases where an index has been opened, but never closed; In that case,
|
||||
* Java would eventually collect this object and close the index, but leaving
|
||||
* the index open might nevertheless cause problems - e.g., on Windows it prevents
|
||||
* deleting it.
|
||||
*/
|
||||
public class TestIndexClose extends LuceneTestCase {
|
||||
|
||||
@Test
|
||||
public void testLeaks() throws Exception {
|
||||
LeakChecker checker = new LeakChecker();
|
||||
Directory dir = newDirectory();
|
||||
DirectoryTaxonomyWriter tw = checker.openWriter(dir);
|
||||
tw.close();
|
||||
assertEquals(0, checker.nopen());
|
||||
|
||||
tw = checker.openWriter(dir);
|
||||
tw.addCategory(new CategoryPath("animal", "dog"));
|
||||
tw.close();
|
||||
assertEquals(0, checker.nopen());
|
||||
|
||||
DirectoryTaxonomyReader tr = checker.openReader(dir);
|
||||
tr.getPath(1);
|
||||
tr.refresh();
|
||||
tr.close();
|
||||
assertEquals(0, checker.nopen());
|
||||
|
||||
tr = checker.openReader(dir);
|
||||
tw = checker.openWriter(dir);
|
||||
tw.addCategory(new CategoryPath("animal", "cat"));
|
||||
tr.refresh();
|
||||
tw.commit();
|
||||
tw.close();
|
||||
tr.refresh();
|
||||
tr.close();
|
||||
assertEquals(0, checker.nopen());
|
||||
|
||||
tw = checker.openWriter(dir);
|
||||
for (int i=0; i<10000; i++) {
|
||||
tw.addCategory(new CategoryPath("number", Integer.toString(i)));
|
||||
}
|
||||
tw.close();
|
||||
assertEquals(0, checker.nopen());
|
||||
tw = checker.openWriter(dir);
|
||||
for (int i=0; i<10000; i++) {
|
||||
tw.addCategory(new CategoryPath("number", Integer.toString(i*2)));
|
||||
}
|
||||
tw.close();
|
||||
assertEquals(0, checker.nopen());
|
||||
dir.close();
|
||||
}
|
||||
|
||||
private static class LeakChecker {
|
||||
Set<DirectoryReader> readers = Collections.newSetFromMap(new IdentityHashMap<DirectoryReader,Boolean>());
|
||||
|
||||
int iwriter=0;
|
||||
Set<Integer> openWriters = new HashSet<Integer>();
|
||||
|
||||
LeakChecker() { }
|
||||
|
||||
public DirectoryTaxonomyWriter openWriter(Directory dir) throws CorruptIndexException, LockObtainFailedException, IOException {
|
||||
return new InstrumentedTaxonomyWriter(dir);
|
||||
}
|
||||
|
||||
public DirectoryTaxonomyReader openReader(Directory dir) throws CorruptIndexException, LockObtainFailedException, IOException {
|
||||
return new InstrumentedTaxonomyReader(dir);
|
||||
}
|
||||
|
||||
public int nopen() {
|
||||
int ret=0;
|
||||
for (DirectoryReader r: readers) {
|
||||
if (r.getRefCount() > 0) {
|
||||
System.err.println("reader "+r+" still open");
|
||||
ret++;
|
||||
}
|
||||
}
|
||||
for (int i: openWriters) {
|
||||
System.err.println("writer "+i+" still open");
|
||||
ret++;
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
private class InstrumentedTaxonomyWriter extends DirectoryTaxonomyWriter {
|
||||
public InstrumentedTaxonomyWriter(Directory dir) throws CorruptIndexException, LockObtainFailedException, IOException {
|
||||
super(dir);
|
||||
}
|
||||
@Override
|
||||
protected DirectoryReader openReader() throws IOException {
|
||||
DirectoryReader r = super.openReader();
|
||||
readers.add(r);
|
||||
return r;
|
||||
}
|
||||
@Override
|
||||
protected synchronized void refreshReader() throws IOException {
|
||||
super.refreshReader();
|
||||
final DirectoryReader r = getInternalIndexReader();
|
||||
if (r != null) readers.add(r);
|
||||
}
|
||||
@Override
|
||||
protected IndexWriter openIndexWriter (Directory directory, IndexWriterConfig config) throws IOException {
|
||||
return new InstrumentedIndexWriter(directory, config);
|
||||
}
|
||||
@Override
|
||||
protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
|
||||
return newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false))
|
||||
.setOpenMode(openMode).setMergePolicy(newLogMergePolicy());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private class InstrumentedTaxonomyReader extends DirectoryTaxonomyReader {
|
||||
public InstrumentedTaxonomyReader(Directory dir) throws CorruptIndexException, LockObtainFailedException, IOException {
|
||||
super(dir);
|
||||
}
|
||||
@Override
|
||||
protected DirectoryReader openIndexReader(Directory dir) throws CorruptIndexException, IOException {
|
||||
DirectoryReader r = super.openIndexReader(dir);
|
||||
readers.add(r);
|
||||
return r;
|
||||
}
|
||||
@Override
|
||||
public synchronized boolean refresh() throws IOException, InconsistentTaxonomyException {
|
||||
final boolean ret = super.refresh();
|
||||
readers.add(getInternalIndexReader());
|
||||
return ret;
|
||||
}
|
||||
}
|
||||
|
||||
private class InstrumentedIndexWriter extends IndexWriter {
|
||||
int mynum;
|
||||
public InstrumentedIndexWriter(Directory d, IndexWriterConfig conf) throws CorruptIndexException, LockObtainFailedException, IOException {
|
||||
super(d, conf);
|
||||
mynum = iwriter++;
|
||||
openWriters.add(mynum);
|
||||
// System.err.println("openedw "+mynum);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() throws IOException {
|
||||
super.close();
|
||||
if (!openWriters.contains(mynum)) { // probably can't happen...
|
||||
fail("Writer #"+mynum+" was closed twice!");
|
||||
}
|
||||
openWriters.remove(mynum);
|
||||
// System.err.println("closedw "+mynum);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -444,7 +444,6 @@ public class TestBlockJoin extends LuceneTestCase {
|
|||
}
|
||||
|
||||
final IndexSearcher s = newSearcher(r);
|
||||
s.setDefaultFieldSortScoring(true, true);
|
||||
|
||||
final IndexSearcher joinS = newSearcher(joinR);
|
||||
|
||||
|
|
|
@ -945,7 +945,7 @@ public class MemoryIndex {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return positions.size();
|
||||
}
|
||||
}
|
||||
|
@ -987,7 +987,7 @@ public class MemoryIndex {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return positions.size() / stride;
|
||||
}
|
||||
|
||||
|
|
|
@ -64,7 +64,7 @@ public class CustomScoreQuery extends Query {
|
|||
* computation. This parameter is optional - it can be null.
|
||||
*/
|
||||
public CustomScoreQuery(Query subQuery, Query scoringQuery) {
|
||||
this(subQuery, scoringQuery!=null ? // don't want an array that contains a single null..
|
||||
this(subQuery, scoringQuery!=null ? // don't want an array that contains a single null..
|
||||
new Query[] {scoringQuery} : new Query[0]);
|
||||
}
|
||||
|
||||
|
|
|
@ -31,6 +31,8 @@ import java.util.Map;
|
|||
/**
|
||||
* Query that is boosted by a ValueSource
|
||||
*/
|
||||
// TODO: BoostedQuery and BoostingQuery in the same module?
|
||||
// something has to give
|
||||
public class BoostedQuery extends Query {
|
||||
private Query q;
|
||||
private ValueSource boostVal; // optional, can be null
|
||||
|
@ -187,10 +189,9 @@ public class BoostedQuery extends Query {
|
|||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (getClass() != o.getClass()) return false;
|
||||
if (!super.equals(o)) return false;
|
||||
BoostedQuery other = (BoostedQuery)o;
|
||||
return this.getBoost() == other.getBoost()
|
||||
&& this.q.equals(other.q)
|
||||
return this.q.equals(other.q)
|
||||
&& this.boostVal.equals(other.boostVal);
|
||||
}
|
||||
|
||||
|
|
|
@ -184,5 +184,18 @@ public abstract class ValueSource {
|
|||
public Double value(int slot) {
|
||||
return values[slot];
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, Double valueObj) {
|
||||
final double value = valueObj.doubleValue();
|
||||
final double docValue = docVals.doubleVal(doc);
|
||||
if (docValue < value) {
|
||||
return -1;
|
||||
} else if (docValue > value) {
|
||||
return 1;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
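
Side note: the branchy comparison above follows the usual negative/zero/positive comparator contract; a terser sketch (not part of the patch, and ordering NaN slightly differently than the explicit branches) would be:

@Override
public int compareDocToValue(int doc, Double valueObj) {
  // Double.compare yields <0, 0 or >0 like the branches above, but ranks NaN above all other values.
  return Double.compare(docVals.doubleVal(doc), valueObj.doubleValue());
}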
|
||||
|
|
|
@ -0,0 +1,99 @@
|
|||
package org.apache.lucene.queries.function;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
import org.apache.lucene.analysis.MockAnalyzer;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.document.StringField;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.RandomIndexWriter;
|
||||
import org.apache.lucene.queries.function.valuesource.ConstValueSource;
|
||||
import org.apache.lucene.search.CheckHits;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.MatchAllDocsQuery;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.SortField;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.util.LuceneTestCase;
|
||||
import org.junit.AfterClass;
|
||||
import org.junit.BeforeClass;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* Basic tests for {@link BoostedQuery}
|
||||
*/
|
||||
// TODO: more tests
|
||||
public class TestBoostedQuery extends LuceneTestCase {
|
||||
static Directory dir;
|
||||
static IndexReader ir;
|
||||
static IndexSearcher is;
|
||||
|
||||
@BeforeClass
|
||||
public static void beforeClass() throws Exception {
|
||||
dir = newDirectory();
|
||||
IndexWriterConfig iwConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
|
||||
iwConfig.setMergePolicy(newLogMergePolicy());
|
||||
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConfig);
|
||||
Document document = new Document();
|
||||
Field idField = new StringField("id", "");
|
||||
document.add(idField);
|
||||
iw.addDocument(document);
|
||||
ir = iw.getReader();
|
||||
is = newSearcher(ir);
|
||||
iw.close();
|
||||
}
|
||||
|
||||
@AfterClass
|
||||
public static void afterClass() throws Exception {
|
||||
is = null;
|
||||
ir.close();
|
||||
ir = null;
|
||||
dir.close();
|
||||
dir = null;
|
||||
}
|
||||
|
||||
public void testBasic() throws Exception {
|
||||
Query q = new MatchAllDocsQuery();
|
||||
TopDocs docs = is.search(q, 10);
|
||||
assertEquals(1, docs.totalHits);
|
||||
float score = docs.scoreDocs[0].score;
|
||||
|
||||
Query boostedQ = new BoostedQuery(q, new ConstValueSource(2.0f));
|
||||
assertHits(boostedQ, new float[] { score*2 });
|
||||
}
|
||||
|
||||
void assertHits(Query q, float scores[]) throws Exception {
|
||||
ScoreDoc expected[] = new ScoreDoc[scores.length];
|
||||
int expectedDocs[] = new int[scores.length];
|
||||
for (int i = 0; i < expected.length; i++) {
|
||||
expectedDocs[i] = i;
|
||||
expected[i] = new ScoreDoc(i, scores[i]);
|
||||
}
|
||||
TopDocs docs = is.search(q, 10,
|
||||
new Sort(new SortField("id", SortField.Type.STRING)));
|
||||
CheckHits.checkHits(random(), q, "", is, expectedDocs);
|
||||
CheckHits.checkHitsQuery(q, expected, docs.scoreDocs, expectedDocs);
|
||||
CheckHits.checkExplanations(q, "", is);
|
||||
}
|
||||
}
|
|
@ -132,7 +132,6 @@ public class TestValueSources extends LuceneTestCase {
|
|||
|
||||
reader = iw.getReader();
|
||||
searcher = newSearcher(reader);
|
||||
searcher.setDefaultFieldSortScoring(true, true);
|
||||
iw.close();
|
||||
}
|
||||
|
||||
|
|
|
@ -118,4 +118,16 @@ public final class SlowCollatedStringComparator extends FieldComparator<String>
|
|||
return collator.compare(first, second);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public int compareDocToValue(int doc, String value) {
|
||||
final BytesRef br = currentDocTerms.getTerm(doc, tempBR);
|
||||
final String docValue;
|
||||
if (br == null) {
|
||||
docValue = null;
|
||||
} else {
|
||||
docValue = br.utf8ToString();
|
||||
}
|
||||
return compareValues(docValue, value);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -439,7 +439,7 @@ public class RAMOnlyPostingsFormat extends PostingsFormat {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return current.positions.length;
|
||||
}
|
||||
|
||||
|
@ -487,7 +487,7 @@ public class RAMOnlyPostingsFormat extends PostingsFormat {
|
|||
}
|
||||
|
||||
@Override
|
||||
public int freq() {
|
||||
public int freq() throws IOException {
|
||||
return current.positions.length;
|
||||
}
|
||||
|
||||
|
|
|
@ -291,7 +291,8 @@ public abstract class LuceneTestCase extends Assert {
|
|||
*/
|
||||
@ClassRule
|
||||
public static TestRule classRules = RuleChain
|
||||
.outerRule(suiteFailureMarker = new TestRuleMarkFailure())
|
||||
.outerRule(new TestRuleIgnoreTestSuites())
|
||||
.around(suiteFailureMarker = new TestRuleMarkFailure())
|
||||
.around(new TestRuleAssertionsRequired())
|
||||
.around(new TestRuleNoStaticHooksShadowing())
|
||||
.around(new TestRuleNoInstanceHooksOverrides())
|
||||
|
|
|
@ -0,0 +1,67 @@
|
|||
package org.apache.lucene.util;
|
||||
|
||||
import org.junit.Assume;
|
||||
import org.junit.rules.TestRule;
|
||||
import org.junit.runner.Description;
|
||||
import org.junit.runners.model.Statement;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* This rule will cause the suite to be assumption-ignored if
|
||||
* the test class implements a given marker interface and a special
|
||||
* property is not set.
|
||||
*
|
||||
* <p>This is a workaround for problems with certain JUnit containers (IntelliJ)
|
||||
* which automatically discover test suites and attempt to run nested classes
|
||||
* that we use for testing the test framework itself.
|
||||
*/
|
||||
public final class TestRuleIgnoreTestSuites implements TestRule {
|
||||
/**
|
||||
* Marker interface for nested suites that should be ignored
|
||||
* if executed in stand-alone mode.
|
||||
*/
|
||||
public static interface NestedTestSuite {}
|
||||
|
||||
/**
|
||||
* A boolean system property indicating nested suites should be executed
|
||||
* normally.
|
||||
*/
|
||||
public final static String PROPERTY_RUN_NESTED = "tests.runnested";
|
||||
|
||||
@Override
|
||||
public Statement apply(final Statement s, final Description d) {
|
||||
return new Statement() {
|
||||
@Override
|
||||
public void evaluate() throws Throwable {
|
||||
if (NestedTestSuite.class.isAssignableFrom(d.getTestClass())) {
|
||||
LuceneTestCase.assumeTrue("Nested suite class ignored (started as stand-along).",
|
||||
isRunningNested());
|
||||
}
|
||||
s.evaluate();
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if a suite class is running as a nested test.
|
||||
*/
|
||||
public static boolean isRunningNested() {
|
||||
return Boolean.getBoolean(PROPERTY_RUN_NESTED);
|
||||
}
|
||||
}
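
For illustration, a suite opts in by implementing the marker interface; the class and test names below are hypothetical and assume the same org.apache.lucene.util package:

import org.junit.Test;

// Hypothetical nested suite: only meant to be run from an enclosing test of the test framework.
public class NestedSmokeTest extends LuceneTestCase
    implements TestRuleIgnoreTestSuites.NestedTestSuite {
  @Test
  public void testSomething() {
    assertTrue(true);
  }
}

Run directly, such a class is assumption-ignored unless -Dtests.runnested=true is passed.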
|
|
@ -107,7 +107,7 @@ New Features
|
|||
* SOLR-2112: Solrj API now supports streaming results. (ryan)
|
||||
|
||||
* SOLR-792: Adding PivotFacetComponent for Hierarchical faceting
|
||||
(erik, Jeremy Hinegardner, Thibaut Lassalle, ryan)
|
||||
(ehatcher, Jeremy Hinegardner, Thibaut Lassalle, ryan)
|
||||
|
||||
* LUCENE-2507, SOLR-2571, SOLR-2576: Added DirectSolrSpellChecker, which uses Lucene's
|
||||
DirectSpellChecker to retrieve correction candidates directly from the term dictionary using
|
||||
|
@ -144,7 +144,7 @@ New Features
|
|||
* SOLR-2338: Add support for using <similarity/> in a schema's fieldType,
|
||||
for customizing scoring on a per-field basis. (hossman, yonik, rmuir)
|
||||
|
||||
* SOLR-2335: New 'field("...")' function syntax for refering to complex
|
||||
* SOLR-2335: New 'field("...")' function syntax for referring to complex
|
||||
field names (containing whitespace or special characters) in functions.
|
||||
|
||||
* SOLR-2383: /browse improvements: generalize range and date facet display
|
||||
|
@ -226,10 +226,7 @@ New Features
|
|||
* SOLR-3069: Ability to add openSearcher=false to not open a searcher when doing
|
||||
a hard commit. commitWithin now only invokes a softCommit. (yonik)
|
||||
|
||||
* SOLR-1726: Added deep paging support to search (sort by score only) which should use less memory when paging deeply into results
|
||||
by keeping the priority queue small. (Manojkumar Rangasamy Kannadasan, gsingers)
|
||||
|
||||
* SOLR-2802: New FieldMutatingUpdateProcessor and Factory to simplify the
|
||||
* SOLR-2802: New FieldMutatingUpdateProcessor and Factory to simplify the
|
||||
development of UpdateProcessors that modify field values of documents as
|
||||
they are indexed. Also includes several useful new implementations:
|
||||
RemoveBlankFieldUpdateProcessorFactory
|
||||
|
@ -428,6 +425,9 @@ Bug Fixes
|
|||
* SOLR-3436: Group count incorrect when not all shards are queried in the second
|
||||
pass. (Francois Perron, Martijn van Groningen)
|
||||
|
||||
* SOLR-3454: Exception when using result grouping with main=true and using
|
||||
wt=javabin. (Ludovic Boutros, Martijn van Groningen)
|
||||
|
||||
Other Changes
|
||||
----------------------
|
||||
|
||||
|
|
|
@ -0,0 +1,45 @@
|
|||
#!/usr/bin/env bash
|
||||
|
||||
# starts up the multicore example
|
||||
|
||||
cd ..
|
||||
|
||||
rm -r -f example2
|
||||
rm -r -f example3
|
||||
rm -r -f example4
|
||||
rm -r -f example5
|
||||
rm -r -f example6
|
||||
|
||||
rm -r -f dist
|
||||
rm -r -f build
|
||||
rm -r -f example/solr/zoo_data
|
||||
rm -r -f example/solr/data
|
||||
rm -f example/example.log
|
||||
|
||||
ant example dist
|
||||
|
||||
cp -r -f example example2
|
||||
cp -r -f example example3
|
||||
cp -r -f example example4
|
||||
cp -r -f example example5
|
||||
cp -r -f example example6
|
||||
|
||||
java -classpath lib/*:dist/*:build/lucene-libs/* org.apache.solr.cloud.ZkController 127.0.0.1:9983 example/multicore 8983
|
||||
|
||||
cd example
|
||||
java -DzkRun -DnumShards=2 -DSTOP.PORT=7983 -DSTOP.KEY=key -Dsolr.solr.home=multicore -jar start.jar 1>example.log 2>&1 &
|
||||
|
||||
cd ../example2
|
||||
java -Djetty.port=7574 -DzkHost=localhost:9983 -DnumShards=2 -DSTOP.PORT=6574 -DSTOP.KEY=key -Dsolr.solr.home=multicore -jar start.jar 1>example2.log 2>&1 &
|
||||
|
||||
cd ../example3
|
||||
java -Djetty.port=7575 -DzkHost=localhost:9983 -DnumShards=2 -DSTOP.PORT=6575 -DSTOP.KEY=key -Dsolr.solr.home=multicore -jar start.jar 1>example3.log 2>&1 &
|
||||
|
||||
cd ../example4
|
||||
java -Djetty.port=7576 -DzkHost=localhost:9983 -DnumShards=2 -DSTOP.PORT=6576 -DSTOP.KEY=key -Dsolr.solr.home=multicore -jar start.jar 1>example4.log 2>&1 &
|
||||
|
||||
cd ../example5
|
||||
java -Djetty.port=7577 -DzkHost=localhost:9983 -DnumShards=2 -DSTOP.PORT=6577 -DSTOP.KEY=key -Dsolr.solr.home=multicore -jar start.jar 1>example5.log 2>&1 &
|
||||
|
||||
cd ../example6
|
||||
java -Djetty.port=7578 -DzkHost=localhost:9983 -DnumShards=2 -DSTOP.PORT=6578 -DSTOP.KEY=key -Dsolr.solr.home=multicore -jar start.jar 1>example6.log 2>&1 &
|
|
@ -22,7 +22,7 @@ cp -r -f example example4
|
|||
cp -r -f example example5
|
||||
cp -r -f example example6
|
||||
|
||||
java -classpath lib/*:dist/*:build/lucene-libs/* org.apache.solr.cloud.ZkController 127.0.0.1:9983 8983 example/solr/conf conf1 example/solr
|
||||
java -classpath lib/*:dist/*:build/lucene-libs/* org.apache.solr.cloud.ZkController 127.0.0.1:9983 example/solr 8983
|
||||
|
||||
cd example
|
||||
java -DzkRun -DnumShards=2 -DSTOP.PORT=7983 -DSTOP.KEY=key -jar start.jar 1>example.log 2>&1 &
|
||||
|
|
|
@ -9,7 +9,8 @@ CHANGES
|
|||
$Id$
|
||||
================== Release 4.0.0-dev ==============
|
||||
|
||||
(No Changes)
|
||||
* SOLR-3470: Bug fix: custom Carrot2 tokenizer and stemmer factories are
|
||||
respected now (Stanislaw Osinski, Dawid Weiss)
|
||||
|
||||
================== Release 3.6.0 ==================
|
||||
|
||||
|
|
|
@ -62,6 +62,7 @@ import org.carrot2.core.LanguageCode;
|
|||
import org.carrot2.core.attribute.AttributeNames;
|
||||
import org.carrot2.text.linguistic.DefaultLexicalDataFactoryDescriptor;
|
||||
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor;
|
||||
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor.AttributeBuilder;
|
||||
import org.carrot2.util.resource.ClassLoaderLocator;
|
||||
import org.carrot2.util.resource.IResource;
|
||||
import org.carrot2.util.resource.IResourceLocator;
|
||||
|
@ -108,6 +109,9 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
private Controller controller = ControllerFactory.createPooling();
|
||||
private Class<? extends IClusteringAlgorithm> clusteringAlgorithmClass;
|
||||
|
||||
/** Solr core we're bound to. */
|
||||
private SolrCore core;
|
||||
|
||||
private static class SolrResourceLocator implements IResourceLocator {
|
||||
private final SolrResourceLoader resourceLoader;
|
||||
private final String carrot2ResourcesDir;
|
||||
|
@ -146,7 +150,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
public InputStream open() throws IOException {
|
||||
return new ByteArrayInputStream(asBytes);
|
||||
}
|
||||
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
// In case multiple resources are found they will be deduped, but we don't use it in Solr,
|
||||
|
@ -231,8 +235,19 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
extractCarrotAttributes(sreq.getParams(), attributes);
|
||||
|
||||
// Perform clustering and convert to named list
|
||||
return clustersToNamedList(controller.process(attributes,
|
||||
clusteringAlgorithmClass).getClusters(), sreq.getParams());
|
||||
// Carrot2 uses current thread's context class loader to get
|
||||
// certain classes (e.g. custom tokenizer/stemmer) at runtime.
|
||||
// To make sure classes from contrib JARs are available,
|
||||
// we swap the context class loader for the time of clustering.
|
||||
Thread ct = Thread.currentThread();
|
||||
ClassLoader prev = ct.getContextClassLoader();
|
||||
try {
|
||||
ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
|
||||
return clustersToNamedList(controller.process(attributes,
|
||||
clusteringAlgorithmClass).getClusters(), sreq.getParams());
|
||||
} finally {
|
||||
ct.setContextClassLoader(prev);
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error("Carrot2 clustering failed", e);
|
||||
throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
|
||||
|
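
The class-loader swap above is a small, reusable pattern; a generic sketch of it (helper name hypothetical, not part of the patch) is:

// Hypothetical helper illustrating the context-class-loader swap used in cluster() and init().
static <T> T withContextClassLoader(ClassLoader loader, java.util.concurrent.Callable<T> action) throws Exception {
  Thread ct = Thread.currentThread();
  ClassLoader prev = ct.getContextClassLoader();
  try {
    ct.setContextClassLoader(loader);
    return action.call(); // the library resolves classes against 'loader' here
  } finally {
    ct.setContextClassLoader(prev); // always restore the previous loader
  }
}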
@ -242,6 +257,8 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
@Override
|
||||
@SuppressWarnings({ "unchecked", "rawtypes" })
|
||||
public String init(NamedList config, final SolrCore core) {
|
||||
this.core = core;
|
||||
|
||||
String result = super.init(config, core);
|
||||
final SolrParams initParams = SolrParams.toSolrParams(config);
|
||||
|
||||
|
@ -255,10 +272,14 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
// Additionally, we set a custom lexical resource factory for Carrot2 that
|
||||
// will use both Carrot2 default stop words as well as stop words from
|
||||
// the StopFilter defined on the field.
|
||||
BasicPreprocessingPipelineDescriptor.attributeBuilder(initAttributes)
|
||||
.stemmerFactory(LuceneCarrot2StemmerFactory.class)
|
||||
.tokenizerFactory(LuceneCarrot2TokenizerFactory.class)
|
||||
.lexicalDataFactory(SolrStopwordsCarrot2LexicalDataFactory.class);
|
||||
final AttributeBuilder attributeBuilder = BasicPreprocessingPipelineDescriptor.attributeBuilder(initAttributes);
|
||||
attributeBuilder.lexicalDataFactory(SolrStopwordsCarrot2LexicalDataFactory.class);
|
||||
if (!initAttributes.containsKey(BasicPreprocessingPipelineDescriptor.Keys.TOKENIZER_FACTORY)) {
|
||||
attributeBuilder.tokenizerFactory(LuceneCarrot2TokenizerFactory.class);
|
||||
}
|
||||
if (!initAttributes.containsKey(BasicPreprocessingPipelineDescriptor.Keys.STEMMER_FACTORY)) {
|
||||
attributeBuilder.stemmerFactory(LuceneCarrot2StemmerFactory.class);
|
||||
}
|
||||
|
||||
// Pass the schema to SolrStopwordsCarrot2LexicalDataFactory.
|
||||
initAttributes.put("solrIndexSchema", core.getSchema());
|
||||
|
@ -272,8 +293,19 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
|
|||
// Using the class loader directly because this time we want to omit the prefix
|
||||
new ClassLoaderLocator(core.getResourceLoader().getClassLoader())));
|
||||
|
||||
this.controller.init(initAttributes);
|
||||
|
||||
// Carrot2 uses current thread's context class loader to get
|
||||
// certain classes (e.g. custom tokenizer/stemmer) at initialization time.
|
||||
// To make sure classes from contrib JARs are available,
|
||||
// we swap the context class loader for the time of clustering.
|
||||
Thread ct = Thread.currentThread();
|
||||
ClassLoader prev = ct.getContextClassLoader();
|
||||
try {
|
||||
ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
|
||||
this.controller.init(initAttributes);
|
||||
} finally {
|
||||
ct.setContextClassLoader(prev);
|
||||
}
|
||||
|
||||
SchemaField uniqueField = core.getSchema().getUniqueKeyField();
|
||||
if (uniqueField == null) {
|
||||
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,
|
||||
|
|
|
@ -22,29 +22,48 @@ import com.google.common.collect.ImmutableSet;
|
|||
*/
|
||||
|
||||
|
||||
public interface CarrotParams {
|
||||
/**
|
||||
* Carrot2 parameter mapping (recognized and mapped if passed via Solr configuration).
|
||||
*/
|
||||
public final class CarrotParams {
|
||||
|
||||
String CARROT_PREFIX = "carrot.";
|
||||
private static String CARROT_PREFIX = "carrot.";
|
||||
|
||||
String ALGORITHM = CARROT_PREFIX + "algorithm";
|
||||
public static String ALGORITHM = CARROT_PREFIX + "algorithm";
|
||||
|
||||
String TITLE_FIELD_NAME = CARROT_PREFIX + "title";
|
||||
String URL_FIELD_NAME = CARROT_PREFIX + "url";
|
||||
String SNIPPET_FIELD_NAME = CARROT_PREFIX + "snippet";
|
||||
String LANGUAGE_FIELD_NAME = CARROT_PREFIX + "lang";
|
||||
String CUSTOM_FIELD_NAME = CARROT_PREFIX + "custom";
|
||||
public static String TITLE_FIELD_NAME = CARROT_PREFIX + "title";
|
||||
public static String URL_FIELD_NAME = CARROT_PREFIX + "url";
|
||||
public static String SNIPPET_FIELD_NAME = CARROT_PREFIX + "snippet";
|
||||
public static String LANGUAGE_FIELD_NAME = CARROT_PREFIX + "lang";
|
||||
public static String CUSTOM_FIELD_NAME = CARROT_PREFIX + "custom";
|
||||
|
||||
String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
|
||||
String SUMMARY_FRAGSIZE = CARROT_PREFIX + "fragSize";
|
||||
String SUMMARY_SNIPPETS = CARROT_PREFIX + "summarySnippets";
|
||||
public static String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
|
||||
public static String SUMMARY_FRAGSIZE = CARROT_PREFIX + "fragSize";
|
||||
public static String SUMMARY_SNIPPETS = CARROT_PREFIX + "summarySnippets";
|
||||
|
||||
String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
|
||||
String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
|
||||
String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir";
|
||||
String LANGUAGE_CODE_MAP = CARROT_PREFIX + "lcmap";
|
||||
public static String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
|
||||
public static String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
|
||||
public static String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir";
|
||||
public static String LANGUAGE_CODE_MAP = CARROT_PREFIX + "lcmap";
|
||||
|
||||
public static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
|
||||
ALGORITHM, TITLE_FIELD_NAME, URL_FIELD_NAME, SNIPPET_FIELD_NAME, LANGUAGE_FIELD_NAME,
|
||||
PRODUCE_SUMMARY, SUMMARY_FRAGSIZE, SUMMARY_SNIPPETS, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS,
|
||||
LEXICAL_RESOURCES_DIR);
|
||||
static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
|
||||
ALGORITHM,
|
||||
|
||||
TITLE_FIELD_NAME,
|
||||
URL_FIELD_NAME,
|
||||
SNIPPET_FIELD_NAME,
|
||||
LANGUAGE_FIELD_NAME,
|
||||
CUSTOM_FIELD_NAME,
|
||||
|
||||
PRODUCE_SUMMARY,
|
||||
SUMMARY_FRAGSIZE,
|
||||
SUMMARY_SNIPPETS,
|
||||
|
||||
NUM_DESCRIPTIONS,
|
||||
OUTPUT_SUB_CLUSTERS,
|
||||
LEXICAL_RESOURCES_DIR,
|
||||
LANGUAGE_CODE_MAP);
|
||||
|
||||
/** No instances. */
|
||||
private CarrotParams() {}
|
||||
}
|
||||
|
|
|
@ -339,6 +339,16 @@
|
|||
<str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.LexicalResourcesCheckClusteringAlgorithm</str>
|
||||
<str name="carrot.lexicalResourcesDir">clustering/custom</str>
|
||||
</lst>
|
||||
<lst name="engine">
|
||||
<str name="name">custom-duplicating-tokenizer</str>
|
||||
<str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.EchoTokensClusteringAlgorithm</str>
|
||||
<str name="PreprocessingPipeline.tokenizerFactory">org.apache.solr.handler.clustering.carrot2.DuplicatingTokenizerFactory</str>
|
||||
</lst>
|
||||
<lst name="engine">
|
||||
<str name="name">custom-duplicating-stemmer</str>
|
||||
<str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.EchoStemsClusteringAlgorithm</str>
|
||||
<str name="PreprocessingPipeline.stemmerFactory">org.apache.solr.handler.clustering.carrot2.DuplicatingStemmerFactory</str>
|
||||
</lst>
|
||||
</searchComponent>
|
||||
|
||||
<searchComponent class="org.apache.solr.handler.clustering.ClusteringComponent" name="doc-clustering">
|
||||
|
|
|
@ -205,7 +205,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void solrStopWordsUsedInCarrot2Clustering() throws Exception {
|
||||
public void testSolrStopWordsUsedInCarrot2Clustering() throws Exception {
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.set("merge-resources", false);
|
||||
params.set(AttributeUtils.getKey(
|
||||
|
@ -220,7 +220,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void solrStopWordsNotDefinedOnAFieldForClustering() throws Exception {
|
||||
public void testSolrStopWordsNotDefinedOnAFieldForClustering() throws Exception {
|
||||
ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
// Force string fields to be used for clustering. Does not make sense
|
||||
// in the real world, but does the job in the test.
|
||||
|
@ -239,7 +239,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void highlightingOfMultiValueField() throws Exception {
|
||||
public void testHighlightingOfMultiValueField() throws Exception {
|
||||
final String snippetWithoutSummary = getLabels(clusterWithHighlighting(
|
||||
false, 30, 3, "multi", 1).get(0)).get(1);
|
||||
assertTrue("Snippet contains first value", snippetWithoutSummary.contains("First"));
|
||||
|
@ -256,7 +256,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void concatenatingMultipleFields() throws Exception {
|
||||
public void testConcatenatingMultipleFields() throws Exception {
|
||||
final ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(CarrotParams.TITLE_FIELD_NAME, "title,heading");
|
||||
params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet,body");
|
||||
|
@ -271,7 +271,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void highlightingMultipleFields() throws Exception {
|
||||
public void testHighlightingMultipleFields() throws Exception {
|
||||
final TermQuery query = new TermQuery(new Term("snippet", "content"));
|
||||
|
||||
final ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
|
@ -297,7 +297,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void oneCarrot2SupportedLanguage() throws Exception {
|
||||
public void testOneCarrot2SupportedLanguage() throws Exception {
|
||||
final ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang");
|
||||
|
||||
|
@ -309,7 +309,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void oneCarrot2SupportedLanguageOfMany() throws Exception {
|
||||
public void testOneCarrot2SupportedLanguageOfMany() throws Exception {
|
||||
final ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang");
|
||||
|
||||
|
@ -321,7 +321,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void languageCodeMapping() throws Exception {
|
||||
public void testLanguageCodeMapping() throws Exception {
|
||||
final ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang");
|
||||
params.add(CarrotParams.LANGUAGE_CODE_MAP, "POLISH:pl");
|
||||
|
@ -334,7 +334,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
}
|
||||
|
||||
@Test
|
||||
public void passingOfCustomFields() throws Exception {
|
||||
public void testPassingOfCustomFields() throws Exception {
|
||||
final ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(CarrotParams.CUSTOM_FIELD_NAME, "intfield_i:intfield");
|
||||
params.add(CarrotParams.CUSTOM_FIELD_NAME, "floatfield_f:floatfield");
|
||||
|
@ -352,6 +352,34 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
|
|||
assertEquals("List field", "[first, second]", labels.get(4));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCustomTokenizer() throws Exception {
|
||||
final ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(CarrotParams.TITLE_FIELD_NAME, "title");
|
||||
params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet");
|
||||
|
||||
final List<String> labels = getLabels(checkEngine(
|
||||
getClusteringEngine("custom-duplicating-tokenizer"), 1, 16, new TermQuery(new Term("title",
|
||||
"field")), params).get(0));
|
||||
|
||||
// The custom test tokenizer duplicates each token's text
|
||||
assertTrue("First token", labels.get(0).contains("TitleTitle"));
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCustomStemmer() throws Exception {
|
||||
final ModifiableSolrParams params = new ModifiableSolrParams();
|
||||
params.add(CarrotParams.TITLE_FIELD_NAME, "title");
|
||||
params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet");
|
||||
|
||||
final List<String> labels = getLabels(checkEngine(
|
||||
getClusteringEngine("custom-duplicating-stemmer"), 1, 12, new TermQuery(new Term("title",
|
||||
"field")), params).get(0));
|
||||
|
||||
// The custom test stemmer duplicates and lowercases each token's text
|
||||
assertTrue("First token", labels.get(0).contains("titletitle"));
|
||||
}
|
||||
|
||||
private CarrotClusteringEngine getClusteringEngine(String engineName) {
|
||||
ClusteringComponent comp = (ClusteringComponent) h.getCore()
|
||||
.getSearchComponent("clustering");
|
||||
|
|
|
@ -0,0 +1,34 @@
|
|||
package org.apache.solr.handler.clustering.carrot2;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.carrot2.core.LanguageCode;
|
||||
import org.carrot2.text.linguistic.IStemmer;
|
||||
import org.carrot2.text.linguistic.IStemmerFactory;
|
||||
|
||||
public class DuplicatingStemmerFactory implements IStemmerFactory {
|
||||
@Override
|
||||
public IStemmer getStemmer(LanguageCode language) {
|
||||
return new IStemmer() {
|
||||
@Override
|
||||
public CharSequence stem(CharSequence word) {
|
||||
return word.toString() + word.toString();
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
|
@ -0,0 +1,52 @@
|
|||
package org.apache.solr.handler.clustering.carrot2;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
|
||||
import org.carrot2.core.LanguageCode;
|
||||
import org.carrot2.text.analysis.ExtendedWhitespaceTokenizer;
|
||||
import org.carrot2.text.analysis.ITokenizer;
|
||||
import org.carrot2.text.linguistic.ITokenizerFactory;
|
||||
import org.carrot2.text.util.MutableCharArray;
|
||||
|
||||
public class DuplicatingTokenizerFactory implements ITokenizerFactory {
|
||||
@Override
|
||||
public ITokenizer getTokenizer(LanguageCode language) {
|
||||
return new ITokenizer() {
|
||||
private final ExtendedWhitespaceTokenizer delegate = new ExtendedWhitespaceTokenizer();
|
||||
|
||||
@Override
|
||||
public void setTermBuffer(MutableCharArray buffer) {
|
||||
delegate.setTermBuffer(buffer);
|
||||
buffer.reset(buffer.toString() + buffer.toString());
|
||||
}
|
||||
|
||||
@Override
|
||||
public void reset(Reader input) throws IOException {
|
||||
delegate.reset(input);
|
||||
}
|
||||
|
||||
@Override
|
||||
public short nextToken() throws IOException {
|
||||
return delegate.nextToken();
|
||||
}
|
||||
};
|
||||
}
|
||||
}
|
|
@ -0,0 +1,75 @@
|
|||
package org.apache.solr.handler.clustering.carrot2;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import java.util.List;
|
||||
|
||||
import org.carrot2.core.Cluster;
|
||||
import org.carrot2.core.Document;
|
||||
import org.carrot2.core.IClusteringAlgorithm;
|
||||
import org.carrot2.core.LanguageCode;
|
||||
import org.carrot2.core.ProcessingComponentBase;
|
||||
import org.carrot2.core.ProcessingException;
|
||||
import org.carrot2.core.attribute.AttributeNames;
|
||||
import org.carrot2.core.attribute.Processing;
|
||||
import org.carrot2.text.preprocessing.PreprocessingContext;
|
||||
import org.carrot2.text.preprocessing.PreprocessingContext.AllStems;
|
||||
import org.carrot2.text.preprocessing.PreprocessingContext.AllTokens;
|
||||
import org.carrot2.text.preprocessing.PreprocessingContext.AllWords;
|
||||
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipeline;
|
||||
import org.carrot2.util.attribute.Attribute;
|
||||
import org.carrot2.util.attribute.Bindable;
|
||||
import org.carrot2.util.attribute.Input;
|
||||
import org.carrot2.util.attribute.Output;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* A mock Carrot2 clustering algorithm that outputs stem of each token of each
|
||||
* document as a separate cluster. Useful only in tests.
|
||||
*/
|
||||
@Bindable(prefix = "EchoTokensClusteringAlgorithm")
|
||||
public class EchoStemsClusteringAlgorithm extends ProcessingComponentBase
|
||||
implements IClusteringAlgorithm {
|
||||
@Input
|
||||
@Processing
|
||||
@Attribute(key = AttributeNames.DOCUMENTS)
|
||||
private List<Document> documents;
|
||||
|
||||
@Output
|
||||
@Processing
|
||||
@Attribute(key = AttributeNames.CLUSTERS)
|
||||
private List<Cluster> clusters;
|
||||
|
||||
BasicPreprocessingPipeline preprocessing = new BasicPreprocessingPipeline();
|
||||
|
||||
@Override
|
||||
public void process() throws ProcessingException {
|
||||
final PreprocessingContext preprocessingContext = preprocessing.preprocess(
|
||||
documents, "", LanguageCode.ENGLISH);
|
||||
final AllTokens allTokens = preprocessingContext.allTokens;
|
||||
final AllWords allWords = preprocessingContext.allWords;
|
||||
final AllStems allStems = preprocessingContext.allStems;
|
||||
clusters = Lists.newArrayListWithCapacity(allTokens.image.length);
|
||||
for (int i = 0; i < allTokens.image.length; i++) {
|
||||
if (allTokens.wordIndex[i] >= 0) {
|
||||
clusters.add(new Cluster(new String(
|
||||
allStems.image[allWords.stemIndex[allTokens.wordIndex[i]]])));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,69 @@
|
|||
package org.apache.solr.handler.clustering.carrot2;
|
||||
|
||||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
import java.util.List;
|
||||
|
||||
import org.carrot2.core.Cluster;
|
||||
import org.carrot2.core.Document;
|
||||
import org.carrot2.core.IClusteringAlgorithm;
|
||||
import org.carrot2.core.LanguageCode;
|
||||
import org.carrot2.core.ProcessingComponentBase;
|
||||
import org.carrot2.core.ProcessingException;
|
||||
import org.carrot2.core.attribute.AttributeNames;
|
||||
import org.carrot2.core.attribute.Processing;
|
||||
import org.carrot2.text.preprocessing.PreprocessingContext;
|
||||
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipeline;
|
||||
import org.carrot2.util.attribute.Attribute;
|
||||
import org.carrot2.util.attribute.Bindable;
|
||||
import org.carrot2.util.attribute.Input;
|
||||
import org.carrot2.util.attribute.Output;
|
||||
|
||||
import com.google.common.collect.Lists;
|
||||
|
||||
/**
|
||||
* A mock Carrot2 clustering algorithm that outputs each token of each document
|
||||
* as a separate cluster. Useful only in tests.
|
||||
*/
|
||||
@Bindable(prefix = "EchoTokensClusteringAlgorithm")
|
||||
public class EchoTokensClusteringAlgorithm extends ProcessingComponentBase
|
||||
implements IClusteringAlgorithm {
|
||||
@Input
|
||||
@Processing
|
||||
@Attribute(key = AttributeNames.DOCUMENTS)
|
||||
private List<Document> documents;
|
||||
|
||||
@Output
|
||||
@Processing
|
||||
@Attribute(key = AttributeNames.CLUSTERS)
|
||||
private List<Cluster> clusters;
|
||||
|
||||
BasicPreprocessingPipeline preprocessing = new BasicPreprocessingPipeline();
|
||||
|
||||
@Override
|
||||
public void process() throws ProcessingException {
|
||||
final PreprocessingContext preprocessingContext = preprocessing.preprocess(
|
||||
documents, "", LanguageCode.ENGLISH);
|
||||
clusters = Lists
|
||||
.newArrayListWithCapacity(preprocessingContext.allTokens.image.length);
|
||||
for (char[] token : preprocessingContext.allTokens.image) {
|
||||
if (token != null) {
|
||||
clusters.add(new Cluster(new String(token)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,53 +0,0 @@
|
|||
<%@ page import="org.apache.solr.request.SolrRequestHandler" %>
|
||||
<%@ page import="java.util.Map" %>
|
||||
<%@ page contentType="text/html; charset=utf-8" pageEncoding="UTF-8"%>
|
||||
<%--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
--%>
|
||||
<%-- do a verbatim include so we can use the local vars --%>
|
||||
<%@include file="_info.jsp"%>
|
||||
<html>
|
||||
<%
|
||||
String handler = request.getParameter("handler");
|
||||
|
||||
if (handler == null) {
|
||||
Map<String, SolrRequestHandler> handlers = core.getRequestHandlers();
|
||||
%>
|
||||
<head>
|
||||
<title>DataImportHandler Interactive Development</title>
|
||||
<link rel="stylesheet" type="text/css" href="solr-admin.css">
|
||||
</head>
|
||||
<body>
|
||||
Select handler:
|
||||
<ul>
|
||||
<%
|
||||
for (String key : handlers.keySet()) {
|
||||
if (handlers.get(key).getClass().getName().equals("org.apache.solr.handler.dataimport.DataImportHandler")) { %>
|
||||
<li><a href="dataimport.jsp?handler=<%=key%>"><%=key%></a></li>
|
||||
<%
|
||||
}
|
||||
}
|
||||
%>
|
||||
</ul>
|
||||
</body>
|
||||
<% } else { %>
|
||||
|
||||
<frameset cols = "50%, 50%">
|
||||
<frame src ="debug.jsp?handler=<%=handler%>" />
|
||||
<frame src ="..<%=handler%>?command=status" name="result"/>
|
||||
</frameset>
|
||||
<% } %>
|
||||
</html>
|
|
@ -1,103 +0,0 @@
|
|||
<%@ page contentType="text/html; charset=utf-8" pageEncoding="UTF-8"%>
|
||||
<%--
|
||||
Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
contributor license agreements. See the NOTICE file distributed with
|
||||
this work for additional information regarding copyright ownership.
|
||||
The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
(the "License"); you may not use this file except in compliance with
|
||||
the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
--%>
|
||||
<%-- do a verbatim include so we can use the local vars --%>
|
||||
<%@include file="_info.jsp"%>
|
||||
<html>
|
||||
<head>
|
||||
<title>DataImportHandler Interactive Development</title>
|
||||
<link rel="stylesheet" type="text/css" href="solr-admin.css"/>
|
||||
<link rel="icon" href="favicon.ico" type="image/ico"/>
|
||||
<link rel="shortcut icon" href="favicon.ico" type="image/ico"/>
|
||||
<script src="jquery-1.4.3.min.js"></script>
|
||||
</head>
|
||||
<body>
|
||||
<h1>DataImportHandler Development Console</h1>
|
||||
<%
|
||||
String handler = request.getParameter("handler"); // must be specified
|
||||
%>
|
||||
<form action="..<%=handler%>" target="result" method="get">
|
||||
<input type="hidden" name="debug" value="on"/>
|
||||
<table>
|
||||
<tr>
|
||||
<th>Handler: </th>
|
||||
<td><%=handler%> <a href="dataimport.jsp" target="_top">change handler</a></td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="2">
|
||||
<table width="100%">
|
||||
<tr>
|
||||
<td>
|
||||
<select name="command">
|
||||
<option value="full-import" selected="selected">full-import</option>
|
||||
<option value="delta-import">delta-import</option>
|
||||
</select>
|
||||
</td>
|
||||
<td><strong>Verbose</strong> <input
|
||||
name="verbose" type="checkbox"></td>
|
||||
<td><strong>Commit</strong> <input
|
||||
name="commit" type="checkbox"></td>
|
||||
<td><strong>Clean</strong> <input
|
||||
name="clean" type="checkbox"></td>
|
||||
<td><strong>Start Row</strong> <input
|
||||
name="start" size="4" type="text" value="0"></td>
|
||||
<td><strong>No. of Rows</strong> <input name="rows"
|
||||
type="text" size="4" value="10"></td>
|
||||
</tr>
|
||||
</table>
|
||||
</td>
|
||||
<tr>
|
||||
<td><strong>data config xml</strong></td>
|
||||
<td><input class="stdbutton" type="submit" value="debug now">
|
||||
</td>
|
||||
</tr>
|
||||
<tr>
|
||||
<td colspan="2"><textarea id="txtDataConfig" rows="30" cols="80" name="dataConfig"></textarea></td>
|
||||
<script type="text/javascript" language="javascript">
|
||||
$.get("..<%=handler%>?command=show-config", function(data){
|
||||
$('#txtDataConfig').attr('value', data);
|
||||
});
|
||||
</script>
|
||||
</tr>
|
||||
</table>
|
||||
</form>
|
||||
<form action="..<%=handler%>" target="result" method="get">
|
||||
<input type="hidden" name="clean" value="false">
|
||||
<input type="hidden" name="commit" value="true">
|
||||
<input class="stdbutton" type="submit" name="command" value="full-import">
|
||||
<input class="stdbutton" type="submit" name="command" value="delta-import">
|
||||
<input class="stdbutton" type="submit" name="command" value="status">
|
||||
<input class="stdbutton" type="submit" name="command" value="reload-config">
|
||||
<input class="stdbutton" type="submit" name="command" value="abort">
|
||||
</form>
|
||||
<form action="../select" target="result" method="get">
|
||||
<input type="hidden" name="q" value="*:*">
|
||||
<input type="hidden" name="start" value="0">
|
||||
<input type="hidden" name="rows" value="0">
|
||||
<input class="stdbutton" type="submit" value="Documents Count">
|
||||
</form>
|
||||
<form action="..<%=handler%>" target="result" method="get">
|
||||
<input type="hidden" name="verbose" value="true">
|
||||
<input type="hidden" name="clean" value="true">
|
||||
<input type="hidden" name="commit" value="true">
|
||||
<input type="hidden" name="command" value="full-import">
|
||||
<input class="stdbutton" type="submit" value="Full Import with Cleaning">
|
||||
</form>
|
||||
|
||||
<a href="index.jsp" target="_parent">Return to Admin Page</a>
|
||||
</body>
|
||||
</html>
|
|
@ -32,6 +32,8 @@ import java.util.concurrent.TimeoutException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.xml.xpath.XPathConstants;

import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.CoreAdminRequest.WaitForState;
import org.apache.solr.common.SolrException;

@ -46,16 +48,22 @@ import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.cloud.ZooKeeperException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.core.Config;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.update.UpdateLog;
import org.apache.solr.util.DOMUtil;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

/**
* Handle ZooKeeper interactions.

@ -114,21 +122,31 @@ public final class ZkController {
// this can be null in which case recovery will be inactive
private CoreContainer cc;

/**
* Bootstraps the current configs for all collections in solr.xml.
* Takes two params - the zkhost to connect to and the solrhome location
* to find solr.xml.
*
* If you also pass a solrPort, it will be used to start
* an embedded zk useful for single machine, multi node tests.
*
* @param args
* @throws Exception
*/
public static void main(String[] args) throws Exception {
// start up a tmp zk server first
String zkServerAddress = args[0];

String solrPort = args[1];

String confDir = args[2];
String confName = args[3];

String solrHome = null;
if (args.length == 5) {
solrHome = args[4];
String solrHome = args[1];

String solrPort = null;
if (args.length > 2) {
solrPort = args[2];
}

SolrZkServer zkServer = null;
if (solrHome != null) {
if (solrPort != null) {
zkServer = new SolrZkServer("true", null, solrHome + "/zoo_data", solrHome, solrPort);
zkServer.parseConfig();
zkServer.start();

@ -140,8 +158,13 @@ public final class ZkController {
public void command() {
}});

uploadConfigDir(zkClient, new File(confDir), confName);
if (solrHome != null) {
SolrResourceLoader loader = new SolrResourceLoader(solrHome);
solrHome = loader.getInstanceDir();

InputSource cfgis = new InputSource(new File(solrHome, "solr.xml").toURI().toASCIIString());
Config cfg = new Config(loader, null, cfgis , null, false);
bootstrapConf(zkClient, cfg, solrHome);
if (solrPort != null) {
zkServer.stop();
}
}

@ -855,7 +878,7 @@ public final class ZkController {
try {
Map<String,String> collectionProps = new HashMap<String,String>();
// TODO: if collection.configName isn't set, and there isn't already a conf in zk, just use that?
String defaultConfigName = System.getProperty(COLLECTION_PARAM_PREFIX+CONFIGNAME_PROP, "configuration1");
String defaultConfigName = System.getProperty(COLLECTION_PARAM_PREFIX+CONFIGNAME_PROP, collection);

// params passed in - currently only done via core admin (create core commmand).
if (params != null) {

@ -948,6 +971,13 @@ public final class ZkController {
collectionProps.put(CONFIGNAME_PROP, configNames.get(0));
break;
}

if (configNames != null && configNames.contains(collection)) {
log.info("Could not find explicit collection configName, but found config name matching collection name - using that set.");
collectionProps.put(CONFIGNAME_PROP, collection);
break;
}

log.info("Could not find collection configName - pausing for 3 seconds and trying again - try: " + retry);
Thread.sleep(3000);
}

@ -1155,5 +1185,34 @@ public final class ZkController {
}
return leaderProps;
}

/**
* If in SolrCloud mode, upload config sets for each SolrCore in solr.xml.
*
* @throws IOException
* @throws KeeperException
* @throws InterruptedException
*/
public static void bootstrapConf(SolrZkClient zkClient, Config cfg, String solrHome) throws IOException,
KeeperException, InterruptedException {

NodeList nodes = (NodeList)cfg.evaluate("solr/cores/core", XPathConstants.NODESET);

for (int i=0; i<nodes.getLength(); i++) {
Node node = nodes.item(i);
String rawName = DOMUtil.getAttr(node, "name", null);
String instanceDir = DOMUtil.getAttr(node, "instanceDir", null);
File idir = new File(instanceDir);
if (!idir.isAbsolute()) {
idir = new File(solrHome, instanceDir);
}
String confName = DOMUtil.getAttr(node, "collection", null);
if (confName == null) {
confName = rawName;
}

ZkController.uploadConfigDir(zkClient, new File(idir, "conf"), confName);
}
}

}

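A minimal caller-side sketch of the new static bootstrapConf, reusing the same pieces main() wires together above; the ZooKeeper address, client timeout, and solr home path are placeholders:

    import java.io.File;
    import org.apache.solr.cloud.ZkController;
    import org.apache.solr.common.cloud.SolrZkClient;
    import org.apache.solr.core.Config;
    import org.apache.solr.core.SolrResourceLoader;
    import org.xml.sax.InputSource;

    public class BootstrapConfSketch {
      public static void main(String[] args) throws Exception {
        // placeholders: ZooKeeper address, client timeout, and solr home
        SolrZkClient zkClient = new SolrZkClient("localhost:2181", 10000);
        try {
          SolrResourceLoader loader = new SolrResourceLoader("/path/to/solrhome");
          String solrHome = loader.getInstanceDir();
          InputSource cfgis = new InputSource(new File(solrHome, "solr.xml").toURI().toASCIIString());
          Config cfg = new Config(loader, null, cfgis, null, false);
          // uploads each core's conf/ directory, named after its collection (or core name)
          ZkController.bootstrapConf(zkClient, cfg, solrHome);
        } finally {
          zkClient.close();
        }
      }
    }
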
@ -233,7 +233,7 @@ public class CoreContainer

boolean boostrapConf = Boolean.getBoolean("bootstrap_conf");
if(boostrapConf) {
bootstrapConf();
ZkController.bootstrapConf(zkController.getZkClient(), cfg, solrHome);
}

} catch (InterruptedException e) {

@ -259,28 +259,6 @@ public class CoreContainer

}

private void bootstrapConf() throws IOException,
KeeperException, InterruptedException {

NodeList nodes = (NodeList)cfg.evaluate("solr/cores/core", XPathConstants.NODESET);

for (int i=0; i<nodes.getLength(); i++) {
Node node = nodes.item(i);
String rawName = DOMUtil.getAttr(node, "name", null);
String instanceDir = DOMUtil.getAttr(node, "instanceDir", null);
File idir = new File(instanceDir);
if (!idir.isAbsolute()) {
idir = new File(solrHome, instanceDir);
}
String confName = DOMUtil.getAttr(node, "collection", null);
if (confName == null) {
confName = rawName;
}

zkController.uploadConfigDir(new File(idir, "conf"), confName);
}
}

public Properties getContainerProperties() {
return containerProperties;
}

@ -1006,11 +1006,23 @@ public final class SolrCore implements SolrInfoMBean {
}

/**
* Return the newest normal {@link RefCounted}<{@link SolrIndexSearcher}> with
* the reference count incremented. It <b>must</b> be decremented when no longer needed.
* If no searcher is currently open, then if openNew==true a new searcher will be opened,
* or null is returned if openNew==false.
* Returns the current registered searcher with its reference count incremented, or null if none are registered.
*/
public RefCounted<SolrIndexSearcher> getRegisteredSearcher() {
synchronized (searcherLock) {
if (_searcher != null) {
_searcher.incref();
}
return _searcher;
}
}

/**
* Return the newest normal {@link RefCounted}<{@link SolrIndexSearcher}> with
* the reference count incremented. It <b>must</b> be decremented when no longer needed.
* If no searcher is currently open, then if openNew==true a new searcher will be opened,
* or null is returned if openNew==false.
*/
public RefCounted<SolrIndexSearcher> getNewestSearcher(boolean openNew) {
synchronized (searcherLock) {
if (!_searchers.isEmpty()) {

@ -1023,7 +1035,6 @@ public final class SolrCore implements SolrInfoMBean {
return openNew ? getRealtimeSearcher() : null;
}

/** Gets the latest real-time searcher w/o forcing open a new searcher if one already exists.
* The reference count will be incremented.
*/

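A usage sketch for the new getRegisteredSearcher accessor: the returned RefCounted has already been incref'd, so every successful acquire must be paired with a decref, usually in a finally block. Here "core" is assumed to be a SolrCore obtained elsewhere:

    // placeholder method name; only the acquire/decref pattern matters
    void readFromRegisteredSearcher(SolrCore core) throws IOException {
      RefCounted<SolrIndexSearcher> holder = core.getRegisteredSearcher();
      if (holder == null) {
        return; // nothing registered yet
      }
      try {
        SolrIndexSearcher searcher = holder.get();
        // ... read-only work against the searcher ...
      } finally {
        holder.decref(); // pair every successful acquire with a decref
      }
    }
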
@ -25,6 +25,7 @@ import java.util.Locale;

import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;

@ -173,7 +174,15 @@ public class PingRequestHandler extends RequestHandlerBase implements SolrCoreAw
{

SolrParams params = req.getParams();
SolrCore core = req.getCore();

// in this case, we want to default distrib to false so
// we only ping the single node
Boolean distrib = params.getBool("distrib");
if (distrib == null) {
ModifiableSolrParams mparams = new ModifiableSolrParams(params);
mparams.set("distrib", false);
req.setParams(mparams);
}

String actionParam = params.get("action");
ACTIONS action = null;

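With this change a ping defaults to distrib=false, so only the single node is pinged; a caller that wants a distributed ping has to ask for it explicitly. A small client-side sketch (the handler path is an assumption):

    ModifiableSolrParams params = new ModifiableSolrParams();
    params.set(CommonParams.QT, "/admin/ping");  // assumed handler path
    params.set("distrib", true);                 // opt back in to a distributed ping
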
@ -560,9 +560,14 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
public Integer value(int slot) {
return values[slot];
}

@Override
public int compareDocToValue(int doc, Integer valueObj) throws IOException {
final int value = valueObj.intValue();
final int docValue = docVal(doc);
return docValue - value; // values will be small enough that there is no overflow concern
}
};
}
}
}
}

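The subtraction in compareDocToValue above is only safe because elevation priorities stay small; for arbitrary ints the idiom can overflow and flip the sign of the result. A general-purpose sketch of the safe form (hypothetical helper, not part of this change):

    static int compareInts(int a, int b) {
      // explicit comparison avoids the overflow that (a - b) suffers when the
      // operands are far apart, e.g. a = Integer.MIN_VALUE, b = 1;
      // Integer.compare(a, b) does the same thing on Java 7+
      return a < b ? -1 : (a == b ? 0 : 1);
    }
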
@ -20,7 +20,7 @@ import org.apache.solr.common.SolrDocument;
import org.apache.solr.request.SolrQueryRequest;

/**
* Simple Augmenter that adds the docId
* Simple Augmenter that adds the score
*
*
* @since solr 4.0

@ -37,7 +37,7 @@ public class ValueAugmenterFactory extends TransformerFactory
public void init(NamedList args) {
value = args.get( "value" );
if( value == null ) {
defaultValue = args.get( "deaultValue" );
defaultValue = args.get( "defaultValue" );
}
}

@ -138,6 +138,12 @@ public class RandomSortField extends FieldType {
public Integer value(int slot) {
return values[slot];
}

@Override
public int compareDocToValue(int doc, Integer valueObj) {
// values will be positive... no overflow possible.
return hash(doc+seed) - valueObj.intValue();
}
};
}
};

@ -651,7 +651,7 @@ public class Grouping {
}
}

int len = docsGathered - offset;
int len = docsGathered > offset ? docsGathered - offset : 0;
int[] docs = ArrayUtils.toPrimitive(ids.toArray(new Integer[ids.size()]));
float[] docScores = ArrayUtils.toPrimitive(scores.toArray(new Float[scores.size()]));
DocSlice docSlice = new DocSlice(offset, len, docs, docScores, getMatches(), maxScore);

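The guard matters when the requested start offset exceeds the number of documents gathered for the grouped result; a worked example under that assumption:

    // docsGathered = 5, offset = 8
    // before: len = 5 - 8 = -3, a negative DocSlice length that can surface as the
    //         ArrayIndexOutOfBoundsException reproduced by the javabin grouping test later in this change set
    // after:  len = (5 > 8) ? 5 - 8 : 0 = 0, i.e. an empty page is returned instead
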
@ -59,11 +59,13 @@ public class JoinQParserPlugin extends QParserPlugin {
String toField = getParam("to");
String v = localParams.get("v");
Query fromQuery;
long fromCoreOpenTime = 0;

if (fromIndex != null) {
if (fromIndex != null && !fromIndex.equals(req.getCore().getCoreDescriptor().getName()) ) {
CoreContainer container = req.getCore().getCoreDescriptor().getCoreContainer();

final SolrCore fromCore = container.getCore(fromIndex);
RefCounted<SolrIndexSearcher> fromHolder = null;

if (fromCore == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Cross-core join: no such core " + fromIndex);

@ -73,9 +75,12 @@ public class JoinQParserPlugin extends QParserPlugin {
try {
QParser parser = QParser.getParser(v, "lucene", otherReq);
fromQuery = parser.getQuery();
fromHolder = fromCore.getRegisteredSearcher();
if (fromHolder != null) fromCoreOpenTime = fromHolder.get().getOpenTime();
} finally {
otherReq.close();
fromCore.close();
if (fromHolder != null) fromHolder.decref();
}
} else {
QParser fromQueryParser = subQuery(v, null);

@ -83,6 +88,7 @@ public class JoinQParserPlugin extends QParserPlugin {
}

JoinQuery jq = new JoinQuery(fromField, toField, fromIndex, fromQuery);
jq.fromCoreOpenTime = fromCoreOpenTime;
return jq;
}
};

@ -95,6 +101,7 @@ class JoinQuery extends Query {
String toField;
String fromIndex;
Query q;
long fromCoreOpenTime;

public JoinQuery(String fromField, String toField, String fromIndex, Query subQuery) {
this.fromField = fromField;

@ -548,12 +555,14 @@ class JoinQuery extends Query {
&& this.toField.equals(other.toField)
&& this.getBoost() == other.getBoost()
&& this.q.equals(other.q)
&& (this.fromIndex == other.fromIndex || this.fromIndex != null && this.fromIndex.equals(other.fromIndex));
&& (this.fromIndex == other.fromIndex || this.fromIndex != null && this.fromIndex.equals(other.fromIndex))
&& this.fromCoreOpenTime == other.fromCoreOpenTime
;
}

@Override
public int hashCode() {
int h = q.hashCode();
int h = q.hashCode() + (int)fromCoreOpenTime;
h = h * 31 + fromField.hashCode();
h = h * 31 + toField.hashCode();
return h;

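For context, a request-level sketch of the cross-core join this code serves, written in the style of the tests later in this change set; the core and field names are placeholders. Folding fromCoreOpenTime into equals/hashCode keeps a cached join result from being reused once the "from" core has opened a newer registered searcher.

    // hypothetical core "othercore" and fields "id"/"parent_id"
    assertQ(req("q", "{!join fromIndex=othercore from=id to=parent_id}type:child",
        "fl", "id"));
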
@ -121,6 +121,11 @@ class TermOrdValComparator_SML extends FieldComparator<Comparable> {
return TermOrdValComparator_SML.createComparator(context.reader(), this);
}

@Override
public int compareDocToValue(int doc, Comparable docValue) {
throw new UnsupportedOperationException();
}

// Base class for specialized (per bit width of the
// ords) per-segment comparator. NOTE: this is messy;
// we do this only because hotspot can't reliably inline

@ -216,6 +221,20 @@ class TermOrdValComparator_SML extends FieldComparator<Comparable> {
public BytesRef value(int slot) {
return values==null ? parent.NULL_VAL : values[slot];
}

@Override
public int compareDocToValue(int doc, BytesRef value) {
final BytesRef docValue = termsIndex.getTerm(doc, tempBR);
if (docValue == null) {
if (value == null) {
return 0;
}
return 1;
} else if (value == null) {
return -1;
}
return docValue.compareTo(value);
}
}

// Used per-segment when bit width of doc->ord is 8:

@ -45,7 +45,7 @@ import org.slf4j.LoggerFactory;

/**
* Reusable base class for UpdateProcessors that will consider
* AddUpdateCommands and mutate the values assocaited with configured
* AddUpdateCommands and mutate the values associated with configured
* fields.
* <p>
* Subclasses should override the mutate method to specify how individual

@ -29,9 +29,9 @@ public class FileUtils {
* Resolves a path relative a base directory.
*
* <p>
* This method does what "new File(base,path)" <b>Should</b> do, it wasn't
* This method does what "new File(base,path)" <b>Should</b> do, if it wasn't
* completely lame: If path is absolute, then a File for that path is returned;
* if it's not absoluve, then a File is returnd using "path" as a child
* if it's not absolute, then a File is returned using "path" as a child
* of "base")
* </p>
*/

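Illustrative behaviour of the documented contract, assuming the usual resolvePath(File base, String path) signature; the paths are placeholders:

    // FileUtils.resolvePath(new File("/opt/solr"), "/etc/solr.xml")  -> /etc/solr.xml          (absolute path wins)
    // FileUtils.resolvePath(new File("/opt/solr"), "conf/solr.xml")  -> /opt/solr/conf/solr.xml (relative path resolved against base)
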
@ -20,13 +20,21 @@ package org.apache.solr;
import org.apache.lucene.search.FieldCache;
import org.apache.noggit.JSONUtil;
import org.apache.noggit.ObjectBuilder;
import org.apache.solr.client.solrj.impl.BinaryResponseParser;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.GroupParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.response.BinaryResponseWriter;
import org.apache.solr.response.ResultContext;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.*;

public class TestGroupingSearch extends SolrTestCaseJ4 {

@ -37,7 +45,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {

@BeforeClass
public static void beforeTests() throws Exception {
initCore("solrconfig.xml","schema12.xml");
initCore("solrconfig.xml", "schema12.xml");
}

@Before

@ -75,18 +83,18 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
);

assertQ(req("q","title:title", "group", "true", "group.field","group_si")
,"//lst[@name='grouped']/lst[@name='group_si']"
,"*[count(//arr[@name='groups']/lst) = 2]"
assertQ(req("q", "title:title", "group", "true", "group.field", "group_si")
, "//lst[@name='grouped']/lst[@name='group_si']"
, "*[count(//arr[@name='groups']/lst) = 2]"

,"//arr[@name='groups']/lst[1]/int[@name='groupValue'][.='2']"
,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
, "//arr[@name='groups']/lst[1]/int[@name='groupValue'][.='2']"
, "//arr[@name='groups']/lst[1]/result[@numFound='2']"
, "//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"

,"//arr[@name='groups']/lst[2]/int[@name='groupValue'][.='1']"
,"//arr[@name='groups']/lst[2]/result[@numFound='3']"
,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='5']"
);
, "//arr[@name='groups']/lst[2]/int[@name='groupValue'][.='1']"
, "//arr[@name='groups']/lst[2]/result[@numFound='3']"
, "//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='5']"
);
}

@Test

@ -117,19 +125,19 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
);

assertQ(req("q","title:title", "group", "true", "group.field","group_si", "group.ngroups", "true")
,"//lst[@name='grouped']/lst[@name='group_si']/int[@name='matches'][.='5']"
,"//lst[@name='grouped']/lst[@name='group_si']/int[@name='ngroups'][.='2']"
,"*[count(//arr[@name='groups']/lst) = 2]"
assertQ(req("q", "title:title", "group", "true", "group.field", "group_si", "group.ngroups", "true")
, "//lst[@name='grouped']/lst[@name='group_si']/int[@name='matches'][.='5']"
, "//lst[@name='grouped']/lst[@name='group_si']/int[@name='ngroups'][.='2']"
, "*[count(//arr[@name='groups']/lst) = 2]"

,"//arr[@name='groups']/lst[1]/int[@name='groupValue'][.='2']"
,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
, "//arr[@name='groups']/lst[1]/int[@name='groupValue'][.='2']"
, "//arr[@name='groups']/lst[1]/result[@numFound='2']"
, "//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"

,"//arr[@name='groups']/lst[2]/int[@name='groupValue'][.='1']"
,"//arr[@name='groups']/lst[2]/result[@numFound='3']"
,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='5']"
);
, "//arr[@name='groups']/lst[2]/int[@name='groupValue'][.='1']"
, "//arr[@name='groups']/lst[2]/result[@numFound='3']"
, "//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='5']"
);
}

@Test

@ -141,22 +149,22 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
assertU(add(doc("id", "5","name", "author3", "title", "the title of a title")));
assertU(commit());

assertQ(req("q","title:title", "group", "true", "group.field","name", "sort", "score desc", "group.sort", "score desc")
,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author2']"
// ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']"
,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
assertQ(req("q", "title:title", "group", "true", "group.field", "name", "sort", "score desc", "group.sort", "score desc")
, "//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author2']"
// ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']"
, "//arr[@name='groups']/lst[1]/result[@numFound='2']"
, "//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"

,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author1']"
// ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']"
,"//arr[@name='groups']/lst[2]/result[@numFound='2']"
,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='2']"
, "//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author1']"
// ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']"
, "//arr[@name='groups']/lst[2]/result[@numFound='2']"
, "//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='2']"

,"//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author3']"
// ,"//arr[@name='groups']/lst[3]/int[@name='matches'][.='1']"
,"//arr[@name='groups']/lst[3]/result[@numFound='1']"
,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
);
, "//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author3']"
// ,"//arr[@name='groups']/lst[3]/int[@name='matches'][.='1']"
, "//arr[@name='groups']/lst[3]/result[@numFound='1']"
, "//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
);
}


@ -168,18 +176,18 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
assertU(add(doc("id", "4","name", "author2", "weight", "0.11")));
assertU(commit());

assertQ(req("q","*:*", "group", "true", "group.field","name", "sort", "id asc", "group.sort", "weight desc")
,"*[count(//arr[@name='groups']/lst) = 2]"
,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author1']"
// ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']"
,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='1']"
assertQ(req("q", "*:*", "group", "true", "group.field", "name", "sort", "id asc", "group.sort", "weight desc")
, "*[count(//arr[@name='groups']/lst) = 2]"
, "//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author1']"
// ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']"
, "//arr[@name='groups']/lst[1]/result[@numFound='2']"
, "//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='1']"

,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author2']"
// ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']"
,"//arr[@name='groups']/lst[2]/result[@numFound='2']"
,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='4']"
);
, "//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author2']"
// ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']"
, "//arr[@name='groups']/lst[2]/result[@numFound='2']"
, "//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='4']"
);
}

@Test

@ -214,6 +222,37 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
);
}

@Test
public void testGroupingSimpleFormatArrayIndexOutOfBoundsExceptionWithJavaBin() throws Exception {
assertU(add(doc("id", "1", "nullfirst", "1")));
assertU(add(doc("id", "2", "nullfirst", "1")));
assertU(add(doc("id", "3", "nullfirst", "2")));
assertU(add(doc("id", "4", "nullfirst", "2")));
assertU(add(doc("id", "5", "nullfirst", "2")));
assertU(add(doc("id", "6", "nullfirst", "3")));
assertU(commit());

SolrQueryRequest request =
req("q", "*:*","group", "true", "group.field", "nullfirst", "group.main", "true", "wt", "javabin", "start", "4", "rows", "10");

SolrQueryResponse response = new SolrQueryResponse();
ByteArrayOutputStream out = new ByteArrayOutputStream();
try {
SolrRequestInfo.setRequestInfo(new SolrRequestInfo(request, response));
String handlerName = request.getParams().get(CommonParams.QT);
h.getCore().execute(h.getCore().getRequestHandler(handlerName), request, response);
BinaryResponseWriter responseWriter = new BinaryResponseWriter();
responseWriter.write(out, request, response);
} finally {
request.close();
SolrRequestInfo.clearRequestInfo();
}

assertEquals(6, ((ResultContext) response.getValues().get("response")).docs.matches());
new BinaryResponseParser().processResponse(new ByteArrayInputStream(out.toByteArray()), "");
out.close();
}

@Test
public void testGroupingWithTimeAllowed() throws Exception {
assertU(add(doc("id", "1")));

@ -530,9 +569,9 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
);

///////////////////////// group.format == simple
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","3", "start","1", "group.limit","2", "group.format","simple")
, "/grouped/foo_i=={'matches':10,'doclist':"
+"{'numFound':10,'start':1,'docs':[{'id':'10'},{'id':'3'},{'id':'6'}]}}"
assertJQ(req("fq", filt, "q", "{!func}" + f2, "group", "true", "group.field", f, "fl", "id", "rows", "3", "start", "1", "group.limit", "2", "group.format", "simple")
, "/grouped/foo_i=={'matches':10,'doclist':"
+ "{'numFound':10,'start':1,'docs':[{'id':'10'},{'id':'3'},{'id':'6'}]}}"
);
}

@ -17,6 +17,7 @@
package org.apache.solr.core;

import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.util.Constants;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.Before;
import org.junit.BeforeClass;

@ -109,6 +110,9 @@ public class TestSolrDeletionPolicy1 extends SolrTestCaseJ4 {

@Test
public void testCommitAge() throws InterruptedException {
assumeFalse("This test is not working on Windows (or maybe machines with only 2 CPUs)",
Constants.WINDOWS);

IndexDeletionPolicyWrapper delPolicy = h.getCore().getDeletionPolicy();
addDocs();
Map<Long, IndexCommit> commits = delPolicy.getCommits();

@ -25,6 +25,7 @@ import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Constants;
import org.apache.noggit.ObjectBuilder;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;

@ -1213,6 +1214,7 @@ public class TestRealTimeGet extends SolrTestCaseJ4 {
// and tests the ability to buffer updates and apply them later
@Test
public void testStressRecovery() throws Exception {
assumeFalse("FIXME: This test is horribly slow sometimes on Windows!", Constants.WINDOWS);
clearIndex();
assertU(commit());