LUCENE-4055: merge trunk (1338960:1341010)

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4055@1341015 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2012-05-21 13:39:57 +00:00
commit d5f7dbdde7
123 changed files with 3040 additions and 1478 deletions

View File

@ -266,6 +266,11 @@ Changes in backwards compatibility policy
* LUCENE-3970: Rename Fields.getUniqueFieldCount -> .size() and
Terms.getUniqueTermCount -> .size(). (Iulius Curt via Mike McCandless)
* LUCENE-3514: IndexSearcher.setDefaultFieldSortScoring was removed
and replaced with per-search control via new expert search methods
that take two booleans indicating whether hit scores and max
score should be computed. (Mike McCandless)
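A hedged sketch of the replacement API (not part of the CHANGES entry itself): the two trailing booleans are doDocScores and doMaxScore, while the Directory, sort field and query below are illustrative assumptions.

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;

class SortedSearchSketch {
  // Replaces the removed searcher.setDefaultFieldSortScoring(true, true):
  // scoring is now requested per search call.
  TopFieldDocs searchSortedWithScores(Directory dir) throws Exception {
    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
    Sort sort = new Sort(new SortField("price", SortField.Type.LONG)); // assumed sort field
    Query query = new TermQuery(new Term("color", "red"));             // assumed query
    // top 10 hits; doDocScores=true, doMaxScore=true
    return searcher.search(query, null, 10, sort, true, true);
  }
}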
Changes in Runtime Behavior
* LUCENE-2846: omitNorms now behaves like omitTermFrequencyAndPositions, if you
@ -508,6 +513,9 @@ API Changes
immutable instances of NormalizeCharMap. (Dawid Weiss, Mike
McCandless)
* LUCENE-4063: FrenchLightStemmer no longer deletes repeated digits.
(Tanguy Moal via Steve Rowe)
New features
* LUCENE-2604: Added RegexpQuery support to QueryParser. Regular expressions
@ -857,6 +865,10 @@ New features
* LUCENE-4039: Add AddIndexesTask to benchmark, which uses IW.addIndexes.
(Shai Erera)
* LUCENE-3514: Added IndexSearcher.searchAfter when Sort is used,
returning results after a specified FieldDoc for deep
paging. (Mike McCandless)
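A hedged deep-paging sketch using the new method; the searchAfter(after, query, n, sort) signature is the one added by this change, while the page size and hit handling are illustrative assumptions.

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;

class DeepPagingSketch {
  void pageThrough(IndexSearcher searcher, Query query, Sort sort, int pageSize) throws IOException {
    ScoreDoc after = null; // null means: start from the first page
    while (true) {
      TopDocs page = searcher.searchAfter(after, query, pageSize, sort);
      if (page.scoreDocs.length == 0) {
        break; // no more hits
      }
      for (ScoreDoc hit : page.scoreDocs) {
        // consume hit.doc here
      }
      // the bottom hit of this page becomes the "after" anchor for the next page
      after = page.scoreDocs[page.scoreDocs.length - 1];
    }
  }
}

Because the search is sorted and fields are filled, the returned hits are FieldDoc instances, so the bottom ScoreDoc of one page can be passed straight back as the after argument.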
Optimizations
* LUCENE-2588: Don't store unnecessary suffixes when writing the terms
@ -905,7 +917,11 @@ Optimizations
FST under the hood, which requires less RAM. NormalizeCharMap no
longer accepts empty string match (it did previously, but ignored
it). (Dawid Weiss, Mike McCandless)
* LUCENE-4061: improve synchronization in DirectoryTaxonomyWriter.addCategory
and few general improvements to DirectoryTaxonomyWriter.
(Shai Erera, Gilad Barkai)
Bug fixes
* LUCENE-2803: The FieldCache can miss values if an entry for a reader
@ -953,6 +969,11 @@ Bug fixes
offset calculation in PathHierarchyTokenizer.
(Mike McCandless, Uwe Schindler, Robert Muir)
* LUCENE-4060: Fix a synchronization bug in
DirectoryTaxonomyWriter.addTaxonomies(). Also, the method has been renamed to
addTaxonomy and now takes only one Directory and one OrdinalMap.
(Shai Erera, Gilad Barkai)
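A hedged sketch of the renamed call; the MemoryOrdinalMap implementation nested in DirectoryTaxonomyWriter is assumed here, and the destination writer and source directory are illustrative.

import java.io.IOException;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.store.Directory;

class AddTaxonomySketch {
  // Merges one source taxonomy into destTaxoWriter; the OrdinalMap records how
  // each source ordinal was remapped in the destination taxonomy.
  void merge(DirectoryTaxonomyWriter destTaxoWriter, Directory srcTaxoDir) throws IOException {
    DirectoryTaxonomyWriter.OrdinalMap map = new DirectoryTaxonomyWriter.MemoryOrdinalMap();
    destTaxoWriter.addTaxonomy(srcTaxoDir, map);
  }
}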
Documentation
* LUCENE-3958: Javadocs corrections for IndexWriter.
@ -990,6 +1011,10 @@ Build
* LUCENE-3286: Moved remainder of contrib/xml-query-parser to lucene/queryparser.
Classes now found at org.apache.lucene.queryparser.xml.*
* LUCENE-4059: Improve ANT task prepare-webpages (used by documentation
tasks) to correctly encode build file names as URIs for later processing by
XSL. (Greg Bowyer, Uwe Schindler)
======================= Lucene 3.6.0 =======================
Changes in backwards compatibility policy

View File

@ -62,8 +62,16 @@ CharacterEntities = ( "AElig" | "Aacute" | "Acirc" | "Agrave" | "Alpha"
| "weierp" | "xi" | "yacute" | "yen" | "yuml" | "zeta"
| "zwj" | "zwnj" )
%{
private static final Set<String> upperCaseVariantsAccepted
= new HashSet<String>(Arrays.asList("quot","copy","gt","lt","reg","amp"));
private static final Map<String,String> upperCaseVariantsAccepted
= new HashMap<String,String>();
static {
upperCaseVariantsAccepted.put("quot", "QUOT");
upperCaseVariantsAccepted.put("copy", "COPY");
upperCaseVariantsAccepted.put("gt", "GT");
upperCaseVariantsAccepted.put("lt", "LT");
upperCaseVariantsAccepted.put("reg", "REG");
upperCaseVariantsAccepted.put("amp", "AMP");
}
private static final CharArrayMap<Character> entityValues
= new CharArrayMap<Character>(Version.LUCENE_40, 253, false);
static {
@ -145,8 +153,9 @@ CharacterEntities = ( "AElig" | "Aacute" | "Acirc" | "Agrave" | "Alpha"
for (int i = 0 ; i < entities.length ; i += 2) {
Character value = entities[i + 1].charAt(0);
entityValues.put(entities[i], value);
if (upperCaseVariantsAccepted.contains(entities[i])) {
entityValues.put(entities[i].toUpperCase(), value);
String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);
if (upperCaseVariant != null) {
entityValues.put(upperCaseVariant, value);
}
}
}

View File

@ -1,4 +1,4 @@
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 3/24/12 4:50 PM */
/* The following code was generated by JFlex 1.5.0-SNAPSHOT on 5/18/12 12:24 PM */
package org.apache.lucene.analysis.charfilter;
@ -21,7 +21,8 @@ package org.apache.lucene.analysis.charfilter;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.util.Version;
@ -39,8 +40,8 @@ import org.apache.lucene.analysis.util.OpenStringBuilder;
/**
* This class is a scanner generated by
* <a href="http://www.jflex.de/">JFlex</a> 1.5.0-SNAPSHOT
* on 3/24/12 4:50 PM from the specification file
* <tt>C:/cygwin/home/s/svn/lucene/dev/trunk/modules/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
* on 5/18/12 12:24 PM from the specification file
* <tt>C:/svn/lucene/dev/trunk/lucene/analysis/common/src/java/org/apache/lucene/analysis/charfilter/HTMLStripCharFilter.jflex</tt>
*/
public final class HTMLStripCharFilter extends BaseCharFilter {
@ -30522,8 +30523,16 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
private boolean zzEOFDone;
/* user code: */
private static final Set<String> upperCaseVariantsAccepted
= new HashSet<String>(Arrays.asList("quot","copy","gt","lt","reg","amp"));
private static final Map<String,String> upperCaseVariantsAccepted
= new HashMap<String,String>();
static {
upperCaseVariantsAccepted.put("quot", "QUOT");
upperCaseVariantsAccepted.put("copy", "COPY");
upperCaseVariantsAccepted.put("gt", "GT");
upperCaseVariantsAccepted.put("lt", "LT");
upperCaseVariantsAccepted.put("reg", "REG");
upperCaseVariantsAccepted.put("amp", "AMP");
}
private static final CharArrayMap<Character> entityValues
= new CharArrayMap<Character>(Version.LUCENE_40, 253, false);
static {
@ -30605,8 +30614,9 @@ public final class HTMLStripCharFilter extends BaseCharFilter {
for (int i = 0 ; i < entities.length ; i += 2) {
Character value = entities[i + 1].charAt(0);
entityValues.put(entities[i], value);
if (upperCaseVariantsAccepted.contains(entities[i])) {
entityValues.put(entities[i].toUpperCase(), value);
String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);
if (upperCaseVariant != null) {
entityValues.put(upperCaseVariant, value);
}
}
}

View File

@ -19,7 +19,8 @@ package org.apache.lucene.analysis.charfilter;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.util.Version;

View File

@ -50,8 +50,16 @@ def main():
print output_line, ')'
print '%{'
print ' private static final Set<String> upperCaseVariantsAccepted'
print ' = new HashSet<String>(Arrays.asList("quot","copy","gt","lt","reg","amp"));'
print ' private static final Map<String,String> upperCaseVariantsAccepted'
print ' = new HashMap<String,String>();'
print ' static {'
print ' upperCaseVariantsAccepted.put("quot", "QUOT");'
print ' upperCaseVariantsAccepted.put("copy", "COPY");'
print ' upperCaseVariantsAccepted.put("gt", "GT");'
print ' upperCaseVariantsAccepted.put("lt", "LT");'
print ' upperCaseVariantsAccepted.put("reg", "REG");'
print ' upperCaseVariantsAccepted.put("amp", "AMP");'
print ' }'
print ' private static final CharArrayMap<Character> entityValues'
print ' = new CharArrayMap<Character>(Version.LUCENE_40, %i, false);' % len(keys)
print ' static {'
@ -68,8 +76,9 @@ def main():
print ' for (int i = 0 ; i < entities.length ; i += 2) {'
print ' Character value = entities[i + 1].charAt(0);'
print ' entityValues.put(entities[i], value);'
print ' if (upperCaseVariantsAccepted.contains(entities[i])) {'
print ' entityValues.put(entities[i].toUpperCase(), value);'
print ' String upperCaseVariant = upperCaseVariantsAccepted.get(entities[i]);'
print ' if (upperCaseVariant != null) {'
print ' entityValues.put(upperCaseVariant, value);'
print ' }'
print ' }'
print " }"

View File

@ -246,7 +246,7 @@ public class FrenchLightStemmer {
char ch = s[0];
for (int i = 1; i < len; i++) {
if (s[i] == ch)
if (s[i] == ch && Character.isLetter(ch))
len = delete(s, i--, len);
else
ch = s[i];
@ -260,7 +260,7 @@ public class FrenchLightStemmer {
if (s[len-1] == 'r') len--;
if (s[len-1] == 'e') len--;
if (s[len-1] == 'e') len--;
if (s[len-1] == s[len-2]) len--;
if (s[len-1] == s[len-2] && Character.isLetter(s[len-1])) len--;
}
return len;
}

View File

@ -153,6 +153,22 @@ public class TestFrenchLightStemFilter extends BaseTokenStreamTestCase {
checkOneTerm(analyzer, "disposition", "dispos");
checkOneTerm(analyzer, "dispose", "dispos");
// SOLR-3463 : over-aggressive compression of repeated characters in numbers
// Trailing repeated char elision :
checkOneTerm(analyzer, "1234555", "1234555");
// Repeated char within numbers with more than 4 characters :
checkOneTerm(analyzer, "12333345", "12333345");
// Short numbers were already unaffected:
checkOneTerm(analyzer, "1234", "1234");
// Ensure behaviour is preserved for words!
// Trailing repeated char elision :
checkOneTerm(analyzer, "abcdeff", "abcdef");
// Repeated char within words with more than 4 characters :
checkOneTerm(analyzer, "abcccddeef", "abcdef");
checkOneTerm(analyzer, "créées", "cre");
// Combined letter and digit repetition
checkOneTerm(analyzer, "22hh00", "22h00"); // 10:00pm
}
/** Test against a vocabulary from the reference impl */

View File

@ -252,6 +252,8 @@ public class ToStringUtil {
/**
* Romanize katakana with modified hepburn
*/
// TODO: now that this is used by readingsfilter and not just for
// debugging, fix this to really be a scheme that works best with IMEs
public static void getRomanization(Appendable builder, CharSequence s) throws IOException {
final int len = s.length();
for (int i = 0; i < len; i++) {
@ -522,6 +524,9 @@ public class ToStringUtil {
if (ch2 == 'ウ') {
builder.append("tō");
i++;
} else if (ch2 == 'ゥ') {
builder.append("tu");
i++;
} else {
builder.append("to");
}
@ -665,7 +670,7 @@ public class ToStringUtil {
builder.append("mu");
break;
case 'メ':
builder.append("mi");
builder.append("me");
break;
case 'モ':
if (ch2 == 'ウ') {
@ -690,7 +695,12 @@ public class ToStringUtil {
}
break;
case 'ラ':
builder.append("ra");
if (ch2 == '゜') {
builder.append("la");
i++;
} else {
builder.append("ra");
}
break;
case 'リ':
if (ch2 == 'ョ' && ch3 == 'ウ') {
@ -711,20 +721,36 @@ public class ToStringUtil {
} else if (ch2 == 'ェ') {
builder.append("rye");
i++;
} else if (ch2 == '゜') {
builder.append("li");
i++;
} else {
builder.append("ri");
}
break;
case 'ル':
builder.append("ru");
if (ch2 == '゜') {
builder.append("lu");
i++;
} else {
builder.append("ru");
}
break;
case 'レ':
builder.append("re");
if (ch2 == '゜') {
builder.append("le");
i++;
} else {
builder.append("re");
}
break;
case 'ロ':
if (ch2 == 'ウ') {
builder.append("rō");
i++;
} else if (ch2 == '゜') {
builder.append("lo");
i++;
} else {
builder.append("ro");
}
@ -887,7 +913,28 @@ public class ToStringUtil {
builder.append("da");
break;
case 'ヂ':
builder.append("ji");
// TODO: investigate all this
if (ch2 == 'ョ' && ch3 == 'ウ') {
builder.append("jō");
i += 2;
} else if (ch2 == 'ュ' && ch3 == 'ウ') {
builder.append("jū");
i += 2;
} else if (ch2 == 'ャ') {
builder.append("ja");
i++;
} else if (ch2 == 'ョ') {
builder.append("jo");
i++;
} else if (ch2 == 'ュ') {
builder.append("ju");
i++;
} else if (ch2 == 'ェ') {
builder.append("je");
i++;
} else {
builder.append("ji");
}
break;
case 'ヅ':
builder.append("zu");
@ -994,6 +1041,18 @@ public class ToStringUtil {
builder.append("po");
}
break;
case 'ヷ':
builder.append("va");
break;
case 'ヸ':
builder.append("vi");
break;
case 'ヹ':
builder.append("ve");
break;
case 'ヺ':
builder.append("vo");
break;
case 'ヴ':
if (ch2 == 'ィ' && ch3 == 'ェ') {
builder.append("vye");

View File

@ -17,6 +17,9 @@ package org.apache.lucene.analysis.ja.util;
* limitations under the License.
*/
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.util.LuceneTestCase;
public class TestToStringUtil extends LuceneTestCase {
@ -31,4 +34,79 @@ public class TestToStringUtil extends LuceneTestCase {
assertEquals("chashu", ToStringUtil.getRomanization("チャーシュー"));
assertEquals("shumai", ToStringUtil.getRomanization("シューマイ"));
}
// see http://en.wikipedia.org/wiki/Hepburn_romanization,
// but this isn't even thorough, and probably not really what we want!
public void testHepburnTable() {
Map<String,String> table = new HashMap<String,String>() {{
put("", "a"); put("", "i"); put("", "u"); put("", "e"); put("", "o");
put("", "ka"); put("", "ki"); put("", "ku"); put("", "ke"); put("", "ko");
put("", "sa"); put("", "shi"); put("", "su"); put("", "se"); put("", "so");
put("", "ta"); put("", "chi"); put("", "tsu"); put("", "te"); put("", "to");
put("", "na"); put("", "ni"); put("", "nu"); put("", "ne"); put("", "no");
put("", "ha"); put("", "hi"); put("", "fu"); put("", "he"); put("", "ho");
put("", "ma"); put("", "mi"); put("", "mu"); put("", "me"); put("", "mo");
put("", "ya"); put("", "yu"); put("", "yo");
put("", "ra"); put("", "ri"); put("", "ru"); put("", "re"); put("", "ro");
put("", "wa"); put("", "i"); put("", "e"); put("", "o");
put("", "n");
put("", "ga"); put("", "gi"); put("", "gu"); put("", "ge"); put("", "go");
put("", "za"); put("", "ji"); put("", "zu"); put("", "ze"); put("", "zo");
put("", "da"); put("", "ji"); put("", "zu"); put("", "de"); put("", "do");
put("", "ba"); put("", "bi"); put("", "bu"); put("", "be"); put("", "bo");
put("", "pa"); put("", "pi"); put("", "pu"); put("", "pe"); put("", "po");
put("キャ", "kya"); put("キュ", "kyu"); put("キョ", "kyo");
put("シャ", "sha"); put("シュ", "shu"); put("ショ", "sho");
put("チャ", "cha"); put("チュ", "chu"); put("チョ", "cho");
put("ニャ", "nya"); put("ニュ", "nyu"); put("ニョ", "nyo");
put("ヒャ", "hya"); put("ヒュ", "hyu"); put("ヒョ", "hyo");
put("ミャ", "mya"); put("ミュ", "myu"); put("ミョ", "myo");
put("リャ", "rya"); put("リュ", "ryu"); put("リョ", "ryo");
put("ギャ", "gya"); put("ギュ", "gyu"); put("ギョ", "gyo");
put("ジャ", "ja"); put("ジュ", "ju"); put("ジョ", "jo");
put("ヂャ", "ja"); put("ヂュ", "ju"); put("ヂョ", "jo");
put("ビャ", "bya"); put("ビュ", "byu"); put("ビョ", "byo");
put("ピャ", "pya"); put("ピュ", "pyu"); put("ピョ", "pyo");
put("イィ", "yi"); put("イェ", "ye");
put("ウァ", "wa"); put("ウィ", "wi"); put("ウゥ", "wu"); put("ウェ", "we"); put("ウォ", "wo");
put("ウュ", "wyu");
// TODO: really should be vu
put("ヴァ", "va"); put("ヴィ", "vi"); put("", "v"); put("ヴェ", "ve"); put("ヴォ", "vo");
put("ヴャ", "vya"); put("ヴュ", "vyu"); put("ヴィェ", "vye"); put("ヴョ", "vyo");
put("キェ", "kye");
put("ギェ", "gye");
put("クァ", "kwa"); put("クィ", "kwi"); put("クェ", "kwe"); put("クォ", "kwo");
put("クヮ", "kwa");
put("グァ", "gwa"); put("グィ", "gwi"); put("グェ", "gwe"); put("グォ", "gwo");
put("グヮ", "gwa");
put("シェ", "she");
put("ジェ", "je");
put("スィ", "si");
put("ズィ", "zi");
put("チェ", "che");
put("ツァ", "tsa"); put("ツィ", "tsi"); put("ツェ", "tse"); put("ツォ", "tso");
put("ツュ", "tsyu");
put("ティ", "ti"); put("トゥ", "tu");
put("テュ", "tyu");
put("ディ", "di"); put("ドゥ", "du");
put("デュ", "dyu");
put("ニェ", "nye");
put("ヒェ", "hye");
put("ビェ", "bye");
put("ピェ", "pye");
put("ファ", "fa"); put("フィ", "fi"); put("フェ", "fe"); put("フォ", "fo");
put("フャ", "fya"); put("フュ", "fyu"); put("フィェ", "fye"); put("フョ", "fyo");
put("ホゥ", "hu");
put("ミェ", "mye");
put("リェ", "rye");
put("ラ゜", "la"); put("リ゜", "li"); put("ル゜", "lu"); put("レ゜", "le"); put("ロ゜", "lo");
put("", "va"); put("", "vi"); put("", "ve"); put("", "vo");
}};
for (String s : table.keySet()) {
assertEquals(s, table.get(s), ToStringUtil.getRomanization(s));
}
}
}

View File

@ -25,20 +25,20 @@ import java.util.HashMap;
import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.feeds.FacetSource;
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
import org.apache.lucene.benchmark.byTask.stats.Points;
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
import org.apache.lucene.benchmark.byTask.tasks.PerfTask;
import org.apache.lucene.benchmark.byTask.tasks.ReadTask;
import org.apache.lucene.benchmark.byTask.tasks.SearchTask;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.benchmark.byTask.utils.FileUtils;
import org.apache.lucene.benchmark.byTask.tasks.NewAnalyzerTask;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
@ -80,6 +80,7 @@ public class PerfRunData implements Closeable {
private Directory directory;
private Analyzer analyzer;
private DocMaker docMaker;
private ContentSource contentSource;
private FacetSource facetSource;
private Locale locale;
@ -105,10 +106,16 @@ public class PerfRunData implements Closeable {
// analyzer (default is standard analyzer)
analyzer = NewAnalyzerTask.createAnalyzer(config.get("analyzer",
"org.apache.lucene.analysis.standard.StandardAnalyzer"));
// content source
String sourceClass = config.get("content.source", "org.apache.lucene.benchmark.byTask.feeds.SingleDocSource");
contentSource = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
contentSource.setConfig(config);
// doc maker
docMaker = Class.forName(config.get("doc.maker",
"org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance();
docMaker.setConfig(config);
docMaker.setConfig(config, contentSource);
// facet source
facetSource = Class.forName(config.get("facet.source",
"org.apache.lucene.benchmark.byTask.feeds.RandomFacetSource")).asSubclass(FacetSource.class).newInstance();
@ -129,10 +136,11 @@ public class PerfRunData implements Closeable {
}
}
@Override
public void close() throws IOException {
IOUtils.close(indexWriter, indexReader, directory,
taxonomyWriter, taxonomyReader, taxonomyDir,
docMaker, facetSource);
docMaker, facetSource, contentSource);
// close all perf objects that are closeable.
ArrayList<Closeable> perfObjectsToClose = new ArrayList<Closeable>();
@ -361,7 +369,12 @@ public class PerfRunData implements Closeable {
this.analyzer = analyzer;
}
/** Returns the docMaker. */
/** Returns the ContentSource. */
public ContentSource getContentSource() {
return contentSource;
}
/** Returns the DocMaker. */
public DocMaker getDocMaker() {
return docMaker;
}
@ -393,6 +406,7 @@ public class PerfRunData implements Closeable {
}
public void resetInputs() throws IOException {
contentSource.resetInputs();
docMaker.resetInputs();
facetSource.resetInputs();
for (final QueryMaker queryMaker : readTaskQueryMaker.values()) {

View File

@ -131,7 +131,6 @@ public abstract class ContentItemsSource implements Closeable {
* items generated since the last reset, so it's important to call
* super.resetInputs in case you override this method.
*/
@SuppressWarnings("unused")
public void resetInputs() throws IOException {
bytesCount = 0;
itemCount = 0;

View File

@ -355,26 +355,11 @@ public class DocMaker implements Closeable {
* {@link ContentSource}, and it can be overridden to do more work (but make
* sure to call super.close()).
*/
@Override
public void close() throws IOException {
source.close();
}
/**
* Returns the number of bytes generated by the content source since last
* reset.
*/
public synchronized long getBytesCount() {
return source.getBytesCount();
}
/**
* Returns the total number of bytes that were generated by the content source
* defined to that doc maker.
*/
public long getTotalBytesCount() {
return source.getTotalBytesCount();
}
/**
* Creates a {@link Document} object ready for indexing. This method uses the
* {@link ContentSource} to get the next document from the source, and creates
@ -426,26 +411,16 @@ public class DocMaker implements Closeable {
public synchronized void resetInputs() throws IOException {
source.printStatistics("docs");
// re-initiate since properties by round may have changed.
setConfig(config);
setConfig(config, source);
source.resetInputs();
numDocsCreated.set(0);
resetLeftovers();
}
/** Set the configuration parameters of this doc maker. */
public void setConfig(Config config) {
public void setConfig(Config config, ContentSource source) {
this.config = config;
try {
if (source != null) {
source.close();
}
String sourceClass = config.get("content.source", "org.apache.lucene.benchmark.byTask.feeds.SingleDocSource");
source = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
source.setConfig(config);
} catch (Exception e) {
// Should not get here. Throw runtime exception.
throw new RuntimeException(e);
}
this.source = source;
boolean stored = config.get("doc.stored", false);
boolean bodyStored = config.get("doc.body.stored", stored);

View File

@ -20,34 +20,16 @@ package org.apache.lucene.benchmark.byTask.tasks;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
import org.apache.lucene.benchmark.byTask.feeds.DocData;
import org.apache.lucene.benchmark.byTask.utils.Config;
/**
* Consumes a {@link org.apache.lucene.benchmark.byTask.feeds.ContentSource}.
* Supports the following parameters:
* <ul>
* <li>content.source - the content source to use. (mandatory)
* </ul>
*/
/** Consumes a {@link org.apache.lucene.benchmark.byTask.feeds.ContentSource}. */
public class ConsumeContentSourceTask extends PerfTask {
private ContentSource source;
private DocData dd = new DocData();
private final ContentSource source;
private ThreadLocal<DocData> dd = new ThreadLocal<DocData>();
public ConsumeContentSourceTask(PerfRunData runData) {
super(runData);
Config config = runData.getConfig();
String sourceClass = config.get("content.source", null);
if (sourceClass == null) {
throw new IllegalArgumentException("content.source must be defined");
}
try {
source = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
source.setConfig(config);
source.resetInputs();
} catch (Exception e) {
throw new RuntimeException(e);
}
source = runData.getContentSource();
}
@Override
@ -55,15 +37,9 @@ public class ConsumeContentSourceTask extends PerfTask {
return "read " + recsCount + " documents from the content source";
}
@Override
public void close() throws Exception {
source.close();
super.close();
}
@Override
public int doLogic() throws Exception {
dd = source.getNextDocData(dd);
dd.set(source.getNextDocData(dd.get()));
return 1;
}

View File

@ -22,7 +22,9 @@ import java.io.FileWriter;
import java.io.IOException;
import java.util.Properties;
import org.apache.lucene.benchmark.byTask.feeds.ContentSource;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource;
import org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.document.Document;
@ -122,15 +124,19 @@ public class ExtractWikipedia {
} else if (arg.equals("--discardImageOnlyDocs") || arg.equals("-d")) {
keepImageOnlyDocs = false;
}
}
DocMaker docMaker = new DocMaker();
Properties properties = new Properties();
properties.setProperty("content.source", "org.apache.lucene.benchmark.byTask.feeds.EnwikiContentSource");
properties.setProperty("docs.file", wikipedia.getAbsolutePath());
properties.setProperty("content.source.forever", "false");
properties.setProperty("keep.image.only.docs", String.valueOf(keepImageOnlyDocs));
docMaker.setConfig(new Config(properties));
Config config = new Config(properties);
ContentSource source = new EnwikiContentSource();
source.setConfig(config);
DocMaker docMaker = new DocMaker();
docMaker.setConfig(config, source);
docMaker.resetInputs();
if (wikipedia.exists()) {
System.out.println("Extracting Wikipedia to: " + outputDir + " using EnwikiContentSource");

View File

@ -28,7 +28,6 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.tasks.AddDocTask;
import org.apache.lucene.benchmark.byTask.tasks.CloseIndexTask;
import org.apache.lucene.benchmark.byTask.tasks.CreateIndexTask;
import org.apache.lucene.benchmark.byTask.tasks.ResetInputsTask;
import org.apache.lucene.benchmark.byTask.tasks.TaskSequence;
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.document.Document;
@ -42,7 +41,7 @@ import org.apache.lucene.search.TopDocs;
/** Tests the functionality of {@link DocMaker}. */
public class DocMakerTest extends BenchmarkTestCase {
static final class OneDocSource extends ContentSource {
public static final class OneDocSource extends ContentSource {
private boolean finish = false;
@ -106,7 +105,6 @@ public class DocMakerTest extends BenchmarkTestCase {
// Indexing configuration.
props.setProperty("analyzer", WhitespaceAnalyzer.class.getName());
props.setProperty("content.source", OneDocSource.class.getName());
props.setProperty("directory", "RAMDirectory");
if (setNormsProp) {
props.setProperty("doc.tokenized.norms", Boolean.toString(normsPropVal));
@ -119,7 +117,7 @@ public class DocMakerTest extends BenchmarkTestCase {
Config config = new Config(props);
DocMaker dm = new DocMaker();
dm.setConfig(config);
dm.setConfig(config, new OneDocSource());
return dm.makeDocument();
}
@ -175,12 +173,15 @@ public class DocMakerTest extends BenchmarkTestCase {
ps.close();
Properties props = new Properties();
props.setProperty("content.source", "org.apache.lucene.benchmark.byTask.feeds.LineDocSource");
props.setProperty("docs.file", f.getAbsolutePath());
props.setProperty("content.source.forever", "false");
Config config = new Config(props);
ContentSource source = new LineDocSource();
source.setConfig(config);
DocMaker dm = new DocMaker();
dm.setConfig(config);
dm.setConfig(config, source);
dm.resetInputs();
dm.resetInputs();
dm.close();

View File

@ -225,8 +225,13 @@
</target>
<target name="process-webpages" depends="resolve-pegdown">
<pathconvert pathsep="|" dirsep="/" property="buildfiles">
<pathconvert pathsep="|" property="buildfiles">
<fileset dir="." includes="**/build.xml" excludes="build.xml,analysis/*,build/**,tools/**,backwards/**,site/**"/>
<mapper>
<scriptmapper language="javascript">
self.addMappedName((new java.io.File(source)).toURI());
</scriptmapper>
</mapper>
</pathconvert>
<!--
The XSL input file is ignored completely, but XSL expects one to be given,

View File

@ -53,7 +53,7 @@ public final class MappingMultiDocsAndPositionsEnum extends DocsAndPositionsEnum
}
@Override
public int freq() {
public int freq() throws IOException {
return current.freq();
}

View File

@ -52,7 +52,7 @@ public final class MappingMultiDocsEnum extends DocsEnum {
}
@Override
public int freq() {
public int freq() throws IOException {
return current.freq();
}

View File

@ -1022,7 +1022,7 @@ class Lucene3xFields extends FieldsProducer {
}
@Override
public int freq() {
public int freq() throws IOException {
return docs.freq();
}
@ -1069,7 +1069,7 @@ class Lucene3xFields extends FieldsProducer {
}
@Override
public int freq() {
public int freq() throws IOException {
return pos.freq();
}

View File

@ -528,7 +528,7 @@ class Lucene3xTermVectorsReader extends TermVectorsReader {
private Bits liveDocs;
@Override
public int freq() {
public int freq() throws IOException {
return freq;
}
@ -574,7 +574,7 @@ class Lucene3xTermVectorsReader extends TermVectorsReader {
private int[] endOffsets;
@Override
public int freq() {
public int freq() throws IOException {
if (positions != null) {
return positions.length;
} else {

View File

@ -353,7 +353,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
}
@Override
public final int freq() {
public final int freq() throws IOException {
assert !indexOmitsTF;
return freq;
}
@ -772,7 +772,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
}
@Override
public int freq() {
public int freq() throws IOException {
return freq;
}
@ -991,7 +991,7 @@ public class Lucene40PostingsReader extends PostingsReaderBase {
}
@Override
public int freq() {
public int freq() throws IOException {
return freq;
}

View File

@ -549,7 +549,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
private Bits liveDocs;
@Override
public int freq() {
public int freq() throws IOException {
return freq;
}
@ -595,7 +595,7 @@ public class Lucene40TermVectorsReader extends TermVectorsReader {
private int[] endOffsets;
@Override
public int freq() {
public int freq() throws IOException {
if (positions != null) {
return positions.length;
} else {

View File

@ -424,7 +424,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
}
@Override
public int freq() {
public int freq() throws IOException {
assert indexOptions != IndexOptions.DOCS_ONLY;
return freq;
}
@ -624,7 +624,7 @@ public class MemoryPostingsFormat extends PostingsFormat {
}
@Override
public int freq() {
public int freq() throws IOException {
return freq;
}
}

View File

@ -356,7 +356,7 @@ public class PulsingPostingsReader extends PostingsReaderBase {
}
@Override
public int freq() {
public int freq() throws IOException {
assert indexOptions != IndexOptions.DOCS_ONLY;
return freq;
}
@ -462,7 +462,7 @@ public class PulsingPostingsReader extends PostingsReaderBase {
}
@Override
public int freq() {
public int freq() throws IOException {
return freq;
}

View File

@ -420,7 +420,7 @@ public class SepPostingsReader extends PostingsReaderBase {
}
@Override
public int freq() {
public int freq() throws IOException {
assert !omitTF;
return freq;
}
@ -598,7 +598,7 @@ public class SepPostingsReader extends PostingsReaderBase {
}
@Override
public int freq() {
public int freq() throws IOException {
return freq;
}

View File

@ -269,7 +269,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
}
@Override
public int freq() {
public int freq() throws IOException {
assert !omitTF;
return tf;
}
@ -370,7 +370,7 @@ class SimpleTextFieldsReader extends FieldsProducer {
}
@Override
public int freq() {
public int freq() throws IOException {
return tf;
}

View File

@ -398,7 +398,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
private Bits liveDocs;
@Override
public int freq() {
public int freq() throws IOException {
assert freq != -1;
return freq;
}
@ -445,7 +445,7 @@ public class SimpleTextTermVectorsReader extends TermVectorsReader {
private int[] endOffsets;
@Override
public int freq() {
public int freq() throws IOException {
if (positions != null) {
return positions.length;
} else {

View File

@ -17,6 +17,8 @@ package org.apache.lucene.index;
* limitations under the License.
*/
import java.io.IOException;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.AttributeSource;
@ -29,8 +31,9 @@ public abstract class DocsEnum extends DocIdSetIterator {
/** Returns term frequency in the current document. Do
* not call this before {@link #nextDoc} is first called,
* nor after {@link #nextDoc} returns NO_MORE_DOCS. */
public abstract int freq();
* nor after {@link #nextDoc} returns NO_MORE_DOCS.
**/
public abstract int freq() throws IOException;
/** Returns the related attributes. */
public AttributeSource attributes() {

View File

@ -26,6 +26,7 @@ import java.util.Queue;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
import org.apache.lucene.index.DocumentsWriterStallControl.MemoryController;
import org.apache.lucene.util.ThreadInterruptedException;
/**
@ -40,7 +41,7 @@ import org.apache.lucene.util.ThreadInterruptedException;
* {@link IndexWriterConfig#getRAMPerThreadHardLimitMB()} to prevent address
* space exhaustion.
*/
final class DocumentsWriterFlushControl {
final class DocumentsWriterFlushControl implements MemoryController {
private final long hardMaxBytesPerDWPT;
private long activeBytes = 0;
@ -88,7 +89,7 @@ final class DocumentsWriterFlushControl {
return flushBytes + activeBytes;
}
long stallLimitBytes() {
public long stallLimitBytes() {
final double maxRamMB = config.getRAMBufferSizeMB();
return maxRamMB != IndexWriterConfig.DISABLE_AUTO_FLUSH ? (long)(2 * (maxRamMB * 1024 * 1024)) : Long.MAX_VALUE;
}

View File

@ -1,6 +1,6 @@
package org.apache.lucene.index;
/**
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
@ -19,6 +19,7 @@ package org.apache.lucene.index;
import java.util.concurrent.locks.AbstractQueuedSynchronizer;
import org.apache.lucene.index.DocumentsWriterPerThreadPool.ThreadState;
import org.apache.lucene.util.ThreadInterruptedException;
/**
* Controls the health status of {@link DocumentsWriter} sessions. This class
@ -55,11 +56,11 @@ final class DocumentsWriterStallControl {
boolean tryReset() {
final int oldState = getState();
if (oldState == 0)
if (oldState == 0) {
return true;
}
if (compareAndSetState(oldState, 0)) {
releaseShared(0);
return true;
return releaseShared(0);
}
return false;
}
@ -97,11 +98,11 @@ final class DocumentsWriterStallControl {
* {@link DocumentsWriterStallControl} to healthy and release all threads waiting on
* {@link #waitIfStalled()}
*/
void updateStalled(DocumentsWriterFlushControl flushControl) {
void updateStalled(MemoryController controller) {
do {
// if we have more flushing / blocked DWPT than numActiveDWPT we stall!
// don't stall if we have queued flushes - threads should be hijacked instead
while (flushControl.netBytes() > flushControl.stallLimitBytes()) {
while (controller.netBytes() > controller.stallLimitBytes()) {
if (sync.trySetStalled()) {
assert wasStalled = true;
return;
@ -111,10 +112,19 @@ final class DocumentsWriterStallControl {
}
void waitIfStalled() {
sync.acquireShared(0);
try {
sync.acquireSharedInterruptibly(0);
} catch (InterruptedException e) {
throw new ThreadInterruptedException(e);
}
}
boolean hasBlocked() { // for tests
return sync.hasBlockedThreads;
}
}
static interface MemoryController {
long netBytes();
long stallLimitBytes();
}
}

View File

@ -225,7 +225,7 @@ public class FilterAtomicReader extends AtomicReader {
}
@Override
public int freq() {
public int freq() throws IOException {
return in.freq();
}
@ -259,7 +259,7 @@ public class FilterAtomicReader extends AtomicReader {
}
@Override
public int freq() {
public int freq() throws IOException {
return in.freq();
}

View File

@ -1092,12 +1092,19 @@ public class IndexWriter implements Closeable, TwoPhaseCommit {
*
* <p><b>WARNING</b>: the index does not currently record
* which documents were added as a block. Today this is
* fine, because merging will preserve the block (as long
* as none them were deleted). But it's possible in the
* future that Lucene may more aggressively re-order
* documents (for example, perhaps to obtain better index
* compression), in which case you may need to fully
* re-index your documents at that time.
* fine, because merging will preserve a block. The order of
* documents within a segment will be preserved, even when child
* documents within a block are deleted. Most search features
* (like result grouping and block joining) require you to
* mark documents; when these documents are deleted these
* search features will not work as expected. Obviously, adding
* documents to an existing block will require you to re-index
* the entire block.
*
* <p>However, it's possible that in the future Lucene may
* more aggressively re-order documents (for example, perhaps
* to obtain better index compression), in which case you may
* need to fully re-index your documents at that time.
*
* <p>See {@link #addDocument(Iterable)} for details on
* index and IndexWriter state after an Exception, and

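To make the block semantics described in the warning above concrete, here is a hedged sketch of indexing one parent/child block with addDocuments; the child-first/parent-last ordering follows the block-join convention, and the empty documents stand in for real fields.

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;

class BlockIndexingSketch {
  void addOneBlock(IndexWriter writer) throws IOException {
    List<Document> block = new ArrayList<Document>();
    for (int i = 0; i < 2; i++) {
      Document child = new Document(); // add child fields here
      block.add(child);
    }
    Document parent = new Document();  // add parent fields here; the parent goes last
    block.add(parent);
    // The whole list is indexed as one contiguous block, in this order.
    writer.addDocuments(block);
  }
}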
View File

@ -69,7 +69,7 @@ public final class MultiDocsAndPositionsEnum extends DocsAndPositionsEnum {
}
@Override
public int freq() {
public int freq() throws IOException {
return current.freq();
}

View File

@ -69,7 +69,7 @@ public final class MultiDocsEnum extends DocsEnum {
}
@Override
public int freq() {
public int freq() throws IOException {
return current.freq();
}

View File

@ -190,6 +190,10 @@ public abstract class FieldComparator<T> {
}
}
/** Returns negative result if the doc's value is less
* than the provided value. */
public abstract int compareDocToValue(int doc, T value) throws IOException;
public static abstract class NumericComparator<T extends Number> extends FieldComparator<T> {
protected final T missingValue;
protected final String field;
@ -274,9 +278,19 @@ public abstract class FieldComparator<T> {
public Byte value(int slot) {
return Byte.valueOf(values[slot]);
}
@Override
public int compareDocToValue(int doc, Byte value) {
byte docValue = currentReaderValues[doc];
// Test for docValue == 0 to save Bits.get method call for
// the common case (doc has value and value is non-zero):
if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
docValue = missingValue;
}
return docValue - value.byteValue();
}
}
/** Parses field's values as double (using {@link
* FieldCache#getDoubles} and sorts by ascending value */
public static final class DoubleComparator extends NumericComparator<Double> {
@ -351,6 +365,24 @@ public abstract class FieldComparator<T> {
public Double value(int slot) {
return Double.valueOf(values[slot]);
}
@Override
public int compareDocToValue(int doc, Double valueObj) {
final double value = valueObj.doubleValue();
double docValue = currentReaderValues[doc];
// Test for docValue == 0 to save Bits.get method call for
// the common case (doc has value and value is non-zero):
if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
docValue = missingValue;
}
if (docValue < value) {
return -1;
} else if (docValue > value) {
return 1;
} else {
return 0;
}
}
}
/** Uses float index values to sort by ascending value */
@ -415,6 +447,19 @@ public abstract class FieldComparator<T> {
public Double value(int slot) {
return Double.valueOf(values[slot]);
}
@Override
public int compareDocToValue(int doc, Double valueObj) {
final double value = valueObj.doubleValue();
final double docValue = currentReaderValues.getFloat(doc);
if (docValue < value) {
return -1;
} else if (docValue > value) {
return 1;
} else {
return 0;
}
}
}
/** Parses field's values as float (using {@link
@ -494,6 +539,24 @@ public abstract class FieldComparator<T> {
public Float value(int slot) {
return Float.valueOf(values[slot]);
}
@Override
public int compareDocToValue(int doc, Float valueObj) {
final float value = valueObj.floatValue();
float docValue = currentReaderValues[doc];
// Test for docValue == 0 to save Bits.get method call for
// the common case (doc has value and value is non-zero):
if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
docValue = missingValue;
}
if (docValue < value) {
return -1;
} else if (docValue > value) {
return 1;
} else {
return 0;
}
}
}
/** Parses field's values as short (using {@link
@ -556,6 +619,18 @@ public abstract class FieldComparator<T> {
public Short value(int slot) {
return Short.valueOf(values[slot]);
}
@Override
public int compareDocToValue(int doc, Short valueObj) {
final short value = valueObj.shortValue();
short docValue = currentReaderValues[doc];
// Test for docValue == 0 to save Bits.get method call for
// the common case (doc has value and value is non-zero):
if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
docValue = missingValue;
}
return docValue - value;
}
}
/** Parses field's values as int (using {@link
@ -640,6 +715,24 @@ public abstract class FieldComparator<T> {
public Integer value(int slot) {
return Integer.valueOf(values[slot]);
}
@Override
public int compareDocToValue(int doc, Integer valueObj) {
final int value = valueObj.intValue();
int docValue = currentReaderValues[doc];
// Test for docValue == 0 to save Bits.get method call for
// the common case (doc has value and value is non-zero):
if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
docValue = missingValue;
}
if (docValue < value) {
return -1;
} else if (docValue > value) {
return 1;
} else {
return 0;
}
}
}
/** Loads int index values and sorts by ascending value. */
@ -708,6 +801,19 @@ public abstract class FieldComparator<T> {
public Long value(int slot) {
return Long.valueOf(values[slot]);
}
@Override
public int compareDocToValue(int doc, Long valueObj) {
final long value = valueObj.longValue();
final long docValue = currentReaderValues.getInt(doc);
if (docValue < value) {
return -1;
} else if (docValue > value) {
return 1;
} else {
return 0;
}
}
}
/** Parses field's values as long (using {@link
@ -788,6 +894,24 @@ public abstract class FieldComparator<T> {
public Long value(int slot) {
return Long.valueOf(values[slot]);
}
@Override
public int compareDocToValue(int doc, Long valueObj) {
final long value = valueObj.longValue();
long docValue = currentReaderValues[doc];
// Test for docValue == 0 to save Bits.get method call for
// the common case (doc has value and value is non-zero):
if (docsWithField != null && docValue == 0 && !docsWithField.get(doc)) {
docValue = missingValue;
}
if (docValue < value) {
return -1;
} else if (docValue > value) {
return 1;
} else {
return 0;
}
}
}
/** Sorts by descending relevance. NOTE: if you are
@ -815,12 +939,14 @@ public abstract class FieldComparator<T> {
@Override
public int compareBottom(int doc) throws IOException {
float score = scorer.score();
assert !Float.isNaN(score);
return bottom > score ? -1 : (bottom < score ? 1 : 0);
}
@Override
public void copy(int slot, int doc) throws IOException {
scores[slot] = scorer.score();
assert !Float.isNaN(scores[slot]);
}
@Override
@ -857,6 +983,22 @@ public abstract class FieldComparator<T> {
// sorts descending:
return second.compareTo(first);
}
@Override
public int compareDocToValue(int doc, Float valueObj) throws IOException {
final float value = valueObj.floatValue();
float docValue = scorer.score();
assert !Float.isNaN(docValue);
if (docValue < value) {
// reverse of FloatComparator
return 1;
} else if (docValue > value) {
// reverse of FloatComparator
return -1;
} else {
return 0;
}
}
}
/** Sorts by ascending docID */
@ -904,6 +1046,19 @@ public abstract class FieldComparator<T> {
public Integer value(int slot) {
return Integer.valueOf(docIDs[slot]);
}
@Override
public int compareDocToValue(int doc, Integer valueObj) {
final int value = valueObj.intValue();
int docValue = docBase + doc;
if (docValue < value) {
return -1;
} else if (docValue > value) {
return 1;
} else {
return 0;
}
}
}
/** Sorts by field's natural Term sort order, using
@ -998,6 +1153,20 @@ public abstract class FieldComparator<T> {
throw new UnsupportedOperationException();
}
@Override
public int compareDocToValue(int doc, BytesRef value) {
BytesRef docValue = termsIndex.getTerm(doc, tempBR);
if (docValue == null) {
if (value == null) {
return 0;
}
return -1;
} else if (value == null) {
return 1;
}
return docValue.compareTo(value);
}
/** Base class for specialized (per bit width of the
* ords) per-segment comparator. NOTE: this is messy;
* we do this only because hotspot can't reliably inline
@ -1038,6 +1207,11 @@ public abstract class FieldComparator<T> {
}
return val1.compareTo(val2);
}
@Override
public int compareDocToValue(int doc, BytesRef value) {
return TermOrdValComparator.this.compareDocToValue(doc, value);
}
}
// Used per-segment when bit width of doc->ord is 8:
@ -1385,6 +1559,11 @@ public abstract class FieldComparator<T> {
throw new UnsupportedOperationException();
}
@Override
public int compareDocToValue(int doc, BytesRef value) {
return termsIndex.getBytes(doc, tempBR).compareTo(value);
}
// TODO: would be nice to share these specialized impls
// w/ TermOrdValComparator
@ -1422,6 +1601,11 @@ public abstract class FieldComparator<T> {
assert val2 != null;
return comp.compare(val1, val2);
}
@Override
public int compareDocToValue(int doc, BytesRef value) {
return TermOrdValDocValuesComparator.this.compareDocToValue(doc, value);
}
}
// Used per-segment when bit width of doc->ord is 8:
@ -1801,6 +1985,11 @@ public abstract class FieldComparator<T> {
}
return val1.compareTo(val2);
}
@Override
public int compareDocToValue(int doc, BytesRef value) {
return docTerms.getTerm(doc, tempBR).compareTo(value);
}
}
/** Sorts by field's natural Term sort order. All
@ -1869,6 +2058,11 @@ public abstract class FieldComparator<T> {
assert val2 != null;
return val1.compareTo(val2);
}
@Override
public int compareDocToValue(int doc, BytesRef value) {
return docTerms.getBytes(doc, tempBR).compareTo(value);
}
}
final protected static int binarySearch(BytesRef br, DocTermsIndex a, BytesRef key) {

View File

@ -304,14 +304,51 @@ public class IndexSearcher {
* <code>sort</code>.
*
* <p>NOTE: this does not compute scores by default; use
* {@link IndexSearcher#setDefaultFieldSortScoring} to
* enable scoring.
* {@link IndexSearcher#search(Query,Filter,int,Sort,boolean,boolean)} to
* control scoring.
*
* @throws BooleanQuery.TooManyClauses
*/
public TopFieldDocs search(Query query, Filter filter, int n,
Sort sort) throws IOException {
return search(createNormalizedWeight(wrapFilter(query, filter)), n, sort);
return search(createNormalizedWeight(wrapFilter(query, filter)), n, sort, false, false);
}
/** Search implementation with arbitrary sorting, plus
* control over whether hit scores and max score
* should be computed. Finds
* the top <code>n</code> hits for <code>query</code>, applying
* <code>filter</code> if non-null, and sorting the hits by the criteria in
* <code>sort</code>. If <code>doDocScores</code> is <code>true</code>
* then the score of each hit will be computed and
* returned. If <code>doMaxScore</code> is
* <code>true</code> then the maximum score over all
* collected hits will be computed.
*
* @throws BooleanQuery.TooManyClauses
*/
public TopFieldDocs search(Query query, Filter filter, int n,
Sort sort, boolean doDocScores, boolean doMaxScore) throws IOException {
return search(createNormalizedWeight(wrapFilter(query, filter)), n, sort, doDocScores, doMaxScore);
}
/** Finds the top <code>n</code>
* hits for <code>query</code>, applying <code>filter</code> if non-null,
* where all results are after a previous result (<code>after</code>).
* <p>
* By passing the bottom result from a previous page as <code>after</code>,
* this method can be used for efficient 'deep-paging' across potentially
* large result sets.
*
* @throws BooleanQuery.TooManyClauses
*/
public TopDocs searchAfter(ScoreDoc after, Query query, Filter filter, int n, Sort sort) throws IOException {
if (after != null && !(after instanceof FieldDoc)) {
// TODO: if we fix type safety of TopFieldDocs we can
// remove this
throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
}
return search(createNormalizedWeight(wrapFilter(query, filter)), (FieldDoc) after, n, sort, true, false, false);
}
/**
@ -324,7 +361,52 @@ public class IndexSearcher {
*/
public TopFieldDocs search(Query query, int n,
Sort sort) throws IOException {
return search(createNormalizedWeight(query), n, sort);
return search(createNormalizedWeight(query), n, sort, false, false);
}
/** Finds the top <code>n</code>
* hits for <code>query</code> where all results are after a previous
* result (<code>after</code>).
* <p>
* By passing the bottom result from a previous page as <code>after</code>,
* this method can be used for efficient 'deep-paging' across potentially
* large result sets.
*
* @throws BooleanQuery.TooManyClauses
*/
public TopDocs searchAfter(ScoreDoc after, Query query, int n, Sort sort) throws IOException {
if (after != null && !(after instanceof FieldDoc)) {
// TODO: if we fix type safety of TopFieldDocs we can
// remove this
throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
}
return search(createNormalizedWeight(query), (FieldDoc) after, n, sort, true, false, false);
}
/** Finds the top <code>n</code>
* hits for <code>query</code> where all results are after a previous
* result (<code>after</code>), allowing control over
* whether hit scores and max score should be computed.
* <p>
* By passing the bottom result from a previous page as <code>after</code>,
* this method can be used for efficient 'deep-paging' across potentially
* large result sets. If <code>doDocScores</code> is <code>true</code>
* then the score of each hit will be computed and
* returned. If <code>doMaxScore</code> is
* <code>true</code> then the maximum score over all
* collected hits will be computed.
*
* @throws BooleanQuery.TooManyClauses
*/
public TopDocs searchAfter(ScoreDoc after, Query query, Filter filter, int n, Sort sort,
boolean doDocScores, boolean doMaxScore) throws IOException {
if (after != null && !(after instanceof FieldDoc)) {
// TODO: if we fix type safety of TopFieldDocs we can
// remove this
throw new IllegalArgumentException("after must be a FieldDoc; got " + after);
}
return search(createNormalizedWeight(wrapFilter(query, filter)), (FieldDoc) after, n, sort, true,
doDocScores, doMaxScore);
}
/** Expert: Low-level search implementation. Finds the top <code>n</code>
@ -383,7 +465,9 @@ public class IndexSearcher {
return collector.topDocs();
}
/** Expert: Low-level search implementation with arbitrary sorting. Finds
/** Expert: Low-level search implementation with arbitrary
* sorting and control over whether hit scores and max
* score should be computed. Finds
* the top <code>n</code> hits for <code>query</code> and sorting the hits
* by the criteria in <code>sort</code>.
*
@ -393,12 +477,13 @@ public class IndexSearcher {
* @throws BooleanQuery.TooManyClauses
*/
protected TopFieldDocs search(Weight weight,
final int nDocs, Sort sort) throws IOException {
return search(weight, nDocs, sort, true);
final int nDocs, Sort sort,
boolean doDocScores, boolean doMaxScore) throws IOException {
return search(weight, null, nDocs, sort, true, doDocScores, doMaxScore);
}
/**
* Just like {@link #search(Weight, int, Sort)}, but you choose
* Just like {@link #search(Weight, int, Sort, boolean, boolean)}, but you choose
* whether or not the fields in the returned {@link FieldDoc} instances should
* be set by specifying fillFields.
*
@ -408,27 +493,29 @@ public class IndexSearcher {
* then pass that to {@link #search(AtomicReaderContext[], Weight,
* Collector)}.</p>
*/
protected TopFieldDocs search(Weight weight, int nDocs,
Sort sort, boolean fillFields)
protected TopFieldDocs search(Weight weight, FieldDoc after, int nDocs,
Sort sort, boolean fillFields,
boolean doDocScores, boolean doMaxScore)
throws IOException {
if (sort == null) throw new NullPointerException();
if (executor == null) {
// use all leaves here!
return search (leafContexts, weight, nDocs, sort, fillFields);
return search(leafContexts, weight, after, nDocs, sort, fillFields, doDocScores, doMaxScore);
} else {
final TopFieldCollector topCollector = TopFieldCollector.create(sort, nDocs,
after,
fillFields,
fieldSortDoTrackScores,
fieldSortDoMaxScore,
doDocScores,
doMaxScore,
false);
final Lock lock = new ReentrantLock();
final ExecutionHelper<TopFieldDocs> runner = new ExecutionHelper<TopFieldDocs>(executor);
for (int i = 0; i < leafSlices.length; i++) { // search each leaf slice
runner.submit(
new SearcherCallableWithSort(lock, this, leafSlices[i], weight, nDocs, topCollector, sort));
new SearcherCallableWithSort(lock, this, leafSlices[i], weight, after, nDocs, topCollector, sort, doDocScores, doMaxScore));
}
int totalHits = 0;
float maxScore = Float.NEGATIVE_INFINITY;
@ -447,18 +534,12 @@ public class IndexSearcher {
/**
* Just like {@link #search(Weight, int, Sort)}, but you choose
* Just like {@link #search(Weight, int, Sort, boolean, boolean)}, but you choose
* whether or not the fields in the returned {@link FieldDoc} instances should
* be set by specifying fillFields.
*
* <p>NOTE: this does not compute scores by default. If you
* need scores, create a {@link TopFieldCollector}
* instance by calling {@link TopFieldCollector#create} and
* then pass that to {@link #search(AtomicReaderContext[], Weight,
* Collector)}.</p>
*/
protected TopFieldDocs search(AtomicReaderContext[] leaves, Weight weight, int nDocs,
Sort sort, boolean fillFields) throws IOException {
protected TopFieldDocs search(AtomicReaderContext[] leaves, Weight weight, FieldDoc after, int nDocs,
Sort sort, boolean fillFields, boolean doDocScores, boolean doMaxScore) throws IOException {
// single thread
int limit = reader.maxDoc();
if (limit == 0) {
@ -466,8 +547,9 @@ public class IndexSearcher {
}
nDocs = Math.min(nDocs, limit);
TopFieldCollector collector = TopFieldCollector.create(sort, nDocs,
fillFields, fieldSortDoTrackScores, fieldSortDoMaxScore, !weight.scoresDocsOutOfOrder());
TopFieldCollector collector = TopFieldCollector.create(sort, nDocs, after,
fillFields, doDocScores,
doMaxScore, !weight.scoresDocsOutOfOrder());
search(leaves, weight, collector);
return (TopFieldDocs) collector.topDocs();
}
@ -553,26 +635,6 @@ public class IndexSearcher {
return weight.explain(leafContexts[n], deBasedDoc);
}
private boolean fieldSortDoTrackScores;
private boolean fieldSortDoMaxScore;
/** By default, no scores are computed when sorting by
* field (using {@link #search(Query,Filter,int,Sort)}).
* You can change that, per IndexSearcher instance, by
* calling this method. Note that this will incur a CPU
* cost.
*
* @param doTrackScores If true, then scores are
* returned for every matching document in {@link
* TopFieldDocs}.
*
* @param doMaxScore If true, then the max score for all
* matching docs is computed. */
public void setDefaultFieldSortScoring(boolean doTrackScores, boolean doMaxScore) {
fieldSortDoTrackScores = doTrackScores;
fieldSortDoMaxScore = doMaxScore;
}
/**
* Creates a normalized weight for a top-level {@link Query}.
* The query is rewritten by this method and {@link Query#createWeight} called,
@ -626,7 +688,7 @@ public class IndexSearcher {
}
public TopDocs call() throws IOException {
final TopDocs docs = searcher.search (slice.leaves, weight, after, nDocs);
final TopDocs docs = searcher.search(slice.leaves, weight, after, nDocs);
final ScoreDoc[] scoreDocs = docs.scoreDocs;
//it would be so nice if we had a thread-safe insert
lock.lock();
@ -657,9 +719,13 @@ public class IndexSearcher {
private final TopFieldCollector hq;
private final Sort sort;
private final LeafSlice slice;
private final FieldDoc after;
private final boolean doDocScores;
private final boolean doMaxScore;
public SearcherCallableWithSort(Lock lock, IndexSearcher searcher, LeafSlice slice, Weight weight,
int nDocs, TopFieldCollector hq, Sort sort) {
FieldDoc after, int nDocs, TopFieldCollector hq, Sort sort,
boolean doDocScores, boolean doMaxScore) {
this.lock = lock;
this.searcher = searcher;
this.weight = weight;
@ -667,6 +733,9 @@ public class IndexSearcher {
this.hq = hq;
this.sort = sort;
this.slice = slice;
this.after = after;
this.doDocScores = doDocScores;
this.doMaxScore = doMaxScore;
}
private final class FakeScorer extends Scorer {
@ -707,7 +776,7 @@ public class IndexSearcher {
public TopFieldDocs call() throws IOException {
assert slice.leaves.length == 1;
final TopFieldDocs docs = searcher.search (slice.leaves, weight, nDocs, sort, true);
final TopFieldDocs docs = searcher.search(slice.leaves, weight, after, nDocs, sort, true, doDocScores, doMaxScore);
lock.lock();
try {
final int base = slice.leaves[0].docBase;
@ -718,6 +787,11 @@ public class IndexSearcher {
fakeScorer.score = scoreDoc.score;
hq.collect(scoreDoc.doc-base);
}
// Carry over maxScore from sub:
if (doMaxScore && docs.getMaxScore() > hq.maxScore) {
hq.maxScore = docs.getMaxScore();
}
} finally {
lock.unlock();
}

View File

@ -568,7 +568,7 @@ class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum {
}
@Override
public final int freq() {
public final int freq() throws IOException {
return _freq;
}

View File

@ -51,7 +51,7 @@ final class TermScorer extends Scorer {
}
@Override
public float freq() {
public float freq() throws IOException {
return docsEnum.freq();
}
@ -66,7 +66,7 @@ final class TermScorer extends Scorer {
}
@Override
public float score() {
public float score() throws IOException {
assert docID() != NO_MORE_DOCS;
return docScorer.score(docsEnum.docID(), docsEnum.freq());
}

View File

@ -45,7 +45,7 @@ public class TopDocs {
/** Sets the maximum score value encountered. */
public void setMaxScore(float maxScore) {
this.maxScore=maxScore;
this.maxScore = maxScore;
}
/** Constructs a TopDocs with a default maxScore=Float.NaN. */

View File

@ -843,6 +843,166 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
}
/*
* Implements a TopFieldCollector when after != null.
*/
private final static class PagingFieldCollector extends TopFieldCollector {
Scorer scorer;
int collectedHits;
final FieldComparator<?>[] comparators;
final int[] reverseMul;
final FieldValueHitQueue<Entry> queue;
final boolean trackDocScores;
final boolean trackMaxScore;
final FieldDoc after;
int afterDoc;
public PagingFieldCollector(
FieldValueHitQueue<Entry> queue, FieldDoc after, int numHits, boolean fillFields,
boolean trackDocScores, boolean trackMaxScore)
throws IOException {
super(queue, numHits, fillFields);
this.queue = queue;
this.trackDocScores = trackDocScores;
this.trackMaxScore = trackMaxScore;
this.after = after;
comparators = queue.getComparators();
reverseMul = queue.getReverseMul();
      // Must set maxScore to NEG_INF, otherwise Math.max always returns NaN.
maxScore = Float.NEGATIVE_INFINITY;
}
void updateBottom(int doc, float score) {
bottom.doc = docBase + doc;
bottom.score = score;
bottom = pq.updateTop();
}
@SuppressWarnings({"unchecked", "rawtypes"})
@Override
public void collect(int doc) throws IOException {
totalHits++;
//System.out.println(" collect doc=" + doc);
// Check if this hit was already collected on a
// previous page:
boolean sameValues = true;
for(int compIDX=0;compIDX<comparators.length;compIDX++) {
final FieldComparator comp = comparators[compIDX];
final int cmp = reverseMul[compIDX] * comp.compareDocToValue(doc, after.fields[compIDX]);
if (cmp < 0) {
// Already collected on a previous page
//System.out.println(" skip: before");
return;
} else if (cmp > 0) {
// Not yet collected
sameValues = false;
//System.out.println(" keep: after");
break;
}
}
// Tie-break by docID:
if (sameValues && doc <= afterDoc) {
// Already collected on a previous page
//System.out.println(" skip: tie-break");
return;
}
collectedHits++;
float score = Float.NaN;
if (trackMaxScore) {
score = scorer.score();
if (score > maxScore) {
maxScore = score;
}
}
if (queueFull) {
// Fastmatch: return if this hit is not competitive
for (int i = 0;; i++) {
final int c = reverseMul[i] * comparators[i].compareBottom(doc);
if (c < 0) {
// Definitely not competitive.
return;
} else if (c > 0) {
// Definitely competitive.
break;
} else if (i == comparators.length - 1) {
// This is the equals case.
if (doc + docBase > bottom.doc) {
// Definitely not competitive
return;
}
break;
}
}
// This hit is competitive - replace bottom element in queue & adjustTop
for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(bottom.slot, doc);
}
// Compute score only if it is competitive.
if (trackDocScores && !trackMaxScore) {
score = scorer.score();
}
updateBottom(doc, score);
for (int i = 0; i < comparators.length; i++) {
comparators[i].setBottom(bottom.slot);
}
} else {
// Startup transient: queue hasn't gathered numHits yet
final int slot = collectedHits - 1;
//System.out.println(" slot=" + slot);
// Copy hit into queue
for (int i = 0; i < comparators.length; i++) {
comparators[i].copy(slot, doc);
}
// Compute score only if it is competitive.
if (trackDocScores && !trackMaxScore) {
score = scorer.score();
}
bottom = pq.add(new Entry(slot, docBase + doc, score));
queueFull = collectedHits == numHits;
if (queueFull) {
for (int i = 0; i < comparators.length; i++) {
comparators[i].setBottom(bottom.slot);
}
}
}
}
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
for (int i = 0; i < comparators.length; i++) {
comparators[i].setScorer(scorer);
}
}
@Override
public boolean acceptsDocsOutOfOrder() {
return true;
}
@Override
public void setNextReader(AtomicReaderContext context) throws IOException {
docBase = context.docBase;
afterDoc = after.doc - docBase;
for (int i = 0; i < comparators.length; i++) {
queue.setComparator(i, comparators[i].setNextReader(context));
}
}
}
private static final ScoreDoc[] EMPTY_SCOREDOCS = new ScoreDoc[0];
private final boolean fillFields;
@ -909,6 +1069,52 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
boolean fillFields, boolean trackDocScores, boolean trackMaxScore,
boolean docsScoredInOrder)
throws IOException {
return create(sort, numHits, null, fillFields, trackDocScores, trackMaxScore, docsScoredInOrder);
}
/**
* Creates a new {@link TopFieldCollector} from the given
* arguments.
*
* <p><b>NOTE</b>: The instances returned by this method
* pre-allocate a full array of length
* <code>numHits</code>.
*
* @param sort
* the sort criteria (SortFields).
* @param numHits
* the number of results to collect.
* @param after
* only hits after this FieldDoc will be collected
* @param fillFields
* specifies whether the actual field values should be returned on
* the results (FieldDoc).
* @param trackDocScores
* specifies whether document scores should be tracked and set on the
* results. Note that if set to false, then the results' scores will
* be set to Float.NaN. Setting this to true affects performance, as
* it incurs the score computation on each competitive result.
* Therefore if document scores are not required by the application,
* it is recommended to set it to false.
* @param trackMaxScore
* specifies whether the query's maxScore should be tracked and set
* on the resulting {@link TopDocs}. Note that if set to false,
* {@link TopDocs#getMaxScore()} returns Float.NaN. Setting this to
* true affects performance as it incurs the score computation on
* each result. Also, setting this true automatically sets
* <code>trackDocScores</code> to true as well.
* @param docsScoredInOrder
* specifies whether documents are scored in doc Id order or not by
* the given {@link Scorer} in {@link #setScorer(Scorer)}.
* @return a {@link TopFieldCollector} instance which will sort the results by
* the sort criteria.
* @throws IOException
*/
public static TopFieldCollector create(Sort sort, int numHits, FieldDoc after,
boolean fillFields, boolean trackDocScores, boolean trackMaxScore,
boolean docsScoredInOrder)
throws IOException {
if (sort.fields.length == 0) {
throw new IllegalArgumentException("Sort must contain at least one field");
}
@ -918,43 +1124,56 @@ public abstract class TopFieldCollector extends TopDocsCollector<Entry> {
}
FieldValueHitQueue<Entry> queue = FieldValueHitQueue.create(sort.fields, numHits);
if (queue.getComparators().length == 1) {
if (after == null) {
if (queue.getComparators().length == 1) {
if (docsScoredInOrder) {
if (trackMaxScore) {
return new OneComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
} else if (trackDocScores) {
return new OneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
} else {
return new OneComparatorNonScoringCollector(queue, numHits, fillFields);
}
} else {
if (trackMaxScore) {
return new OutOfOrderOneComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
} else if (trackDocScores) {
return new OutOfOrderOneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
} else {
return new OutOfOrderOneComparatorNonScoringCollector(queue, numHits, fillFields);
}
}
}
// multiple comparators.
if (docsScoredInOrder) {
if (trackMaxScore) {
return new OneComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
return new MultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
} else if (trackDocScores) {
return new OneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
return new MultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
} else {
return new OneComparatorNonScoringCollector(queue, numHits, fillFields);
return new MultiComparatorNonScoringCollector(queue, numHits, fillFields);
}
} else {
if (trackMaxScore) {
return new OutOfOrderOneComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
return new OutOfOrderMultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
} else if (trackDocScores) {
return new OutOfOrderOneComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
return new OutOfOrderMultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
} else {
return new OutOfOrderOneComparatorNonScoringCollector(queue, numHits, fillFields);
return new OutOfOrderMultiComparatorNonScoringCollector(queue, numHits, fillFields);
}
}
}
// multiple comparators.
if (docsScoredInOrder) {
if (trackMaxScore) {
return new MultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
} else if (trackDocScores) {
return new MultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
} else {
return new MultiComparatorNonScoringCollector(queue, numHits, fillFields);
}
} else {
if (trackMaxScore) {
return new OutOfOrderMultiComparatorScoringMaxScoreCollector(queue, numHits, fillFields);
} else if (trackDocScores) {
return new OutOfOrderMultiComparatorScoringNoMaxScoreCollector(queue, numHits, fillFields);
} else {
return new OutOfOrderMultiComparatorNonScoringCollector(queue, numHits, fillFields);
if (after.fields == null) {
throw new IllegalArgumentException("after.fields wasn't set; you must pass fillFields=true for the previous search");
}
if (after.fields.length != sort.getSort().length) {
throw new IllegalArgumentException("after.fields has " + after.fields.length + " values but sort has " + sort.getSort().length);
}
return new PagingFieldCollector(queue, after, numHits, fillFields, trackDocScores, trackMaxScore);
}
}
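A minimal usage sketch for the paging variant of create, assuming searcher, query and sort exist and afterFieldDoc (an illustrative name) is the last FieldDoc of the previous page, which must have been collected with fillFields=true:

TopFieldCollector collector = TopFieldCollector.create(sort, 10, afterFieldDoc,
    true /* fillFields */, false /* trackDocScores */, false /* trackMaxScore */,
    true /* docsScoredInOrder */);
searcher.search(query, collector);
TopDocs nextPage = collector.topDocs(); // only hits that sort after afterFieldDoc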

View File

@ -21,20 +21,19 @@ package org.apache.lucene.search;
/** Represents hits returned by {@link
* IndexSearcher#search(Query,Filter,int,Sort)}.
*/
public class TopFieldDocs
extends TopDocs {
public class TopFieldDocs extends TopDocs {
/** The fields which were used to sort results by. */
public SortField[] fields;
/** The fields which were used to sort results by. */
public SortField[] fields;
/** Creates one of these objects.
* @param totalHits Total number of hits for the query.
* @param scoreDocs The top hits for the query.
* @param fields The sort criteria used to find the top hits.
* @param maxScore The maximum score encountered.
*/
public TopFieldDocs (int totalHits, ScoreDoc[] scoreDocs, SortField[] fields, float maxScore) {
super (totalHits, scoreDocs, maxScore);
this.fields = fields;
}
/** Creates one of these objects.
* @param totalHits Total number of hits for the query.
* @param scoreDocs The top hits for the query.
* @param fields The sort criteria used to find the top hits.
* @param maxScore The maximum score encountered.
*/
public TopFieldDocs (int totalHits, ScoreDoc[] scoreDocs, SortField[] fields, float maxScore) {
super (totalHits, scoreDocs, maxScore);
this.fields = fields;
}
}

View File

@ -118,16 +118,14 @@ final class CompoundFileWriter implements Closeable{
private synchronized IndexOutput getOutput() throws IOException {
if (dataOut == null) {
IndexOutput dataOutput = null;
boolean success = false;
try {
dataOutput = directory.createOutput(dataFileName, IOContext.DEFAULT);
dataOutput.writeVInt(FORMAT_CURRENT);
dataOut = dataOutput;
dataOut = directory.createOutput(dataFileName, IOContext.DEFAULT);
dataOut.writeVInt(FORMAT_CURRENT);
success = true;
} finally {
if (!success) {
IOUtils.closeWhileHandlingException(dataOutput);
IOUtils.closeWhileHandlingException(dataOut);
}
}
}

View File

@ -68,7 +68,7 @@ public class TestDocumentsWriterDeleteQueue extends LuceneTestCase {
assertAllBetween(last2, j, bd2, ids);
last2 = j + 1;
}
assertEquals(uniqueValues.size(), queue.numGlobalTermDeletes());
assertEquals(j+1, queue.numGlobalTermDeletes());
}
assertEquals(uniqueValues, bd1.terms.keySet());
assertEquals(uniqueValues, bd2.terms.keySet());

View File

@ -0,0 +1,353 @@
package org.apache.lucene.index;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with this
* work for additional information regarding copyright ownership. The ASF
* licenses this file to You under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.lucene.index.DocumentsWriterStallControl.MemoryController;
import org.apache.lucene.util.LuceneTestCase;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeaks;
/**
* Tests for {@link DocumentsWriterStallControl}
*/
@ThreadLeaks(failTestIfLeaking = true)
public class TestDocumentsWriterStallControl extends LuceneTestCase {
public void testSimpleStall() throws InterruptedException {
DocumentsWriterStallControl ctrl = new DocumentsWriterStallControl();
SimpleMemCtrl memCtrl = new SimpleMemCtrl();
memCtrl.limit = 1000;
memCtrl.netBytes = 1000;
ctrl.updateStalled(memCtrl);
Thread[] waitThreads = waitThreads(atLeast(1), ctrl);
start(waitThreads);
assertFalse(ctrl.hasBlocked());
assertFalse(ctrl.anyStalledThreads());
join(waitThreads, 10);
// now stall threads and wake them up again
memCtrl.netBytes = 1001;
ctrl.updateStalled(memCtrl);
waitThreads = waitThreads(atLeast(1), ctrl);
start(waitThreads);
awaitState(100, Thread.State.WAITING, waitThreads);
assertTrue(ctrl.hasBlocked());
assertTrue(ctrl.anyStalledThreads());
memCtrl.netBytes = 50;
ctrl.updateStalled(memCtrl);
assertFalse(ctrl.anyStalledThreads());
join(waitThreads, 500);
}
public void testRandom() throws InterruptedException {
final DocumentsWriterStallControl ctrl = new DocumentsWriterStallControl();
SimpleMemCtrl memCtrl = new SimpleMemCtrl();
memCtrl.limit = 1000;
memCtrl.netBytes = 1;
ctrl.updateStalled(memCtrl);
Thread[] stallThreads = new Thread[atLeast(3)];
for (int i = 0; i < stallThreads.length; i++) {
final int threadId = i;
stallThreads[i] = new Thread() {
public void run() {
int baseBytes = threadId % 2 == 0 ? 500 : 700;
SimpleMemCtrl memCtrl = new SimpleMemCtrl();
memCtrl.limit = 1000;
memCtrl.netBytes = 1;
int iters = atLeast(1000);
for (int j = 0; j < iters; j++) {
memCtrl.netBytes = baseBytes + random().nextInt(1000);
ctrl.updateStalled(memCtrl);
if (random().nextInt(5) == 0) { // thread 0 only updates
ctrl.waitIfStalled();
}
}
}
};
}
start(stallThreads);
long time = System.currentTimeMillis();
/*
       * use a 100 sec timeout to make sure we do not hang forever. join will fail in
* that case
*/
while ((System.currentTimeMillis() - time) < 100 * 1000
&& !terminated(stallThreads)) {
ctrl.updateStalled(memCtrl);
if (random().nextBoolean()) {
Thread.yield();
} else {
Thread.sleep(1);
}
}
join(stallThreads, 100);
}
public void testAccquireReleaseRace() throws InterruptedException {
final DocumentsWriterStallControl ctrl = new DocumentsWriterStallControl();
SimpleMemCtrl memCtrl = new SimpleMemCtrl();
memCtrl.limit = 1000;
memCtrl.netBytes = 1;
ctrl.updateStalled(memCtrl);
final AtomicBoolean stop = new AtomicBoolean(false);
final AtomicBoolean checkPoint = new AtomicBoolean(true);
int numStallers = atLeast(1);
int numReleasers = atLeast(1);
int numWaiters = atLeast(1);
final CountDownLatch[] latches = new CountDownLatch[] {
new CountDownLatch(numStallers + numReleasers), new CountDownLatch(1),
new CountDownLatch(numWaiters)};
Thread[] threads = new Thread[numReleasers + numStallers + numWaiters];
List<Throwable> exceptions = Collections.synchronizedList(new ArrayList<Throwable>());
for (int i = 0; i < numReleasers; i++) {
threads[i] = new Updater(stop, checkPoint, ctrl, latches, true, exceptions);
}
for (int i = numReleasers; i < numReleasers + numStallers; i++) {
threads[i] = new Updater(stop, checkPoint, ctrl, latches, false, exceptions);
}
for (int i = numReleasers + numStallers; i < numReleasers + numStallers
+ numWaiters; i++) {
threads[i] = new Waiter(stop, checkPoint, ctrl, latches, exceptions);
}
start(threads);
int iters = atLeast(20000);
for (int i = 0; i < iters; i++) {
if (checkPoint.get()) {
latches[0].await(5, TimeUnit.SECONDS);
if (!exceptions.isEmpty()) {
for (Throwable throwable : exceptions) {
throwable.printStackTrace();
}
fail("got exceptions in threads");
}
if (!ctrl.anyStalledThreads()) {
assertTrue(
"control claims no stalled threads but waiter seems to be blocked",
latches[2].await(3, TimeUnit.SECONDS));
}
checkPoint.set(false);
latches[1].countDown();
}
assertFalse(checkPoint.get());
if (random().nextInt(2) == 0) {
latches[0] = new CountDownLatch(numStallers + numReleasers);
latches[1] = new CountDownLatch(1);
latches[2] = new CountDownLatch(numWaiters);
checkPoint.set(true);
}
}
stop.set(true);
memCtrl.limit = 1000;
memCtrl.netBytes = 1;
ctrl.updateStalled(memCtrl);
if (checkPoint.get()) {
latches[1].countDown();
}
for (int i = 0; i < threads.length; i++) {
threads[i].join(2000);
if (threads[i].isAlive() && threads[i] instanceof Waiter) {
if (threads[i].getState() == Thread.State.WAITING) {
fail("waiter is not released - anyThreadsStalled: "
+ ctrl.anyStalledThreads());
}
}
}
}
public static class Waiter extends Thread {
private CountDownLatch[] latches;
private DocumentsWriterStallControl ctrl;
private AtomicBoolean checkPoint;
private AtomicBoolean stop;
private List<Throwable> exceptions;
public Waiter(AtomicBoolean stop, AtomicBoolean checkPoint,
DocumentsWriterStallControl ctrl, CountDownLatch[] latches,
List<Throwable> exceptions) {
this.stop = stop;
this.checkPoint = checkPoint;
this.ctrl = ctrl;
this.latches = latches;
this.exceptions = exceptions;
}
public void run() {
try {
while (!stop.get()) {
ctrl.waitIfStalled();
if (checkPoint.get()) {
CountDownLatch join = latches[2];
CountDownLatch wait = latches[1];
join.countDown();
try {
wait.await();
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
}
} catch (Throwable e) {
e.printStackTrace();
exceptions.add(e);
}
}
}
public static class Updater extends Thread {
private CountDownLatch[] latches;
private DocumentsWriterStallControl ctrl;
private AtomicBoolean checkPoint;
private AtomicBoolean stop;
private boolean release;
private List<Throwable> exceptions;
public Updater(AtomicBoolean stop, AtomicBoolean checkPoint,
DocumentsWriterStallControl ctrl, CountDownLatch[] latches,
boolean release, List<Throwable> exceptions) {
this.stop = stop;
this.checkPoint = checkPoint;
this.ctrl = ctrl;
this.latches = latches;
this.release = release;
this.exceptions = exceptions;
}
public void run() {
try {
SimpleMemCtrl memCtrl = new SimpleMemCtrl();
memCtrl.limit = 1000;
memCtrl.netBytes = release ? 1 : 2000;
while (!stop.get()) {
int internalIters = release && random().nextBoolean() ? atLeast(5) : 1;
for (int i = 0; i < internalIters; i++) {
ctrl.updateStalled(memCtrl);
}
if (checkPoint.get()) {
CountDownLatch join = latches[0];
CountDownLatch wait = latches[1];
join.countDown();
try {
wait.await();
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
Thread.yield();
}
} catch (Throwable e) {
e.printStackTrace();
exceptions.add(e);
}
}
}
public static boolean terminated(Thread[] threads) {
for (Thread thread : threads) {
if (Thread.State.TERMINATED != thread.getState()) return false;
}
return true;
}
public static void start(Thread[] tostart) throws InterruptedException {
for (Thread thread : tostart) {
thread.start();
}
Thread.sleep(1); // let them start
}
public static void join(Thread[] toJoin, long timeout)
throws InterruptedException {
for (Thread thread : toJoin) {
thread.join(timeout);
}
}
public static Thread[] waitThreads(int num,
final DocumentsWriterStallControl ctrl) {
Thread[] array = new Thread[num];
for (int i = 0; i < array.length; i++) {
array[i] = new Thread() {
public void run() {
ctrl.waitIfStalled();
}
};
}
return array;
}
public static void awaitState(long timeout, Thread.State state,
Thread... threads) throws InterruptedException {
long t = System.currentTimeMillis();
while (System.currentTimeMillis() - t <= timeout) {
boolean done = true;
for (Thread thread : threads) {
if (thread.getState() != state) {
done = false;
}
}
if (done) {
return;
}
if (random().nextBoolean()) {
Thread.yield();
} else {
Thread.sleep(1);
}
}
fail("timed out waiting for state: " + state + " timeout: " + timeout
+ " ms");
}
private static class SimpleMemCtrl implements MemoryController {
long netBytes;
long limit;
@Override
public long netBytes() {
return netBytes;
}
@Override
public long stallLimitBytes() {
return limit;
}
}
}

View File

@ -139,6 +139,10 @@ final class JustCompileSearch {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
@Override
public int compareDocToValue(int doc, Object value) {
throw new UnsupportedOperationException(UNSUPPORTED_MSG);
}
}
static final class JustCompileFieldComparatorSource extends FieldComparatorSource {

View File

@ -188,6 +188,14 @@ class ElevationComparatorSource extends FieldComparatorSource {
public Integer value(int slot) {
return Integer.valueOf(values[slot]);
}
@Override
public int compareDocToValue(int doc, Integer valueObj) throws IOException {
final int value = valueObj.intValue();
final int docValue = docVal(doc);
// values will be small enough that there is no overflow concern
return value - docValue;
}
};
}
}

View File

@ -17,12 +17,25 @@ package org.apache.lucene.search;
* limitations under the License.
*/
import java.util.Arrays;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.FloatDocValuesField;
import org.apache.lucene.document.FloatField;
import org.apache.lucene.document.IntDocValuesField;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.SortedBytesDocValuesField;
import org.apache.lucene.document.StraightBytesDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.English;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
@ -30,11 +43,19 @@ import org.apache.lucene.util._TestUtil;
/**
* Tests IndexSearcher's searchAfter() method
*/
public class TestSearchAfter extends LuceneTestCase {
private Directory dir;
private IndexReader reader;
private IndexSearcher searcher;
boolean supportsDocValues = Codec.getDefault().getName().equals("Lucene3x") == false;
private static SortField useDocValues(SortField field) {
field.setUseIndexValues(true);
return field;
}
@Override
public void setUp() throws Exception {
super.setUp();
@ -45,6 +66,25 @@ public class TestSearchAfter extends LuceneTestCase {
Document document = new Document();
document.add(newField("english", English.intToEnglish(i), TextField.TYPE_UNSTORED));
document.add(newField("oddeven", (i % 2 == 0) ? "even" : "odd", TextField.TYPE_UNSTORED));
document.add(newField("byte", "" + ((byte) random().nextInt()), StringField.TYPE_UNSTORED));
document.add(newField("short", "" + ((short) random().nextInt()), StringField.TYPE_UNSTORED));
document.add(new IntField("int", random().nextInt()));
document.add(new LongField("long", random().nextLong()));
document.add(new FloatField("float", random().nextFloat()));
document.add(new DoubleField("double", random().nextDouble()));
document.add(newField("bytes", _TestUtil.randomRealisticUnicodeString(random()), StringField.TYPE_UNSTORED));
document.add(newField("bytesval", _TestUtil.randomRealisticUnicodeString(random()), StringField.TYPE_UNSTORED));
document.add(new DoubleField("double", random().nextDouble()));
if (supportsDocValues) {
document.add(new IntDocValuesField("intdocvalues", random().nextInt()));
document.add(new FloatDocValuesField("floatdocvalues", random().nextFloat()));
document.add(new SortedBytesDocValuesField("sortedbytesdocvalues", new BytesRef(_TestUtil.randomRealisticUnicodeString(random()))));
document.add(new SortedBytesDocValuesField("sortedbytesdocvaluesval", new BytesRef(_TestUtil.randomRealisticUnicodeString(random()))));
document.add(new StraightBytesDocValuesField("straightbytesdocvalues", new BytesRef(_TestUtil.randomRealisticUnicodeString(random()))));
}
iw.addDocument(document);
}
reader = iw.getReader();
@ -63,7 +103,7 @@ public class TestSearchAfter extends LuceneTestCase {
// because the first page has a null 'after', we get a normal collector.
// so we need to run the test a few times to ensure we will collect multiple
// pages.
int n = atLeast(10);
int n = atLeast(20);
for (int i = 0; i < n; i++) {
Filter odd = new QueryWrapperFilter(new TermQuery(new Term("oddeven", "odd")));
assertQuery(new MatchAllDocsQuery(), null);
@ -78,13 +118,67 @@ public class TestSearchAfter extends LuceneTestCase {
}
void assertQuery(Query query, Filter filter) throws Exception {
assertQuery(query, filter, null);
assertQuery(query, filter, Sort.RELEVANCE);
assertQuery(query, filter, Sort.INDEXORDER);
for(int rev=0;rev<2;rev++) {
boolean reversed = rev == 1;
assertQuery(query, filter, new Sort(new SortField[] {new SortField("byte", SortField.Type.BYTE, reversed)}));
assertQuery(query, filter, new Sort(new SortField[] {new SortField("short", SortField.Type.SHORT, reversed)}));
assertQuery(query, filter, new Sort(new SortField[] {new SortField("int", SortField.Type.INT, reversed)}));
assertQuery(query, filter, new Sort(new SortField[] {new SortField("long", SortField.Type.LONG, reversed)}));
assertQuery(query, filter, new Sort(new SortField[] {new SortField("float", SortField.Type.FLOAT, reversed)}));
assertQuery(query, filter, new Sort(new SortField[] {new SortField("double", SortField.Type.DOUBLE, reversed)}));
assertQuery(query, filter, new Sort(new SortField[] {new SortField("bytes", SortField.Type.STRING, reversed)}));
assertQuery(query, filter, new Sort(new SortField[] {new SortField("bytesval", SortField.Type.STRING_VAL, reversed)}));
if (supportsDocValues) {
assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("intdocvalues", SortField.Type.INT, reversed))}));
assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("floatdocvalues", SortField.Type.FLOAT, reversed))}));
assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("sortedbytesdocvalues", SortField.Type.STRING, reversed))}));
assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("sortedbytesdocvaluesval", SortField.Type.STRING_VAL, reversed))}));
assertQuery(query, filter, new Sort(new SortField[] {useDocValues(new SortField("straightbytesdocvalues", SortField.Type.STRING_VAL, reversed))}));
}
}
}
void assertQuery(Query query, Filter filter, Sort sort) throws Exception {
int maxDoc = searcher.getIndexReader().maxDoc();
TopDocs all = searcher.search(query, filter, maxDoc);
TopDocs all;
int pageSize = _TestUtil.nextInt(random(), 1, maxDoc*2);
if (VERBOSE) {
System.out.println("\nassertQuery: query=" + query + " filter=" + filter + " sort=" + sort + " pageSize=" + pageSize);
}
final boolean doMaxScore = random().nextBoolean();
if (sort == null) {
all = searcher.search(query, filter, maxDoc);
} else if (sort == Sort.RELEVANCE) {
all = searcher.search(query, filter, maxDoc, sort, true, doMaxScore);
} else {
all = searcher.search(query, filter, maxDoc, sort);
}
if (VERBOSE) {
System.out.println(" all.totalHits=" + all.totalHits);
}
int pageStart = 0;
ScoreDoc lastBottom = null;
while (pageStart < all.totalHits) {
TopDocs paged = searcher.searchAfter(lastBottom, query, filter, pageSize);
TopDocs paged;
if (sort == null) {
if (VERBOSE) {
System.out.println(" iter lastBottom=" + lastBottom);
}
paged = searcher.searchAfter(lastBottom, query, filter, pageSize);
} else {
if (VERBOSE) {
System.out.println(" iter lastBottom=" + lastBottom + (lastBottom == null ? "" : " fields=" + Arrays.toString(((FieldDoc) lastBottom).fields)));
}
if (sort == Sort.RELEVANCE) {
paged = searcher.searchAfter(lastBottom, query, filter, pageSize, sort, true, doMaxScore);
} else {
paged = searcher.searchAfter(lastBottom, query, filter, pageSize, sort);
}
}
if (paged.scoreDocs.length == 0) {
break;
}
@ -98,8 +192,14 @@ public class TestSearchAfter extends LuceneTestCase {
static void assertPage(int pageStart, TopDocs all, TopDocs paged) {
assertEquals(all.totalHits, paged.totalHits);
for (int i = 0; i < paged.scoreDocs.length; i++) {
assertEquals(all.scoreDocs[pageStart + i].doc, paged.scoreDocs[i].doc);
assertEquals(all.scoreDocs[pageStart + i].score, paged.scoreDocs[i].score, 0f);
ScoreDoc sd1 = all.scoreDocs[pageStart + i];
ScoreDoc sd2 = paged.scoreDocs[i];
assertEquals(sd1.doc, sd2.doc);
assertEquals(sd1.score, sd2.score, 0f);
if (sd1 instanceof FieldDoc) {
assertTrue(sd2 instanceof FieldDoc);
assertEquals(((FieldDoc) sd1).fields, ((FieldDoc) sd2).fields);
}
}
}
}

View File

@ -44,9 +44,11 @@ import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.index.RandomIndexWriter;
@ -218,7 +220,6 @@ public class TestSort extends LuceneTestCase {
IndexReader reader = writer.getReader();
writer.close ();
IndexSearcher s = newSearcher(reader);
s.setDefaultFieldSortScoring(true, true);
return s;
}
@ -734,6 +735,15 @@ public class TestSort extends LuceneTestCase {
public Integer value(int slot) {
return Integer.valueOf(slotValues[slot]);
}
@Override
public int compareDocToValue(int doc, Integer valueObj) {
final int value = valueObj.intValue();
final int docValue = docValues[doc];
// values are small enough that overflow won't happen
return docValue - value;
}
}
static class MyFieldComparatorSource extends FieldComparatorSource {
@ -889,7 +899,7 @@ public class TestSort extends LuceneTestCase {
// try to pick a query that will result in an unnormalized
// score greater than 1 to test for correct normalization
final TopDocs docs1 = full.search(queryE,null,nDocs,sort);
final TopDocs docs1 = full.search(queryE,null,nDocs,sort,true,true);
// a filter that only allows through the first hit
Filter filt = new Filter() {
@ -903,7 +913,7 @@ public class TestSort extends LuceneTestCase {
}
};
TopDocs docs2 = full.search(queryE, filt, nDocs, sort);
TopDocs docs2 = full.search(queryE, filt, nDocs, sort,true,true);
assertEquals(docs1.scoreDocs[0].score, docs2.scoreDocs[0].score, 1e-6);
}
@ -1244,7 +1254,7 @@ public class TestSort extends LuceneTestCase {
String expectedResult) throws IOException {
//ScoreDoc[] result = searcher.search (query, null, 1000, sort).scoreDocs;
TopDocs hits = searcher.search(query, null, Math.max(1, expectedResult.length()), sort);
TopDocs hits = searcher.search(query, null, Math.max(1, expectedResult.length()), sort, true, true);
ScoreDoc[] result = hits.scoreDocs;
assertEquals(expectedResult.length(),hits.totalHits);
StringBuilder buff = new StringBuilder(10);
@ -1478,4 +1488,38 @@ public class TestSort extends LuceneTestCase {
r.close();
dir.close();
}
public void testMaxScore() throws Exception {
Directory d = newDirectory();
// Not RIW because we need exactly 2 segs:
IndexWriter w = new IndexWriter(d, new IndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random())));
int id = 0;
for(int seg=0;seg<2;seg++) {
for(int docIDX=0;docIDX<10;docIDX++) {
Document doc = new Document();
doc.add(newField("id", ""+docIDX, StringField.TYPE_STORED));
StringBuilder sb = new StringBuilder();
for(int i=0;i<id;i++) {
sb.append(' ');
sb.append("text");
}
doc.add(newField("body", sb.toString(), TextField.TYPE_UNSTORED));
w.addDocument(doc);
id++;
}
w.commit();
}
IndexReader r = DirectoryReader.open(w, true);
w.close();
Query q = new TermQuery(new Term("body", "text"));
IndexSearcher s = newSearcher(r);
float maxScore = s.search(q , 10).getMaxScore();
assertEquals(maxScore, s.search(q, null, 3, Sort.INDEXORDER, random().nextBoolean(), true).getMaxScore(), 0.0);
assertEquals(maxScore, s.search(q, null, 3, Sort.RELEVANCE, random().nextBoolean(), true).getMaxScore(), 0.0);
assertEquals(maxScore, s.search(q, null, 3, new Sort(new SortField[] {new SortField("id", SortField.Type.INT, false)}), random().nextBoolean(), true).getMaxScore(), 0.0);
assertEquals(maxScore, s.search(q, null, 3, new Sort(new SortField[] {new SortField("id", SortField.Type.INT, true)}), random().nextBoolean(), true).getMaxScore(), 0.0);
r.close();
d.close();
}
}

View File

@ -53,9 +53,7 @@ public class TestReproduceMessage extends WithNestedTests {
public Statement apply(final Statement base, Description description) {
return new Statement() {
public void evaluate() throws Throwable {
if (isRunningNested()) {
triggerOn(SorePoint.RULE);
}
triggerOn(SorePoint.RULE);
base.evaluate();
}
};
@ -69,9 +67,7 @@ public class TestReproduceMessage extends WithNestedTests {
@Before
public void before() {
if (isRunningNested()) {
triggerOn(SorePoint.BEFORE);
}
triggerOn(SorePoint.BEFORE);
}
@Test
@ -81,9 +77,7 @@ public class TestReproduceMessage extends WithNestedTests {
@After
public void after() {
if (isRunningNested()) {
triggerOn(SorePoint.AFTER);
}
triggerOn(SorePoint.AFTER);
}
@AfterClass

View File

@ -22,15 +22,18 @@ import java.io.PrintStream;
import java.io.UnsupportedEncodingException;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.TestRuleIgnoreTestSuites;
import org.junit.After;
import org.junit.Assert;
import org.junit.Before;
import org.junit.ClassRule;
import org.junit.Rule;
import org.junit.rules.TestRule;
import org.junit.runner.Description;
import org.junit.runners.model.Statement;
import com.carrotsearch.randomizedtesting.RandomizedRunner;
import com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule;
/**
* An abstract test class that prepares nested test classes to run.
@ -45,28 +48,11 @@ import com.carrotsearch.randomizedtesting.RandomizedRunner;
* cause havoc (static fields).
*/
public abstract class WithNestedTests {
/**
* This can no longer be thread local because {@link RandomizedRunner} runs
* suites in an isolated threadgroup/thread.
*/
public static volatile boolean runsAsNested;
public static abstract class AbstractNestedTest extends LuceneTestCase {
@ClassRule
public static TestRule ignoreIfRunAsStandalone = new TestRule() {
public Statement apply(final Statement s, Description arg1) {
return new Statement() {
public void evaluate() throws Throwable {
if (isRunningNested()) {
s.evaluate();
}
}
};
}
};
public static abstract class AbstractNestedTest extends LuceneTestCase
implements TestRuleIgnoreTestSuites.NestedTestSuite {
protected static boolean isRunningNested() {
return runsAsNested;
return TestRuleIgnoreTestSuites.isRunningNested();
}
}
@ -81,6 +67,12 @@ public abstract class WithNestedTests {
private ByteArrayOutputStream sysout;
private ByteArrayOutputStream syserr;
/**
* Restore properties after test.
*/
@Rule
public SystemPropertiesRestoreRule restoreProperties = new SystemPropertiesRestoreRule();
@Before
public final void before() {
if (suppressOutputStreams) {
@ -97,13 +89,11 @@ public abstract class WithNestedTests {
}
}
runsAsNested = true;
System.setProperty(TestRuleIgnoreTestSuites.PROPERTY_RUN_NESTED, "true");
}
@After
public final void after() {
runsAsNested = false;
if (suppressOutputStreams) {
System.out.flush();
System.err.flush();

View File

@ -81,7 +81,7 @@ public class TaxonomyMergeUtils {
OrdinalMap map, IndexWriter destIndexWriter,
DirectoryTaxonomyWriter destTaxWriter) throws IOException {
// merge the taxonomies
destTaxWriter.addTaxonomies(new Directory[] { srcTaxDir }, new OrdinalMap[] { map });
destTaxWriter.addTaxonomy(srcTaxDir, map);
PayloadProcessorProvider payloadProcessor = new FacetsPayloadProcessorProvider(
srcIndexDir, map.getMap(), new DefaultFacetIndexingParams());

View File

@ -4,8 +4,6 @@ import java.io.IOException;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.index.StoredFieldVisitor.Status;
import org.apache.lucene.store.IndexInput;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
@ -42,6 +40,7 @@ abstract class Consts {
public static final class LoadFullPathOnly extends StoredFieldVisitor {
private String fullPath;
@Override
public void stringField(FieldInfo fieldInfo, String value) throws IOException {
fullPath = value;
}

View File

@ -12,15 +12,22 @@ import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.core.KeywordAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsEnum;
@ -29,28 +36,18 @@ import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.NativeFSLockFactory;
import org.apache.lucene.store.SimpleFSLockFactory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Version;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -233,7 +230,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
// Make sure that the taxonomy always contain the root category
// with category id 0.
addCategory(new CategoryPath());
refreshReader();
refreshInternalReader();
} else {
// There are some categories on the disk, which we have not yet
// read into the cache, and therefore the cache is incomplete.
@ -289,15 +286,15 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
new KeywordAnalyzer()).setOpenMode(openMode).setMergePolicy(
new LogByteSizeMergePolicy());
}
// Currently overridden by a unit test that verifies that every index we open is close()ed.
/**
* Open an {@link IndexReader} from the internal {@link IndexWriter}, by
* calling {@link IndexReader#open(IndexWriter, boolean)}. Extending classes can override
* this method to return their own {@link IndexReader}.
*/
protected DirectoryReader openReader() throws IOException {
return DirectoryReader.open(indexWriter, true);
/** Opens a {@link DirectoryReader} from the internal {@link IndexWriter}. */
private synchronized void openInternalReader() throws IOException {
// verify that the taxo-writer hasn't been closed on us. the method is
// synchronized since it may be called from a non sync'ed block, and it
// needs to protect against close() happening concurrently.
ensureOpen();
assert reader == null : "a reader is already open !";
reader = DirectoryReader.open(indexWriter, false);
}
/**
@ -348,18 +345,6 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
closeResources();
}
/**
* Returns the number of memory bytes used by the cache.
* @return Number of cache bytes in memory, for CL2O only; zero otherwise.
*/
public int getCacheMemoryUsage() {
ensureOpen();
if (this.cache == null || !(this.cache instanceof Cl2oTaxonomyWriterCache)) {
return 0;
}
return ((Cl2oTaxonomyWriterCache)this.cache).getMemoryUsage();
}
/**
* A hook for extending classes to close additional resources that were used.
* The default implementation closes the {@link IndexReader} as well as the
@ -411,24 +396,29 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
// We need to get an answer from the on-disk index. If a reader
// is not yet open, do it now:
if (reader == null) {
reader = openReader();
openInternalReader();
}
// TODO (Facet): avoid Multi*?
Bits liveDocs = MultiFields.getLiveDocs(reader);
DocsEnum docs = MultiFields.getTermDocsEnum(reader, liveDocs, Consts.FULL,
new BytesRef(categoryPath.toString(delimiter)),
false);
if (docs == null || docs.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
return -1; // category does not exist in taxonomy
int base = 0;
int doc = -1;
for (AtomicReader r : reader.getSequentialSubReaders()) {
DocsEnum docs = r.termDocsEnum(null, Consts.FULL,
new BytesRef(categoryPath.toString(delimiter)), false);
if (docs != null) {
doc = docs.nextDoc() + base;
break;
}
base += r.maxDoc(); // we don't have deletions, so it's ok to call maxDoc
}
// Note: we do NOT add to the cache the fact that the category
// does not exist. The reason is that our only use for this
// method is just before we actually add this category. If
// in the future this usage changes, we should consider caching
// the fact that the category is not in the taxonomy.
addToCache(categoryPath, docs.docID());
return docs.docID();
if (doc > 0) {
addToCache(categoryPath, doc);
}
return doc;
}
/**
@ -437,7 +427,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
* case the category does not yet exist in the taxonomy.
*/
private int findCategory(CategoryPath categoryPath, int prefixLen)
throws IOException {
throws IOException {
int res = cache.get(categoryPath, prefixLen);
if (res >= 0) {
return res;
@ -450,38 +440,48 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
return cache.get(categoryPath, prefixLen);
}
if (reader == null) {
reader = openReader();
openInternalReader();
}
Bits liveDocs = MultiFields.getLiveDocs(reader);
DocsEnum docs = MultiFields.getTermDocsEnum(reader, liveDocs, Consts.FULL,
new BytesRef(categoryPath.toString(delimiter, prefixLen)),
false);
if (docs == null || docs.nextDoc() == DocIdSetIterator.NO_MORE_DOCS) {
return -1; // category does not exist in taxonomy
int base = 0;
int doc = -1;
for (AtomicReader r : reader.getSequentialSubReaders()) {
DocsEnum docs = r.termDocsEnum(null, Consts.FULL,
new BytesRef(categoryPath.toString(delimiter, prefixLen)), false);
if (docs != null) {
doc = docs.nextDoc() + base;
break;
}
base += r.maxDoc(); // we don't have deletions, so it's ok to call maxDoc
}
addToCache(categoryPath, prefixLen, docs.docID());
return docs.docID();
if (doc > 0) {
addToCache(categoryPath, prefixLen, doc);
}
return doc;
}
// TODO (Facet): addCategory() is synchronized. This means that if indexing is
// multi-threaded, a new category that needs to be written to disk (and
// potentially even trigger a lengthy merge) locks out other addCategory()
// calls - even those which could immediately return a cached value.
// We definitely need to fix this situation!
@Override
public synchronized int addCategory(CategoryPath categoryPath) throws IOException {
public int addCategory(CategoryPath categoryPath) throws IOException {
ensureOpen();
// If the category is already in the cache and/or the taxonomy, we
// should return its existing ordinal:
// should return its existing ordinal
int res = findCategory(categoryPath);
if (res < 0) {
// This is a new category, and we need to insert it into the index
// (and the cache). Actually, we might also need to add some of
// the category's ancestors before we can add the category itself
// (while keeping the invariant that a parent is always added to
// the taxonomy before its child). internalAddCategory() does all
// this recursively:
res = internalAddCategory(categoryPath, categoryPath.length());
        // the category is neither in the cache nor in the index - the following code
// cannot be executed in parallel.
synchronized (this) {
res = findCategory(categoryPath);
if (res < 0) {
// This is a new category, and we need to insert it into the index
// (and the cache). Actually, we might also need to add some of
// the category's ancestors before we can add the category itself
// (while keeping the invariant that a parent is always added to
// the taxonomy before its child). internalAddCategory() does all
// this recursively
res = internalAddCategory(categoryPath, categoryPath.length());
}
}
}
return res;
@ -497,7 +497,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
* recursion.
*/
private int internalAddCategory(CategoryPath categoryPath, int length)
throws CorruptIndexException, IOException {
throws IOException {
// Find our parent's ordinal (recursively adding the parent category
// to the taxonomy if it's not already there). Then add the parent
@ -529,13 +529,12 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
}
}
    // Note that the methods calling addCategoryDocument() are synchronized,
// so this method is effectively synchronized as well, but we'll add
// synchronized to be on the safe side, and we can reuse class-local objects
// instead of allocating them every time
protected synchronized int addCategoryDocument(CategoryPath categoryPath,
int length, int parent)
throws CorruptIndexException, IOException {
/**
   * Note that the methods calling addCategoryDocument() are synchronized, so
* this method is effectively synchronized as well.
*/
private int addCategoryDocument(CategoryPath categoryPath, int length,
int parent) throws IOException {
// Before Lucene 2.9, position increments >=0 were supported, so we
// added 1 to parent to allow the parent -1 (the parent of the root).
// Unfortunately, starting with Lucene 2.9, after LUCENE-1542, this is
@ -545,7 +544,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
// we write here (e.g., to write parent+2), and need to do a workaround
// in the reader (which knows that anyway only category 0 has a parent
// -1).
parentStream.set(parent+1);
parentStream.set(parent + 1);
Document d = new Document();
d.add(parentStreamField);
@ -602,8 +601,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
}
}
private void addToCache(CategoryPath categoryPath, int id)
throws CorruptIndexException, IOException {
private void addToCache(CategoryPath categoryPath, int id) throws IOException {
if (cache.put(categoryPath, id)) {
// If cache.put() returned true, it means the cache was limited in
// size, became full, so parts of it had to be cleared.
@ -615,20 +613,20 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
// Because this is a slow operation, cache implementations are
// expected not to delete entries one-by-one but rather in bulk
// (LruTaxonomyWriterCache removes the 2/3rd oldest entries).
refreshReader();
refreshInternalReader();
cacheIsComplete = false;
}
}
private void addToCache(CategoryPath categoryPath, int prefixLen, int id)
throws CorruptIndexException, IOException {
throws IOException {
if (cache.put(categoryPath, prefixLen, id)) {
refreshReader();
refreshInternalReader();
cacheIsComplete = false;
}
}
protected synchronized void refreshReader() throws IOException {
private synchronized void refreshInternalReader() throws IOException {
if (reader != null) {
DirectoryReader r2 = DirectoryReader.openIfChanged(reader);
if (r2 != null) {
@ -648,7 +646,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
public synchronized void commit() throws CorruptIndexException, IOException {
ensureOpen();
indexWriter.commit(combinedCommitData(null));
refreshReader();
refreshInternalReader();
}
/**
@ -674,7 +672,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
public synchronized void commit(Map<String,String> commitUserData) throws CorruptIndexException, IOException {
ensureOpen();
indexWriter.commit(combinedCommitData(commitUserData));
refreshReader();
refreshInternalReader();
}
/**
@ -759,7 +757,7 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
// TODO (Facet): we should probably completely clear the cache before starting
// to read it?
if (reader == null) {
reader = openReader();
openInternalReader();
}
if (!cache.hasRoom(reader.numDocs())) {
@ -767,7 +765,29 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
}
CategoryPath cp = new CategoryPath();
Terms terms = MultiFields.getTerms(reader, Consts.FULL);
TermsEnum termsEnum = null;
DocsEnum docsEnum = null;
int base = 0;
for (AtomicReader r : reader.getSequentialSubReaders()) {
Terms terms = r.terms(Consts.FULL);
if (terms != null) { // cannot really happen, but be on the safe side
termsEnum = terms.iterator(termsEnum);
while (termsEnum.next() != null) {
BytesRef t = termsEnum.term();
// Since we guarantee uniqueness of categories, each term has exactly
// one document. Also, since we do not allow removing categories (and
// hence documents), there are no deletions in the index. Therefore, it
// is sufficient to call next(), and then doc(), exactly once with no
// 'validation' checks.
cp.clear();
cp.add(t.utf8ToString(), delimiter);
docsEnum = termsEnum.docs(null, docsEnum, false);
cache.put(cp, docsEnum.nextDoc() + base);
}
}
base += r.maxDoc(); // we don't have any deletions, so we're ok
}
/*Terms terms = MultiFields.getTerms(reader, Consts.FULL);
// The check is done here to avoid checking it on every iteration of the
    // below loop. A null term will be returned if there are no terms in the
// lexicon, or after the Consts.FULL term. However while the loop is
@ -787,11 +807,10 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
docsEnum = termsEnum.docs(liveDocs, docsEnum, false);
docsEnum.nextDoc();
cp.clear();
// TODO (Facet): avoid String creation/use bytes?
cp.add(t.utf8ToString(), delimiter);
cache.put(cp, docsEnum.docID());
}
}
}*/
cacheIsComplete = true;
// No sense to keep the reader open - we will not need to read from it
@ -805,13 +824,14 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
private synchronized ParentArray getParentArray() throws IOException {
if (parentArray==null) {
if (reader == null) {
reader = openReader();
openInternalReader();
}
parentArray = new ParentArray();
parentArray.refresh(reader);
}
return parentArray;
}
@Override
public int getParent(int ordinal) throws IOException {
ensureOpen();
@ -823,171 +843,47 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
}
return getParentArray().getArray()[ordinal];
}
/**
* Take all the categories of one or more given taxonomies, and add them to
* the main taxonomy (this), if they are not already there.
* <P>
* Additionally, fill a <I>mapping</I> for each of the added taxonomies,
* mapping its ordinals to the ordinals in the enlarged main taxonomy.
   * These mappings are saved into an array of OrdinalMap objects given by the
* user, one for each of the given taxonomies (not including "this", the main
* taxonomy). Often the first of these will be a MemoryOrdinalMap and the
* others will be a DiskOrdinalMap - see discussion in {OrdinalMap}.
* <P>
* Note that the taxonomies to be added are given as Directory objects,
* not opened TaxonomyReader/TaxonomyWriter objects, so if any of them are
* currently managed by an open TaxonomyWriter, make sure to commit() (or
* close()) it first. The main taxonomy (this) is an open TaxonomyWriter,
* and does not need to be commit()ed before this call.
* Takes the categories from the given taxonomy directory, and adds the
* missing ones to this taxonomy. Additionally, it fills the given
* {@link OrdinalMap} with a mapping from the original ordinal to the new
* ordinal.
*/
public void addTaxonomies(Directory[] taxonomies, OrdinalMap[] ordinalMaps) throws IOException {
public void addTaxonomy(Directory taxoDir, OrdinalMap map) throws IOException {
ensureOpen();
// To prevent us stepping on the rest of this class's decisions on when
// to open a reader, and when not, we'll be opening a new reader instead
// of using the existing "reader" object:
IndexReader mainreader = openReader();
// TODO (Facet): can this then go segment-by-segment and avoid MultiDocsEnum etc?
Terms terms = MultiFields.getTerms(mainreader, Consts.FULL);
assert terms != null; // TODO (Facet): explicit check / throw exception?
TermsEnum mainte = terms.iterator(null);
DocsEnum mainde = null;
IndexReader[] otherreaders = new IndexReader[taxonomies.length];
TermsEnum[] othertes = new TermsEnum[taxonomies.length];
DocsEnum[] otherdocsEnum = new DocsEnum[taxonomies.length]; // just for reuse
for (int i=0; i<taxonomies.length; i++) {
otherreaders[i] = DirectoryReader.open(taxonomies[i]);
terms = MultiFields.getTerms(otherreaders[i], Consts.FULL);
assert terms != null; // TODO (Facet): explicit check / throw exception?
othertes[i] = terms.iterator(null);
// Also tell the ordinal maps their expected sizes:
ordinalMaps[i].setSize(otherreaders[i].numDocs());
}
CategoryPath cp = new CategoryPath();
// We keep a "current" cursor over the alphabetically-ordered list of
// categories in each taxonomy. We start the cursor on the first
// (alphabetically) category of each taxonomy:
String currentMain;
String[] currentOthers = new String[taxonomies.length];
currentMain = nextTE(mainte);
int otherTaxonomiesLeft = 0;
for (int i=0; i<taxonomies.length; i++) {
currentOthers[i] = nextTE(othertes[i]);
if (currentOthers[i]!=null) {
otherTaxonomiesLeft++;
}
}
// And then, at each step look at the first (alphabetically) of the
// current taxonomies.
// NOTE: The most efficient way we could have done this is using a
// PriorityQueue. But for simplicity, and assuming that usually we'll
// have a very small number of other taxonomies (often just 1), we use
// a more naive algorithm (o(ntaxonomies) instead of o(ln ntaxonomies)
// per step)
while (otherTaxonomiesLeft>0) {
// TODO: use a pq here
String first=null;
for (int i=0; i<taxonomies.length; i++) {
if (currentOthers[i]==null) continue;
if (first==null || first.compareTo(currentOthers[i])>0) {
first = currentOthers[i];
}
}
int comp = 0;
if (currentMain==null || (comp = currentMain.compareTo(first))>0) {
// If 'first' is before currentMain, or currentMain is null,
// then 'first' is a new category and we need to add it to the
// main taxonomy. Then for all taxonomies with this 'first'
// category, we need to add the new category number to their
// map, and move to the next category in all of them.
cp.clear();
cp.add(first, delimiter);
// We can call internalAddCategory() instead of addCategory()
// because we know the category hasn't been seen yet.
int newordinal = internalAddCategory(cp, cp.length());
// TODO (Facet): we already had this term in our hands before, in nextTE...
// // TODO (Facet): no need to make this term?
for (int i=0; i<taxonomies.length; i++) {
if (first.equals(currentOthers[i])) {
// remember the remapping of this ordinal. Note how
// this requires reading a posting list from the index -
// but since we do this in lexical order of terms, just
// like Lucene's merge works, we hope there are few seeks.
// TODO (Facet): is there a quicker way? E.g., not specifying the
// next term by name every time?
otherdocsEnum[i] = othertes[i].docs(MultiFields.getLiveDocs(otherreaders[i]), otherdocsEnum[i], false);
otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
int origordinal = otherdocsEnum[i].docID();
ordinalMaps[i].addMapping(origordinal, newordinal);
// and move to the next category in the i'th taxonomy
currentOthers[i] = nextTE(othertes[i]);
if (currentOthers[i]==null) {
otherTaxonomiesLeft--;
}
DirectoryReader r = DirectoryReader.open(taxoDir);
try {
final int size = r.numDocs();
final OrdinalMap ordinalMap = map;
ordinalMap.setSize(size);
CategoryPath cp = new CategoryPath();
int base = 0;
TermsEnum te = null;
DocsEnum docs = null;
for (AtomicReader ar : r.getSequentialSubReaders()) {
Terms terms = ar.terms(Consts.FULL);
te = terms.iterator(te);
while (te.next() != null) {
String value = te.term().utf8ToString();
cp.clear();
cp.add(value, Consts.DEFAULT_DELIMITER);
int ordinal = findCategory(cp);
if (ordinal < 0) {
// NOTE: call addCategory so that it works well in a multi-threaded
// environment, in case e.g. a thread just added the category, after
// the findCategory() call above failed to find it.
ordinal = addCategory(cp);
}
docs = te.docs(null, docs, false);
ordinalMap.addMapping(docs.nextDoc() + base, ordinal);
}
} else if (comp==0) {
// 'first' and currentMain are the same, so both the main and some
// other taxonomies need to be moved, but a category doesn't need
// to be added because it already existed in the main taxonomy.
// TODO (Facet): Again, is there a quicker way?
mainde = mainte.docs(MultiFields.getLiveDocs(mainreader), mainde, false);
mainde.nextDoc(); // TODO (Facet): check?
int newordinal = mainde.docID();
currentMain = nextTE(mainte);
for (int i=0; i<taxonomies.length; i++) {
if (first.equals(currentOthers[i])) {
// TODO (Facet): again, is there a quicker way?
otherdocsEnum[i] = othertes[i].docs(MultiFields.getLiveDocs(otherreaders[i]), otherdocsEnum[i], false);
otherdocsEnum[i].nextDoc(); // TODO (Facet): check?
int origordinal = otherdocsEnum[i].docID();
ordinalMaps[i].addMapping(origordinal, newordinal);
// and move to the next category
currentOthers[i] = nextTE(othertes[i]);
if (currentOthers[i]==null) {
otherTaxonomiesLeft--;
}
}
}
} else /* comp > 0 */ {
// The currentMain doesn't appear in any of the other taxonomies -
// we don't need to do anything, just continue to the next one
currentMain = nextTE(mainte);
base += ar.maxDoc(); // no deletions, so we're ok
}
ordinalMap.addDone();
} finally {
r.close();
}
// Close all the readers we've opened, and also tell the ordinal maps
// we're done adding to them
mainreader.close();
for (int i=0; i<taxonomies.length; i++) {
otherreaders[i].close();
// We never actually added a mapping for the root ordinal - let's do
// it now, just so that the map is complete (every ordinal between 0
// and size-1 is remapped)
ordinalMaps[i].addMapping(0, 0);
ordinalMaps[i].addDone();
}
}
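// A minimal, hedged usage sketch of the new single-source signature (not part of
// the patch itself): the destination writer merges one source taxonomy Directory
// and records, per source ordinal, the matching ordinal in the merged taxonomy.
// "destDir" and "srcDir" are illustrative names for pre-existing taxonomy
// directories.
DirectoryTaxonomyWriter destWriter = new DirectoryTaxonomyWriter(destDir);
DirectoryTaxonomyWriter.OrdinalMap map = new DirectoryTaxonomyWriter.MemoryOrdinalMap();
destWriter.addTaxonomy(srcDir, map);
destWriter.close();
int[] remap = map.getMap(); // remap[srcOrdinal] == ordinal of the same category in the merged taxonomy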
/**
* Expert: This method is only for expert use.
* Note also that any call to refresh() will invalidate the returned reader,
* so the caller needs to take care of appropriate locking.
*
* @return lucene indexReader
*/
DirectoryReader getInternalIndexReader() {
ensureOpen();
return this.reader;
}
/**
@ -1113,13 +1009,6 @@ public class DirectoryTaxonomyWriter implements TaxonomyWriter {
}
}
private static final String nextTE(TermsEnum te) throws IOException {
if (te.next() != null) {
return te.term().utf8ToString(); // TODO (Facet): avoid String creation/use Bytes?
}
return null;
}
/**
* Rollback changes to the taxonomy writer and closes the instance. Following
* this method the instance becomes unusable (calling any of its API methods

View File

@ -1,5 +1,8 @@
package org.apache.lucene.facet.taxonomy.writercache.cl2o;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
@ -30,44 +33,71 @@ import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
*/
public class Cl2oTaxonomyWriterCache implements TaxonomyWriterCache {
private final ReadWriteLock lock = new ReentrantReadWriteLock();
private CompactLabelToOrdinal cache;
public Cl2oTaxonomyWriterCache(int initialCapacity, float loadFactor, int numHashArrays) {
this.cache = new CompactLabelToOrdinal(initialCapacity, loadFactor, numHashArrays);
}
public void close() {
cache=null;
@Override
public synchronized void close() {
cache = null;
}
@Override
public boolean hasRoom(int n) {
// This cache is unlimited, so we always have room for remembering more:
return true;
}
@Override
public int get(CategoryPath categoryPath) {
return cache.getOrdinal(categoryPath);
lock.readLock().lock();
try {
return cache.getOrdinal(categoryPath);
} finally {
lock.readLock().unlock();
}
}
@Override
public int get(CategoryPath categoryPath, int length) {
if (length<0 || length>categoryPath.length()) {
if (length < 0 || length > categoryPath.length()) {
length = categoryPath.length();
}
return cache.getOrdinal(categoryPath, length);
lock.readLock().lock();
try {
return cache.getOrdinal(categoryPath, length);
} finally {
lock.readLock().unlock();
}
}
@Override
public boolean put(CategoryPath categoryPath, int ordinal) {
cache.addLabel(categoryPath, ordinal);
// Tell the caller we didn't clear part of the cache, so it doesn't
// have to flush its on-disk index now
return false;
lock.writeLock().lock();
try {
cache.addLabel(categoryPath, ordinal);
// Tell the caller we didn't clear part of the cache, so it doesn't
// have to flush its on-disk index now
return false;
} finally {
lock.writeLock().unlock();
}
}
@Override
public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) {
cache.addLabel(categoryPath, prefixLen, ordinal);
// Tell the caller we didn't clear part of the cache, so it doesn't
// have to flush its on-disk index now
return false;
lock.writeLock().lock();
try {
cache.addLabel(categoryPath, prefixLen, ordinal);
// Tell the caller we didn't clear part of the cache, so it doesn't
// have to flush its on-disk index now
return false;
} finally {
lock.writeLock().unlock();
}
}
/**
@ -75,8 +105,7 @@ public class Cl2oTaxonomyWriterCache implements TaxonomyWriterCache {
* @return Number of bytes in memory used by this object.
*/
public int getMemoryUsage() {
int memoryUsage = (this.cache == null) ? 0 : this.cache.getMemoryUsage();
return memoryUsage;
return cache == null ? 0 : cache.getMemoryUsage();
}
}
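// Aside (illustrative sketch, not part of the patch): the change above guards the
// shared CompactLabelToOrdinal with a ReadWriteLock, so concurrent get() calls do
// not serialize behind one another, whereas LruTaxonomyWriterCache below simply
// marks every method synchronized. The same guard shape on a trivial piece of state:
class GuardedCounter {
  private final java.util.concurrent.locks.ReadWriteLock rw =
      new java.util.concurrent.locks.ReentrantReadWriteLock();
  private int value;
  int get() {
    rw.readLock().lock();   // many readers may hold the read lock concurrently
    try { return value; } finally { rw.readLock().unlock(); }
  }
  void set(int v) {
    rw.writeLock().lock();  // writers take the lock exclusively
    try { value = v; } finally { rw.writeLock().unlock(); }
  }
}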

View File

@ -60,16 +60,19 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
}
}
public boolean hasRoom(int n) {
return n<=(cache.getMaxSize()-cache.getSize());
@Override
public synchronized boolean hasRoom(int n) {
return n <= (cache.getMaxSize() - cache.getSize());
}
public void close() {
@Override
public synchronized void close() {
cache.clear();
cache=null;
cache = null;
}
public int get(CategoryPath categoryPath) {
@Override
public synchronized int get(CategoryPath categoryPath) {
Integer res = cache.get(categoryPath);
if (res == null) {
return -1;
@ -78,7 +81,8 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
return res.intValue();
}
public int get(CategoryPath categoryPath, int length) {
@Override
public synchronized int get(CategoryPath categoryPath, int length) {
if (length<0 || length>categoryPath.length()) {
length = categoryPath.length();
}
@ -94,7 +98,8 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
return res.intValue();
}
public boolean put(CategoryPath categoryPath, int ordinal) {
@Override
public synchronized boolean put(CategoryPath categoryPath, int ordinal) {
boolean ret = cache.put(categoryPath, new Integer(ordinal));
// If the cache is full, we need to clear one or more old entries
// from the cache. However, if we delete from the cache a recent
@ -109,7 +114,8 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
return ret;
}
public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) {
@Override
public synchronized boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) {
boolean ret = cache.put(categoryPath, prefixLen, new Integer(ordinal));
// If the cache is full, we need to clear one or more old entries
// from the cache. However, if we delete from the cache a recent
@ -125,4 +131,3 @@ public class LruTaxonomyWriterCache implements TaxonomyWriterCache {
}
}

View File

@ -1,254 +0,0 @@
package org.apache.lucene.facet.taxonomy.directory;
import java.io.File;
import org.apache.lucene.store.Directory;
import org.junit.Test;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.DiskOrdinalMap;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class TestAddTaxonomies extends LuceneTestCase {
@Test
public void test1() throws Exception {
Directory dir1 = newDirectory();
DirectoryTaxonomyWriter tw1 = new DirectoryTaxonomyWriter(dir1);
tw1.addCategory(new CategoryPath("Author", "Mark Twain"));
tw1.addCategory(new CategoryPath("Animals", "Dog"));
Directory dir2 = newDirectory();
DirectoryTaxonomyWriter tw2 = new DirectoryTaxonomyWriter(dir2);
tw2.addCategory(new CategoryPath("Author", "Rob Pike"));
tw2.addCategory(new CategoryPath("Aardvarks", "Bob"));
tw2.close();
Directory dir3 = newDirectory();
DirectoryTaxonomyWriter tw3 = new DirectoryTaxonomyWriter(dir3);
tw3.addCategory(new CategoryPath("Author", "Zebra Smith"));
tw3.addCategory(new CategoryPath("Aardvarks", "Bob"));
tw3.addCategory(new CategoryPath("Aardvarks", "Aaron"));
tw3.close();
MemoryOrdinalMap[] maps = new MemoryOrdinalMap[2];
maps[0] = new MemoryOrdinalMap();
maps[1] = new MemoryOrdinalMap();
tw1.addTaxonomies(new Directory[] { dir2, dir3 }, maps);
tw1.close();
TaxonomyReader tr = new DirectoryTaxonomyReader(dir1);
// Test that the merged taxonomy now contains what we expect:
// First all the categories of the original taxonomy, in their original order:
assertEquals(tr.getPath(0).toString(), "");
assertEquals(tr.getPath(1).toString(), "Author");
assertEquals(tr.getPath(2).toString(), "Author/Mark Twain");
assertEquals(tr.getPath(3).toString(), "Animals");
assertEquals(tr.getPath(4).toString(), "Animals/Dog");
// Then the categories new in the new taxonomy, in alphabetical order:
assertEquals(tr.getPath(5).toString(), "Aardvarks");
assertEquals(tr.getPath(6).toString(), "Aardvarks/Aaron");
assertEquals(tr.getPath(7).toString(), "Aardvarks/Bob");
assertEquals(tr.getPath(8).toString(), "Author/Rob Pike");
assertEquals(tr.getPath(9).toString(), "Author/Zebra Smith");
assertEquals(tr.getSize(), 10);
// Test that the maps contain what we expect
int[] map0 = maps[0].getMap();
assertEquals(5, map0.length);
assertEquals(0, map0[0]);
assertEquals(1, map0[1]);
assertEquals(8, map0[2]);
assertEquals(5, map0[3]);
assertEquals(7, map0[4]);
int[] map1 = maps[1].getMap();
assertEquals(6, map1.length);
assertEquals(0, map1[0]);
assertEquals(1, map1[1]);
assertEquals(9, map1[2]);
assertEquals(5, map1[3]);
assertEquals(7, map1[4]);
assertEquals(6, map1[5]);
tr.close();
dir1.close();
dir2.close();
dir3.close();
}
// a reasonable random test
public void testmedium() throws Exception {
int numTests = atLeast(3);
for (int i = 0; i < numTests; i++) {
dotest(_TestUtil.nextInt(random(), 1, 10),
_TestUtil.nextInt(random(), 1, 100),
_TestUtil.nextInt(random(), 100, 1000),
random().nextBoolean());
}
}
// A bigger, more comprehensive random test.
@Test @Nightly
public void testbig() throws Exception {
dotest(2, 1000, 5000, false);
dotest(10, 10000, 100, false);
dotest(50, 20, 100, false);
dotest(10, 1000, 10000, false);
dotest(50, 20, 10000, false);
dotest(1, 20, 10000, false);
dotest(10, 1, 10000, false);
dotest(10, 1000, 20000, true);
}
private void dotest(int ntaxonomies, int ncats, int range, boolean disk) throws Exception {
Directory dirs[] = new Directory[ntaxonomies];
Directory copydirs[] = new Directory[ntaxonomies];
for (int i=0; i<ntaxonomies; i++) {
dirs[i] = newDirectory();
copydirs[i] = newDirectory();
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[i]);
DirectoryTaxonomyWriter copytw = new DirectoryTaxonomyWriter(copydirs[i]);
for (int j=0; j<ncats; j++) {
String cat = Integer.toString(random().nextInt(range));
tw.addCategory(new CategoryPath("a",cat));
copytw.addCategory(new CategoryPath("a",cat));
}
// System.err.println("Taxonomy "+i+": "+tw.getSize());
tw.close();
copytw.close();
}
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0]);
Directory otherdirs[] = new Directory[ntaxonomies-1];
System.arraycopy(dirs, 1, otherdirs, 0, ntaxonomies-1);
OrdinalMap[] maps = new OrdinalMap[ntaxonomies-1];
if (ntaxonomies>1) {
for (int i=0; i<ntaxonomies-1; i++) {
if (disk) {
// TODO: use a LTC tempfile
maps[i] = new DiskOrdinalMap(new File(System.getProperty("java.io.tmpdir"),
"tmpmap"+i));
} else {
maps[i] = new MemoryOrdinalMap();
}
}
}
tw.addTaxonomies(otherdirs, maps);
// System.err.println("Merged axonomy: "+tw.getSize());
tw.close();
// Check that all original categories in the main taxonomy remain
// unchanged, and the rest of the taxonomies are completely unchanged.
for (int i=0; i<ntaxonomies; i++) {
TaxonomyReader tr = new DirectoryTaxonomyReader(dirs[i]);
TaxonomyReader copytr = new DirectoryTaxonomyReader(copydirs[i]);
if (i==0) {
assertTrue(tr.getSize() >= copytr.getSize());
} else {
assertEquals(copytr.getSize(), tr.getSize());
}
for (int j=0; j<copytr.getSize(); j++) {
String expected = copytr.getPath(j).toString();
String got = tr.getPath(j).toString();
assertTrue("Comparing category "+j+" of taxonomy "+i+": expected "+expected+", got "+got,
expected.equals(got));
}
tr.close();
copytr.close();
}
// Check that all the new categories in the main taxonomy are in
// lexicographic order. This isn't a requirement of our API, but happens
// this way in our current implementation.
TaxonomyReader tr = new DirectoryTaxonomyReader(dirs[0]);
TaxonomyReader copytr = new DirectoryTaxonomyReader(copydirs[0]);
if (tr.getSize() > copytr.getSize()) {
String prev = tr.getPath(copytr.getSize()).toString();
for (int j=copytr.getSize()+1; j<tr.getSize(); j++) {
String n = tr.getPath(j).toString();
assertTrue(prev.compareTo(n)<0);
prev=n;
}
}
int oldsize = copytr.getSize(); // remember for later
tr.close();
copytr.close();
// Check that all the categories from other taxonomies exist in the new
// taxonomy.
TaxonomyReader main = new DirectoryTaxonomyReader(dirs[0]);
for (int i=1; i<ntaxonomies; i++) {
TaxonomyReader other = new DirectoryTaxonomyReader(dirs[i]);
for (int j=0; j<other.getSize(); j++) {
int otherord = main.getOrdinal(other.getPath(j));
assertTrue(otherord != TaxonomyReader.INVALID_ORDINAL);
}
other.close();
}
// Check that all the new categories in the merged taxonomy exist in
// one of the added taxonomies.
TaxonomyReader[] others = new TaxonomyReader[ntaxonomies-1];
for (int i=1; i<ntaxonomies; i++) {
others[i-1] = new DirectoryTaxonomyReader(dirs[i]);
}
for (int j=oldsize; j<main.getSize(); j++) {
boolean found=false;
CategoryPath path = main.getPath(j);
for (int i=1; i<ntaxonomies; i++) {
if (others[i-1].getOrdinal(path) != TaxonomyReader.INVALID_ORDINAL) {
found=true;
break;
}
}
if (!found) {
fail("Found category "+j+" ("+path+") in merged taxonomy not in any of the separate ones");
}
}
// Check that all the maps are correct
for (int i=0; i<ntaxonomies-1; i++) {
int[] map = maps[i].getMap();
for (int j=0; j<map.length; j++) {
assertEquals(map[j], main.getOrdinal(others[i].getPath(j)));
}
}
for (int i=1; i<ntaxonomies; i++) {
others[i-1].close();
}
main.close();
IOUtils.close(dirs);
IOUtils.close(copydirs);
}
}

View File

@ -0,0 +1,243 @@
package org.apache.lucene.facet.taxonomy.directory;
import java.io.IOException;
import java.util.HashSet;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.DiskOrdinalMap;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.MemoryOrdinalMap;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter.OrdinalMap;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util._TestUtil;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
public class TestAddTaxonomy extends LuceneTestCase {
private void dotest(int ncats, final int range) throws Exception {
final AtomicInteger numCats = new AtomicInteger(ncats);
Directory dirs[] = new Directory[2];
for (int i = 0; i < dirs.length; i++) {
dirs[i] = newDirectory();
final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[i]);
Thread[] addThreads = new Thread[4];
for (int j = 0; j < addThreads.length; j++) {
addThreads[j] = new Thread() {
@Override
public void run() {
Random random = random();
while (numCats.decrementAndGet() > 0) {
String cat = Integer.toString(random.nextInt(range));
try {
tw.addCategory(new CategoryPath("a", cat));
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
};
}
for (Thread t : addThreads) t.start();
for (Thread t : addThreads) t.join();
tw.close();
}
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dirs[0]);
OrdinalMap map = randomOrdinalMap();
tw.addTaxonomy(dirs[1], map);
tw.close();
validate(dirs[0], dirs[1], map);
IOUtils.close(dirs);
}
private OrdinalMap randomOrdinalMap() throws IOException {
if (random().nextBoolean()) {
return new DiskOrdinalMap(_TestUtil.createTempFile("taxoMap", "", TEMP_DIR));
} else {
return new MemoryOrdinalMap();
}
}
private void validate(Directory dest, Directory src, OrdinalMap ordMap) throws Exception {
CategoryPath cp = new CategoryPath();
DirectoryTaxonomyReader destTR = new DirectoryTaxonomyReader(dest);
try {
final int destSize = destTR.getSize();
DirectoryTaxonomyReader srcTR = new DirectoryTaxonomyReader(src);
try {
int[] map = ordMap.getMap();
// validate taxo sizes
int srcSize = srcTR.getSize();
assertTrue("destination taxonomy expected to be larger than source; dest="
+ destSize + " src=" + srcSize,
destSize >= srcSize);
// validate that all source categories exist in destination, and their
// ordinals are as expected.
for (int j = 1; j < srcSize; j++) {
srcTR.getPath(j, cp);
int destOrdinal = destTR.getOrdinal(cp);
assertTrue(cp + " not found in destination", destOrdinal > 0);
assertEquals(destOrdinal, map[j]);
}
} finally {
srcTR.close();
}
} finally {
destTR.close();
}
}
public void testAddEmpty() throws Exception {
Directory dest = newDirectory();
DirectoryTaxonomyWriter destTW = new DirectoryTaxonomyWriter(dest);
destTW.addCategory(new CategoryPath("Author", "Rob Pike"));
destTW.addCategory(new CategoryPath("Aardvarks", "Bob"));
destTW.commit();
Directory src = newDirectory();
new DirectoryTaxonomyWriter(src).close(); // create an empty taxonomy
OrdinalMap map = randomOrdinalMap();
destTW.addTaxonomy(src, map);
destTW.close();
validate(dest, src, map);
IOUtils.close(dest, src);
}
public void testAddToEmpty() throws Exception {
Directory dest = newDirectory();
Directory src = newDirectory();
DirectoryTaxonomyWriter srcTW = new DirectoryTaxonomyWriter(src);
srcTW.addCategory(new CategoryPath("Author", "Rob Pike"));
srcTW.addCategory(new CategoryPath("Aardvarks", "Bob"));
srcTW.close();
DirectoryTaxonomyWriter destTW = new DirectoryTaxonomyWriter(dest);
OrdinalMap map = randomOrdinalMap();
destTW.addTaxonomy(src, map);
destTW.close();
validate(dest, src, map);
IOUtils.close(dest, src);
}
// A bigger, more comprehensive random test.
public void testBig() throws Exception {
dotest(200, 10000);
dotest(1000, 20000);
dotest(400000, 1000000);
}
// a reasonable random test
public void testMedium() throws Exception {
Random random = random();
int numTests = atLeast(3);
for (int i = 0; i < numTests; i++) {
dotest(_TestUtil.nextInt(random, 2, 100),
_TestUtil.nextInt(random, 100, 1000));
}
}
public void testSimple() throws Exception {
Directory dest = newDirectory();
DirectoryTaxonomyWriter tw1 = new DirectoryTaxonomyWriter(dest);
tw1.addCategory(new CategoryPath("Author", "Mark Twain"));
tw1.addCategory(new CategoryPath("Animals", "Dog"));
tw1.addCategory(new CategoryPath("Author", "Rob Pike"));
Directory src = newDirectory();
DirectoryTaxonomyWriter tw2 = new DirectoryTaxonomyWriter(src);
tw2.addCategory(new CategoryPath("Author", "Rob Pike"));
tw2.addCategory(new CategoryPath("Aardvarks", "Bob"));
tw2.close();
OrdinalMap map = randomOrdinalMap();
tw1.addTaxonomy(src, map);
tw1.close();
validate(dest, src, map);
IOUtils.close(dest, src);
}
public void testConcurrency() throws Exception {
// tests that addTaxonomy and addCategory work in parallel
final int numCategories = atLeast(5000);
// build an input taxonomy index
Directory src = newDirectory();
DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(src);
for (int i = 0; i < numCategories; i++) {
tw.addCategory(new CategoryPath("a", Integer.toString(i)));
}
tw.close();
// now add the taxonomy to an empty taxonomy, while adding the categories
// again, in parallel -- in the end, no duplicate categories should exist.
Directory dest = newDirectory();
final DirectoryTaxonomyWriter destTW = new DirectoryTaxonomyWriter(dest);
Thread t = new Thread() {
@Override
public void run() {
for (int i = 0; i < numCategories; i++) {
try {
destTW.addCategory(new CategoryPath("a", Integer.toString(i)));
} catch (IOException e) {
// shouldn't happen - if it does, let the test fail on uncaught exception.
throw new RuntimeException(e);
}
}
}
};
t.start();
OrdinalMap map = new MemoryOrdinalMap();
destTW.addTaxonomy(src, map);
t.join();
destTW.close();
// now validate
DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dest);
// +2 to account for the root category + "a"
assertEquals(numCategories + 2, dtr.getSize());
HashSet<CategoryPath> categories = new HashSet<CategoryPath>();
for (int i = 1; i < dtr.getSize(); i++) {
CategoryPath cat = dtr.getPath(i);
assertTrue("category " + cat + " already existed", categories.add(cat));
}
dtr.close();
IOUtils.close(src, dest);
}
}

View File

@ -3,11 +3,16 @@ package org.apache.lucene.facet.taxonomy.directory;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
import org.apache.lucene.facet.taxonomy.writercache.TaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.cl2o.Cl2oTaxonomyWriterCache;
import org.apache.lucene.facet.taxonomy.writercache.lru.LruTaxonomyWriterCache;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
@ -42,11 +47,17 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
NoOpCache() { }
@Override
public void close() {}
@Override
public int get(CategoryPath categoryPath) { return -1; }
@Override
public int get(CategoryPath categoryPath, int length) { return get(categoryPath); }
@Override
public boolean put(CategoryPath categoryPath, int ordinal) { return true; }
@Override
public boolean put(CategoryPath categoryPath, int prefixLen, int ordinal) { return true; }
@Override
public boolean hasRoom(int numberOfEntries) { return false; }
}
@ -201,5 +212,48 @@ public class TestDirectoryTaxonomyWriter extends LuceneTestCase {
dir.close();
}
public void testConcurrency() throws Exception {
int ncats = atLeast(100000); // add many categories
final int range = ncats * 3; // affects the categories selection
final AtomicInteger numCats = new AtomicInteger(ncats);
Directory dir = newDirectory();
final ConcurrentHashMap<Integer,Integer> values = new ConcurrentHashMap<Integer,Integer>();
TaxonomyWriterCache cache = random().nextBoolean()
? new Cl2oTaxonomyWriterCache(1024, 0.15f, 3)
: new LruTaxonomyWriterCache(ncats / 10);
final DirectoryTaxonomyWriter tw = new DirectoryTaxonomyWriter(dir, OpenMode.CREATE, cache);
Thread[] addThreads = new Thread[atLeast(4)];
for (int z = 0; z < addThreads.length; z++) {
addThreads[z] = new Thread() {
@Override
public void run() {
Random random = random();
while (numCats.decrementAndGet() > 0) {
try {
int value = random.nextInt(range);
tw.addCategory(new CategoryPath("a", Integer.toString(value)));
values.put(value, value);
} catch (IOException e) {
throw new RuntimeException(e);
}
}
}
};
}
for (Thread t : addThreads) t.start();
for (Thread t : addThreads) t.join();
tw.close();
DirectoryTaxonomyReader dtr = new DirectoryTaxonomyReader(dir);
assertEquals(values.size() + 2, dtr.getSize()); // +2 for root category + "a"
for (Integer value : values.keySet()) {
assertTrue("category not found a/" + value, dtr.getOrdinal(new CategoryPath("a", value.toString())) > 0);
}
dtr.close();
dir.close();
}
}

View File

@ -1,193 +0,0 @@
package org.apache.lucene.facet.taxonomy.directory;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.IdentityHashMap;
import java.util.Set;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.junit.Test;
import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.InconsistentTaxonomyException;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* This test case attempts to catch index "leaks" in LuceneTaxonomyReader/Writer,
* i.e., cases where an index has been opened, but never closed; In that case,
* Java would eventually collect this object and close the index, but leaving
* the index open might nevertheless cause problems - e.g., on Windows it prevents
* deleting it.
*/
public class TestIndexClose extends LuceneTestCase {
@Test
public void testLeaks() throws Exception {
LeakChecker checker = new LeakChecker();
Directory dir = newDirectory();
DirectoryTaxonomyWriter tw = checker.openWriter(dir);
tw.close();
assertEquals(0, checker.nopen());
tw = checker.openWriter(dir);
tw.addCategory(new CategoryPath("animal", "dog"));
tw.close();
assertEquals(0, checker.nopen());
DirectoryTaxonomyReader tr = checker.openReader(dir);
tr.getPath(1);
tr.refresh();
tr.close();
assertEquals(0, checker.nopen());
tr = checker.openReader(dir);
tw = checker.openWriter(dir);
tw.addCategory(new CategoryPath("animal", "cat"));
tr.refresh();
tw.commit();
tw.close();
tr.refresh();
tr.close();
assertEquals(0, checker.nopen());
tw = checker.openWriter(dir);
for (int i=0; i<10000; i++) {
tw.addCategory(new CategoryPath("number", Integer.toString(i)));
}
tw.close();
assertEquals(0, checker.nopen());
tw = checker.openWriter(dir);
for (int i=0; i<10000; i++) {
tw.addCategory(new CategoryPath("number", Integer.toString(i*2)));
}
tw.close();
assertEquals(0, checker.nopen());
dir.close();
}
private static class LeakChecker {
Set<DirectoryReader> readers = Collections.newSetFromMap(new IdentityHashMap<DirectoryReader,Boolean>());
int iwriter=0;
Set<Integer> openWriters = new HashSet<Integer>();
LeakChecker() { }
public DirectoryTaxonomyWriter openWriter(Directory dir) throws CorruptIndexException, LockObtainFailedException, IOException {
return new InstrumentedTaxonomyWriter(dir);
}
public DirectoryTaxonomyReader openReader(Directory dir) throws CorruptIndexException, LockObtainFailedException, IOException {
return new InstrumentedTaxonomyReader(dir);
}
public int nopen() {
int ret=0;
for (DirectoryReader r: readers) {
if (r.getRefCount() > 0) {
System.err.println("reader "+r+" still open");
ret++;
}
}
for (int i: openWriters) {
System.err.println("writer "+i+" still open");
ret++;
}
return ret;
}
private class InstrumentedTaxonomyWriter extends DirectoryTaxonomyWriter {
public InstrumentedTaxonomyWriter(Directory dir) throws CorruptIndexException, LockObtainFailedException, IOException {
super(dir);
}
@Override
protected DirectoryReader openReader() throws IOException {
DirectoryReader r = super.openReader();
readers.add(r);
return r;
}
@Override
protected synchronized void refreshReader() throws IOException {
super.refreshReader();
final DirectoryReader r = getInternalIndexReader();
if (r != null) readers.add(r);
}
@Override
protected IndexWriter openIndexWriter (Directory directory, IndexWriterConfig config) throws IOException {
return new InstrumentedIndexWriter(directory, config);
}
@Override
protected IndexWriterConfig createIndexWriterConfig(OpenMode openMode) {
return newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random(), MockTokenizer.KEYWORD, false))
.setOpenMode(openMode).setMergePolicy(newLogMergePolicy());
}
}
private class InstrumentedTaxonomyReader extends DirectoryTaxonomyReader {
public InstrumentedTaxonomyReader(Directory dir) throws CorruptIndexException, LockObtainFailedException, IOException {
super(dir);
}
@Override
protected DirectoryReader openIndexReader(Directory dir) throws CorruptIndexException, IOException {
DirectoryReader r = super.openIndexReader(dir);
readers.add(r);
return r;
}
@Override
public synchronized boolean refresh() throws IOException, InconsistentTaxonomyException {
final boolean ret = super.refresh();
readers.add(getInternalIndexReader());
return ret;
}
}
private class InstrumentedIndexWriter extends IndexWriter {
int mynum;
public InstrumentedIndexWriter(Directory d, IndexWriterConfig conf) throws CorruptIndexException, LockObtainFailedException, IOException {
super(d, conf);
mynum = iwriter++;
openWriters.add(mynum);
// System.err.println("openedw "+mynum);
}
@Override
public void close() throws IOException {
super.close();
if (!openWriters.contains(mynum)) { // probably can't happen...
fail("Writer #"+mynum+" was closed twice!");
}
openWriters.remove(mynum);
// System.err.println("closedw "+mynum);
}
}
}
}

View File

@ -444,7 +444,6 @@ public class TestBlockJoin extends LuceneTestCase {
}
final IndexSearcher s = newSearcher(r);
s.setDefaultFieldSortScoring(true, true);
final IndexSearcher joinS = newSearcher(joinR);

View File

@ -945,7 +945,7 @@ public class MemoryIndex {
}
@Override
public int freq() {
public int freq() throws IOException {
return positions.size();
}
}
@ -987,7 +987,7 @@ public class MemoryIndex {
}
@Override
public int freq() {
public int freq() throws IOException {
return positions.size() / stride;
}

View File

@ -64,7 +64,7 @@ public class CustomScoreQuery extends Query {
* computation. This parameter is optional - it can be null.
*/
public CustomScoreQuery(Query subQuery, Query scoringQuery) {
this(subQuery, scoringQuery!=null ? // don't want an array that contains a single null..
this(subQuery, scoringQuery!=null ? // don't want an array that contains a single null..
new Query[] {scoringQuery} : new Query[0]);
}

View File

@ -31,6 +31,8 @@ import java.util.Map;
/**
* Query that is boosted by a ValueSource
*/
// TODO: BoostedQuery and BoostingQuery in the same module?
// something has to give
public class BoostedQuery extends Query {
private Query q;
private ValueSource boostVal; // optional, can be null
@ -187,10 +189,9 @@ public class BoostedQuery extends Query {
@Override
public boolean equals(Object o) {
if (getClass() != o.getClass()) return false;
if (!super.equals(o)) return false;
BoostedQuery other = (BoostedQuery)o;
return this.getBoost() == other.getBoost()
&& this.q.equals(other.q)
return this.q.equals(other.q)
&& this.boostVal.equals(other.boostVal);
}

View File

@ -184,5 +184,18 @@ public abstract class ValueSource {
public Double value(int slot) {
return values[slot];
}
@Override
public int compareDocToValue(int doc, Double valueObj) {
final double value = valueObj.doubleValue();
final double docValue = docVals.doubleVal(doc);
if (docValue < value) {
return -1;
} else if (docValue > value) {
return 1;
} else {
return 0;
}
}
}
}

View File

@ -0,0 +1,99 @@
package org.apache.lucene.queries.function;
import java.io.IOException;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.queries.function.valuesource.ConstValueSource;
import org.apache.lucene.search.CheckHits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.AfterClass;
import org.junit.BeforeClass;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* Basic tests for {@link BoostedQuery}
*/
// TODO: more tests
public class TestBoostedQuery extends LuceneTestCase {
static Directory dir;
static IndexReader ir;
static IndexSearcher is;
@BeforeClass
public static void beforeClass() throws Exception {
dir = newDirectory();
IndexWriterConfig iwConfig = newIndexWriterConfig(TEST_VERSION_CURRENT, new MockAnalyzer(random()));
iwConfig.setMergePolicy(newLogMergePolicy());
RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwConfig);
Document document = new Document();
Field idField = new StringField("id", "");
document.add(idField);
iw.addDocument(document);
ir = iw.getReader();
is = newSearcher(ir);
iw.close();
}
@AfterClass
public static void afterClass() throws Exception {
is = null;
ir.close();
ir = null;
dir.close();
dir = null;
}
public void testBasic() throws Exception {
Query q = new MatchAllDocsQuery();
TopDocs docs = is.search(q, 10);
assertEquals(1, docs.totalHits);
float score = docs.scoreDocs[0].score;
Query boostedQ = new BoostedQuery(q, new ConstValueSource(2.0f));
assertHits(boostedQ, new float[] { score*2 });
}
void assertHits(Query q, float scores[]) throws Exception {
ScoreDoc expected[] = new ScoreDoc[scores.length];
int expectedDocs[] = new int[scores.length];
for (int i = 0; i < expected.length; i++) {
expectedDocs[i] = i;
expected[i] = new ScoreDoc(i, scores[i]);
}
TopDocs docs = is.search(q, 10,
new Sort(new SortField("id", SortField.Type.STRING)));
CheckHits.checkHits(random(), q, "", is, expectedDocs);
CheckHits.checkHitsQuery(q, expected, docs.scoreDocs, expectedDocs);
CheckHits.checkExplanations(q, "", is);
}
}

View File

@ -132,7 +132,6 @@ public class TestValueSources extends LuceneTestCase {
reader = iw.getReader();
searcher = newSearcher(reader);
searcher.setDefaultFieldSortScoring(true, true);
iw.close();
}

View File

@ -118,4 +118,16 @@ public final class SlowCollatedStringComparator extends FieldComparator<String>
return collator.compare(first, second);
}
}
@Override
public int compareDocToValue(int doc, String value) {
final BytesRef br = currentDocTerms.getTerm(doc, tempBR);
final String docValue;
if (br == null) {
docValue = null;
} else {
docValue = br.utf8ToString();
}
return compareValues(docValue, value);
}
}

View File

@ -439,7 +439,7 @@ public class RAMOnlyPostingsFormat extends PostingsFormat {
}
@Override
public int freq() {
public int freq() throws IOException {
return current.positions.length;
}
@ -487,7 +487,7 @@ public class RAMOnlyPostingsFormat extends PostingsFormat {
}
@Override
public int freq() {
public int freq() throws IOException {
return current.positions.length;
}

View File

@ -291,7 +291,8 @@ public abstract class LuceneTestCase extends Assert {
*/
@ClassRule
public static TestRule classRules = RuleChain
.outerRule(suiteFailureMarker = new TestRuleMarkFailure())
.outerRule(new TestRuleIgnoreTestSuites())
.around(suiteFailureMarker = new TestRuleMarkFailure())
.around(new TestRuleAssertionsRequired())
.around(new TestRuleNoStaticHooksShadowing())
.around(new TestRuleNoInstanceHooksOverrides())

View File

@ -0,0 +1,67 @@
package org.apache.lucene.util;
import org.junit.Assume;
import org.junit.rules.TestRule;
import org.junit.runner.Description;
import org.junit.runners.model.Statement;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* This rule will cause the suite to be assumption-ignored if
* the test class implements a given marker interface and a special
* property is not set.
*
* <p>This is a workaround for problems with certain JUnit containers (IntelliJ)
* which automatically discover test suites and attempt to run nested classes
* that we use for testing the test framework itself.
*/
public final class TestRuleIgnoreTestSuites implements TestRule {
/**
* Marker interface for nested suites that should be ignored
* if executed in stand-alone mode.
*/
public static interface NestedTestSuite {}
/**
* A boolean system property indicating nested suites should be executed
* normally.
*/
public final static String PROPERTY_RUN_NESTED = "tests.runnested";
@Override
public Statement apply(final Statement s, final Description d) {
return new Statement() {
@Override
public void evaluate() throws Throwable {
if (NestedTestSuite.class.isAssignableFrom(d.getTestClass())) {
LuceneTestCase.assumeTrue("Nested suite class ignored (started as stand-alone).",
isRunningNested());
}
s.evaluate();
}
};
}
/**
* Check if a suite class is running as a nested test.
*/
public static boolean isRunningNested() {
return Boolean.getBoolean(PROPERTY_RUN_NESTED);
}
}
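// A hedged usage sketch (the class names here are made up for illustration): a
// nested suite opts into this rule by implementing the marker interface, so normal
// test discovery skips it unless -Dtests.runnested=true is passed.
public class TestMyFrameworkFeature extends LuceneTestCase {
  // The nested class is intended to be run reflectively by the outer test only.
  public static class Nested extends LuceneTestCase
      implements TestRuleIgnoreTestSuites.NestedTestSuite {
    public void testInsideNestedSuite() {
      // executes only when tests.runnested=true (see isRunningNested() above)
    }
  }
}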

View File

@ -107,7 +107,7 @@ New Features
* SOLR-2112: Solrj API now supports streaming results. (ryan)
* SOLR-792: Adding PivotFacetComponent for Hierarchical faceting
(erik, Jeremy Hinegardner, Thibaut Lassalle, ryan)
(ehatcher, Jeremy Hinegardner, Thibaut Lassalle, ryan)
* LUCENE-2507, SOLR-2571, SOLR-2576: Added DirectSolrSpellChecker, which uses Lucene's
DirectSpellChecker to retrieve correction candidates directly from the term dictionary using
@ -144,7 +144,7 @@ New Features
* SOLR-2338: Add support for using <similarity/> in a schema's fieldType,
for customizing scoring on a per-field basis. (hossman, yonik, rmuir)
* SOLR-2335: New 'field("...")' function syntax for refering to complex
* SOLR-2335: New 'field("...")' function syntax for referring to complex
field names (containing whitespace or special characters) in functions.
* SOLR-2383: /browse improvements: generalize range and date facet display
@ -226,10 +226,7 @@ New Features
* SOLR-3069: Ability to add openSearcher=false to not open a searcher when doing
a hard commit. commitWithin now only invokes a softCommit. (yonik)
* SOLR-1726: Added deep paging support to search (sort by score only) which should use less memory when paging deeply into results
by keeping the priority queue small. (Manojkumar Rangasamy Kannadasan, gsingers)
* SOLR-2802: New FieldMutatingUpdateProcessor and Factory to simplify the
* SOLR-2802: New FieldMutatingUpdateProcessor and Factory to simplify the
development of UpdateProcessors that modify field values of documents as
they are indexed. Also includes several useful new implementations:
RemoveBlankFieldUpdateProcessorFactory
@ -428,6 +425,9 @@ Bug Fixes
* SOLR-3436: Group count incorrect when not all shards are queried in the second
pass. (Francois Perron, Martijn van Groningen)
* SOLR-3454: Exception when using result grouping with main=true and using
wt=javabin. (Ludovic Boutros, Martijn van Groningen)
Other Changes
----------------------

View File

@ -0,0 +1,45 @@
#!/usr/bin/env bash
# starts up the multicore example
cd ..
rm -r -f example2
rm -r -f example3
rm -r -f example4
rm -r -f example5
rm -r -f example6
rm -r -f dist
rm -r -f build
rm -r -f example/solr/zoo_data
rm -r -f example/solr/data
rm -f example/example.log
ant example dist
cp -r -f example example2
cp -r -f example example3
cp -r -f example example4
cp -r -f example example5
cp -r -f example example6
java -classpath lib/*:dist/*:build/lucene-libs/* org.apache.solr.cloud.ZkController 127.0.0.1:9983 example/multicore 8983
cd example
java -DzkRun -DnumShards=2 -DSTOP.PORT=7983 -DSTOP.KEY=key -Dsolr.solr.home=multicore -jar start.jar 1>example.log 2>&1 &
cd ../example2
java -Djetty.port=7574 -DzkHost=localhost:9983 -DnumShards=2 -DSTOP.PORT=6574 -DSTOP.KEY=key -Dsolr.solr.home=multicore -jar start.jar 1>example2.log 2>&1 &
cd ../example3
java -Djetty.port=7575 -DzkHost=localhost:9983 -DnumShards=2 -DSTOP.PORT=6575 -DSTOP.KEY=key -Dsolr.solr.home=multicore -jar start.jar 1>example3.log 2>&1 &
cd ../example4
java -Djetty.port=7576 -DzkHost=localhost:9983 -DnumShards=2 -DSTOP.PORT=6576 -DSTOP.KEY=key -Dsolr.solr.home=multicore -jar start.jar 1>example4.log 2>&1 &
cd ../example5
java -Djetty.port=7577 -DzkHost=localhost:9983 -DnumShards=2 -DSTOP.PORT=6577 -DSTOP.KEY=key -Dsolr.solr.home=multicore -jar start.jar 1>example5.log 2>&1 &
cd ../example6
java -Djetty.port=7578 -DzkHost=localhost:9983 -DnumShards=2 -DSTOP.PORT=6578 -DSTOP.KEY=key -Dsolr.solr.home=multicore -jar start.jar 1>example6.log 2>&1 &

View File

@ -22,7 +22,7 @@ cp -r -f example example4
cp -r -f example example5
cp -r -f example example6
java -classpath lib/*:dist/*:build/lucene-libs/* org.apache.solr.cloud.ZkController 127.0.0.1:9983 8983 example/solr/conf conf1 example/solr
java -classpath lib/*:dist/*:build/lucene-libs/* org.apache.solr.cloud.ZkController 127.0.0.1:9983 example/solr 8983
cd example
java -DzkRun -DnumShards=2 -DSTOP.PORT=7983 -DSTOP.KEY=key -jar start.jar 1>example.log 2>&1 &

View File

@ -9,7 +9,8 @@ CHANGES
$Id$
================== Release 4.0.0-dev ==============
(No Changes)
* SOLR-3470: Bug fix: custom Carrot2 tokenizer and stemmer factories are
respected now (Stanislaw Osinski, Dawid Weiss)
================== Release 3.6.0 ==================

View File

@ -62,6 +62,7 @@ import org.carrot2.core.LanguageCode;
import org.carrot2.core.attribute.AttributeNames;
import org.carrot2.text.linguistic.DefaultLexicalDataFactoryDescriptor;
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor;
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipelineDescriptor.AttributeBuilder;
import org.carrot2.util.resource.ClassLoaderLocator;
import org.carrot2.util.resource.IResource;
import org.carrot2.util.resource.IResourceLocator;
@ -108,6 +109,9 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
private Controller controller = ControllerFactory.createPooling();
private Class<? extends IClusteringAlgorithm> clusteringAlgorithmClass;
/** Solr core we're bound to. */
private SolrCore core;
private static class SolrResourceLocator implements IResourceLocator {
private final SolrResourceLoader resourceLoader;
private final String carrot2ResourcesDir;
@ -146,7 +150,7 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
public InputStream open() throws IOException {
return new ByteArrayInputStream(asBytes);
}
@Override
public int hashCode() {
// In case multiple resources are found they will be deduped, but we don't use it in Solr,
@ -231,8 +235,19 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
extractCarrotAttributes(sreq.getParams(), attributes);
// Perform clustering and convert to named list
return clustersToNamedList(controller.process(attributes,
clusteringAlgorithmClass).getClusters(), sreq.getParams());
// Carrot2 uses current thread's context class loader to get
// certain classes (e.g. custom tokenizer/stemmer) at runtime.
// To make sure classes from contrib JARs are available,
// we swap the context class loader for the time of clustering.
Thread ct = Thread.currentThread();
ClassLoader prev = ct.getContextClassLoader();
try {
ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
return clustersToNamedList(controller.process(attributes,
clusteringAlgorithmClass).getClusters(), sreq.getParams());
} finally {
ct.setContextClassLoader(prev);
}
} catch (Exception e) {
log.error("Carrot2 clustering failed", e);
throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
@ -242,6 +257,8 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
@Override
@SuppressWarnings({ "unchecked", "rawtypes" })
public String init(NamedList config, final SolrCore core) {
this.core = core;
String result = super.init(config, core);
final SolrParams initParams = SolrParams.toSolrParams(config);
@ -255,10 +272,14 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
// Additionally, we set a custom lexical resource factory for Carrot2 that
// will use both Carrot2 default stop words as well as stop words from
// the StopFilter defined on the field.
BasicPreprocessingPipelineDescriptor.attributeBuilder(initAttributes)
.stemmerFactory(LuceneCarrot2StemmerFactory.class)
.tokenizerFactory(LuceneCarrot2TokenizerFactory.class)
.lexicalDataFactory(SolrStopwordsCarrot2LexicalDataFactory.class);
final AttributeBuilder attributeBuilder = BasicPreprocessingPipelineDescriptor.attributeBuilder(initAttributes);
attributeBuilder.lexicalDataFactory(SolrStopwordsCarrot2LexicalDataFactory.class);
if (!initAttributes.containsKey(BasicPreprocessingPipelineDescriptor.Keys.TOKENIZER_FACTORY)) {
attributeBuilder.tokenizerFactory(LuceneCarrot2TokenizerFactory.class);
}
if (!initAttributes.containsKey(BasicPreprocessingPipelineDescriptor.Keys.STEMMER_FACTORY)) {
attributeBuilder.stemmerFactory(LuceneCarrot2StemmerFactory.class);
}
// Pass the schema to SolrStopwordsCarrot2LexicalDataFactory.
initAttributes.put("solrIndexSchema", core.getSchema());
@ -272,8 +293,19 @@ public class CarrotClusteringEngine extends SearchClusteringEngine {
// Using the class loader directly because this time we want to omit the prefix
new ClassLoaderLocator(core.getResourceLoader().getClassLoader())));
this.controller.init(initAttributes);
// Carrot2 uses current thread's context class loader to get
// certain classes (e.g. custom tokenizer/stemmer) at initialization time.
// To make sure classes from contrib JARs are available,
// we swap the context class loader for the time of clustering.
Thread ct = Thread.currentThread();
ClassLoader prev = ct.getContextClassLoader();
try {
ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
this.controller.init(initAttributes);
} finally {
ct.setContextClassLoader(prev);
}
SchemaField uniqueField = core.getSchema().getUniqueKeyField();
if (uniqueField == null) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR,

View File

@ -22,29 +22,48 @@ import com.google.common.collect.ImmutableSet;
*/
public interface CarrotParams {
/**
* Carrot2 parameter mapping (recognized and mapped if passed via Solr configuration).
*/
public final class CarrotParams {
String CARROT_PREFIX = "carrot.";
private static String CARROT_PREFIX = "carrot.";
String ALGORITHM = CARROT_PREFIX + "algorithm";
public static String ALGORITHM = CARROT_PREFIX + "algorithm";
String TITLE_FIELD_NAME = CARROT_PREFIX + "title";
String URL_FIELD_NAME = CARROT_PREFIX + "url";
String SNIPPET_FIELD_NAME = CARROT_PREFIX + "snippet";
String LANGUAGE_FIELD_NAME = CARROT_PREFIX + "lang";
String CUSTOM_FIELD_NAME = CARROT_PREFIX + "custom";
public static String TITLE_FIELD_NAME = CARROT_PREFIX + "title";
public static String URL_FIELD_NAME = CARROT_PREFIX + "url";
public static String SNIPPET_FIELD_NAME = CARROT_PREFIX + "snippet";
public static String LANGUAGE_FIELD_NAME = CARROT_PREFIX + "lang";
public static String CUSTOM_FIELD_NAME = CARROT_PREFIX + "custom";
String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
String SUMMARY_FRAGSIZE = CARROT_PREFIX + "fragSize";
String SUMMARY_SNIPPETS = CARROT_PREFIX + "summarySnippets";
public static String PRODUCE_SUMMARY = CARROT_PREFIX + "produceSummary";
public static String SUMMARY_FRAGSIZE = CARROT_PREFIX + "fragSize";
public static String SUMMARY_SNIPPETS = CARROT_PREFIX + "summarySnippets";
String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir";
String LANGUAGE_CODE_MAP = CARROT_PREFIX + "lcmap";
public static String NUM_DESCRIPTIONS = CARROT_PREFIX + "numDescriptions";
public static String OUTPUT_SUB_CLUSTERS = CARROT_PREFIX + "outputSubClusters";
public static String LEXICAL_RESOURCES_DIR = CARROT_PREFIX + "lexicalResourcesDir";
public static String LANGUAGE_CODE_MAP = CARROT_PREFIX + "lcmap";
public static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
ALGORITHM, TITLE_FIELD_NAME, URL_FIELD_NAME, SNIPPET_FIELD_NAME, LANGUAGE_FIELD_NAME,
PRODUCE_SUMMARY, SUMMARY_FRAGSIZE, SUMMARY_SNIPPETS, NUM_DESCRIPTIONS, OUTPUT_SUB_CLUSTERS,
LEXICAL_RESOURCES_DIR);
static final Set<String> CARROT_PARAM_NAMES = ImmutableSet.of(
ALGORITHM,
TITLE_FIELD_NAME,
URL_FIELD_NAME,
SNIPPET_FIELD_NAME,
LANGUAGE_FIELD_NAME,
CUSTOM_FIELD_NAME,
PRODUCE_SUMMARY,
SUMMARY_FRAGSIZE,
SUMMARY_SNIPPETS,
NUM_DESCRIPTIONS,
OUTPUT_SUB_CLUSTERS,
LEXICAL_RESOURCES_DIR,
LANGUAGE_CODE_MAP);
/** No instances. */
private CarrotParams() {}
}

View File

@ -339,6 +339,16 @@
<str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.LexicalResourcesCheckClusteringAlgorithm</str>
<str name="carrot.lexicalResourcesDir">clustering/custom</str>
</lst>
<lst name="engine">
<str name="name">custom-duplicating-tokenizer</str>
<str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.EchoTokensClusteringAlgorithm</str>
<str name="PreprocessingPipeline.tokenizerFactory">org.apache.solr.handler.clustering.carrot2.DuplicatingTokenizerFactory</str>
</lst>
<lst name="engine">
<str name="name">custom-duplicating-stemmer</str>
<str name="carrot.algorithm">org.apache.solr.handler.clustering.carrot2.EchoStemsClusteringAlgorithm</str>
<str name="PreprocessingPipeline.stemmerFactory">org.apache.solr.handler.clustering.carrot2.DuplicatingStemmerFactory</str>
</lst>
</searchComponent>
<searchComponent class="org.apache.solr.handler.clustering.ClusteringComponent" name="doc-clustering">

View File

@ -205,7 +205,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void solrStopWordsUsedInCarrot2Clustering() throws Exception {
public void testSolrStopWordsUsedInCarrot2Clustering() throws Exception {
ModifiableSolrParams params = new ModifiableSolrParams();
params.set("merge-resources", false);
params.set(AttributeUtils.getKey(
@ -220,7 +220,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void solrStopWordsNotDefinedOnAFieldForClustering() throws Exception {
public void testSolrStopWordsNotDefinedOnAFieldForClustering() throws Exception {
ModifiableSolrParams params = new ModifiableSolrParams();
// Force string fields to be used for clustering. Does not make sense
// in a real word, but does the job in the test.
@ -239,7 +239,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void highlightingOfMultiValueField() throws Exception {
public void testHighlightingOfMultiValueField() throws Exception {
final String snippetWithoutSummary = getLabels(clusterWithHighlighting(
false, 30, 3, "multi", 1).get(0)).get(1);
assertTrue("Snippet contains first value", snippetWithoutSummary.contains("First"));
@ -256,7 +256,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void concatenatingMultipleFields() throws Exception {
public void testConcatenatingMultipleFields() throws Exception {
final ModifiableSolrParams params = new ModifiableSolrParams();
params.add(CarrotParams.TITLE_FIELD_NAME, "title,heading");
params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet,body");
@ -271,7 +271,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void highlightingMultipleFields() throws Exception {
public void testHighlightingMultipleFields() throws Exception {
final TermQuery query = new TermQuery(new Term("snippet", "content"));
final ModifiableSolrParams params = new ModifiableSolrParams();
@ -297,7 +297,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void oneCarrot2SupportedLanguage() throws Exception {
public void testOneCarrot2SupportedLanguage() throws Exception {
final ModifiableSolrParams params = new ModifiableSolrParams();
params.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang");
@ -309,7 +309,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void oneCarrot2SupportedLanguageOfMany() throws Exception {
public void testOneCarrot2SupportedLanguageOfMany() throws Exception {
final ModifiableSolrParams params = new ModifiableSolrParams();
params.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang");
@ -321,7 +321,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void languageCodeMapping() throws Exception {
public void testLanguageCodeMapping() throws Exception {
final ModifiableSolrParams params = new ModifiableSolrParams();
params.add(CarrotParams.LANGUAGE_FIELD_NAME, "lang");
params.add(CarrotParams.LANGUAGE_CODE_MAP, "POLISH:pl");
@ -334,7 +334,7 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
}
@Test
public void passingOfCustomFields() throws Exception {
public void testPassingOfCustomFields() throws Exception {
final ModifiableSolrParams params = new ModifiableSolrParams();
params.add(CarrotParams.CUSTOM_FIELD_NAME, "intfield_i:intfield");
params.add(CarrotParams.CUSTOM_FIELD_NAME, "floatfield_f:floatfield");
@ -352,6 +352,34 @@ public class CarrotClusteringEngineTest extends AbstractClusteringTestCase {
assertEquals("List field", "[first, second]", labels.get(4));
}
@Test
public void testCustomTokenizer() throws Exception {
final ModifiableSolrParams params = new ModifiableSolrParams();
params.add(CarrotParams.TITLE_FIELD_NAME, "title");
params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet");
final List<String> labels = getLabels(checkEngine(
getClusteringEngine("custom-duplicating-tokenizer"), 1, 16, new TermQuery(new Term("title",
"field")), params).get(0));
// The custom test tokenizer duplicates each token's text
assertTrue("First token", labels.get(0).contains("TitleTitle"));
}
@Test
public void testCustomStemmer() throws Exception {
final ModifiableSolrParams params = new ModifiableSolrParams();
params.add(CarrotParams.TITLE_FIELD_NAME, "title");
params.add(CarrotParams.SNIPPET_FIELD_NAME, "snippet");
final List<String> labels = getLabels(checkEngine(
getClusteringEngine("custom-duplicating-stemmer"), 1, 12, new TermQuery(new Term("title",
"field")), params).get(0));
// The custom test stemmer duplicates and lowercases each token's text
assertTrue("First token", labels.get(0).contains("titletitle"));
}
private CarrotClusteringEngine getClusteringEngine(String engineName) {
ClusteringComponent comp = (ClusteringComponent) h.getCore()
.getSearchComponent("clustering");

View File

@ -0,0 +1,34 @@
package org.apache.solr.handler.clustering.carrot2;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.carrot2.core.LanguageCode;
import org.carrot2.text.linguistic.IStemmer;
import org.carrot2.text.linguistic.IStemmerFactory;
public class DuplicatingStemmerFactory implements IStemmerFactory {
@Override
public IStemmer getStemmer(LanguageCode language) {
return new IStemmer() {
@Override
public CharSequence stem(CharSequence word) {
return word.toString() + word.toString();
}
};
}
}

View File

@ -0,0 +1,52 @@
package org.apache.solr.handler.clustering.carrot2;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.io.Reader;
import org.carrot2.core.LanguageCode;
import org.carrot2.text.analysis.ExtendedWhitespaceTokenizer;
import org.carrot2.text.analysis.ITokenizer;
import org.carrot2.text.linguistic.ITokenizerFactory;
import org.carrot2.text.util.MutableCharArray;
public class DuplicatingTokenizerFactory implements ITokenizerFactory {
@Override
public ITokenizer getTokenizer(LanguageCode language) {
return new ITokenizer() {
private final ExtendedWhitespaceTokenizer delegate = new ExtendedWhitespaceTokenizer();
@Override
public void setTermBuffer(MutableCharArray buffer) {
delegate.setTermBuffer(buffer);
buffer.reset(buffer.toString() + buffer.toString());
}
@Override
public void reset(Reader input) throws IOException {
delegate.reset(input);
}
@Override
public short nextToken() throws IOException {
return delegate.nextToken();
}
};
}
}

View File

@ -0,0 +1,75 @@
package org.apache.solr.handler.clustering.carrot2;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.List;
import org.carrot2.core.Cluster;
import org.carrot2.core.Document;
import org.carrot2.core.IClusteringAlgorithm;
import org.carrot2.core.LanguageCode;
import org.carrot2.core.ProcessingComponentBase;
import org.carrot2.core.ProcessingException;
import org.carrot2.core.attribute.AttributeNames;
import org.carrot2.core.attribute.Processing;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.preprocessing.PreprocessingContext.AllStems;
import org.carrot2.text.preprocessing.PreprocessingContext.AllTokens;
import org.carrot2.text.preprocessing.PreprocessingContext.AllWords;
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipeline;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Output;
import com.google.common.collect.Lists;
/**
* A mock Carrot2 clustering algorithm that outputs the stem of each token of each
* document as a separate cluster. Useful only in tests.
*/
@Bindable(prefix = "EchoStemsClusteringAlgorithm")
public class EchoStemsClusteringAlgorithm extends ProcessingComponentBase
implements IClusteringAlgorithm {
@Input
@Processing
@Attribute(key = AttributeNames.DOCUMENTS)
private List<Document> documents;
@Output
@Processing
@Attribute(key = AttributeNames.CLUSTERS)
private List<Cluster> clusters;
BasicPreprocessingPipeline preprocessing = new BasicPreprocessingPipeline();
@Override
public void process() throws ProcessingException {
final PreprocessingContext preprocessingContext = preprocessing.preprocess(
documents, "", LanguageCode.ENGLISH);
final AllTokens allTokens = preprocessingContext.allTokens;
final AllWords allWords = preprocessingContext.allWords;
final AllStems allStems = preprocessingContext.allStems;
clusters = Lists.newArrayListWithCapacity(allTokens.image.length);
for (int i = 0; i < allTokens.image.length; i++) {
if (allTokens.wordIndex[i] >= 0) {
clusters.add(new Cluster(new String(
allStems.image[allWords.stemIndex[allTokens.wordIndex[i]]])));
}
}
}
}

View File

@ -0,0 +1,69 @@
package org.apache.solr.handler.clustering.carrot2;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.util.List;
import org.carrot2.core.Cluster;
import org.carrot2.core.Document;
import org.carrot2.core.IClusteringAlgorithm;
import org.carrot2.core.LanguageCode;
import org.carrot2.core.ProcessingComponentBase;
import org.carrot2.core.ProcessingException;
import org.carrot2.core.attribute.AttributeNames;
import org.carrot2.core.attribute.Processing;
import org.carrot2.text.preprocessing.PreprocessingContext;
import org.carrot2.text.preprocessing.pipeline.BasicPreprocessingPipeline;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Output;
import com.google.common.collect.Lists;
/**
* A mock Carrot2 clustering algorithm that outputs each token of each document
* as a separate cluster. Useful only in tests.
*/
@Bindable(prefix = "EchoTokensClusteringAlgorithm")
public class EchoTokensClusteringAlgorithm extends ProcessingComponentBase
implements IClusteringAlgorithm {
@Input
@Processing
@Attribute(key = AttributeNames.DOCUMENTS)
private List<Document> documents;
@Output
@Processing
@Attribute(key = AttributeNames.CLUSTERS)
private List<Cluster> clusters;
BasicPreprocessingPipeline preprocessing = new BasicPreprocessingPipeline();
@Override
public void process() throws ProcessingException {
final PreprocessingContext preprocessingContext = preprocessing.preprocess(
documents, "", LanguageCode.ENGLISH);
clusters = Lists
.newArrayListWithCapacity(preprocessingContext.allTokens.image.length);
for (char[] token : preprocessingContext.allTokens.image) {
if (token != null) {
clusters.add(new Cluster(new String(token)));
}
}
}
}

View File

@ -1,53 +0,0 @@
<%@ page import="org.apache.solr.request.SolrRequestHandler" %>
<%@ page import="java.util.Map" %>
<%@ page contentType="text/html; charset=utf-8" pageEncoding="UTF-8"%>
<%--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--%>
<%-- do a verbatim include so we can use the local vars --%>
<%@include file="_info.jsp"%>
<html>
<%
String handler = request.getParameter("handler");
if (handler == null) {
Map<String, SolrRequestHandler> handlers = core.getRequestHandlers();
%>
<head>
<title>DataImportHandler Interactive Development</title>
<link rel="stylesheet" type="text/css" href="solr-admin.css">
</head>
<body>
Select handler:
<ul>
<%
for (String key : handlers.keySet()) {
if (handlers.get(key).getClass().getName().equals("org.apache.solr.handler.dataimport.DataImportHandler")) { %>
<li><a href="dataimport.jsp?handler=<%=key%>"><%=key%></a></li>
<%
}
}
%>
</ul>
</body>
<% } else { %>
<frameset cols = "50%, 50%">
<frame src ="debug.jsp?handler=<%=handler%>" />
<frame src ="..<%=handler%>?command=status" name="result"/>
</frameset>
<% } %>
</html>

View File

@ -1,103 +0,0 @@
<%@ page contentType="text/html; charset=utf-8" pageEncoding="UTF-8"%>
<%--
Licensed to the Apache Software Foundation (ASF) under one or more
contributor license agreements. See the NOTICE file distributed with
this work for additional information regarding copyright ownership.
The ASF licenses this file to You under the Apache License, Version 2.0
(the "License"); you may not use this file except in compliance with
the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
--%>
<%-- do a verbatim include so we can use the local vars --%>
<%@include file="_info.jsp"%>
<html>
<head>
<title>DataImportHandler Interactive Development</title>
<link rel="stylesheet" type="text/css" href="solr-admin.css"/>
<link rel="icon" href="favicon.ico" type="image/ico"/>
<link rel="shortcut icon" href="favicon.ico" type="image/ico"/>
<script src="jquery-1.4.3.min.js"></script>
</head>
<body>
<h1>DataImportHandler Development Console</h1>
<%
String handler = request.getParameter("handler"); // must be specified
%>
<form action="..<%=handler%>" target="result" method="get">
<input type="hidden" name="debug" value="on"/>
<table>
<tr>
<th>Handler: </th>
<td><%=handler%> <a href="dataimport.jsp" target="_top">change handler</a></td>
</tr>
<tr>
<td colspan="2">
<table width="100%">
<tr>
<td>
<select name="command">
<option value="full-import" selected="selected">full-import</option>
<option value="delta-import">delta-import</option>
</select>
</td>
<td><strong>Verbose</strong>&nbsp;<input
name="verbose" type="checkbox"></td>
<td><strong>Commit</strong>&nbsp;<input
name="commit" type="checkbox"></td>
<td><strong>Clean</strong>&nbsp;<input
name="clean" type="checkbox"></td>
<td><strong>Start Row</strong>&nbsp;<input
name="start" size="4" type="text" value="0"></td>
<td><strong>No. of Rows</strong>&nbsp;<input name="rows"
type="text" size="4" value="10"></td>
</tr>
</table>
</td>
<tr>
<td><strong>data config xml</strong></td>
<td><input class="stdbutton" type="submit" value="debug now">
</td>
</tr>
<tr>
<td colspan="2"><textarea id="txtDataConfig" rows="30" cols="80" name="dataConfig"></textarea></td>
<script type="text/javascript" language="javascript">
$.get("..<%=handler%>?command=show-config", function(data){
$('#txtDataConfig').attr('value', data);
});
</script>
</tr>
</table>
</form>
<form action="..<%=handler%>" target="result" method="get">
<input type="hidden" name="clean" value="false">
<input type="hidden" name="commit" value="true">
<input class="stdbutton" type="submit" name="command" value="full-import">
<input class="stdbutton" type="submit" name="command" value="delta-import">
<input class="stdbutton" type="submit" name="command" value="status">
<input class="stdbutton" type="submit" name="command" value="reload-config">
<input class="stdbutton" type="submit" name="command" value="abort">
</form>
<form action="../select" target="result" method="get">
<input type="hidden" name="q" value="*:*">
<input type="hidden" name="start" value="0">
<input type="hidden" name="rows" value="0">
<input class="stdbutton" type="submit" value="Documents Count">
</form>
<form action="..<%=handler%>" target="result" method="get">
<input type="hidden" name="verbose" value="true">
<input type="hidden" name="clean" value="true">
<input type="hidden" name="commit" value="true">
<input type="hidden" name="command" value="full-import">
<input class="stdbutton" type="submit" value="Full Import with Cleaning">
</form>
<a href="index.jsp" target="_parent">Return to Admin Page</a>
</body>
</html>

View File

@ -32,6 +32,8 @@ import java.util.concurrent.TimeoutException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.xpath.XPathConstants;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.client.solrj.request.CoreAdminRequest.WaitForState;
import org.apache.solr.common.SolrException;
@ -46,16 +48,22 @@ import org.apache.solr.common.cloud.ZkNodeProps;
import org.apache.solr.common.cloud.ZkStateReader;
import org.apache.solr.common.cloud.ZooKeeperException;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.core.Config;
import org.apache.solr.core.CoreContainer;
import org.apache.solr.core.CoreDescriptor;
import org.apache.solr.core.SolrCore;
import org.apache.solr.core.SolrResourceLoader;
import org.apache.solr.update.UpdateLog;
import org.apache.solr.util.DOMUtil;
import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
/**
* Handle ZooKeeper interactions.
@ -114,21 +122,31 @@ public final class ZkController {
// this can be null in which case recovery will be inactive
private CoreContainer cc;
/**
* Bootstraps the current configs for all collections in solr.xml.
* Takes two params: the zkHost to connect to and the solrHome location
* where solr.xml can be found.
*
* If you also pass a solrPort, it will be used to start an embedded
* ZooKeeper, which is useful for single-machine, multi-node tests.
*
* @param args
* @throws Exception
*/
public static void main(String[] args) throws Exception {
// start up a tmp zk server first
String zkServerAddress = args[0];
String solrPort = args[1];
String confDir = args[2];
String confName = args[3];
String solrHome = null;
if (args.length == 5) {
solrHome = args[4];
String solrHome = args[1];
String solrPort = null;
if (args.length > 2) {
solrPort = args[2];
}
SolrZkServer zkServer = null;
if (solrHome != null) {
if (solrPort != null) {
zkServer = new SolrZkServer("true", null, solrHome + "/zoo_data", solrHome, solrPort);
zkServer.parseConfig();
zkServer.start();
@ -140,8 +158,13 @@ public final class ZkController {
public void command() {
}});
uploadConfigDir(zkClient, new File(confDir), confName);
if (solrHome != null) {
SolrResourceLoader loader = new SolrResourceLoader(solrHome);
solrHome = loader.getInstanceDir();
InputSource cfgis = new InputSource(new File(solrHome, "solr.xml").toURI().toASCIIString());
Config cfg = new Config(loader, null, cfgis , null, false);
bootstrapConf(zkClient, cfg, solrHome);
if (solrPort != null) {
zkServer.stop();
}
}
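// A minimal invocation sketch for the updated main(...) entry point; the host, path
// and port values below are illustrative assumptions, not taken from this patch:
//   args[0] = zkHost to connect to
//   args[1] = solrHome containing solr.xml
//   args[2] = optional solrPort; if present, an embedded ZooKeeper is started
ZkController.main(new String[] {
    "localhost:9983",        // args[0]: zkServerAddress
    "/path/to/solr/home",    // args[1]: solrHome with solr.xml
    "8983"                   // args[2]: optional solrPort for the embedded ZooKeeper
});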
@ -855,7 +878,7 @@ public final class ZkController {
try {
Map<String,String> collectionProps = new HashMap<String,String>();
// TODO: if collection.configName isn't set, and there isn't already a conf in zk, just use that?
String defaultConfigName = System.getProperty(COLLECTION_PARAM_PREFIX+CONFIGNAME_PROP, "configuration1");
String defaultConfigName = System.getProperty(COLLECTION_PARAM_PREFIX+CONFIGNAME_PROP, collection);
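// Explanatory note (an inference, not patch text): the default config name now falls
// back to the collection's own name instead of the hard-coded "configuration1",
// matching the new lookup below that picks a config set named after the collection
// when collection.configName was not given explicitly.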
// params passed in - currently only done via core admin (create core command).
if (params != null) {
@ -948,6 +971,13 @@ public final class ZkController {
collectionProps.put(CONFIGNAME_PROP, configNames.get(0));
break;
}
if (configNames != null && configNames.contains(collection)) {
log.info("Could not find explicit collection configName, but found config name matching collection name - using that set.");
collectionProps.put(CONFIGNAME_PROP, collection);
break;
}
log.info("Could not find collection configName - pausing for 3 seconds and trying again - try: " + retry);
Thread.sleep(3000);
}
@ -1155,5 +1185,34 @@ public final class ZkController {
}
return leaderProps;
}
/**
* If in SolrCloud mode, upload config sets for each SolrCore in solr.xml.
*
* @throws IOException
* @throws KeeperException
* @throws InterruptedException
*/
public static void bootstrapConf(SolrZkClient zkClient, Config cfg, String solrHome) throws IOException,
KeeperException, InterruptedException {
NodeList nodes = (NodeList)cfg.evaluate("solr/cores/core", XPathConstants.NODESET);
for (int i=0; i<nodes.getLength(); i++) {
Node node = nodes.item(i);
String rawName = DOMUtil.getAttr(node, "name", null);
String instanceDir = DOMUtil.getAttr(node, "instanceDir", null);
File idir = new File(instanceDir);
if (!idir.isAbsolute()) {
idir = new File(solrHome, instanceDir);
}
String confName = DOMUtil.getAttr(node, "collection", null);
if (confName == null) {
confName = rawName;
}
ZkController.uploadConfigDir(zkClient, new File(idir, "conf"), confName);
}
}
}
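// A hedged sketch of calling the new static bootstrapConf(...) helper on its own; it
// mirrors the call in main(...) above, and the solrHome path and the zkClient variable
// are illustrative assumptions rather than values from this patch.
SolrResourceLoader loader = new SolrResourceLoader("/path/to/solr/home");
String solrHome = loader.getInstanceDir();
InputSource cfgis = new InputSource(new File(solrHome, "solr.xml").toURI().toASCIIString());
Config cfg = new Config(loader, null, cfgis, null, false);
ZkController.bootstrapConf(zkClient, cfg, solrHome);  // zkClient: an already-connected SolrZkClient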

View File

@ -233,7 +233,7 @@ public class CoreContainer
boolean boostrapConf = Boolean.getBoolean("bootstrap_conf");
if(boostrapConf) {
bootstrapConf();
ZkController.bootstrapConf(zkController.getZkClient(), cfg, solrHome);
}
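// Hedged usage note (inferred from Boolean.getBoolean above, not stated in the patch):
// starting Solr with -Dbootstrap_conf=true takes this branch and uploads each core's
// conf directory to ZooKeeper via the shared ZkController.bootstrapConf(...) helper.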
} catch (InterruptedException e) {
@ -259,28 +259,6 @@ public class CoreContainer
}
private void bootstrapConf() throws IOException,
KeeperException, InterruptedException {
NodeList nodes = (NodeList)cfg.evaluate("solr/cores/core", XPathConstants.NODESET);
for (int i=0; i<nodes.getLength(); i++) {
Node node = nodes.item(i);
String rawName = DOMUtil.getAttr(node, "name", null);
String instanceDir = DOMUtil.getAttr(node, "instanceDir", null);
File idir = new File(instanceDir);
if (!idir.isAbsolute()) {
idir = new File(solrHome, instanceDir);
}
String confName = DOMUtil.getAttr(node, "collection", null);
if (confName == null) {
confName = rawName;
}
zkController.uploadConfigDir(new File(idir, "conf"), confName);
}
}
public Properties getContainerProperties() {
return containerProperties;
}

View File

@ -1006,11 +1006,23 @@ public final class SolrCore implements SolrInfoMBean {
}
/**
* Return the newest normal {@link RefCounted}&lt;{@link SolrIndexSearcher}&gt; with
* the reference count incremented. It <b>must</b> be decremented when no longer needed.
* If no searcher is currently open, then if openNew==true a new searcher will be opened,
* or null is returned if openNew==false.
* Returns the current registered searcher with its reference count incremented, or null if none are registered.
*/
public RefCounted<SolrIndexSearcher> getRegisteredSearcher() {
synchronized (searcherLock) {
if (_searcher != null) {
_searcher.incref();
}
return _searcher;
}
}
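// Hedged usage sketch (not part of this patch; "core" is an illustrative SolrCore
// reference): the returned holder, when non-null, must be decref'd once the caller is
// done, mirroring how JoinQParserPlugin uses it later in this diff.
RefCounted<SolrIndexSearcher> holder = core.getRegisteredSearcher();
if (holder != null) {
  try {
    long openTime = holder.get().getOpenTime();  // e.g. read the searcher's open time
    // ... use holder.get() ...
  } finally {
    holder.decref();
  }
}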
/**
* Return the newest normal {@link RefCounted}&lt;{@link SolrIndexSearcher}&gt; with
* the reference count incremented. It <b>must</b> be decremented when no longer needed.
* If no searcher is currently open, then if openNew==true a new searcher will be opened,
* or null is returned if openNew==false.
*/
public RefCounted<SolrIndexSearcher> getNewestSearcher(boolean openNew) {
synchronized (searcherLock) {
if (!_searchers.isEmpty()) {
@ -1023,7 +1035,6 @@ public final class SolrCore implements SolrInfoMBean {
return openNew ? getRealtimeSearcher() : null;
}
/** Gets the latest real-time searcher w/o forcing open a new searcher if one already exists.
* The reference count will be incremented.
*/

View File

@ -25,6 +25,7 @@ import java.util.Locale;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
@ -173,7 +174,15 @@ public class PingRequestHandler extends RequestHandlerBase implements SolrCoreAw
{
SolrParams params = req.getParams();
SolrCore core = req.getCore();
// in this case, we want to default distrib to false so
// we only ping the single node
Boolean distrib = params.getBool("distrib");
if (distrib == null) {
ModifiableSolrParams mparams = new ModifiableSolrParams(params);
mparams.set("distrib", false);
req.setParams(mparams);
}
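// Explanatory note (not part of the patch): the override above only applies when the
// client sent no distrib value at all, so a ping stays local by default but can still
// be made distributed explicitly by passing distrib=true on the request.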
String actionParam = params.get("action");
ACTIONS action = null;

View File

@ -560,9 +560,14 @@ public class QueryElevationComponent extends SearchComponent implements SolrCore
public Integer value(int slot) {
return values[slot];
}
@Override
public int compareDocToValue(int doc, Integer valueObj) throws IOException {
final int value = valueObj.intValue();
final int docValue = docVal(doc);
return docValue - value; // values will be small enough that there is no overflow concern
}
};
}
}
}
}
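// A small self-contained sketch (an illustration, not patch code) of why the
// subtraction shortcut above is only safe for small, non-negative values: for extreme
// ints the difference overflows and flips sign.
public class SubtractionCompareDemo {
  public static void main(String[] args) {
    int docValue = Integer.MIN_VALUE;
    int value = 1;
    System.out.println(docValue - value);                               // prints 2147483647: wrong sign
    System.out.println(docValue < value ? -1 : (docValue == value ? 0 : 1)); // prints -1: correct ordering
  }
}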

View File

@ -20,7 +20,7 @@ import org.apache.solr.common.SolrDocument;
import org.apache.solr.request.SolrQueryRequest;
/**
* Simple Augmenter that adds the docId
* Simple Augmenter that adds the score
*
*
* @since solr 4.0

View File

@ -37,7 +37,7 @@ public class ValueAugmenterFactory extends TransformerFactory
public void init(NamedList args) {
value = args.get( "value" );
if( value == null ) {
defaultValue = args.get( "deaultValue" );
defaultValue = args.get( "defaultValue" );
}
}

View File

@ -138,6 +138,12 @@ public class RandomSortField extends FieldType {
public Integer value(int slot) {
return values[slot];
}
@Override
public int compareDocToValue(int doc, Integer valueObj) {
// values will be positive... no overflow possible.
return hash(doc+seed) - valueObj.intValue();
}
};
}
};

View File

@ -651,7 +651,7 @@ public class Grouping {
}
}
int len = docsGathered - offset;
int len = docsGathered > offset ? docsGathered - offset : 0;
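// Illustrative note (not from the patch): with the simple/main grouped format a start
// (offset) larger than the number of gathered docs, e.g. offset=4 with only 3 docs,
// previously produced len=-1 and a downstream ArrayIndexOutOfBoundsException when the
// response was written; clamping to 0 as above is what the new
// testGroupingSimpleFormatArrayIndexOutOfBoundsExceptionWithJavaBin test exercises.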
int[] docs = ArrayUtils.toPrimitive(ids.toArray(new Integer[ids.size()]));
float[] docScores = ArrayUtils.toPrimitive(scores.toArray(new Float[scores.size()]));
DocSlice docSlice = new DocSlice(offset, len, docs, docScores, getMatches(), maxScore);

View File

@ -59,11 +59,13 @@ public class JoinQParserPlugin extends QParserPlugin {
String toField = getParam("to");
String v = localParams.get("v");
Query fromQuery;
long fromCoreOpenTime = 0;
if (fromIndex != null) {
if (fromIndex != null && !fromIndex.equals(req.getCore().getCoreDescriptor().getName()) ) {
CoreContainer container = req.getCore().getCoreDescriptor().getCoreContainer();
final SolrCore fromCore = container.getCore(fromIndex);
RefCounted<SolrIndexSearcher> fromHolder = null;
if (fromCore == null) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Cross-core join: no such core " + fromIndex);
@ -73,9 +75,12 @@ public class JoinQParserPlugin extends QParserPlugin {
try {
QParser parser = QParser.getParser(v, "lucene", otherReq);
fromQuery = parser.getQuery();
fromHolder = fromCore.getRegisteredSearcher();
if (fromHolder != null) fromCoreOpenTime = fromHolder.get().getOpenTime();
} finally {
otherReq.close();
fromCore.close();
if (fromHolder != null) fromHolder.decref();
}
} else {
QParser fromQueryParser = subQuery(v, null);
@ -83,6 +88,7 @@ public class JoinQParserPlugin extends QParserPlugin {
}
JoinQuery jq = new JoinQuery(fromField, toField, fromIndex, fromQuery);
jq.fromCoreOpenTime = fromCoreOpenTime;
return jq;
}
};
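// Hedged usage sketch (field and core names are illustrative, not from this patch): a
// cross-core join such as
//   q={!join from=manu_id to=id fromIndex=othercore}inStock:true
// now borrows the "from" core's registered searcher just long enough to record its
// open time, which is stored on the resulting JoinQuery.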
@ -95,6 +101,7 @@ class JoinQuery extends Query {
String toField;
String fromIndex;
Query q;
long fromCoreOpenTime;
public JoinQuery(String fromField, String toField, String fromIndex, Query subQuery) {
this.fromField = fromField;
@ -548,12 +555,14 @@ class JoinQuery extends Query {
&& this.toField.equals(other.toField)
&& this.getBoost() == other.getBoost()
&& this.q.equals(other.q)
&& (this.fromIndex == other.fromIndex || this.fromIndex != null && this.fromIndex.equals(other.fromIndex));
&& (this.fromIndex == other.fromIndex || this.fromIndex != null && this.fromIndex.equals(other.fromIndex))
&& this.fromCoreOpenTime == other.fromCoreOpenTime
;
}
@Override
public int hashCode() {
int h = q.hashCode();
int h = q.hashCode() + (int)fromCoreOpenTime;
h = h * 31 + fromField.hashCode();
h = h * 31 + toField.hashCode();
return h;
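// Explanatory note (an inference, not patch text): including fromCoreOpenTime in
// equals() and hashCode() means a cached cross-core JoinQuery built against an older
// searcher of the "from" core no longer matches one built after that core reopened its
// searcher, so caches do not serve stale join results.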

View File

@ -121,6 +121,11 @@ class TermOrdValComparator_SML extends FieldComparator<Comparable> {
return TermOrdValComparator_SML.createComparator(context.reader(), this);
}
@Override
public int compareDocToValue(int doc, Comparable docValue) {
throw new UnsupportedOperationException();
}
// Base class for specialized (per bit width of the
// ords) per-segment comparator. NOTE: this is messy;
// we do this only because hotspot can't reliably inline
@ -216,6 +221,20 @@ class TermOrdValComparator_SML extends FieldComparator<Comparable> {
public BytesRef value(int slot) {
return values==null ? parent.NULL_VAL : values[slot];
}
@Override
public int compareDocToValue(int doc, BytesRef value) {
final BytesRef docValue = termsIndex.getTerm(doc, tempBR);
if (docValue == null) {
if (value == null) {
return 0;
}
return 1;
} else if (value == null) {
return -1;
}
return docValue.compareTo(value);
}
}
// Used per-segment when bit width of doc->ord is 8:

View File

@ -45,7 +45,7 @@ import org.slf4j.LoggerFactory;
/**
* Reusable base class for UpdateProcessors that will consider
* AddUpdateCommands and mutate the values assocaited with configured
* AddUpdateCommands and mutate the values associated with configured
* fields.
* <p>
* Subclasses should override the mutate method to specify how individual

View File

@ -29,9 +29,9 @@ public class FileUtils {
* Resolves a path relative a base directory.
*
* <p>
* This method does what "new File(base,path)" <b>Should</b> do, it wasn't
* This method does what "new File(base,path)" <b>Should</b> do, if it wasn't
* completely lame: If path is absolute, then a File for that path is returned;
* if it's not absoluve, then a File is returnd using "path" as a child
* if it's not absolute, then a File is returned using "path" as a child
* of "base")
* </p>
*/

View File

@ -20,13 +20,21 @@ package org.apache.solr;
import org.apache.lucene.search.FieldCache;
import org.apache.noggit.JSONUtil;
import org.apache.noggit.ObjectBuilder;
import org.apache.solr.client.solrj.impl.BinaryResponseParser;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.GroupParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.request.SolrRequestInfo;
import org.apache.solr.response.BinaryResponseWriter;
import org.apache.solr.response.ResultContext;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.junit.Before;
import org.junit.BeforeClass;
import org.junit.Test;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.*;
public class TestGroupingSearch extends SolrTestCaseJ4 {
@ -37,7 +45,7 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
@BeforeClass
public static void beforeTests() throws Exception {
initCore("solrconfig.xml","schema12.xml");
initCore("solrconfig.xml", "schema12.xml");
}
@Before
@ -75,18 +83,18 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
);
assertQ(req("q","title:title", "group", "true", "group.field","group_si")
,"//lst[@name='grouped']/lst[@name='group_si']"
,"*[count(//arr[@name='groups']/lst) = 2]"
assertQ(req("q", "title:title", "group", "true", "group.field", "group_si")
, "//lst[@name='grouped']/lst[@name='group_si']"
, "*[count(//arr[@name='groups']/lst) = 2]"
,"//arr[@name='groups']/lst[1]/int[@name='groupValue'][.='2']"
,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
, "//arr[@name='groups']/lst[1]/int[@name='groupValue'][.='2']"
, "//arr[@name='groups']/lst[1]/result[@numFound='2']"
, "//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
,"//arr[@name='groups']/lst[2]/int[@name='groupValue'][.='1']"
,"//arr[@name='groups']/lst[2]/result[@numFound='3']"
,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='5']"
);
, "//arr[@name='groups']/lst[2]/int[@name='groupValue'][.='1']"
, "//arr[@name='groups']/lst[2]/result[@numFound='3']"
, "//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='5']"
);
}
@Test
@ -117,19 +125,19 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
);
assertQ(req("q","title:title", "group", "true", "group.field","group_si", "group.ngroups", "true")
,"//lst[@name='grouped']/lst[@name='group_si']/int[@name='matches'][.='5']"
,"//lst[@name='grouped']/lst[@name='group_si']/int[@name='ngroups'][.='2']"
,"*[count(//arr[@name='groups']/lst) = 2]"
assertQ(req("q", "title:title", "group", "true", "group.field", "group_si", "group.ngroups", "true")
, "//lst[@name='grouped']/lst[@name='group_si']/int[@name='matches'][.='5']"
, "//lst[@name='grouped']/lst[@name='group_si']/int[@name='ngroups'][.='2']"
, "*[count(//arr[@name='groups']/lst) = 2]"
,"//arr[@name='groups']/lst[1]/int[@name='groupValue'][.='2']"
,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
, "//arr[@name='groups']/lst[1]/int[@name='groupValue'][.='2']"
, "//arr[@name='groups']/lst[1]/result[@numFound='2']"
, "//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
,"//arr[@name='groups']/lst[2]/int[@name='groupValue'][.='1']"
,"//arr[@name='groups']/lst[2]/result[@numFound='3']"
,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='5']"
);
, "//arr[@name='groups']/lst[2]/int[@name='groupValue'][.='1']"
, "//arr[@name='groups']/lst[2]/result[@numFound='3']"
, "//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='5']"
);
}
@Test
@ -141,22 +149,22 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
assertU(add(doc("id", "5","name", "author3", "title", "the title of a title")));
assertU(commit());
assertQ(req("q","title:title", "group", "true", "group.field","name", "sort", "score desc", "group.sort", "score desc")
,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author2']"
// ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']"
,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
assertQ(req("q", "title:title", "group", "true", "group.field", "name", "sort", "score desc", "group.sort", "score desc")
, "//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author2']"
// ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']"
, "//arr[@name='groups']/lst[1]/result[@numFound='2']"
, "//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='4']"
,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author1']"
// ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']"
,"//arr[@name='groups']/lst[2]/result[@numFound='2']"
,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='2']"
, "//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author1']"
// ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']"
, "//arr[@name='groups']/lst[2]/result[@numFound='2']"
, "//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='2']"
,"//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author3']"
// ,"//arr[@name='groups']/lst[3]/int[@name='matches'][.='1']"
,"//arr[@name='groups']/lst[3]/result[@numFound='1']"
,"//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
);
, "//arr[@name='groups']/lst[3]/str[@name='groupValue'][.='author3']"
// ,"//arr[@name='groups']/lst[3]/int[@name='matches'][.='1']"
, "//arr[@name='groups']/lst[3]/result[@numFound='1']"
, "//arr[@name='groups']/lst[3]/result/doc/*[@name='id'][.='5']"
);
}
@ -168,18 +176,18 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
assertU(add(doc("id", "4","name", "author2", "weight", "0.11")));
assertU(commit());
assertQ(req("q","*:*", "group", "true", "group.field","name", "sort", "id asc", "group.sort", "weight desc")
,"*[count(//arr[@name='groups']/lst) = 2]"
,"//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author1']"
// ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']"
,"//arr[@name='groups']/lst[1]/result[@numFound='2']"
,"//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='1']"
assertQ(req("q", "*:*", "group", "true", "group.field", "name", "sort", "id asc", "group.sort", "weight desc")
, "*[count(//arr[@name='groups']/lst) = 2]"
, "//arr[@name='groups']/lst[1]/str[@name='groupValue'][.='author1']"
// ,"//arr[@name='groups']/lst[1]/int[@name='matches'][.='2']"
, "//arr[@name='groups']/lst[1]/result[@numFound='2']"
, "//arr[@name='groups']/lst[1]/result/doc/*[@name='id'][.='1']"
,"//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author2']"
// ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']"
,"//arr[@name='groups']/lst[2]/result[@numFound='2']"
,"//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='4']"
);
, "//arr[@name='groups']/lst[2]/str[@name='groupValue'][.='author2']"
// ,"//arr[@name='groups']/lst[2]/int[@name='matches'][.='2']"
, "//arr[@name='groups']/lst[2]/result[@numFound='2']"
, "//arr[@name='groups']/lst[2]/result/doc/*[@name='id'][.='4']"
);
}
@Test
@ -214,6 +222,37 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
);
}
@Test
public void testGroupingSimpleFormatArrayIndexOutOfBoundsExceptionWithJavaBin() throws Exception {
assertU(add(doc("id", "1", "nullfirst", "1")));
assertU(add(doc("id", "2", "nullfirst", "1")));
assertU(add(doc("id", "3", "nullfirst", "2")));
assertU(add(doc("id", "4", "nullfirst", "2")));
assertU(add(doc("id", "5", "nullfirst", "2")));
assertU(add(doc("id", "6", "nullfirst", "3")));
assertU(commit());
SolrQueryRequest request =
req("q", "*:*","group", "true", "group.field", "nullfirst", "group.main", "true", "wt", "javabin", "start", "4", "rows", "10");
SolrQueryResponse response = new SolrQueryResponse();
ByteArrayOutputStream out = new ByteArrayOutputStream();
try {
SolrRequestInfo.setRequestInfo(new SolrRequestInfo(request, response));
String handlerName = request.getParams().get(CommonParams.QT);
h.getCore().execute(h.getCore().getRequestHandler(handlerName), request, response);
BinaryResponseWriter responseWriter = new BinaryResponseWriter();
responseWriter.write(out, request, response);
} finally {
request.close();
SolrRequestInfo.clearRequestInfo();
}
assertEquals(6, ((ResultContext) response.getValues().get("response")).docs.matches());
new BinaryResponseParser().processResponse(new ByteArrayInputStream(out.toByteArray()), "");
out.close();
}
@Test
public void testGroupingWithTimeAllowed() throws Exception {
assertU(add(doc("id", "1")));
@ -530,9 +569,9 @@ public class TestGroupingSearch extends SolrTestCaseJ4 {
);
///////////////////////// group.format == simple
assertJQ(req("fq",filt, "q","{!func}"+f2, "group","true", "group.field",f, "fl","id", "rows","3", "start","1", "group.limit","2", "group.format","simple")
, "/grouped/foo_i=={'matches':10,'doclist':"
+"{'numFound':10,'start':1,'docs':[{'id':'10'},{'id':'3'},{'id':'6'}]}}"
assertJQ(req("fq", filt, "q", "{!func}" + f2, "group", "true", "group.field", f, "fl", "id", "rows", "3", "start", "1", "group.limit", "2", "group.format", "simple")
, "/grouped/foo_i=={'matches':10,'doclist':"
+ "{'numFound':10,'start':1,'docs':[{'id':'10'},{'id':'3'},{'id':'6'}]}}"
);
}

View File

@ -17,6 +17,7 @@
package org.apache.solr.core;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.util.Constants;
import org.apache.solr.SolrTestCaseJ4;
import org.junit.Before;
import org.junit.BeforeClass;
@ -109,6 +110,9 @@ public class TestSolrDeletionPolicy1 extends SolrTestCaseJ4 {
@Test
public void testCommitAge() throws InterruptedException {
assumeFalse("This test is not working on Windows (or maybe machines with only 2 CPUs)",
Constants.WINDOWS);
IndexDeletionPolicyWrapper delPolicy = h.getCore().getDeletionPolicy();
addDocs();
Map<Long, IndexCommit> commits = delPolicy.getCommits();

View File

@ -25,6 +25,7 @@ import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.Constants;
import org.apache.noggit.ObjectBuilder;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.common.SolrException;
@ -1213,6 +1214,7 @@ public class TestRealTimeGet extends SolrTestCaseJ4 {
// and tests the ability to buffer updates and apply them later
@Test
public void testStressRecovery() throws Exception {
assumeFalse("FIXME: This test is horribly slow sometimes on Windows!", Constants.WINDOWS);
clearIndex();
assertU(commit());

Some files were not shown because too many files have changed in this diff.