mirror of https://github.com/apache/lucene.git

commit 605656e960
parent 4c18268a19

LUCENE-2398: Improve tests to work better from IDEs such as Eclipse

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@935014 13f79535-47bb-0310-9956-ffa450edef68
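Two moves recur in every file below: test data files are loaded from the classpath (or copied into a per-test work dir) instead of being located through the lucene.common.dir system property that only "ant test" set reliably, and shared test base classes are declared abstract so IDE test runners do not pick them up as runnable tests themselves. A minimal sketch of the classpath-resource pattern, illustrative only and not code from this commit (the class and resource names are made up):

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

public class ClasspathDataExample {
  /** Reads a data file that sits next to the class on the classpath. */
  static String firstLine(Class<?> clazz, String resourceName) throws IOException {
    // Resolved relative to the class's package, so it works the same under
    // "ant test" and under an IDE, whatever the working directory is.
    InputStream in = clazz.getResourceAsStream(resourceName);
    if (in == null) throw new IOException("resource not found: " + resourceName);
    BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
    try {
      return reader.readLine();
    } finally {
      reader.close();
    }
  }
}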
@@ -523,6 +523,9 @@ Test Cases
  access to "real" files from the test folder itself, can use
  LuceneTestCase(J4).getDataFile(). (Uwe Schindler)

* LUCENE-2398: Improve tests to work better from IDEs such as Eclipse.
  (Paolo Castagna via Robert Muir)

================== Release 2.9.2 / 3.0.1 2010-02-26 ====================

Changes in backwards compatibility policy
@@ -441,12 +441,6 @@

  <sysproperty key="lucene.version" value="${dev.version}"/>

  <!-- set as a system property so contrib tests can have a fixed root
       to reference file paths from, and "ant test" can work from
       anywhere.
  -->
  <sysproperty key="lucene.common.dir" file="${common.dir}" />

  <!-- contrib/ant IndexTaskTest needs these two system properties -->
  <sysproperty key="docs.dir" file="src/test"/>
  <sysproperty key="index.dir" file="${build.dir}/test/index"/>
@@ -18,21 +18,83 @@ package org.apache.lucene.benchmark;
 */

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringReader;

import org.apache.lucene.benchmark.byTask.Benchmark;
import org.apache.lucene.util.LuceneTestCase;

/** Base class for all Benchmark unit tests. */
public class BenchmarkTestCase extends LuceneTestCase {

  private static final File workDir;

  static {
    workDir = new File(System.getProperty("benchmark.work.dir", "test/benchmark")).getAbsoluteFile();
    workDir.mkdirs();
  }
public abstract class BenchmarkTestCase extends LuceneTestCase {

  public File getWorkDir() {
    return workDir;
    return TEMP_DIR;
  }

  /** Copy a resource into the workdir */
  public void copyToWorkDir(String resourceName) throws IOException {
    InputStream resource = getClass().getResourceAsStream(resourceName);
    OutputStream dest = new FileOutputStream(new File(getWorkDir(), resourceName));
    byte[] buffer = new byte[8192];
    int len;

    while ((len = resource.read(buffer)) > 0) {
      dest.write(buffer, 0, len);
    }

    resource.close();
    dest.close();
  }

  /** Return a path, suitable for a .alg config file, for a resource in the workdir */
  public String getWorkDirResourcePath(String resourceName) {
    return new File(getWorkDir(), resourceName).getAbsolutePath().replace("\\", "/");
  }

  /** Return a path, suitable for a .alg config file, for the workdir */
  public String getWorkDirPath() {
    return getWorkDir().getAbsolutePath().replace("\\", "/");
  }

  // create the benchmark and execute it.
  public Benchmark execBenchmark(String[] algLines) throws Exception {
    String algText = algLinesToText(algLines);
    logTstLogic(algText);
    Benchmark benchmark = new Benchmark(new StringReader(algText));
    benchmark.execute();
    return benchmark;
  }

  // properties in effect in all tests here
  final String propLines [] = {
    "work.dir=" + getWorkDirPath(),
    "directory=RAMDirectory",
    "print.props=false",
  };

  static final String NEW_LINE = System.getProperty("line.separator");

  // catenate alg lines to make the alg text
  private String algLinesToText(String[] algLines) {
    String indent = " ";
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < propLines.length; i++) {
      sb.append(indent).append(propLines[i]).append(NEW_LINE);
    }
    for (int i = 0; i < algLines.length; i++) {
      sb.append(indent).append(algLines[i]).append(NEW_LINE);
    }
    return sb.toString();
  }

  private static void logTstLogic (String txt) {
    if (!VERBOSE)
      return;
    System.out.println("Test logic of:");
    System.out.println(txt);
  }

}
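For orientation, a hedged sketch of how a subclass is meant to use the new BenchmarkTestCase helpers above; the real examples are TestPerfTasksLogic and TestQualityRun below, while this class, its test and its data file are hypothetical:

import org.apache.lucene.benchmark.BenchmarkTestCase;

public class ExampleBenchmarkTest extends BenchmarkTestCase {
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    // copies a classpath resource next to this class into the work dir
    copyToWorkDir("example.lines.txt");
  }

  public void testAddDocs() throws Exception {
    String[] algLines = {
      "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
      "docs.file=" + getWorkDirResourcePath("example.lines.txt"),
      "content.source.forever=false",
      "CreateIndex",
      "{ AddDoc } : *",
      "CloseIndex",
    };
    // work.dir, directory and print.props are prepended by the base class
    execBenchmark(algLines);
  }
}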
@@ -29,6 +29,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.benchmark.BenchmarkTestCase;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker;
import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;

@@ -50,26 +51,16 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.search.FieldCache.StringIndex;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.LuceneTestCase;

/**
 * Test very simply that perf tasks - simple algorithms - are doing what they should.
 */
public class TestPerfTasksLogic extends LuceneTestCase {
public class TestPerfTasksLogic extends BenchmarkTestCase {

  static final String NEW_LINE = System.getProperty("line.separator");

  // properties in effect in all tests here
  static final String propLines [] = {
    "directory=RAMDirectory",
    "print.props=false",
  };

  /**
   * @param name test name
   */
  public TestPerfTasksLogic(String name) {
    super(name);
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    copyToWorkDir("reuters.first20.lines.txt");
  }

  /**

@@ -531,34 +522,6 @@ public class TestPerfTasksLogic extends LuceneTestCase {
    ir.close();
  }

  // create the benchmark and execute it.
  public static Benchmark execBenchmark(String[] algLines) throws Exception {
    String algText = algLinesToText(algLines);
    logTstLogic(algText);
    Benchmark benchmark = new Benchmark(new StringReader(algText));
    benchmark.execute();
    return benchmark;
  }

  // catenate alg lines to make the alg text
  private static String algLinesToText(String[] algLines) {
    String indent = " ";
    StringBuffer sb = new StringBuffer();
    for (int i = 0; i < propLines.length; i++) {
      sb.append(indent).append(propLines[i]).append(NEW_LINE);
    }
    for (int i = 0; i < algLines.length; i++) {
      sb.append(indent).append(algLines[i]).append(NEW_LINE);
    }
    return sb.toString();
  }

  private static void logTstLogic (String txt) {
    if (!VERBOSE)
      return;
    System.out.println("Test logic of:");
    System.out.println(txt);
  }

  /**
   * Test that exhaust in loop works as expected (LUCENE-1115).

@@ -851,7 +814,7 @@ public class TestPerfTasksLogic extends LuceneTestCase {
    assertEquals("Missing some tasks to check!",3,nChecked);
  }

  private static String[] disableCountingLines (boolean disable) {
  private String[] disableCountingLines (boolean disable) {
    String dis = disable ? "-" : "";
    return new String[] {
      "# ----- properties ",

@@ -901,7 +864,7 @@ public class TestPerfTasksLogic extends LuceneTestCase {
    assertEquals(new Locale("no", "NO", "NY"), benchmark.getRunData().getLocale());
  }

  private static String[] getLocaleConfig(String localeParam) {
  private String[] getLocaleConfig(String localeParam) {
    String algLines[] = {
      "# ----- properties ",
      "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",

@@ -966,7 +929,7 @@ public class TestPerfTasksLogic extends LuceneTestCase {
    ts2.close();
  }

  private static String[] getCollatorConfig(String localeParam,
  private String[] getCollatorConfig(String localeParam,
      String collationParam) {
    String algLines[] = {
      "# ----- properties ",

@@ -1048,7 +1011,7 @@ public class TestPerfTasksLogic extends LuceneTestCase {
    stream.close();
  }

  private static String[] getShingleConfig(String params) {
  private String[] getShingleConfig(String params) {
    String algLines[] = {
      "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
      "docs.file=" + getReuters20LinesFile(),

@@ -1061,8 +1024,7 @@ public class TestPerfTasksLogic extends LuceneTestCase {
    return algLines;
  }

  private static String getReuters20LinesFile() {
    return System.getProperty("lucene.common.dir").replace('\\','/') +
      "/contrib/benchmark/src/test/org/apache/lucene/benchmark/reuters.first20.lines.txt";
  private String getReuters20LinesFile() {
    return getWorkDirResourcePath("reuters.first20.lines.txt");
  }
}
@@ -19,7 +19,6 @@ package org.apache.lucene.benchmark.quality;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;

@@ -36,7 +35,6 @@ import org.apache.lucene.benchmark.quality.utils.SimpleQQParser;
import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.LuceneTestCase;

/**
 * Test that quality run does its job.

@@ -47,6 +45,12 @@ import org.apache.lucene.util.LuceneTestCase;
 */
public class TestQualityRun extends BenchmarkTestCase {

  @Override
  protected void setUp() throws Exception {
    super.setUp();
    copyToWorkDir("reuters.578.lines.txt.bz2");
  }

  public void testTrecQuality() throws Exception {
    // first create the partial reuters index
    createReutersIndex();

@@ -173,7 +177,7 @@ public class TestQualityRun extends BenchmarkTestCase {
    String algLines[] = {
      "# ----- properties ",
      "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
      "docs.file=" + getReuters578LinesFile(),
      "docs.file=" + getWorkDirResourcePath("reuters.578.lines.txt.bz2"),
      "content.source.log.step=2500",
      "doc.term.vector=false",
      "content.source.forever=false",

@@ -188,11 +192,6 @@ public class TestQualityRun extends BenchmarkTestCase {
    };

    // 2. execute the algorithm (required in every "logic" test)
    TestPerfTasksLogic.execBenchmark(algLines);
  }

  private static String getReuters578LinesFile() {
    return System.getProperty("lucene.common.dir").replace('\\','/') +
      "/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/reuters.578.lines.txt.bz2";
    execBenchmark(algLines);
  }
}
@@ -18,548 +18,192 @@ package org.apache.lucene.index.memory;
 */

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;

import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.index.TermDocs;

/**
Verifies that Lucene MemoryIndex and RAMDirectory have the same behaviour,
returning the same results for any given query.
Runs a set of queries against a set of files and compares results for identity.
Can also be used as a simple benchmark.
<p>
Example usage:
<pre>
cd lucene-svn
java -server -cp ~/unix/java/share/misc/junit/junit.jar:build/classes:build/lucene-core-2.1-dev.jar:build/contrib/memory/classes/test:build/contrib/memory/classes/java org.apache.lucene.index.memory.MemoryIndexTest 1 1 memram @contrib/memory/src/test/org/apache/lucene/index/memory/testqueries.txt *.txt *.html *.xml xdocs/*.xml src/test/org/apache/lucene/queryParser/*.java contrib/memory/src/java/org/apache/lucene/index/memory/*.java
</pre>
where testqueries.txt is a file with one query per line, such as:
<pre>
#
# queries extracted from TestQueryParser.java
#
Apache
Apach~ AND Copy*

a AND b
(a AND b)
c OR (a AND b)
a AND NOT b
a AND -b
a AND !b
a && b
a && ! b

a OR b
a || b
a OR !b
a OR ! b
a OR -b

+term -term term
foo:term AND field:anotherTerm
term AND "phrase phrase"
"hello there"

germ term^2.0
(term)^2.0
(germ term)^2.0
term^2.0
term^2
"germ term"^2.0
"term germ"^2

(foo OR bar) AND (baz OR boo)
((a OR b) AND NOT c) OR d
+(apple "steve jobs") -(foo bar baz)
+title:(dog OR cat) -author:"bob dole"


a&b
a&&b
.NET

"term germ"~2
"term germ"~2 flork
"term"~2
"~2 germ"
"term germ"~2^2

3
term 1.0 1 2
term term1 term2

term*
term*^2
term~
term~0.7
term~^2
term^2~
term*germ
term*germ^3


term*
Term*
TERM*
term*
Term*
TERM*

// Then 'full' wildcard queries:
te?m
Te?m
TE?M
Te?m*gerM
te?m
Te?m
TE?M
Te?m*gerM

term term term
term +stop term
term -stop term
drop AND stop AND roll
term phrase term
term AND NOT phrase term
stop


[ a TO c]
[ a TO c ]
{ a TO c}
{ a TO c }
{ a TO c }^2.0
[ a TO c] OR bar
[ a TO c] AND bar
( bar blar { a TO c})
gack ( bar blar { a TO c})


+weltbank +worlbank
+weltbank\n+worlbank
weltbank \n+worlbank
weltbank \n +worlbank
+weltbank\r+worlbank
weltbank \r+worlbank
weltbank \r +worlbank
+weltbank\r\n+worlbank
weltbank \r\n+worlbank
weltbank \r\n +worlbank
weltbank \r \n +worlbank
+weltbank\t+worlbank
weltbank \t+worlbank
weltbank \t +worlbank


term term term
term +term term
term term +term
term +term +term
-term term term


on^1.0
"hello"^2.0
hello^2.0
"on"^1.0
the^3
</pre>

*/
 * Verifies that Lucene MemoryIndex and RAMDirectory have the same behaviour,
 * returning the same results for queries on some randomish indexes.
 */
public class MemoryIndexTest extends BaseTokenStreamTestCase {
  private Set<String> queries = new HashSet<String>();
  private Random random;

  private Analyzer analyzer;
  public static final int ITERATIONS = 100;

  private static final String FIELD_NAME = "content";

  /** Runs the tests and/or benchmark */
  public static void main(String[] args) throws Throwable {
    new MemoryIndexTest().run(args);
  }

  /* all files will be open relative to this */
  public String fileDir;
  @Override
  protected void setUp() throws Exception {
  public void setUp() throws Exception {
    super.setUp();
    fileDir = System.getProperty("lucene.common.dir", null);
    queries.addAll(readQueries("testqueries.txt"));
    queries.addAll(readQueries("testqueries2.txt"));
    random = newRandom();
  }

  // public void tearDown() {}

  public void testMany() throws Throwable {
    String[] files = listFiles(new String[] {
      "*.txt", "*.html", "*.xml", "xdocs/*.xml",
      "src/java/test/org/apache/lucene/queryParser/*.java",
      "contrib/memory/src/java/org/apache/lucene/index/memory/*.java",
    });
    if (VERBOSE) System.out.println("files = " + java.util.Arrays.asList(files));
    String[] xargs = new String[] {
      "1", "1", "memram",
      "@contrib/memory/src/test/org/apache/lucene/index/memory/testqueries.txt",
    };
    String[] args = new String[xargs.length + files.length];
    System.arraycopy(xargs, 0, args, 0, xargs.length);
    System.arraycopy(files, 0, args, xargs.length, files.length);
    run(args);
  }

  private void run(String[] args) throws Throwable {
    int k = -1;

    int iters = 1;
    if (args.length > ++k) iters = Math.max(1, Integer.parseInt(args[k]));

    int runs = 1;
    if (args.length > ++k) runs = Math.max(1, Integer.parseInt(args[k]));

    String cmd = "memram";
    if (args.length > ++k) cmd = args[k];
    boolean useMemIndex = cmd.indexOf("mem") >= 0;
    boolean useRAMIndex = cmd.indexOf("ram") >= 0;

    String[] queries = { "term", "term*", "term~", "Apache", "Apach~ AND Copy*" };
    if (args.length > ++k) {
      String arg = args[k];
      if (arg.startsWith("@"))
        queries = readLines(new File(fileDir, arg.substring(1)));
      else
        queries = new String[] { arg };
    }

    File[] files = new File[] {new File("CHANGES.txt"), new File("LICENSE.txt") };
    if (args.length > ++k) {
      files = new File[args.length - k];
      for (int i=k; i < args.length; i++) {
        files[i-k] = new File(args[i]);
      }
    }

    // boolean toLowerCase = false;
    // Set stopWords = null;

    Analyzer[] analyzers = new Analyzer[] {
      new SimpleAnalyzer(TEST_VERSION_CURRENT),
      new StopAnalyzer(TEST_VERSION_CURRENT),
      new StandardAnalyzer(TEST_VERSION_CURRENT),
      // new WhitespaceAnalyzer(TEST_VERSION_CURRENT),
      // new PatternAnalyzer(PatternAnalyzer.NON_WORD_PATTERN, false, null),
      // new PatternAnalyzer(PatternAnalyzer.NON_WORD_PATTERN, true, stopWords),
      // new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS),
    };

    boolean first = true;

    for (int iter=0; iter < iters; iter++) {
      if (VERBOSE) System.out.println("\n########### iteration=" + iter);
      long start = System.currentTimeMillis();
      long bytes = 0;

      for (int anal=0; anal < analyzers.length; anal++) {
        this.analyzer = analyzers[anal];

        for (int i=0; i < files.length; i++) {
          File file = files[i];
          if (!file.exists() || file.isDirectory()) continue; // ignore
          bytes += file.length();
          String text = toString(new FileInputStream(file), null);
          Document doc = createDocument(text);
          if (VERBOSE) System.out.println("\n*********** FILE=" + file);

          boolean measureIndexing = false; // toggle this to measure query performance
          MemoryIndex memind = null;
          IndexSearcher memsearcher = null;
          if (useMemIndex && !measureIndexing) {
            memind = createMemoryIndex(doc);
            memsearcher = memind.createSearcher();
          }

          if (first) {
            IndexSearcher s = memind.createSearcher();
            TermDocs td = s.getIndexReader().termDocs(null);
            assertTrue(td.next());
            assertEquals(0, td.doc());
            assertEquals(1, td.freq());
            td.close();
            s.close();
            first = false;
          }

          RAMDirectory ramind = null;
          IndexSearcher ramsearcher = null;
          if (useRAMIndex && !measureIndexing) {
            ramind = createRAMIndex(doc);
            ramsearcher = new IndexSearcher(ramind);
          }

          for (int q=0; q < queries.length; q++) {
            try {
              Query query = parseQuery(queries[q]);
              for (int run=0; run < runs; run++) {
                float score1 = 0.0f; float score2 = 0.0f;
                if (useMemIndex && measureIndexing) {
                  memind = createMemoryIndex(doc);
                  memsearcher = memind.createSearcher();
                }
                if (useMemIndex) score1 = query(memsearcher, query);
                if (useRAMIndex && measureIndexing) {
                  ramind = createRAMIndex(doc);
                  ramsearcher = new IndexSearcher(ramind);
                }
                if (useRAMIndex) score2 = query(ramsearcher, query);
                if (useMemIndex && useRAMIndex) {
                  if (VERBOSE) System.out.println("diff="+ (score1-score2) + ", query=" + queries[q] + ", s1=" + score1 + ", s2=" + score2);
                  if (score1 != score2 || score1 < 0.0f || score2 < 0.0f || score1 > 1.0f || score2 > 1.0f) {
                    throw new IllegalStateException("BUG DETECTED:" + (i*(q+1)) + " at query=" + queries[q] + ", file=" + file + ", anal=" + analyzer);
                  }
                }
              }

            } catch (Throwable t) {
              if (t instanceof OutOfMemoryError) t.printStackTrace();
              if (VERBOSE) System.out.println("Fatal error at query=" + queries[q] + ", file=" + file + ", anal=" + analyzer);
              throw t;
            }
          }
        }
      }
      long end = System.currentTimeMillis();
      if (VERBOSE) {
        System.out.println("\nsecs = " + ((end-start)/1000.0f));
        System.out.println("queries/sec= " +
          (1.0f * runs * queries.length * analyzers.length * files.length
          / ((end-start)/1000.0f)));
        float mb = (1.0f * bytes * queries.length * runs) / (1024.0f * 1024.0f);
        System.out.println("MB/sec = " + (mb / ((end-start)/1000.0f)));
      }
    }

    if (!VERBOSE) return;

    if (useMemIndex && useRAMIndex)
      System.out.println("No bug found. done.");
    else
      System.out.println("Done benchmarking (without checking correctness).");
  }

  // returns file line by line, ignoring empty lines and comments
  private String[] readLines(File file) throws Exception {
    BufferedReader reader = new BufferedReader(new InputStreamReader(
      new FileInputStream(file)));
    List<String> lines = new ArrayList<String>();
    String line;
  /**
   * read a set of queries from a resource file
   */
  private Set<String> readQueries(String resource) throws IOException {
    Set<String> queries = new HashSet<String>();
    InputStream stream = getClass().getResourceAsStream(resource);
    BufferedReader reader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
    String line = null;
    while ((line = reader.readLine()) != null) {
      String t = line.trim();
      if (t.length() > 0 && t.charAt(0) != '#' && (!t.startsWith("//"))) {
        lines.add(line);
      line = line.trim();
      if (line.length() > 0 && !line.startsWith("#") && !line.startsWith("//")) {
        queries.add(line);
      }
    }
    reader.close();

    String[] result = new String[lines.size()];
    lines.toArray(result);
    return result;
    return queries;
  }

  private Document createDocument(String content) {

  /**
   * runs random tests, up to ITERATIONS times.
   */
  public void testRandomQueries() throws Exception {
    for (int i = 0; i < ITERATIONS; i++)
      assertAgainstRAMDirectory();
  }

  /**
   * Build a randomish document for both RAMDirectory and MemoryIndex,
   * and run all the queries against it.
   */
  public void assertAgainstRAMDirectory() throws Exception {
    StringBuilder fooField = new StringBuilder();
    StringBuilder termField = new StringBuilder();

    // add up to 250 terms to field "foo"
    for (int i = 0; i < random.nextInt(250); i++) {
      fooField.append(" ");
      fooField.append(randomTerm());
    }

    // add up to 250 terms to field "term"
    for (int i = 0; i < random.nextInt(250); i++) {
      termField.append(" ");
      termField.append(randomTerm());
    }

    RAMDirectory ramdir = new RAMDirectory();
    Analyzer analyzer = randomAnalyzer();
    IndexWriter writer = new IndexWriter(ramdir, analyzer,
      IndexWriter.MaxFieldLength.UNLIMITED);
    Document doc = new Document();
    doc.add(new Field(FIELD_NAME, content, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
    return doc;
    Field field1 = new Field("foo", fooField.toString(), Field.Store.NO, Field.Index.ANALYZED);
    Field field2 = new Field("term", termField.toString(), Field.Store.NO, Field.Index.ANALYZED);
    doc.add(field1);
    doc.add(field2);
    writer.addDocument(doc);
    writer.close();

    MemoryIndex memory = new MemoryIndex();
    memory.addField("foo", fooField.toString(), analyzer);
    memory.addField("term", termField.toString(), analyzer);
    assertAllQueries(memory, ramdir, analyzer);
  }

  private MemoryIndex createMemoryIndex(Document doc) {
    MemoryIndex index = new MemoryIndex();
    Iterator<Fieldable> iter = doc.getFields().iterator();
    while (iter.hasNext()) {
      Fieldable field = iter.next();
      index.addField(field.name(), field.stringValue(), analyzer);
  /**
   * Run all queries against both the RAMDirectory and MemoryIndex, ensuring they are the same.
   */
  public void assertAllQueries(MemoryIndex memory, RAMDirectory ramdir, Analyzer analyzer) throws Exception {
    IndexSearcher ram = new IndexSearcher(ramdir);
    IndexSearcher mem = memory.createSearcher();
    QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "foo", analyzer);
    for (String query : queries) {
      TopDocs ramDocs = ram.search(qp.parse(query), 1);
      TopDocs memDocs = mem.search(qp.parse(query), 1);
      assertEquals(ramDocs.totalHits, memDocs.totalHits);
    }
    return index;
  }

  private RAMDirectory createRAMIndex(Document doc) {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = null;
    try {
      writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
      writer.addDocument(doc);
      writer.optimize();
      return dir;
    } catch (IOException e) { // should never happen (RAMDirectory)
      throw new RuntimeException(e);
    } finally {
      try {
        if (writer != null) writer.close();
      } catch (IOException e) { // should never happen (RAMDirectory)
        throw new RuntimeException(e);
  /**
   * Return a random analyzer (Simple, Stop, Standard) to analyze the terms.
   */
  private Analyzer randomAnalyzer() {
    switch(random.nextInt(3)) {
      case 0: return new SimpleAnalyzer(TEST_VERSION_CURRENT);
      case 1: return new StopAnalyzer(TEST_VERSION_CURRENT);
      default: return new StandardAnalyzer(TEST_VERSION_CURRENT);
    }
  }

  /**
   * Some terms to be indexed, in addition to random words.
   * These terms are commonly used in the queries.
   */
  private static final String[] TEST_TERMS = {"term", "Term", "tErm", "TERM",
    "telm", "stop", "drop", "roll", "phrase", "a", "c", "bar", "blar",
    "gack", "weltbank", "worlbank", "hello", "on", "the", "apache", "Apache",
    "copyright", "Copyright"};


  /**
   * half of the time, returns a random term from TEST_TERMS.
   * the other half of the time, returns a random unicode string.
   */
  private String randomTerm() {
    if (random.nextBoolean()) {
      // return a random TEST_TERM
      return TEST_TERMS[random.nextInt(TEST_TERMS.length)];
    } else {
      // return a random unicode term
      return randomString();
    }
  }

  /**
   * Return a random unicode term, like TestStressIndexing.
   */
  private String randomString() {
    final int end = random.nextInt(20);
    if (buffer.length < 1 + end) {
      char[] newBuffer = new char[(int) ((1 + end) * 1.25)];
      System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
      buffer = newBuffer;
    }
    for (int i = 0; i < end - 1; i++) {
      int t = random.nextInt(6);
      if (0 == t && i < end - 1) {
        // Make a surrogate pair
        // High surrogate
        buffer[i++] = (char) nextInt(0xd800, 0xdc00);
        // Low surrogate
        buffer[i] = (char) nextInt(0xdc00, 0xe000);
      } else if (t <= 1) buffer[i] = (char) random.nextInt(0x80);
      else if (2 == t) buffer[i] = (char) nextInt(0x80, 0x800);
      else if (3 == t) buffer[i] = (char) nextInt(0x800, 0xd800);
      else if (4 == t) buffer[i] = (char) nextInt(0xe000, 0xffff);
      else if (5 == t) {
        // Illegal unpaired surrogate
        if (random.nextBoolean()) buffer[i] = (char) nextInt(0xd800, 0xdc00);
        else buffer[i] = (char) nextInt(0xdc00, 0xe000);
      }
    }
    return new String(buffer, 0, end);
  }

  final float[] scores = new float[1]; // inits to 0.0f (no match)

  private float query(IndexSearcher searcher, Query query) {
    // System.out.println("MB=" + (getMemorySize(index) / (1024.0f * 1024.0f)));
    try {
      searcher.search(query, new Collector() {
        private Scorer scorer;

        @Override
        public void collect(int doc) throws IOException {
          scores[0] = scorer.score();
        }

        @Override
        public void setScorer(Scorer scorer) throws IOException {
          this.scorer = scorer;
        }

        @Override
        public boolean acceptsDocsOutOfOrder() {
          return true;
        }

        @Override
        public void setNextReader(IndexReader reader, int docBase) { }
      });
      float score = scores[0];
      // Hits hits = searcher.search(query);
      // float score = hits.length() > 0 ? hits.score(0) : 0.0f;
      return score;
    } catch (IOException e) { // should never happen (RAMDirectory)
      throw new RuntimeException(e);
    }
  private char buffer[] = new char[20];
  // start is inclusive and end is exclusive
  private int nextInt(int start, int end) {
    return start + random.nextInt(end - start);
  }

  // for debugging purposes
  int getMemorySize(Object index) {
    if (index instanceof Directory) {
      try {
        Directory dir = (Directory) index;
        int size = 0;
        String[] fileNames = dir.listAll();
        for (int i=0; i < fileNames.length; i++) {
          size += dir.fileLength(fileNames[i]);
        }
        return size;
      }
      catch (IOException e) { // can never happen (RAMDirectory)
        throw new RuntimeException(e);
      }
    }
    else {
      return ((MemoryIndex) index).getMemorySize();
    }
  }

  private Query parseQuery(String expression) throws ParseException {
    QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
    // parser.setPhraseSlop(0);
    return parser.parse(expression);
  }

  /** returns all files matching the given file name patterns (quick n'dirty) */
  static String[] listFiles(String[] fileNames) {
    LinkedHashSet<String> allFiles = new LinkedHashSet<String>();
    for (int i=0; i < fileNames.length; i++) {
      int k;
      if ((k = fileNames[i].indexOf("*")) < 0) {
        allFiles.add(fileNames[i]);
      } else {
        String prefix = fileNames[i].substring(0, k);
        if (prefix.length() == 0) prefix = ".";
        final String suffix = fileNames[i].substring(k+1);
        File[] files = new File(prefix).listFiles(new FilenameFilter() {
          public boolean accept(File dir, String name) {
            return name.endsWith(suffix);
          }
        });
        if (files != null) {
          for (int j=0; j < files.length; j++) {
            allFiles.add(files[j].getPath());
          }
        }
      }
    }

    String[] result = new String[allFiles.size()];
    allFiles.toArray(result);
    return result;
  }

  // trick to detect default platform charset
  private static final Charset DEFAULT_PLATFORM_CHARSET =
    Charset.forName(new InputStreamReader(new ByteArrayInputStream(new byte[0])).getEncoding());

  // the following utility methods below are copied from Apache style Nux library - see http://dsd.lbl.gov/nux
  private static String toString(InputStream input, Charset charset) throws IOException {
    if (charset == null) charset = DEFAULT_PLATFORM_CHARSET;
    byte[] data = toByteArray(input);
    return charset.decode(ByteBuffer.wrap(data)).toString();
  }

  private static byte[] toByteArray(InputStream input) throws IOException {
    try {
      // safe and fast even if input.available() behaves weird or buggy
      int len = Math.max(256, input.available());
      byte[] buffer = new byte[len];
      byte[] output = new byte[len];

      len = 0;
      int n;
      while ((n = input.read(buffer)) >= 0) {
        if (len + n > output.length) { // grow capacity
          byte tmp[] = new byte[Math.max(output.length << 1, len + n)];
          System.arraycopy(output, 0, tmp, 0, len);
          System.arraycopy(buffer, 0, tmp, len, n);
          buffer = output; // use larger buffer for future larger bulk reads
          output = tmp;
        } else {
          System.arraycopy(buffer, 0, output, len, n);
        }
        len += n;
      }

      if (len == output.length) return output;
      buffer = null; // help gc
      buffer = new byte[len];
      System.arraycopy(output, 0, buffer, 0, len);
      return buffer;
    } finally {
      input.close();
    }
  }

}
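For readers who don't know the class under test: MemoryIndex holds a single document entirely in RAM and can score queries against it without a Directory. A hedged minimal usage sketch against the 3.x-era API (not code from this commit; the example text and query are made up):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.memory.MemoryIndex;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;

public class MemoryIndexExample {
  public static void main(String[] args) throws Exception {
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
    MemoryIndex index = new MemoryIndex();
    // One document, one or more fields, analyzed straight into RAM.
    index.addField("content", "readings about salmon and other Alaska fishing manuals", analyzer);
    Query query = new QueryParser(Version.LUCENE_30, "content", analyzer).parse("+salmon +fish*");
    // search() returns the document's relevance score; 0.0f means no match.
    float score = index.search(query);
    System.out.println(score > 0.0f ? "match, score=" + score : "no match");
  }
}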
@@ -42,7 +42,7 @@ import org.apache.lucene.util.LuceneTestCase;

import java.io.IOException;

public class CollationTestBase extends LuceneTestCase {
public abstract class CollationTestBase extends LuceneTestCase {

  protected String firstRangeBeginningOriginal = "\u062F";
  protected String firstRangeEndOriginal = "\u0698";
@@ -19,7 +19,7 @@ package org.apache.lucene.util.cache;

import org.apache.lucene.util.LuceneTestCase;

public class BaseTestLRU extends LuceneTestCase {
public abstract class BaseTestLRU extends LuceneTestCase {

  protected void testCache(Cache<Integer,Object> cache, int n) throws Exception {
    Object dummy = new Object();