LUCENE-2398: Improve tests to work better from IDEs such as Eclipse

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@935014 13f79535-47bb-0310-9956-ffa450edef68
Robert Muir 2010-04-16 18:13:23 +00:00
parent 4c18268a19
commit 605656e960
9 changed files with 246 additions and 582 deletions

View File

@@ -523,6 +523,9 @@ Test Cases
access to "real" files from the test folder itself, can use
LuceneTestCase(J4).getDataFile(). (Uwe Schindler)
* LUCENE-2398: Improve tests to work better from IDEs such as Eclipse.
(Paolo Castagna via Robert Muir)
================== Release 2.9.2 / 3.0.1 2010-02-26 ====================
Changes in backwards compatibility policy

View File

@@ -441,12 +441,6 @@
<sysproperty key="lucene.version" value="${dev.version}"/>
<!-- set as a system property so contrib tests can have a fixed root
to reference file paths from, and "ant test" can work from
anywhere.
-->
<sysproperty key="lucene.common.dir" file="${common.dir}" />
<!-- contrib/ant IndexTaskTest needs these two system properties -->
<sysproperty key="docs.dir" file="src/test"/>
<sysproperty key="index.dir" file="${build.dir}/test/index"/>

View File

@@ -18,21 +18,83 @@ package org.apache.lucene.benchmark;
*/
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.StringReader;
import org.apache.lucene.benchmark.byTask.Benchmark;
import org.apache.lucene.util.LuceneTestCase;
/** Base class for all Benchmark unit tests. */
public class BenchmarkTestCase extends LuceneTestCase {
private static final File workDir;
static {
workDir = new File(System.getProperty("benchmark.work.dir", "test/benchmark")).getAbsoluteFile();
workDir.mkdirs();
}
public abstract class BenchmarkTestCase extends LuceneTestCase {
public File getWorkDir() {
return workDir;
return TEMP_DIR;
}
/** Copy a resource into the workdir */
public void copyToWorkDir(String resourceName) throws IOException {
InputStream resource = getClass().getResourceAsStream(resourceName);
OutputStream dest = new FileOutputStream(new File(getWorkDir(), resourceName));
byte[] buffer = new byte[8192];
int len;
while ((len = resource.read(buffer)) > 0) {
dest.write(buffer, 0, len);
}
resource.close();
dest.close();
}
/** Return a path, suitable for a .alg config file, for a resource in the workdir */
public String getWorkDirResourcePath(String resourceName) {
return new File(getWorkDir(), resourceName).getAbsolutePath().replace("\\", "/");
}
/** Return a path, suitable for a .alg config file, for the workdir */
public String getWorkDirPath() {
return getWorkDir().getAbsolutePath().replace("\\", "/");
}
// create the benchmark and execute it.
public Benchmark execBenchmark(String[] algLines) throws Exception {
String algText = algLinesToText(algLines);
logTstLogic(algText);
Benchmark benchmark = new Benchmark(new StringReader(algText));
benchmark.execute();
return benchmark;
}
// properties in effect in all tests here
final String propLines [] = {
"work.dir=" + getWorkDirPath(),
"directory=RAMDirectory",
"print.props=false",
};
static final String NEW_LINE = System.getProperty("line.separator");
// catenate alg lines to make the alg text
private String algLinesToText(String[] algLines) {
String indent = " ";
StringBuffer sb = new StringBuffer();
for (int i = 0; i < propLines.length; i++) {
sb.append(indent).append(propLines[i]).append(NEW_LINE);
}
for (int i = 0; i < algLines.length; i++) {
sb.append(indent).append(algLines[i]).append(NEW_LINE);
}
return sb.toString();
}
private static void logTstLogic (String txt) {
if (!VERBOSE)
return;
System.out.println("Test logic of:");
System.out.println(txt);
}
}
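
The new base class replaces per-test path plumbing: resources are looked up on the classpath next to the test class and copied into LuceneTestCase's TEMP_DIR, so nothing depends on properties that only the Ant build sets. A minimal sketch of how a subclass would use these helpers (the test class and alg lines below are illustrative, not part of this commit):

import org.apache.lucene.benchmark.byTask.Benchmark;

public class ExampleBenchmarkTest extends BenchmarkTestCase {
  @Override
  protected void setUp() throws Exception {
    super.setUp();
    // copies the classpath resource into the temp work dir
    copyToWorkDir("reuters.first20.lines.txt");
  }

  public void testLineDocSource() throws Exception {
    String[] algLines = {
      "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
      "docs.file=" + getWorkDirResourcePath("reuters.first20.lines.txt"),
      "content.source.forever=false",
      "CreateIndex",
      "{ AddDoc } : 20",
      "CloseIndex",
    };
    // execBenchmark prepends propLines (work.dir, directory=RAMDirectory,
    // print.props=false) before running the algorithm
    Benchmark benchmark = execBenchmark(algLines);
  }
}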

View File

@@ -29,6 +29,7 @@ import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.benchmark.BenchmarkTestCase;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker;
import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
@@ -50,26 +51,16 @@ import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.search.FieldCache.StringIndex;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.util.LuceneTestCase;
/**
* Test very simply that perf tasks - simple algorithms - are doing what they should.
*/
public class TestPerfTasksLogic extends LuceneTestCase {
public class TestPerfTasksLogic extends BenchmarkTestCase {
static final String NEW_LINE = System.getProperty("line.separator");
// properties in effect in all tests here
static final String propLines [] = {
"directory=RAMDirectory",
"print.props=false",
};
/**
* @param name test name
*/
public TestPerfTasksLogic(String name) {
super(name);
@Override
protected void setUp() throws Exception {
super.setUp();
copyToWorkDir("reuters.first20.lines.txt");
}
/**
@@ -531,34 +522,6 @@ public class TestPerfTasksLogic extends LuceneTestCase {
ir.close();
}
// create the benchmark and execute it.
public static Benchmark execBenchmark(String[] algLines) throws Exception {
String algText = algLinesToText(algLines);
logTstLogic(algText);
Benchmark benchmark = new Benchmark(new StringReader(algText));
benchmark.execute();
return benchmark;
}
// catenate alg lines to make the alg text
private static String algLinesToText(String[] algLines) {
String indent = " ";
StringBuffer sb = new StringBuffer();
for (int i = 0; i < propLines.length; i++) {
sb.append(indent).append(propLines[i]).append(NEW_LINE);
}
for (int i = 0; i < algLines.length; i++) {
sb.append(indent).append(algLines[i]).append(NEW_LINE);
}
return sb.toString();
}
private static void logTstLogic (String txt) {
if (!VERBOSE)
return;
System.out.println("Test logic of:");
System.out.println(txt);
}
/**
* Test that exhaust in loop works as expected (LUCENE-1115).
@@ -851,7 +814,7 @@ public class TestPerfTasksLogic extends LuceneTestCase {
assertEquals("Missing some tasks to check!",3,nChecked);
}
private static String[] disableCountingLines (boolean disable) {
private String[] disableCountingLines (boolean disable) {
String dis = disable ? "-" : "";
return new String[] {
"# ----- properties ",
@@ -901,7 +864,7 @@ public class TestPerfTasksLogic extends LuceneTestCase {
assertEquals(new Locale("no", "NO", "NY"), benchmark.getRunData().getLocale());
}
private static String[] getLocaleConfig(String localeParam) {
private String[] getLocaleConfig(String localeParam) {
String algLines[] = {
"# ----- properties ",
"content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
@@ -966,7 +929,7 @@ public class TestPerfTasksLogic extends LuceneTestCase {
ts2.close();
}
private static String[] getCollatorConfig(String localeParam,
private String[] getCollatorConfig(String localeParam,
String collationParam) {
String algLines[] = {
"# ----- properties ",
@@ -1048,7 +1011,7 @@ public class TestPerfTasksLogic extends LuceneTestCase {
stream.close();
}
private static String[] getShingleConfig(String params) {
private String[] getShingleConfig(String params) {
String algLines[] = {
"content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
"docs.file=" + getReuters20LinesFile(),
@@ -1061,8 +1024,7 @@ public class TestPerfTasksLogic extends LuceneTestCase {
return algLines;
}
private static String getReuters20LinesFile() {
return System.getProperty("lucene.common.dir").replace('\\','/') +
"/contrib/benchmark/src/test/org/apache/lucene/benchmark/reuters.first20.lines.txt";
private String getReuters20LinesFile() {
return getWorkDirResourcePath("reuters.first20.lines.txt");
}
}
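
The final hunk above is the core of the IDE fix. The old helper built an absolute path from a system property that only "ant test" set; under Eclipse the property is undefined and the lookup fails before the test even runs. A short illustration of the two approaches, using the path from the removed code:

// before: breaks in an IDE, because System.getProperty("lucene.common.dir")
// returns null there and the call chain throws a NullPointerException
String path = System.getProperty("lucene.common.dir").replace('\\', '/')
    + "/contrib/benchmark/src/test/org/apache/lucene/benchmark/reuters.first20.lines.txt";

// after: resolve the resource relative to the test class on the classpath,
// which behaves the same under Ant and under an IDE runner
InputStream stream = TestPerfTasksLogic.class.getResourceAsStream("reuters.first20.lines.txt");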

View File

@@ -19,7 +19,6 @@ package org.apache.lucene.benchmark.quality;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
@@ -36,7 +35,6 @@ import org.apache.lucene.benchmark.quality.utils.SimpleQQParser;
import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.LuceneTestCase;
/**
* Test that quality run does its job.
@@ -47,6 +45,12 @@ import org.apache.lucene.util.LuceneTestCase;
*/
public class TestQualityRun extends BenchmarkTestCase {
@Override
protected void setUp() throws Exception {
super.setUp();
copyToWorkDir("reuters.578.lines.txt.bz2");
}
public void testTrecQuality() throws Exception {
// first create the partial reuters index
createReutersIndex();
@@ -173,7 +177,7 @@ public class TestQualityRun extends BenchmarkTestCase {
String algLines[] = {
"# ----- properties ",
"content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
"docs.file=" + getReuters578LinesFile(),
"docs.file=" + getWorkDirResourcePath("reuters.578.lines.txt.bz2"),
"content.source.log.step=2500",
"doc.term.vector=false",
"content.source.forever=false",
@@ -188,11 +192,6 @@ public class TestQualityRun extends BenchmarkTestCase {
};
// 2. execute the algorithm (required in every "logic" test)
TestPerfTasksLogic.execBenchmark(algLines);
}
private static String getReuters578LinesFile() {
return System.getProperty("lucene.common.dir").replace('\\','/') +
"/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/reuters.578.lines.txt.bz2";
execBenchmark(algLines);
}
}

View File

@@ -18,548 +18,192 @@ package org.apache.lucene.index.memory;
*/
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.BaseTokenStreamTestCase;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.store.Directory;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.index.TermDocs;
/**
Verifies that Lucene MemoryIndex and RAMDirectory have the same behaviour,
returning the same results for any given query.
Runs a set of queries against a set of files and compares results for identity.
Can also be used as a simple benchmark.
<p>
Example usage:
<pre>
cd lucene-svn
java -server -cp ~/unix/java/share/misc/junit/junit.jar:build/classes:build/lucene-core-2.1-dev.jar:build/contrib/memory/classes/test:build/contrib/memory/classes/java org.apache.lucene.index.memory.MemoryIndexTest 1 1 memram @contrib/memory/src/test/org/apache/lucene/index/memory/testqueries.txt *.txt *.html *.xml xdocs/*.xml src/test/org/apache/lucene/queryParser/*.java contrib/memory/src/java/org/apache/lucene/index/memory/*.java
</pre>
where testqueries.txt is a file with one query per line, such as:
<pre>
#
# queries extracted from TestQueryParser.java
#
Apache
Apach~ AND Copy*
a AND b
(a AND b)
c OR (a AND b)
a AND NOT b
a AND -b
a AND !b
a && b
a && ! b
a OR b
a || b
a OR !b
a OR ! b
a OR -b
+term -term term
foo:term AND field:anotherTerm
term AND "phrase phrase"
"hello there"
germ term^2.0
(term)^2.0
(germ term)^2.0
term^2.0
term^2
"germ term"^2.0
"term germ"^2
(foo OR bar) AND (baz OR boo)
((a OR b) AND NOT c) OR d
+(apple "steve jobs") -(foo bar baz)
+title:(dog OR cat) -author:"bob dole"
a&b
a&&b
.NET
"term germ"~2
"term germ"~2 flork
"term"~2
"~2 germ"
"term germ"~2^2
3
term 1.0 1 2
term term1 term2
term*
term*^2
term~
term~0.7
term~^2
term^2~
term*germ
term*germ^3
term*
Term*
TERM*
term*
Term*
TERM*
// Then 'full' wildcard queries:
te?m
Te?m
TE?M
Te?m*gerM
te?m
Te?m
TE?M
Te?m*gerM
term term term
term +stop term
term -stop term
drop AND stop AND roll
term phrase term
term AND NOT phrase term
stop
[ a TO c]
[ a TO c ]
{ a TO c}
{ a TO c }
{ a TO c }^2.0
[ a TO c] OR bar
[ a TO c] AND bar
( bar blar { a TO c})
gack ( bar blar { a TO c})
+weltbank +worlbank
+weltbank\n+worlbank
weltbank \n+worlbank
weltbank \n +worlbank
+weltbank\r+worlbank
weltbank \r+worlbank
weltbank \r +worlbank
+weltbank\r\n+worlbank
weltbank \r\n+worlbank
weltbank \r\n +worlbank
weltbank \r \n +worlbank
+weltbank\t+worlbank
weltbank \t+worlbank
weltbank \t +worlbank
term term term
term +term term
term term +term
term +term +term
-term term term
on^1.0
"hello"^2.0
hello^2.0
"on"^1.0
the^3
</pre>
*/
* Verifies that Lucene MemoryIndex and RAMDirectory have the same behaviour,
* returning the same results for queries on some randomish indexes.
*/
public class MemoryIndexTest extends BaseTokenStreamTestCase {
private Set<String> queries = new HashSet<String>();
private Random random;
private Analyzer analyzer;
public static final int ITERATIONS = 100;
private static final String FIELD_NAME = "content";
/** Runs the tests and/or benchmark */
public static void main(String[] args) throws Throwable {
new MemoryIndexTest().run(args);
}
/* all files will be open relative to this */
public String fileDir;
@Override
protected void setUp() throws Exception {
public void setUp() throws Exception {
super.setUp();
fileDir = System.getProperty("lucene.common.dir", null);
queries.addAll(readQueries("testqueries.txt"));
queries.addAll(readQueries("testqueries2.txt"));
random = newRandom();
}
// public void tearDown() {}
public void testMany() throws Throwable {
String[] files = listFiles(new String[] {
"*.txt", "*.html", "*.xml", "xdocs/*.xml",
"src/java/test/org/apache/lucene/queryParser/*.java",
"contrib/memory/src/java/org/apache/lucene/index/memory/*.java",
});
if (VERBOSE) System.out.println("files = " + java.util.Arrays.asList(files));
String[] xargs = new String[] {
"1", "1", "memram",
"@contrib/memory/src/test/org/apache/lucene/index/memory/testqueries.txt",
};
String[] args = new String[xargs.length + files.length];
System.arraycopy(xargs, 0, args, 0, xargs.length);
System.arraycopy(files, 0, args, xargs.length, files.length);
run(args);
}
private void run(String[] args) throws Throwable {
int k = -1;
int iters = 1;
if (args.length > ++k) iters = Math.max(1, Integer.parseInt(args[k]));
int runs = 1;
if (args.length > ++k) runs = Math.max(1, Integer.parseInt(args[k]));
String cmd = "memram";
if (args.length > ++k) cmd = args[k];
boolean useMemIndex = cmd.indexOf("mem") >= 0;
boolean useRAMIndex = cmd.indexOf("ram") >= 0;
String[] queries = { "term", "term*", "term~", "Apache", "Apach~ AND Copy*" };
if (args.length > ++k) {
String arg = args[k];
if (arg.startsWith("@"))
queries = readLines(new File(fileDir, arg.substring(1)));
else
queries = new String[] { arg };
}
File[] files = new File[] {new File("CHANGES.txt"), new File("LICENSE.txt") };
if (args.length > ++k) {
files = new File[args.length - k];
for (int i=k; i < args.length; i++) {
files[i-k] = new File(args[i]);
}
}
// boolean toLowerCase = false;
// Set stopWords = null;
Analyzer[] analyzers = new Analyzer[] {
new SimpleAnalyzer(TEST_VERSION_CURRENT),
new StopAnalyzer(TEST_VERSION_CURRENT),
new StandardAnalyzer(TEST_VERSION_CURRENT),
// new WhitespaceAnalyzer(TEST_VERSION_CURRENT),
// new PatternAnalyzer(PatternAnalyzer.NON_WORD_PATTERN, false, null),
// new PatternAnalyzer(PatternAnalyzer.NON_WORD_PATTERN, true, stopWords),
// new SnowballAnalyzer("English", StopAnalyzer.ENGLISH_STOP_WORDS),
};
boolean first = true;
for (int iter=0; iter < iters; iter++) {
if (VERBOSE) System.out.println("\n########### iteration=" + iter);
long start = System.currentTimeMillis();
long bytes = 0;
for (int anal=0; anal < analyzers.length; anal++) {
this.analyzer = analyzers[anal];
for (int i=0; i < files.length; i++) {
File file = files[i];
if (!file.exists() || file.isDirectory()) continue; // ignore
bytes += file.length();
String text = toString(new FileInputStream(file), null);
Document doc = createDocument(text);
if (VERBOSE) System.out.println("\n*********** FILE=" + file);
boolean measureIndexing = false; // toggle this to measure query performance
MemoryIndex memind = null;
IndexSearcher memsearcher = null;
if (useMemIndex && !measureIndexing) {
memind = createMemoryIndex(doc);
memsearcher = memind.createSearcher();
}
if (first) {
IndexSearcher s = memind.createSearcher();
TermDocs td = s.getIndexReader().termDocs(null);
assertTrue(td.next());
assertEquals(0, td.doc());
assertEquals(1, td.freq());
td.close();
s.close();
first = false;
}
RAMDirectory ramind = null;
IndexSearcher ramsearcher = null;
if (useRAMIndex && !measureIndexing) {
ramind = createRAMIndex(doc);
ramsearcher = new IndexSearcher(ramind);
}
for (int q=0; q < queries.length; q++) {
try {
Query query = parseQuery(queries[q]);
for (int run=0; run < runs; run++) {
float score1 = 0.0f; float score2 = 0.0f;
if (useMemIndex && measureIndexing) {
memind = createMemoryIndex(doc);
memsearcher = memind.createSearcher();
}
if (useMemIndex) score1 = query(memsearcher, query);
if (useRAMIndex && measureIndexing) {
ramind = createRAMIndex(doc);
ramsearcher = new IndexSearcher(ramind);
}
if (useRAMIndex) score2 = query(ramsearcher, query);
if (useMemIndex && useRAMIndex) {
if (VERBOSE) System.out.println("diff="+ (score1-score2) + ", query=" + queries[q] + ", s1=" + score1 + ", s2=" + score2);
if (score1 != score2 || score1 < 0.0f || score2 < 0.0f || score1 > 1.0f || score2 > 1.0f) {
throw new IllegalStateException("BUG DETECTED:" + (i*(q+1)) + " at query=" + queries[q] + ", file=" + file + ", anal=" + analyzer);
}
}
}
} catch (Throwable t) {
if (t instanceof OutOfMemoryError) t.printStackTrace();
if (VERBOSE) System.out.println("Fatal error at query=" + queries[q] + ", file=" + file + ", anal=" + analyzer);
throw t;
}
}
}
}
long end = System.currentTimeMillis();
if (VERBOSE) {
System.out.println("\nsecs = " + ((end-start)/1000.0f));
System.out.println("queries/sec= " +
(1.0f * runs * queries.length * analyzers.length * files.length
/ ((end-start)/1000.0f)));
float mb = (1.0f * bytes * queries.length * runs) / (1024.0f * 1024.0f);
System.out.println("MB/sec = " + (mb / ((end-start)/1000.0f)));
}
}
if (!VERBOSE) return;
if (useMemIndex && useRAMIndex)
System.out.println("No bug found. done.");
else
System.out.println("Done benchmarking (without checking correctness).");
}
// returns file line by line, ignoring empty lines and comments
private String[] readLines(File file) throws Exception {
BufferedReader reader = new BufferedReader(new InputStreamReader(
new FileInputStream(file)));
List<String> lines = new ArrayList<String>();
String line;
/**
* read a set of queries from a resource file
*/
private Set<String> readQueries(String resource) throws IOException {
Set<String> queries = new HashSet<String>();
InputStream stream = getClass().getResourceAsStream(resource);
BufferedReader reader = new BufferedReader(new InputStreamReader(stream, "UTF-8"));
String line = null;
while ((line = reader.readLine()) != null) {
String t = line.trim();
if (t.length() > 0 && t.charAt(0) != '#' && (!t.startsWith("//"))) {
lines.add(line);
line = line.trim();
if (line.length() > 0 && !line.startsWith("#") && !line.startsWith("//")) {
queries.add(line);
}
}
reader.close();
String[] result = new String[lines.size()];
lines.toArray(result);
return result;
return queries;
}
private Document createDocument(String content) {
/**
* runs random tests, up to ITERATIONS times.
*/
public void testRandomQueries() throws Exception {
for (int i = 0; i < ITERATIONS; i++)
assertAgainstRAMDirectory();
}
/**
* Build a randomish document for both RAMDirectory and MemoryIndex,
* and run all the queries against it.
*/
public void assertAgainstRAMDirectory() throws Exception {
StringBuilder fooField = new StringBuilder();
StringBuilder termField = new StringBuilder();
// add up to 250 terms to field "foo"
for (int i = 0; i < random.nextInt(250); i++) {
fooField.append(" ");
fooField.append(randomTerm());
}
// add up to 250 terms to field "term"
for (int i = 0; i < random.nextInt(250); i++) {
termField.append(" ");
termField.append(randomTerm());
}
RAMDirectory ramdir = new RAMDirectory();
Analyzer analyzer = randomAnalyzer();
IndexWriter writer = new IndexWriter(ramdir, analyzer,
IndexWriter.MaxFieldLength.UNLIMITED);
Document doc = new Document();
doc.add(new Field(FIELD_NAME, content, Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS));
return doc;
Field field1 = new Field("foo", fooField.toString(), Field.Store.NO, Field.Index.ANALYZED);
Field field2 = new Field("term", termField.toString(), Field.Store.NO, Field.Index.ANALYZED);
doc.add(field1);
doc.add(field2);
writer.addDocument(doc);
writer.close();
MemoryIndex memory = new MemoryIndex();
memory.addField("foo", fooField.toString(), analyzer);
memory.addField("term", termField.toString(), analyzer);
assertAllQueries(memory, ramdir, analyzer);
}
private MemoryIndex createMemoryIndex(Document doc) {
MemoryIndex index = new MemoryIndex();
Iterator<Fieldable> iter = doc.getFields().iterator();
while (iter.hasNext()) {
Fieldable field = iter.next();
index.addField(field.name(), field.stringValue(), analyzer);
/**
* Run all queries against both the RAMDirectory and MemoryIndex, ensuring they are the same.
*/
public void assertAllQueries(MemoryIndex memory, RAMDirectory ramdir, Analyzer analyzer) throws Exception {
IndexSearcher ram = new IndexSearcher(ramdir);
IndexSearcher mem = memory.createSearcher();
QueryParser qp = new QueryParser(TEST_VERSION_CURRENT, "foo", analyzer);
for (String query : queries) {
TopDocs ramDocs = ram.search(qp.parse(query), 1);
TopDocs memDocs = mem.search(qp.parse(query), 1);
assertEquals(ramDocs.totalHits, memDocs.totalHits);
}
return index;
}
private RAMDirectory createRAMIndex(Document doc) {
RAMDirectory dir = new RAMDirectory();
IndexWriter writer = null;
try {
writer = new IndexWriter(dir, new IndexWriterConfig(TEST_VERSION_CURRENT, analyzer));
writer.addDocument(doc);
writer.optimize();
return dir;
} catch (IOException e) { // should never happen (RAMDirectory)
throw new RuntimeException(e);
} finally {
try {
if (writer != null) writer.close();
} catch (IOException e) { // should never happen (RAMDirectory)
throw new RuntimeException(e);
/**
* Return a random analyzer (Simple, Stop, Standard) to analyze the terms.
*/
private Analyzer randomAnalyzer() {
switch(random.nextInt(3)) {
case 0: return new SimpleAnalyzer(TEST_VERSION_CURRENT);
case 1: return new StopAnalyzer(TEST_VERSION_CURRENT);
default: return new StandardAnalyzer(TEST_VERSION_CURRENT);
}
}
/**
* Some terms to be indexed, in addition to random words.
* These terms are commonly used in the queries.
*/
private static final String[] TEST_TERMS = {"term", "Term", "tErm", "TERM",
"telm", "stop", "drop", "roll", "phrase", "a", "c", "bar", "blar",
"gack", "weltbank", "worlbank", "hello", "on", "the", "apache", "Apache",
"copyright", "Copyright"};
/**
* half of the time, returns a random term from TEST_TERMS.
* the other half of the time, returns a random unicode string.
*/
private String randomTerm() {
if (random.nextBoolean()) {
// return a random TEST_TERM
return TEST_TERMS[random.nextInt(TEST_TERMS.length)];
} else {
// return a random unicode term
return randomString();
}
}
/**
* Return a random unicode term, like TestStressIndexing.
*/
private String randomString() {
final int end = random.nextInt(20);
if (buffer.length < 1 + end) {
char[] newBuffer = new char[(int) ((1 + end) * 1.25)];
System.arraycopy(buffer, 0, newBuffer, 0, buffer.length);
buffer = newBuffer;
}
for (int i = 0; i < end - 1; i++) {
int t = random.nextInt(6);
if (0 == t && i < end - 1) {
// Make a surrogate pair
// High surrogate
buffer[i++] = (char) nextInt(0xd800, 0xdc00);
// Low surrogate
buffer[i] = (char) nextInt(0xdc00, 0xe000);
} else if (t <= 1) buffer[i] = (char) random.nextInt(0x80);
else if (2 == t) buffer[i] = (char) nextInt(0x80, 0x800);
else if (3 == t) buffer[i] = (char) nextInt(0x800, 0xd800);
else if (4 == t) buffer[i] = (char) nextInt(0xe000, 0xffff);
else if (5 == t) {
// Illegal unpaired surrogate
if (random.nextBoolean()) buffer[i] = (char) nextInt(0xd800, 0xdc00);
else buffer[i] = (char) nextInt(0xdc00, 0xe000);
}
}
return new String(buffer, 0, end);
}
final float[] scores = new float[1]; // inits to 0.0f (no match)
private float query(IndexSearcher searcher, Query query) {
// System.out.println("MB=" + (getMemorySize(index) / (1024.0f * 1024.0f)));
try {
searcher.search(query, new Collector() {
private Scorer scorer;
@Override
public void collect(int doc) throws IOException {
scores[0] = scorer.score();
}
@Override
public void setScorer(Scorer scorer) throws IOException {
this.scorer = scorer;
}
@Override
public boolean acceptsDocsOutOfOrder() {
return true;
}
@Override
public void setNextReader(IndexReader reader, int docBase) { }
});
float score = scores[0];
// Hits hits = searcher.search(query);
// float score = hits.length() > 0 ? hits.score(0) : 0.0f;
return score;
} catch (IOException e) { // should never happen (RAMDirectory)
throw new RuntimeException(e);
}
private char buffer[] = new char[20];
// start is inclusive and end is exclusive
private int nextInt(int start, int end) {
return start + random.nextInt(end - start);
}
// for debugging purposes
int getMemorySize(Object index) {
if (index instanceof Directory) {
try {
Directory dir = (Directory) index;
int size = 0;
String[] fileNames = dir.listAll();
for (int i=0; i < fileNames.length; i++) {
size += dir.fileLength(fileNames[i]);
}
return size;
}
catch (IOException e) { // can never happen (RAMDirectory)
throw new RuntimeException(e);
}
}
else {
return ((MemoryIndex) index).getMemorySize();
}
}
private Query parseQuery(String expression) throws ParseException {
QueryParser parser = new QueryParser(TEST_VERSION_CURRENT, FIELD_NAME, analyzer);
// parser.setPhraseSlop(0);
return parser.parse(expression);
}
/** returns all files matching the given file name patterns (quick n'dirty) */
static String[] listFiles(String[] fileNames) {
LinkedHashSet<String> allFiles = new LinkedHashSet<String>();
for (int i=0; i < fileNames.length; i++) {
int k;
if ((k = fileNames[i].indexOf("*")) < 0) {
allFiles.add(fileNames[i]);
} else {
String prefix = fileNames[i].substring(0, k);
if (prefix.length() == 0) prefix = ".";
final String suffix = fileNames[i].substring(k+1);
File[] files = new File(prefix).listFiles(new FilenameFilter() {
public boolean accept(File dir, String name) {
return name.endsWith(suffix);
}
});
if (files != null) {
for (int j=0; j < files.length; j++) {
allFiles.add(files[j].getPath());
}
}
}
}
String[] result = new String[allFiles.size()];
allFiles.toArray(result);
return result;
}
// trick to detect default platform charset
private static final Charset DEFAULT_PLATFORM_CHARSET =
Charset.forName(new InputStreamReader(new ByteArrayInputStream(new byte[0])).getEncoding());
// the following utility methods below are copied from Apache style Nux library - see http://dsd.lbl.gov/nux
private static String toString(InputStream input, Charset charset) throws IOException {
if (charset == null) charset = DEFAULT_PLATFORM_CHARSET;
byte[] data = toByteArray(input);
return charset.decode(ByteBuffer.wrap(data)).toString();
}
private static byte[] toByteArray(InputStream input) throws IOException {
try {
// safe and fast even if input.available() behaves weird or buggy
int len = Math.max(256, input.available());
byte[] buffer = new byte[len];
byte[] output = new byte[len];
len = 0;
int n;
while ((n = input.read(buffer)) >= 0) {
if (len + n > output.length) { // grow capacity
byte tmp[] = new byte[Math.max(output.length << 1, len + n)];
System.arraycopy(output, 0, tmp, 0, len);
System.arraycopy(buffer, 0, tmp, len, n);
buffer = output; // use larger buffer for future larger bulk reads
output = tmp;
} else {
System.arraycopy(buffer, 0, output, len, n);
}
len += n;
}
if (len == output.length) return output;
buffer = null; // help gc
buffer = new byte[len];
System.arraycopy(output, 0, buffer, 0, len);
return buffer;
} finally {
input.close();
}
}
}
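
The rewritten test trades the old file-driven benchmark for a self-contained equivalence check: the same content is indexed into both a RAMDirectory and a MemoryIndex, and every query must produce the same hit count against each. A condensed sketch of that round trip, using one fixed field in place of the random ones above:

Analyzer analyzer = new SimpleAnalyzer(TEST_VERSION_CURRENT);
String text = "some test terms";

// index the text into a RAMDirectory
RAMDirectory ramdir = new RAMDirectory();
IndexWriter writer = new IndexWriter(ramdir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
Document doc = new Document();
doc.add(new Field("foo", text, Field.Store.NO, Field.Index.ANALYZED));
writer.addDocument(doc);
writer.close();

// and the same text into a MemoryIndex
MemoryIndex memory = new MemoryIndex();
memory.addField("foo", text, analyzer);

// both searchers must agree on the hit count
IndexSearcher ram = new IndexSearcher(ramdir);
IndexSearcher mem = memory.createSearcher();
Query q = new QueryParser(TEST_VERSION_CURRENT, "foo", analyzer).parse("test");
assertEquals(ram.search(q, 1).totalHits, mem.search(q, 1).totalHits);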

View File

@@ -42,7 +42,7 @@ import org.apache.lucene.util.LuceneTestCase;
import java.io.IOException;
public class CollationTestBase extends LuceneTestCase {
public abstract class CollationTestBase extends LuceneTestCase {
protected String firstRangeBeginningOriginal = "\u062F";
protected String firstRangeEndOriginal = "\u0698";

View File

@@ -19,7 +19,7 @@ package org.apache.lucene.util.cache;
import org.apache.lucene.util.LuceneTestCase;
public class BaseTestLRU extends LuceneTestCase {
public abstract class BaseTestLRU extends LuceneTestCase {
protected void testCache(Cache<Integer,Object> cache, int n) throws Exception {
Object dummy = new Object();
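
These last two hunks are the same one-word fix, and the other half of the IDE story: Eclipse's JUnit runner tries to execute every public test class it finds, and a shared base class that exists only to hold helper methods then shows up as a failing or empty run. Declaring the base abstract makes the runner skip it while concrete subclasses run unchanged. A sketch of the resulting pattern (the concrete subclass and SimpleLRUCache usage are illustrative, not from this commit):

public abstract class BaseTestLRU extends LuceneTestCase {
  protected void testCache(Cache<Integer,Object> cache, int n) throws Exception {
    // shared assertions, exercised only via concrete subclasses
  }
}

// hypothetical concrete subclass: this is what the IDE actually runs
public class TestExampleLRUCache extends BaseTestLRU {
  public void testLRUCache() throws Exception {
    final int n = 100;
    testCache(new SimpleLRUCache<Integer,Object>(n), n);
  }
}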