LUCENE-1755: Fix WriteLineDocTask to output a document if it contains either a title or body (or both).

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@795792 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Mark Robert Miller 2009-07-20 12:19:06 +00:00
parent 63402f49c7
commit 3adc61c3ac
3 changed files with 85 additions and 8 deletions

View File

@ -4,6 +4,10 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
$Id:$ $Id:$
7/20/2009
LUCENE-1755: Fix WriteLineDocTask to output a document if it contains either
a title or body (or both). (Shai Erera via Mark Miller)
7/14/2009 7/14/2009
LUCENE-1725: Fix the example Sort algorithm - auto is now deprecated and no longer works LUCENE-1725: Fix the example Sort algorithm - auto is now deprecated and no longer works
with Benchmark. Benchmark will now throw an exception if you specify sort fields without with Benchmark. Benchmark will now throw an exception if you specify sort fields without

View File

@ -97,11 +97,12 @@ public class WriteLineDocTask extends PerfTask {
Document doc = docSize > 0 ? docMaker.makeDocument(docSize) : docMaker.makeDocument(); Document doc = docSize > 0 ? docMaker.makeDocument(docSize) : docMaker.makeDocument();
Field f = doc.getField(DocMaker.BODY_FIELD); Field f = doc.getField(DocMaker.BODY_FIELD);
String body = f != null ? NORMALIZER.reset(f.stringValue()).replaceAll(" ") : null; String body = f != null ? NORMALIZER.reset(f.stringValue()).replaceAll(" ") : "";
if (body != null) { f = doc.getField(DocMaker.TITLE_FIELD);
f = doc.getField(DocMaker.TITLE_FIELD); String title = f != null ? NORMALIZER.reset(f.stringValue()).replaceAll(" ") : "";
String title = f != null ? NORMALIZER.reset(f.stringValue()).replaceAll(" ") : "";
if (body.length() > 0 || title.length() > 0) {
f = doc.getField(DocMaker.DATE_FIELD); f = doc.getField(DocMaker.DATE_FIELD);
String date = f != null ? NORMALIZER.reset(f.stringValue()).replaceAll(" ") : ""; String date = f != null ? NORMALIZER.reset(f.stringValue()).replaceAll(" ") : "";

View File

@ -39,7 +39,7 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
// class has to be public so that Class.forName.newInstance() will work // class has to be public so that Class.forName.newInstance() will work
public static final class WriteLineDocMaker extends DocMaker { public static final class WriteLineDocMaker extends DocMaker {
public Document makeDocument() throws Exception { public Document makeDocument() throws Exception {
Document doc = new Document(); Document doc = new Document();
doc.add(new Field(BODY_FIELD, "body", Store.NO, Index.NOT_ANALYZED_NO_NORMS)); doc.add(new Field(BODY_FIELD, "body", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
@ -52,7 +52,7 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
// class has to be public so that Class.forName.newInstance() will work // class has to be public so that Class.forName.newInstance() will work
public static final class NewLinesDocMaker extends DocMaker { public static final class NewLinesDocMaker extends DocMaker {
public Document makeDocument() throws Exception { public Document makeDocument() throws Exception {
Document doc = new Document(); Document doc = new Document();
doc.add(new Field(BODY_FIELD, "body\r\ntext\ttwo", Store.NO, Index.NOT_ANALYZED_NO_NORMS)); doc.add(new Field(BODY_FIELD, "body\r\ntext\ttwo", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
@ -63,6 +63,35 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
} }
// class has to be public so that Class.forName.newInstance() will work
public static final class NoBodyDocMaker extends DocMaker {
public Document makeDocument() throws Exception {
Document doc = new Document();
doc.add(new Field(TITLE_FIELD, "title", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
doc.add(new Field(DATE_FIELD, "date", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
return doc;
}
}
// class has to be public so that Class.forName.newInstance() will work
public static final class NoTitleDocMaker extends DocMaker {
public Document makeDocument() throws Exception {
Document doc = new Document();
doc.add(new Field(BODY_FIELD, "body", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
doc.add(new Field(DATE_FIELD, "date", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
return doc;
}
}
// class has to be public so that Class.forName.newInstance() will work
public static final class JustDateDocMaker extends DocMaker {
public Document makeDocument() throws Exception {
Document doc = new Document();
doc.add(new Field(DATE_FIELD, "date", Store.NO, Index.NOT_ANALYZED_NO_NORMS));
return doc;
}
}
private static final CompressorStreamFactory csFactory = new CompressorStreamFactory(); private static final CompressorStreamFactory csFactory = new CompressorStreamFactory();
private PerfRunData createPerfRunData(File file, boolean setBZCompress, private PerfRunData createPerfRunData(File file, boolean setBZCompress,
@ -90,10 +119,13 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
String line = br.readLine(); String line = br.readLine();
assertNotNull(line); assertNotNull(line);
String[] parts = line.split(Character.toString(WriteLineDocTask.SEP)); String[] parts = line.split(Character.toString(WriteLineDocTask.SEP));
assertEquals(3, parts.length); int numExpParts = expBody == null ? 2 : 3;
assertEquals(numExpParts, parts.length);
assertEquals(expTitle, parts[0]); assertEquals(expTitle, parts[0]);
assertEquals(expDate, parts[1]); assertEquals(expDate, parts[1]);
assertEquals(expBody, parts[2]); if (expBody != null) {
assertEquals(expBody, parts[2]);
}
assertNull(br.readLine()); assertNull(br.readLine());
} finally { } finally {
br.close(); br.close();
@ -149,4 +181,44 @@ public class WriteLineDocTaskTest extends BenchmarkTestCase {
doReadTest(file, false, "title text", "date text", "body text two"); doReadTest(file, false, "title text", "date text", "body text two");
} }
public void testEmptyBody() throws Exception {
// WriteLineDocTask threw away documents w/ no BODY element, even if they
// had a TITLE element (LUCENE-1755). It should throw away documents if they
// don't have BODY nor TITLE
File file = new File(getWorkDir(), "one-line");
PerfRunData runData = createPerfRunData(file, false, null, NoBodyDocMaker.class.getName());
WriteLineDocTask wldt = new WriteLineDocTask(runData);
wldt.doLogic();
wldt.close();
doReadTest(file, false, "title", "date", null);
}
public void testEmptyTitle() throws Exception {
File file = new File(getWorkDir(), "one-line");
PerfRunData runData = createPerfRunData(file, false, null, NoTitleDocMaker.class.getName());
WriteLineDocTask wldt = new WriteLineDocTask(runData);
wldt.doLogic();
wldt.close();
doReadTest(file, false, "", "date", "body");
}
public void testJustDate() throws Exception {
File file = new File(getWorkDir(), "one-line");
PerfRunData runData = createPerfRunData(file, false, null, JustDateDocMaker.class.getName());
WriteLineDocTask wldt = new WriteLineDocTask(runData);
wldt.doLogic();
wldt.close();
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(file), "utf-8"));
try {
String line = br.readLine();
assertNull(line);
} finally {
br.close();
}
}
} }