mirror of
https://github.com/apache/lucene.git
synced 2025-02-22 18:27:21 +00:00
LUCENE-2210: fix TrecTopicsReader for descriptions and narratives
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@899369 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
f8bd9819e6
commit
43c11321b4
@ -4,6 +4,10 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
|
||||
|
||||
$Id:$
|
||||
|
||||
1/14/2010
|
||||
LUCENE-2210: TrecTopicsReader now properly reads descriptions and
|
||||
narratives from trec topics files. (Robert Muir)
|
||||
|
||||
1/11/2010
|
||||
LUCENE-2181: Add a benchmark for collation. This adds NewLocaleTask,
|
||||
which sets a Locale in the run data for collation to use, and can be
|
||||
|
@ -76,16 +76,31 @@ public class TrecTopicsReader {
|
||||
k = sb.indexOf(">");
|
||||
String title = sb.substring(k+1).trim();
|
||||
// description
|
||||
sb = read(reader,"<desc>",null,false,false);
|
||||
sb = read(reader,"<narr>",null,false,true);
|
||||
String descripion = sb.toString().trim();
|
||||
read(reader,"<desc>",null,false,false);
|
||||
sb.setLength(0);
|
||||
String line = null;
|
||||
while ((line = reader.readLine()) != null) {
|
||||
if (line.startsWith("<narr>"))
|
||||
break;
|
||||
if (sb.length() > 0) sb.append(' ');
|
||||
sb.append(line);
|
||||
}
|
||||
String description = sb.toString().trim();
|
||||
// narrative
|
||||
sb.setLength(0);
|
||||
while ((line = reader.readLine()) != null) {
|
||||
if (line.startsWith("</top>"))
|
||||
break;
|
||||
if (sb.length() > 0) sb.append(' ');
|
||||
sb.append(line);
|
||||
}
|
||||
String narrative = sb.toString().trim();
|
||||
// we got a topic!
|
||||
fields.put("title",title);
|
||||
fields.put("description",descripion);
|
||||
fields.put("description",description);
|
||||
fields.put("narrative", narrative);
|
||||
QualityQuery topic = new QualityQuery(id,fields);
|
||||
res.add(topic);
|
||||
// skip narrative, get to end of doc
|
||||
read(reader,"</top>",null,false,false);
|
||||
}
|
||||
} finally {
|
||||
reader.close();
|
||||
|
@ -149,6 +149,49 @@ public class TestQualityRun extends TestCase {
|
||||
|
||||
|
||||
}
|
||||
|
||||
/**
 * Reads the trecTopics.txt test file with TrecTopicsReader and checks the
 * parsed result: the number of queries returned, and the "title",
 * "description" and "narrative" values of the first, second and last
 * query (indexes 0, 1 and 19).
 *
 * @throws Exception if reading the topics file fails
 */
public void testTrecTopicsReader() throws Exception {
|
||||
// work dir is taken from the "benchmark.work.dir" system property, defaulting to "work"
File workDir = new File(System.getProperty("benchmark.work.dir","work"));
|
||||
assertTrue("Bad workDir: " + workDir,
|
||||
workDir.exists() && workDir.isDirectory());
|
||||
|
||||
// <tests src dir> for topics/qrels files:
|
||||
// src/test/org/apache/lucene/benchmark/quality
|
||||
// (resolved relative to the parent directory of the work dir)
File srcTestDir = new File(new File(new File(new File(new File(
|
||||
new File(new File(workDir.getAbsoluteFile().getParentFile(),
|
||||
"src"),"test"),"org"),"apache"),"lucene"),"benchmark"),"quality");
|
||||
|
||||
// prepare topics
|
||||
File topicsFile = new File(srcTestDir, "trecTopics.txt");
|
||||
assertTrue("Bad topicsFile: " + topicsFile,
|
||||
topicsFile.exists() && topicsFile.isFile());
|
||||
TrecTopicsReader qReader = new TrecTopicsReader();
|
||||
// NOTE(review): the reader is not closed in this test; presumably readQueries closes it - confirm
QualityQuery qqs[] = qReader.readQueries(
|
||||
new BufferedReader(new FileReader(topicsFile)));
|
||||
|
||||
// the topics file is expected to yield exactly 20 queries
assertEquals(20, qqs.length);
|
||||
|
||||
// first topic: the expected description and narrative are the two text
// lines of each section joined by a single space
QualityQuery qq = qqs[0];
|
||||
assertEquals("statement months total 1987", qq.getValue("title"));
|
||||
assertEquals("Topic 0 Description Line 1 Topic 0 Description Line 2",
|
||||
qq.getValue("description"));
|
||||
assertEquals("Topic 0 Narrative Line 1 Topic 0 Narrative Line 2",
|
||||
qq.getValue("narrative"));
|
||||
|
||||
// second topic
qq = qqs[1];
|
||||
assertEquals("agreed 15 against five", qq.getValue("title"));
|
||||
assertEquals("Topic 1 Description Line 1 Topic 1 Description Line 2",
|
||||
qq.getValue("description"));
|
||||
assertEquals("Topic 1 Narrative Line 1 Topic 1 Narrative Line 2",
|
||||
qq.getValue("narrative"));
|
||||
|
||||
// last topic
qq = qqs[19];
|
||||
assertEquals("20 while common week", qq.getValue("title"));
|
||||
assertEquals("Topic 19 Description Line 1 Topic 19 Description Line 2",
|
||||
qq.getValue("description"));
|
||||
assertEquals("Topic 19 Narrative Line 1 Topic 19 Narrative Line 2",
|
||||
qq.getValue("narrative"));
|
||||
}
|
||||
|
||||
// use benchmark logic to create the full Reuters index
|
||||
private void createReutersIndex() throws Exception {
|
||||
|
@ -26,10 +26,12 @@
|
||||
<title> statement months total 1987
|
||||
|
||||
<desc> Description:
|
||||
|
||||
Topic 0 Description Line 1
|
||||
Topic 0 Description Line 2
|
||||
|
||||
<narr> Narrative:
|
||||
|
||||
Topic 0 Narrative Line 1
|
||||
Topic 0 Narrative Line 2
|
||||
|
||||
</top>
|
||||
|
||||
@ -39,10 +41,12 @@
|
||||
<title> agreed 15 against five
|
||||
|
||||
<desc> Description:
|
||||
|
||||
Topic 1 Description Line 1
|
||||
Topic 1 Description Line 2
|
||||
|
||||
<narr> Narrative:
|
||||
|
||||
Topic 1 Narrative Line 1
|
||||
Topic 1 Narrative Line 2
|
||||
|
||||
</top>
|
||||
|
||||
@ -273,9 +277,11 @@
|
||||
<title> 20 while common week
|
||||
|
||||
<desc> Description:
|
||||
|
||||
Topic 19 Description Line 1
|
||||
Topic 19 Description Line 2
|
||||
|
||||
<narr> Narrative:
|
||||
|
||||
Topic 19 Narrative Line 1
|
||||
Topic 19 Narrative Line 2
|
||||
|
||||
</top>
|
||||
|
Loading…
x
Reference in New Issue
Block a user