diff --git a/contrib/benchmark/CHANGES.txt b/contrib/benchmark/CHANGES.txt index 16a244c0176..02b56b3bd34 100644 --- a/contrib/benchmark/CHANGES.txt +++ b/contrib/benchmark/CHANGES.txt @@ -4,6 +4,10 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety $Id:$ +1/14/2010 + LUCENE-2210: TrecTopicsReader now properly reads descriptions and + narratives from trec topics files. (Robert Muir) + 1/11/2010 LUCENE-2181: Add a benchmark for collation. This adds NewLocaleTask, which sets a Locale in the run data for collation to use, and can be diff --git a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java index 5f940f14f64..24c0a92d1ee 100644 --- a/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java +++ b/contrib/benchmark/src/java/org/apache/lucene/benchmark/quality/trec/TrecTopicsReader.java @@ -76,16 +76,31 @@ public class TrecTopicsReader { k = sb.indexOf(">"); String title = sb.substring(k+1).trim(); // description - sb = read(reader,"",null,false,false); - sb = read(reader,"",null,false,true); - String descripion = sb.toString().trim(); + read(reader,"",null,false,false); + sb.setLength(0); + String line = null; + while ((line = reader.readLine()) != null) { + if (line.startsWith("")) + break; + if (sb.length() > 0) sb.append(' '); + sb.append(line); + } + String description = sb.toString().trim(); + // narrative + sb.setLength(0); + while ((line = reader.readLine()) != null) { + if (line.startsWith("")) + break; + if (sb.length() > 0) sb.append(' '); + sb.append(line); + } + String narrative = sb.toString().trim(); // we got a topic! fields.put("title",title); - fields.put("description",descripion); + fields.put("description",description); + fields.put("narrative", narrative); QualityQuery topic = new QualityQuery(id,fields); res.add(topic); - // skip narrative, get to end of doc - read(reader,"",null,false,false); } } finally { reader.close(); diff --git a/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java b/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java index adfa0f6af57..47341856915 100644 --- a/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java +++ b/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/TestQualityRun.java @@ -149,6 +149,49 @@ public class TestQualityRun extends TestCase { } + + public void testTrecTopicsReader() throws Exception { + File workDir = new File(System.getProperty("benchmark.work.dir","work")); + assertTrue("Bad workDir: " + workDir, + workDir.exists() && workDir.isDirectory()); + + // for topics/qrels files: + // src/test/org/apache/lucene/benchmark/quality + File srcTestDir = new File(new File(new File(new File(new File( + new File(new File(workDir.getAbsoluteFile().getParentFile(), + "src"),"test"),"org"),"apache"),"lucene"),"benchmark"),"quality"); + + // prepare topics + File topicsFile = new File(srcTestDir, "trecTopics.txt"); + assertTrue("Bad topicsFile: " + topicsFile, + topicsFile.exists() && topicsFile.isFile()); + TrecTopicsReader qReader = new TrecTopicsReader(); + QualityQuery qqs[] = qReader.readQueries( + new BufferedReader(new FileReader(topicsFile))); + + assertEquals(20, qqs.length); + + QualityQuery qq = qqs[0]; + assertEquals("statement months total 1987", qq.getValue("title")); + assertEquals("Topic 0 Description Line 1 Topic 0 Description Line 2", + qq.getValue("description")); + assertEquals("Topic 0 Narrative Line 1 Topic 0 Narrative Line 2", + qq.getValue("narrative")); + + qq = qqs[1]; + assertEquals("agreed 15 against five", qq.getValue("title")); + assertEquals("Topic 1 Description Line 1 Topic 1 Description Line 2", + qq.getValue("description")); + assertEquals("Topic 1 Narrative Line 1 Topic 1 Narrative Line 2", + qq.getValue("narrative")); + + qq = qqs[19]; + assertEquals("20 while common week", qq.getValue("title")); + assertEquals("Topic 19 Description Line 1 Topic 19 Description Line 2", + qq.getValue("description")); + assertEquals("Topic 19 Narrative Line 1 Topic 19 Narrative Line 2", + qq.getValue("narrative")); + } // use benchmark logic to create the full Reuters index private void createReutersIndex() throws Exception { diff --git a/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt b/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt index f0ce677b4f5..2f3ada20fe4 100755 --- a/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt +++ b/contrib/benchmark/src/test/org/apache/lucene/benchmark/quality/trecTopics.txt @@ -26,10 +26,12 @@ statement months total 1987 <desc> Description: - +Topic 0 Description Line 1 +Topic 0 Description Line 2 <narr> Narrative: - +Topic 0 Narrative Line 1 +Topic 0 Narrative Line 2 </top> @@ -39,10 +41,12 @@ <title> agreed 15 against five <desc> Description: - +Topic 1 Description Line 1 +Topic 1 Description Line 2 <narr> Narrative: - +Topic 1 Narrative Line 1 +Topic 1 Narrative Line 2 </top> @@ -273,9 +277,11 @@ <title> 20 while common week <desc> Description: - +Topic 19 Description Line 1 +Topic 19 Description Line 2 <narr> Narrative: - +Topic 19 Narrative Line 1 +Topic 19 Narrative Line 2 </top>