mirror of https://github.com/apache/lucene.git
LUCENE-947: add creation of & indexing from 'one document per line' text files to minimize IO overhead of creating documents when running tests
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@559366 13f79535-47bb-0310-9956-ffa450edef68
commit 02dd452026
parent 2d16613438
@@ -4,6 +4,11 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
 $Id:$
 
+7/24/07
+
+LUCENE-947: Add support for creating and indexing "one document per
+line" from a large text file, which reduces the per-document overhead
+of opening a single file for each document.
+
 6/30/07
 
 LUCENE-848: Added support for Wikipedia benchmarking.
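As an aside, here is a minimal sketch of the line-file format this change introduces, with a hypothetical file name and contents: each document becomes a single tab-separated line (title, date, body), so reading N documents costs one file open instead of N. The sketch assumes the work/ directory already exists.

import java.io.BufferedWriter;
import java.io.FileWriter;

public class LineFileSketch {
  public static void main(String[] args) throws Exception {
    BufferedWriter out = new BufferedWriter(new FileWriter("work/sample.lines.txt"));
    // One document per line: title <TAB> date <TAB> body <NEWLINE>
    out.write("Sample title\t30-MAR-1987 14:22:36.87\tBody text, with any tabs replaced by spaces.");
    out.newLine();
    out.close();
  }
}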

@@ -0,0 +1,43 @@ conf/createLineFile.alg (new file)
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# -------------------------------------------------------------------------------------
+
+#
+# This alg will process the Reuters documents feed to produce a
+# single file that contains all documents, one per line.
+#
+# To use this, first cd to contrib/benchmark and then run:
+#
+#   ant run-task -Dtask.alg=conf/createLineFile.alg
+#
+# Then, to index the documents in the line file, see
+# indexLineFile.alg.
+#
+
+# Where to get documents from:
+doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker
+
+# Where to write the line file output:
+line.file.out=work/reuters.lines.txt
+
+# Stop after processing the document feed once:
+doc.maker.forever=false
+
+# -------------------------------------------------------------------------------------
+
+# Process all documents, appending each one to the line file:
+{WriteLineDoc()}: *
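A note on that last line: in the benchmark's .alg syntax, the `*` repeat count runs {WriteLineDoc()} until the document feed is exhausted, which is why doc.maker.forever=false is set above. A fixed count also works, as the unit test added by this commit does; a hedged variant with an illustrative count:

# Write only the first 1000 documents to the line file:
{WriteLineDoc()}: 1000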

@@ -0,0 +1,53 @@ conf/indexLineFile.alg (new file)
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one or more
+# * contributor license agreements.  See the NOTICE file distributed with
+# * this work for additional information regarding copyright ownership.
+# * The ASF licenses this file to You under the Apache License, Version 2.0
+# * (the "License"); you may not use this file except in compliance with
+# * the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+# -------------------------------------------------------------------------------------
+
+#
+# This file indexes documents contained in a single text file, one per
+# line.  See createLineFile.alg for how to create this file.  The
+# benefit is that it removes the IO cost of opening one file per
+# document, letting you more accurately measure time spent analyzing
+# and indexing your documents vs. time spent creating the documents.
+#
+# To use this, you must first run createLineFile.alg, then cd to
+# contrib/benchmark and then run:
+#
+#   ant run-task -Dtask.alg=conf/indexLineFile.alg
+#
+
+analyzer=org.apache.lucene.analysis.SimpleAnalyzer
+
+# Feed that knows how to process the line file format:
+doc.maker=org.apache.lucene.benchmark.byTask.feeds.LineDocMaker
+
+# File that contains one document per line:
+docs.file=work/reuters.lines.txt
+
+# Process documents only once:
+doc.maker.forever=false
+
+# -------------------------------------------------------------------------------------
+
+# Reset the system, create a new index, index all docs from the line
+# file, close the index, produce a report.
+
+ResetSystemErase
+CreateIndex
+{AddDoc}: *
+CloseIndex
+
+RepSumByPref AddDoc

@@ -70,6 +70,7 @@ public class PerfRunData {
   private IndexReader indexReader;
   private IndexWriter indexWriter;
   private Config config;
+  private long startTimeMillis;
 
   // constructor
   public PerfRunData (Config config) throws Exception {

@@ -136,6 +137,15 @@ public class PerfRunData {
     // release unused stuff
     System.runFinalization();
     System.gc();
+
+    startTimeMillis = System.currentTimeMillis();
+  }
+
+  /**
+   * @return Start time in milliseconds
+   */
+  public long getStartTimeMillis() {
+    return startTimeMillis;
   }
 
   /**

@@ -39,6 +39,8 @@ import java.util.Iterator;
  * doc.stored=true|FALSE<br/>
  * doc.tokenized=TRUE|false<br/>
  * doc.term.vector=true|FALSE<br/>
+ * doc.term.vector.positions=true|FALSE<br/>
+ * doc.term.vector.offsets=true|FALSE<br/>
  * doc.store.body.bytes=true|FALSE //Store the body contents raw UTF-8 bytes as a field<br/>
  */
 public abstract class BasicDocMaker implements DocMaker {

@@ -55,7 +57,13 @@ public abstract class BasicDocMaker implements DocMaker {
   // leftovers are thread local, because it is unsafe to share residues between threads
   private ThreadLocal leftovr = new ThreadLocal();
 
-  static final String BODY_FIELD = "body";
+  public static final String BODY_FIELD = "body";
+  public static final String TITLE_FIELD = "doctitle";
+  public static final String DATE_FIELD = "docdate";
+  public static final String ID_FIELD = "docid";
+  public static final String BYTES_FIELD = "bytes";
+  public static final String NAME_FIELD = "docname";
 
   private long numBytes = 0;
   private long numUniqueBytes = 0;

@@ -97,17 +105,17 @@ public abstract class BasicDocMaker implements DocMaker {
   private Document createDocument(DocData docData, int size, int cnt) throws UnsupportedEncodingException {
     int docid = incrNumDocsCreated();
     Document doc = new Document();
-    doc.add(new Field("docid", "doc"+docid, storeVal, indexVal, termVecVal));
+    doc.add(new Field(ID_FIELD, "doc"+docid, storeVal, indexVal, termVecVal));
     if (docData.getName()!=null) {
       String name = (cnt<0 ? docData.getName() : docData.getName()+"_"+cnt);
-      doc.add(new Field("docname", name, storeVal, indexVal, termVecVal));
+      doc.add(new Field(NAME_FIELD, name, storeVal, indexVal, termVecVal));
     }
     if (docData.getDate()!=null) {
       String dateStr = DateTools.dateToString(docData.getDate(), DateTools.Resolution.SECOND);
-      doc.add(new Field("docdate", dateStr, storeVal, indexVal, termVecVal));
+      doc.add(new Field(DATE_FIELD, dateStr, storeVal, indexVal, termVecVal));
     }
     if (docData.getTitle()!=null) {
-      doc.add(new Field("doctitle", docData.getTitle(), storeVal, indexVal, termVecVal));
+      doc.add(new Field(TITLE_FIELD, docData.getTitle(), storeVal, indexVal, termVecVal));
     }
     if (docData.getBody()!=null && docData.getBody().length()>0) {
       String bdy;
|
||||||
}
|
}
|
||||||
doc.add(new Field(BODY_FIELD, bdy, storeVal, indexVal, termVecVal));
|
doc.add(new Field(BODY_FIELD, bdy, storeVal, indexVal, termVecVal));
|
||||||
if (storeBytes == true) {
|
if (storeBytes == true) {
|
||||||
doc.add(new Field("bytes", bdy.getBytes("UTF-8"), Field.Store.YES));
|
doc.add(new Field(BYTES_FIELD, bdy.getBytes("UTF-8"), Field.Store.YES));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|

@@ -188,7 +196,18 @@ public abstract class BasicDocMaker implements DocMaker {
     boolean termVec = config.get("doc.term.vector",false);
     storeVal = (stored ? Field.Store.YES : Field.Store.NO);
     indexVal = (tokenized ? Field.Index.TOKENIZED : Field.Index.UN_TOKENIZED);
-    termVecVal = (termVec ? Field.TermVector.YES : Field.TermVector.NO);
+    boolean termVecPositions = config.get("doc.term.vector.positions",false);
+    boolean termVecOffsets = config.get("doc.term.vector.offsets",false);
+    if (termVecPositions && termVecOffsets)
+      termVecVal = Field.TermVector.WITH_POSITIONS_OFFSETS;
+    else if (termVecPositions)
+      termVecVal = Field.TermVector.WITH_POSITIONS;
+    else if (termVecOffsets)
+      termVecVal = Field.TermVector.WITH_OFFSETS;
+    else if (termVec)
+      termVecVal = Field.TermVector.YES;
+    else
+      termVecVal = Field.TermVector.NO;
     storeBytes = config.get("doc.store.body.bytes", false);
     forever = config.get("doc.maker.forever",true);
   }
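For illustration, the new properties compose exactly as the if/else chain above implies; these (hypothetical) .alg lines would make setConfig choose Field.TermVector.WITH_POSITIONS_OFFSETS for every field:

doc.term.vector=true
doc.term.vector.positions=true
doc.term.vector.offsets=true

Note that doc.term.vector=true is then redundant, since positions and offsets together already select the combined option.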

@@ -40,7 +40,7 @@ import java.util.Stack;
  */
 public class DirDocMaker extends BasicDocMaker {
 
-  private DateFormat dateFormat;
+  private ThreadLocal dateFormat = new ThreadLocal();
   private File dataDir = null;
   private int iteration=0;

@@ -148,11 +148,21 @@ public class DirDocMaker extends BasicDocMaker {
     if (inputFiles==null) {
       throw new RuntimeException("No txt files in dataDir: "+dataDir.getAbsolutePath());
     }
-    // date format: 30-MAR-1987 14:22:36
-    dateFormat = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss",Locale.US);
-    dateFormat.setLenient(true);
   }
 
+  // get/initiate a thread-local simple date format (must do so
+  // because SimpleDateFormat is not thread-safe).
+  protected DateFormat getDateFormat () {
+    DateFormat df = (DateFormat) dateFormat.get();
+    if (df == null) {
+      // date format: 30-MAR-1987 14:22:36.87
+      df = new SimpleDateFormat("dd-MMM-yyyy kk:mm:ss.SSS",Locale.US);
+      df.setLenient(true);
+      dateFormat.set(df);
+    }
+    return df;
+  }
+
   protected DocData getNextDocData() throws Exception {
     File f = null;
     String name = null;

@@ -184,7 +194,7 @@ public class DirDocMaker extends BasicDocMaker {
     reader.close();
     addBytes(f.length());
 
-    Date date = dateFormat.parse(dateStr.trim());
+    Date date = getDateFormat().parse(dateStr.trim());
     return new DocData(name, bodyBuf.toString(), title, null, date);
   }

@@ -46,7 +46,7 @@ public class FileBasedQueryMaker extends AbstractQueryMaker implements QueryMaker
 
     Analyzer anlzr = (Analyzer) Class.forName(config.get("analyzer",
         "org.apache.lucene.analysis.standard.StandardAnalyzer")).newInstance();
-    String defaultField = config.get("file.query.maker.default.field", "body");
+    String defaultField = config.get("file.query.maker.default.field", BasicDocMaker.BODY_FIELD);
     QueryParser qp = new QueryParser(defaultField, anlzr);
 
     List qq = new ArrayList();

@@ -0,0 +1,159 @@ LineDocMaker.java (new file)
+package org.apache.lucene.benchmark.byTask.feeds;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.benchmark.byTask.tasks.WriteLineDocTask;
+
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.FileReader;
+
+/**
+ * A DocMaker reading one line at a time as a Document from
+ * a single file.  This saves the IO cost (over DirDocMaker) of
+ * recursing through a directory and opening a new file for
+ * every document.  It also re-uses its Document and Field
+ * instances to improve indexing speed.
+ *
+ * Config properties:
+ * docs.file=&lt;path to the file&gt;
+ */
+public class LineDocMaker extends BasicDocMaker {
+
+  private BufferedReader fileIn;
+  private ThreadLocal docState = new ThreadLocal();
+  private String fileName;
+
+  private static int READER_BUFFER_BYTES = 64*1024;
+
+  private class DocState {
+    Document doc;
+    Field bodyField;
+    Field titleField;
+    Field dateField;
+
+    public DocState() {
+
+      bodyField = new Field(BasicDocMaker.BODY_FIELD,
+                            "",
+                            storeVal,
+                            Field.Index.TOKENIZED,
+                            termVecVal);
+      titleField = new Field(BasicDocMaker.TITLE_FIELD,
+                             "",
+                             storeVal,
+                             Field.Index.TOKENIZED,
+                             termVecVal);
+      dateField = new Field(BasicDocMaker.DATE_FIELD,
+                            "",
+                            storeVal,
+                            Field.Index.TOKENIZED,
+                            termVecVal);
+
+      doc = new Document();
+      doc.add(bodyField);
+      doc.add(titleField);
+      doc.add(dateField);
+    }
+
+    final static String SEP = WriteLineDocTask.SEP;
+
+    public Document setFields(String line) {
+      // title <TAB> date <TAB> body <NEWLINE>
+      int spot = line.indexOf(SEP);
+      titleField.setValue(line.substring(0, spot));
+      int spot2 = line.indexOf(SEP, 1+spot);
+      dateField.setValue(line.substring(1+spot, spot2));
+      bodyField.setValue(line.substring(1+spot2, line.length()));
+      return doc;
+    }
+  }
+
+  /* (non-Javadoc)
+   * @see SimpleDocMaker#setConfig(java.util.Properties)
+   */
+  public void setConfig(Config config) {
+    super.setConfig(config);
+    resetInputs();
+  }
+
+  protected DocData getNextDocData() throws Exception {
+    throw new RuntimeException("not implemented");
+  }
+
+  private DocState getDocState() {
+    DocState ds = (DocState) docState.get();
+    if (ds == null) {
+      ds = new DocState();
+      docState.set(ds);
+    }
+    return ds;
+  }
+
+  public Document makeDocument() throws Exception {
+
+    String line;
+    synchronized(this) {
+      while(true) {
+        line = fileIn.readLine();
+        if (line == null) {
+          if (!forever)
+            throw new NoMoreDataException();
+          else {
+            // Reset the file
+            openFile();
+          }
+        } else {
+          break;
+        }
+      }
+    }
+
+    return getDocState().setFields(line);
+  }
+
+  public Document makeDocument(int size) throws Exception {
+    throw new RuntimeException("cannot change document size with LineDocMaker; please use DirDocMaker instead");
+  }
+
+  public synchronized void resetInputs() {
+    super.resetInputs();
+    fileName = config.get("docs.file", null);
+    if (fileName == null)
+      throw new RuntimeException("docs.file must be set");
+    openFile();
+  }
+
+  private void openFile() {
+    try {
+      if (fileIn != null)
+        fileIn.close();
+      fileIn = new BufferedReader(new FileReader(fileName), READER_BUFFER_BYTES);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  public int numUniqueTexts() {
+    return -1;
+  }
+}
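To make the setFields parsing concrete, a small standalone sketch with a hypothetical input line; the two indexOf calls locate the tab separators and the three substrings become title, date, and body:

String line = "Sample title\t30-MAR-1987 14:22:36.87\tBody text.";
int spot = line.indexOf('\t');           // end of title
int spot2 = line.indexOf('\t', 1+spot);  // end of date
String title = line.substring(0, spot);        // "Sample title"
String date  = line.substring(1+spot, spot2);  // "30-MAR-1987 14:22:36.87"
String body  = line.substring(1+spot2);        // "Body text."

Note the code assumes well-formed lines; a line missing either separator would make a substring call throw StringIndexOutOfBoundsException.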

@@ -71,7 +71,7 @@ public class ReutersQueryMaker extends AbstractQueryMaker implements QueryMaker
    * @return array of Lucene queries
    */
   private static Query[] createQueries(List qs, Analyzer a) {
-    QueryParser qp = new QueryParser("body", a);
+    QueryParser qp = new QueryParser(BasicDocMaker.BODY_FIELD, a);
     List queries = new ArrayList();
     for (int i = 0; i < qs.size(); i++) {
       try {

@@ -107,7 +107,7 @@ public class ReutersQueryMaker extends AbstractQueryMaker implements QueryMaker
 
     List queryList = new ArrayList(20);
     queryList.addAll(Arrays.asList(STANDARD_QUERIES));
-    queryList.addAll(Arrays.asList(getPrebuiltQueries("body")));
+    queryList.addAll(Arrays.asList(getPrebuiltQueries(BasicDocMaker.BODY_FIELD)));
     return createQueries(queryList, anlzr);
   }

@@ -45,11 +45,11 @@ public class SimpleQueryMaker extends AbstractQueryMaker implements QueryMaker {
     Analyzer anlzr= (Analyzer) Class.forName(config.get("analyzer",
         "org.apache.lucene.analysis.standard.StandardAnalyzer")).newInstance();
 
-    QueryParser qp = new QueryParser("body",anlzr);
+    QueryParser qp = new QueryParser(BasicDocMaker.BODY_FIELD,anlzr);
     ArrayList qq = new ArrayList();
-    Query q1 = new TermQuery(new Term("docid","doc2"));
+    Query q1 = new TermQuery(new Term(BasicDocMaker.ID_FIELD,"doc2"));
     qq.add(q1);
-    Query q2 = new TermQuery(new Term("body","simple"));
+    Query q2 = new TermQuery(new Term(BasicDocMaker.BODY_FIELD,"simple"));
     qq.add(q2);
     BooleanQuery bq = new BooleanQuery();
     bq.add(q1,Occur.MUST);

@@ -519,6 +519,8 @@ Here is a list of currently defined properties:
 </li><li>doc.stored
 </li><li>doc.tokenized
 </li><li>doc.term.vector
+</li><li>doc.term.vector.positions
+</li><li>doc.term.vector.offsets
 </li><li>doc.store.body.bytes
 </li><li>docs.dir
 </li><li>query.maker

@@ -540,6 +542,8 @@ Here is a list of currently defined properties:
 </li><li>merge.factor
 </li><li>max.buffered
 </li><li>directory
+</li><li>ram.flush.mb
+</li><li>autocommit
 </li></ul>
 </li>

@@ -20,6 +20,7 @@ package org.apache.lucene.benchmark.byTask.tasks;
 import org.apache.lucene.benchmark.byTask.PerfRunData;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
 import org.apache.lucene.document.Document;
+import java.text.NumberFormat;
 
 /**

@@ -81,7 +82,10 @@ public class AddDocTask extends PerfTask {
       logStep = getRunData().getConfig().get("doc.add.log.step",DEFAULT_ADD_DOC_LOG_STEP);
     }
     if (logStep>0 && (count%logStep)==0) {
-      System.out.println("--> "+Thread.currentThread().getName()+" processed (add) "+count+" docs");
+      double seconds = (System.currentTimeMillis() - getRunData().getStartTimeMillis())/1000.0;
+      NumberFormat nf = NumberFormat.getInstance();
+      nf.setMaximumFractionDigits(2);
+      System.out.println("--> "+nf.format(seconds) + " sec: " + Thread.currentThread().getName()+" processed (add) "+count+" docs");
     }
   }

@@ -30,7 +30,8 @@ import java.io.IOException;
  * Create an index.
  * <br>Other side effects: index writer object in perfRunData is set.
  * <br>Relevant properties: <code>merge.factor, max.buffered,
- * max.field.length</code>.
+ * max.field.length, ram.flush.mb [default 0], autocommit
+ * [default true]</code>.
  */
 public class CreateIndexTask extends PerfTask {

@@ -42,19 +43,23 @@ public class CreateIndexTask extends PerfTask {
     Directory dir = getRunData().getDirectory();
     Analyzer analyzer = getRunData().getAnalyzer();
-
-    IndexWriter iw = new IndexWriter(dir, analyzer, true);
-
     Config config = getRunData().getConfig();
 
     boolean cmpnd = config.get("compound",true);
     int mrgf = config.get("merge.factor",OpenIndexTask.DEFAULT_MERGE_PFACTOR);
     int mxbf = config.get("max.buffered",OpenIndexTask.DEFAULT_MAX_BUFFERED);
     int mxfl = config.get("max.field.length",OpenIndexTask.DEFAULT_MAX_FIELD_LENGTH);
+    double flushAtRAMUsage = config.get("ram.flush.mb", OpenIndexTask.DEFAULT_RAM_FLUSH_MB);
+    boolean autoCommit = config.get("autocommit", OpenIndexTask.DEFAULT_AUTO_COMMIT);
+
+    IndexWriter iw = new IndexWriter(dir, autoCommit, analyzer, true);
+
     iw.setUseCompoundFile(cmpnd);
     iw.setMergeFactor(mrgf);
     iw.setMaxBufferedDocs(mxbf);
     iw.setMaxFieldLength(mxfl);
+    if (flushAtRAMUsage > 0)
+      iw.setRAMBufferSizeMB(flushAtRAMUsage);
+
     getRunData().setIndexWriter(iw);
     return 1;
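A hedged usage sketch of the two new knobs in an .alg file, with illustrative values: a ram.flush.mb greater than 0 makes the task call setRAMBufferSizeMB so the writer flushes by RAM usage rather than buffered-doc count, and autocommit is passed straight to the IndexWriter constructor.

# Flush when buffered documents reach ~32 MB of RAM; open the writer in non-autocommit mode:
ram.flush.mb=32
autocommit=false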

@@ -30,14 +30,16 @@ import java.io.IOException;
  * Open an index writer.
  * <br>Other side effects: index writer object in perfRunData is set.
  * <br>Relevant properties: <code>merge.factor, max.buffered,
- * max.field.length</code>.
+ * max.field.length, ram.flush.mb [default 0], autocommit
+ * [default true]</code>.
  */
 public class OpenIndexTask extends PerfTask {
 
   public static final int DEFAULT_MAX_BUFFERED = 10;
   public static final int DEFAULT_MAX_FIELD_LENGTH = 10000;
   public static final int DEFAULT_MERGE_PFACTOR = 10;
+  public static final int DEFAULT_RAM_FLUSH_MB = 0;
+  public static final boolean DEFAULT_AUTO_COMMIT = true;
 
   public OpenIndexTask(PerfRunData runData) {
     super(runData);

@@ -46,7 +48,6 @@ public class OpenIndexTask extends PerfTask {
   public int doLogic() throws IOException {
     Directory dir = getRunData().getDirectory();
     Analyzer analyzer = getRunData().getAnalyzer();
-    IndexWriter writer = new IndexWriter(dir, analyzer, false);
 
     Config config = getRunData().getConfig();

@@ -54,12 +55,17 @@ public class OpenIndexTask extends PerfTask {
     int mrgf = config.get("merge.factor",DEFAULT_MERGE_PFACTOR);
     int mxbf = config.get("max.buffered",DEFAULT_MAX_BUFFERED);
     int mxfl = config.get("max.field.length",DEFAULT_MAX_FIELD_LENGTH);
+    double flushAtRAMUsage = config.get("ram.flush.mb", OpenIndexTask.DEFAULT_RAM_FLUSH_MB);
+    boolean autoCommit = config.get("autocommit", OpenIndexTask.DEFAULT_AUTO_COMMIT);
+    IndexWriter writer = new IndexWriter(dir, autoCommit, analyzer, false);
 
     // must update params for newly opened writer
     writer.setMaxBufferedDocs(mxbf);
     writer.setMaxFieldLength(mxfl);
     writer.setMergeFactor(mrgf);
     writer.setUseCompoundFile(cmpnd); // this one redundant?
+    if (flushAtRAMUsage > 0)
+      writer.setRAMBufferSizeMB(flushAtRAMUsage);
+
     getRunData().setIndexWriter(writer);
     return 1;

@@ -0,0 +1,137 @@ WriteLineDocTask.java (new file)
+package org.apache.lucene.benchmark.byTask.tasks;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.io.BufferedWriter;
+import java.io.FileWriter;
+
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
+import org.apache.lucene.benchmark.byTask.feeds.BasicDocMaker;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+
+public class WriteLineDocTask extends PerfTask {
+
+  /**
+   * Default value for property <code>doc.writeline.log.step</code> - indicating how often
+   * an "added N docs" message should be logged.
+   */
+  public static final int DEFAULT_WRITELINE_DOC_LOG_STEP = 1000;
+
+  public WriteLineDocTask(PerfRunData runData) {
+    super(runData);
+  }
+
+  private int logStep = -1;
+  private int docSize = 0;
+  int count = 0;
+  private BufferedWriter lineFileOut=null;
+  private DocMaker docMaker;
+
+  public final static String SEP = "\t";
+
+  /*
+   * (non-Javadoc)
+   * @see PerfTask#setup()
+   */
+  public void setup() throws Exception {
+    super.setup();
+    if (lineFileOut==null) {
+      Config config = getRunData().getConfig();
+      String fileName = config.get("line.file.out", null);
+      if (fileName == null)
+        throw new Exception("line.file.out must be set");
+      lineFileOut = new BufferedWriter(new FileWriter(fileName));
+    }
+    docMaker = getRunData().getDocMaker();
+  }
+
+  public void tearDown() throws Exception {
+    log(++count);
+    super.tearDown();
+  }
+
+  public int doLogic() throws Exception {
+    Document doc;
+    if (docSize > 0) {
+      doc = docMaker.makeDocument(docSize);
+    } else {
+      doc = docMaker.makeDocument();
+    }
+
+    Field f = doc.getField(BasicDocMaker.BODY_FIELD);
+
+    String body, title, date;
+    if (f != null)
+      body = f.stringValue().replace('\t', ' ');
+    else
+      body = null;
+
+    f = doc.getField(BasicDocMaker.TITLE_FIELD);
+    if (f != null)
+      title = f.stringValue().replace('\t', ' ');
+    else
+      title = "";
+
+    f = doc.getField(BasicDocMaker.DATE_FIELD);
+    if (f != null)
+      date = f.stringValue().replace('\t', ' ');
+    else
+      date = "";
+
+    if (body != null) {
+      lineFileOut.write(title, 0, title.length());
+      lineFileOut.write(SEP);
+      lineFileOut.write(date, 0, date.length());
+      lineFileOut.write(SEP);
+      lineFileOut.write(body, 0, body.length());
+      lineFileOut.newLine();
+      lineFileOut.flush();
+    }
+    return 1;
+  }
+
+  private void log (int count) {
+    if (logStep<0) {
+      // init once per instance
+      logStep = getRunData().getConfig().get("doc.writeline.log.step", DEFAULT_WRITELINE_DOC_LOG_STEP);
+    }
+    if (logStep>0 && (count%logStep)==0) {
+      System.out.println("--> "+Thread.currentThread().getName()+" processed (add) "+count+" docs");
+    }
+  }
+
+  /**
+   * Set the params (docSize only)
+   * @param params docSize, or 0 for no limit.
+   */
+  public void setParams(String params) {
+    super.setParams(params);
+    docSize = (int) Float.parseFloat(params);
+  }
+
+  /* (non-Javadoc)
+   * @see org.apache.lucene.benchmark.byTask.tasks.PerfTask#supportsParams()
+   */
+  public boolean supportsParams() {
+    return true;
+  }
+}
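For reference, a hypothetical line the task would emit (\t marks the real tab characters written via SEP); documents with no body are skipped entirely rather than written as partial lines:

Sample Reuters title\t26-FEB-1987 15:01:01.79\tShowers continued throughout the week ...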

@@ -22,6 +22,8 @@ import java.io.ByteArrayInputStream;
 import java.io.IOException;
 import java.io.Reader;
 import java.util.ArrayList;
+import java.util.List;
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.Properties;

@@ -110,7 +112,9 @@ public class Config {
 
   private void printProps() {
     System.out.println("------------> config properties:");
-    for (Iterator it = props.keySet().iterator(); it.hasNext();) {
+    List propKeys = new ArrayList(props.keySet());
+    Collections.sort(propKeys);
+    for (Iterator it = propKeys.iterator(); it.hasNext();) {
       String propName = (String) it.next();
       System.out.println(propName + " = " + props.getProperty(propName));
     }

@@ -18,6 +18,9 @@
 package org.apache.lucene.benchmark.byTask;
 
 import java.io.StringReader;
+import java.io.File;
+import java.io.FileReader;
+import java.io.BufferedReader;
 
 import org.apache.lucene.benchmark.byTask.Benchmark;
 import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;

@@ -79,6 +82,7 @@ public class TestPerfTasksLogic extends TestCase {
     iw.close();
     IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
     assertEquals("1000 docs were added to the index, this is what we expect to find!",1000,ir.numDocs());
+    ir.close();
   }
 
   /**

@@ -121,6 +125,7 @@ public class TestPerfTasksLogic extends TestCase {
     iw.close();
     IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
     assertEquals("1 docs were added to the index, this is what we expect to find!",1,ir.numDocs());
+    ir.close();
   }
 
   /**

@@ -150,6 +155,69 @@ public class TestPerfTasksLogic extends TestCase {
     IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
     int ndocsExpected = 21578; // that's how many docs there are in the Reuters collection.
     assertEquals("wrong number of docs in the index!", ndocsExpected, ir.numDocs());
+    ir.close();
+  }
+
+  /**
+   * Test WriteLineDoc and LineDocMaker.
+   */
+  public void testLineDocFile() throws Exception {
+    File lineFile = new File(System.getProperty("tempDir"), "test.reuters.lines.txt");
+
+    // We will call WriteLineDocs this many times
+    final int NUM_TRY_DOCS = 500;
+
+    // Creates a line file with the first 500 docs from Reuters
+    String algLines1[] = {
+      "# ----- properties ",
+      "doc.maker=org.apache.lucene.benchmark.byTask.feeds.ReutersDocMaker",
+      "doc.maker.forever=false",
+      "line.file.out=" + lineFile.getAbsolutePath().replace('\\', '/'),
+      "# ----- alg ",
+      "{WriteLineDoc()}:" + NUM_TRY_DOCS,
+    };
+
+    // Run algo
+    Benchmark benchmark = execBenchmark(algLines1);
+
+    // Verify we got somewhere between 1 and 500 lines (some
+    // Reuters docs have no body, which the WriteLineDoc task
+    // skips).
+    BufferedReader r = new BufferedReader(new FileReader(lineFile));
+    int numLines = 0;
+    while(r.readLine() != null)
+      numLines++;
+    r.close();
+    assertTrue("did not see the right number of docs; should be > 0 and <= " + NUM_TRY_DOCS + " but was " + numLines, numLines > 0 && numLines <= NUM_TRY_DOCS);
+
+    // Index the line docs
+    String algLines2[] = {
+      "# ----- properties ",
+      "analyzer=org.apache.lucene.analysis.SimpleAnalyzer",
+      "doc.maker=org.apache.lucene.benchmark.byTask.feeds.LineDocMaker",
+      "docs.file=" + lineFile.getAbsolutePath().replace('\\', '/'),
+      "doc.maker.forever=false",
+      "autocommit=false",
+      "ram.flush.mb=4",
+      "# ----- alg ",
+      "ResetSystemErase",
+      "CreateIndex",
+      "{AddDoc}: *",
+      "CloseIndex",
+    };
+
+    // Run algo
+    benchmark = execBenchmark(algLines2);
+
+    // now we should be able to open the index for write.
+    IndexWriter iw = new IndexWriter(benchmark.getRunData().getDirectory(),null,false);
+    iw.close();
+
+    IndexReader ir = IndexReader.open(benchmark.getRunData().getDirectory());
+    assertEquals(numLines + " lines were created but " + ir.numDocs() + " docs are in the index", numLines, ir.numDocs());
+    ir.close();
+
+    lineFile.delete();
   }
 
   // create the benchmark and execute it.