LUCENE-1102: EnwikiDocMaker now adds a docid field

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@607732 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Grant Ingersoll 2007-12-31 13:07:14 +00:00
parent f39f15ec43
commit 90a735441f
3 changed files with 22 additions and 5 deletions

View File

@ -4,6 +4,10 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
$Id:$ $Id:$
12/31/07
LUCENE-1102: EnwikiDocMaker now indexes the docid field, so benchmark results may not be directly comparable with results obtained before this change,
although this one small field is unlikely to make much difference.
12/13/07 12/13/07
LUCENE-1086: DocMakers setup for the "docs.dir" property LUCENE-1086: DocMakers setup for the "docs.dir" property
fixed to properly handle absolute paths. (Shai Erera via Doron Cohen) fixed to properly handle absolute paths. (Shai Erera via Doron Cohen)

View File

@ -36,7 +36,8 @@ public class EnwikiDocMaker extends LineDocMaker {
static final int TITLE = 0; static final int TITLE = 0;
static final int DATE = TITLE+1; static final int DATE = TITLE+1;
static final int BODY = DATE+1; static final int BODY = DATE+1;
static final int LENGTH = BODY+1; static final int ID = BODY + 1;
static final int LENGTH = ID+1;
static final String[] months = {"JAN", "FEB", "MAR", "APR", static final String[] months = {"JAN", "FEB", "MAR", "APR",
"MAY", "JUN", "JUL", "AUG", "MAY", "JUN", "JUL", "AUG",
@ -113,8 +114,9 @@ public class EnwikiDocMaker extends LineDocMaker {
String title; String title;
String body; String body;
String time; String time;
String id;
static final int BASE = 10;
public void startElement(String namespace, public void startElement(String namespace,
String simple, String simple,
@ -124,12 +126,15 @@ public class EnwikiDocMaker extends LineDocMaker {
title = null; title = null;
body = null; body = null;
time = null; time = null;
id = null;
} else if (qualified.equals("text")) { } else if (qualified.equals("text")) {
contents.setLength(0); contents.setLength(0);
} else if (qualified.equals("timestamp")) { } else if (qualified.equals("timestamp")) {
contents.setLength(0); contents.setLength(0);
} else if (qualified.equals("title")) { } else if (qualified.equals("title")) {
contents.setLength(0); contents.setLength(0);
} else if (qualified.equals("id")) {
contents.setLength(0);
} }
} }
@ -148,11 +153,12 @@ public class EnwikiDocMaker extends LineDocMaker {
return buffer.toString(); return buffer.toString();
} }
public void create(String title, String time, String body) { public void create(String title, String time, String body, String id) {
String[] t = new String[LENGTH]; String[] t = new String[LENGTH];
t[TITLE] = title.replace('\t', ' '); t[TITLE] = title.replace('\t', ' ');
t[DATE] = time.replace('\t', ' '); t[DATE] = time.replace('\t', ' ');
t[BODY] = body.replaceAll("[\t\n]", " "); t[BODY] = body.replaceAll("[\t\n]", " ");
t[ID] = id;
synchronized(this) { synchronized(this) {
while(tuple!=null) { while(tuple!=null) {
try { try {
@ -177,9 +183,12 @@ public class EnwikiDocMaker extends LineDocMaker {
} }
} else if (qualified.equals("timestamp")) { } else if (qualified.equals("timestamp")) {
time = time(contents.toString()); time = time(contents.toString());
} else if (qualified.equals("page")) { } else if (qualified.equals("id") && id == null) {//just get the first id
id = contents.toString();
}
else if (qualified.equals("page")) {
if (body != null) { if (body != null) {
create(title, time, body); create(title, time, body, id);
} }
} }
} }
@ -192,6 +201,7 @@ public class EnwikiDocMaker extends LineDocMaker {
titleField.setValue(tuple[TITLE]); titleField.setValue(tuple[TITLE]);
dateField.setValue(tuple[DATE]); dateField.setValue(tuple[DATE]);
bodyField.setValue(tuple[BODY]); bodyField.setValue(tuple[BODY]);
idField.setValue(tuple[ID]);
return doc; return doc;
} }
} }

View File

@ -52,6 +52,7 @@ public class LineDocMaker extends BasicDocMaker {
Field bodyField; Field bodyField;
Field titleField; Field titleField;
Field dateField; Field dateField;
Field idField;
public DocState() { public DocState() {
@ -70,11 +71,13 @@ public class LineDocMaker extends BasicDocMaker {
storeVal, storeVal,
Field.Index.TOKENIZED, Field.Index.TOKENIZED,
termVecVal); termVecVal);
idField = new Field(BasicDocMaker.ID_FIELD, "", Field.Store.YES, Field.Index.NO_NORMS);
doc = new Document(); doc = new Document();
doc.add(bodyField); doc.add(bodyField);
doc.add(titleField); doc.add(titleField);
doc.add(dateField); doc.add(dateField);
doc.add(idField);
} }
final static String SEP = WriteLineDocTask.SEP; final static String SEP = WriteLineDocTask.SEP;