LUCENE-2029: allow separate control over body stored/tokenized in DocMaker

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@833605 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2009-11-06 23:44:10 +00:00
parent 15743fc179
commit 73944292d4
2 changed files with 22 additions and 5 deletions

View File

@ -4,6 +4,10 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
$Id:$
11/04/2009
LUCENE-2029: Added doc.body.stored and doc.body.tokenized; each
falls back to the non-body variant as its default. (Mike McCandless)
10/28/2009
LUCENE-1994: Fix thread safety of EnwikiContentSource and DocMaker
when doc.reuse.fields is false. Also made docs.reuse.fields=true

View File

@ -40,8 +40,12 @@ import org.apache.lucene.document.Field.TermVector;
* (default <b>SingleDocSource</b>).
* <li><b>doc.stored</b> - specifies whether fields should be stored (default
* <b>false</b>).
* <li><b>doc.body.stored</b> - specifies whether the body field should be stored (default
* = <b>doc.stored</b>).
* <li><b>doc.tokenized</b> - specifies whether fields should be tokenized
* (default <b>true</b>).
* <li><b>doc.body.tokenized</b> - specifies whether the
* body field should be tokenized (default = <b>doc.tokenized</b>).
* <li><b>doc.tokenized.norms</b> - specifies whether norms should be stored in
* the index or not. (default <b>false</b>).
* <li><b>doc.body.tokenized.norms</b> - specifies whether norms should be
@ -82,7 +86,7 @@ public class DocMaker {
final Document doc;
DocData docData = new DocData();
public DocState(boolean reuseFields, Store store, Index index, Index bodyIndex, TermVector termVector) {
public DocState(boolean reuseFields, Store store, Store bodyStore, Index index, Index bodyIndex, TermVector termVector) {
this.reuseFields = reuseFields;
@ -90,7 +94,7 @@ public class DocMaker {
fields = new HashMap<String,Field>();
// Initialize the map with the default fields.
fields.put(BODY_FIELD, new Field(BODY_FIELD, "", store, bodyIndex, termVector));
fields.put(BODY_FIELD, new Field(BODY_FIELD, "", bodyStore, bodyIndex, termVector));
fields.put(TITLE_FIELD, new Field(TITLE_FIELD, "", store, index, termVector));
fields.put(DATE_FIELD, new Field(DATE_FIELD, "", store, index, termVector));
fields.put(ID_FIELD, new Field(ID_FIELD, "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
@ -139,6 +143,7 @@ public class DocMaker {
protected Config config;
protected Store storeVal = Store.NO;
protected Store bodyStoreVal = Store.NO;
protected Index indexVal = Index.ANALYZED_NO_NORMS;
protected Index bodyIndexVal = Index.ANALYZED;
protected TermVector termVecVal = TermVector.NO;
@ -207,7 +212,7 @@ public class DocMaker {
bdy = body.substring(0, size); // use part
docData.setBody(body.substring(size)); // some left
}
Field bodyField = ds.getField(BODY_FIELD, storeVal, bodyIndexVal, termVecVal);
Field bodyField = ds.getField(BODY_FIELD, bodyStoreVal, bodyIndexVal, termVecVal);
bodyField.setValue(bdy);
doc.add(bodyField);
@ -241,7 +246,7 @@ public class DocMaker {
protected DocState getDocState() {
DocState ds = docState.get();
if (ds == null) {
ds = new DocState(reuseFields, storeVal, indexVal, bodyIndexVal, termVecVal);
ds = new DocState(reuseFields, storeVal, bodyStoreVal, indexVal, bodyIndexVal, termVecVal);
docState.set(ds);
}
return ds;
@ -375,18 +380,26 @@ public class DocMaker {
}
boolean stored = config.get("doc.stored", false);
boolean bodyStored = config.get("doc.body.stored", stored);
boolean tokenized = config.get("doc.tokenized", true);
boolean bodyTokenized = config.get("doc.body.tokenized", tokenized);
boolean norms = config.get("doc.tokenized.norms", false);
boolean bodyNorms = config.get("doc.body.tokenized.norms", true);
boolean termVec = config.get("doc.term.vector", false);
storeVal = (stored ? Field.Store.YES : Field.Store.NO);
bodyStoreVal = (bodyStored ? Field.Store.YES : Field.Store.NO);
if (tokenized) {
indexVal = norms ? Index.ANALYZED : Index.ANALYZED_NO_NORMS;
bodyIndexVal = bodyNorms ? Index.ANALYZED : Index.ANALYZED_NO_NORMS;
} else {
indexVal = norms ? Index.NOT_ANALYZED : Index.NOT_ANALYZED_NO_NORMS;
}
if (bodyTokenized) {
bodyIndexVal = bodyNorms ? Index.ANALYZED : Index.ANALYZED_NO_NORMS;
} else {
bodyIndexVal = bodyNorms ? Index.NOT_ANALYZED : Index.NOT_ANALYZED_NO_NORMS;
}
boolean termVecPositions = config.get("doc.term.vector.positions", false);
boolean termVecOffsets = config.get("doc.term.vector.offsets", false);
if (termVecPositions && termVecOffsets) {