mirror of https://github.com/apache/lucene.git
LUCENE-2029: allow separate control over body stored/tokenized in DocMaker
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@833605 13f79535-47bb-0310-9956-ffa450edef68
commit 73944292d4
parent 15743fc179
CHANGES.txt:

@@ -4,6 +4,10 @@ The Benchmark contrib package contains code for benchmarking Lucene in a variety
 $Id:$
 
+11/04/2009
+  LUCENE-2029: Added doc.body.stored and doc.body.tokenized; each
+  falls back to the non-body variant as its default. (Mike McCandless)
+
 10/28/2009
   LUCENE-1994: Fix thread safety of EnwikiContentSource and DocMaker
   when doc.reuse.fields is false. Also made docs.reuse.fields=true
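The fallback described in this entry is implemented by passing the resolved non-body value as the default when the body property is read, so an explicit doc.body.* setting always wins. A minimal sketch of that behavior, assuming the contrib Config(Properties) constructor; the class name BodyPropsDemo is illustrative:

    import java.util.Properties;

    import org.apache.lucene.benchmark.byTask.utils.Config;

    public class BodyPropsDemo {
      public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.setProperty("doc.stored", "true");       // non-body fields inherit this
        props.setProperty("doc.body.stored", "false"); // explicit body override
        Config config = new Config(props);

        // Mirrors the commit: the resolved non-body value is the body default.
        boolean stored = config.get("doc.stored", false);           // true
        boolean bodyStored = config.get("doc.body.stored", stored); // false: override wins
        System.out.println("stored=" + stored + " bodyStored=" + bodyStored);
        // Remove the doc.body.stored line above and bodyStored falls back to true.
      }
    }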
DocMaker.java:

@@ -40,8 +40,12 @@ import org.apache.lucene.document.Field.TermVector;
 * (default <b>SingleDocSource</b>).
 * <li><b>doc.stored</b> - specifies whether fields should be stored (default
 * <b>false</b>).
+* <li><b>doc.body.stored</b> - specifies whether the body field should be stored (default
+* = <b>doc.stored</b>).
 * <li><b>doc.tokenized</b> - specifies whether fields should be tokenized
 * (default <b>true</b>).
+* <li><b>doc.body.tokenized</b> - specifies whether the
+* body field should be tokenized (default = <b>doc.tokenized</b>).
 * <li><b>doc.tokenized.norms</b> - specifies whether norms should be stored in
 * the index or not. (default <b>false</b>).
 * <li><b>doc.body.tokenized.norms</b> - specifies whether norms should be
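Taken together, these properties let a benchmark store only the body while leaving the other fields unstored. A sketch of the Field objects that combination (doc.stored=false, doc.body.stored=true, with the norms defaults listed above) would yield on the 2.9-era Field API; the field names are illustrative stand-ins for DocMaker's constants:

    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.Field.Index;
    import org.apache.lucene.document.Field.Store;
    import org.apache.lucene.document.Field.TermVector;

    public class FieldDefaultsDemo {
      public static void main(String[] args) {
        // body: stored (doc.body.stored=true); doc.body.tokenized.norms defaults to true
        Field body = new Field("body", "", Store.YES, Index.ANALYZED, TermVector.NO);
        // title: not stored (doc.stored=false); doc.tokenized.norms defaults to false
        Field title = new Field("doctitle", "", Store.NO, Index.ANALYZED_NO_NORMS, TermVector.NO);
        System.out.println(body);
        System.out.println(title);
      }
    }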
@@ -82,7 +86,7 @@ public class DocMaker {
     final Document doc;
     DocData docData = new DocData();
 
-    public DocState(boolean reuseFields, Store store, Index index, Index bodyIndex, TermVector termVector) {
+    public DocState(boolean reuseFields, Store store, Store bodyStore, Index index, Index bodyIndex, TermVector termVector) {
 
       this.reuseFields = reuseFields;
 
@@ -90,7 +94,7 @@ public class DocMaker {
       fields = new HashMap<String,Field>();
 
       // Initialize the map with the default fields.
-      fields.put(BODY_FIELD, new Field(BODY_FIELD, "", store, bodyIndex, termVector));
+      fields.put(BODY_FIELD, new Field(BODY_FIELD, "", bodyStore, bodyIndex, termVector));
       fields.put(TITLE_FIELD, new Field(TITLE_FIELD, "", store, index, termVector));
       fields.put(DATE_FIELD, new Field(DATE_FIELD, "", store, index, termVector));
       fields.put(ID_FIELD, new Field(ID_FIELD, "", Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
@@ -139,6 +143,7 @@ public class DocMaker {
   protected Config config;
 
   protected Store storeVal = Store.NO;
+  protected Store bodyStoreVal = Store.NO;
   protected Index indexVal = Index.ANALYZED_NO_NORMS;
   protected Index bodyIndexVal = Index.ANALYZED;
   protected TermVector termVecVal = TermVector.NO;
@@ -207,7 +212,7 @@ public class DocMaker {
       bdy = body.substring(0, size); // use part
       docData.setBody(body.substring(size)); // some left
     }
-    Field bodyField = ds.getField(BODY_FIELD, storeVal, bodyIndexVal, termVecVal);
+    Field bodyField = ds.getField(BODY_FIELD, bodyStoreVal, bodyIndexVal, termVecVal);
     bodyField.setValue(bdy);
     doc.add(bodyField);
 
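The getField call above is where doc.reuse.fields pays off: with reuse enabled, a cached per-name Field is returned and only its value is swapped via setValue, avoiding a new Field allocation per document. A simplified sketch of that pattern; the lazy map and class name are illustrative, not the actual DocMaker code:

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.Field.Index;
    import org.apache.lucene.document.Field.Store;
    import org.apache.lucene.document.Field.TermVector;

    public class FieldReuseDemo {
      private final Map<String,Field> fields = new HashMap<String,Field>();
      private final boolean reuseFields = true; // doc.reuse.fields

      Field getField(String name, Store store, Index index, TermVector tv) {
        if (!reuseFields) {
          return new Field(name, "", store, index, tv); // fresh Field per document
        }
        Field f = fields.get(name);
        if (f == null) {
          f = new Field(name, "", store, index, tv);
          fields.put(name, f); // cache for subsequent documents
        }
        return f;
      }

      public static void main(String[] args) {
        FieldReuseDemo demo = new FieldReuseDemo();
        Field body = demo.getField("body", Store.YES, Index.ANALYZED, TermVector.NO);
        body.setValue("document text goes here"); // swap content without reallocating
        System.out.println(body);
      }
    }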
@@ -241,7 +246,7 @@ public class DocMaker {
   protected DocState getDocState() {
     DocState ds = docState.get();
     if (ds == null) {
-      ds = new DocState(reuseFields, storeVal, indexVal, bodyIndexVal, termVecVal);
+      ds = new DocState(reuseFields, storeVal, bodyStoreVal, indexVal, bodyIndexVal, termVecVal);
       docState.set(ds);
     }
     return ds;
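getDocState() is the thread-safety seam from LUCENE-1994: each indexing thread lazily builds and caches its own DocState, so reused Field instances are never shared across threads, and any new per-field setting (here bodyStoreVal) has to be threaded through this constructor. A compact sketch of the same per-thread pattern, with DemoState standing in for the package-private DocState:

    public class PerThreadStateDemo {
      static final class DemoState {
        int docsProduced; // per-thread scratch state
      }

      private final ThreadLocal<DemoState> docState = new ThreadLocal<DemoState>();

      DemoState getDocState() {
        DemoState ds = docState.get();
        if (ds == null) {   // first use on this thread: create and cache
          ds = new DemoState();
          docState.set(ds);
        }
        return ds;
      }

      public static void main(String[] args) throws InterruptedException {
        final PerThreadStateDemo demo = new PerThreadStateDemo();
        Runnable r = new Runnable() {
          public void run() {
            // Each thread always sees its own single state instance.
            System.out.println(demo.getDocState() == demo.getDocState()); // true
          }
        };
        Thread t1 = new Thread(r), t2 = new Thread(r);
        t1.start(); t2.start();
        t1.join(); t2.join();
      }
    }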
@@ -375,18 +380,26 @@ public class DocMaker {
     }
 
     boolean stored = config.get("doc.stored", false);
+    boolean bodyStored = config.get("doc.body.stored", stored);
     boolean tokenized = config.get("doc.tokenized", true);
+    boolean bodyTokenized = config.get("doc.body.tokenized", tokenized);
     boolean norms = config.get("doc.tokenized.norms", false);
     boolean bodyNorms = config.get("doc.body.tokenized.norms", true);
     boolean termVec = config.get("doc.term.vector", false);
     storeVal = (stored ? Field.Store.YES : Field.Store.NO);
+    bodyStoreVal = (bodyStored ? Field.Store.YES : Field.Store.NO);
     if (tokenized) {
       indexVal = norms ? Index.ANALYZED : Index.ANALYZED_NO_NORMS;
-      bodyIndexVal = bodyNorms ? Index.ANALYZED : Index.ANALYZED_NO_NORMS;
     } else {
       indexVal = norms ? Index.NOT_ANALYZED : Index.NOT_ANALYZED_NO_NORMS;
+    }
+
+    if (bodyTokenized) {
+      bodyIndexVal = bodyNorms ? Index.ANALYZED : Index.ANALYZED_NO_NORMS;
+    } else {
       bodyIndexVal = bodyNorms ? Index.NOT_ANALYZED : Index.NOT_ANALYZED_NO_NORMS;
     }
 
     boolean termVecPositions = config.get("doc.term.vector.positions", false);
     boolean termVecOffsets = config.get("doc.term.vector.offsets", false);
     if (termVecPositions && termVecOffsets) {
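Splitting the branch this way decouples body tokenization from the other fields. A sketch of how the Index values resolve when doc.tokenized=false but doc.body.tokenized=true under the default norms settings; the class name is illustrative:

    import org.apache.lucene.document.Field.Index;

    public class IndexValsDemo {
      public static void main(String[] args) {
        boolean tokenized = false, norms = false;       // doc.tokenized, doc.tokenized.norms
        boolean bodyTokenized = true, bodyNorms = true; // doc.body.tokenized, doc.body.tokenized.norms

        Index indexVal;
        if (tokenized) {
          indexVal = norms ? Index.ANALYZED : Index.ANALYZED_NO_NORMS;
        } else {
          indexVal = norms ? Index.NOT_ANALYZED : Index.NOT_ANALYZED_NO_NORMS;
        }

        Index bodyIndexVal;
        if (bodyTokenized) {
          bodyIndexVal = bodyNorms ? Index.ANALYZED : Index.ANALYZED_NO_NORMS;
        } else {
          bodyIndexVal = bodyNorms ? Index.NOT_ANALYZED : Index.NOT_ANALYZED_NO_NORMS;
        }

        // Prints NOT_ANALYZED_NO_NORMS / ANALYZED: only the body is analyzed.
        System.out.println(indexVal + " / " + bodyIndexVal);
      }
    }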