mirror of https://github.com/apache/lucene.git

LUCENE-1257: Add generics to highlighter and more for benchmark

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@829524 13f79535-47bb-0310-9956-ffa450edef68

parent 519095db6e
commit 8086aad514

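Nearly all of the changes below follow two mechanical patterns. First, raw collection types (HashMap, ArrayList, Iterator, ...) gain type parameters, which removes the downcast at every read. Second, objects created reflectively from configured class names go through Class.asSubclass(), so newInstance() is statically typed instead of being cast unchecked. A minimal sketch of both idioms; the GenericsIdioms class and its instantiate() helper are illustrative stand-ins, not part of the commit:

```java
import java.util.HashMap;
import java.util.Map;

public class GenericsIdioms {
  // Raw type, pre-Java-5 style: every get() needed a cast.
  //   Map extensionToType = new HashMap();
  //   Integer t = (Integer) extensionToType.get(".bz2");
  // Generified: the cast disappears.
  static final Map<String, Integer> extensionToType = new HashMap<String, Integer>();

  // Reflective construction: asSubclass() checks the loaded class against the
  // expected type, so newInstance() already returns T (no unchecked cast).
  static <T> T instantiate(String className, Class<T> type) throws Exception {
    return Class.forName(className).asSubclass(type).newInstance();
  }
}
```
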
PerfRunData.java

@@ -20,7 +20,6 @@ package org.apache.lucene.benchmark.byTask;
 import java.io.File;
 import java.io.IOException;
 import java.util.HashMap;
-import java.util.Iterator;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
@@ -64,8 +63,8 @@ public class PerfRunData {
   private DocMaker docMaker;
 
   // we use separate (identical) instances for each "read" task type, so each can iterate the quries separately.
-  private HashMap readTaskQueryMaker;
-  private Class qmkrClass;
+  private HashMap<Class<? extends ReadTask>,QueryMaker> readTaskQueryMaker;
+  private Class<? extends QueryMaker> qmkrClass;
 
   private IndexReader indexReader;
   private IndexSearcher indexSearcher;
@@ -80,12 +79,12 @@ public class PerfRunData {
     analyzer = NewAnalyzerTask.createAnalyzer(config.get("analyzer",
         "org.apache.lucene.analysis.standard.StandardAnalyzer"));
     // doc maker
-    docMaker = (DocMaker) Class.forName(config.get("doc.maker",
-        "org.apache.lucene.benchmark.byTask.feeds.DocMaker")).newInstance();
+    docMaker = Class.forName(config.get("doc.maker",
+        "org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance();
     docMaker.setConfig(config);
     // query makers
-    readTaskQueryMaker = new HashMap();
-    qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker"));
+    readTaskQueryMaker = new HashMap<Class<? extends ReadTask>,QueryMaker>();
+    qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker")).asSubclass(QueryMaker.class);
 
     // index stuff
     reinit(false);
@@ -239,9 +238,8 @@ public class PerfRunData {
 
   public void resetInputs() throws IOException {
     docMaker.resetInputs();
-    Iterator it = readTaskQueryMaker.values().iterator();
-    while (it.hasNext()) {
-      ((QueryMaker) it.next()).resetInputs();
+    for (final QueryMaker queryMaker : readTaskQueryMaker.values()) {
+      queryMaker.resetInputs();
     }
   }
 
@@ -251,11 +249,11 @@ public class PerfRunData {
   synchronized public QueryMaker getQueryMaker(ReadTask readTask) {
     // mapping the query maker by task class allows extending/adding new search/read tasks
     // without needing to modify this class.
-    Class readTaskClass = readTask.getClass();
-    QueryMaker qm = (QueryMaker) readTaskQueryMaker.get(readTaskClass);
+    Class<? extends ReadTask> readTaskClass = readTask.getClass();
+    QueryMaker qm = readTaskQueryMaker.get(readTaskClass);
     if (qm == null) {
       try {
-        qm = (QueryMaker) qmkrClass.newInstance();
+        qm = qmkrClass.newInstance();
         qm.setConfig(config);
       } catch (Exception e) {
         throw new RuntimeException(e);

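The PerfRunData hunks above also generify the per-task QueryMaker cache: the map is keyed by Class<? extends ReadTask>, so each read-task type lazily receives its own QueryMaker instance without any casts. A self-contained model of that pattern, with stand-in QueryMaker/ReadTask placeholders for the real benchmark types:

```java
import java.util.HashMap;

public class QueryMakerCache {
  public interface QueryMaker { /* elided */ }
  public static abstract class ReadTask { /* elided */ }

  private final HashMap<Class<? extends ReadTask>, QueryMaker> readTaskQueryMaker =
      new HashMap<Class<? extends ReadTask>, QueryMaker>();
  private final Class<? extends QueryMaker> qmkrClass;

  public QueryMakerCache(Class<? extends QueryMaker> qmkrClass) {
    this.qmkrClass = qmkrClass;
  }

  public synchronized QueryMaker getQueryMaker(ReadTask readTask) {
    // Keyed by concrete task class: new ReadTask subclasses get their own
    // QueryMaker without this class having to know about them.
    QueryMaker qm = readTaskQueryMaker.get(readTask.getClass());  // no cast needed
    if (qm == null) {
      try {
        qm = qmkrClass.newInstance();  // statically typed as QueryMaker
      } catch (Exception e) {
        throw new RuntimeException(e);
      }
      readTaskQueryMaker.put(readTask.getClass(), qm);
    }
    return qm;
  }
}
```
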
ContentSource.java

@@ -57,7 +57,7 @@ public abstract class ContentSource {
 
   private static final int BZIP = 0;
   private static final int OTHER = 1;
-  private static final Map extensionToType = new HashMap();
+  private static final Map<String,Integer> extensionToType = new HashMap<String,Integer>();
   static {
     extensionToType.put(".bz2", Integer.valueOf(BZIP));
     extensionToType.put(".bzip", Integer.valueOf(BZIP));
@@ -93,7 +93,7 @@ public abstract class ContentSource {
    * a given directory. The collected {@link File} instances are stored in the
    * given <code>files</code>.
    */
-  protected final void collectFiles(File dir, ArrayList files) {
+  protected final void collectFiles(File dir, ArrayList<File> files) {
     if (!dir.canRead()) {
       return;
     }
@@ -125,7 +125,7 @@ public abstract class ContentSource {
     int idx = fileName.lastIndexOf('.');
     int type = OTHER;
     if (idx != -1) {
-      Integer typeInt = (Integer) extensionToType.get(fileName.substring(idx));
+      Integer typeInt = extensionToType.get(fileName.substring(idx));
       if (typeInt != null) {
         type = typeInt.intValue();
       }

DirContentSource.java

@@ -51,13 +51,12 @@ public class DirContentSource extends ContentSource {
     ParsePosition pos;
   }
 
-  public static class Iterator implements java.util.Iterator {
-
-    static class Comparator implements java.util.Comparator {
-      public int compare(Object _a, Object _b) {
+  public static class Iterator implements java.util.Iterator<File> {
+    static class Comparator implements java.util.Comparator<File> {
+      public int compare(File _a, File _b) {
         String a = _a.toString();
         String b = _b.toString();
 
         int diff = a.length() - b.length();
 
         if (diff > 0) {
@@ -79,7 +78,7 @@ public class DirContentSource extends ContentSource {
 
     int count = 0;
 
-    Stack stack = new Stack();
+    Stack<File> stack = new Stack<File>();
 
     /* this seems silly ... there must be a better way ...
        not that this is good, but can it matter? */
@@ -94,10 +93,10 @@ public class DirContentSource extends ContentSource {
       if (stack.empty()) {
         return;
       }
-      if (!((File)stack.peek()).isDirectory()) {
+      if (!(stack.peek()).isDirectory()) {
        return;
      }
-      File f = (File)stack.pop();
+      File f = stack.pop();
      push(f);
    }
 
@@ -133,10 +132,10 @@ public class DirContentSource extends ContentSource {
      return stack.size() > 0;
    }
 
-    public Object next() {
+    public File next() {
      assert hasNext();
      count++;
-      Object object = stack.pop();
+      File object = stack.pop();
      // System.err.println("pop " + object);
      find();
      return object;
@@ -148,7 +147,7 @@ public class DirContentSource extends ContentSource {
 
   }
 
-  private ThreadLocal dateFormat = new ThreadLocal();
+  private ThreadLocal<DateFormatInfo> dateFormat = new ThreadLocal<DateFormatInfo>();
   private File dataDir = null;
   private int iteration = 0;
   private Iterator inputFiles = null;
@@ -156,7 +155,7 @@ public class DirContentSource extends ContentSource {
   // get/initiate a thread-local simple date format (must do so
   // because SimpleDateFormat is not thread-safe).
   private DateFormatInfo getDateFormatInfo() {
-    DateFormatInfo dfi = (DateFormatInfo) dateFormat.get();
+    DateFormatInfo dfi = dateFormat.get();
     if (dfi == null) {
       dfi = new DateFormatInfo();
       dfi.pos = new ParsePosition(0);
@@ -191,7 +190,7 @@ public class DirContentSource extends ContentSource {
      inputFiles = new Iterator(dataDir);
      iteration++;
    }
-    f = (File) inputFiles.next();
+    f = inputFiles.next();
    // System.err.println(f);
    name = f.getCanonicalPath()+"_"+iteration;
  }

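DirContentSource's nested Iterator now implements java.util.Iterator<File>, so next() returns File and callers such as `f = inputFiles.next();` drop their casts. A compressed sketch of a typed directory iterator in the same spirit; the traversal below is deliberately simplified and is not DirContentSource's actual ordering logic:

```java
import java.io.File;
import java.util.Stack;

class FileIterator implements java.util.Iterator<File> {
  private final Stack<File> stack = new Stack<File>();  // Stack<File>, as in the patch

  FileIterator(File start) { stack.push(start); }

  public boolean hasNext() { return !stack.empty(); }

  public File next() {                    // was: public Object next()
    File f = stack.pop();                 // was: (File) stack.pop()
    File[] children = f.listFiles();
    if (children != null) {               // null for plain files / unreadable dirs
      for (File child : children) stack.push(child);
    }
    return f;
  }

  public void remove() { throw new UnsupportedOperationException(); }
}
```
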
DocMaker.java

@@ -20,10 +20,8 @@ package org.apache.lucene.benchmark.byTask.feeds;
 import java.io.IOException;
 import java.io.UnsupportedEncodingException;
 import java.util.HashMap;
-import java.util.Iterator;
 import java.util.Map;
 import java.util.Properties;
-import java.util.Map.Entry;
 import java.util.Random;
 
 import org.apache.lucene.benchmark.byTask.utils.Config;
@@ -79,7 +77,7 @@ public class DocMaker {
 
   static class DocState {
 
-    private final Map fields;
+    private final Map<String,Field> fields;
     private final boolean reuseFields;
     final Document doc;
     DocData docData = new DocData();
@@ -89,7 +87,7 @@ public class DocMaker {
       this.reuseFields = reuseFields;
 
       if (reuseFields) {
-        fields = new HashMap();
+        fields = new HashMap<String,Field>();
 
         // Initialize the map with the default fields.
         fields.put(BODY_FIELD, new Field(BODY_FIELD, "", store, bodyIndex, termVector));
@@ -115,7 +113,7 @@ public class DocMaker {
        return new Field(name, "", store, index, termVector);
      }
 
-      Field f = (Field) fields.get(name);
+      Field f = fields.get(name);
      if (f == null) {
        f = new Field(name, "", store, index, termVector);
        fields.put(name, f);
@@ -128,8 +126,8 @@ public class DocMaker {
   private boolean storeBytes = false;
 
   // leftovers are thread local, because it is unsafe to share residues between threads
-  private ThreadLocal leftovr = new ThreadLocal();
-  private ThreadLocal docState = new ThreadLocal();
+  private ThreadLocal<LeftOver> leftovr = new ThreadLocal<LeftOver>();
+  private ThreadLocal<DocState> docState = new ThreadLocal<DocState>();
 
   public static final String BODY_FIELD = "body";
   public static final String TITLE_FIELD = "doctitle";
@@ -224,8 +222,7 @@ public class DocMaker {
     if (indexProperties) {
       Properties props = docData.getProps();
       if (props != null) {
-        for (Iterator iterator = props.entrySet().iterator(); iterator.hasNext();) {
-          Entry entry = (Entry) iterator.next();
+        for (final Map.Entry<Object,Object> entry : props.entrySet()) {
          Field f = ds.getField((String) entry.getKey(), storeVal, indexVal, termVecVal);
          f.setValue((String) entry.getValue());
          doc.add(f);
@@ -243,7 +240,7 @@ public class DocMaker {
   }
 
   protected DocState getDocState() {
-    DocState ds = (DocState) docState.get();
+    DocState ds = docState.get();
     if (ds == null) {
       ds = new DocState(true, storeVal, indexVal, bodyIndexVal, termVecVal);
       docState.set(ds);
@@ -299,7 +296,7 @@ public class DocMaker {
    * given size input by <code>size</code>.
    */
   public Document makeDocument(int size) throws Exception {
-    LeftOver lvr = (LeftOver) leftovr.get();
+    LeftOver lvr = leftovr.get();
     if (lvr == null || lvr.docdata == null || lvr.docdata.getBody() == null
         || lvr.docdata.getBody().length() == 0) {
       resetLeftovers();
@@ -371,7 +368,7 @@ public class DocMaker {
     this.config = config;
     try {
       String sourceClass = config.get("content.source", "org.apache.lucene.benchmark.byTask.feeds.SingleDocSource");
-      source = (ContentSource) Class.forName(sourceClass).newInstance();
+      source = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
       source.setConfig(config);
     } catch (Exception e) {
       // Should not get here. Throw runtime exception.
@@ -413,7 +410,7 @@ public class DocMaker {
       // In a multi-rounds run, it is important to reset DocState since settings
       // of fields may change between rounds, and this is the only way to reset
       // the cache of all threads.
-      docState = new ThreadLocal();
+      docState = new ThreadLocal<DocState>();
     }
 
     indexProperties = config.get("doc.index.props", false);

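The same ThreadLocal generification recurs in DocMaker, DirContentSource, ReutersContentSource, and TrecContentSource: once the ThreadLocal carries a type parameter, the lazy get-or-create idiom loses its cast. A self-contained sketch, with Holder standing in for DocState/DateFormatInfo:

```java
import java.text.SimpleDateFormat;

class PerThreadState {
  // One SimpleDateFormat per thread, because SimpleDateFormat is not thread-safe.
  static class Holder {
    final SimpleDateFormat df = new SimpleDateFormat("dd-MMM-yyyy");
  }

  private static final ThreadLocal<Holder> holders = new ThreadLocal<Holder>();

  static Holder get() {
    Holder h = holders.get();   // was: (Holder) holders.get()
    if (h == null) {
      h = new Holder();
      holders.set(h);
    }
    return h;
  }
}
```
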
EnwikiContentSource.java

@@ -220,7 +220,7 @@ public class EnwikiContentSource extends ContentSource {
     }
   }
 
-  private static final Map ELEMENTS = new HashMap();
+  private static final Map<String,Integer> ELEMENTS = new HashMap<String,Integer>();
   private static final int TITLE = 0;
   private static final int DATE = TITLE + 1;
   private static final int BODY = DATE + 1;
@@ -248,7 +248,7 @@ public class EnwikiContentSource extends ContentSource {
    * the element qualified name over and over.
    */
   private final static int getElementType(String elem) {
-    Integer val = (Integer) ELEMENTS.get(elem);
+    Integer val = ELEMENTS.get(elem);
     return val == null ? -1 : val.intValue();
   }
 

EnwikiDocMaker.java

@@ -18,11 +18,6 @@ package org.apache.lucene.benchmark.byTask.feeds;
  */
 
 import org.apache.lucene.benchmark.byTask.utils.Config;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.Index;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.Field.TermVector;
 
 /**
  * A {@link DocMaker} which reads the English Wikipedia dump. Uses

EnwikiQueryMaker.java

@@ -92,9 +92,9 @@ public class EnwikiQueryMaker extends AbstractQueryMaker implements
    * @param a analyzer to use when parsing queries
    * @return array of Lucene queries
    */
-  private static Query[] createQueries(List qs, Analyzer a) {
+  private static Query[] createQueries(List<Object> qs, Analyzer a) {
     QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD, a);
-    List queries = new ArrayList();
+    List<Object> queries = new ArrayList<Object>();
     for (int i = 0; i < qs.size(); i++) {
       try {
 
@@ -119,14 +119,14 @@ public class EnwikiQueryMaker extends AbstractQueryMaker implements
       }
     }
 
-    return (Query[]) queries.toArray(new Query[0]);
+    return queries.toArray(new Query[0]);
   }
 
   protected Query[] prepareQueries() throws Exception {
     // analyzer (default is standard analyzer)
     Analyzer anlzr = NewAnalyzerTask.createAnalyzer(config.get("analyzer", StandardAnalyzer.class.getName()));
 
-    List queryList = new ArrayList(20);
+    List<Object> queryList = new ArrayList<Object>(20);
     queryList.addAll(Arrays.asList(STANDARD_QUERIES));
     if(!config.get("enwikiQueryMaker.disableSpanQueries", false))
       queryList.addAll(Arrays.asList(getPrebuiltQueries(DocMaker.BODY_FIELD)));

FileBasedQueryMaker.java

@@ -51,7 +51,7 @@ public class FileBasedQueryMaker extends AbstractQueryMaker implements QueryMake
     String defaultField = config.get("file.query.maker.default.field", DocMaker.BODY_FIELD);
     QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, defaultField, anlzr);
 
-    List qq = new ArrayList();
+    List<Query> qq = new ArrayList<Query>();
     String fileName = config.get("file.query.maker.file", null);
     if (fileName != null)
     {
@@ -94,7 +94,6 @@ public class FileBasedQueryMaker extends AbstractQueryMaker implements QueryMake
      }
 
    }
-    Query [] result = (Query[]) qq.toArray(new Query[qq.size()]) ;
-    return result;
+    return qq.toArray(new Query[qq.size()]) ;
   }
 }

ReutersContentSource.java

@@ -47,9 +47,9 @@ public class ReutersContentSource extends ContentSource {
     ParsePosition pos;
   }
 
-  private ThreadLocal dateFormat = new ThreadLocal();
+  private ThreadLocal<DateFormatInfo> dateFormat = new ThreadLocal<DateFormatInfo>();
   private File dataDir = null;
-  private ArrayList inputFiles = new ArrayList();
+  private ArrayList<File> inputFiles = new ArrayList<File>();
   private int nextFile = 0;
   private int iteration = 0;
 
@@ -69,7 +69,7 @@ public class ReutersContentSource extends ContentSource {
   }
 
   private synchronized DateFormatInfo getDateFormatInfo() {
-    DateFormatInfo dfi = (DateFormatInfo) dateFormat.get();
+    DateFormatInfo dfi = dateFormat.get();
     if (dfi == null) {
       dfi = new DateFormatInfo();
       // date format: 30-MAR-1987 14:22:36.87
@@ -105,7 +105,7 @@ public class ReutersContentSource extends ContentSource {
      nextFile = 0;
      iteration++;
    }
-    f = (File) inputFiles.get(nextFile++);
+    f = inputFiles.get(nextFile++);
    name = f.getCanonicalPath() + "_" + iteration;
  }
 

ReutersQueryMaker.java

@@ -72,9 +72,9 @@ public class ReutersQueryMaker extends AbstractQueryMaker implements QueryMaker
    * @param a analyzer to use when parsing queries
    * @return array of Lucene queries
    */
-  private static Query[] createQueries(List qs, Analyzer a) {
+  private static Query[] createQueries(List<Object> qs, Analyzer a) {
     QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD, a);
-    List queries = new ArrayList();
+    List<Object> queries = new ArrayList<Object>();
     for (int i = 0; i < qs.size(); i++) {
       try {
 
@@ -99,7 +99,7 @@ public class ReutersQueryMaker extends AbstractQueryMaker implements QueryMaker
      }
    }
 
-    return (Query[]) queries.toArray(new Query[0]);
+    return queries.toArray(new Query[0]);
   }
 
   protected Query[] prepareQueries() throws Exception {
@@ -107,7 +107,7 @@ public class ReutersQueryMaker extends AbstractQueryMaker implements QueryMaker
     Analyzer anlzr= NewAnalyzerTask.createAnalyzer(config.get("analyzer",
         "org.apache.lucene.analysis.standard.StandardAnalyzer"));
 
-    List queryList = new ArrayList(20);
+    List<Object> queryList = new ArrayList<Object>(20);
     queryList.addAll(Arrays.asList(STANDARD_QUERIES));
     queryList.addAll(Arrays.asList(getPrebuiltQueries(DocMaker.BODY_FIELD)));
     return createQueries(queryList, anlzr);

SimpleQueryMaker.java

@@ -48,7 +48,7 @@ public class SimpleQueryMaker extends AbstractQueryMaker implements QueryMaker {
         "org.apache.lucene.analysis.standard.StandardAnalyzer"));
 
     QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD,anlzr);
-    ArrayList qq = new ArrayList();
+    ArrayList<Query> qq = new ArrayList<Query>();
     Query q1 = new TermQuery(new Term(DocMaker.ID_FIELD,"doc2"));
     qq.add(q1);
     Query q2 = new TermQuery(new Term(DocMaker.BODY_FIELD,"simple"));
@@ -64,7 +64,7 @@ public class SimpleQueryMaker extends AbstractQueryMaker implements QueryMaker {
     qq.add(qp.parse("\"synthetic text\"~3"));
     qq.add(qp.parse("zoom*"));
     qq.add(qp.parse("synth*"));
-    return (Query []) qq.toArray(new Query[0]);
+    return qq.toArray(new Query[0]);
   }
 
 }

SimpleSloppyPhraseQueryMaker.java

@@ -35,15 +35,15 @@ public class SimpleSloppyPhraseQueryMaker extends SimpleQueryMaker {
   protected Query[] prepareQueries() throws Exception {
     // extract some 100 words from doc text to an array
     String words[];
-    ArrayList w = new ArrayList();
+    ArrayList<String> w = new ArrayList<String>();
     StringTokenizer st = new StringTokenizer(SingleDocSource.DOC_TEXT);
     while (st.hasMoreTokens() && w.size()<100) {
       w.add(st.nextToken());
     }
-    words = (String[]) w.toArray(new String[0]);
+    words = w.toArray(new String[0]);
 
     // create queries (that would find stuff) with varying slops
-    ArrayList queries = new ArrayList();
+    ArrayList<Query> queries = new ArrayList<Query>();
     for (int slop=0; slop<8; slop++) {
       for (int qlen=2; qlen<6; qlen++) {
         for (int wd=0; wd<words.length-qlen-slop; wd++) {
@@ -76,7 +76,7 @@ public class SimpleSloppyPhraseQueryMaker extends SimpleQueryMaker {
        }
      }
    }
-    return (Query[]) queries.toArray(new Query[0]);
+    return queries.toArray(new Query[0]);
   }
 
 }

TrecContentSource.java

@@ -76,11 +76,11 @@ public class TrecContentSource extends ContentSource {
     "EEE MMM dd kk:mm:ss yyyy", // Tue Dec 09 16:45:08 2003
   };
 
-  private ThreadLocal dateFormats = new ThreadLocal();
-  private ThreadLocal trecDocReader = new ThreadLocal();
-  private ThreadLocal trecDocBuffer = new ThreadLocal();
+  private ThreadLocal<DateFormatInfo> dateFormats = new ThreadLocal<DateFormatInfo>();
+  private ThreadLocal<StringBufferReader> trecDocReader = new ThreadLocal<StringBufferReader>();
+  private ThreadLocal<StringBuffer> trecDocBuffer = new ThreadLocal<StringBuffer>();
   private File dataDir = null;
-  private ArrayList inputFiles = new ArrayList();
+  private ArrayList<File> inputFiles = new ArrayList<File>();
   private int nextFile = 0;
   private int rawDocSize;
 
@@ -93,7 +93,7 @@ public class TrecContentSource extends ContentSource {
   HTMLParser htmlParser;
 
   private DateFormatInfo getDateFormatInfo() {
-    DateFormatInfo dfi = (DateFormatInfo) dateFormats.get();
+    DateFormatInfo dfi = dateFormats.get();
     if (dfi == null) {
       dfi = new DateFormatInfo();
       dfi.dfs = new SimpleDateFormat[DATE_FORMATS.length];
@@ -108,7 +108,7 @@ public class TrecContentSource extends ContentSource {
   }
 
   private StringBuffer getDocBuffer() {
-    StringBuffer sb = (StringBuffer) trecDocBuffer.get();
+    StringBuffer sb = trecDocBuffer.get();
     if (sb == null) {
       sb = new StringBuffer();
       trecDocBuffer.set(sb);
@@ -117,7 +117,7 @@ public class TrecContentSource extends ContentSource {
   }
 
   private Reader getTrecDocReader(StringBuffer docBuffer) {
-    StringBufferReader r = (StringBufferReader) trecDocReader.get();
+    StringBufferReader r = trecDocReader.get();
     if (r == null) {
       r = new StringBufferReader(docBuffer);
       trecDocReader.set(r);
@@ -177,7 +177,7 @@ public class TrecContentSource extends ContentSource {
      nextFile = 0;
      iteration++;
    }
-    File f = (File) inputFiles.get(nextFile++);
+    File f = inputFiles.get(nextFile++);
    if (verbose) {
      System.out.println("opening: " + f + " length: " + f.length());
    }
@@ -330,7 +330,7 @@ public class TrecContentSource extends ContentSource {
     try {
       String parserClassName = config.get("html.parser",
           "org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser");
-      htmlParser = (HTMLParser) Class.forName(parserClassName).newInstance();
+      htmlParser = Class.forName(parserClassName).asSubclass(HTMLParser.class).newInstance();
     } catch (Exception e) {
       // Should not get here. Throw runtime exception.
       throw new RuntimeException(e);

ConsumeContentSourceTask.java

@@ -42,7 +42,7 @@ public class ConsumeContentSourceTask extends PerfTask {
       throw new IllegalArgumentException("content.source must be defined");
     }
     try {
-      source = (ContentSource) Class.forName(sourceClass).newInstance();
+      source = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
       source.setConfig(config);
       source.resetInputs();
     } catch (Exception e) {

CreateIndexTask.java

@@ -61,7 +61,7 @@ public class CreateIndexTask extends PerfTask {
     final String mergeScheduler = config.get("merge.scheduler",
         "org.apache.lucene.index.ConcurrentMergeScheduler");
     try {
-      writer.setMergeScheduler((MergeScheduler) Class.forName(mergeScheduler).newInstance());
+      writer.setMergeScheduler(Class.forName(mergeScheduler).asSubclass(MergeScheduler.class).newInstance());
     } catch (Exception e) {
       throw new RuntimeException("unable to instantiate class '" + mergeScheduler + "' as merge scheduler", e);
     }
@@ -69,7 +69,7 @@ public class CreateIndexTask extends PerfTask {
     final String mergePolicy = config.get("merge.policy",
         "org.apache.lucene.index.LogByteSizeMergePolicy");
     try {
-      writer.setMergePolicy((MergePolicy) Class.forName(mergePolicy).getConstructor(new Class[] { IndexWriter.class }).newInstance(new Object[] { writer }));
+      writer.setMergePolicy(Class.forName(mergePolicy).asSubclass(MergePolicy.class).getConstructor(IndexWriter.class).newInstance(writer));
     } catch (Exception e) {
       throw new RuntimeException("unable to instantiate class '" + mergePolicy + "' as merge policy", e);
     }
@@ -106,7 +106,7 @@ public class CreateIndexTask extends PerfTask {
     IndexDeletionPolicy indexDeletionPolicy = null;
     RuntimeException err = null;
     try {
-      indexDeletionPolicy = ((IndexDeletionPolicy) Class.forName(deletionPolicyName).newInstance());
+      indexDeletionPolicy = Class.forName(deletionPolicyName).asSubclass(IndexDeletionPolicy.class).newInstance();
     } catch (IllegalAccessException iae) {
       err = new RuntimeException("unable to instantiate class '" + deletionPolicyName + "' as IndexDeletionPolicy");
       err.initCause(iae);

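CreateIndexTask combines both reflection cleanups: asSubclass() for the no-arg cases (merge scheduler, deletion policy), and varargs getConstructor()/newInstance() for the merge policy, which drops the explicit Class[]/Object[] arrays along with the cast. A sketch of the one-arg case; newMergePolicy() is a hypothetical helper wrapping the patched call:

```java
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MergePolicy;

public class MergePolicyFactory {
  // Hypothetical helper isolating the patched call from CreateIndexTask.
  public static MergePolicy newMergePolicy(String className, IndexWriter writer)
      throws Exception {
    // was: (MergePolicy) Class.forName(className)
    //          .getConstructor(new Class[] { IndexWriter.class })
    //          .newInstance(new Object[] { writer });
    return Class.forName(className)
        .asSubclass(MergePolicy.class)
        .getConstructor(IndexWriter.class)   // varargs Class<?>...
        .newInstance(writer);                // varargs Object...
  }
}
```
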
OptimizeTask.java

@@ -41,7 +41,7 @@ public class OptimizeTask extends PerfTask {
 
   public void setParams(String params) {
     super.setParams(params);
-    maxNumSegments = (int) Double.valueOf(params).intValue();
+    maxNumSegments = Double.valueOf(params).intValue();
   }
 
   public boolean supportsParams() {

Highlighter.java

@@ -184,7 +184,7 @@ public class Highlighter
     TextFragment[] frag =getBestTextFragments(tokenStream,text, true,maxNumFragments);
 
     //Get text
-    ArrayList fragTexts = new ArrayList();
+    ArrayList<String> fragTexts = new ArrayList<String>();
     for (int i = 0; i < frag.length; i++)
     {
       if ((frag[i] != null) && (frag[i].getScore() > 0))
@@ -192,7 +192,7 @@ public class Highlighter
        fragTexts.add(frag[i].toString());
      }
    }
-    return (String[]) fragTexts.toArray(new String[0]);
+    return fragTexts.toArray(new String[0]);
   }
 
 
@@ -214,7 +214,7 @@ public class Highlighter
     int maxNumFragments)
     throws IOException, InvalidTokenOffsetsException
   {
-    ArrayList docFrags = new ArrayList();
+    ArrayList<TextFragment> docFrags = new ArrayList<TextFragment>();
     StringBuilder newText=new StringBuilder();
 
     TermAttribute termAtt = tokenStream.addAttribute(TermAttribute.class);
@@ -320,9 +320,9 @@ public class Highlighter
     currentFrag.textEndPos = newText.length();
 
     //sort the most relevant sections of the text
-    for (Iterator i = docFrags.iterator(); i.hasNext();)
+    for (Iterator<TextFragment> i = docFrags.iterator(); i.hasNext();)
     {
-      currentFrag = (TextFragment) i.next();
+      currentFrag = i.next();
 
       //If you are running with a version of Lucene before 11th Sept 03
       // you do not have PriorityQueue.insert() - so uncomment the code below
@@ -349,14 +349,14 @@ public class Highlighter
     TextFragment frag[] = new TextFragment[fragQueue.size()];
     for (int i = frag.length - 1; i >= 0; i--)
     {
-      frag[i] = (TextFragment) fragQueue.pop();
+      frag[i] = fragQueue.pop();
     }
 
     //merge any contiguous fragments to improve readability
     if(mergeContiguousFragments)
     {
       mergeContiguousFragments(frag);
-      ArrayList fragTexts = new ArrayList();
+      ArrayList<TextFragment> fragTexts = new ArrayList<TextFragment>();
       for (int i = 0; i < frag.length; i++)
       {
         if ((frag[i] != null) && (frag[i].getScore() > 0))
@@ -364,7 +364,7 @@ public class Highlighter
          fragTexts.add(frag[i]);
        }
      }
-      frag= (TextFragment[]) fragTexts.toArray(new TextFragment[0]);
+      frag= fragTexts.toArray(new TextFragment[0]);
     }
 
     return frag;
@@ -567,17 +567,15 @@ public class Highlighter
     this.encoder = encoder;
   }
 }
-class FragmentQueue extends PriorityQueue
+class FragmentQueue extends PriorityQueue<TextFragment>
 {
   public FragmentQueue(int size)
   {
     initialize(size);
   }
 
-  public final boolean lessThan(Object a, Object b)
+  public final boolean lessThan(TextFragment fragA, TextFragment fragB)
   {
-    TextFragment fragA = (TextFragment) a;
-    TextFragment fragB = (TextFragment) b;
     if (fragA.getScore() == fragB.getScore())
       return fragA.fragNum > fragB.fragNum;
     else

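The FragmentQueue change at the end of Highlighter.java depends on org.apache.lucene.util.PriorityQueue being generified: lessThan() receives TextFragments directly and pop() is typed, which is why `frag[i] = fragQueue.pop();` above needs no cast. Roughly the complete class after the patch; it sits in the same package as Highlighter (so the package-private fragNum field is visible), and the final else branch is inferred from score ordering rather than shown in the hunk:

```java
import org.apache.lucene.util.PriorityQueue;

class FragmentQueue extends PriorityQueue<TextFragment> {
  public FragmentQueue(int size) {
    initialize(size);
  }

  public final boolean lessThan(TextFragment fragA, TextFragment fragB) {
    if (fragA.getScore() == fragB.getScore())
      return fragA.fragNum > fragB.fragNum;        // equal scores: later fragment ranks lower
    else
      return fragA.getScore() < fragB.getScore();  // inferred, not shown in the hunk
  }
}
```
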
QueryScorer.java

@@ -41,8 +41,8 @@ import org.apache.lucene.util.StringHelper;
  */
 public class QueryScorer implements Scorer {
   private float totalScore;
-  private Set foundTerms;
-  private Map fieldWeightedSpanTerms;
+  private Set<String> foundTerms;
+  private Map<String,WeightedSpanTerm> fieldWeightedSpanTerms;
   private float maxTermWeight;
   private int position = -1;
   private String defaultField;
@@ -103,10 +103,10 @@ public class QueryScorer implements Scorer {
    * @param weightedTerms an array of pre-created {@link WeightedSpanTerm}s
    */
   public QueryScorer(WeightedSpanTerm[] weightedTerms) {
-    this.fieldWeightedSpanTerms = new HashMap(weightedTerms.length);
+    this.fieldWeightedSpanTerms = new HashMap<String,WeightedSpanTerm>(weightedTerms.length);
 
     for (int i = 0; i < weightedTerms.length; i++) {
-      WeightedSpanTerm existingTerm = (WeightedSpanTerm) fieldWeightedSpanTerms.get(weightedTerms[i].term);
+      WeightedSpanTerm existingTerm = fieldWeightedSpanTerms.get(weightedTerms[i].term);
 
       if ((existingTerm == null) ||
             (existingTerm.weight < weightedTerms[i].weight)) {
@@ -149,7 +149,7 @@ public class QueryScorer implements Scorer {
 
     WeightedSpanTerm weightedSpanTerm;
 
-    if ((weightedSpanTerm = (WeightedSpanTerm) fieldWeightedSpanTerms.get(
+    if ((weightedSpanTerm = fieldWeightedSpanTerms.get(
           termText)) == null) {
       return 0;
     }
@@ -194,7 +194,7 @@ public class QueryScorer implements Scorer {
    * @return WeightedSpanTerm for token
    */
   public WeightedSpanTerm getWeightedSpanTerm(String token) {
-    return (WeightedSpanTerm) fieldWeightedSpanTerms.get(token);
+    return fieldWeightedSpanTerms.get(token);
   }
 
   /**
@@ -232,7 +232,7 @@ public class QueryScorer implements Scorer {
    * @see org.apache.lucene.search.highlight.Scorer#startFragment(org.apache.lucene.search.highlight.TextFragment)
    */
   public void startFragment(TextFragment newFragment) {
-    foundTerms = new HashSet();
+    foundTerms = new HashSet<String>();
     totalScore = 0;
   }
 

QueryTermExtractor.java

@@ -93,13 +93,13 @@ public final class QueryTermExtractor
    */
   public static final WeightedTerm[] getTerms(Query query, boolean prohibited, String fieldName)
   {
-    HashSet terms=new HashSet();
+    HashSet<WeightedTerm> terms=new HashSet<WeightedTerm>();
     if(fieldName!=null)
     {
       fieldName= StringHelper.intern(fieldName);
     }
     getTerms(query,terms,prohibited,fieldName);
-    return (WeightedTerm[]) terms.toArray(new WeightedTerm[0]);
+    return terms.toArray(new WeightedTerm[0]);
   }
 
   /**
@@ -115,7 +115,7 @@ public final class QueryTermExtractor
   }
 
   //fieldname MUST be interned prior to this call
-  private static final void getTerms(Query query, HashSet terms,boolean prohibited, String fieldName)
+  private static final void getTerms(Query query, HashSet<WeightedTerm> terms,boolean prohibited, String fieldName)
   {
     try
     {
@@ -126,11 +126,11 @@ public final class QueryTermExtractor
         getTermsFromFilteredQuery((FilteredQuery)query, terms,prohibited, fieldName);
       else
       {
-        HashSet nonWeightedTerms=new HashSet();
+        HashSet<Term> nonWeightedTerms=new HashSet<Term>();
         query.extractTerms(nonWeightedTerms);
-        for (Iterator iter = nonWeightedTerms.iterator(); iter.hasNext();)
+        for (Iterator<Term> iter = nonWeightedTerms.iterator(); iter.hasNext();)
         {
-          Term term = (Term) iter.next();
+          Term term = iter.next();
           if((fieldName==null)||(term.field()==fieldName))
           {
             terms.add(new WeightedTerm(query.getBoost(),term.text()));
@@ -155,7 +155,7 @@ public final class QueryTermExtractor
    * something common which would allow access to child queries so what follows here are query-specific
    * implementations for accessing embedded query elements.
    */
-  private static final void getTermsFromBooleanQuery(BooleanQuery query, HashSet terms, boolean prohibited, String fieldName)
+  private static final void getTermsFromBooleanQuery(BooleanQuery query, HashSet<WeightedTerm> terms, boolean prohibited, String fieldName)
   {
     BooleanClause[] queryClauses = query.getClauses();
     for (int i = 0; i < queryClauses.length; i++)
@@ -164,7 +164,7 @@ public final class QueryTermExtractor
       getTerms(queryClauses[i].getQuery(), terms, prohibited, fieldName);
     }
   }
-  private static void getTermsFromFilteredQuery(FilteredQuery query, HashSet terms, boolean prohibited, String fieldName)
+  private static void getTermsFromFilteredQuery(FilteredQuery query, HashSet<WeightedTerm> terms, boolean prohibited, String fieldName)
   {
     getTerms(query.getQuery(),terms,prohibited,fieldName);
   }

QueryTermScorer.java

@@ -35,11 +35,11 @@ import org.apache.lucene.search.Query;
 public class QueryTermScorer implements Scorer {
 
   TextFragment currentTextFragment = null;
-  HashSet uniqueTermsInFragment;
+  HashSet<String> uniqueTermsInFragment;
 
   float totalScore = 0;
   float maxTermWeight = 0;
-  private HashMap termsToFind;
+  private HashMap<String,WeightedTerm> termsToFind;
 
   private TermAttribute termAtt;
 
@@ -77,9 +77,9 @@ public class QueryTermScorer implements Scorer {
   }
 
   public QueryTermScorer(WeightedTerm[] weightedTerms) {
-    termsToFind = new HashMap();
+    termsToFind = new HashMap<String,WeightedTerm>();
     for (int i = 0; i < weightedTerms.length; i++) {
-      WeightedTerm existingTerm = (WeightedTerm) termsToFind
+      WeightedTerm existingTerm = termsToFind
           .get(weightedTerms[i].term);
       if ((existingTerm == null)
           || (existingTerm.weight < weightedTerms[i].weight)) {
@@ -107,7 +107,7 @@ public class QueryTermScorer implements Scorer {
    * .lucene.search.highlight.TextFragment)
    */
   public void startFragment(TextFragment newFragment) {
-    uniqueTermsInFragment = new HashSet();
+    uniqueTermsInFragment = new HashSet<String>();
     currentTextFragment = newFragment;
     totalScore = 0;
 
@@ -120,7 +120,7 @@ public class QueryTermScorer implements Scorer {
   public float getTokenScore() {
     String termText = termAtt.term();
 
-    WeightedTerm queryTerm = (WeightedTerm) termsToFind.get(termText);
+    WeightedTerm queryTerm = termsToFind.get(termText);
     if (queryTerm == null) {
       // not a query term - return
       return 0;

SimpleSpanFragmenter.java

@@ -73,11 +73,11 @@ public class SimpleSpanFragmenter implements Fragmenter {
     WeightedSpanTerm wSpanTerm = queryScorer.getWeightedSpanTerm(termAtt.term());
 
     if (wSpanTerm != null) {
-      List positionSpans = wSpanTerm.getPositionSpans();
+      List<PositionSpan> positionSpans = wSpanTerm.getPositionSpans();
 
       for (int i = 0; i < positionSpans.size(); i++) {
-        if (((PositionSpan) positionSpans.get(i)).start == position) {
-          waitForPos = ((PositionSpan) positionSpans.get(i)).end + 1;
+        if (positionSpans.get(i).start == position) {
+          waitForPos = positionSpans.get(i).end + 1;
           break;
         }
       }

TokenSources.java

@@ -59,7 +59,7 @@ public class TokenSources
   public static TokenStream getAnyTokenStream(IndexReader reader, int docId, String field, Document doc, Analyzer analyzer) throws IOException{
     TokenStream ts=null;
 
-    TermFreqVector tfv=(TermFreqVector) reader.getTermFreqVector(docId,field);
+    TermFreqVector tfv = reader.getTermFreqVector(docId,field);
     if(tfv!=null)
     {
       if(tfv instanceof TermPositionVector)
@@ -89,7 +89,7 @@ public class TokenSources
   {
     TokenStream ts=null;
 
-    TermFreqVector tfv=(TermFreqVector) reader.getTermFreqVector(docId,field);
+    TermFreqVector tfv = reader.getTermFreqVector(docId,field);
     if(tfv!=null)
     {
       if(tfv instanceof TermPositionVector)
@@ -171,7 +171,7 @@ public class TokenSources
      totalTokens+=freq[t];
    }
    Token tokensInOriginalOrder[]=new Token[totalTokens];
-    ArrayList unsortedTokens = null;
+    ArrayList<Token> unsortedTokens = null;
    for (int t = 0; t < freq.length; t++)
    {
      TermVectorOffsetInfo[] offsets=tpv.getOffsets(t);
@@ -191,7 +191,7 @@ public class TokenSources
      //tokens NOT stored with positions or not guaranteed contiguous - must add to list and sort later
      if(unsortedTokens==null)
      {
-        unsortedTokens=new ArrayList();
+        unsortedTokens=new ArrayList<Token>();
      }
      for (int tp = 0; tp < offsets.length; tp++)
      {
@@ -216,14 +216,10 @@ public class TokenSources
      }
    }
    //If the field has been stored without position data we must perform a sort
-    if(unsortedTokens!=null)
-    {
-      tokensInOriginalOrder=(Token[]) unsortedTokens.toArray(new Token[unsortedTokens.size()]);
-      Arrays.sort(tokensInOriginalOrder, new Comparator(){
-        public int compare(Object o1, Object o2)
-        {
-          Token t1=(Token) o1;
-          Token t2=(Token) o2;
+    if(unsortedTokens!=null) {
+      tokensInOriginalOrder= unsortedTokens.toArray(new Token[unsortedTokens.size()]);
+      Arrays.sort(tokensInOriginalOrder, new Comparator<Token>(){
+        public int compare(Token t1, Token t2) {
          if(t1.startOffset()>t2.endOffset())
            return 1;
          if(t1.startOffset()<t2.startOffset())
@@ -236,7 +232,7 @@ public class TokenSources
 
   public static TokenStream getTokenStream(IndexReader reader,int docId, String field) throws IOException
   {
-    TermFreqVector tfv=(TermFreqVector) reader.getTermFreqVector(docId,field);
+    TermFreqVector tfv = reader.getTermFreqVector(docId,field);
     if(tfv==null)
     {
       throw new IllegalArgumentException(field+" in doc #"+docId

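The TokenSources hunk above turns an anonymous raw Comparator into a Comparator<Token>, eliminating the two casts from Object inside compare(). A trimmed sketch of the mechanics; the comparison body is simplified to plain start-offset order here, whereas the real method keeps its original offset logic:

```java
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.analysis.Token;

class TokenSorter {
  static void sortByStartOffset(Token[] tokens) {
    Arrays.sort(tokens, new Comparator<Token>() {
      public int compare(Token t1, Token t2) {        // was: compare(Object o1, Object o2) plus casts
        return t1.startOffset() - t2.startOffset();   // simplified ordering for illustration
      }
    });
  }
}
```
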
WeightedSpanTerm.java

@@ -28,7 +28,7 @@ import java.util.List;
  */
 public class WeightedSpanTerm extends WeightedTerm{
   boolean positionSensitive;
-  private List positionSpans = new ArrayList();
+  private List<PositionSpan> positionSpans = new ArrayList<PositionSpan>();
 
   /**
    * @param weight
@@ -36,7 +36,7 @@ public class WeightedSpanTerm extends WeightedTerm{
    */
   public WeightedSpanTerm(float weight, String term) {
     super(weight, term);
-    this.positionSpans = new ArrayList();
+    this.positionSpans = new ArrayList<PositionSpan>();
   }
 
   /**
@@ -61,10 +61,10 @@ public class WeightedSpanTerm extends WeightedTerm{
     // where kept in some sort of priority queue - that way this method
     // could
     // bail early without checking each PositionSpan.
-    Iterator positionSpanIt = positionSpans.iterator();
+    Iterator<PositionSpan> positionSpanIt = positionSpans.iterator();
 
     while (positionSpanIt.hasNext()) {
-      PositionSpan posSpan = (PositionSpan) positionSpanIt.next();
+      PositionSpan posSpan = positionSpanIt.next();
 
       if (((position >= posSpan.start) && (position <= posSpan.end))) {
         return true;
@@ -74,7 +74,7 @@ public class WeightedSpanTerm extends WeightedTerm{
     return false;
   }
 
-  public void addPositionSpans(List positionSpans) {
+  public void addPositionSpans(List<PositionSpan> positionSpans) {
     this.positionSpans.addAll(positionSpans);
   }
 
@@ -86,7 +86,7 @@ public class WeightedSpanTerm extends WeightedTerm{
     this.positionSensitive = positionSensitive;
   }
 
-  public List getPositionSpans() {
+  public List<PositionSpan> getPositionSpans() {
     return positionSpans;
   }
 }

@ -47,7 +47,7 @@ public class WeightedSpanTermExtractor {
|
||||||
|
|
||||||
private String fieldName;
|
private String fieldName;
|
||||||
private TokenStream tokenStream;
|
private TokenStream tokenStream;
|
||||||
private Map readers = new HashMap(10); // Map<String, IndexReader>
|
private Map<String,IndexReader> readers = new HashMap<String,IndexReader>(10);
|
||||||
private String defaultField;
|
private String defaultField;
|
||||||
private boolean expandMultiTermQuery;
|
private boolean expandMultiTermQuery;
|
||||||
private boolean cachedTokenStream;
|
private boolean cachedTokenStream;
|
||||||
|
@ -63,11 +63,9 @@ public class WeightedSpanTermExtractor {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void closeReaders() {
|
private void closeReaders() {
|
||||||
Collection readerSet = readers.values();
|
Collection<IndexReader> readerSet = readers.values();
|
||||||
Iterator it = readerSet.iterator();
|
|
||||||
|
|
||||||
while (it.hasNext()) {
|
for (final IndexReader reader : readerSet) {
|
||||||
IndexReader reader = (IndexReader) it.next();
|
|
||||||
try {
|
try {
|
||||||
reader.close();
|
reader.close();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
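Note how the typed Map<String,IndexReader> in the @ -47,7 hunk also retires the "// Map<String, IndexReader>" comment, which had been documenting by hand what the type system now enforces, and how closeReaders() collapses from a raw Iterator plus cast into a for-each. A minimal sketch of the same shape, assuming java.io.Closeable values rather than real IndexReaders:

import java.io.Closeable;
import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;

public class CloseReadersSketch {
  // Typed map: values() is a Collection<Closeable>, so closing everything
  // needs no Iterator variable and no cast per element.
  private final Map<String, Closeable> readers = new HashMap<String, Closeable>();

  private void closeReaders() {
    for (Closeable reader : readers.values()) {
      try {
        reader.close();
      } catch (IOException e) {
        // the diff's method also swallows close failures
      }
    }
  }

  public static void main(String[] args) {
    CloseReadersSketch sketch = new CloseReadersSketch();
    sketch.readers.put("contents", new StringReader("hello"));
    sketch.closeReaders();
    System.out.println("closed " + sketch.readers.size() + " reader(s)");
  }
}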
@ -85,7 +83,7 @@ public class WeightedSpanTermExtractor {
* Map to place created WeightedSpanTerms in
|* Map to place created WeightedSpanTerms in
* @throws IOException
|* @throws IOException
*/
|*/
private void extract(Query query, Map terms) throws IOException {
|private void extract(Query query, Map<String,WeightedSpanTerm> terms) throws IOException {
if (query instanceof BooleanQuery) {
|if (query instanceof BooleanQuery) {
BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses();
|BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses();

|
@ -137,8 +135,8 @@ public class WeightedSpanTermExtractor {
} else if (query instanceof FilteredQuery) {
|} else if (query instanceof FilteredQuery) {
extract(((FilteredQuery) query).getQuery(), terms);
|extract(((FilteredQuery) query).getQuery(), terms);
} else if (query instanceof DisjunctionMaxQuery) {
|} else if (query instanceof DisjunctionMaxQuery) {
for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
|for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
extract((Query) iterator.next(), terms);
|extract(iterator.next(), terms);
}
|}
} else if (query instanceof MultiTermQuery && expandMultiTermQuery) {
|} else if (query instanceof MultiTermQuery && expandMultiTermQuery) {
MultiTermQuery mtq = ((MultiTermQuery)query);
|MultiTermQuery mtq = ((MultiTermQuery)query);
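The DisjunctionMaxQuery change above works because the generified API hands back an Iterator<Query>, so iterator.next() is already a Query and the explicit cast goes away. A standalone sketch of why a typed iterator removes call-site casts; Clause and Disjunction here are hypothetical stand-ins, not Lucene classes:

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class TypedIteratorSketch {
  static class Clause {
    final String name;
    Clause(String name) { this.name = name; }
  }

  static class Disjunction implements Iterable<Clause> {
    private final List<Clause> disjuncts = new ArrayList<Clause>();
    void add(Clause c) { disjuncts.add(c); }
    public Iterator<Clause> iterator() { return disjuncts.iterator(); }
  }

  public static void main(String[] args) {
    Disjunction d = new Disjunction();
    d.add(new Clause("title:lucene"));
    d.add(new Clause("body:lucene"));
    // it.next() is already a Clause; a raw Iterator would need (Clause) casts.
    for (Iterator<Clause> it = d.iterator(); it.hasNext();) {
      System.out.println(it.next().name);
    }
  }
}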
@ -163,7 +161,7 @@ public class WeightedSpanTermExtractor {
}
|}
} else if (query instanceof MultiPhraseQuery) {
|} else if (query instanceof MultiPhraseQuery) {
final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
|final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
final List termArrays = mpq.getTermArrays();
|final List<Term[]> termArrays = mpq.getTermArrays();
final int[] positions = mpq.getPositions();
|final int[] positions = mpq.getPositions();
if (positions.length > 0) {
|if (positions.length > 0) {

|
@ -174,14 +172,14 @@ public class WeightedSpanTermExtractor {
}
|}
}
|}

|
final List[] disjunctLists = new List[maxPosition + 1];
|final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1];
int distinctPositions = 0;
|int distinctPositions = 0;

|
for (int i = 0; i < termArrays.size(); ++i) {
|for (int i = 0; i < termArrays.size(); ++i) {
final Term[] termArray = (Term[]) termArrays.get(i);
|final Term[] termArray = termArrays.get(i);
List disjuncts = disjunctLists[positions[i]];
|List<SpanQuery> disjuncts = disjunctLists[positions[i]];
if (disjuncts == null) {
|if (disjuncts == null) {
disjuncts = (disjunctLists[positions[i]] = new ArrayList(termArray.length));
|disjuncts = (disjunctLists[positions[i]] = new ArrayList<SpanQuery>(termArray.length));
++distinctPositions;
|++distinctPositions;
}
|}
for (int j = 0; j < termArray.length; ++j) {
|for (int j = 0; j < termArray.length; ++j) {
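One detail worth calling out in the hunk above: the new side declares List<SpanQuery>[] but still creates the array with raw "new List[maxPosition + 1]". That is not an oversight; Java forbids creating arrays of a parameterized type, so a raw creation plus unchecked assignment is the idiomatic workaround. A tiny sketch of the rule:

import java.util.ArrayList;
import java.util.List;

public class GenericArraySketch {
  public static void main(String[] args) {
    // "new List<String>[3]" does not compile, so the raw array is created
    // and assigned to a generic array variable - an unchecked conversion
    // the compiler warns about but permits.
    @SuppressWarnings("unchecked")
    List<String>[] buckets = new List[3];
    buckets[0] = new ArrayList<String>();
    buckets[0].add("span");
    System.out.println(buckets[0].get(0)); // span
  }
}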
@ -193,9 +191,9 @@ public class WeightedSpanTermExtractor {
int position = 0;
|int position = 0;
final SpanQuery[] clauses = new SpanQuery[distinctPositions];
|final SpanQuery[] clauses = new SpanQuery[distinctPositions];
for (int i = 0; i < disjunctLists.length; ++i) {
|for (int i = 0; i < disjunctLists.length; ++i) {
List disjuncts = disjunctLists[i];
|List<SpanQuery> disjuncts = disjunctLists[i];
if (disjuncts != null) {
|if (disjuncts != null) {
clauses[position++] = new SpanOrQuery((SpanQuery[]) disjuncts
|clauses[position++] = new SpanOrQuery(disjuncts
.toArray(new SpanQuery[disjuncts.size()]));
|.toArray(new SpanQuery[disjuncts.size()]));
} else {
|} else {
++positionGaps;
|++positionGaps;
@ -221,20 +219,19 @@ public class WeightedSpanTermExtractor {
* SpanQuery to extract Terms from
|* SpanQuery to extract Terms from
* @throws IOException
|* @throws IOException
*/
|*/
private void extractWeightedSpanTerms(Map terms, SpanQuery spanQuery) throws IOException {
|private void extractWeightedSpanTerms(Map<String,WeightedSpanTerm> terms, SpanQuery spanQuery) throws IOException {
Set nonWeightedTerms = new HashSet();
|Set<Term> nonWeightedTerms = new HashSet<Term>();
spanQuery.extractTerms(nonWeightedTerms);
|spanQuery.extractTerms(nonWeightedTerms);

|
Set fieldNames;
|Set<String> fieldNames;

|
if (fieldName == null) {
|if (fieldName == null) {
fieldNames = new HashSet();
|fieldNames = new HashSet<String>();
for (Iterator iter = nonWeightedTerms.iterator(); iter.hasNext();) {
|for (final Term queryTerm : nonWeightedTerms) {
Term queryTerm = (Term) iter.next();
|
fieldNames.add(queryTerm.field());
|fieldNames.add(queryTerm.field());
}
|}
} else {
|} else {
fieldNames = new HashSet(1);
|fieldNames = new HashSet<String>(1);
fieldNames.add(fieldName);
|fieldNames.add(fieldName);
}
|}
// To support the use of the default field name
|// To support the use of the default field name
@ -242,11 +239,9 @@ public class WeightedSpanTermExtractor {
fieldNames.add(defaultField);
|fieldNames.add(defaultField);
}
|}

|
Iterator it = fieldNames.iterator();
|List<PositionSpan> spanPositions = new ArrayList<PositionSpan>();
List spanPositions = new ArrayList();
|

|
while (it.hasNext()) {
|for (final String field : fieldNames) {
String field = (String) it.next();
|

|
IndexReader reader = getReaderForField(field);
|IndexReader reader = getReaderForField(field);
Spans spans = spanQuery.getSpans(reader);
|Spans spans = spanQuery.getSpans(reader);
@ -263,11 +258,10 @@ public class WeightedSpanTermExtractor {
return;
|return;
}
|}

|
for (Iterator iter = nonWeightedTerms.iterator(); iter.hasNext();) {
|for (final Term queryTerm : nonWeightedTerms) {
Term queryTerm = (Term) iter.next();
|

|
if (fieldNameComparator(queryTerm.field())) {
|if (fieldNameComparator(queryTerm.field())) {
WeightedSpanTerm weightedSpanTerm = (WeightedSpanTerm) terms.get(queryTerm.text());
|WeightedSpanTerm weightedSpanTerm = terms.get(queryTerm.text());

|
if (weightedSpanTerm == null) {
|if (weightedSpanTerm == null) {
weightedSpanTerm = new WeightedSpanTerm(spanQuery.getBoost(), queryTerm.text());
|weightedSpanTerm = new WeightedSpanTerm(spanQuery.getBoost(), queryTerm.text());
@ -292,12 +286,11 @@ public class WeightedSpanTermExtractor {
* Query to extract Terms from
|* Query to extract Terms from
* @throws IOException
|* @throws IOException
*/
|*/
private void extractWeightedTerms(Map terms, Query query) throws IOException {
|private void extractWeightedTerms(Map<String,WeightedSpanTerm> terms, Query query) throws IOException {
Set nonWeightedTerms = new HashSet();
|Set<Term> nonWeightedTerms = new HashSet<Term>();
query.extractTerms(nonWeightedTerms);
|query.extractTerms(nonWeightedTerms);

|
for (Iterator iter = nonWeightedTerms.iterator(); iter.hasNext();) {
|for (final Term queryTerm : nonWeightedTerms) {
Term queryTerm = (Term) iter.next();
|

|
if (fieldNameComparator(queryTerm.field())) {
|if (fieldNameComparator(queryTerm.field())) {
WeightedSpanTerm weightedSpanTerm = new WeightedSpanTerm(query.getBoost(), queryTerm.text());
|WeightedSpanTerm weightedSpanTerm = new WeightedSpanTerm(query.getBoost(), queryTerm.text());
@ -320,7 +313,7 @@ public class WeightedSpanTermExtractor {
tokenStream = new CachingTokenFilter(tokenStream);
|tokenStream = new CachingTokenFilter(tokenStream);
cachedTokenStream = true;
|cachedTokenStream = true;
}
|}
IndexReader reader = (IndexReader) readers.get(field);
|IndexReader reader = readers.get(field);
if (reader == null) {
|if (reader == null) {
MemoryIndex indexer = new MemoryIndex();
|MemoryIndex indexer = new MemoryIndex();
indexer.addField(field, tokenStream);
|indexer.addField(field, tokenStream);
@ -345,7 +338,7 @@ public class WeightedSpanTermExtractor {
* @return Map containing WeightedSpanTerms
|* @return Map containing WeightedSpanTerms
* @throws IOException
|* @throws IOException
*/
|*/
public Map getWeightedSpanTerms(Query query, TokenStream tokenStream)
|public Map<String,WeightedSpanTerm> getWeightedSpanTerms(Query query, TokenStream tokenStream)
throws IOException {
|throws IOException {
return getWeightedSpanTerms(query, tokenStream, null);
|return getWeightedSpanTerms(query, tokenStream, null);
}
|}
@ -364,7 +357,7 @@ public class WeightedSpanTermExtractor {
* @return Map containing WeightedSpanTerms
|* @return Map containing WeightedSpanTerms
* @throws IOException
|* @throws IOException
*/
|*/
public Map getWeightedSpanTerms(Query query, TokenStream tokenStream,
|public Map<String,WeightedSpanTerm> getWeightedSpanTerms(Query query, TokenStream tokenStream,
String fieldName) throws IOException {
|String fieldName) throws IOException {
if (fieldName != null) {
|if (fieldName != null) {
this.fieldName = StringHelper.intern(fieldName);
|this.fieldName = StringHelper.intern(fieldName);
@ -372,7 +365,7 @@ public class WeightedSpanTermExtractor {
this.fieldName = null;
|this.fieldName = null;
}
|}

|
Map terms = new PositionCheckingMap();
|Map<String,WeightedSpanTerm> terms = new PositionCheckingMap<String>();
this.tokenStream = tokenStream;
|this.tokenStream = tokenStream;
try {
|try {
extract(query, terms);
|extract(query, terms);
@ -400,7 +393,7 @@ public class WeightedSpanTermExtractor {
* @return Map of WeightedSpanTerms with quasi tf/idf scores
|* @return Map of WeightedSpanTerms with quasi tf/idf scores
* @throws IOException
|* @throws IOException
*/
|*/
public Map getWeightedSpanTermsWithScores(Query query, TokenStream tokenStream, String fieldName,
|public Map<String,WeightedSpanTerm> getWeightedSpanTermsWithScores(Query query, TokenStream tokenStream, String fieldName,
IndexReader reader) throws IOException {
|IndexReader reader) throws IOException {
if (fieldName != null) {
|if (fieldName != null) {
this.fieldName = StringHelper.intern(fieldName);
|this.fieldName = StringHelper.intern(fieldName);
@ -409,16 +402,16 @@ public class WeightedSpanTermExtractor {
}
|}
this.tokenStream = tokenStream;
|this.tokenStream = tokenStream;

|
Map terms = new PositionCheckingMap();
|Map<String,WeightedSpanTerm> terms = new PositionCheckingMap<String>();
extract(query, terms);
|extract(query, terms);

|
int totalNumDocs = reader.numDocs();
|int totalNumDocs = reader.numDocs();
Set weightedTerms = terms.keySet();
|Set<String> weightedTerms = terms.keySet();
Iterator it = weightedTerms.iterator();
|Iterator<String> it = weightedTerms.iterator();

|
try {
|try {
while (it.hasNext()) {
|while (it.hasNext()) {
WeightedSpanTerm weightedSpanTerm = (WeightedSpanTerm) terms.get(it.next());
|WeightedSpanTerm weightedSpanTerm = terms.get(it.next());
int docFreq = reader.docFreq(new Term(fieldName, weightedSpanTerm.term));
|int docFreq = reader.docFreq(new Term(fieldName, weightedSpanTerm.term));
// docFreq counts deletes
|// docFreq counts deletes
if(totalNumDocs < docFreq) {
|if(totalNumDocs < docFreq) {
@ -440,21 +433,21 @@ public class WeightedSpanTermExtractor {
* This class makes sure that if both position sensitive and insensitive
|* This class makes sure that if both position sensitive and insensitive
* versions of the same term are added, the position insensitive one wins.
|* versions of the same term are added, the position insensitive one wins.
*/
|*/
static private class PositionCheckingMap extends HashMap {
|static private class PositionCheckingMap<K> extends HashMap<K,WeightedSpanTerm> {

|
public void putAll(Map m) {
|public void putAll(Map m) {
Iterator it = m.entrySet().iterator();
|Iterator<Map.Entry<K, WeightedSpanTerm>> it = m.entrySet().iterator();
while (it.hasNext()) {
|while (it.hasNext()) {
Map.Entry entry = (java.util.Map.Entry) it.next();
|Map.Entry<K, WeightedSpanTerm> entry = it.next();
this.put(entry.getKey(), entry.getValue());
|this.put(entry.getKey(), entry.getValue());
}
|}
}
|}

|
public Object put(Object key, Object value) {
|public WeightedSpanTerm put(K key, WeightedSpanTerm value) {
Object prev = super.put(key, value);
|WeightedSpanTerm prev = super.put(key, value);
if (prev == null) return prev;
|if (prev == null) return prev;
WeightedSpanTerm prevTerm = (WeightedSpanTerm)prev;
|WeightedSpanTerm prevTerm = prev;
WeightedSpanTerm newTerm = (WeightedSpanTerm)value;
|WeightedSpanTerm newTerm = value;
if (!prevTerm.positionSensitive) {
|if (!prevTerm.positionSensitive) {
newTerm.positionSensitive = false;
|newTerm.positionSensitive = false;
}
|}
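The PositionCheckingMap hunk is the most complete example of the commit's approach: parameterizing the HashMap superclass as HashMap<K,WeightedSpanTerm> lets put() be overridden with typed parameters, so the (WeightedSpanTerm) casts in the old body vanish while the merge rule stays identical. A minimal sketch of the same shape, with a hypothetical Entry class standing in for WeightedSpanTerm:

import java.util.HashMap;

public class MergingMapSketch {
  // Hypothetical stand-in for WeightedSpanTerm; only the flag matters here.
  static class Entry {
    boolean positionSensitive = true;
  }

  static class MergingMap<K> extends HashMap<K, Entry> {
    @Override
    public Entry put(K key, Entry value) {
      Entry prev = super.put(key, value);
      if (prev == null) return prev;
      if (!prev.positionSensitive) {
        value.positionSensitive = false; // position-insensitive wins
      }
      return prev;
    }
  }

  public static void main(String[] args) {
    MergingMap<String> map = new MergingMap<String>();
    Entry first = new Entry();
    first.positionSensitive = false;
    map.put("term", first);
    Entry second = new Entry();
    map.put("term", second);
    System.out.println(second.positionSensitive); // false
  }
}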