LUCENE-1257: Add generics to highlighter and more for benchmark

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@829524 13f79535-47bb-0310-9956-ffa450edef68
Uwe Schindler 2009-10-25 10:12:58 +00:00
parent 519095db6e
commit 8086aad514
24 changed files with 162 additions and 187 deletions
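The diffs below apply the same two mechanical patterns across the benchmark and highlighter code: raw collections become parameterized types, and reflective instantiation followed by a cast becomes Class.asSubclass(). A minimal before/after sketch, not taken from the commit; the class below and the use of Runnable as the target type are invented for illustration:

import java.util.HashMap;
import java.util.Map;

// Illustrative only: "Runnable" stands in for the DocMaker/ContentSource/QueryMaker
// types that the real benchmark code instantiates reflectively.
public class GenericsMigrationSketch {

  // Before: raw collections and reflective construction force casts everywhere.
  @SuppressWarnings({"rawtypes", "unchecked"})
  static Runnable oldStyle(String className) throws Exception {
    Map extensionToType = new HashMap();
    extensionToType.put(".bz2", Integer.valueOf(0));
    Integer type = (Integer) extensionToType.get(".bz2");     // explicit cast on read
    System.out.println("type = " + type);
    return (Runnable) Class.forName(className).newInstance(); // cast the new instance
  }

  // After: parameterized types remove the casts, and Class.asSubclass() makes the
  // runtime type check explicit so a wrong class name fails fast with a clear error.
  static Runnable newStyle(String className) throws Exception {
    Map<String, Integer> extensionToType = new HashMap<String, Integer>();
    extensionToType.put(".bz2", Integer.valueOf(0));
    Integer type = extensionToType.get(".bz2");               // no cast
    System.out.println("type = " + type);
    return Class.forName(className).asSubclass(Runnable.class).newInstance();
  }
}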

View File

@ -20,7 +20,6 @@ package org.apache.lucene.benchmark.byTask;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
@ -64,8 +63,8 @@ public class PerfRunData {
private DocMaker docMaker;
// we use separate (identical) instances for each "read" task type, so each can iterate the queries separately.
private HashMap readTaskQueryMaker;
private Class qmkrClass;
private HashMap<Class<? extends ReadTask>,QueryMaker> readTaskQueryMaker;
private Class<? extends QueryMaker> qmkrClass;
private IndexReader indexReader;
private IndexSearcher indexSearcher;
@ -80,12 +79,12 @@ public class PerfRunData {
analyzer = NewAnalyzerTask.createAnalyzer(config.get("analyzer",
"org.apache.lucene.analysis.standard.StandardAnalyzer"));
// doc maker
docMaker = (DocMaker) Class.forName(config.get("doc.maker",
"org.apache.lucene.benchmark.byTask.feeds.DocMaker")).newInstance();
docMaker = Class.forName(config.get("doc.maker",
"org.apache.lucene.benchmark.byTask.feeds.DocMaker")).asSubclass(DocMaker.class).newInstance();
docMaker.setConfig(config);
// query makers
readTaskQueryMaker = new HashMap();
qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker"));
readTaskQueryMaker = new HashMap<Class<? extends ReadTask>,QueryMaker>();
qmkrClass = Class.forName(config.get("query.maker","org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker")).asSubclass(QueryMaker.class);
// index stuff
reinit(false);
@ -239,9 +238,8 @@ public class PerfRunData {
public void resetInputs() throws IOException {
docMaker.resetInputs();
Iterator it = readTaskQueryMaker.values().iterator();
while (it.hasNext()) {
((QueryMaker) it.next()).resetInputs();
for (final QueryMaker queryMaker : readTaskQueryMaker.values()) {
queryMaker.resetInputs();
}
}
@ -251,11 +249,11 @@ public class PerfRunData {
synchronized public QueryMaker getQueryMaker(ReadTask readTask) {
// mapping the query maker by task class allows extending/adding new search/read tasks
// without needing to modify this class.
Class readTaskClass = readTask.getClass();
QueryMaker qm = (QueryMaker) readTaskQueryMaker.get(readTaskClass);
Class<? extends ReadTask> readTaskClass = readTask.getClass();
QueryMaker qm = readTaskQueryMaker.get(readTaskClass);
if (qm == null) {
try {
qm = (QueryMaker) qmkrClass.newInstance();
qm = qmkrClass.newInstance();
qm.setConfig(config);
} catch (Exception e) {
throw new RuntimeException(e);

View File

@ -57,7 +57,7 @@ public abstract class ContentSource {
private static final int BZIP = 0;
private static final int OTHER = 1;
private static final Map extensionToType = new HashMap();
private static final Map<String,Integer> extensionToType = new HashMap<String,Integer>();
static {
extensionToType.put(".bz2", Integer.valueOf(BZIP));
extensionToType.put(".bzip", Integer.valueOf(BZIP));
@ -93,7 +93,7 @@ public abstract class ContentSource {
* a given directory. The collected {@link File} instances are stored in the
* given <code>files</code>.
*/
protected final void collectFiles(File dir, ArrayList files) {
protected final void collectFiles(File dir, ArrayList<File> files) {
if (!dir.canRead()) {
return;
}
@ -125,7 +125,7 @@ public abstract class ContentSource {
int idx = fileName.lastIndexOf('.');
int type = OTHER;
if (idx != -1) {
Integer typeInt = (Integer) extensionToType.get(fileName.substring(idx));
Integer typeInt = extensionToType.get(fileName.substring(idx));
if (typeInt != null) {
type = typeInt.intValue();
}

View File

@ -51,13 +51,12 @@ public class DirContentSource extends ContentSource {
ParsePosition pos;
}
public static class Iterator implements java.util.Iterator {
public static class Iterator implements java.util.Iterator<File> {
static class Comparator implements java.util.Comparator {
public int compare(Object _a, Object _b) {
static class Comparator implements java.util.Comparator<File> {
public int compare(File _a, File _b) {
String a = _a.toString();
String b = _b.toString();
int diff = a.length() - b.length();
if (diff > 0) {
@ -79,7 +78,7 @@ public class DirContentSource extends ContentSource {
int count = 0;
Stack stack = new Stack();
Stack<File> stack = new Stack<File>();
/* this seems silly ... there must be a better way ...
not that this is good, but can it matter? */
@ -94,10 +93,10 @@ public class DirContentSource extends ContentSource {
if (stack.empty()) {
return;
}
if (!((File)stack.peek()).isDirectory()) {
if (!(stack.peek()).isDirectory()) {
return;
}
File f = (File)stack.pop();
File f = stack.pop();
push(f);
}
@ -133,10 +132,10 @@ public class DirContentSource extends ContentSource {
return stack.size() > 0;
}
public Object next() {
public File next() {
assert hasNext();
count++;
Object object = stack.pop();
File object = stack.pop();
// System.err.println("pop " + object);
find();
return object;
@ -148,7 +147,7 @@ public class DirContentSource extends ContentSource {
}
private ThreadLocal dateFormat = new ThreadLocal();
private ThreadLocal<DateFormatInfo> dateFormat = new ThreadLocal<DateFormatInfo>();
private File dataDir = null;
private int iteration = 0;
private Iterator inputFiles = null;
@ -156,7 +155,7 @@ public class DirContentSource extends ContentSource {
// get/initiate a thread-local simple date format (must do so
// because SimpleDateFormat is not thread-safe).
private DateFormatInfo getDateFormatInfo() {
DateFormatInfo dfi = (DateFormatInfo) dateFormat.get();
DateFormatInfo dfi = dateFormat.get();
if (dfi == null) {
dfi = new DateFormatInfo();
dfi.pos = new ParsePosition(0);
@ -191,7 +190,7 @@ public class DirContentSource extends ContentSource {
inputFiles = new Iterator(dataDir);
iteration++;
}
f = (File) inputFiles.next();
f = inputFiles.next();
// System.err.println(f);
name = f.getCanonicalPath()+"_"+iteration;
}

View File

@ -20,10 +20,8 @@ package org.apache.lucene.benchmark.byTask.feeds;
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
import java.util.Map.Entry;
import java.util.Random;
import org.apache.lucene.benchmark.byTask.utils.Config;
@ -79,7 +77,7 @@ public class DocMaker {
static class DocState {
private final Map fields;
private final Map<String,Field> fields;
private final boolean reuseFields;
final Document doc;
DocData docData = new DocData();
@ -89,7 +87,7 @@ public class DocMaker {
this.reuseFields = reuseFields;
if (reuseFields) {
fields = new HashMap();
fields = new HashMap<String,Field>();
// Initialize the map with the default fields.
fields.put(BODY_FIELD, new Field(BODY_FIELD, "", store, bodyIndex, termVector));
@ -115,7 +113,7 @@ public class DocMaker {
return new Field(name, "", store, index, termVector);
}
Field f = (Field) fields.get(name);
Field f = fields.get(name);
if (f == null) {
f = new Field(name, "", store, index, termVector);
fields.put(name, f);
@ -128,8 +126,8 @@ public class DocMaker {
private boolean storeBytes = false;
// leftovers are thread local, because it is unsafe to share residues between threads
private ThreadLocal leftovr = new ThreadLocal();
private ThreadLocal docState = new ThreadLocal();
private ThreadLocal<LeftOver> leftovr = new ThreadLocal<LeftOver>();
private ThreadLocal<DocState> docState = new ThreadLocal<DocState>();
public static final String BODY_FIELD = "body";
public static final String TITLE_FIELD = "doctitle";
@ -224,8 +222,7 @@ public class DocMaker {
if (indexProperties) {
Properties props = docData.getProps();
if (props != null) {
for (Iterator iterator = props.entrySet().iterator(); iterator.hasNext();) {
Entry entry = (Entry) iterator.next();
for (final Map.Entry<Object,Object> entry : props.entrySet()) {
Field f = ds.getField((String) entry.getKey(), storeVal, indexVal, termVecVal);
f.setValue((String) entry.getValue());
doc.add(f);
@ -243,7 +240,7 @@ public class DocMaker {
}
protected DocState getDocState() {
DocState ds = (DocState) docState.get();
DocState ds = docState.get();
if (ds == null) {
ds = new DocState(true, storeVal, indexVal, bodyIndexVal, termVecVal);
docState.set(ds);
@ -299,7 +296,7 @@ public class DocMaker {
* given size input by <code>size</code>.
*/
public Document makeDocument(int size) throws Exception {
LeftOver lvr = (LeftOver) leftovr.get();
LeftOver lvr = leftovr.get();
if (lvr == null || lvr.docdata == null || lvr.docdata.getBody() == null
|| lvr.docdata.getBody().length() == 0) {
resetLeftovers();
@ -371,7 +368,7 @@ public class DocMaker {
this.config = config;
try {
String sourceClass = config.get("content.source", "org.apache.lucene.benchmark.byTask.feeds.SingleDocSource");
source = (ContentSource) Class.forName(sourceClass).newInstance();
source = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
source.setConfig(config);
} catch (Exception e) {
// Should not get here. Throw runtime exception.
@ -413,7 +410,7 @@ public class DocMaker {
// In a multi-rounds run, it is important to reset DocState since settings
// of fields may change between rounds, and this is the only way to reset
// the cache of all threads.
docState = new ThreadLocal();
docState = new ThreadLocal<DocState>();
}
indexProperties = config.get("doc.index.props", false);

View File

@ -220,7 +220,7 @@ public class EnwikiContentSource extends ContentSource {
}
}
private static final Map ELEMENTS = new HashMap();
private static final Map<String,Integer> ELEMENTS = new HashMap<String,Integer>();
private static final int TITLE = 0;
private static final int DATE = TITLE + 1;
private static final int BODY = DATE + 1;
@ -248,7 +248,7 @@ public class EnwikiContentSource extends ContentSource {
* the element qualified name over and over.
*/
private final static int getElementType(String elem) {
Integer val = (Integer) ELEMENTS.get(elem);
Integer val = ELEMENTS.get(elem);
return val == null ? -1 : val.intValue();
}

View File

@ -18,11 +18,6 @@ package org.apache.lucene.benchmark.byTask.feeds;
*/
import org.apache.lucene.benchmark.byTask.utils.Config;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
/**
* A {@link DocMaker} which reads the English Wikipedia dump. Uses

View File

@ -92,9 +92,9 @@ public class EnwikiQueryMaker extends AbstractQueryMaker implements
* @param a analyzer to use when parsing queries
* @return array of Lucene queries
*/
private static Query[] createQueries(List qs, Analyzer a) {
private static Query[] createQueries(List<Object> qs, Analyzer a) {
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD, a);
List queries = new ArrayList();
List<Object> queries = new ArrayList<Object>();
for (int i = 0; i < qs.size(); i++) {
try {
@ -119,14 +119,14 @@ public class EnwikiQueryMaker extends AbstractQueryMaker implements
}
}
return (Query[]) queries.toArray(new Query[0]);
return queries.toArray(new Query[0]);
}
protected Query[] prepareQueries() throws Exception {
// analyzer (default is standard analyzer)
Analyzer anlzr = NewAnalyzerTask.createAnalyzer(config.get("analyzer", StandardAnalyzer.class.getName()));
List queryList = new ArrayList(20);
List<Object> queryList = new ArrayList<Object>(20);
queryList.addAll(Arrays.asList(STANDARD_QUERIES));
if(!config.get("enwikiQueryMaker.disableSpanQueries", false))
queryList.addAll(Arrays.asList(getPrebuiltQueries(DocMaker.BODY_FIELD)));

View File

@ -51,7 +51,7 @@ public class FileBasedQueryMaker extends AbstractQueryMaker implements QueryMake
String defaultField = config.get("file.query.maker.default.field", DocMaker.BODY_FIELD);
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, defaultField, anlzr);
List qq = new ArrayList();
List<Query> qq = new ArrayList<Query>();
String fileName = config.get("file.query.maker.file", null);
if (fileName != null)
{
@ -94,7 +94,6 @@ public class FileBasedQueryMaker extends AbstractQueryMaker implements QueryMake
}
}
Query [] result = (Query[]) qq.toArray(new Query[qq.size()]) ;
return result;
return qq.toArray(new Query[qq.size()]) ;
}
}

View File

@ -47,9 +47,9 @@ public class ReutersContentSource extends ContentSource {
ParsePosition pos;
}
private ThreadLocal dateFormat = new ThreadLocal();
private ThreadLocal<DateFormatInfo> dateFormat = new ThreadLocal<DateFormatInfo>();
private File dataDir = null;
private ArrayList inputFiles = new ArrayList();
private ArrayList<File> inputFiles = new ArrayList<File>();
private int nextFile = 0;
private int iteration = 0;
@ -69,7 +69,7 @@ public class ReutersContentSource extends ContentSource {
}
private synchronized DateFormatInfo getDateFormatInfo() {
DateFormatInfo dfi = (DateFormatInfo) dateFormat.get();
DateFormatInfo dfi = dateFormat.get();
if (dfi == null) {
dfi = new DateFormatInfo();
// date format: 30-MAR-1987 14:22:36.87
@ -105,7 +105,7 @@ public class ReutersContentSource extends ContentSource {
nextFile = 0;
iteration++;
}
f = (File) inputFiles.get(nextFile++);
f = inputFiles.get(nextFile++);
name = f.getCanonicalPath() + "_" + iteration;
}

View File

@ -72,9 +72,9 @@ public class ReutersQueryMaker extends AbstractQueryMaker implements QueryMaker
* @param a analyzer to use when parsing queries
* @return array of Lucene queries
*/
private static Query[] createQueries(List qs, Analyzer a) {
private static Query[] createQueries(List<Object> qs, Analyzer a) {
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD, a);
List queries = new ArrayList();
List<Object> queries = new ArrayList<Object>();
for (int i = 0; i < qs.size(); i++) {
try {
@ -99,7 +99,7 @@ public class ReutersQueryMaker extends AbstractQueryMaker implements QueryMaker
}
}
return (Query[]) queries.toArray(new Query[0]);
return queries.toArray(new Query[0]);
}
protected Query[] prepareQueries() throws Exception {
@ -107,7 +107,7 @@ public class ReutersQueryMaker extends AbstractQueryMaker implements QueryMaker
Analyzer anlzr= NewAnalyzerTask.createAnalyzer(config.get("analyzer",
"org.apache.lucene.analysis.standard.StandardAnalyzer"));
List queryList = new ArrayList(20);
List<Object> queryList = new ArrayList<Object>(20);
queryList.addAll(Arrays.asList(STANDARD_QUERIES));
queryList.addAll(Arrays.asList(getPrebuiltQueries(DocMaker.BODY_FIELD)));
return createQueries(queryList, anlzr);

View File

@ -48,7 +48,7 @@ public class SimpleQueryMaker extends AbstractQueryMaker implements QueryMaker {
"org.apache.lucene.analysis.standard.StandardAnalyzer"));
QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, DocMaker.BODY_FIELD,anlzr);
ArrayList qq = new ArrayList();
ArrayList<Query> qq = new ArrayList<Query>();
Query q1 = new TermQuery(new Term(DocMaker.ID_FIELD,"doc2"));
qq.add(q1);
Query q2 = new TermQuery(new Term(DocMaker.BODY_FIELD,"simple"));
@ -64,7 +64,7 @@ public class SimpleQueryMaker extends AbstractQueryMaker implements QueryMaker {
qq.add(qp.parse("\"synthetic text\"~3"));
qq.add(qp.parse("zoom*"));
qq.add(qp.parse("synth*"));
return (Query []) qq.toArray(new Query[0]);
return qq.toArray(new Query[0]);
}
}

View File

@ -35,15 +35,15 @@ public class SimpleSloppyPhraseQueryMaker extends SimpleQueryMaker {
protected Query[] prepareQueries() throws Exception {
// extract some 100 words from doc text to an array
String words[];
ArrayList w = new ArrayList();
ArrayList<String> w = new ArrayList<String>();
StringTokenizer st = new StringTokenizer(SingleDocSource.DOC_TEXT);
while (st.hasMoreTokens() && w.size()<100) {
w.add(st.nextToken());
}
words = (String[]) w.toArray(new String[0]);
words = w.toArray(new String[0]);
// create queries (that would find stuff) with varying slops
ArrayList queries = new ArrayList();
ArrayList<Query> queries = new ArrayList<Query>();
for (int slop=0; slop<8; slop++) {
for (int qlen=2; qlen<6; qlen++) {
for (int wd=0; wd<words.length-qlen-slop; wd++) {
@ -76,7 +76,7 @@ public class SimpleSloppyPhraseQueryMaker extends SimpleQueryMaker {
}
}
}
return (Query[]) queries.toArray(new Query[0]);
return queries.toArray(new Query[0]);
}
}

View File

@ -76,11 +76,11 @@ public class TrecContentSource extends ContentSource {
"EEE MMM dd kk:mm:ss yyyy", // Tue Dec 09 16:45:08 2003
};
private ThreadLocal dateFormats = new ThreadLocal();
private ThreadLocal trecDocReader = new ThreadLocal();
private ThreadLocal trecDocBuffer = new ThreadLocal();
private ThreadLocal<DateFormatInfo> dateFormats = new ThreadLocal<DateFormatInfo>();
private ThreadLocal<StringBufferReader> trecDocReader = new ThreadLocal<StringBufferReader>();
private ThreadLocal<StringBuffer> trecDocBuffer = new ThreadLocal<StringBuffer>();
private File dataDir = null;
private ArrayList inputFiles = new ArrayList();
private ArrayList<File> inputFiles = new ArrayList<File>();
private int nextFile = 0;
private int rawDocSize;
@ -93,7 +93,7 @@ public class TrecContentSource extends ContentSource {
HTMLParser htmlParser;
private DateFormatInfo getDateFormatInfo() {
DateFormatInfo dfi = (DateFormatInfo) dateFormats.get();
DateFormatInfo dfi = dateFormats.get();
if (dfi == null) {
dfi = new DateFormatInfo();
dfi.dfs = new SimpleDateFormat[DATE_FORMATS.length];
@ -108,7 +108,7 @@ public class TrecContentSource extends ContentSource {
}
private StringBuffer getDocBuffer() {
StringBuffer sb = (StringBuffer) trecDocBuffer.get();
StringBuffer sb = trecDocBuffer.get();
if (sb == null) {
sb = new StringBuffer();
trecDocBuffer.set(sb);
@ -117,7 +117,7 @@ public class TrecContentSource extends ContentSource {
}
private Reader getTrecDocReader(StringBuffer docBuffer) {
StringBufferReader r = (StringBufferReader) trecDocReader.get();
StringBufferReader r = trecDocReader.get();
if (r == null) {
r = new StringBufferReader(docBuffer);
trecDocReader.set(r);
@ -177,7 +177,7 @@ public class TrecContentSource extends ContentSource {
nextFile = 0;
iteration++;
}
File f = (File) inputFiles.get(nextFile++);
File f = inputFiles.get(nextFile++);
if (verbose) {
System.out.println("opening: " + f + " length: " + f.length());
}
@ -330,7 +330,7 @@ public class TrecContentSource extends ContentSource {
try {
String parserClassName = config.get("html.parser",
"org.apache.lucene.benchmark.byTask.feeds.DemoHTMLParser");
htmlParser = (HTMLParser) Class.forName(parserClassName).newInstance();
htmlParser = Class.forName(parserClassName).asSubclass(HTMLParser.class).newInstance();
} catch (Exception e) {
// Should not get here. Throw runtime exception.
throw new RuntimeException(e);

View File

@ -42,7 +42,7 @@ public class ConsumeContentSourceTask extends PerfTask {
throw new IllegalArgumentException("content.source must be defined");
}
try {
source = (ContentSource) Class.forName(sourceClass).newInstance();
source = Class.forName(sourceClass).asSubclass(ContentSource.class).newInstance();
source.setConfig(config);
source.resetInputs();
} catch (Exception e) {

View File

@ -61,7 +61,7 @@ public class CreateIndexTask extends PerfTask {
final String mergeScheduler = config.get("merge.scheduler",
"org.apache.lucene.index.ConcurrentMergeScheduler");
try {
writer.setMergeScheduler((MergeScheduler) Class.forName(mergeScheduler).newInstance());
writer.setMergeScheduler(Class.forName(mergeScheduler).asSubclass(MergeScheduler.class).newInstance());
} catch (Exception e) {
throw new RuntimeException("unable to instantiate class '" + mergeScheduler + "' as merge scheduler", e);
}
@ -69,7 +69,7 @@ public class CreateIndexTask extends PerfTask {
final String mergePolicy = config.get("merge.policy",
"org.apache.lucene.index.LogByteSizeMergePolicy");
try {
writer.setMergePolicy((MergePolicy) Class.forName(mergePolicy).getConstructor(new Class[] { IndexWriter.class }).newInstance(new Object[] { writer }));
writer.setMergePolicy(Class.forName(mergePolicy).asSubclass(MergePolicy.class).getConstructor(IndexWriter.class).newInstance(writer));
} catch (Exception e) {
throw new RuntimeException("unable to instantiate class '" + mergePolicy + "' as merge policy", e);
}
@ -106,7 +106,7 @@ public class CreateIndexTask extends PerfTask {
IndexDeletionPolicy indexDeletionPolicy = null;
RuntimeException err = null;
try {
indexDeletionPolicy = ((IndexDeletionPolicy) Class.forName(deletionPolicyName).newInstance());
indexDeletionPolicy = Class.forName(deletionPolicyName).asSubclass(IndexDeletionPolicy.class).newInstance();
} catch (IllegalAccessException iae) {
err = new RuntimeException("unable to instantiate class '" + deletionPolicyName + "' as IndexDeletionPolicy");
err.initCause(iae);

View File

@ -41,7 +41,7 @@ public class OptimizeTask extends PerfTask {
public void setParams(String params) {
super.setParams(params);
maxNumSegments = (int) Double.valueOf(params).intValue();
maxNumSegments = Double.valueOf(params).intValue();
}
public boolean supportsParams() {

View File

@ -184,7 +184,7 @@ public class Highlighter
TextFragment[] frag =getBestTextFragments(tokenStream,text, true,maxNumFragments);
//Get text
ArrayList fragTexts = new ArrayList();
ArrayList<String> fragTexts = new ArrayList<String>();
for (int i = 0; i < frag.length; i++)
{
if ((frag[i] != null) && (frag[i].getScore() > 0))
@ -192,7 +192,7 @@ public class Highlighter
fragTexts.add(frag[i].toString());
}
}
return (String[]) fragTexts.toArray(new String[0]);
return fragTexts.toArray(new String[0]);
}
@ -214,7 +214,7 @@ public class Highlighter
int maxNumFragments)
throws IOException, InvalidTokenOffsetsException
{
ArrayList docFrags = new ArrayList();
ArrayList<TextFragment> docFrags = new ArrayList<TextFragment>();
StringBuilder newText=new StringBuilder();
TermAttribute termAtt = tokenStream.addAttribute(TermAttribute.class);
@ -320,9 +320,9 @@ public class Highlighter
currentFrag.textEndPos = newText.length();
//sort the most relevant sections of the text
for (Iterator i = docFrags.iterator(); i.hasNext();)
for (Iterator<TextFragment> i = docFrags.iterator(); i.hasNext();)
{
currentFrag = (TextFragment) i.next();
currentFrag = i.next();
//If you are running with a version of Lucene before 11th Sept 03
// you do not have PriorityQueue.insert() - so uncomment the code below
@ -349,14 +349,14 @@ public class Highlighter
TextFragment frag[] = new TextFragment[fragQueue.size()];
for (int i = frag.length - 1; i >= 0; i--)
{
frag[i] = (TextFragment) fragQueue.pop();
frag[i] = fragQueue.pop();
}
//merge any contiguous fragments to improve readability
if(mergeContiguousFragments)
{
mergeContiguousFragments(frag);
ArrayList fragTexts = new ArrayList();
ArrayList<TextFragment> fragTexts = new ArrayList<TextFragment>();
for (int i = 0; i < frag.length; i++)
{
if ((frag[i] != null) && (frag[i].getScore() > 0))
@ -364,7 +364,7 @@ public class Highlighter
fragTexts.add(frag[i]);
}
}
frag= (TextFragment[]) fragTexts.toArray(new TextFragment[0]);
frag= fragTexts.toArray(new TextFragment[0]);
}
return frag;
@ -567,17 +567,15 @@ public class Highlighter
this.encoder = encoder;
}
}
class FragmentQueue extends PriorityQueue
class FragmentQueue extends PriorityQueue<TextFragment>
{
public FragmentQueue(int size)
{
initialize(size);
}
public final boolean lessThan(Object a, Object b)
public final boolean lessThan(TextFragment fragA, TextFragment fragB)
{
TextFragment fragA = (TextFragment) a;
TextFragment fragB = (TextFragment) b;
if (fragA.getScore() == fragB.getScore())
return fragA.fragNum > fragB.fragNum;
else

View File

@ -41,8 +41,8 @@ import org.apache.lucene.util.StringHelper;
*/
public class QueryScorer implements Scorer {
private float totalScore;
private Set foundTerms;
private Map fieldWeightedSpanTerms;
private Set<String> foundTerms;
private Map<String,WeightedSpanTerm> fieldWeightedSpanTerms;
private float maxTermWeight;
private int position = -1;
private String defaultField;
@ -103,10 +103,10 @@ public class QueryScorer implements Scorer {
* @param weightedTerms an array of pre-created {@link WeightedSpanTerm}s
*/
public QueryScorer(WeightedSpanTerm[] weightedTerms) {
this.fieldWeightedSpanTerms = new HashMap(weightedTerms.length);
this.fieldWeightedSpanTerms = new HashMap<String,WeightedSpanTerm>(weightedTerms.length);
for (int i = 0; i < weightedTerms.length; i++) {
WeightedSpanTerm existingTerm = (WeightedSpanTerm) fieldWeightedSpanTerms.get(weightedTerms[i].term);
WeightedSpanTerm existingTerm = fieldWeightedSpanTerms.get(weightedTerms[i].term);
if ((existingTerm == null) ||
(existingTerm.weight < weightedTerms[i].weight)) {
@ -149,7 +149,7 @@ public class QueryScorer implements Scorer {
WeightedSpanTerm weightedSpanTerm;
if ((weightedSpanTerm = (WeightedSpanTerm) fieldWeightedSpanTerms.get(
if ((weightedSpanTerm = fieldWeightedSpanTerms.get(
termText)) == null) {
return 0;
}
@ -194,7 +194,7 @@ public class QueryScorer implements Scorer {
* @return WeightedSpanTerm for token
*/
public WeightedSpanTerm getWeightedSpanTerm(String token) {
return (WeightedSpanTerm) fieldWeightedSpanTerms.get(token);
return fieldWeightedSpanTerms.get(token);
}
/**
@ -232,7 +232,7 @@ public class QueryScorer implements Scorer {
* @see org.apache.lucene.search.highlight.Scorer#startFragment(org.apache.lucene.search.highlight.TextFragment)
*/
public void startFragment(TextFragment newFragment) {
foundTerms = new HashSet();
foundTerms = new HashSet<String>();
totalScore = 0;
}

View File

@ -93,13 +93,13 @@ public final class QueryTermExtractor
*/
public static final WeightedTerm[] getTerms(Query query, boolean prohibited, String fieldName)
{
HashSet terms=new HashSet();
HashSet<WeightedTerm> terms=new HashSet<WeightedTerm>();
if(fieldName!=null)
{
fieldName= StringHelper.intern(fieldName);
}
getTerms(query,terms,prohibited,fieldName);
return (WeightedTerm[]) terms.toArray(new WeightedTerm[0]);
return terms.toArray(new WeightedTerm[0]);
}
/**
@ -115,7 +115,7 @@ public final class QueryTermExtractor
}
//fieldname MUST be interned prior to this call
private static final void getTerms(Query query, HashSet terms,boolean prohibited, String fieldName)
private static final void getTerms(Query query, HashSet<WeightedTerm> terms,boolean prohibited, String fieldName)
{
try
{
@ -126,11 +126,11 @@ public final class QueryTermExtractor
getTermsFromFilteredQuery((FilteredQuery)query, terms,prohibited, fieldName);
else
{
HashSet nonWeightedTerms=new HashSet();
HashSet<Term> nonWeightedTerms=new HashSet<Term>();
query.extractTerms(nonWeightedTerms);
for (Iterator iter = nonWeightedTerms.iterator(); iter.hasNext();)
for (Iterator<Term> iter = nonWeightedTerms.iterator(); iter.hasNext();)
{
Term term = (Term) iter.next();
Term term = iter.next();
if((fieldName==null)||(term.field()==fieldName))
{
terms.add(new WeightedTerm(query.getBoost(),term.text()));
@ -155,7 +155,7 @@ public final class QueryTermExtractor
* something common which would allow access to child queries so what follows here are query-specific
* implementations for accessing embedded query elements.
*/
private static final void getTermsFromBooleanQuery(BooleanQuery query, HashSet terms, boolean prohibited, String fieldName)
private static final void getTermsFromBooleanQuery(BooleanQuery query, HashSet<WeightedTerm> terms, boolean prohibited, String fieldName)
{
BooleanClause[] queryClauses = query.getClauses();
for (int i = 0; i < queryClauses.length; i++)
@ -164,7 +164,7 @@ public final class QueryTermExtractor
getTerms(queryClauses[i].getQuery(), terms, prohibited, fieldName);
}
}
private static void getTermsFromFilteredQuery(FilteredQuery query, HashSet terms, boolean prohibited, String fieldName)
private static void getTermsFromFilteredQuery(FilteredQuery query, HashSet<WeightedTerm> terms, boolean prohibited, String fieldName)
{
getTerms(query.getQuery(),terms,prohibited,fieldName);
}

View File

@ -35,11 +35,11 @@ import org.apache.lucene.search.Query;
public class QueryTermScorer implements Scorer {
TextFragment currentTextFragment = null;
HashSet uniqueTermsInFragment;
HashSet<String> uniqueTermsInFragment;
float totalScore = 0;
float maxTermWeight = 0;
private HashMap termsToFind;
private HashMap<String,WeightedTerm> termsToFind;
private TermAttribute termAtt;
@ -77,9 +77,9 @@ public class QueryTermScorer implements Scorer {
}
public QueryTermScorer(WeightedTerm[] weightedTerms) {
termsToFind = new HashMap();
termsToFind = new HashMap<String,WeightedTerm>();
for (int i = 0; i < weightedTerms.length; i++) {
WeightedTerm existingTerm = (WeightedTerm) termsToFind
WeightedTerm existingTerm = termsToFind
.get(weightedTerms[i].term);
if ((existingTerm == null)
|| (existingTerm.weight < weightedTerms[i].weight)) {
@ -107,7 +107,7 @@ public class QueryTermScorer implements Scorer {
* .lucene.search.highlight.TextFragment)
*/
public void startFragment(TextFragment newFragment) {
uniqueTermsInFragment = new HashSet();
uniqueTermsInFragment = new HashSet<String>();
currentTextFragment = newFragment;
totalScore = 0;
@ -120,7 +120,7 @@ public class QueryTermScorer implements Scorer {
public float getTokenScore() {
String termText = termAtt.term();
WeightedTerm queryTerm = (WeightedTerm) termsToFind.get(termText);
WeightedTerm queryTerm = termsToFind.get(termText);
if (queryTerm == null) {
// not a query term - return
return 0;

View File

@ -73,11 +73,11 @@ public class SimpleSpanFragmenter implements Fragmenter {
WeightedSpanTerm wSpanTerm = queryScorer.getWeightedSpanTerm(termAtt.term());
if (wSpanTerm != null) {
List positionSpans = wSpanTerm.getPositionSpans();
List<PositionSpan> positionSpans = wSpanTerm.getPositionSpans();
for (int i = 0; i < positionSpans.size(); i++) {
if (((PositionSpan) positionSpans.get(i)).start == position) {
waitForPos = ((PositionSpan) positionSpans.get(i)).end + 1;
if (positionSpans.get(i).start == position) {
waitForPos = positionSpans.get(i).end + 1;
break;
}
}

View File

@ -59,7 +59,7 @@ public class TokenSources
public static TokenStream getAnyTokenStream(IndexReader reader, int docId, String field, Document doc, Analyzer analyzer) throws IOException{
TokenStream ts=null;
TermFreqVector tfv=(TermFreqVector) reader.getTermFreqVector(docId,field);
TermFreqVector tfv = reader.getTermFreqVector(docId,field);
if(tfv!=null)
{
if(tfv instanceof TermPositionVector)
@ -89,7 +89,7 @@ public class TokenSources
{
TokenStream ts=null;
TermFreqVector tfv=(TermFreqVector) reader.getTermFreqVector(docId,field);
TermFreqVector tfv = reader.getTermFreqVector(docId,field);
if(tfv!=null)
{
if(tfv instanceof TermPositionVector)
@ -171,7 +171,7 @@ public class TokenSources
totalTokens+=freq[t];
}
Token tokensInOriginalOrder[]=new Token[totalTokens];
ArrayList unsortedTokens = null;
ArrayList<Token> unsortedTokens = null;
for (int t = 0; t < freq.length; t++)
{
TermVectorOffsetInfo[] offsets=tpv.getOffsets(t);
@ -191,7 +191,7 @@ public class TokenSources
//tokens NOT stored with positions or not guaranteed contiguous - must add to list and sort later
if(unsortedTokens==null)
{
unsortedTokens=new ArrayList();
unsortedTokens=new ArrayList<Token>();
}
for (int tp = 0; tp < offsets.length; tp++)
{
@ -216,14 +216,10 @@ public class TokenSources
}
}
//If the field has been stored without position data we must perform a sort
if(unsortedTokens!=null)
{
tokensInOriginalOrder=(Token[]) unsortedTokens.toArray(new Token[unsortedTokens.size()]);
Arrays.sort(tokensInOriginalOrder, new Comparator(){
public int compare(Object o1, Object o2)
{
Token t1=(Token) o1;
Token t2=(Token) o2;
if(unsortedTokens!=null) {
tokensInOriginalOrder= unsortedTokens.toArray(new Token[unsortedTokens.size()]);
Arrays.sort(tokensInOriginalOrder, new Comparator<Token>(){
public int compare(Token t1, Token t2) {
if(t1.startOffset()>t2.endOffset())
return 1;
if(t1.startOffset()<t2.startOffset())
@ -236,7 +232,7 @@ public class TokenSources
public static TokenStream getTokenStream(IndexReader reader,int docId, String field) throws IOException
{
TermFreqVector tfv=(TermFreqVector) reader.getTermFreqVector(docId,field);
TermFreqVector tfv = reader.getTermFreqVector(docId,field);
if(tfv==null)
{
throw new IllegalArgumentException(field+" in doc #"+docId

View File

@ -28,7 +28,7 @@ import java.util.List;
*/
public class WeightedSpanTerm extends WeightedTerm{
boolean positionSensitive;
private List positionSpans = new ArrayList();
private List<PositionSpan> positionSpans = new ArrayList<PositionSpan>();
/**
* @param weight
@ -36,7 +36,7 @@ public class WeightedSpanTerm extends WeightedTerm{
*/
public WeightedSpanTerm(float weight, String term) {
super(weight, term);
this.positionSpans = new ArrayList();
this.positionSpans = new ArrayList<PositionSpan>();
}
/**
@ -61,10 +61,10 @@ public class WeightedSpanTerm extends WeightedTerm{
// where kept in some sort of priority queue - that way this method
// could
// bail early without checking each PositionSpan.
Iterator positionSpanIt = positionSpans.iterator();
Iterator<PositionSpan> positionSpanIt = positionSpans.iterator();
while (positionSpanIt.hasNext()) {
PositionSpan posSpan = (PositionSpan) positionSpanIt.next();
PositionSpan posSpan = positionSpanIt.next();
if (((position >= posSpan.start) && (position <= posSpan.end))) {
return true;
@ -74,7 +74,7 @@ public class WeightedSpanTerm extends WeightedTerm{
return false;
}
public void addPositionSpans(List positionSpans) {
public void addPositionSpans(List<PositionSpan> positionSpans) {
this.positionSpans.addAll(positionSpans);
}
@ -86,7 +86,7 @@ public class WeightedSpanTerm extends WeightedTerm{
this.positionSensitive = positionSensitive;
}
public List getPositionSpans() {
public List<PositionSpan> getPositionSpans() {
return positionSpans;
}
}

View File

@ -47,7 +47,7 @@ public class WeightedSpanTermExtractor {
private String fieldName;
private TokenStream tokenStream;
private Map readers = new HashMap(10); // Map<String, IndexReader>
private Map<String,IndexReader> readers = new HashMap<String,IndexReader>(10);
private String defaultField;
private boolean expandMultiTermQuery;
private boolean cachedTokenStream;
@ -63,11 +63,9 @@ public class WeightedSpanTermExtractor {
}
private void closeReaders() {
Collection readerSet = readers.values();
Iterator it = readerSet.iterator();
Collection<IndexReader> readerSet = readers.values();
while (it.hasNext()) {
IndexReader reader = (IndexReader) it.next();
for (final IndexReader reader : readerSet) {
try {
reader.close();
} catch (IOException e) {
@ -85,7 +83,7 @@ public class WeightedSpanTermExtractor {
* Map to place created WeightedSpanTerms in
* @throws IOException
*/
private void extract(Query query, Map terms) throws IOException {
private void extract(Query query, Map<String,WeightedSpanTerm> terms) throws IOException {
if (query instanceof BooleanQuery) {
BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses();
@ -137,8 +135,8 @@ public class WeightedSpanTermExtractor {
} else if (query instanceof FilteredQuery) {
extract(((FilteredQuery) query).getQuery(), terms);
} else if (query instanceof DisjunctionMaxQuery) {
for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
extract((Query) iterator.next(), terms);
for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator.hasNext();) {
extract(iterator.next(), terms);
}
} else if (query instanceof MultiTermQuery && expandMultiTermQuery) {
MultiTermQuery mtq = ((MultiTermQuery)query);
@ -163,7 +161,7 @@ public class WeightedSpanTermExtractor {
}
} else if (query instanceof MultiPhraseQuery) {
final MultiPhraseQuery mpq = (MultiPhraseQuery) query;
final List termArrays = mpq.getTermArrays();
final List<Term[]> termArrays = mpq.getTermArrays();
final int[] positions = mpq.getPositions();
if (positions.length > 0) {
@ -174,14 +172,14 @@ public class WeightedSpanTermExtractor {
}
}
final List[] disjunctLists = new List[maxPosition + 1];
final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1];
int distinctPositions = 0;
for (int i = 0; i < termArrays.size(); ++i) {
final Term[] termArray = (Term[]) termArrays.get(i);
List disjuncts = disjunctLists[positions[i]];
final Term[] termArray = termArrays.get(i);
List<SpanQuery> disjuncts = disjunctLists[positions[i]];
if (disjuncts == null) {
disjuncts = (disjunctLists[positions[i]] = new ArrayList(termArray.length));
disjuncts = (disjunctLists[positions[i]] = new ArrayList<SpanQuery>(termArray.length));
++distinctPositions;
}
for (int j = 0; j < termArray.length; ++j) {
@ -193,9 +191,9 @@ public class WeightedSpanTermExtractor {
int position = 0;
final SpanQuery[] clauses = new SpanQuery[distinctPositions];
for (int i = 0; i < disjunctLists.length; ++i) {
List disjuncts = disjunctLists[i];
List<SpanQuery> disjuncts = disjunctLists[i];
if (disjuncts != null) {
clauses[position++] = new SpanOrQuery((SpanQuery[]) disjuncts
clauses[position++] = new SpanOrQuery(disjuncts
.toArray(new SpanQuery[disjuncts.size()]));
} else {
++positionGaps;
@ -221,20 +219,19 @@ public class WeightedSpanTermExtractor {
* SpanQuery to extract Terms from
* @throws IOException
*/
private void extractWeightedSpanTerms(Map terms, SpanQuery spanQuery) throws IOException {
Set nonWeightedTerms = new HashSet();
private void extractWeightedSpanTerms(Map<String,WeightedSpanTerm> terms, SpanQuery spanQuery) throws IOException {
Set<Term> nonWeightedTerms = new HashSet<Term>();
spanQuery.extractTerms(nonWeightedTerms);
Set fieldNames;
Set<String> fieldNames;
if (fieldName == null) {
fieldNames = new HashSet();
for (Iterator iter = nonWeightedTerms.iterator(); iter.hasNext();) {
Term queryTerm = (Term) iter.next();
fieldNames = new HashSet<String>();
for (final Term queryTerm : nonWeightedTerms) {
fieldNames.add(queryTerm.field());
}
} else {
fieldNames = new HashSet(1);
fieldNames = new HashSet<String>(1);
fieldNames.add(fieldName);
}
// To support the use of the default field name
@ -242,11 +239,9 @@ public class WeightedSpanTermExtractor {
fieldNames.add(defaultField);
}
Iterator it = fieldNames.iterator();
List spanPositions = new ArrayList();
List<PositionSpan> spanPositions = new ArrayList<PositionSpan>();
while (it.hasNext()) {
String field = (String) it.next();
for (final String field : fieldNames) {
IndexReader reader = getReaderForField(field);
Spans spans = spanQuery.getSpans(reader);
@ -263,11 +258,10 @@ public class WeightedSpanTermExtractor {
return;
}
for (Iterator iter = nonWeightedTerms.iterator(); iter.hasNext();) {
Term queryTerm = (Term) iter.next();
for (final Term queryTerm : nonWeightedTerms) {
if (fieldNameComparator(queryTerm.field())) {
WeightedSpanTerm weightedSpanTerm = (WeightedSpanTerm) terms.get(queryTerm.text());
WeightedSpanTerm weightedSpanTerm = terms.get(queryTerm.text());
if (weightedSpanTerm == null) {
weightedSpanTerm = new WeightedSpanTerm(spanQuery.getBoost(), queryTerm.text());
@ -292,12 +286,11 @@ public class WeightedSpanTermExtractor {
* Query to extract Terms from
* @throws IOException
*/
private void extractWeightedTerms(Map terms, Query query) throws IOException {
Set nonWeightedTerms = new HashSet();
private void extractWeightedTerms(Map<String,WeightedSpanTerm> terms, Query query) throws IOException {
Set<Term> nonWeightedTerms = new HashSet<Term>();
query.extractTerms(nonWeightedTerms);
for (Iterator iter = nonWeightedTerms.iterator(); iter.hasNext();) {
Term queryTerm = (Term) iter.next();
for (final Term queryTerm : nonWeightedTerms) {
if (fieldNameComparator(queryTerm.field())) {
WeightedSpanTerm weightedSpanTerm = new WeightedSpanTerm(query.getBoost(), queryTerm.text());
@ -320,7 +313,7 @@ public class WeightedSpanTermExtractor {
tokenStream = new CachingTokenFilter(tokenStream);
cachedTokenStream = true;
}
IndexReader reader = (IndexReader) readers.get(field);
IndexReader reader = readers.get(field);
if (reader == null) {
MemoryIndex indexer = new MemoryIndex();
indexer.addField(field, tokenStream);
@ -345,7 +338,7 @@ public class WeightedSpanTermExtractor {
* @return Map containing WeightedSpanTerms
* @throws IOException
*/
public Map getWeightedSpanTerms(Query query, TokenStream tokenStream)
public Map<String,WeightedSpanTerm> getWeightedSpanTerms(Query query, TokenStream tokenStream)
throws IOException {
return getWeightedSpanTerms(query, tokenStream, null);
}
@ -364,7 +357,7 @@ public class WeightedSpanTermExtractor {
* @return Map containing WeightedSpanTerms
* @throws IOException
*/
public Map getWeightedSpanTerms(Query query, TokenStream tokenStream,
public Map<String,WeightedSpanTerm> getWeightedSpanTerms(Query query, TokenStream tokenStream,
String fieldName) throws IOException {
if (fieldName != null) {
this.fieldName = StringHelper.intern(fieldName);
@ -372,7 +365,7 @@ public class WeightedSpanTermExtractor {
this.fieldName = null;
}
Map terms = new PositionCheckingMap();
Map<String,WeightedSpanTerm> terms = new PositionCheckingMap<String>();
this.tokenStream = tokenStream;
try {
extract(query, terms);
@ -400,7 +393,7 @@ public class WeightedSpanTermExtractor {
* @return Map of WeightedSpanTerms with quasi tf/idf scores
* @throws IOException
*/
public Map getWeightedSpanTermsWithScores(Query query, TokenStream tokenStream, String fieldName,
public Map<String,WeightedSpanTerm> getWeightedSpanTermsWithScores(Query query, TokenStream tokenStream, String fieldName,
IndexReader reader) throws IOException {
if (fieldName != null) {
this.fieldName = StringHelper.intern(fieldName);
@ -409,16 +402,16 @@ public class WeightedSpanTermExtractor {
}
this.tokenStream = tokenStream;
Map terms = new PositionCheckingMap();
Map<String,WeightedSpanTerm> terms = new PositionCheckingMap<String>();
extract(query, terms);
int totalNumDocs = reader.numDocs();
Set weightedTerms = terms.keySet();
Iterator it = weightedTerms.iterator();
Set<String> weightedTerms = terms.keySet();
Iterator<String> it = weightedTerms.iterator();
try {
while (it.hasNext()) {
WeightedSpanTerm weightedSpanTerm = (WeightedSpanTerm) terms.get(it.next());
WeightedSpanTerm weightedSpanTerm = terms.get(it.next());
int docFreq = reader.docFreq(new Term(fieldName, weightedSpanTerm.term));
// docFreq counts deletes
if(totalNumDocs < docFreq) {
@ -440,21 +433,21 @@ public class WeightedSpanTermExtractor {
* This class makes sure that if both position sensitive and insensitive
* versions of the same term are added, the position insensitive one wins.
*/
static private class PositionCheckingMap extends HashMap {
static private class PositionCheckingMap<K> extends HashMap<K,WeightedSpanTerm> {
public void putAll(Map m) {
Iterator it = m.entrySet().iterator();
Iterator<Map.Entry<K, WeightedSpanTerm>> it = m.entrySet().iterator();
while (it.hasNext()) {
Map.Entry entry = (java.util.Map.Entry) it.next();
Map.Entry<K, WeightedSpanTerm> entry = it.next();
this.put(entry.getKey(), entry.getValue());
}
}
public Object put(Object key, Object value) {
Object prev = super.put(key, value);
public WeightedSpanTerm put(K key, WeightedSpanTerm value) {
WeightedSpanTerm prev = super.put(key, value);
if (prev == null) return prev;
WeightedSpanTerm prevTerm = (WeightedSpanTerm)prev;
WeightedSpanTerm newTerm = (WeightedSpanTerm)value;
WeightedSpanTerm prevTerm = prev;
WeightedSpanTerm newTerm = value;
if (!prevTerm.positionSensitive) {
newTerm.positionSensitive = false;
}