mirror of https://github.com/apache/lucene.git
LUCENE-1257: Generics in contrib/memory, contrib/wordnet (previously memory), contrib/misc, contrib/benchmark
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@830790 13f79535-47bb-0310-9956-ffa450edef68
parent 36b65637fc
commit 786457c0e3
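The change is mechanical throughout: raw pre-Java-5 collection types gain explicit type parameters, so the unchecked casts at each call site can be dropped and iterator loops can become for-each loops. A minimal before/after sketch of the pattern (the class and names below are illustrative, not taken from the patch):

```java
import java.util.HashMap;
import java.util.Map;

public class GenericsSketch {

    // Before: a raw map forces an unchecked cast on every lookup.
    @SuppressWarnings({"rawtypes", "unchecked"})
    static String rawLookup(Map table, String key) {
        return (String) table.get(key); // wrong types fail only at runtime
    }

    // After: type parameters move the check to compile time and drop the cast.
    static String typedLookup(Map<String, String> table, String key) {
        return table.get(key);
    }

    public static void main(String[] args) {
        // pre-Java-7 explicit type arguments, matching the style in this patch
        Map<String, String> table = new HashMap<String, String>();
        table.put("field", "value");
        System.out.println(typedLookup(table, "field")); // prints: value
    }
}
```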
@@ -25,8 +25,6 @@ import java.util.ArrayList;
 import java.util.List;
-import java.util.Collections;
 import java.util.HashMap;
-import java.util.Iterator;
 import java.util.Map;
 import java.util.Properties;
 import java.util.StringTokenizer;
 

@@ -50,7 +48,7 @@ public class Config {
 
 private int roundNumber = 0;
 private Properties props;
-private HashMap valByRound = new HashMap();
+private HashMap<String,Object> valByRound = new HashMap<String,Object>();
 private HashMap<String,String> colForValByRound = new HashMap<String,String>();
 private String algorithmText;
 

@@ -247,8 +245,7 @@ public class Config {
 // log changes in values
 if (valByRound.size()>0) {
 sb.append(": ");
-for (Iterator iter = valByRound.keySet().iterator(); iter.hasNext();) {
-String name = (String) iter.next();
+for (final String name : valByRound.keySet()) {
 Object a = valByRound.get(name);
 if (a instanceof int[]) {
 int ai[] = (int[]) a;
@@ -175,10 +175,10 @@ import org.apache.lucene.store.RAMDirectory;
 public class MemoryIndex implements Serializable {
 
 /** info for each field: Map<String fieldName, Info field> */
-private final HashMap fields = new HashMap();
+private final HashMap<String,Info> fields = new HashMap<String,Info>();
 
 /** fields sorted ascending by fieldName; lazily computed on demand */
-private transient Map.Entry[] sortedFields;
+private transient Map.Entry<String,Info>[] sortedFields;
 
 /** pos: positions[3*i], startOffset: positions[3*i +1], endOffset: positions[3*i +2] */
 private final int stride;

@@ -270,13 +270,13 @@ public class MemoryIndex implements Serializable {
 * the keywords to generate tokens for
 * @return the corresponding token stream
 */
-public TokenStream keywordTokenStream(final Collection keywords) {
+public <T> TokenStream keywordTokenStream(final Collection<T> keywords) {
 // TODO: deprecate & move this method into AnalyzerUtil?
 if (keywords == null)
 throw new IllegalArgumentException("keywords must not be null");
 
 return new TokenStream() {
-private Iterator iter = keywords.iterator();
+private Iterator<T> iter = keywords.iterator();
 private int start = 0;
 private TermAttribute termAtt = addAttribute(TermAttribute.class);
 private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);

@@ -284,7 +284,7 @@ public class MemoryIndex implements Serializable {
 public boolean incrementToken() {
 if (!iter.hasNext()) return false;
 
-Object obj = iter.next();
+T obj = iter.next();
 if (obj == null)
 throw new IllegalArgumentException("keyword must not be null");
 

@@ -335,7 +335,7 @@ public class MemoryIndex implements Serializable {
 if (fields.get(fieldName) != null)
 throw new IllegalArgumentException("field must not be added more than once");
 
-HashMap terms = new HashMap();
+HashMap<String,ArrayIntList> terms = new HashMap<String,ArrayIntList>();
 int numTokens = 0;
 int numOverlapTokens = 0;
 int pos = -1;

@@ -355,7 +355,7 @@ public class MemoryIndex implements Serializable {
 numOverlapTokens++;
 pos += posIncr;
 
-ArrayIntList positions = (ArrayIntList) terms.get(term);
+ArrayIntList positions = terms.get(term);
 if (positions == null) { // term not seen before
 positions = new ArrayIntList(stride);
 terms.put(term, positions);

@@ -471,21 +471,19 @@ public class MemoryIndex implements Serializable {
 if (sortedFields != null) size += VM.sizeOfObjectArray(sortedFields.length);
 
 size += VM.sizeOfHashMap(fields.size());
-Iterator iter = fields.entrySet().iterator();
-while (iter.hasNext()) { // for each Field Info
-Map.Entry entry = (Map.Entry) iter.next();
-Info info = (Info) entry.getValue();
+for (Map.Entry<String, Info> entry : fields.entrySet()) { // for each Field Info
+Info info = entry.getValue();
 size += VM.sizeOfObject(2*INT + 3*PTR); // Info instance vars
 if (info.sortedTerms != null) size += VM.sizeOfObjectArray(info.sortedTerms.length);
 
 int len = info.terms.size();
 size += VM.sizeOfHashMap(len);
-Iterator iter2 = info.terms.entrySet().iterator();
+Iterator<Map.Entry<String,ArrayIntList>> iter2 = info.terms.entrySet().iterator();
 while (--len >= 0) { // for each term
-Map.Entry e = (Map.Entry) iter2.next();
+Map.Entry<String,ArrayIntList> e = iter2.next();
 size += VM.sizeOfObject(PTR + 3*INT); // assumes substring() memory overlay
 // size += STR + 2 * ((String) e.getKey()).length();
-ArrayIntList positions = (ArrayIntList) e.getValue();
+ArrayIntList positions = e.getValue();
 size += VM.sizeOfArrayIntList(positions.size());
 }
 }

@@ -502,13 +500,13 @@ public class MemoryIndex implements Serializable {
 }
 
 /** returns a view of the given map's entries, sorted ascending by key */
-private static Map.Entry[] sort(HashMap map) {
+private static <K,V> Map.Entry<K,V>[] sort(HashMap<K,V> map) {
 int size = map.size();
-Map.Entry[] entries = new Map.Entry[size];
+Map.Entry<K,V>[] entries = new Map.Entry[size];
 
-Iterator iter = map.entrySet().iterator();
+Iterator<Map.Entry<K,V>> iter = map.entrySet().iterator();
 for (int i=0; i < size; i++) {
-entries[i] = (Map.Entry) iter.next();
+entries[i] = iter.next();
 }
 
 if (size > 1) Arrays.sort(entries, termComparator);

@@ -528,18 +526,18 @@ public class MemoryIndex implements Serializable {
 int sumTerms = 0;
 
 for (int i=0; i < sortedFields.length; i++) {
-Map.Entry entry = sortedFields[i];
-String fieldName = (String) entry.getKey();
-Info info = (Info) entry.getValue();
+Map.Entry<String,Info> entry = sortedFields[i];
+String fieldName = entry.getKey();
+Info info = entry.getValue();
 info.sortTerms();
 result.append(fieldName + ":\n");
 
 int numChars = 0;
 int numPositions = 0;
 for (int j=0; j < info.sortedTerms.length; j++) {
-Map.Entry e = info.sortedTerms[j];
-String term = (String) e.getKey();
-ArrayIntList positions = (ArrayIntList) e.getValue();
+Map.Entry<String,ArrayIntList> e = info.sortedTerms[j];
+String term = e.getKey();
+ArrayIntList positions = e.getValue();
 result.append("\t'" + term + "':" + numPositions(positions) + ":");
 result.append(positions.toString(stride)); // ignore offsets
 result.append("\n");

@@ -577,10 +575,10 @@ public class MemoryIndex implements Serializable {
 * Term strings and their positions for this field: Map <String
 * termText, ArrayIntList positions>
 */
-private final HashMap terms;
+private final HashMap<String,ArrayIntList> terms;
 
 /** Terms sorted ascending by term text; computed on demand */
-private transient Map.Entry[] sortedTerms;
+private transient Map.Entry<String,ArrayIntList>[] sortedTerms;
 
 /** Number of added tokens for this field */
 private final int numTokens;

@@ -596,7 +594,7 @@ public class MemoryIndex implements Serializable {
 
 private static final long serialVersionUID = 2882195016849084649L;
 
-public Info(HashMap terms, int numTokens, int numOverlapTokens, float boost) {
+public Info(HashMap<String,ArrayIntList> terms, int numTokens, int numOverlapTokens, float boost) {
 this.terms = terms;
 this.numTokens = numTokens;
 this.numOverlapTokens = numOverlapTokens;

@@ -617,12 +615,12 @@ public class MemoryIndex implements Serializable {
 
 /** note that the frequency can be calculated as numPosition(getPositions(x)) */
 public ArrayIntList getPositions(String term) {
-return (ArrayIntList) terms.get(term);
+return terms.get(term);
 }
 
 /** note that the frequency can be calculated as numPosition(getPositions(x)) */
 public ArrayIntList getPositions(int pos) {
-return (ArrayIntList) sortedTerms[pos].getValue();
+return sortedTerms[pos].getValue();
 }
 
 public float getBoost() {

@@ -736,11 +734,11 @@ public class MemoryIndex implements Serializable {
 protected void finalize() {}
 
 private Info getInfo(String fieldName) {
-return (Info) fields.get(fieldName);
+return fields.get(fieldName);
 }
 
 private Info getInfo(int pos) {
-return (Info) sortedFields[pos].getValue();
+return sortedFields[pos].getValue();
 }
 
 public int docFreq(Term term) {

@@ -814,7 +812,7 @@ public class MemoryIndex implements Serializable {
 Info info = getInfo(j);
 if (i >= info.sortedTerms.length) return null;
 // if (DEBUG) System.err.println("TermEnum.term: " + i + ", " + info.sortedTerms[i].getKey());
-return createTerm(info, j, (String) info.sortedTerms[i].getKey());
+return createTerm(info, j, info.sortedTerms[i].getKey());
 }
 
 public int docFreq() {

@@ -834,7 +832,7 @@ public class MemoryIndex implements Serializable {
 // Assertion: sortFields has already been called before
 Term template = info.template;
 if (template == null) { // not yet cached?
-String fieldName = (String) sortedFields[pos].getKey();
+String fieldName = sortedFields[pos].getKey();
 template = new Term(fieldName);
 info.template = template;
 }

@@ -949,10 +947,9 @@ public class MemoryIndex implements Serializable {
 if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVectors");
 TermFreqVector[] vectors = new TermFreqVector[fields.size()];
 // if (vectors.length == 0) return null;
-Iterator iter = fields.keySet().iterator();
+Iterator<String> iter = fields.keySet().iterator();
 for (int i=0; i < vectors.length; i++) {
-String fieldName = (String) iter.next();
-vectors[i] = getTermFreqVector(docNumber, fieldName);
+vectors[i] = getTermFreqVector(docNumber, iter.next());
 }
 return vectors;
 }

@@ -962,9 +959,8 @@ public class MemoryIndex implements Serializable {
 if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVectors");
 
 // if (vectors.length == 0) return null;
-for (Iterator iterator = fields.keySet().iterator(); iterator.hasNext();)
+for (final String fieldName : fields.keySet())
 {
-String fieldName = (String) iterator.next();
 getTermFreqVector(docNumber, fieldName, mapper);
 }
 }

@@ -980,7 +976,7 @@ public class MemoryIndex implements Serializable {
 mapper.setExpectations(field, info.sortedTerms.length, stride != 1, true);
 for (int i = info.sortedTerms.length; --i >=0;){
 
-ArrayIntList positions = (ArrayIntList) info.sortedTerms[i].getValue();
+ArrayIntList positions = info.sortedTerms[i].getValue();
 int size = positions.size();
 org.apache.lucene.index.TermVectorOffsetInfo[] offsets =
 new org.apache.lucene.index.TermVectorOffsetInfo[size / stride];

@@ -990,9 +986,9 @@ public class MemoryIndex implements Serializable {
 int end = positions.get(j+1);
 offsets[k] = new org.apache.lucene.index.TermVectorOffsetInfo(start, end);
 }
-mapper.map((String)info.sortedTerms[i].getKey(),
-numPositions((ArrayIntList) info.sortedTerms[i].getValue()),
-offsets, ((ArrayIntList) info.sortedTerms[i].getValue()).toArray(stride));
+mapper.map(info.sortedTerms[i].getKey(),
+numPositions(info.sortedTerms[i].getValue()),
+offsets, (info.sortedTerms[i].getValue()).toArray(stride));
 }
 }

@@ -1004,7 +1000,7 @@ public class MemoryIndex implements Serializable {
 
 return new TermPositionVector() {
 
-private final Map.Entry[] sortedTerms = info.sortedTerms;
+private final Map.Entry<String,ArrayIntList>[] sortedTerms = info.sortedTerms;
 
 public String getField() {
 return fieldName;

@@ -1017,7 +1013,7 @@ public class MemoryIndex implements Serializable {
 public String[] getTerms() {
 String[] terms = new String[sortedTerms.length];
 for (int i=sortedTerms.length; --i >= 0; ) {
-terms[i] = (String) sortedTerms[i].getKey();
+terms[i] = sortedTerms[i].getKey();
 }
 return terms;
 }

@@ -1025,7 +1021,7 @@ public class MemoryIndex implements Serializable {
 public int[] getTermFrequencies() {
 int[] freqs = new int[sortedTerms.length];
 for (int i=sortedTerms.length; --i >= 0; ) {
-freqs[i] = numPositions((ArrayIntList) sortedTerms[i].getValue());
+freqs[i] = numPositions(sortedTerms[i].getValue());
 }
 return freqs;
 }

@@ -1045,14 +1041,14 @@ public class MemoryIndex implements Serializable {
 
 // lucene >= 1.4.3
 public int[] getTermPositions(int index) {
-return ((ArrayIntList) sortedTerms[index].getValue()).toArray(stride);
+return sortedTerms[index].getValue().toArray(stride);
 }
 
 // lucene >= 1.9 (remove this method for lucene-1.4.3)
 public org.apache.lucene.index.TermVectorOffsetInfo[] getOffsets(int index) {
 if (stride == 1) return null; // no offsets stored
 
-ArrayIntList positions = (ArrayIntList) sortedTerms[index].getValue();
+ArrayIntList positions = sortedTerms[index].getValue();
 int size = positions.size();
 org.apache.lucene.index.TermVectorOffsetInfo[] offsets =
 new org.apache.lucene.index.TermVectorOffsetInfo[size / stride];

@@ -1153,7 +1149,7 @@ public class MemoryIndex implements Serializable {
 throw new UnsupportedOperationException();
 }
 
-protected void doCommit(Map commitUserData) {
+protected void doCommit(Map<String,String> commitUserData) {
 if (DEBUG) System.err.println("MemoryIndexReader.doCommit");
 }
 

@@ -1162,16 +1158,16 @@ public class MemoryIndex implements Serializable {
 }
 
 // lucene >= 1.9 (remove this method for lucene-1.4.3)
-public Collection getFieldNames(FieldOption fieldOption) {
+public Collection<String> getFieldNames(FieldOption fieldOption) {
 if (DEBUG) System.err.println("MemoryIndexReader.getFieldNamesOption");
 if (fieldOption == FieldOption.UNINDEXED)
-return Collections.EMPTY_SET;
+return Collections.<String>emptySet();
 if (fieldOption == FieldOption.INDEXED_NO_TERMVECTOR)
-return Collections.EMPTY_SET;
+return Collections.<String>emptySet();
 if (fieldOption == FieldOption.TERMVECTOR_WITH_OFFSET && stride == 1)
-return Collections.EMPTY_SET;
+return Collections.<String>emptySet();
 if (fieldOption == FieldOption.TERMVECTOR_WITH_POSITION_OFFSET && stride == 1)
-return Collections.EMPTY_SET;
+return Collections.<String>emptySet();
 
 return Collections.unmodifiableSet(fields.keySet());
 }
@@ -21,13 +21,13 @@ import java.io.IOException;
 import java.io.Reader;
 import java.io.StringReader;
 import java.util.Arrays;
-import java.util.HashSet;
 import java.util.Locale;
 import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.analysis.StopAnalyzer;
 import org.apache.lucene.analysis.StopFilter;
 import org.apache.lucene.analysis.TokenStream;

@@ -72,11 +72,8 @@ public class PatternAnalyzer extends Analyzer {
 /** <code>"\\s+"</code>; Divides text at whitespaces (Character.isWhitespace(c)) */
 public static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
 
-private static final Set EXTENDED_ENGLISH_STOP_WORDS;
-static {
-EXTENDED_ENGLISH_STOP_WORDS = new HashSet();
-
-EXTENDED_ENGLISH_STOP_WORDS.addAll(Arrays.asList(new String[] {
+private static final CharArraySet EXTENDED_ENGLISH_STOP_WORDS =
+CharArraySet.unmodifiableSet(new CharArraySet(Arrays.asList(
 "a", "about", "above", "across", "adj", "after", "afterwards",
 "again", "against", "albeit", "all", "almost", "alone", "along",
 "already", "also", "although", "always", "among", "amongst", "an",

@@ -117,8 +114,8 @@ public class PatternAnalyzer extends Analyzer {
 "whomever", "whomsoever", "whose", "whosoever", "why", "will",
 "with", "within", "without", "would", "xsubj", "xcal", "xauthor",
 "xother ", "xnote", "yet", "you", "your", "yours", "yourself",
-"yourselves"}));
-}
+"yourselves"
+), true));
 
 /**
 * A lower-casing word analyzer with English stop words (can be shared

@@ -139,7 +136,7 @@ public class PatternAnalyzer extends Analyzer {
 
 private final Pattern pattern;
 private final boolean toLowerCase;
-private final Set stopWords;
+private final Set<?> stopWords;
 
 private final Version matchVersion;
 

@@ -162,7 +159,7 @@ public class PatternAnalyzer extends Analyzer {
 * or <a href="http://www.unine.ch/info/clef/">other stop words
 * lists </a>.
 */
-public PatternAnalyzer(Version matchVersion, Pattern pattern, boolean toLowerCase, Set stopWords) {
+public PatternAnalyzer(Version matchVersion, Pattern pattern, boolean toLowerCase, Set<?> stopWords) {
 if (pattern == null)
 throw new IllegalArgumentException("pattern must not be null");
 

@@ -313,15 +310,7 @@ public class PatternAnalyzer extends Analyzer {
 if (input != null) input.close();
 }
 }
 
-/** somewhat oversized to minimize hash collisions */
-private static Set makeStopSet(Set stopWords) {
-Set stops = new HashSet(stopWords.size() * 2, 0.3f);
-stops.addAll(stopWords);
-return stops;
-// return Collections.unmodifiableSet(stops);
-}
-
 
 ///////////////////////////////////////////////////////////////////////////////
 // Nested classes:
@@ -103,7 +103,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy
 }
 }
 
-private boolean isOptimized(SegmentInfos infos, IndexWriter writer, int maxNumSegments, Set segmentsToOptimize) throws IOException {
+private boolean isOptimized(SegmentInfos infos, IndexWriter writer, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
 final int numSegments = infos.size();
 int numToOptimize = 0;
 SegmentInfo optimizeInfo = null;

@@ -128,7 +128,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy
 }
 
 @Override
-public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxNumSegments, Set segmentsToOptimize) throws IOException {
+public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
 
 assert maxNumSegments > 0;
 
@@ -55,8 +55,7 @@ public class FieldNormModifier {
 Similarity s = null;
 if (!args[1].equals("-n")) {
 try {
-Class simClass = Class.forName(args[1]);
-s = (Similarity)simClass.newInstance();
+s = Class.forName(args[1]).asSubclass(Similarity.class).newInstance();
 } catch (Exception e) {
 System.err.println("Couldn't instantiate similarity with empty constructor: " + args[1]);
 e.printStackTrace(System.err);

@@ -148,7 +147,7 @@ public class FieldNormModifier {
 if (sim == null)
 reader.setNorm(d, fieldName, fakeNorms[0]);
 else
-reader.setNorm(d, fieldName, sim.encodeNorm(sim.lengthNorm(fieldName, termCounts[d])));
+reader.setNorm(d, fieldName, Similarity.encodeNorm(sim.lengthNorm(fieldName, termCounts[d])));
 }
 }
 
@@ -78,14 +78,14 @@ public class IndexSplitter {
 for (int x = 2; x < args.length; x++) {
 segs.add(args[x]);
 }
-is.remove((String[]) segs.toArray(new String[0]));
+is.remove(segs.toArray(new String[0]));
 } else {
 File targetDir = new File(args[1]);
 List<String> segs = new ArrayList<String>();
 for (int x = 2; x < args.length; x++) {
 segs.add(args[x]);
 }
-is.split(targetDir, (String[]) segs.toArray(new String[0]));
+is.split(targetDir, segs.toArray(new String[0]));
 }
 }
 

@@ -137,9 +137,8 @@ public class IndexSplitter {
 SegmentInfo info = getInfo(n);
 destInfos.add(info);
 // now copy files over
-List files = info.files();
-for (int x = 0; x < files.size(); x++) {
-String srcName = (String) files.get(x);
+List<String> files = info.files();
+for (final String srcName : files) {
 File srcFile = new File(dir, srcName);
 File destFile = new File(destDir, srcName);
 copyFile(srcFile, destFile);
@@ -158,7 +158,7 @@ public class MultiPassIndexSplitter {
 if (indexes.size() == 1) {
 input = indexes.get(0);
 } else {
-input = new MultiReader((IndexReader[])indexes.toArray(new IndexReader[indexes.size()]));
+input = new MultiReader(indexes.toArray(new IndexReader[indexes.size()]));
 }
 splitter.split(input, dirs, seq);
 }
@@ -5,8 +5,6 @@ import org.apache.lucene.util.StringHelper;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
-import java.util.Collection;
-import java.util.Iterator;
 /*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.

@@ -68,13 +66,13 @@ public class TermVectorAccessor {
 }
 
 /** Instance reused to save garbage collector some time */
-private List/*<String>*/ tokens;
+private List<String> tokens;
 
 /** Instance reused to save garbage collector some time */
-private List/*<int[]>*/ positions;
+private List<int[]> positions;
 
 /** Instance reused to save garbage collector some time */
-private List/*<Integer>*/ frequencies;
+private List<Integer> frequencies;
 
 
 /**

@@ -90,9 +88,9 @@ public class TermVectorAccessor {
 private void build(IndexReader indexReader, String field, TermVectorMapper mapper, int documentNumber) throws IOException {
 
 if (tokens == null) {
-tokens = new ArrayList/*<String>*/(500);
-positions = new ArrayList/*<int[]>*/(500);
-frequencies = new ArrayList/*<Integer>*/(500);
+tokens = new ArrayList<String>(500);
+positions = new ArrayList<int[]>(500);
+frequencies = new ArrayList<Integer>(500);
 } else {
 tokens.clear();
 frequencies.clear();

@@ -127,7 +125,7 @@ public class TermVectorAccessor {
 mapper.setDocumentNumber(documentNumber);
 mapper.setExpectations(field, tokens.size(), false, !mapper.isIgnoringPositions());
 for (int i = 0; i < tokens.size(); i++) {
-mapper.map((String) tokens.get(i), ((Integer) frequencies.get(i)).intValue(), (TermVectorOffsetInfo[]) null, (int[]) positions.get(i));
+mapper.map(tokens.get(i), frequencies.get(i).intValue(), (TermVectorOffsetInfo[]) null, positions.get(i));
 }
 }
 termEnum.close();
@@ -65,7 +65,7 @@ public class HighFreqTerms {
 }
 }
 while (tiq.size() != 0) {
-TermInfo termInfo = (TermInfo) tiq.pop();
+TermInfo termInfo = tiq.pop();
 System.out.println(termInfo.term + " " + termInfo.docFreq);
 }
 
@@ -58,8 +58,7 @@ public class LengthNormModifier {
 
 Similarity s = null;
 try {
-Class simClass = Class.forName(args[1]);
-s = (Similarity)simClass.newInstance();
+s = Class.forName(args[1]).asSubclass(Similarity.class).newInstance();
 } catch (Exception e) {
 System.err.println("Couldn't instantiate similarity with empty constructor: " + args[1]);
 e.printStackTrace(System.err);

@@ -142,7 +141,7 @@ public class LengthNormModifier {
 reader = IndexReader.open(dir, false);
 for (int d = 0; d < termCounts.length; d++) {
 if (! reader.isDeleted(d)) {
-byte norm = sim.encodeNorm(sim.lengthNorm(fieldName, termCounts[d]));
+byte norm = Similarity.encodeNorm(sim.lengthNorm(fieldName, termCounts[d]));
 reader.setNorm(d, fieldName, norm);
 }
 }
@@ -17,7 +17,6 @@
 
 package org.apache.lucene.misc;
 
-import org.apache.lucene.search.Similarity;
 import org.apache.lucene.search.DefaultSimilarity;
 import org.apache.lucene.index.FieldInvertState;
 

@@ -51,10 +50,10 @@ public class SweetSpotSimilarity extends DefaultSimilarity {
 private int ln_max = 1;
 private float ln_steep = 0.5f;
 
-private Map ln_mins = new HashMap(7);
-private Map ln_maxs = new HashMap(7);
-private Map ln_steeps = new HashMap(7);
-private Map ln_overlaps = new HashMap(7);
+private Map<String,Number> ln_maxs = new HashMap<String,Number>(7);
+private Map<String,Number> ln_mins = new HashMap<String,Number>(7);
+private Map<String,Float> ln_steeps = new HashMap<String,Float>(7);
+private Map<String,Boolean> ln_overlaps = new HashMap<String,Boolean>(7);
 
 private float tf_base = 0.0f;
 private float tf_min = 0.0f;

@@ -139,7 +138,7 @@ public class SweetSpotSimilarity extends DefaultSimilarity {
 final int numTokens;
 boolean overlaps = discountOverlaps;
 if (ln_overlaps.containsKey(fieldName)) {
-overlaps = ((Boolean)ln_overlaps.get(fieldName)).booleanValue();
+overlaps = ln_overlaps.get(fieldName).booleanValue();
 }
 if (overlaps)
 numTokens = state.getLength() - state.getNumOverlap();

@@ -173,13 +172,13 @@ public class SweetSpotSimilarity extends DefaultSimilarity {
 float s = ln_steep;
 
 if (ln_mins.containsKey(fieldName)) {
-l = ((Number)ln_mins.get(fieldName)).intValue();
+l = ln_mins.get(fieldName).intValue();
 }
 if (ln_maxs.containsKey(fieldName)) {
-h = ((Number)ln_maxs.get(fieldName)).intValue();
+h = ln_maxs.get(fieldName).intValue();
 }
 if (ln_steeps.containsKey(fieldName)) {
-s = ((Number)ln_steeps.get(fieldName)).floatValue();
+s = ln_steeps.get(fieldName).floatValue();
 }
 
 return (float)
@@ -23,7 +23,6 @@ import java.util.ArrayList;
 import java.util.List;
 
 import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.queryParser.ParseException;

@@ -76,8 +75,8 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
 * @throws ParseException
 */
 protected Query getWildcardQuery(String field, String termStr) throws ParseException {
-List tlist = new ArrayList();
-List wlist = new ArrayList();
+List<String> tlist = new ArrayList<String>();
+List<String> wlist = new ArrayList<String>();
 /* somewhat a hack: find/store wildcard chars
 * in order to put them back after analyzing */
 boolean isWithinToken = (!termStr.startsWith("?") && !termStr.startsWith("*"));

@@ -145,8 +144,8 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
 /* if wlist contains one wildcard, it must be at the end, because:
 * 1) wildcards are not allowed in 1st position of a term by QueryParser
 * 2) if wildcard was *not* in end, there would be *two* or more tokens */
-return super.getWildcardQuery(field, (String) tlist.get(0)
-+ (((String) wlist.get(0)).toString()));
+return super.getWildcardQuery(field, tlist.get(0)
++ wlist.get(0).toString());
 } else {
 /* we should never get here! if so, this method was called
 * with a termStr containing no wildcard ... */

@@ -157,9 +156,9 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
 * with wildcards put back in postion */
 StringBuilder sb = new StringBuilder();
 for (int i = 0; i < tlist.size(); i++) {
-sb.append((String) tlist.get(i));
+sb.append( tlist.get(i));
 if (wlist != null && wlist.size() > i) {
-sb.append((String) wlist.get(i));
+sb.append(wlist.get(i));
 }
 }
 return super.getWildcardQuery(field, sb.toString());

@@ -188,7 +187,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
 protected Query getPrefixQuery(String field, String termStr) throws ParseException {
 // get Analyzer from superclass and tokenize the term
 TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
-List tlist = new ArrayList();
+List<String> tlist = new ArrayList<String>();
 TermAttribute termAtt = source.addAttribute(TermAttribute.class);
 
 while (true) {

@@ -207,7 +206,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
 }
 
 if (tlist.size() == 1) {
-return super.getPrefixQuery(field, (String) tlist.get(0));
+return super.getPrefixQuery(field, tlist.get(0));
 } else {
 /* this means that the analyzer used either added or consumed
 * (common for a stemmer) tokens, and we can't build a PrefixQuery */
@@ -62,7 +62,7 @@ import org.apache.lucene.util.Version;
 *
 */
 public class ComplexPhraseQueryParser extends QueryParser {
-private ArrayList/*<ComplexPhraseQuery>*/complexPhrases = null;
+private ArrayList<ComplexPhraseQuery> complexPhrases = null;
 
 private boolean isPass2ResolvingPhrases;
 

@@ -102,7 +102,7 @@ public class ComplexPhraseQueryParser extends QueryParser {
 
 // First pass - parse the top-level query recording any PhraseQuerys
 // which will need to be resolved
-complexPhrases = new ArrayList/*<ComplexPhraseQuery>*/();
+complexPhrases = new ArrayList<ComplexPhraseQuery>();
 Query q = super.parse(query);
 
 // Perform second pass, using this QueryParser to parse any nested

@@ -110,8 +110,8 @@ public class ComplexPhraseQueryParser extends QueryParser {
 // set of syntax restrictions (i.e. all fields must be same)
 isPass2ResolvingPhrases = true;
 try {
-for (Iterator iterator = complexPhrases.iterator(); iterator.hasNext();) {
-currentPhraseQuery = (ComplexPhraseQuery) iterator.next();
+for (Iterator<ComplexPhraseQuery> iterator = complexPhrases.iterator(); iterator.hasNext();) {
+currentPhraseQuery = iterator.next();
 // in each phrase, now parse the contents between quotes as a
 // separate parse operation
 currentPhraseQuery.parsePhraseElements(this);

@@ -247,10 +247,10 @@ public class ComplexPhraseQueryParser extends QueryParser {
 }
 
 if (qc instanceof BooleanQuery) {
-ArrayList sc = new ArrayList();
+ArrayList<SpanQuery> sc = new ArrayList<SpanQuery>();
 addComplexPhraseClause(sc, (BooleanQuery) qc);
 if (sc.size() > 0) {
-allSpanClauses[i] = (SpanQuery) sc.get(0);
+allSpanClauses[i] = sc.get(0);
 } else {
 // Insert fake term e.g. phrase query was for "Fred Smithe*" and
 // there were no "Smithe*" terms - need to

@@ -278,14 +278,14 @@ public class ComplexPhraseQueryParser extends QueryParser {
 // Complex case - we have mixed positives and negatives in the
 // sequence.
 // Need to return a SpanNotQuery
-ArrayList positiveClauses = new ArrayList();
+ArrayList<SpanQuery> positiveClauses = new ArrayList<SpanQuery>();
 for (int j = 0; j < allSpanClauses.length; j++) {
 if (!bclauses[j].getOccur().equals(BooleanClause.Occur.MUST_NOT)) {
 positiveClauses.add(allSpanClauses[j]);
 }
 }
 
-SpanQuery[] includeClauses = (SpanQuery[]) positiveClauses
+SpanQuery[] includeClauses = positiveClauses
 .toArray(new SpanQuery[positiveClauses.size()]);
 
 SpanQuery include = null;

@@ -304,9 +304,9 @@ public class ComplexPhraseQueryParser extends QueryParser {
 return snot;
 }
 
-private void addComplexPhraseClause(List spanClauses, BooleanQuery qc) {
-ArrayList ors = new ArrayList();
-ArrayList nots = new ArrayList();
+private void addComplexPhraseClause(List<SpanQuery> spanClauses, BooleanQuery qc) {
+ArrayList<SpanQuery> ors = new ArrayList<SpanQuery>();
+ArrayList<SpanQuery> nots = new ArrayList<SpanQuery>();
 BooleanClause[] bclauses = qc.getClauses();
 
 // For all clauses e.g. one* two~

@@ -314,7 +314,7 @@ public class ComplexPhraseQueryParser extends QueryParser {
 Query childQuery = bclauses[i].getQuery();
 
 // select the list to which we will add these options
-ArrayList chosenList = ors;
+ArrayList<SpanQuery> chosenList = ors;
 if (bclauses[i].getOccur() == BooleanClause.Occur.MUST_NOT) {
 chosenList = nots;
 }

@@ -336,12 +336,12 @@ public class ComplexPhraseQueryParser extends QueryParser {
 if (ors.size() == 0) {
 return;
 }
-SpanOrQuery soq = new SpanOrQuery((SpanQuery[]) ors
+SpanOrQuery soq = new SpanOrQuery(ors
 .toArray(new SpanQuery[ors.size()]));
 if (nots.size() == 0) {
 spanClauses.add(soq);
 } else {
-SpanOrQuery snqs = new SpanOrQuery((SpanQuery[]) nots
+SpanOrQuery snqs = new SpanOrQuery(nots
 .toArray(new SpanQuery[nots.size()]));
 SpanNotQuery snq = new SpanNotQuery(soq, snqs);
 spanClauses.add(snq);
@@ -18,7 +18,6 @@ package org.apache.lucene.queryParser.precedence;
 * limitations under the License.
 */
 
-import org.apache.lucene.queryParser.*;
 
 import java.io.*;
 
@@ -8,7 +8,6 @@ import java.util.ArrayList;
 import java.util.Date;
 import java.util.List;
 import java.util.Locale;
-import java.util.Vector;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;

@@ -247,20 +246,13 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
 return locale;
 }
 
-/**
-* @deprecated use {@link #addClause(List, int, int, Query)} instead.
-*/
-protected void addClause(Vector clauses, int conj, int modifier, Query q) {
-addClause((List) clauses, conj, modifier, q);
-}
-
-protected void addClause(List clauses, int conj, int modifier, Query q) {
+protected void addClause(List<BooleanClause> clauses, int conj, int modifier, Query q) {
 boolean required, prohibited;
 
 // If this term is introduced by AND, make the preceding term required,
 // unless it's already prohibited
 if (clauses.size() > 0 && conj == CONJ_AND) {
-BooleanClause c = (BooleanClause) clauses.get(clauses.size()-1);
+BooleanClause c = clauses.get(clauses.size()-1);
 if (!c.isProhibited())
 c.setOccur(BooleanClause.Occur.MUST);
 }

@@ -270,7 +262,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
 // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
 // notice if the input is a OR b, first term is parsed as required; without
 // this modification a OR b would parsed as +a OR b
-BooleanClause c = (BooleanClause) clauses.get(clauses.size()-1);
+BooleanClause c = clauses.get(clauses.size()-1);
 if (!c.isProhibited())
 c.setOccur(BooleanClause.Occur.SHOULD);
 }

@@ -357,12 +349,12 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
 for (int i = 0; i < list.size(); i++) {
 source.restoreState(list.get(i));
 if (posincrAtt.getPositionIncrement() == 1 && multiTerms.size() > 0) {
-mpq.add((Term[])multiTerms.toArray(new Term[0]));
+mpq.add(multiTerms.toArray(new Term[0]));
 multiTerms.clear();
 }
 multiTerms.add(new Term(field, termAtt.term()));
 }
-mpq.add((Term[])multiTerms.toArray(new Term[0]));
+mpq.add(multiTerms.toArray(new Term[0]));
 return mpq;
 }
 }

@@ -436,27 +428,8 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
 *
 * @return Resulting {@link Query} object.
 * @exception ParseException throw in overridden method to disallow
-* @deprecated use {@link #getBooleanQuery(List)} instead
 */
-protected Query getBooleanQuery(Vector clauses) throws ParseException
-{
-return getBooleanQuery((List) clauses, false);
-}
-
-/**
-* Factory method for generating query, given a set of clauses.
-* By default creates a boolean query composed of clauses passed in.
-*
-* Can be overridden by extending classes, to modify query being
-* returned.
-*
-* @param clauses List that contains {@link BooleanClause} instances
-* to join.
-*
-* @return Resulting {@link Query} object.
-* @exception ParseException throw in overridden method to disallow
-*/
-protected Query getBooleanQuery(List clauses) throws ParseException
+protected Query getBooleanQuery(List<BooleanClause> clauses) throws ParseException
 {
 return getBooleanQuery(clauses, false);
 }

@@ -474,36 +447,15 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
 *
 * @return Resulting {@link Query} object.
 * @exception ParseException throw in overridden method to disallow
-* @deprecated use {@link #getBooleanQuery(List, boolean)} instead
 */
-protected Query getBooleanQuery(Vector clauses, boolean disableCoord)
-throws ParseException
-{
-return getBooleanQuery((List) clauses, disableCoord);
-}
-
-/**
-* Factory method for generating query, given a set of clauses.
-* By default creates a boolean query composed of clauses passed in.
-*
-* Can be overridden by extending classes, to modify query being
-* returned.
-*
-* @param clauses List that contains {@link BooleanClause} instances
-* to join.
-* @param disableCoord true if coord scoring should be disabled.
-*
-* @return Resulting {@link Query} object.
-* @exception ParseException throw in overridden method to disallow
-*/
-protected Query getBooleanQuery(List clauses, boolean disableCoord)
+protected Query getBooleanQuery(List<BooleanClause> clauses, boolean disableCoord)
 throws ParseException {
 if (clauses == null || clauses.size() == 0)
 return null;
 
 BooleanQuery query = new BooleanQuery(disableCoord);
 for (int i = 0; i < clauses.size(); i++) {
-query.add((BooleanClause)clauses.get(i));
+query.add(clauses.get(i));
 }
 return query;
 }

@@ -705,7 +657,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
 }
 
 final public Query Query(String field) throws ParseException {
-List clauses = new ArrayList();
+List<BooleanClause> clauses = new ArrayList<BooleanClause>();
 Query q, firstQuery=null;
 boolean orPresent = false;
 int modifier;

@@ -757,7 +709,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
 }
 
 final public Query andExpression(String field) throws ParseException {
-List clauses = new ArrayList();
+List<BooleanClause> clauses = new ArrayList<BooleanClause>();
 Query q, firstQuery=null;
 int modifier;
 q = Clause(field);
@@ -32,7 +32,6 @@ import java.util.ArrayList;
 import java.util.Date;
 import java.util.List;
 import java.util.Locale;
-import java.util.Vector;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;

@@ -271,20 +270,13 @@ public class PrecedenceQueryParser {
 return locale;
 }
 
-/**
-* @deprecated use {@link #addClause(List, int, int, Query)} instead.
-*/
-protected void addClause(Vector clauses, int conj, int modifier, Query q) {
-addClause((List) clauses, conj, modifier, q);
-}
-
-protected void addClause(List clauses, int conj, int modifier, Query q) {
+protected void addClause(List<BooleanClause> clauses, int conj, int modifier, Query q) {
 boolean required, prohibited;
 
 // If this term is introduced by AND, make the preceding term required,
 // unless it's already prohibited
 if (clauses.size() > 0 && conj == CONJ_AND) {
-BooleanClause c = (BooleanClause) clauses.get(clauses.size()-1);
+BooleanClause c = clauses.get(clauses.size()-1);
 if (!c.isProhibited())
 c.setOccur(BooleanClause.Occur.MUST);
 }

@@ -294,7 +286,7 @@ public class PrecedenceQueryParser {
 // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
 // notice if the input is a OR b, first term is parsed as required; without
 // this modification a OR b would parsed as +a OR b
-BooleanClause c = (BooleanClause) clauses.get(clauses.size()-1);
+BooleanClause c = clauses.get(clauses.size()-1);
 if (!c.isProhibited())
 c.setOccur(BooleanClause.Occur.SHOULD);
 }

@@ -381,12 +373,12 @@ public class PrecedenceQueryParser {
 for (int i = 0; i < list.size(); i++) {
 source.restoreState(list.get(i));
 if (posincrAtt.getPositionIncrement() == 1 && multiTerms.size() > 0) {
-mpq.add((Term[])multiTerms.toArray(new Term[0]));
+mpq.add(multiTerms.toArray(new Term[0]));
 multiTerms.clear();
 }
 multiTerms.add(new Term(field, termAtt.term()));
 }
-mpq.add((Term[])multiTerms.toArray(new Term[0]));
+mpq.add(multiTerms.toArray(new Term[0]));
 return mpq;
 }
 }

@@ -460,27 +452,8 @@ public class PrecedenceQueryParser {
 *
 * @return Resulting {@link Query} object.
 * @exception ParseException throw in overridden method to disallow
-* @deprecated use {@link #getBooleanQuery(List)} instead
 */
-protected Query getBooleanQuery(Vector clauses) throws ParseException
-{
-return getBooleanQuery((List) clauses, false);
-}
-
-/**
-* Factory method for generating query, given a set of clauses.
-* By default creates a boolean query composed of clauses passed in.
-*
-* Can be overridden by extending classes, to modify query being
-* returned.
-*
-* @param clauses List that contains {@link BooleanClause} instances
-* to join.
-*
-* @return Resulting {@link Query} object.
-* @exception ParseException throw in overridden method to disallow
-*/
-protected Query getBooleanQuery(List clauses) throws ParseException
+protected Query getBooleanQuery(List<BooleanClause> clauses) throws ParseException
 {
 return getBooleanQuery(clauses, false);
 }

@@ -498,36 +471,15 @@ public class PrecedenceQueryParser {
 *
 * @return Resulting {@link Query} object.
 * @exception ParseException throw in overridden method to disallow
-* @deprecated use {@link #getBooleanQuery(List, boolean)} instead
 */
-protected Query getBooleanQuery(Vector clauses, boolean disableCoord)
-throws ParseException
-{
-return getBooleanQuery((List) clauses, disableCoord);
-}
-
-/**
-* Factory method for generating query, given a set of clauses.
-* By default creates a boolean query composed of clauses passed in.
-*
-* Can be overridden by extending classes, to modify query being
-* returned.
-*
-* @param clauses List that contains {@link BooleanClause} instances
-* to join.
-* @param disableCoord true if coord scoring should be disabled.
-*
-* @return Resulting {@link Query} object.
-* @exception ParseException throw in overridden method to disallow
-*/
-protected Query getBooleanQuery(List clauses, boolean disableCoord)
+protected Query getBooleanQuery(List<BooleanClause> clauses, boolean disableCoord)
 throws ParseException {
 if (clauses == null || clauses.size() == 0)
 return null;
 
 BooleanQuery query = new BooleanQuery(disableCoord);
 for (int i = 0; i < clauses.size(); i++) {
-query.add((BooleanClause)clauses.get(i));
+query.add(clauses.get(i));
 }
 return query;
 }

@@ -763,7 +715,7 @@ int Modifier() : {
 
 Query Query(String field) :
 {
-List clauses = new ArrayList();
+List<BooleanClause> clauses = new ArrayList<BooleanClause>();
 Query q, firstQuery=null;
 boolean orPresent = false;
 int modifier;

@@ -790,7 +742,7 @@ Query Query(String field) :
 
 Query andExpression(String field) :
 {
-List clauses = new ArrayList();
+List<BooleanClause> clauses = new ArrayList<BooleanClause>();
 Query q, firstQuery=null;
 int modifier;
 }
@@ -7,7 +7,6 @@ import java.util.ArrayList;
 import java.util.Date;
 import java.util.List;
 import java.util.Locale;
-import java.util.Vector;
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.*;
@@ -238,12 +238,12 @@ public class AnalyzerUtil {
 
 return new Analyzer() {
 
-private final HashMap cache = new HashMap();
+private final HashMap<String,ArrayList<AttributeSource.State>> cache = new HashMap<String,ArrayList<AttributeSource.State>>();
 
 public TokenStream tokenStream(String fieldName, Reader reader) {
-final ArrayList tokens = (ArrayList) cache.get(fieldName);
+final ArrayList<AttributeSource.State> tokens = cache.get(fieldName);
 if (tokens == null) { // not yet cached
-final ArrayList tokens2 = new ArrayList();
+final ArrayList<AttributeSource.State> tokens2 = new ArrayList<AttributeSource.State>();
 TokenStream tokenStream = new TokenFilter(child.tokenStream(fieldName, reader)) {
 
 public boolean incrementToken() throws IOException {

@@ -258,11 +258,11 @@ public class AnalyzerUtil {
 } else { // already cached
 return new TokenStream() {
 
-private Iterator iter = tokens.iterator();
+private Iterator<AttributeSource.State> iter = tokens.iterator();
 
 public boolean incrementToken() {
 if (!iter.hasNext()) return false;
-restoreState((AttributeSource.State) iter.next());
+restoreState(iter.next());
 return true;
 }
 };

@@ -305,12 +305,12 @@ public class AnalyzerUtil {
 if (limit <= 0) limit = Integer.MAX_VALUE;
 
 // compute frequencies of distinct terms
-HashMap map = new HashMap();
+HashMap<String,MutableInteger> map = new HashMap<String,MutableInteger>();
 TokenStream stream = analyzer.tokenStream("", new StringReader(text));
 TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
 try {
 while (stream.incrementToken()) {
-MutableInteger freq = (MutableInteger) map.get(termAtt.term());
+MutableInteger freq = map.get(termAtt.term());
 if (freq == null) {
 freq = new MutableInteger(1);
 map.put(termAtt.term(), freq);

@@ -329,17 +329,15 @@ public class AnalyzerUtil {
 }
 
 // sort by frequency, text
-Map.Entry[] entries = new Map.Entry[map.size()];
+Map.Entry<String,MutableInteger>[] entries = new Map.Entry[map.size()];
 map.entrySet().toArray(entries);
-Arrays.sort(entries, new Comparator() {
-public int compare(Object o1, Object o2) {
-Map.Entry e1 = (Map.Entry) o1;
-Map.Entry e2 = (Map.Entry) o2;
-int f1 = ((MutableInteger) e1.getValue()).intValue();
-int f2 = ((MutableInteger) e2.getValue()).intValue();
+Arrays.sort(entries, new Comparator<Map.Entry<String,MutableInteger>>() {
+public int compare(Map.Entry<String,MutableInteger> e1, Map.Entry<String,MutableInteger> e2) {
+int f1 = e1.getValue().intValue();
+int f2 = e2.getValue().intValue();
 if (f2 - f1 != 0) return f2 - f1;
-String s1 = (String) e1.getKey();
-String s2 = (String) e2.getKey();
+String s1 = e1.getKey();
+String s2 = e2.getKey();
 return s1.compareTo(s2);
 }
 });
@@ -76,7 +76,7 @@ import java.util.TreeSet;
 public class SynonymMap {
 
 /** the index data; Map<String word, String[] synonyms> */
-private final HashMap table;
+private final HashMap<String,String[]> table;
 
 private static final String[] EMPTY = new String[0];
 

@@ -93,7 +93,7 @@ public class SynonymMap {
 * if an error occured while reading the stream.
 */
 public SynonymMap(InputStream input) throws IOException {
-this.table = input == null ? new HashMap(0) : read(toByteArray(input));
+this.table = input == null ? new HashMap<String,String[]>(0) : read(toByteArray(input));
 }
 
 /**

@@ -123,7 +123,7 @@ public class SynonymMap {
 */
 public String toString() {
 StringBuilder buf = new StringBuilder();
-Iterator iter = new TreeMap(table).keySet().iterator();
+Iterator<String> iter = new TreeMap<String,String[]>(table).keySet().iterator();
 int count = 0;
 int f0 = 0;
 int f1 = 0;

@@ -131,7 +131,7 @@ public class SynonymMap {
 int f3 = 0;
 
 while (iter.hasNext()) {
-String word = (String) iter.next();
+String word = iter.next();
 buf.append(word + ":");
 String[] synonyms = getSynonyms(word);
 buf.append(Arrays.asList(synonyms));

@@ -168,12 +168,12 @@ public class SynonymMap {
 return true;
 }
 
-private HashMap read(byte[] data) {
+private HashMap<String,String[]> read(byte[] data) {
 int WORDS = (int) (76401 / 0.7); // presizing
 int GROUPS = (int) (88022 / 0.7); // presizing
-HashMap word2Groups = new HashMap(WORDS); // Map<String word, int[] groups>
-HashMap group2Words = new HashMap(GROUPS); // Map<int group, String[] words>
-HashMap internedWords = new HashMap(WORDS);// Map<String word, String word>
+HashMap<String,ArrayList<Integer>> word2Groups = new HashMap<String,ArrayList<Integer>>(WORDS); // Map<String word, int[] groups>
+HashMap<Integer,ArrayList<String>> group2Words = new HashMap<Integer,ArrayList<String>>(GROUPS); // Map<int group, String[] words>
+HashMap<String,String> internedWords = new HashMap<String,String>(WORDS);// Map<String word, String word>
 
 Charset charset = Charset.forName("UTF-8");
 int lastNum = -1;

@@ -226,7 +226,7 @@ public class SynonymMap {
 /* Part C: Add (group,word) to tables */
 
 // ensure compact string representation, minimizing memory overhead
-String w = (String) internedWords.get(word);
+String w = internedWords.get(word);
 if (w == null) {
 word = new String(word); // ensure compact string
 internedWords.put(word, word);

@@ -242,17 +242,17 @@ public class SynonymMap {
 }
 
 // add word --> group
-ArrayList groups = (ArrayList) word2Groups.get(word);
+ArrayList<Integer> groups = word2Groups.get(word);
 if (groups == null) {
-groups = new ArrayList(1);
+groups = new ArrayList<Integer>(1);
 word2Groups.put(word, groups);
 }
 groups.add(group);
 
 // add group --> word
-ArrayList words = (ArrayList) group2Words.get(group);
+ArrayList<String> words = group2Words.get(group);
 if (words == null) {
-words = new ArrayList(1);
+words = new ArrayList<String>(1);
 group2Words.put(group, words);
 }
 words.add(word);

@@ -265,25 +265,26 @@ public class SynonymMap {
 /* Part E: minimize memory consumption by a factor 3 (or so) */
 // if (true) return word2Syns;
 word2Groups = null; // help gc
-group2Words = null; // help gc
+//TODO: word2Groups.clear(); would be more appropriate ?
+group2Words = null; // help gc
+//TODO: group2Words.clear(); would be more appropriate ?
 
 return optimize(word2Syns, internedWords);
 }
 
-private HashMap createIndex(Map word2Groups, Map group2Words) {
-HashMap word2Syns = new HashMap();
-Iterator iter = word2Groups.entrySet().iterator();
+private HashMap<String,String[]> createIndex(Map<String,ArrayList<Integer>> word2Groups, Map<Integer,ArrayList<String>> group2Words) {
+HashMap<String,String[]> word2Syns = new HashMap<String,String[]>();
+
-while (iter.hasNext()) { // for each word
-Map.Entry entry = (Map.Entry) iter.next();
-ArrayList group = (ArrayList) entry.getValue();
-String word = (String) entry.getKey();
+for (final Map.Entry<String,ArrayList<Integer>> entry : word2Groups.entrySet()) { // for each word
+ArrayList<Integer> group = entry.getValue();
+String word = entry.getKey();
 
 // HashSet synonyms = new HashSet();
-TreeSet synonyms = new TreeSet();
+TreeSet<String> synonyms = new TreeSet<String>();
 for (int i=group.size(); --i >= 0; ) { // for each groupID of word
-ArrayList words = (ArrayList) group2Words.get(group.get(i));
+ArrayList<String> words = group2Words.get(group.get(i));
 for (int j=words.size(); --j >= 0; ) { // add all words
-Object synonym = words.get(j); // note that w and word are interned
+String synonym = words.get(j); // note that w and word are interned
 if (synonym != word) { // a word is implicitly it's own synonym
 synonyms.add(synonym);
 }

@@ -294,7 +295,7 @@ public class SynonymMap {
 if (size > 0) {
 String[] syns = new String[size];
 if (size == 1)
-syns[0] = (String) synonyms.first();
+syns[0] = synonyms.first();
 else
 synonyms.toArray(syns);
 // if (syns.length > 1) Arrays.sort(syns);

@@ -306,7 +307,7 @@ public class SynonymMap {
 return word2Syns;
 }
 
-private HashMap optimize(HashMap word2Syns, HashMap internedWords) {
+private HashMap<String,String[]> optimize(HashMap word2Syns, HashMap<String,String> internedWords) {
 if (DEBUG) {
 System.err.println("before gc");
 for (int i=0; i < 10; i++) System.gc();

@@ -318,11 +319,11 @@ public class SynonymMap {
 int size = word2Syns.size();
 String[][] allSynonyms = new String[size][];
 String[] words = new String[size];
-Iterator iter = word2Syns.entrySet().iterator();
+Iterator<Map.Entry<String,String[]>> iter = word2Syns.entrySet().iterator();
 for (int j=0; j < size; j++) {
-Map.Entry entry = (Map.Entry) iter.next();
-allSynonyms[j] = (String[]) entry.getValue();
-words[j] = (String) entry.getKey();
+Map.Entry<String,String[]> entry = iter.next();
+allSynonyms[j] = entry.getValue();
+words[j] = entry.getKey();
 len += words[j].length();
 }
 

@@ -343,7 +344,7 @@ public class SynonymMap {
 for (int j=0; j < size; j++) {
 String[] syns = allSynonyms[j];
 for (int k=syns.length; --k >= 0; ) {
-syns[k] = (String) internedWords.get(syns[k]);
+syns[k] = internedWords.get(syns[k]);
 }
 Object replacement = syns;
 if (syns.length == 1) replacement = syns[0]; // minimize memory consumption some more