LUCENE-1257: Generics in contrib/memory, contrib/wordnet (previously memory), contrib/misc, contrib/benchmark

git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@830790 13f79535-47bb-0310-9956-ffa450edef68
Uwe Schindler 2009-10-28 23:21:25 +00:00
parent 36b65637fc
commit 786457c0e3
19 changed files with 174 additions and 298 deletions

View File

@ -25,8 +25,6 @@ import java.util.ArrayList;
import java.util.List;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
import java.util.StringTokenizer;
@ -50,7 +48,7 @@ public class Config {
private int roundNumber = 0;
private Properties props;
private HashMap valByRound = new HashMap();
private HashMap<String,Object> valByRound = new HashMap<String,Object>();
private HashMap<String,String> colForValByRound = new HashMap<String,String>();
private String algorithmText;
@ -247,8 +245,7 @@ public class Config {
// log changes in values
if (valByRound.size()>0) {
sb.append(": ");
for (Iterator iter = valByRound.keySet().iterator(); iter.hasNext();) {
String name = (String) iter.next();
for (final String name : valByRound.keySet()) {
Object a = valByRound.get(name);
if (a instanceof int[]) {
int ai[] = (int[]) a;

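The Config change above is the template for most of this commit: a raw HashMap plus an explicit Iterator-and-cast loop becomes a parameterized map walked with the enhanced for loop. A minimal stand-alone sketch of the before/after, plain JDK code rather than anything from the commit:

import java.util.HashMap;
import java.util.Iterator;

public class RawVsGenericMap {
  public static void main(String[] args) {
    // Before: a raw map forces an explicit Iterator and an unchecked cast.
    HashMap raw = new HashMap();
    raw.put("merge.factor", "10:100");
    for (Iterator iter = raw.keySet().iterator(); iter.hasNext();) {
      String name = (String) iter.next();
      System.out.println(name + " = " + raw.get(name));
    }

    // After: the type parameters document the contents, and the enhanced
    // for loop removes both the Iterator and the cast.
    HashMap<String, Object> typed = new HashMap<String, Object>();
    typed.put("merge.factor", "10:100");
    for (final String name : typed.keySet()) {
      System.out.println(name + " = " + typed.get(name));
    }
  }
}
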
View File

@ -175,10 +175,10 @@ import org.apache.lucene.store.RAMDirectory;
public class MemoryIndex implements Serializable {
/** info for each field: Map<String fieldName, Info field> */
private final HashMap fields = new HashMap();
private final HashMap<String,Info> fields = new HashMap<String,Info>();
/** fields sorted ascending by fieldName; lazily computed on demand */
private transient Map.Entry[] sortedFields;
private transient Map.Entry<String,Info>[] sortedFields;
/** pos: positions[3*i], startOffset: positions[3*i +1], endOffset: positions[3*i +2] */
private final int stride;
@ -270,13 +270,13 @@ public class MemoryIndex implements Serializable {
* the keywords to generate tokens for
* @return the corresponding token stream
*/
public TokenStream keywordTokenStream(final Collection keywords) {
public <T> TokenStream keywordTokenStream(final Collection<T> keywords) {
// TODO: deprecate & move this method into AnalyzerUtil?
if (keywords == null)
throw new IllegalArgumentException("keywords must not be null");
return new TokenStream() {
private Iterator iter = keywords.iterator();
private Iterator<T> iter = keywords.iterator();
private int start = 0;
private TermAttribute termAtt = addAttribute(TermAttribute.class);
private OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
@ -284,7 +284,7 @@ public class MemoryIndex implements Serializable {
public boolean incrementToken() {
if (!iter.hasNext()) return false;
Object obj = iter.next();
T obj = iter.next();
if (obj == null)
throw new IllegalArgumentException("keyword must not be null");
@ -335,7 +335,7 @@ public class MemoryIndex implements Serializable {
if (fields.get(fieldName) != null)
throw new IllegalArgumentException("field must not be added more than once");
HashMap terms = new HashMap();
HashMap<String,ArrayIntList> terms = new HashMap<String,ArrayIntList>();
int numTokens = 0;
int numOverlapTokens = 0;
int pos = -1;
@ -355,7 +355,7 @@ public class MemoryIndex implements Serializable {
numOverlapTokens++;
pos += posIncr;
ArrayIntList positions = (ArrayIntList) terms.get(term);
ArrayIntList positions = terms.get(term);
if (positions == null) { // term not seen before
positions = new ArrayIntList(stride);
terms.put(term, positions);
@ -471,21 +471,19 @@ public class MemoryIndex implements Serializable {
if (sortedFields != null) size += VM.sizeOfObjectArray(sortedFields.length);
size += VM.sizeOfHashMap(fields.size());
Iterator iter = fields.entrySet().iterator();
while (iter.hasNext()) { // for each Field Info
Map.Entry entry = (Map.Entry) iter.next();
Info info = (Info) entry.getValue();
for (Map.Entry<String, Info> entry : fields.entrySet()) { // for each Field Info
Info info = entry.getValue();
size += VM.sizeOfObject(2*INT + 3*PTR); // Info instance vars
if (info.sortedTerms != null) size += VM.sizeOfObjectArray(info.sortedTerms.length);
int len = info.terms.size();
size += VM.sizeOfHashMap(len);
Iterator iter2 = info.terms.entrySet().iterator();
Iterator<Map.Entry<String,ArrayIntList>> iter2 = info.terms.entrySet().iterator();
while (--len >= 0) { // for each term
Map.Entry e = (Map.Entry) iter2.next();
Map.Entry<String,ArrayIntList> e = iter2.next();
size += VM.sizeOfObject(PTR + 3*INT); // assumes substring() memory overlay
// size += STR + 2 * ((String) e.getKey()).length();
ArrayIntList positions = (ArrayIntList) e.getValue();
ArrayIntList positions = e.getValue();
size += VM.sizeOfArrayIntList(positions.size());
}
}
@ -502,13 +500,13 @@ public class MemoryIndex implements Serializable {
}
/** returns a view of the given map's entries, sorted ascending by key */
private static Map.Entry[] sort(HashMap map) {
private static <K,V> Map.Entry<K,V>[] sort(HashMap<K,V> map) {
int size = map.size();
Map.Entry[] entries = new Map.Entry[size];
Map.Entry<K,V>[] entries = new Map.Entry[size];
Iterator iter = map.entrySet().iterator();
Iterator<Map.Entry<K,V>> iter = map.entrySet().iterator();
for (int i=0; i < size; i++) {
entries[i] = (Map.Entry) iter.next();
entries[i] = iter.next();
}
if (size > 1) Arrays.sort(entries, termComparator);
@ -528,18 +526,18 @@ public class MemoryIndex implements Serializable {
int sumTerms = 0;
for (int i=0; i < sortedFields.length; i++) {
Map.Entry entry = sortedFields[i];
String fieldName = (String) entry.getKey();
Info info = (Info) entry.getValue();
Map.Entry<String,Info> entry = sortedFields[i];
String fieldName = entry.getKey();
Info info = entry.getValue();
info.sortTerms();
result.append(fieldName + ":\n");
int numChars = 0;
int numPositions = 0;
for (int j=0; j < info.sortedTerms.length; j++) {
Map.Entry e = info.sortedTerms[j];
String term = (String) e.getKey();
ArrayIntList positions = (ArrayIntList) e.getValue();
Map.Entry<String,ArrayIntList> e = info.sortedTerms[j];
String term = e.getKey();
ArrayIntList positions = e.getValue();
result.append("\t'" + term + "':" + numPositions(positions) + ":");
result.append(positions.toString(stride)); // ignore offsets
result.append("\n");
@ -577,10 +575,10 @@ public class MemoryIndex implements Serializable {
* Term strings and their positions for this field: Map <String
* termText, ArrayIntList positions>
*/
private final HashMap terms;
private final HashMap<String,ArrayIntList> terms;
/** Terms sorted ascending by term text; computed on demand */
private transient Map.Entry[] sortedTerms;
private transient Map.Entry<String,ArrayIntList>[] sortedTerms;
/** Number of added tokens for this field */
private final int numTokens;
@ -596,7 +594,7 @@ public class MemoryIndex implements Serializable {
private static final long serialVersionUID = 2882195016849084649L;
public Info(HashMap terms, int numTokens, int numOverlapTokens, float boost) {
public Info(HashMap<String,ArrayIntList> terms, int numTokens, int numOverlapTokens, float boost) {
this.terms = terms;
this.numTokens = numTokens;
this.numOverlapTokens = numOverlapTokens;
@ -617,12 +615,12 @@ public class MemoryIndex implements Serializable {
/** note that the frequency can be calculated as numPosition(getPositions(x)) */
public ArrayIntList getPositions(String term) {
return (ArrayIntList) terms.get(term);
return terms.get(term);
}
/** note that the frequency can be calculated as numPosition(getPositions(x)) */
public ArrayIntList getPositions(int pos) {
return (ArrayIntList) sortedTerms[pos].getValue();
return sortedTerms[pos].getValue();
}
public float getBoost() {
@ -736,11 +734,11 @@ public class MemoryIndex implements Serializable {
protected void finalize() {}
private Info getInfo(String fieldName) {
return (Info) fields.get(fieldName);
return fields.get(fieldName);
}
private Info getInfo(int pos) {
return (Info) sortedFields[pos].getValue();
return sortedFields[pos].getValue();
}
public int docFreq(Term term) {
@ -814,7 +812,7 @@ public class MemoryIndex implements Serializable {
Info info = getInfo(j);
if (i >= info.sortedTerms.length) return null;
// if (DEBUG) System.err.println("TermEnum.term: " + i + ", " + info.sortedTerms[i].getKey());
return createTerm(info, j, (String) info.sortedTerms[i].getKey());
return createTerm(info, j, info.sortedTerms[i].getKey());
}
public int docFreq() {
@ -834,7 +832,7 @@ public class MemoryIndex implements Serializable {
// Assertion: sortFields has already been called before
Term template = info.template;
if (template == null) { // not yet cached?
String fieldName = (String) sortedFields[pos].getKey();
String fieldName = sortedFields[pos].getKey();
template = new Term(fieldName);
info.template = template;
}
@ -949,10 +947,9 @@ public class MemoryIndex implements Serializable {
if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVectors");
TermFreqVector[] vectors = new TermFreqVector[fields.size()];
// if (vectors.length == 0) return null;
Iterator iter = fields.keySet().iterator();
Iterator<String> iter = fields.keySet().iterator();
for (int i=0; i < vectors.length; i++) {
String fieldName = (String) iter.next();
vectors[i] = getTermFreqVector(docNumber, fieldName);
vectors[i] = getTermFreqVector(docNumber, iter.next());
}
return vectors;
}
@ -962,9 +959,8 @@ public class MemoryIndex implements Serializable {
if (DEBUG) System.err.println("MemoryIndexReader.getTermFreqVectors");
// if (vectors.length == 0) return null;
for (Iterator iterator = fields.keySet().iterator(); iterator.hasNext();)
for (final String fieldName : fields.keySet())
{
String fieldName = (String) iterator.next();
getTermFreqVector(docNumber, fieldName, mapper);
}
}
@ -980,7 +976,7 @@ public class MemoryIndex implements Serializable {
mapper.setExpectations(field, info.sortedTerms.length, stride != 1, true);
for (int i = info.sortedTerms.length; --i >=0;){
ArrayIntList positions = (ArrayIntList) info.sortedTerms[i].getValue();
ArrayIntList positions = info.sortedTerms[i].getValue();
int size = positions.size();
org.apache.lucene.index.TermVectorOffsetInfo[] offsets =
new org.apache.lucene.index.TermVectorOffsetInfo[size / stride];
@ -990,9 +986,9 @@ public class MemoryIndex implements Serializable {
int end = positions.get(j+1);
offsets[k] = new org.apache.lucene.index.TermVectorOffsetInfo(start, end);
}
mapper.map((String)info.sortedTerms[i].getKey(),
numPositions((ArrayIntList) info.sortedTerms[i].getValue()),
offsets, ((ArrayIntList) info.sortedTerms[i].getValue()).toArray(stride));
mapper.map(info.sortedTerms[i].getKey(),
numPositions(info.sortedTerms[i].getValue()),
offsets, (info.sortedTerms[i].getValue()).toArray(stride));
}
}
@ -1004,7 +1000,7 @@ public class MemoryIndex implements Serializable {
return new TermPositionVector() {
private final Map.Entry[] sortedTerms = info.sortedTerms;
private final Map.Entry<String,ArrayIntList>[] sortedTerms = info.sortedTerms;
public String getField() {
return fieldName;
@ -1017,7 +1013,7 @@ public class MemoryIndex implements Serializable {
public String[] getTerms() {
String[] terms = new String[sortedTerms.length];
for (int i=sortedTerms.length; --i >= 0; ) {
terms[i] = (String) sortedTerms[i].getKey();
terms[i] = sortedTerms[i].getKey();
}
return terms;
}
@ -1025,7 +1021,7 @@ public class MemoryIndex implements Serializable {
public int[] getTermFrequencies() {
int[] freqs = new int[sortedTerms.length];
for (int i=sortedTerms.length; --i >= 0; ) {
freqs[i] = numPositions((ArrayIntList) sortedTerms[i].getValue());
freqs[i] = numPositions(sortedTerms[i].getValue());
}
return freqs;
}
@ -1045,14 +1041,14 @@ public class MemoryIndex implements Serializable {
// lucene >= 1.4.3
public int[] getTermPositions(int index) {
return ((ArrayIntList) sortedTerms[index].getValue()).toArray(stride);
return sortedTerms[index].getValue().toArray(stride);
}
// lucene >= 1.9 (remove this method for lucene-1.4.3)
public org.apache.lucene.index.TermVectorOffsetInfo[] getOffsets(int index) {
if (stride == 1) return null; // no offsets stored
ArrayIntList positions = (ArrayIntList) sortedTerms[index].getValue();
ArrayIntList positions = sortedTerms[index].getValue();
int size = positions.size();
org.apache.lucene.index.TermVectorOffsetInfo[] offsets =
new org.apache.lucene.index.TermVectorOffsetInfo[size / stride];
@ -1153,7 +1149,7 @@ public class MemoryIndex implements Serializable {
throw new UnsupportedOperationException();
}
protected void doCommit(Map commitUserData) {
protected void doCommit(Map<String,String> commitUserData) {
if (DEBUG) System.err.println("MemoryIndexReader.doCommit");
}
@ -1162,16 +1158,16 @@ public class MemoryIndex implements Serializable {
}
// lucene >= 1.9 (remove this method for lucene-1.4.3)
public Collection getFieldNames(FieldOption fieldOption) {
public Collection<String> getFieldNames(FieldOption fieldOption) {
if (DEBUG) System.err.println("MemoryIndexReader.getFieldNamesOption");
if (fieldOption == FieldOption.UNINDEXED)
return Collections.EMPTY_SET;
return Collections.<String>emptySet();
if (fieldOption == FieldOption.INDEXED_NO_TERMVECTOR)
return Collections.EMPTY_SET;
return Collections.<String>emptySet();
if (fieldOption == FieldOption.TERMVECTOR_WITH_OFFSET && stride == 1)
return Collections.EMPTY_SET;
return Collections.<String>emptySet();
if (fieldOption == FieldOption.TERMVECTOR_WITH_POSITION_OFFSET && stride == 1)
return Collections.EMPTY_SET;
return Collections.<String>emptySet();
return Collections.unmodifiableSet(fields.keySet());
}

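Two details in the MemoryIndex hunks deserve a note. Java forbids new Map.Entry<K,V>[size], so the genericized sort helper has to create a raw array and use it at the generic type (an unchecked but conventional workaround), and the raw Collections.EMPTY_SET constant gives way to Collections.<String>emptySet(), whose explicit type witness binds the element type. A self-contained sketch of both patterns on JDK types only (nothing below is from the commit):

import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

public class MemoryIndexPatterns {
  /** Entries of the map, sorted ascending by key (mirrors the sort(...) helper above). */
  @SuppressWarnings("unchecked")
  static <K extends Comparable<K>, V> Map.Entry<K, V>[] sortedEntries(HashMap<K, V> map) {
    // "new Map.Entry<K,V>[size]" is illegal in Java, so the conventional
    // workaround is to create the raw array and suppress the warning.
    Map.Entry<K, V>[] entries = new Map.Entry[map.size()];
    Iterator<Map.Entry<K, V>> iter = map.entrySet().iterator();
    for (int i = 0; i < entries.length; i++) {
      entries[i] = iter.next();
    }
    Arrays.sort(entries, new Comparator<Map.Entry<K, V>>() {
      public int compare(Map.Entry<K, V> a, Map.Entry<K, V> b) {
        return a.getKey().compareTo(b.getKey());
      }
    });
    return entries;
  }

  /** Collections.EMPTY_SET is a raw constant; the factory binds the element type. */
  static Set<String> noFields() {
    return Collections.<String>emptySet();
  }

  public static void main(String[] args) {
    HashMap<String, Integer> m = new HashMap<String, Integer>();
    m.put("b", 2);
    m.put("a", 1);
    for (Map.Entry<String, Integer> e : sortedEntries(m)) {
      System.out.println(e.getKey() + "=" + e.getValue());
    }
    System.out.println(noFields().size()); // 0
  }
}
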
View File

@ -21,13 +21,13 @@ import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.StopAnalyzer;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
@ -72,11 +72,8 @@ public class PatternAnalyzer extends Analyzer {
/** <code>"\\s+"</code>; Divides text at whitespaces (Character.isWhitespace(c)) */
public static final Pattern WHITESPACE_PATTERN = Pattern.compile("\\s+");
private static final Set EXTENDED_ENGLISH_STOP_WORDS;
static {
EXTENDED_ENGLISH_STOP_WORDS = new HashSet();
EXTENDED_ENGLISH_STOP_WORDS.addAll(Arrays.asList(new String[] {
private static final CharArraySet EXTENDED_ENGLISH_STOP_WORDS =
CharArraySet.unmodifiableSet(new CharArraySet(Arrays.asList(
"a", "about", "above", "across", "adj", "after", "afterwards",
"again", "against", "albeit", "all", "almost", "alone", "along",
"already", "also", "although", "always", "among", "amongst", "an",
@ -117,8 +114,8 @@ public class PatternAnalyzer extends Analyzer {
"whomever", "whomsoever", "whose", "whosoever", "why", "will",
"with", "within", "without", "would", "xsubj", "xcal", "xauthor",
"xother ", "xnote", "yet", "you", "your", "yours", "yourself",
"yourselves"}));
}
"yourselves"
), true));
/**
* A lower-casing word analyzer with English stop words (can be shared
@ -139,7 +136,7 @@ public class PatternAnalyzer extends Analyzer {
private final Pattern pattern;
private final boolean toLowerCase;
private final Set stopWords;
private final Set<?> stopWords;
private final Version matchVersion;
@ -162,7 +159,7 @@ public class PatternAnalyzer extends Analyzer {
* or <a href="http://www.unine.ch/info/clef/">other stop words
* lists </a>.
*/
public PatternAnalyzer(Version matchVersion, Pattern pattern, boolean toLowerCase, Set stopWords) {
public PatternAnalyzer(Version matchVersion, Pattern pattern, boolean toLowerCase, Set<?> stopWords) {
if (pattern == null)
throw new IllegalArgumentException("pattern must not be null");
@ -313,15 +310,7 @@ public class PatternAnalyzer extends Analyzer {
if (input != null) input.close();
}
}
/** somewhat oversized to minimize hash collisions */
private static Set makeStopSet(Set stopWords) {
Set stops = new HashSet(stopWords.size() * 2, 0.3f);
stops.addAll(stopWords);
return stops;
// return Collections.unmodifiableSet(stops);
}
///////////////////////////////////////////////////////////////////////////////
// Nested classes:

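The stop-word table changes from a HashSet populated in a static initializer to a single unmodifiable constant built in one expression. CharArraySet is Lucene-specific; the sketch below shows the same build-once-then-freeze idiom with Collections.unmodifiableSet as a stand-in:

import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;

public class FrozenStopWords {
  // One initializer expression replaces the field plus static { } block:
  // build the set, then freeze it so the shared constant cannot be mutated.
  private static final Set<String> STOP_WORDS =
      Collections.unmodifiableSet(new HashSet<String>(Arrays.asList(
          "a", "about", "above", "across")));

  public static void main(String[] args) {
    System.out.println(STOP_WORDS.contains("about")); // true
  }
}
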
View File

@ -103,7 +103,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy
}
}
private boolean isOptimized(SegmentInfos infos, IndexWriter writer, int maxNumSegments, Set segmentsToOptimize) throws IOException {
private boolean isOptimized(SegmentInfos infos, IndexWriter writer, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
final int numSegments = infos.size();
int numToOptimize = 0;
SegmentInfo optimizeInfo = null;
@ -128,7 +128,7 @@ public class BalancedSegmentMergePolicy extends LogByteSizeMergePolicy
}
@Override
public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxNumSegments, Set segmentsToOptimize) throws IOException {
public MergeSpecification findMergesForOptimize(SegmentInfos infos, int maxNumSegments, Set<SegmentInfo> segmentsToOptimize) throws IOException {
assert maxNumSegments > 0;

View File

@ -55,8 +55,7 @@ public class FieldNormModifier {
Similarity s = null;
if (!args[1].equals("-n")) {
try {
Class simClass = Class.forName(args[1]);
s = (Similarity)simClass.newInstance();
s = Class.forName(args[1]).asSubclass(Similarity.class).newInstance();
} catch (Exception e) {
System.err.println("Couldn't instantiate similarity with empty constructor: " + args[1]);
e.printStackTrace(System.err);
@ -148,7 +147,7 @@ public class FieldNormModifier {
if (sim == null)
reader.setNorm(d, fieldName, fakeNorms[0]);
else
reader.setNorm(d, fieldName, sim.encodeNorm(sim.lengthNorm(fieldName, termCounts[d])));
reader.setNorm(d, fieldName, Similarity.encodeNorm(sim.lengthNorm(fieldName, termCounts[d])));
}
}

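Class.forName(args[1]).asSubclass(Similarity.class) replaces the raw Class plus instance-level cast: asSubclass performs a checked conversion to Class<? extends Similarity>, so newInstance() is statically typed and a wrong class name fails up front. A JDK-only sketch of the idiom, with Runnable standing in for Similarity:

public class AsSubclassDemo {
  public static void main(String[] args) throws Exception {
    String className = args.length > 0 ? args[0] : "java.lang.Thread";
    // Before: Class c = Class.forName(className);
    //         Runnable r = (Runnable) c.newInstance();
    // After: asSubclass does the checked conversion to Class<? extends Runnable>
    // up front, so a wrong class name fails fast with a ClassCastException.
    Runnable r = Class.forName(className).asSubclass(Runnable.class).newInstance();
    r.run();
  }
}
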
View File

@ -78,14 +78,14 @@ public class IndexSplitter {
for (int x = 2; x < args.length; x++) {
segs.add(args[x]);
}
is.remove((String[]) segs.toArray(new String[0]));
is.remove(segs.toArray(new String[0]));
} else {
File targetDir = new File(args[1]);
List<String> segs = new ArrayList<String>();
for (int x = 2; x < args.length; x++) {
segs.add(args[x]);
}
is.split(targetDir, (String[]) segs.toArray(new String[0]));
is.split(targetDir, segs.toArray(new String[0]));
}
}
@ -137,9 +137,8 @@ public class IndexSplitter {
SegmentInfo info = getInfo(n);
destInfos.add(info);
// now copy files over
List files = info.files();
for (int x = 0; x < files.size(); x++) {
String srcName = (String) files.get(x);
List<String> files = info.files();
for (final String srcName : files) {
File srcFile = new File(dir, srcName);
File destFile = new File(destDir, srcName);
copyFile(srcFile, destFile);

View File

@ -158,7 +158,7 @@ public class MultiPassIndexSplitter {
if (indexes.size() == 1) {
input = indexes.get(0);
} else {
input = new MultiReader((IndexReader[])indexes.toArray(new IndexReader[indexes.size()]));
input = new MultiReader(indexes.toArray(new IndexReader[indexes.size()]));
}
splitter.split(input, dirs, seq);
}

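The toArray casts dropped in IndexSplitter and MultiPassIndexSplitter follow from the generic signature <T> T[] toArray(T[] a): on a raw List the method erases to return Object[], which is what originally required the cast; on a List<String> or List<IndexReader> the cast is dead weight. A small stand-alone illustration (not from the commit):

import java.util.ArrayList;
import java.util.List;

public class TypedToArray {
  public static void main(String[] args) {
    List<String> segs = new ArrayList<String>();
    segs.add("_0");
    segs.add("_1");
    // On a raw List, toArray(T[]) erases to return Object[], hence the old cast:
    //   String[] names = (String[]) rawList.toArray(new String[0]);
    // On List<String>, the generic signature applies and the cast is redundant:
    String[] names = segs.toArray(new String[0]);
    // Passing a presized array, as MultiPassIndexSplitter does, saves one allocation:
    String[] names2 = segs.toArray(new String[segs.size()]);
    System.out.println(names.length + " " + names2.length); // 2 2
  }
}
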
View File

@ -5,8 +5,6 @@ import org.apache.lucene.util.StringHelper;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Collection;
import java.util.Iterator;
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@ -68,13 +66,13 @@ public class TermVectorAccessor {
}
/** Instance reused to save garbage collector some time */
private List/*<String>*/ tokens;
private List<String> tokens;
/** Instance reused to save garbage collector some time */
private List/*<int[]>*/ positions;
private List<int[]> positions;
/** Instance reused to save garbage collector some time */
private List/*<Integer>*/ frequencies;
private List<Integer> frequencies;
/**
@ -90,9 +88,9 @@ public class TermVectorAccessor {
private void build(IndexReader indexReader, String field, TermVectorMapper mapper, int documentNumber) throws IOException {
if (tokens == null) {
tokens = new ArrayList/*<String>*/(500);
positions = new ArrayList/*<int[]>*/(500);
frequencies = new ArrayList/*<Integer>*/(500);
tokens = new ArrayList<String>(500);
positions = new ArrayList<int[]>(500);
frequencies = new ArrayList<Integer>(500);
} else {
tokens.clear();
frequencies.clear();
@ -127,7 +125,7 @@ public class TermVectorAccessor {
mapper.setDocumentNumber(documentNumber);
mapper.setExpectations(field, tokens.size(), false, !mapper.isIgnoringPositions());
for (int i = 0; i < tokens.size(); i++) {
mapper.map((String) tokens.get(i), ((Integer) frequencies.get(i)).intValue(), (TermVectorOffsetInfo[]) null, (int[]) positions.get(i));
mapper.map(tokens.get(i), frequencies.get(i).intValue(), (TermVectorOffsetInfo[]) null, positions.get(i));
}
}
termEnum.close();

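The /*<String>*/-style comments in TermVectorAccessor were pre-Java-5 documentation of intent; promoting them to real type parameters lets the compiler enforce what the comments only promised and removes the casts in the mapper.map(...) call. The same upgrade in miniature, with hypothetical names:

import java.util.ArrayList;
import java.util.List;

public class CommentGenericsUpgrade {
  public static void main(String[] args) {
    // Before: List/*<Integer>*/ frequencies = new ArrayList/*<Integer>*/(500);
    // The comment documented the element type but enforced nothing.
    List<Integer> frequencies = new ArrayList<Integer>(500);
    frequencies.add(3); // int is autoboxed to Integer
    // Before: int f = ((Integer) frequencies.get(0)).intValue();
    int f = frequencies.get(0).intValue(); // no cast; the compiler knows the type
    System.out.println(f);
  }
}
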
View File

@ -65,7 +65,7 @@ public class HighFreqTerms {
}
}
while (tiq.size() != 0) {
TermInfo termInfo = (TermInfo) tiq.pop();
TermInfo termInfo = tiq.pop();
System.out.println(termInfo.term + " " + termInfo.docFreq);
}

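The cast on pop() above disappears because Lucene's PriorityQueue was parameterized elsewhere in LUCENE-1257. The effect is the same as with the JDK's queue, sketched below with a hypothetical TermInfo class:

import java.util.Comparator;
import java.util.PriorityQueue;

public class TypedQueue {
  static class TermInfo {
    final String term;
    final int docFreq;
    TermInfo(String term, int docFreq) { this.term = term; this.docFreq = docFreq; }
  }

  public static void main(String[] args) {
    // Highest document frequency first.
    PriorityQueue<TermInfo> tiq = new PriorityQueue<TermInfo>(16,
        new Comparator<TermInfo>() {
          public int compare(TermInfo a, TermInfo b) { return b.docFreq - a.docFreq; }
        });
    tiq.add(new TermInfo("lucene", 42));
    tiq.add(new TermInfo("apache", 7));
    while (tiq.size() != 0) {
      TermInfo termInfo = tiq.poll(); // was: (TermInfo) tiq.pop()
      System.out.println(termInfo.term + " " + termInfo.docFreq);
    }
  }
}
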
View File

@ -58,8 +58,7 @@ public class LengthNormModifier {
Similarity s = null;
try {
Class simClass = Class.forName(args[1]);
s = (Similarity)simClass.newInstance();
s = Class.forName(args[1]).asSubclass(Similarity.class).newInstance();
} catch (Exception e) {
System.err.println("Couldn't instantiate similarity with empty constructor: " + args[1]);
e.printStackTrace(System.err);
@ -142,7 +141,7 @@ public class LengthNormModifier {
reader = IndexReader.open(dir, false);
for (int d = 0; d < termCounts.length; d++) {
if (! reader.isDeleted(d)) {
byte norm = sim.encodeNorm(sim.lengthNorm(fieldName, termCounts[d]));
byte norm = Similarity.encodeNorm(sim.lengthNorm(fieldName, termCounts[d]));
reader.setNorm(d, fieldName, norm);
}
}

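encodeNorm is a static method, so calling it as sim.encodeNorm(...) only looked like an instance call; the commit rewrites both FieldNormModifier and LengthNormModifier to the class-qualified form. The point in miniature (encodeNorm below is a hypothetical stand-in, not Lucene's implementation):

public class StaticCallStyle {
  /** Stand-in for Similarity.encodeNorm: a static method. */
  static byte encodeNorm(float f) {
    return (byte) Math.max(-128, Math.min(127, (int) (f * 10)));
  }

  public static void main(String[] args) {
    StaticCallStyle sim = new StaticCallStyle();
    // Legal but misleading: "sim.encodeNorm(0.5f)" is resolved against the
    // static type, not the instance, so nothing instance-specific happens.
    byte viaInstance = sim.encodeNorm(0.5f);
    // Clearer, and what the commit switches to:
    byte viaClass = StaticCallStyle.encodeNorm(0.5f);
    System.out.println(viaInstance == viaClass); // true
  }
}
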
View File

@ -17,7 +17,6 @@
package org.apache.lucene.misc;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.search.DefaultSimilarity;
import org.apache.lucene.index.FieldInvertState;
@ -51,10 +50,10 @@ public class SweetSpotSimilarity extends DefaultSimilarity {
private int ln_max = 1;
private float ln_steep = 0.5f;
private Map ln_mins = new HashMap(7);
private Map ln_maxs = new HashMap(7);
private Map ln_steeps = new HashMap(7);
private Map ln_overlaps = new HashMap(7);
private Map<String,Number> ln_maxs = new HashMap<String,Number>(7);
private Map<String,Number> ln_mins = new HashMap<String,Number>(7);
private Map<String,Float> ln_steeps = new HashMap<String,Float>(7);
private Map<String,Boolean> ln_overlaps = new HashMap<String,Boolean>(7);
private float tf_base = 0.0f;
private float tf_min = 0.0f;
@ -139,7 +138,7 @@ public class SweetSpotSimilarity extends DefaultSimilarity {
final int numTokens;
boolean overlaps = discountOverlaps;
if (ln_overlaps.containsKey(fieldName)) {
overlaps = ((Boolean)ln_overlaps.get(fieldName)).booleanValue();
overlaps = ln_overlaps.get(fieldName).booleanValue();
}
if (overlaps)
numTokens = state.getLength() - state.getNumOverlap();
@ -173,13 +172,13 @@ public class SweetSpotSimilarity extends DefaultSimilarity {
float s = ln_steep;
if (ln_mins.containsKey(fieldName)) {
l = ((Number)ln_mins.get(fieldName)).intValue();
l = ln_mins.get(fieldName).intValue();
}
if (ln_maxs.containsKey(fieldName)) {
h = ((Number)ln_maxs.get(fieldName)).intValue();
h = ln_maxs.get(fieldName).intValue();
}
if (ln_steeps.containsKey(fieldName)) {
s = ((Number)ln_steeps.get(fieldName)).floatValue();
s = ln_steeps.get(fieldName).floatValue();
}
return (float)

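Once SweetSpotSimilarity's four lookup tables carry their value types, the ((Number) ...).intValue() and ((Boolean) ...).booleanValue() dances reduce to plain calls checked at compile time. A compact sketch reusing the field names above as hypothetical locals:

import java.util.HashMap;
import java.util.Map;

public class TypedLookupTables {
  public static void main(String[] args) {
    Map<String, Number> ln_mins = new HashMap<String, Number>(7);
    Map<String, Boolean> ln_overlaps = new HashMap<String, Boolean>(7);
    ln_mins.put("body", 2); // int autoboxes to Integer, an instance of Number
    ln_overlaps.put("body", Boolean.TRUE);

    // Before: int l = ((Number) ln_mins.get("body")).intValue();
    int l = ln_mins.get("body").intValue();
    // Before: boolean o = ((Boolean) ln_overlaps.get("body")).booleanValue();
    boolean o = ln_overlaps.get("body").booleanValue();
    System.out.println(l + " " + o); // 2 true
  }
}
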
View File

@ -23,7 +23,6 @@ import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.queryParser.ParseException;
@ -76,8 +75,8 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
* @throws ParseException
*/
protected Query getWildcardQuery(String field, String termStr) throws ParseException {
List tlist = new ArrayList();
List wlist = new ArrayList();
List<String> tlist = new ArrayList<String>();
List<String> wlist = new ArrayList<String>();
/* somewhat a hack: find/store wildcard chars
* in order to put them back after analyzing */
boolean isWithinToken = (!termStr.startsWith("?") && !termStr.startsWith("*"));
@ -145,8 +144,8 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
/* if wlist contains one wildcard, it must be at the end, because:
* 1) wildcards are not allowed in 1st position of a term by QueryParser
* 2) if wildcard was *not* in end, there would be *two* or more tokens */
return super.getWildcardQuery(field, (String) tlist.get(0)
+ (((String) wlist.get(0)).toString()));
return super.getWildcardQuery(field, tlist.get(0)
+ wlist.get(0).toString());
} else {
/* we should never get here! if so, this method was called
* with a termStr containing no wildcard ... */
@ -157,9 +156,9 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
* with wildcards put back in position */
StringBuilder sb = new StringBuilder();
for (int i = 0; i < tlist.size(); i++) {
sb.append((String) tlist.get(i));
sb.append( tlist.get(i));
if (wlist != null && wlist.size() > i) {
sb.append((String) wlist.get(i));
sb.append(wlist.get(i));
}
}
return super.getWildcardQuery(field, sb.toString());
@ -188,7 +187,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
protected Query getPrefixQuery(String field, String termStr) throws ParseException {
// get Analyzer from superclass and tokenize the term
TokenStream source = getAnalyzer().tokenStream(field, new StringReader(termStr));
List tlist = new ArrayList();
List<String> tlist = new ArrayList<String>();
TermAttribute termAtt = source.addAttribute(TermAttribute.class);
while (true) {
@ -207,7 +206,7 @@ public class AnalyzingQueryParser extends org.apache.lucene.queryParser.QueryPar
}
if (tlist.size() == 1) {
return super.getPrefixQuery(field, (String) tlist.get(0));
return super.getPrefixQuery(field, tlist.get(0));
} else {
/* this means that the analyzer used either added or consumed
* (common for a stemmer) tokens, and we can't build a PrefixQuery */

View File

@ -62,7 +62,7 @@ import org.apache.lucene.util.Version;
*
*/
public class ComplexPhraseQueryParser extends QueryParser {
private ArrayList/*<ComplexPhraseQuery>*/complexPhrases = null;
private ArrayList<ComplexPhraseQuery> complexPhrases = null;
private boolean isPass2ResolvingPhrases;
@ -102,7 +102,7 @@ public class ComplexPhraseQueryParser extends QueryParser {
// First pass - parse the top-level query recording any PhraseQuerys
// which will need to be resolved
complexPhrases = new ArrayList/*<ComplexPhraseQuery>*/();
complexPhrases = new ArrayList<ComplexPhraseQuery>();
Query q = super.parse(query);
// Perform second pass, using this QueryParser to parse any nested
@ -110,8 +110,8 @@ public class ComplexPhraseQueryParser extends QueryParser {
// set of syntax restrictions (i.e. all fields must be same)
isPass2ResolvingPhrases = true;
try {
for (Iterator iterator = complexPhrases.iterator(); iterator.hasNext();) {
currentPhraseQuery = (ComplexPhraseQuery) iterator.next();
for (Iterator<ComplexPhraseQuery> iterator = complexPhrases.iterator(); iterator.hasNext();) {
currentPhraseQuery = iterator.next();
// in each phrase, now parse the contents between quotes as a
// separate parse operation
currentPhraseQuery.parsePhraseElements(this);
@ -247,10 +247,10 @@ public class ComplexPhraseQueryParser extends QueryParser {
}
if (qc instanceof BooleanQuery) {
ArrayList sc = new ArrayList();
ArrayList<SpanQuery> sc = new ArrayList<SpanQuery>();
addComplexPhraseClause(sc, (BooleanQuery) qc);
if (sc.size() > 0) {
allSpanClauses[i] = (SpanQuery) sc.get(0);
allSpanClauses[i] = sc.get(0);
} else {
// Insert fake term e.g. phrase query was for "Fred Smithe*" and
// there were no "Smithe*" terms - need to
@ -278,14 +278,14 @@ public class ComplexPhraseQueryParser extends QueryParser {
// Complex case - we have mixed positives and negatives in the
// sequence.
// Need to return a SpanNotQuery
ArrayList positiveClauses = new ArrayList();
ArrayList<SpanQuery> positiveClauses = new ArrayList<SpanQuery>();
for (int j = 0; j < allSpanClauses.length; j++) {
if (!bclauses[j].getOccur().equals(BooleanClause.Occur.MUST_NOT)) {
positiveClauses.add(allSpanClauses[j]);
}
}
SpanQuery[] includeClauses = (SpanQuery[]) positiveClauses
SpanQuery[] includeClauses = positiveClauses
.toArray(new SpanQuery[positiveClauses.size()]);
SpanQuery include = null;
@ -304,9 +304,9 @@ public class ComplexPhraseQueryParser extends QueryParser {
return snot;
}
private void addComplexPhraseClause(List spanClauses, BooleanQuery qc) {
ArrayList ors = new ArrayList();
ArrayList nots = new ArrayList();
private void addComplexPhraseClause(List<SpanQuery> spanClauses, BooleanQuery qc) {
ArrayList<SpanQuery> ors = new ArrayList<SpanQuery>();
ArrayList<SpanQuery> nots = new ArrayList<SpanQuery>();
BooleanClause[] bclauses = qc.getClauses();
// For all clauses e.g. one* two~
@ -314,7 +314,7 @@ public class ComplexPhraseQueryParser extends QueryParser {
Query childQuery = bclauses[i].getQuery();
// select the list to which we will add these options
ArrayList chosenList = ors;
ArrayList<SpanQuery> chosenList = ors;
if (bclauses[i].getOccur() == BooleanClause.Occur.MUST_NOT) {
chosenList = nots;
}
@ -336,12 +336,12 @@ public class ComplexPhraseQueryParser extends QueryParser {
if (ors.size() == 0) {
return;
}
SpanOrQuery soq = new SpanOrQuery((SpanQuery[]) ors
SpanOrQuery soq = new SpanOrQuery(ors
.toArray(new SpanQuery[ors.size()]));
if (nots.size() == 0) {
spanClauses.add(soq);
} else {
SpanOrQuery snqs = new SpanOrQuery((SpanQuery[]) nots
SpanOrQuery snqs = new SpanOrQuery(nots
.toArray(new SpanQuery[nots.size()]));
SpanNotQuery snq = new SpanNotQuery(soq, snqs);
spanClauses.add(snq);

View File

@ -18,7 +18,6 @@ package org.apache.lucene.queryParser.precedence;
* limitations under the License.
*/
import org.apache.lucene.queryParser.*;
import java.io.*;

View File

@ -8,7 +8,6 @@ import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.Vector;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@ -247,20 +246,13 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
return locale;
}
/**
* @deprecated use {@link #addClause(List, int, int, Query)} instead.
*/
protected void addClause(Vector clauses, int conj, int modifier, Query q) {
addClause((List) clauses, conj, modifier, q);
}
protected void addClause(List clauses, int conj, int modifier, Query q) {
protected void addClause(List<BooleanClause> clauses, int conj, int modifier, Query q) {
boolean required, prohibited;
// If this term is introduced by AND, make the preceding term required,
// unless it's already prohibited
if (clauses.size() > 0 && conj == CONJ_AND) {
BooleanClause c = (BooleanClause) clauses.get(clauses.size()-1);
BooleanClause c = clauses.get(clauses.size()-1);
if (!c.isProhibited())
c.setOccur(BooleanClause.Occur.MUST);
}
@ -270,7 +262,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
// unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
// notice if the input is a OR b, first term is parsed as required; without
// this modification a OR b would be parsed as +a OR b
BooleanClause c = (BooleanClause) clauses.get(clauses.size()-1);
BooleanClause c = clauses.get(clauses.size()-1);
if (!c.isProhibited())
c.setOccur(BooleanClause.Occur.SHOULD);
}
@ -357,12 +349,12 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
for (int i = 0; i < list.size(); i++) {
source.restoreState(list.get(i));
if (posincrAtt.getPositionIncrement() == 1 && multiTerms.size() > 0) {
mpq.add((Term[])multiTerms.toArray(new Term[0]));
mpq.add(multiTerms.toArray(new Term[0]));
multiTerms.clear();
}
multiTerms.add(new Term(field, termAtt.term()));
}
mpq.add((Term[])multiTerms.toArray(new Term[0]));
mpq.add(multiTerms.toArray(new Term[0]));
return mpq;
}
}
@ -436,27 +428,8 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
*
* @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow
* @deprecated use {@link #getBooleanQuery(List)} instead
*/
protected Query getBooleanQuery(Vector clauses) throws ParseException
{
return getBooleanQuery((List) clauses, false);
}
/**
* Factory method for generating query, given a set of clauses.
* By default creates a boolean query composed of clauses passed in.
*
* Can be overridden by extending classes, to modify query being
* returned.
*
* @param clauses List that contains {@link BooleanClause} instances
* to join.
*
* @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow
*/
protected Query getBooleanQuery(List clauses) throws ParseException
protected Query getBooleanQuery(List<BooleanClause> clauses) throws ParseException
{
return getBooleanQuery(clauses, false);
}
@ -474,36 +447,15 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
*
* @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow
* @deprecated use {@link #getBooleanQuery(List, boolean)} instead
*/
protected Query getBooleanQuery(Vector clauses, boolean disableCoord)
throws ParseException
{
return getBooleanQuery((List) clauses, disableCoord);
}
/**
* Factory method for generating query, given a set of clauses.
* By default creates a boolean query composed of clauses passed in.
*
* Can be overridden by extending classes, to modify query being
* returned.
*
* @param clauses List that contains {@link BooleanClause} instances
* to join.
* @param disableCoord true if coord scoring should be disabled.
*
* @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow
*/
protected Query getBooleanQuery(List clauses, boolean disableCoord)
protected Query getBooleanQuery(List<BooleanClause> clauses, boolean disableCoord)
throws ParseException {
if (clauses == null || clauses.size() == 0)
return null;
BooleanQuery query = new BooleanQuery(disableCoord);
for (int i = 0; i < clauses.size(); i++) {
query.add((BooleanClause)clauses.get(i));
query.add(clauses.get(i));
}
return query;
}
@ -705,7 +657,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
}
final public Query Query(String field) throws ParseException {
List clauses = new ArrayList();
List<BooleanClause> clauses = new ArrayList<BooleanClause>();
Query q, firstQuery=null;
boolean orPresent = false;
int modifier;
@ -757,7 +709,7 @@ public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
}
final public Query andExpression(String field) throws ParseException {
List clauses = new ArrayList();
List<BooleanClause> clauses = new ArrayList<BooleanClause>();
Query q, firstQuery=null;
int modifier;
q = Clause(field);

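Both copies of PrecedenceQueryParser (the generated .java above and the .jj grammar below) delete the deprecated Vector overloads outright rather than genericizing them; since Vector<E> implements List<E>, any remaining Vector caller still compiles against the List<BooleanClause> methods. A sketch of why the collapse is safe, with String standing in for BooleanClause:

import java.util.ArrayList;
import java.util.List;
import java.util.Vector;

public class OverloadCollapse {
  // Before, two entry points were kept for compatibility:
  //   protected void addClause(Vector clauses, ...) { addClause((List) clauses, ...); }
  //   protected void addClause(List clauses, ...)   { ... }
  // After, one parameterized method serves both kinds of caller:
  static void addClause(List<String> clauses, String q) {
    clauses.add(q);
  }

  public static void main(String[] args) {
    addClause(new ArrayList<String>(), "a");
    addClause(new Vector<String>(), "b"); // Vector<E> implements List<E>
    System.out.println("ok");
  }
}
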
View File

@ -32,7 +32,6 @@ import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.Vector;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
@ -271,20 +270,13 @@ public class PrecedenceQueryParser {
return locale;
}
/**
* @deprecated use {@link #addClause(List, int, int, Query)} instead.
*/
protected void addClause(Vector clauses, int conj, int modifier, Query q) {
addClause((List) clauses, conj, modifier, q);
}
protected void addClause(List clauses, int conj, int modifier, Query q) {
protected void addClause(List<BooleanClause> clauses, int conj, int modifier, Query q) {
boolean required, prohibited;
// If this term is introduced by AND, make the preceding term required,
// unless it's already prohibited
if (clauses.size() > 0 && conj == CONJ_AND) {
BooleanClause c = (BooleanClause) clauses.get(clauses.size()-1);
BooleanClause c = clauses.get(clauses.size()-1);
if (!c.isProhibited())
c.setOccur(BooleanClause.Occur.MUST);
}
@ -294,7 +286,7 @@ public class PrecedenceQueryParser {
// unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
// notice if the input is a OR b, first term is parsed as required; without
// this modification a OR b would be parsed as +a OR b
BooleanClause c = (BooleanClause) clauses.get(clauses.size()-1);
BooleanClause c = clauses.get(clauses.size()-1);
if (!c.isProhibited())
c.setOccur(BooleanClause.Occur.SHOULD);
}
@ -381,12 +373,12 @@ public class PrecedenceQueryParser {
for (int i = 0; i < list.size(); i++) {
source.restoreState(list.get(i));
if (posincrAtt.getPositionIncrement() == 1 && multiTerms.size() > 0) {
mpq.add((Term[])multiTerms.toArray(new Term[0]));
mpq.add(multiTerms.toArray(new Term[0]));
multiTerms.clear();
}
multiTerms.add(new Term(field, termAtt.term()));
}
mpq.add((Term[])multiTerms.toArray(new Term[0]));
mpq.add(multiTerms.toArray(new Term[0]));
return mpq;
}
}
@ -460,27 +452,8 @@ public class PrecedenceQueryParser {
*
* @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow
* @deprecated use {@link #getBooleanQuery(List)} instead
*/
protected Query getBooleanQuery(Vector clauses) throws ParseException
{
return getBooleanQuery((List) clauses, false);
}
/**
* Factory method for generating query, given a set of clauses.
* By default creates a boolean query composed of clauses passed in.
*
* Can be overridden by extending classes, to modify query being
* returned.
*
* @param clauses List that contains {@link BooleanClause} instances
* to join.
*
* @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow
*/
protected Query getBooleanQuery(List clauses) throws ParseException
protected Query getBooleanQuery(List<BooleanClause> clauses) throws ParseException
{
return getBooleanQuery(clauses, false);
}
@ -498,36 +471,15 @@ public class PrecedenceQueryParser {
*
* @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow
* @deprecated use {@link #getBooleanQuery(List, boolean)} instead
*/
protected Query getBooleanQuery(Vector clauses, boolean disableCoord)
throws ParseException
{
return getBooleanQuery((List) clauses, disableCoord);
}
/**
* Factory method for generating query, given a set of clauses.
* By default creates a boolean query composed of clauses passed in.
*
* Can be overridden by extending classes, to modify query being
* returned.
*
* @param clauses List that contains {@link BooleanClause} instances
* to join.
* @param disableCoord true if coord scoring should be disabled.
*
* @return Resulting {@link Query} object.
* @exception ParseException throw in overridden method to disallow
*/
protected Query getBooleanQuery(List clauses, boolean disableCoord)
protected Query getBooleanQuery(List<BooleanClause> clauses, boolean disableCoord)
throws ParseException {
if (clauses == null || clauses.size() == 0)
return null;
BooleanQuery query = new BooleanQuery(disableCoord);
for (int i = 0; i < clauses.size(); i++) {
query.add((BooleanClause)clauses.get(i));
query.add(clauses.get(i));
}
return query;
}
@ -763,7 +715,7 @@ int Modifier() : {
Query Query(String field) :
{
List clauses = new ArrayList();
List<BooleanClause> clauses = new ArrayList<BooleanClause>();
Query q, firstQuery=null;
boolean orPresent = false;
int modifier;
@ -790,7 +742,7 @@ Query Query(String field) :
Query andExpression(String field) :
{
List clauses = new ArrayList();
List<BooleanClause> clauses = new ArrayList<BooleanClause>();
Query q, firstQuery=null;
int modifier;
}

View File

@ -7,7 +7,6 @@ import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Locale;
import java.util.Vector;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.*;

View File

@ -238,12 +238,12 @@ public class AnalyzerUtil {
return new Analyzer() {
private final HashMap cache = new HashMap();
private final HashMap<String,ArrayList<AttributeSource.State>> cache = new HashMap<String,ArrayList<AttributeSource.State>>();
public TokenStream tokenStream(String fieldName, Reader reader) {
final ArrayList tokens = (ArrayList) cache.get(fieldName);
final ArrayList<AttributeSource.State> tokens = cache.get(fieldName);
if (tokens == null) { // not yet cached
final ArrayList tokens2 = new ArrayList();
final ArrayList<AttributeSource.State> tokens2 = new ArrayList<AttributeSource.State>();
TokenStream tokenStream = new TokenFilter(child.tokenStream(fieldName, reader)) {
public boolean incrementToken() throws IOException {
@ -258,11 +258,11 @@ public class AnalyzerUtil {
} else { // already cached
return new TokenStream() {
private Iterator iter = tokens.iterator();
private Iterator<AttributeSource.State> iter = tokens.iterator();
public boolean incrementToken() {
if (!iter.hasNext()) return false;
restoreState((AttributeSource.State) iter.next());
restoreState(iter.next());
return true;
}
};
@ -305,12 +305,12 @@ public class AnalyzerUtil {
if (limit <= 0) limit = Integer.MAX_VALUE;
// compute frequencies of distinct terms
HashMap map = new HashMap();
HashMap<String,MutableInteger> map = new HashMap<String,MutableInteger>();
TokenStream stream = analyzer.tokenStream("", new StringReader(text));
TermAttribute termAtt = stream.addAttribute(TermAttribute.class);
try {
while (stream.incrementToken()) {
MutableInteger freq = (MutableInteger) map.get(termAtt.term());
MutableInteger freq = map.get(termAtt.term());
if (freq == null) {
freq = new MutableInteger(1);
map.put(termAtt.term(), freq);
@ -329,17 +329,15 @@ public class AnalyzerUtil {
}
// sort by frequency, text
Map.Entry[] entries = new Map.Entry[map.size()];
Map.Entry<String,MutableInteger>[] entries = new Map.Entry[map.size()];
map.entrySet().toArray(entries);
Arrays.sort(entries, new Comparator() {
public int compare(Object o1, Object o2) {
Map.Entry e1 = (Map.Entry) o1;
Map.Entry e2 = (Map.Entry) o2;
int f1 = ((MutableInteger) e1.getValue()).intValue();
int f2 = ((MutableInteger) e2.getValue()).intValue();
Arrays.sort(entries, new Comparator<Map.Entry<String,MutableInteger>>() {
public int compare(Map.Entry<String,MutableInteger> e1, Map.Entry<String,MutableInteger> e2) {
int f1 = e1.getValue().intValue();
int f2 = e2.getValue().intValue();
if (f2 - f1 != 0) return f2 - f1;
String s1 = (String) e1.getKey();
String s2 = (String) e2.getKey();
String s1 = e1.getKey();
String s2 = e2.getKey();
return s1.compareTo(s2);
}
});

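In AnalyzerUtil the anonymous comparator goes from compare(Object, Object) with four casts to Comparator<Map.Entry<String,MutableInteger>>, typing the arguments at the declaration. The same shape on JDK types (MutableInteger is Lucene-internal, so Integer stands in):

import java.util.AbstractMap;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Map;

public class TypedComparatorDemo {
  public static void main(String[] args) {
    @SuppressWarnings("unchecked")
    Map.Entry<String, Integer>[] entries = new Map.Entry[] {
        new AbstractMap.SimpleEntry<String, Integer>("the", 7),
        new AbstractMap.SimpleEntry<String, Integer>("a", 7),
        new AbstractMap.SimpleEntry<String, Integer>("lucene", 2),
    };
    // Sort by descending frequency, then by term text, without a single cast:
    Arrays.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
      public int compare(Map.Entry<String, Integer> e1, Map.Entry<String, Integer> e2) {
        int d = e2.getValue().intValue() - e1.getValue().intValue();
        if (d != 0) return d;
        return e1.getKey().compareTo(e2.getKey());
      }
    });
    for (Map.Entry<String, Integer> e : entries) {
      System.out.println(e.getKey() + " " + e.getValue());
    }
  }
}
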
View File

@ -76,7 +76,7 @@ import java.util.TreeSet;
public class SynonymMap {
/** the index data; Map<String word, String[] synonyms> */
private final HashMap table;
private final HashMap<String,String[]> table;
private static final String[] EMPTY = new String[0];
@ -93,7 +93,7 @@ public class SynonymMap {
* if an error occurred while reading the stream.
*/
public SynonymMap(InputStream input) throws IOException {
this.table = input == null ? new HashMap(0) : read(toByteArray(input));
this.table = input == null ? new HashMap<String,String[]>(0) : read(toByteArray(input));
}
/**
@ -123,7 +123,7 @@ public class SynonymMap {
*/
public String toString() {
StringBuilder buf = new StringBuilder();
Iterator iter = new TreeMap(table).keySet().iterator();
Iterator<String> iter = new TreeMap<String,String[]>(table).keySet().iterator();
int count = 0;
int f0 = 0;
int f1 = 0;
@ -131,7 +131,7 @@ public class SynonymMap {
int f3 = 0;
while (iter.hasNext()) {
String word = (String) iter.next();
String word = iter.next();
buf.append(word + ":");
String[] synonyms = getSynonyms(word);
buf.append(Arrays.asList(synonyms));
@ -168,12 +168,12 @@ public class SynonymMap {
return true;
}
private HashMap read(byte[] data) {
private HashMap<String,String[]> read(byte[] data) {
int WORDS = (int) (76401 / 0.7); // presizing
int GROUPS = (int) (88022 / 0.7); // presizing
HashMap word2Groups = new HashMap(WORDS); // Map<String word, int[] groups>
HashMap group2Words = new HashMap(GROUPS); // Map<int group, String[] words>
HashMap internedWords = new HashMap(WORDS);// Map<String word, String word>
HashMap<String,ArrayList<Integer>> word2Groups = new HashMap<String,ArrayList<Integer>>(WORDS); // Map<String word, int[] groups>
HashMap<Integer,ArrayList<String>> group2Words = new HashMap<Integer,ArrayList<String>>(GROUPS); // Map<int group, String[] words>
HashMap<String,String> internedWords = new HashMap<String,String>(WORDS);// Map<String word, String word>
Charset charset = Charset.forName("UTF-8");
int lastNum = -1;
@ -226,7 +226,7 @@ public class SynonymMap {
/* Part C: Add (group,word) to tables */
// ensure compact string representation, minimizing memory overhead
String w = (String) internedWords.get(word);
String w = internedWords.get(word);
if (w == null) {
word = new String(word); // ensure compact string
internedWords.put(word, word);
@ -242,17 +242,17 @@ public class SynonymMap {
}
// add word --> group
ArrayList groups = (ArrayList) word2Groups.get(word);
ArrayList<Integer> groups = word2Groups.get(word);
if (groups == null) {
groups = new ArrayList(1);
groups = new ArrayList<Integer>(1);
word2Groups.put(word, groups);
}
groups.add(group);
// add group --> word
ArrayList words = (ArrayList) group2Words.get(group);
ArrayList<String> words = group2Words.get(group);
if (words == null) {
words = new ArrayList(1);
words = new ArrayList<String>(1);
group2Words.put(group, words);
}
words.add(word);
@ -265,25 +265,26 @@ public class SynonymMap {
/* Part E: minimize memory consumption by a factor 3 (or so) */
// if (true) return word2Syns;
word2Groups = null; // help gc
group2Words = null; // help gc
//TODO: word2Groups.clear(); would be more appropriate ?
group2Words = null; // help gc
//TODO: group2Words.clear(); would be more appropriate ?
return optimize(word2Syns, internedWords);
}
private HashMap createIndex(Map word2Groups, Map group2Words) {
HashMap word2Syns = new HashMap();
Iterator iter = word2Groups.entrySet().iterator();
private HashMap<String,String[]> createIndex(Map<String,ArrayList<Integer>> word2Groups, Map<Integer,ArrayList<String>> group2Words) {
HashMap<String,String[]> word2Syns = new HashMap<String,String[]>();
while (iter.hasNext()) { // for each word
Map.Entry entry = (Map.Entry) iter.next();
ArrayList group = (ArrayList) entry.getValue();
String word = (String) entry.getKey();
for (final Map.Entry<String,ArrayList<Integer>> entry : word2Groups.entrySet()) { // for each word
ArrayList<Integer> group = entry.getValue();
String word = entry.getKey();
// HashSet synonyms = new HashSet();
TreeSet synonyms = new TreeSet();
TreeSet<String> synonyms = new TreeSet<String>();
for (int i=group.size(); --i >= 0; ) { // for each groupID of word
ArrayList words = (ArrayList) group2Words.get(group.get(i));
ArrayList<String> words = group2Words.get(group.get(i));
for (int j=words.size(); --j >= 0; ) { // add all words
Object synonym = words.get(j); // note that w and word are interned
String synonym = words.get(j); // note that w and word are interned
if (synonym != word) { // a word is implicitly its own synonym
synonyms.add(synonym);
}
@ -294,7 +295,7 @@ public class SynonymMap {
if (size > 0) {
String[] syns = new String[size];
if (size == 1)
syns[0] = (String) synonyms.first();
syns[0] = synonyms.first();
else
synonyms.toArray(syns);
// if (syns.length > 1) Arrays.sort(syns);
@ -306,7 +307,7 @@ public class SynonymMap {
return word2Syns;
}
private HashMap optimize(HashMap word2Syns, HashMap internedWords) {
private HashMap<String,String[]> optimize(HashMap word2Syns, HashMap<String,String> internedWords) {
if (DEBUG) {
System.err.println("before gc");
for (int i=0; i < 10; i++) System.gc();
@ -318,11 +319,11 @@ public class SynonymMap {
int size = word2Syns.size();
String[][] allSynonyms = new String[size][];
String[] words = new String[size];
Iterator iter = word2Syns.entrySet().iterator();
Iterator<Map.Entry<String,String[]>> iter = word2Syns.entrySet().iterator();
for (int j=0; j < size; j++) {
Map.Entry entry = (Map.Entry) iter.next();
allSynonyms[j] = (String[]) entry.getValue();
words[j] = (String) entry.getKey();
Map.Entry<String,String[]> entry = iter.next();
allSynonyms[j] = entry.getValue();
words[j] = entry.getKey();
len += words[j].length();
}
@ -343,7 +344,7 @@ public class SynonymMap {
for (int j=0; j < size; j++) {
String[] syns = allSynonyms[j];
for (int k=syns.length; --k >= 0; ) {
syns[k] = (String) internedWords.get(syns[k]);
syns[k] = internedWords.get(syns[k]);
}
Object replacement = syns;
if (syns.length == 1) replacement = syns[0]; // minimize memory consumption some more
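
With internedWords typed as HashMap<String,String>, SynonymMap's table works as a do-it-yourself intern pool: every distinct string maps to one canonical instance, which is what makes the synonym != word reference comparison above sound. A stand-alone sketch of that idiom (not from the commit):

import java.util.HashMap;

public class InternPool {
  private final HashMap<String, String> pool = new HashMap<String, String>();

  /** Returns the canonical instance for the given string, storing it on first sight. */
  String canon(String s) {
    String w = pool.get(s); // no (String) cast thanks to the type parameters
    if (w == null) {
      pool.put(s, s);
      return s;
    }
    return w;
  }

  public static void main(String[] args) {
    InternPool p = new InternPool();
    // new String(...) forces distinct instances; literals alone would be JVM-interned.
    String a = p.canon(new String("word"));
    String b = p.canon(new String("word"));
    System.out.println(a == b); // true: both are the same canonical instance
  }
}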