mirror of https://github.com/apache/lucene.git
LUCENE-3250: remove contrib/misc,wordnet,suggest dependencies on modules/analysis
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1140394 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b3f0ae9b7a
commit
6f6b3cbc5a
|
@ -5,6 +5,11 @@ http://s.apache.org/luceneversions
|
|||
|
||||
======================= Trunk (not yet released) =======================
|
||||
|
||||
Changes in runtime behavior
|
||||
|
||||
* LUCENE-3250: Wordnet's SynExpand requires a non-null Analyzer (it no longer
|
||||
treats null as StandardAnalyzer). (Robert Muir)
|
||||
|
||||
Build
|
||||
|
||||
* LUCENE-2845: Moved contrib/benchmark to modules.
|
||||
|
|
|
@ -27,22 +27,6 @@
|
|||
|
||||
<import file="../contrib-build.xml"/>
|
||||
|
||||
<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
|
||||
property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
|
||||
|
||||
<path id="classpath">
|
||||
<pathelement path="${analyzers-common.jar}"/>
|
||||
<path refid="base.classpath"/>
|
||||
</path>
|
||||
|
||||
<target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
|
||||
|
||||
<target name="compile-analyzers-common" unless="analyzers-common.uptodate">
|
||||
<subant target="default">
|
||||
<fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
|
||||
</subant>
|
||||
</target>
|
||||
|
||||
<target name="build-native-unix" >
|
||||
<mkdir dir="${common.build.dir}/native"/>
|
||||
|
||||
|
|
|
@ -21,7 +21,6 @@ import java.io.File;
|
|||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.index.IndexWriter; // javadoc
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.store.Directory;
|
||||
|
@ -98,7 +97,7 @@ public class MultiPassIndexSplitter {
|
|||
}
|
||||
IndexWriter w = new IndexWriter(outputs[i], new IndexWriterConfig(
|
||||
Version.LUCENE_CURRENT,
|
||||
new WhitespaceAnalyzer(Version.LUCENE_CURRENT))
|
||||
null)
|
||||
.setOpenMode(OpenMode.CREATE));
|
||||
System.err.println("Writing part " + (i + 1) + " ...");
|
||||
w.addIndexes(input);
|
||||
|
|
|
@ -16,7 +16,6 @@ package org.apache.lucene.misc;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
|
@ -40,7 +39,7 @@ public class IndexMergeTool {
|
|||
FSDirectory mergedIndex = FSDirectory.open(new File(args[0]));
|
||||
|
||||
IndexWriter writer = new IndexWriter(mergedIndex, new IndexWriterConfig(
|
||||
Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT))
|
||||
Version.LUCENE_CURRENT, null)
|
||||
.setOpenMode(OpenMode.CREATE));
|
||||
|
||||
Directory[] indexes = new Directory[args.length - 1];
|
||||
|
|
|
@ -30,22 +30,6 @@
|
|||
|
||||
<import file="../contrib-build.xml"/>
|
||||
|
||||
<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
|
||||
property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
|
||||
|
||||
<path id="classpath">
|
||||
<pathelement path="${analyzers-common.jar}"/>
|
||||
<path refid="base.classpath"/>
|
||||
</path>
|
||||
|
||||
<target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
|
||||
|
||||
<target name="compile-analyzers-common" unless="analyzers-common.uptodate">
|
||||
<subant target="default">
|
||||
<fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
|
||||
</subant>
|
||||
</target>
|
||||
|
||||
<target name="index" depends="compile" description="Build WordNet index">
|
||||
<fail if="synindex.exists">
|
||||
Index already exists - must remove first.
|
||||
|
@ -83,24 +67,4 @@
|
|||
</java>
|
||||
</target>
|
||||
|
||||
<target name="expand" description="Perform synonym expansion on a query">
|
||||
<fail unless="synindex.exists">
|
||||
Index does not exist.
|
||||
</fail>
|
||||
|
||||
<fail unless="query">
|
||||
Must specify 'query' property.
|
||||
</fail>
|
||||
|
||||
<java classname="org.apache.lucene.wordnet.SynExpand">
|
||||
<classpath>
|
||||
<path refid="compile.classpath"/>
|
||||
<pathelement location="${build.dir}/classes"/>
|
||||
</classpath>
|
||||
|
||||
<arg file="${synindex.dir}"/>
|
||||
<arg value="${query}"/>
|
||||
</java>
|
||||
</target>
|
||||
|
||||
</project>
|
||||
|
|
|
@ -17,7 +17,6 @@ package org.apache.lucene.wordnet;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import java.io.File;
|
||||
import java.io.IOException;
|
||||
import java.io.StringReader;
|
||||
import java.util.HashSet;
|
||||
|
@ -28,7 +27,6 @@ import java.util.Set;
|
|||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -41,8 +39,6 @@ import org.apache.lucene.search.IndexSearcher;
|
|||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.apache.lucene.util.Version;
|
||||
|
||||
|
||||
/**
|
||||
|
@ -53,41 +49,6 @@ import org.apache.lucene.util.Version;
|
|||
*/
|
||||
public final class SynExpand {
|
||||
|
||||
/**
|
||||
* Test driver for synonym expansion.
|
||||
* Uses boost factor of 0.9 for illustrative purposes.
|
||||
*
|
||||
* If you pass in the query "big dog" then it prints out:
|
||||
*
|
||||
* <code><pre>
|
||||
* Query: big adult^0.9 bad^0.9 bighearted^0.9 boastful^0.9 boastfully^0.9 bounteous^0.9 bountiful^0.9 braggy^0.9 crowing^0.9 freehanded^0.9 giving^0.9 grown^0.9 grownup^0.9 handsome^0.9 large^0.9 liberal^0.9 magnanimous^0.9 momentous^0.9 openhanded^0.9 prominent^0.9 swelled^0.9 vainglorious^0.9 vauntingly^0.9
|
||||
* dog andiron^0.9 blackguard^0.9 bounder^0.9 cad^0.9 chase^0.9 click^0.9 detent^0.9 dogtooth^0.9 firedog^0.9 frank^0.9 frankfurter^0.9 frump^0.9 heel^0.9 hotdog^0.9 hound^0.9 pawl^0.9 tag^0.9 tail^0.9 track^0.9 trail^0.9 weenie^0.9 wiener^0.9 wienerwurst^0.9
|
||||
* </pre></code>
|
||||
*/
|
||||
public static void main(String[] args) throws IOException
|
||||
{
|
||||
if (args.length != 2)
|
||||
{
|
||||
System.out.println(
|
||||
"java org.apache.lucene.wordnet.SynExpand <index path> <query>");
|
||||
}
|
||||
|
||||
FSDirectory directory = FSDirectory.open(new File(args[0]));
|
||||
IndexSearcher searcher = new IndexSearcher(directory, true);
|
||||
|
||||
String query = args[1];
|
||||
String field = "contents";
|
||||
|
||||
Query q = expand( query, searcher, new StandardAnalyzer(Version.LUCENE_CURRENT), field, 0.9f);
|
||||
System.out.println( "Query: " + q.toString( field));
|
||||
|
||||
|
||||
|
||||
searcher.close();
|
||||
directory.close();
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Perform synonym expansion on a query.
|
||||
*
|
||||
|
@ -95,7 +56,7 @@ public final class SynExpand {
|
|||
*
|
||||
* @param syns a opened to the Lucene index you previously created with {@link Syns2Index}. The searcher is not closed or otherwise altered.
|
||||
*
|
||||
* @param a optional analyzer used to parse the users query else {@link StandardAnalyzer} is used
|
||||
* @param a analyzer used to parse the users query.
|
||||
*
|
||||
* @param f optional field name to search in or null if you want the default of "contents"
|
||||
*
|
||||
|
@ -113,7 +74,6 @@ public final class SynExpand {
|
|||
final Set<String> already = new HashSet<String>(); // avoid dups
|
||||
List<String> top = new LinkedList<String>(); // needs to be separately listed..
|
||||
final String field = ( f == null) ? "contents" : f;
|
||||
if ( a == null) a = new StandardAnalyzer(Version.LUCENE_CURRENT);
|
||||
|
||||
// [1] Parse query into separate words so that when we expand we can avoid dups
|
||||
TokenStream ts = a.reusableTokenStream( field, new StringReader( query));
|
||||
|
|
|
@ -41,6 +41,7 @@ import org.apache.lucene.search.Query;
|
|||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Scorer;
|
||||
import org.apache.lucene.search.TermQuery;
|
||||
import org.apache.lucene.search.TotalHitCountCollector;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
|
||||
|
||||
|
@ -49,24 +50,6 @@ import org.apache.lucene.store.FSDirectory;
|
|||
*/
|
||||
public class SynLookup {
|
||||
|
||||
final static class CountingCollector extends Collector {
|
||||
public int numHits = 0;
|
||||
|
||||
@Override
|
||||
public void setScorer(Scorer scorer) throws IOException {}
|
||||
@Override
|
||||
public void collect(int doc) throws IOException {
|
||||
numHits++;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void setNextReader(AtomicReaderContext context) {}
|
||||
@Override
|
||||
public boolean acceptsDocsOutOfOrder() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws IOException {
|
||||
if (args.length != 2) {
|
||||
System.out.println(
|
||||
|
@ -78,16 +61,16 @@ public class SynLookup {
|
|||
|
||||
String word = args[1];
|
||||
Query query = new TermQuery(new Term(Syns2Index.F_WORD, word));
|
||||
CountingCollector countingCollector = new CountingCollector();
|
||||
TotalHitCountCollector countingCollector = new TotalHitCountCollector();
|
||||
searcher.search(query, countingCollector);
|
||||
|
||||
if (countingCollector.numHits == 0) {
|
||||
if (countingCollector.getTotalHits() == 0) {
|
||||
System.out.println("No synonyms found for " + word);
|
||||
} else {
|
||||
System.out.println("Synonyms found for \"" + word + "\":");
|
||||
}
|
||||
|
||||
ScoreDoc[] hits = searcher.search(query, countingCollector.numHits).scoreDocs;
|
||||
ScoreDoc[] hits = searcher.search(query, countingCollector.getTotalHits()).scoreDocs;
|
||||
|
||||
for (int i = 0; i < hits.length; i++) {
|
||||
Document doc = searcher.doc(hits[i].doc);
|
||||
|
|
|
@ -22,6 +22,7 @@ import java.io.File;
|
|||
import java.io.FileInputStream;
|
||||
import java.io.InputStreamReader;
|
||||
import java.io.PrintStream;
|
||||
import java.io.Reader;
|
||||
import java.util.Iterator;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
|
@ -31,7 +32,7 @@ import java.util.TreeMap;
|
|||
import java.util.TreeSet;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.standard.StandardAnalyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
|
@ -90,9 +91,15 @@ public class Syns2Index
|
|||
public static final String F_WORD = "word";
|
||||
|
||||
/**
|
||||
*
|
||||
* we don't actually analyze any text (only a NOT_ANALYZED field),
|
||||
* but analyzer can't be null, docinverter wants the offset gap!
|
||||
*/
|
||||
private static final Analyzer ana = new StandardAnalyzer(Version.LUCENE_CURRENT);
|
||||
private static final Analyzer ana = new Analyzer() {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Takes arg of prolog file name and index directory.
|
||||
|
|
|
@ -29,21 +29,5 @@
|
|||
|
||||
<import file="../../lucene/contrib/contrib-build.xml"/>
|
||||
|
||||
<module-uptodate name="analysis/common" jarfile="${common.dir}/../modules/analysis/build/common/lucene-analyzers-common-${version}.jar"
|
||||
property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
|
||||
|
||||
<path id="classpath">
|
||||
<pathelement path="${analyzers-common.jar}"/>
|
||||
<path refid="base.classpath"/>
|
||||
</path>
|
||||
|
||||
<target name="compile-core" depends="compile-analyzers-common, common.compile-core" />
|
||||
|
||||
<target name="compile-analyzers-common" unless="analyzers-common.uptodate">
|
||||
<subant target="default">
|
||||
<fileset dir="${common.dir}/../modules/analysis/common" includes="build.xml"/>
|
||||
</subant>
|
||||
</target>
|
||||
|
||||
<target name="dist-maven" depends="jar-core,javadocs,contrib-build.dist-maven" />
|
||||
</project>
|
||||
|
|
|
@ -18,12 +18,14 @@ package org.apache.lucene.search.spell;
|
|||
*/
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.Reader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Comparator;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.analysis.TokenStream;
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.document.Field;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -113,6 +115,17 @@ public class SpellChecker implements java.io.Closeable {
|
|||
private StringDistance sd;
|
||||
private Comparator<SuggestWord> comparator;
|
||||
|
||||
/** we don't need to actually analyze any content:
|
||||
* all fields are indexed NOT_ANALYZED, but docsinverter
|
||||
* needs this for the offset gap!
|
||||
*/
|
||||
private static Analyzer noAnalyzer = new Analyzer() {
|
||||
@Override
|
||||
public TokenStream tokenStream(String fieldName, Reader reader) {
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* Use the given directory as a spell checker index. The directory
|
||||
* is created if it doesn't exist yet.
|
||||
|
@ -168,7 +181,7 @@ public class SpellChecker implements java.io.Closeable {
|
|||
if (!IndexReader.indexExists(spellIndexDir)) {
|
||||
IndexWriter writer = new IndexWriter(spellIndexDir,
|
||||
new IndexWriterConfig(Version.LUCENE_CURRENT,
|
||||
new WhitespaceAnalyzer(Version.LUCENE_CURRENT)));
|
||||
noAnalyzer));
|
||||
writer.close();
|
||||
}
|
||||
swapSearcher(spellIndexDir);
|
||||
|
@ -466,7 +479,7 @@ public class SpellChecker implements java.io.Closeable {
|
|||
final Directory dir = this.spellIndex;
|
||||
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(
|
||||
Version.LUCENE_CURRENT,
|
||||
new WhitespaceAnalyzer(Version.LUCENE_CURRENT))
|
||||
noAnalyzer)
|
||||
.setOpenMode(OpenMode.CREATE));
|
||||
writer.close();
|
||||
swapSearcher(dir);
|
||||
|
@ -503,7 +516,7 @@ public class SpellChecker implements java.io.Closeable {
|
|||
synchronized (modifyCurrentIndexLock) {
|
||||
ensureOpen();
|
||||
final Directory dir = this.spellIndex;
|
||||
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, new WhitespaceAnalyzer(Version.LUCENE_CURRENT)).setRAMBufferSizeMB(ramMB));
|
||||
final IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(Version.LUCENE_CURRENT, noAnalyzer).setRAMBufferSizeMB(ramMB));
|
||||
((TieredMergePolicy) writer.getConfig().getMergePolicy()).setMaxMergeAtOnce(mergeFactor);
|
||||
IndexSearcher indexSearcher = obtainSearcher();
|
||||
final List<TermsEnum> termsEnums = new ArrayList<TermsEnum>();
|
||||
|
|
Loading…
Reference in New Issue