mirror of https://github.com/apache/lucene.git
LUCENE-1884: massive javadoc and comment cleanup -- primarily dealing with typos
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@811070 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8f88bf0062
commit
c56f4c224f
|
@ -22,9 +22,11 @@ import org.apache.lucene.analysis.LetterTokenizer;
|
|||
import org.apache.lucene.util.AttributeSource;
|
||||
|
||||
/**
|
||||
* Tokenizer that breaks text into runs of letters and diacritics.
|
||||
* <p>
|
||||
* The problem with the standard Letter tokenizer is that it fails on diacritics.
|
||||
* Handling similar to this is necessary for Indic Scripts, Hebrew, Thaana, etc.
|
||||
*
|
||||
* </p>
|
||||
*
|
||||
*/
|
||||
public class ArabicLetterTokenizer extends LetterTokenizer {
|
||||
|
|
|
@ -36,7 +36,7 @@ public class BrazilianStemmer {
|
|||
}
|
||||
|
||||
/**
|
||||
* Stemms the given term to an unique <tt>discriminator</tt>.
|
||||
* Stems the given term to a unique <tt>discriminator</tt>.
|
||||
*
|
||||
* @param term The term that should be stemmed.
|
||||
* @return Discriminator for <tt>term</tt>
|
||||
|
@ -115,7 +115,7 @@ public class BrazilianStemmer {
|
|||
/**
|
||||
* Gets R1
|
||||
*
|
||||
* R1 - is the region after the first non-vowel follwing a vowel,
|
||||
* R1 - is the region after the first non-vowel following a vowel,
|
||||
* or is the null region at the end of the word if there is
|
||||
* no such non-vowel.
|
||||
*
|
||||
|
@ -159,13 +159,13 @@ public class BrazilianStemmer {
|
|||
/**
|
||||
* Gets RV
|
||||
*
|
||||
* RV - IF the second letter is a consoant, RV is the region after
|
||||
* RV - IF the second letter is a consonant, RV is the region after
|
||||
* the next following vowel,
|
||||
*
|
||||
* OR if the first two letters are vowels, RV is the region
|
||||
* after the next consoant,
|
||||
* after the next consonant,
|
||||
*
|
||||
* AND otherwise (consoant-vowel case) RV is the region after
|
||||
* AND otherwise (consonant-vowel case) RV is the region after
|
||||
* the third letter.
|
||||
*
|
||||
* BUT RV is the end of the word if these positions cannot be
|
||||
|
@ -184,7 +184,7 @@ public class BrazilianStemmer {
|
|||
|
||||
i = value.length()-1 ;
|
||||
|
||||
// RV - IF the second letter is a consoant, RV is the region after
|
||||
// RV - IF the second letter is a consonant, RV is the region after
|
||||
// the next following vowel,
|
||||
if ((i > 0) && !isVowel(value.charAt(1))) {
|
||||
// find 1st vowel
|
||||
|
@ -201,7 +201,7 @@ public class BrazilianStemmer {
|
|||
|
||||
|
||||
// RV - OR if the first two letters are vowels, RV is the region
|
||||
// after the next consoant,
|
||||
// after the next consonant,
|
||||
if ((i > 1) &&
|
||||
isVowel(value.charAt(0)) &&
|
||||
isVowel(value.charAt(1))) {
|
||||
|
@ -217,7 +217,7 @@ public class BrazilianStemmer {
|
|||
}
|
||||
}
|
||||
|
||||
// RV - AND otherwise (consoant-vowel case) RV is the region after
|
||||
// RV - AND otherwise (consonant-vowel case) RV is the region after
|
||||
// the third letter.
|
||||
if (i > 2) {
|
||||
return value.substring(3) ;
|
||||
|
@ -394,7 +394,7 @@ public class BrazilianStemmer {
|
|||
|
||||
|
||||
/**
|
||||
* Standart suffix removal.
|
||||
* Standard suffix removal.
|
||||
* Search for the longest among the following suffixes, and perform
|
||||
* the following actions:
|
||||
*
|
||||
|
@ -403,12 +403,12 @@ public class BrazilianStemmer {
|
|||
private boolean step1() {
|
||||
if (CT == null) return false ;
|
||||
|
||||
// suffix lenght = 7
|
||||
// suffix length = 7
|
||||
if (suffix(CT,"uciones") && suffix(R2,"uciones")) {
|
||||
CT = replaceSuffix(CT,"uciones","u") ; return true;
|
||||
}
|
||||
|
||||
// suffix lenght = 6
|
||||
// suffix length = 6
|
||||
if (CT.length() >= 6) {
|
||||
if (suffix(CT,"imentos") && suffix(R2,"imentos")) {
|
||||
CT = removeSuffix(CT,"imentos") ; return true;
|
||||
|
@ -436,7 +436,7 @@ public class BrazilianStemmer {
|
|||
}
|
||||
}
|
||||
|
||||
// suffix lenght = 5
|
||||
// suffix length = 5
|
||||
if (CT.length() >= 5) {
|
||||
if (suffix(CT,"acoes") && suffix(R2,"acoes")) {
|
||||
CT = removeSuffix(CT,"acoes") ; return true;
|
||||
|
@ -473,7 +473,7 @@ public class BrazilianStemmer {
|
|||
}
|
||||
}
|
||||
|
||||
// suffix lenght = 4
|
||||
// suffix length = 4
|
||||
if (CT.length() >= 4) {
|
||||
if (suffix(CT,"acao") && suffix(R2,"acao")) {
|
||||
CT = removeSuffix(CT,"acao") ; return true;
|
||||
|
@ -521,7 +521,7 @@ public class BrazilianStemmer {
|
|||
}
|
||||
}
|
||||
|
||||
// suffix lenght = 3
|
||||
// suffix length = 3
|
||||
if (CT.length() >= 3) {
|
||||
if (suffix(CT,"eza") && suffix(R2,"eza")) {
|
||||
CT = removeSuffix(CT,"eza") ; return true ;
|
||||
|
@ -589,7 +589,7 @@ public class BrazilianStemmer {
|
|||
}
|
||||
}
|
||||
|
||||
// suffix lenght = 6
|
||||
// suffix length = 6
|
||||
if (RV.length() >= 6) {
|
||||
if (suffix(RV,"iremos")) {
|
||||
CT = removeSuffix(CT,"iremos") ; return true;
|
||||
|
@ -633,7 +633,7 @@ public class BrazilianStemmer {
|
|||
}
|
||||
|
||||
|
||||
// suffix lenght = 5
|
||||
// suffix length = 5
|
||||
if (RV.length() >= 5) {
|
||||
if (suffix(RV,"irmos")) {
|
||||
CT = removeSuffix(CT,"irmos") ; return true;
|
||||
|
@ -718,7 +718,7 @@ public class BrazilianStemmer {
|
|||
}
|
||||
}
|
||||
|
||||
// suffix lenght = 4
|
||||
// suffix length = 4
|
||||
if (RV.length() >= 4) {
|
||||
if (suffix(RV,"aria")) {
|
||||
CT = removeSuffix(CT,"aria") ; return true;
|
||||
|
@ -845,7 +845,7 @@ public class BrazilianStemmer {
|
|||
}
|
||||
}
|
||||
|
||||
// suffix lenght = 3
|
||||
// suffix length = 3
|
||||
if (RV.length() >= 3) {
|
||||
if (suffix(RV,"ada")) {
|
||||
CT = removeSuffix(CT,"ada") ; return true;
|
||||
|
@ -888,7 +888,7 @@ public class BrazilianStemmer {
|
|||
}
|
||||
}
|
||||
|
||||
// suffix lenght = 2
|
||||
// suffix length = 2
|
||||
if (RV.length() >= 2) {
|
||||
if (suffix(RV,"ia")) {
|
||||
CT = removeSuffix(CT,"ia") ; return true;
|
||||
|
|
|
@ -150,7 +150,7 @@ public class HyphenationCompoundWordTokenFilter extends
|
|||
}
|
||||
|
||||
protected void decomposeInternal(final Token token) {
|
||||
// get the hpyphenation points
|
||||
// get the hyphenation points
|
||||
Hyphenation hyphens = hyphenator.hyphenate(token.termBuffer(), 0, token
|
||||
.termLength(), 1, 1);
|
||||
// No hyphen points found -> exit
|
||||
|
|
|
@ -25,7 +25,7 @@ import java.io.Serializable;
|
|||
* <h2>Ternary Search Tree.</h2>
|
||||
*
|
||||
* <p>
|
||||
* A ternary search tree is a hibrid between a binary tree and a digital search
|
||||
* A ternary search tree is a hybrid between a binary tree and a digital search
|
||||
* tree (trie). Keys are limited to strings. A data value of type char is stored
|
||||
* in each leaf node. It can be used as an index (or pointer) to the data.
|
||||
* Branches that only contain one key are compressed to one node by storing a
|
||||
|
@ -45,7 +45,7 @@ import java.io.Serializable;
|
|||
* requires from 5000 to 15000 hyphenation patterns which will be keys in this
|
||||
* tree. The strings patterns are usually small (from 2 to 5 characters), but
|
||||
* each char in the tree is stored in a node. Thus memory usage is the main
|
||||
* concern. We will sacrify 'elegance' to keep memory requirenments to the
|
||||
* concern. We will sacrifice 'elegance' to keep memory requirements to the
|
||||
* minimum. Using java's char type as pointer (yes, I know pointer is a
|
||||
* forbidden word in java) we can keep the size of the node to be just 8 bytes
|
||||
* (3 pointers and the data char). This gives room for about 65000 nodes. In my
|
||||
|
@ -100,7 +100,7 @@ public class TernaryTree implements Cloneable, Serializable {
|
|||
* </ul>
|
||||
* <p>
|
||||
* This shouldn't be a problem if we give the usual semantics to strings since
|
||||
* 0xFFFF is garanteed not to be an Unicode character.
|
||||
* 0xFFFF is guaranteed not to be a Unicode character.
|
||||
* </p>
|
||||
*/
|
||||
protected char[] sc;
|
||||
|
|
|
@ -17,7 +17,7 @@
|
|||
|
||||
<html>
|
||||
<head>
|
||||
<title>Hypenation code for the CompoundWordTokenFilter</title>
|
||||
<title>Hyphenation code for the CompoundWordTokenFilter</title>
|
||||
</head>
|
||||
<body>
|
||||
<p>
|
||||
|
|
|
@ -69,7 +69,7 @@ The input token is always preserved and the filters do not alter the case of wor
|
|||
filter available:
|
||||
<ul>
|
||||
<li><i>HyphenationCompoundWordTokenFilter</i>: it uses a
|
||||
hyphenation grammer based approach to find potential word parts of a
|
||||
hyphenation grammar based approach to find potential word parts of a
|
||||
given word.</li>
|
||||
<li><i>DictionaryCompoundWordTokenFilter</i>: it uses a
|
||||
brute-force dictionary-only based approach to find the word parts of a given
|
||||
|
|
|
@ -79,7 +79,7 @@ public class FrenchStemmer {
|
|||
|
||||
|
||||
/**
|
||||
* Stemms the given term to a unique <tt>discriminator</tt>.
|
||||
* Stems the given term to a unique <tt>discriminator</tt>.
|
||||
*
|
||||
* @param term java.lang.String The term that should be stemmed
|
||||
* @return java.lang.String Discriminator for <tt>term</tt>
|
||||
|
@ -148,7 +148,7 @@ public class FrenchStemmer {
|
|||
}
|
||||
|
||||
/**
|
||||
* First step of the Porter Algorithmn<br>
|
||||
* First step of the Porter Algorithm<br>
|
||||
* refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
|
||||
*/
|
||||
private void step1( ) {
|
||||
|
@ -202,7 +202,7 @@ public class FrenchStemmer {
|
|||
}
|
||||
|
||||
/**
|
||||
* Second step (A) of the Porter Algorithmn<br>
|
||||
* Second step (A) of the Porter Algorithm<br>
|
||||
* Will be performed if nothing changed from the first step
|
||||
* or changes were made in the amment, emment, ments or ment suffixes<br>
|
||||
* refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
|
||||
|
@ -219,7 +219,7 @@ public class FrenchStemmer {
|
|||
}
|
||||
|
||||
/**
|
||||
* Second step (B) of the Porter Algorithmn<br>
|
||||
* Second step (B) of the Porter Algorithm<br>
|
||||
* Will be performed if step 2 A was performed unsuccessfully<br>
|
||||
* refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
|
||||
*/
|
||||
|
@ -238,7 +238,7 @@ public class FrenchStemmer {
|
|||
}
|
||||
|
||||
/**
|
||||
* Third step of the Porter Algorithmn<br>
|
||||
* Third step of the Porter Algorithm<br>
|
||||
* refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
|
||||
*/
|
||||
private void step3() {
|
||||
|
@ -259,7 +259,7 @@ public class FrenchStemmer {
|
|||
}
|
||||
|
||||
/**
|
||||
* Fourth step of the Porter Algorithmn<br>
|
||||
* Fourth step of the Porter Algorithm<br>
|
||||
* refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
|
||||
*/
|
||||
private void step4() {
|
||||
|
@ -286,7 +286,7 @@ public class FrenchStemmer {
|
|||
}
|
||||
|
||||
/**
|
||||
* Fifth step of the Porter Algorithmn<br>
|
||||
* Fifth step of the Porter Algorithm<br>
|
||||
* refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
|
||||
*/
|
||||
private void step5() {
|
||||
|
@ -301,7 +301,7 @@ public class FrenchStemmer {
|
|||
}
|
||||
|
||||
/**
|
||||
* Sixth (and last!) step of the Porter Algorithmn<br>
|
||||
* Sixth (and last!) step of the Porter Algorithm<br>
|
||||
* refer to http://snowball.sourceforge.net/french/stemmer.html for an explanation
|
||||
*/
|
||||
private void step6() {
|
||||
|
|
|
@ -41,7 +41,7 @@ public class DutchStemmer {
|
|||
|
||||
//TODO convert to internal
|
||||
/*
|
||||
* Stemms the given term to an unique <tt>discriminator</tt>.
|
||||
* Stems the given term to a unique <tt>discriminator</tt>.
|
||||
*
|
||||
* @param term The term that should be stemmed.
|
||||
* @return Discriminator for <tt>term</tt>
|
||||
|
|
|
@ -372,7 +372,7 @@ class RussianStemmer
|
|||
|
||||
/**
|
||||
* Finds the ending among the given class of endings, then checks if this ending was
|
||||
* preceded by any of given predessors, and if so, removes it from stemming zone.
|
||||
* preceded by any of given predecessors, and if so, removes it from stemming zone.
|
||||
* Creation date: (17/03/2002 8:18:34 PM)
|
||||
*/
|
||||
private boolean findAndRemoveEnding(StringBuffer stemmingZone,
|
||||
|
|
|
@ -51,10 +51,10 @@ import org.apache.lucene.index.Payload;
|
|||
* be used to replace phrase queries, especially those with 0 slop.
|
||||
*
|
||||
* <p>Without a spacer character
|
||||
* it can be used to handle composition and decomposion of words
|
||||
* it can be used to handle composition and decomposition of words
|
||||
* such as searching for "multi dimensional" instead of "multidimensional".
|
||||
* It is a rather common human problem at query time
|
||||
* in several languages, notebly the northern Germanic branch.
|
||||
* in several languages, notably the northern Germanic branch.
|
||||
*
|
||||
* <p>Shingles are amongst many things also known to solve problems
|
||||
* in spell checking, language detection and document clustering.
|
||||
|
@ -253,7 +253,7 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
* @see #ignoringSinglePrefixOrSuffixShingleByDefault
|
||||
* @see #defaultSettingsCodec
|
||||
*
|
||||
* @param input stream from wich to construct the matrix
|
||||
* @param input stream from which to construct the matrix
|
||||
* @param minimumShingleSize minimum number of tokens in any shingle.
|
||||
* @param maximumShingleSize maximum number of tokens in any shingle.
|
||||
*/
|
||||
|
@ -268,7 +268,7 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
* @see #ignoringSinglePrefixOrSuffixShingleByDefault
|
||||
* @see #defaultSettingsCodec
|
||||
*
|
||||
* @param input stream from wich to construct the matrix
|
||||
* @param input stream from which to construct the matrix
|
||||
* @param minimumShingleSize minimum number of tokens in any shingle.
|
||||
* @param maximumShingleSize maximum number of tokens in any shingle.
|
||||
* @param spacerCharacter character to use between texts of the token parts in a shingle. null for none.
|
||||
|
@ -282,7 +282,7 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
*
|
||||
* @see #defaultSettingsCodec
|
||||
*
|
||||
* @param input stream from wich to construct the matrix
|
||||
* @param input stream from which to construct the matrix
|
||||
* @param minimumShingleSize minimum number of tokens in any shingle.
|
||||
* @param maximumShingleSize maximum number of tokens in any shingle.
|
||||
* @param spacerCharacter character to use between texts of the token parts in a shingle. null for none.
|
||||
|
@ -296,7 +296,7 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
/**
|
||||
* Creates a shingle filter with ad hoc parameter settings.
|
||||
*
|
||||
* @param input stream from wich to construct the matrix
|
||||
* @param input stream from which to construct the matrix
|
||||
* @param minimumShingleSize minimum number of tokens in any shingle.
|
||||
* @param maximumShingleSize maximum number of tokens in any shingle.
|
||||
* @param spacerCharacter character to use between texts of the token parts in a shingle. null for none.
|
||||
|
@ -408,8 +408,8 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
private static final Token request_next_token = new Token();
|
||||
|
||||
/**
|
||||
* This method exists in order to avoid reursive calls to the method
|
||||
* as the complexity of a fairlt small matrix then easily would require
|
||||
* This method exists in order to avoid recursive calls to the method
|
||||
* as the complexity of a fairly small matrix then easily would require
|
||||
* a gigabyte sized stack per thread.
|
||||
*
|
||||
* @param reusableToken
|
||||
|
@ -490,7 +490,7 @@ public class ShingleMatrixFilter extends TokenStream {
|
|||
// don't really care, we just read it.
|
||||
}
|
||||
|
||||
// get rith of resources
|
||||
// get rid of resources
|
||||
|
||||
// delete the first column in the matrix
|
||||
Matrix.Column deletedColumn = (Matrix.Column) matrix.columns.remove(0);
|
||||
|
|
|
@ -147,7 +147,7 @@ class BigramDictionary extends AbstractDictionary {
|
|||
/**
|
||||
* Load the datafile into this BigramDictionary
|
||||
*
|
||||
* @param dctFilePath path to the Bigramdictionary (bigramdict.mem)
|
||||
* @param dctFilePath path to the Bigramdictionary (bigramdict.dct)
|
||||
* @throws FileNotFoundException
|
||||
* @throws IOException
|
||||
* @throws UnsupportedEncodingException
|
||||
|
|
|
@ -184,7 +184,7 @@ class WordDictionary extends AbstractDictionary {
|
|||
/**
|
||||
* Load the datafile into this WordDictionary
|
||||
*
|
||||
* @param dctFilePath path to word dictionary (coredict.mem)
|
||||
* @param dctFilePath path to word dictionary (coredict.dct)
|
||||
* @return number of words read
|
||||
* @throws FileNotFoundException
|
||||
* @throws IOException
|
||||
|
|
|
@ -20,7 +20,7 @@ package org.apache.lucene.benchmark.byTask.feeds;
|
|||
/**
|
||||
* Exception indicating there is no more data.
|
||||
* Thrown by Docs Makers if doc.maker.forever is false and docs sources of that maker were exhausted.
|
||||
* This is usefull for iterating all document of a source, in case we don't know in advance how many docs there are.
|
||||
* This is useful for iterating over all documents of a source, in case we don't know in advance how many docs there are.
|
||||
*/
|
||||
public class NoMoreDataException extends Exception {
|
||||
|
||||
|
|
|
@ -36,9 +36,9 @@ public class SimpleQueryMaker extends AbstractQueryMaker implements QueryMaker {
|
|||
|
||||
/**
|
||||
* Prepare the queries for this test.
|
||||
* Extending classes can overide this method for preparing different queries.
|
||||
* Extending classes can override this method for preparing different queries.
|
||||
* @return prepared queries.
|
||||
* @throws Exception if canot prepare the queries.
|
||||
* @throws Exception if cannot prepare the queries.
|
||||
*/
|
||||
protected Query[] prepareQueries() throws Exception {
|
||||
// analyzer (default is standard analyzer)
|
||||
|
|
|
@ -33,7 +33,7 @@ public class SimpleSloppyPhraseQueryMaker extends SimpleQueryMaker {
|
|||
* @see org.apache.lucene.benchmark.byTask.feeds.SimpleQueryMaker#prepareQueries()
|
||||
*/
|
||||
protected Query[] prepareQueries() throws Exception {
|
||||
// exatract some 100 words from doc text to an array
|
||||
// extract some 100 words from doc text to an array
|
||||
String words[];
|
||||
ArrayList w = new ArrayList();
|
||||
StringTokenizer st = new StringTokenizer(SingleDocSource.DOC_TEXT);
|
||||
|
@ -60,7 +60,7 @@ public class SimpleSloppyPhraseQueryMaker extends SimpleQueryMaker {
|
|||
}
|
||||
}
|
||||
queries.add(q);
|
||||
// reveresed
|
||||
// reversed
|
||||
remainedSlop = slop;
|
||||
q = new PhraseQuery();
|
||||
q.setSlop(slop+2*qlen);
|
||||
|
|
|
@ -28,7 +28,7 @@ import org.apache.lucene.benchmark.byTask.tasks.TaskSequence;
|
|||
import org.apache.lucene.benchmark.byTask.utils.Config;
|
||||
|
||||
/**
|
||||
* Sample performance test written programatically - no algorithm file is needed here.
|
||||
* Sample performance test written programmatically - no algorithm file is needed here.
|
||||
*/
|
||||
public class Sample {
|
||||
|
||||
|
@ -43,7 +43,7 @@ public class Sample {
|
|||
PerfRunData runData = new PerfRunData(conf);
|
||||
|
||||
// 1. top sequence
|
||||
TaskSequence top = new TaskSequence(runData,null,null,false); // top level, not parralel
|
||||
TaskSequence top = new TaskSequence(runData,null,null,false); // top level, not parallel
|
||||
|
||||
// 2. task to create the index
|
||||
CreateIndexTask create = new CreateIndexTask(runData);
|
||||
|
|
|
@ -17,6 +17,6 @@
|
|||
-->
|
||||
<html>
|
||||
<body>
|
||||
Sample performance test written programatically - no algorithm file is needed here.
|
||||
Sample performance test written programmatically - no algorithm file is needed here.
|
||||
</body>
|
||||
</html>
|
|
@ -42,7 +42,7 @@ public class Report {
|
|||
}
|
||||
|
||||
/**
|
||||
* Returns number of lines in the reoprt.
|
||||
* Returns number of lines in the report.
|
||||
*/
|
||||
public int getSize() {
|
||||
return size;
|
||||
|
|
|
@ -171,7 +171,7 @@ public class TaskStats implements Cloneable {
|
|||
maxUsedMem += stat2.getMaxUsedMem();
|
||||
count += stat2.getCount();
|
||||
if (round != stat2.round) {
|
||||
round = -1; // no meaning if agregating tasks of different ruond.
|
||||
round = -1; // no meaning if aggregating tasks of different round.
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -31,7 +31,7 @@ import org.apache.lucene.store.Directory;
|
|||
|
||||
/**
|
||||
* Open an index reader.
|
||||
* <br>Other side effects: index redaer object in perfRunData is set.
|
||||
* <br>Other side effects: index reader object in perfRunData is set.
|
||||
* <br> Optional params readOnly,commitUserData eg. OpenReader(false,commit1)
|
||||
*/
|
||||
public class OpenReaderTask extends PerfTask {
|
||||
|
|
|
@ -28,8 +28,8 @@ import org.apache.lucene.benchmark.byTask.utils.Format;
|
|||
/**
|
||||
* An abstract task to be tested for performance. <br>
|
||||
* Every performance task extends this class, and provides its own
|
||||
* {@link #doLogic()} method, which performss the actual task. <br>
|
||||
* Tasks performing some work that should be measured for the task, can overide
|
||||
* {@link #doLogic()} method, which performs the actual task. <br>
|
||||
* Tasks performing some work that should be measured for the task, can override
|
||||
* {@link #setup()} and/or {@link #tearDown()} and place that work there. <br>
|
||||
* Relevant properties: <code>task.max.depth.log</code>.<br>
|
||||
* Also supports the following logging attributes:
|
||||
|
@ -40,7 +40,7 @@ import org.apache.lucene.benchmark.byTask.utils.Format;
|
|||
* <li>log.step.[class Task Name] - specifies the same as 'log.step', only for a
|
||||
* particular task name. For example, log.step.AddDoc will be applied only for
|
||||
* {@link AddDocTask}, but not for {@link DeleteDocTask}. It's a way to control
|
||||
* per task logging settings. If you want to ommit logging for any other task,
|
||||
* per task logging settings. If you want to omit logging for any other task,
|
||||
* include log.step=-1. The syntax is "log.step." together with the Task's
|
||||
* 'short' name (i.e., without the 'Task' part).
|
||||
* </ul>
|
||||
|
@ -118,8 +118,8 @@ public abstract class PerfTask implements Cloneable {
|
|||
}
|
||||
|
||||
protected Object clone() throws CloneNotSupportedException {
|
||||
// tasks having non primitive data structures should overide this.
|
||||
// otherwise parallel running of a task sequence might not run crrectly.
|
||||
// tasks having non primitive data structures should override this.
|
||||
// otherwise parallel running of a task sequence might not run correctly.
|
||||
return super.clone();
|
||||
}
|
||||
|
||||
|
@ -152,7 +152,7 @@ public abstract class PerfTask implements Cloneable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Perform the task once (ignoring repetions specification)
|
||||
* Perform the task once (ignoring repetitions specification)
|
||||
* Return number of work items done by this task.
|
||||
* For indexing that can be number of docs added.
|
||||
* For warming that can be number of scanned items, etc.
|
||||
|
@ -230,7 +230,7 @@ public abstract class PerfTask implements Cloneable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Tasks that should never log at start can overide this.
|
||||
* Tasks that should never log at start can override this.
|
||||
* @return true if this task should never log when it starts.
|
||||
*/
|
||||
protected boolean shouldNeverLogAtStart () {
|
||||
|
@ -238,7 +238,7 @@ public abstract class PerfTask implements Cloneable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Tasks that should not record statistics can overide this.
|
||||
* Tasks that should not record statistics can override this.
|
||||
* @return true if this task should never record its statistics.
|
||||
*/
|
||||
protected boolean shouldNotRecordStats () {
|
||||
|
@ -274,7 +274,7 @@ public abstract class PerfTask implements Cloneable {
|
|||
}
|
||||
|
||||
/**
|
||||
* Sub classes that supports parameters must overide this method to return true.
|
||||
* Subclasses that support parameters must override this method to return true.
|
||||
* @return true iff this task supports command line params.
|
||||
*/
|
||||
public boolean supportsParams () {
|
||||
|
|
|
@ -250,7 +250,7 @@ public abstract class ReadTask extends PerfTask {
|
|||
}
|
||||
|
||||
/**
|
||||
* @return the maxiumum number of highlighter fragments
|
||||
* @return the maximum number of highlighter fragments
|
||||
* @deprecated Please define getBenchmarkHighlighter instead
|
||||
*/
|
||||
final int maxNumFragments(){
|
||||
|
|
|
@ -24,7 +24,7 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
|
|||
/**
|
||||
* Reset all index and input data and call gc, erase index and dir, does NOT clear statistics.
|
||||
* <br>This contains ResetInputs.
|
||||
* <br>Other side effects: writers/readers nulified, deleted, closed.
|
||||
* <br>Other side effects: writers/readers nullified, deleted, closed.
|
||||
* Index is erased.
|
||||
* Directory is erased.
|
||||
*/
|
||||
|
|
|
@ -24,7 +24,7 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
|
|||
/**
|
||||
* Reset all index and input data and call gc, does NOT erase index/dir, does NOT clear statistics.
|
||||
* This contains ResetInputs.
|
||||
* <br>Other side effects: writers/readers nulified, closed.
|
||||
* <br>Other side effects: writers/readers nullified, closed.
|
||||
* Index is NOT erased.
|
||||
* Directory is NOT erased.
|
||||
*/
|
||||
|
|
|
@ -21,7 +21,7 @@ import org.apache.lucene.benchmark.byTask.PerfRunData;
|
|||
|
||||
/**
|
||||
* Set a performance test configuration property.
|
||||
* A property may have a single value, or a sequence of values, seprated by ":".
|
||||
* A property may have a single value, or a sequence of values, separated by ":".
|
||||
* If a sequence of values is specified, each time a new round starts,
|
||||
* the next (cyclic) value is taken.
|
||||
* <br>Other side effects: none.
|
||||
|
|
|
@ -251,7 +251,7 @@ public class TaskSequence extends PerfTask {
|
|||
}
|
||||
}
|
||||
|
||||
// run threadsm with rate
|
||||
// run threads with rate
|
||||
private void startlThreadsWithRate(Thread[] t) throws InterruptedException {
|
||||
long delayStep = (perMin ? 60000 : 1000) /rate;
|
||||
long nextStartTime = System.currentTimeMillis();
|
||||
|
@ -261,7 +261,7 @@ public class TaskSequence extends PerfTask {
|
|||
//System.out.println("thread wait: "+waitMore+" for rate: "+ratePerMin+" (delayStep="+delayStep+")");
|
||||
Thread.sleep(waitMore);
|
||||
}
|
||||
nextStartTime += delayStep; // this aims at avarage rate of starting threads.
|
||||
nextStartTime += delayStep; // this aims at average rate of starting threads.
|
||||
t[i].start();
|
||||
}
|
||||
}
|
||||
|
@ -346,7 +346,7 @@ public class TaskSequence extends PerfTask {
|
|||
}
|
||||
|
||||
public String getName() {
|
||||
return seqName; // overide to include more info
|
||||
return seqName; // override to include more info
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -35,9 +35,9 @@ import org.apache.lucene.document.Field;
|
|||
/**
|
||||
* A task which writes documents, one line per document. Each line is in the
|
||||
* following format: title <TAB> date <TAB> body. The output of this
|
||||
* taske can be consumed by
|
||||
* task can be consumed by
|
||||
* {@link org.apache.lucene.benchmark.byTask.feeds.LineDocMaker} and is intended
|
||||
* to save the IO overhead of opening a file per doument to be indexed.<br>
|
||||
* to save the IO overhead of opening a file per document to be indexed.<br>
|
||||
* Supports the following parameters:
|
||||
* <ul>
|
||||
* <li>line.file.out - the name of the file to write the output to. That
|
||||
|
@ -47,7 +47,7 @@ import org.apache.lucene.document.Field;
|
|||
* false).
|
||||
* </ul>
|
||||
* <b>NOTE:</b> this class is not thread-safe and if used by multiple threads the
|
||||
* output is unspecified (as all will write to the same ouput file in a
|
||||
* output is unspecified (as all will write to the same output file in a
|
||||
* non-synchronized way).
|
||||
*/
|
||||
public class WriteLineDocTask extends PerfTask {
|
||||
|
|
|
@ -32,7 +32,7 @@ import java.util.StringTokenizer;
|
|||
/**
|
||||
* Perf run configuration properties.
|
||||
* <p>
|
||||
* Numeric peroperty containing ":", e.g. "10:100:5" is interpreted
|
||||
* Numeric property containing ":", e.g. "10:100:5" is interpreted
|
||||
* as array of numeric values. It is extracted once, on first use, and
|
||||
* maintains a round number to return the appropriate value.
|
||||
* <p>
|
||||
|
@ -99,7 +99,7 @@ public class Config {
|
|||
}
|
||||
|
||||
/**
|
||||
* Create config without algorithm - usefull for a programmatic perf test.
|
||||
* Create config without algorithm - useful for a programmatic perf test.
|
||||
* @param props - configuration properties.
|
||||
* @throws IOException
|
||||
*/
|
||||
|
@ -135,7 +135,7 @@ public class Config {
|
|||
* Set a property.
|
||||
* Note: once a multiple values property is set, it can no longer be modified.
|
||||
* @param name name of property.
|
||||
* @param value either single or multiple propery value (multple values are separated by ":")
|
||||
* @param value either single or multiple property value (multiple values are separated by ":")
|
||||
* @throws Exception
|
||||
*/
|
||||
public void set (String name, String value) throws Exception {
|
||||
|
@ -208,7 +208,7 @@ public class Config {
|
|||
/**
|
||||
* Return a boolean property.
|
||||
* If the property contains ":", e.g. "true:true:false", it is interpreted
|
||||
* as array of boleans. It is extracted once, on first call
|
||||
* as array of booleans. It is extracted once, on first call
|
||||
* to get() it, and a by-round-value is returned.
|
||||
* @param name name of property
|
||||
* @param dflt default value
|
||||
|
|
|
@ -58,7 +58,7 @@ public class Format {
|
|||
}
|
||||
|
||||
/**
|
||||
* Padd a number from right.
|
||||
* Pad a number from right.
|
||||
* @param numFracDigits number of digits in fraction part - must be 0 or 1 or 2.
|
||||
* @param f number to be formatted.
|
||||
* @param col column name (used for deciding on length).
|
||||
|
@ -75,7 +75,7 @@ public class Format {
|
|||
}
|
||||
|
||||
/**
|
||||
* Padd a number from left.
|
||||
* Pad a number from left.
|
||||
* @param n number to be formatted.
|
||||
* @param col column name (used for deciding on length).
|
||||
* @return formatted string.
|
||||
|
@ -86,7 +86,7 @@ public class Format {
|
|||
}
|
||||
|
||||
/**
|
||||
* Padd a string from right.
|
||||
* Pad a string from right.
|
||||
* @param s string to be formatted.
|
||||
* @param col column name (used for deciding on length).
|
||||
* @return formatted string.
|
||||
|
@ -97,7 +97,7 @@ public class Format {
|
|||
}
|
||||
|
||||
/**
|
||||
* Padd a string from left.
|
||||
* Pad a string from left.
|
||||
* @param s string to be formatted.
|
||||
* @param col column name (used for deciding on length).
|
||||
* @return formatted string.
|
||||
|
|
|
@ -37,7 +37,7 @@ import java.io.Reader;
|
|||
* <pre>
|
||||
* StringBuffer sb = new StringBuffer("some text");
|
||||
* Reader reader = new StringBufferReader(sb);
|
||||
* ... read from reader - dont close it ! ...
|
||||
* ... read from reader - don't close it ! ...
|
||||
* sb.setLength(0);
|
||||
* sb.append("some new text");
|
||||
* reader.reset();
|
||||
|
|
|
@ -63,7 +63,7 @@ public class QualityBenchmark {
|
|||
* @param qqs quality queries to run.
|
||||
* @param qqParser parser for turning QualityQueries into Lucene Queries.
|
||||
* @param searcher index to be searched.
|
||||
* @param docNameField name of field containg the document name.
|
||||
* @param docNameField name of field containing the document name.
|
||||
* This allows to extract the doc name for search results,
|
||||
* and is important for judging the results.
|
||||
*/
|
||||
|
@ -114,7 +114,7 @@ public class QualityBenchmark {
|
|||
private QualityStats analyzeQueryResults(QualityQuery qq, Query q, TopDocs td, Judge judge, PrintWriter logger, long searchTime) throws IOException {
|
||||
QualityStats stts = new QualityStats(judge.maxRecall(qq),searchTime);
|
||||
ScoreDoc sd[] = td.scoreDocs;
|
||||
long t1 = System.currentTimeMillis(); // extraction of first doc name we meassure also construction of doc name extractor, just in case.
|
||||
long t1 = System.currentTimeMillis(); // extraction of first doc name we measure also construction of doc name extractor, just in case.
|
||||
DocNameExtractor xt = new DocNameExtractor(docNameField);
|
||||
for (int i=0; i<sd.length; i++) {
|
||||
String docName = xt.docName(searcher,sd[i].doc);
|
||||
|
|
|
@ -215,7 +215,7 @@ public class QualityStats {
|
|||
}
|
||||
}
|
||||
assert m>0 : "Fishy: no \"good\" queries!";
|
||||
// take average: times go by all queries, other meassures go by "good" queries noly.
|
||||
// take average: times go by all queries, other measures go by "good" queries only.
|
||||
avg.searchTime /= stats.length;
|
||||
avg.docNamesExtractTime /= stats.length;
|
||||
avg.numGoodPoints /= m;
|
||||
|
|
|
@ -36,7 +36,7 @@ public class QualityQueriesFinder {
|
|||
private Directory dir;
|
||||
|
||||
/**
|
||||
* Constrctor over a directory containing the index.
|
||||
* Constructor over a directory containing the index.
|
||||
* @param dir directory containing the index we search for the quality test.
|
||||
*/
|
||||
private QualityQueriesFinder(Directory dir) {
|
||||
|
|
|
@ -281,20 +281,20 @@ public class TestData
|
|||
numFormat[1].setMinimumFractionDigits(1);
|
||||
}
|
||||
|
||||
// padd number from left
|
||||
// pad number from left
|
||||
// numFracDigits must be 0 or 1.
|
||||
static String format(int numFracDigits, float f, String col) {
|
||||
String res = padd + numFormat[numFracDigits].format(f);
|
||||
return res.substring(res.length() - col.length());
|
||||
}
|
||||
|
||||
// padd number from left
|
||||
// pad number from left
|
||||
static String format(int n, String col) {
|
||||
String res = padd + n;
|
||||
return res.substring(res.length() - col.length());
|
||||
}
|
||||
|
||||
// padd string from right
|
||||
// pad string from right
|
||||
static String format(String s, String col) {
|
||||
return (s + padd).substring(0,col.length());
|
||||
}
|
||||
|
@ -350,7 +350,7 @@ public class TestData
|
|||
/**
|
||||
* Similar to {@link #getAll(java.io.File[], org.apache.lucene.analysis.Analyzer[])} but only uses
|
||||
* maxBufferedDocs of 10 and 100 and same for mergeFactor, thus reducing the number of permutations significantly.
|
||||
* It also only uses compund file and optimize is always true.
|
||||
* It also only uses compound file and optimize is always true.
|
||||
*
|
||||
* @param sources
|
||||
* @param analyzers
|
||||
|
|
|
@ -88,7 +88,7 @@ public class TimeData {
|
|||
|
||||
public String toString() { return toString(true); }
|
||||
/**
|
||||
* Return a tab-seprated string containing this data.
|
||||
* Return a tab-separated string containing this data.
|
||||
* @param withMem if true, append also memory information
|
||||
* @return The String
|
||||
*/
|
||||
|
|
|
@ -38,7 +38,7 @@ public class ScoreOrderFragmentsBuilder extends BaseFragmentsBuilder {
|
|||
/**
|
||||
* a constructor.
|
||||
*
|
||||
* @param preTags aray of pre-tags for markup terms.
|
||||
* @param preTags array of pre-tags for markup terms.
|
||||
* @param postTags array of post-tags for markup terms.
|
||||
*/
|
||||
public ScoreOrderFragmentsBuilder( String[] preTags, String[] postTags ){
|
||||
|
|
|
@ -42,7 +42,7 @@ public class GradientFormatter implements Formatter
|
|||
*
|
||||
* @param maxScore
|
||||
* The score (and above) displayed as maxColor (See QueryScorer.getMaxWeight
|
||||
* which can be used to callibrate scoring scale)
|
||||
* which can be used to calibrate scoring scale)
|
||||
* @param minForegroundColor
|
||||
* The hex color used for representing IDF scores of zero eg
|
||||
* #FFFFFF (white) or null if no foreground color required
|
||||
|
@ -194,7 +194,7 @@ public class GradientFormatter implements Formatter
|
|||
* input is nonnegative unless there is a preceding minus sign. This method
|
||||
* reads the input as twos complement instead, so if the input is 8 bytes
|
||||
* long, it will correctly restore a negative int produced by
|
||||
* Integer.toHexString() but not neccesarily one produced by
|
||||
* Integer.toHexString() but not necessarily one produced by
|
||||
* Integer.toString(x,16) since that method will produce a string like '-FF'
|
||||
* for negative integer values.
|
||||
*
|
||||
|
|
|
@ -93,7 +93,7 @@ public class Highlighter
|
|||
*
|
||||
* @param tokenStream a stream of tokens identified in the text parameter, including offset information.
|
||||
* This is typically produced by an analyzer re-parsing a document's
|
||||
* text. Some work may be done on retrieving TokenStreams more efficently
|
||||
* text. Some work may be done on retrieving TokenStreams more efficiently
|
||||
* by adding support for storing original text position data in the Lucene
|
||||
* index but this support is not currently available (as of Lucene 1.4 rc2).
|
||||
* @param text text to highlight terms in
|
||||
|
|
|
@ -54,7 +54,7 @@ public final class QueryTermExtractor
|
|||
*
|
||||
* @param query Query to extract term texts from
|
||||
* @param reader used to compute IDF which can be used to a) score selected fragments better
|
||||
* b) use graded highlights eg chaning intensity of font color
|
||||
* b) use graded highlights eg changing intensity of font color
|
||||
* @param fieldName the field on which Inverse Document Frequency (IDF) calculations are based
|
||||
* @return an array of the terms used in a query, plus their weights.
|
||||
*/
|
||||
|
|
|
@ -130,7 +130,7 @@ public class TokenSources
|
|||
* stemmer/lowercaser/stopword combo)
|
||||
* 2) The number of other fields (Lucene reads ALL fields off the disk
|
||||
* when accessing just one document field - can cost dear!)
|
||||
* 3) Use of compression on field storage - could be faster cos of compression (less disk IO)
|
||||
* 3) Use of compression on field storage - could be faster due to compression (less disk IO)
|
||||
* or slower (more CPU burn) depending on the content.
|
||||
*
|
||||
* @param tpv
|
||||
|
|
|
@ -87,7 +87,7 @@ A subtle use of color can help emphasise the reasons for matching (useful when d
|
|||
you want to see what the basis of the similarities are).</p>
|
||||
|
||||
<p>The QueryScorer class has a new constructor which can use an IndexReader to derive the IDF (inverse document frequency)
|
||||
for each term in order to influcence the score. This is useful for helping to extracting the most significant sections
|
||||
for each term in order to influence the score. This is useful for helping to extract the most significant sections
|
||||
of a document and in supplying scores used by the new GradientFormatter to color significant words more strongly.
|
||||
The QueryScorer.getMaxWeight method is useful when passed to the GradientFormatter constructor to define the top score
|
||||
which is associated with the top color.</p>
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.io.Serializable;
|
|||
*/
|
||||
|
||||
/**
|
||||
* Essetially a Map<FieldName, {@link org.apache.lucene.store.instantiated.FieldSetting}>
|
||||
* Essentially a Map<FieldName, {@link org.apache.lucene.store.instantiated.FieldSetting}>
|
||||
*/
|
||||
class FieldSettings implements Serializable {
|
||||
|
||||
|
|
|
@ -92,7 +92,7 @@ public class InstantiatedIndex
|
|||
* Creates a new instantiated index that looks just like the index in a specific state as represented by a reader.
|
||||
*
|
||||
* @param sourceIndexReader the source index this new instantiated index will be copied from.
|
||||
* @throws IOException if the source index is not optimized, or when accesing the source.
|
||||
* @throws IOException if the source index is not optimized, or when accessing the source.
|
||||
*/
|
||||
public InstantiatedIndex(IndexReader sourceIndexReader) throws IOException {
|
||||
this(sourceIndexReader, null);
|
||||
|
@ -105,7 +105,7 @@ public class InstantiatedIndex
|
|||
*
|
||||
* @param sourceIndexReader the source index this new instantiated index will be copied from.
|
||||
* @param fields fields to be added, or null for all
|
||||
* @throws IOException if the source index is not optimized, or when accesing the source.
|
||||
* @throws IOException if the source index is not optimized, or when accessing the source.
|
||||
*/
|
||||
public InstantiatedIndex(IndexReader sourceIndexReader, Set<String> fields) throws IOException {
|
||||
|
||||
|
|
|
@ -224,8 +224,8 @@ public class InstantiatedIndexReader extends IndexReader {
|
|||
* over a {@link org.apache.lucene.store.Directory}.
|
||||
* I.e., if you need to touch the document, clone it first!
|
||||
* <p>
|
||||
* This can also be seen as a feature for live canges of stored values,
|
||||
* but be carful! Adding a field with an name unknown to the index
|
||||
* This can also be seen as a feature for live changes of stored values,
|
||||
* but be careful! Adding a field with a name unknown to the index
|
||||
* or to a field with previously no stored values will make
|
||||
* {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
|
||||
* out of sync, causing problems for instance when merging the
|
||||
|
@ -259,8 +259,8 @@ public class InstantiatedIndexReader extends IndexReader {
|
|||
* over a {@link org.apache.lucene.store.Directory}.
|
||||
* I.e., if you need to touch the document, clone it first!
|
||||
* <p>
|
||||
* This can also be seen as a feature for live canges of stored values,
|
||||
* but be carful! Adding a field with an name unknown to the index
|
||||
* This can also be seen as a feature for live changes of stored values,
|
||||
* but be careful! Adding a field with a name unknown to the index
|
||||
* or to a field with previously no stored values will make
|
||||
* {@link org.apache.lucene.store.instantiated.InstantiatedIndexReader#getFieldNames(org.apache.lucene.index.IndexReader.FieldOption)}
|
||||
* out of sync, causing problems for instance when merging the
|
||||
|
|
|
@ -281,7 +281,7 @@ public class InstantiatedIndexWriter {
|
|||
}
|
||||
}
|
||||
|
||||
// create association term document infomation
|
||||
// create association term document information
|
||||
//
|
||||
// [Term]-- {0..*} | {0..* ordered} --(field)[Document]
|
||||
//
|
||||
|
@ -302,7 +302,7 @@ public class InstantiatedIndexWriter {
|
|||
|
||||
InstantiatedTermDocumentInformation info = new InstantiatedTermDocumentInformation(term, document, /*eTermText_TermDocInfoFactory.getValue().termFrequency,*/ positions, payloads);
|
||||
|
||||
// todo optimize, this should be chached and updated to array in batches rather than appending the array once for every position!
|
||||
// todo optimize, this should be cached and updated to array in batches rather than appending the array once for every position!
|
||||
InstantiatedTermDocumentInformation[] associatedDocuments;
|
||||
if (term.getAssociatedDocuments() != null) {
|
||||
associatedDocuments = new InstantiatedTermDocumentInformation[term.getAssociatedDocuments().length + 1];
|
||||
|
@ -363,7 +363,7 @@ public class InstantiatedIndexWriter {
|
|||
|
||||
// order document informations in dirty terms
|
||||
for (InstantiatedTerm term : dirtyTerms) {
|
||||
// todo optimize, i belive this is useless, that the natural order is document number?
|
||||
// todo optimize, i believe this is useless, that the natural order is document number?
|
||||
Arrays.sort(term.getAssociatedDocuments(), InstantiatedTermDocumentInformation.documentNumberComparator);
|
||||
|
||||
// // update association class reference for speedy skipTo()
|
||||
|
|
|
@ -74,10 +74,10 @@ public class InstantiatedTerm
|
|||
private InstantiatedTermDocumentInformation[] associatedDocuments;
|
||||
|
||||
/**
|
||||
* Meta data per document in wich this term is occuring.
|
||||
* Meta data per document in which this term is occurring.
|
||||
* Ordered by document number.
|
||||
*
|
||||
* @return Meta data per document in wich this term is occuring.
|
||||
* @return Meta data per document in which this term is occurring.
|
||||
*/
|
||||
public InstantiatedTermDocumentInformation[] getAssociatedDocuments() {
|
||||
return associatedDocuments;
|
||||
|
@ -85,10 +85,10 @@ public class InstantiatedTerm
|
|||
|
||||
|
||||
/**
|
||||
* Meta data per document in wich this term is occuring.
|
||||
* Meta data per document in which this term is occurring.
|
||||
* Ordered by document number.
|
||||
*
|
||||
* @param associatedDocuments meta data per document in wich this term is occuring, ordered by document number
|
||||
* @param associatedDocuments meta data per document in which this term is occurring, ordered by document number
|
||||
*/
|
||||
void setAssociatedDocuments(InstantiatedTermDocumentInformation[] associatedDocuments) {
|
||||
this.associatedDocuments = associatedDocuments;
|
||||
|
@ -182,7 +182,7 @@ public class InstantiatedTerm
|
|||
// A typical binarySearch algorithm uses pivot = (min + max) / 2.
|
||||
// The pivot we use here tries to be smarter and to choose a pivot close to the expectable location of the key.
|
||||
// This reduces dramatically the number of steps needed to get to the key.
|
||||
// However, it does not work well with a logaritmic distribution of values, for instance.
|
||||
// However, it does not work well with a logarithmic distribution of values, for instance.
|
||||
// When the key is not found quickly the smart way, we switch to the standard pivot.
|
||||
if (nPreviousSteps > 2) {
|
||||
pivot = (min + max) >> 1;
|
||||
|
@ -214,7 +214,7 @@ public class InstantiatedTerm
|
|||
|
||||
|
||||
/**
|
||||
* Navigates to the view of this occurances of this term in a specific document.
|
||||
* Navigates to the view of the occurrences of this term in a specific document.
|
||||
*
|
||||
* This method is only used by InstantiatedIndex(IndexReader) and
|
||||
* should not be optimized for less CPU at the cost of more RAM.
|
||||
|
|
|
@ -23,7 +23,7 @@ import java.util.Comparator;
|
|||
|
||||
/**
|
||||
* There is one instance of this class per indexed term in a document
|
||||
* and it contains the meta data about each occurance of a term in a docment.
|
||||
* and it contains the meta data about each occurrence of a term in a document.
|
||||
*
|
||||
* It is the inner glue of the inverted index.
|
||||
*
|
||||
|
|
|
@ -62,7 +62,7 @@ public class InstantiatedTermPositions
|
|||
public int nextPosition() {
|
||||
currentTermPositionIndex++;
|
||||
// if you get an array out of index exception here,
|
||||
// it might be due to currentDocumentInformation.getIndexFromTerm not beeing set!!
|
||||
// it might be due to currentDocumentInformation.getIndexFromTerm not being set!!
|
||||
return currentDocumentInformation.getTermPositions()[currentTermPositionIndex];
|
||||
}
|
||||
|
||||
|
|
|
@ -49,8 +49,8 @@
|
|||
|
||||
<p>
|
||||
At a few thousand ~160 characters long documents
|
||||
InstantiaedIndex outperforms RAMDirectory some 50x,
|
||||
15x at 100 documents of 2000 charachters length,
|
||||
InstantiatedIndex outperforms RAMDirectory some 50x,
|
||||
15x at 100 documents of 2000 characters length,
|
||||
and is linear to RAMDirectory at 10,000 documents of 2000 characters length.
|
||||
</p>
|
||||
|
||||
|
|
|
@ -81,11 +81,11 @@ public class FieldNormModifier {
|
|||
private Similarity sim;
|
||||
|
||||
/**
|
||||
* Constructor for code that wishes to use this class programatically
|
||||
* Constructor for code that wishes to use this class programmatically
|
||||
* If Similarity is null, kill the field norms.
|
||||
*
|
||||
* @param d the Directory to modify
|
||||
* @param s the Similiary to use (can be null)
|
||||
* @param s the Similarity to use (can be null)
|
||||
*/
|
||||
public FieldNormModifier(Directory d, Similarity s) {
|
||||
dir = d;
|
||||
|
|
|
@ -85,7 +85,7 @@ public class ChainedFilter extends Filter
|
|||
/**
|
||||
* Ctor.
|
||||
* @param chain The chain of filters
|
||||
* @param logic Logicial operation to apply to ALL filters
|
||||
* @param logic Logical operation to apply to ALL filters
|
||||
*/
|
||||
public ChainedFilter(Filter[] chain, int logic)
|
||||
{
|
||||
|
|
|
@ -97,7 +97,7 @@ public class SweetSpotSimilarity extends DefaultSimilarity {
|
|||
|
||||
/**
|
||||
* Sets the default function variables used by lengthNorm when no field
|
||||
* specifc variables have been set.
|
||||
* specific variables have been set.
|
||||
*
|
||||
* @see #lengthNorm
|
||||
*/
|
||||
|
@ -233,7 +233,7 @@ public class SweetSpotSimilarity extends DefaultSimilarity {
|
|||
* </code>
|
||||
*
|
||||
* <p>
|
||||
* This code is provided as a convincience for subclasses that want
|
||||
* This code is provided as a convenience for subclasses that want
|
||||
* to use a hyperbolic tf function.
|
||||
* </p>
|
||||
*
|
||||
|
|
|
@ -40,9 +40,9 @@ import org.apache.lucene.search.spans.SpanQuery;
|
|||
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||
|
||||
/**
|
||||
* QueryParser which permits complex phrase query syntax e.g. "(john jon
|
||||
* jonathan~) peters*"
|
||||
*
|
||||
* QueryParser which permits complex phrase query syntax eg "(john jon
|
||||
* jonathan~) peters*".
|
||||
* <p>
|
||||
* Performs potentially multiple passes over Query text to parse any nested
|
||||
* logic in PhraseQueries. - First pass takes any PhraseQuery content between
|
||||
* quotes and stores for subsequent pass. All other query content is parsed as
|
||||
|
@ -50,13 +50,14 @@ import org.apache.lucene.search.spans.SpanTermQuery;
|
|||
* embedded clauses are referring to the same field and therefore can be
|
||||
* rewritten as Span queries. All PhraseQuery clauses are expressed as
|
||||
* ComplexPhraseQuery objects
|
||||
*
|
||||
* </p>
|
||||
* <p>
|
||||
* This could arguably be done in one pass using a new QueryParser but here I am
|
||||
* working within the constraints of the existing parser as a base class. This
|
||||
* currently simply feeds all phrase content through an analyzer to select
|
||||
* phrase terms - any "special" syntax such as * ~ * etc are not given special
|
||||
* status
|
||||
*
|
||||
* </p>
|
||||
*
|
||||
*/
|
||||
public class ComplexPhraseQueryParser extends QueryParser {
|
||||
|
|
|
@ -232,7 +232,7 @@ public class PrecedenceQueryParser {
|
|||
* In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
|
||||
* are considered optional: for example <code>capital of Hungary</code> is equal to
|
||||
* <code>capital OR of OR Hungary</code>.<br/>
|
||||
* In <code>AND_OPERATOR</code> mode terms are considered to be in conjuction: the
|
||||
* In <code>AND_OPERATOR</code> mode terms are considered to be in conjunction: the
|
||||
* above mentioned query is parsed as <code>capital AND of AND Hungary</code>
|
||||
*/
|
||||
public void setDefaultOperator(Operator op) {
|
||||
|
|
|
@ -87,7 +87,7 @@ import java.util.ArrayList;
|
|||
*
|
||||
* This class has lots of options to try to make it efficient and flexible.
|
||||
* See the body of {@link #main main()} below in the source for real code, or
|
||||
* if you want pseudo code, the simpliest possible usage is as follows. The bold
|
||||
* if you want pseudo code, the simplest possible usage is as follows. The bold
|
||||
* fragment is specific to this class.
|
||||
*
|
||||
* <code><pre>
|
||||
|
@ -109,7 +109,7 @@ import java.util.ArrayList;
|
|||
* <ol>
|
||||
* <li> do your normal, Lucene setup for searching,
|
||||
* <li> create a MoreLikeThis,
|
||||
* <li> get the text of the doc you want to find similaries to
|
||||
* <li> get the text of the doc you want to find similarities to
|
||||
* <li> then call one of the like() calls to generate a similarity query
|
||||
* <li> call the searcher to find the similar docs
|
||||
* </ol>
|
||||
|
@ -139,7 +139,7 @@ import java.util.ArrayList;
|
|||
* Some bugfixing, some refactoring, some optimisation.
|
||||
* - bugfix: retrieveTerms(int docNum) was not working for indexes without a termvector -added missing code
|
||||
* - bugfix: No significant terms being created for fields with a termvector - because
|
||||
* was only counting one occurence per term/field pair in calculations(ie not including frequency info from TermVector)
|
||||
* was only counting one occurrence per term/field pair in calculations(ie not including frequency info from TermVector)
|
||||
* - refactor: moved common code into isNoiseWord()
|
||||
* - optimise: when no termvector support available - used maxNumTermsParsed to limit amount of tokenization
|
||||
* </pre>
|
||||
|
@ -230,7 +230,7 @@ public final class MoreLikeThis {
|
|||
private Analyzer analyzer = DEFAULT_ANALYZER;
|
||||
|
||||
/**
|
||||
* Ignore words less freqent that this.
|
||||
* Ignore words less frequent than this.
|
||||
*/
|
||||
private int minTermFreq = DEFAULT_MIN_TERM_FREQ;
|
||||
|
||||
|
|
|
@ -63,7 +63,7 @@ public final class SimilarityQueries
|
|||
*
|
||||
* <p>
|
||||
* The philosophy behind this method is "two documents are similar if they share lots of words".
|
||||
* Note that behind the scenes, Lucenes scoring algorithm will tend to give two documents a higher similarity score if the share more uncommon words.
|
||||
* Note that behind the scenes, Lucene's scoring algorithm will tend to give two documents a higher similarity score if they share more uncommon words.
|
||||
*
|
||||
* <P>
|
||||
* This method is fail-safe in that if a long 'body' is passed in and
|
||||
|
|
|
@ -27,11 +27,12 @@ import org.apache.lucene.queryParser.core.parser.EscapeQuerySyntax;
|
|||
|
||||
/**
|
||||
* A {@link ModifierQueryNode} indicates the modifier value (+,-,?,NONE) for
|
||||
* each term on the query string for example "+t1 -t2 t3" will have a tree of
|
||||
* <BooleanQueryNode> <ModifierQueryNode modifier="MOD_REQ"> <t1/>
|
||||
* </ModifierQueryNode> <ModifierQueryNode modifier="MOD_NOT"> <t2/>
|
||||
* </ModifierQueryNode> <t3/> </BooleanQueryNode>
|
||||
*
|
||||
* each term on the query string. For example "+t1 -t2 t3" will have a tree of:
|
||||
* <blockquote>
|
||||
* <BooleanQueryNode> <ModifierQueryNode modifier="MOD_REQ"> <t1/>
|
||||
* </ModifierQueryNode> <ModifierQueryNode modifier="MOD_NOT"> <t2/>
|
||||
* </ModifierQueryNode> <t3/> </BooleanQueryNode>
|
||||
* </blockquote>
|
||||
*/
|
||||
public class ModifierQueryNode extends QueryNodeImpl {
|
||||
|
||||
|
|
|
@ -25,17 +25,19 @@ import org.apache.lucene.queryParser.core.parser.EscapeQuerySyntax;
|
|||
import org.apache.lucene.queryParser.core.parser.EscapeQuerySyntax.Type;
|
||||
|
||||
/**
|
||||
* A {@link PathQueryNode} is used for to store queries like
|
||||
* /company/USA/California /product/shoes/brown QueryText are objects that
|
||||
* A {@link PathQueryNode} is used to store queries like
|
||||
* /company/USA/California /product/shoes/brown. QueryText are objects that
|
||||
* contain the text, begin position and end position in the query.
|
||||
*
|
||||
* <p>
|
||||
* Example how the text parser creates these objects:
|
||||
*
|
||||
* List values = ArrayList(); values.add(new PathQueryNode.QueryText("company",
|
||||
* 1, 7)); values.add(new PathQueryNode.QueryText("USA", 9, 12)); values.add(new
|
||||
* PathQueryNode.QueryText("California", 14, 23)); QueryNode q = new
|
||||
* PathQueryNode(values);
|
||||
*
|
||||
* </p>
|
||||
* <pre>
|
||||
* List values = ArrayList();
|
||||
* values.add(new PathQueryNode.QueryText("company", 1, 7));
|
||||
* values.add(new PathQueryNode.QueryText("USA", 9, 12));
|
||||
* values.add(new PathQueryNode.QueryText("California", 14, 23));
|
||||
* QueryNode q = new PathQueryNode(values);
|
||||
* </pre>
|
||||
*/
|
||||
public class PathQueryNode extends QueryNodeImpl {
|
||||
|
||||
|
|
|
@ -106,7 +106,7 @@ public abstract class QueryNodeImpl implements QueryNode, Cloneable {
|
|||
// allocate new children list
|
||||
allocate();
|
||||
|
||||
// add new childs and set parent
|
||||
// add new children and set parent
|
||||
for (QueryNode child : children) {
|
||||
add(child);
|
||||
}
|
||||
|
|
|
@ -17,6 +17,9 @@ package org.apache.lucene.queryParser.core.nodes;
|
|||
* the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public interface TextableQueryNode {
|
||||
|
||||
CharSequence getText();
|
||||
|
|
|
@ -23,7 +23,7 @@ import org.apache.lucene.queryParser.core.parser.EscapeQuerySyntax;
|
|||
|
||||
/**
|
||||
* A {@link TokenizedPhraseQueryNode} represents a node created by a code that
|
||||
* tokenizes/lemmatizes/analizes.
|
||||
* tokenizes/lemmatizes/analyzes.
|
||||
*/
|
||||
public class TokenizedPhraseQueryNode extends QueryNodeImpl implements
|
||||
FieldableNode {
|
||||
|
|
|
@ -61,7 +61,7 @@ Grouping nodes:
|
|||
<li>FuzzyQueryNode - fuzzy node</li>
|
||||
<li>ParametricRangeQueryNode - used for parametric field:[low_value TO high_value]</li>
|
||||
<li>ProximityQueryNode - used for proximity search</li>
|
||||
<li>TokenizedPhraseQueryNode - used by tokenizers/lemmatizers/analizers for phrases/autophrases</li>
|
||||
<li>TokenizedPhraseQueryNode - used by tokenizers/lemmatizers/analyzers for phrases/autophrases</li>
|
||||
</ul>
|
||||
</p>
|
||||
<p>
|
||||
|
@ -82,7 +82,7 @@ Utility Nodes:
|
|||
<li>DeletedQueryNode - used by processors on optimizations</li>
|
||||
<li>MatchAllDocsQueryNode - used by processors on optimizations</li>
|
||||
<li>MatchNoDocsQueryNode - used by processors on optimizations</li>
|
||||
<li>NoTokenFoundQueryNode - used by tokenizers/lemmatizers/analizers</li>
|
||||
<li>NoTokenFoundQueryNode - used by tokenizers/lemmatizers/analyzers</li>
|
||||
</ul>
|
||||
</p>
|
||||
</body>
|
||||
|
|
|
@ -44,7 +44,7 @@ which is an object structure that represents the elements defined in the query s
|
|||
<p>
|
||||
The query processing phase is performed by a query processor, which implements {@link org.apache.lucene.queryParser.core.processors.QueryNodeProcessor}.
|
||||
A query processor is responsible to perform any processing on a {@link org.apache.lucene.queryParser.core.nodes.QueryNode} tree. This phase
|
||||
is optional and is used only if an extra processing, validation, query expansion, etc needs to be perfomed in a {@link org.apache.lucene.queryParser.core.nodes.QueryNode} tree.
|
||||
is optional and is used only if an extra processing, validation, query expansion, etc needs to be performed in a {@link org.apache.lucene.queryParser.core.nodes.QueryNode} tree.
|
||||
The {@link org.apache.lucene.queryParser.core.nodes.QueryNode} tree can either be generated by a text parser or programmatically created.
|
||||
</p>
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ public class JakartaRegexpCapabilities implements RegexCapabilities {
|
|||
private RE regexp;
|
||||
|
||||
// Define the flags that are possible. Redefine them here
|
||||
// to avoid exposign the RE class to the caller.
|
||||
// to avoid exposing the RE class to the caller.
|
||||
|
||||
private int flags = RE.MATCH_NORMAL;
|
||||
|
||||
|
@ -44,7 +44,7 @@ public class JakartaRegexpCapabilities implements RegexCapabilities {
|
|||
public static final int FLAG_MATCH_CASEINDEPENDENT = RE.MATCH_CASEINDEPENDENT;
|
||||
|
||||
/**
|
||||
* Contructs a RegexCapabilities with the default MATCH_NORMAL match style.
|
||||
* Constructs a RegexCapabilities with the default MATCH_NORMAL match style.
|
||||
*/
|
||||
public JakartaRegexpCapabilities() {}
|
||||
|
||||
|
|
|
@ -54,7 +54,7 @@ public class JavaUtilRegexCapabilities implements RegexCapabilities {
|
|||
* Constructor that allows for the modification of the flags that
|
||||
* the java.util.regex.Pattern will use to compile the regular expression.
|
||||
* This gives the user the ability to fine-tune how the regular expression
|
||||
* to match the functionlity that they need.
|
||||
* to match the functionality that they need.
|
||||
* The {@link java.util.regex.Pattern Pattern} class supports specifying
|
||||
* these fields via the regular expression text itself, but this gives the caller
|
||||
* another option to modify the behavior. Useful in cases where the regular expression text
|
||||
|
|
|
@ -28,7 +28,7 @@ public class FloatLatLng extends LatLng {
|
|||
private boolean normalized;
|
||||
|
||||
public FloatLatLng(double lat, double lng) {
|
||||
if (lat>90.0 || lat<-90.0) throw new IllegalArgumentException("Illegal lattitude value " + lat);
|
||||
if (lat>90.0 || lat<-90.0) throw new IllegalArgumentException("Illegal latitude value " + lat);
|
||||
this.lat=lat;
|
||||
this.lng=lng;
|
||||
}
|
||||
|
|
|
@ -108,7 +108,7 @@ public abstract class LatLng {
|
|||
* @param ll2
|
||||
* Second lat,lng position to calculate distance to.
|
||||
* @param lUnits
|
||||
* Units to calculate distace, defaults to miles
|
||||
* Units to calculate distance, defaults to miles
|
||||
*
|
||||
* @return Returns the distance in meters or miles.
|
||||
*/
|
||||
|
@ -123,7 +123,7 @@ public abstract class LatLng {
|
|||
if (lat1 == lat2 && lng1 == lng2)
|
||||
return 0.0;
|
||||
|
||||
// Get the m_dLongitude diffeernce. Don't need to worry about
|
||||
// Get the m_dLongitude difference. Don't need to worry about
|
||||
// crossing 180 since cos(x) = cos(-x)
|
||||
double dLon = lng2 - lng1;
|
||||
|
||||
|
|
|
@ -109,7 +109,7 @@ public class Ellipse implements Geometry2D {
|
|||
if (pt1 == null)
|
||||
pt1 = new Point2D();
|
||||
|
||||
// Solution is found by paramterizing the line segment and
|
||||
// Solution is found by parameterizing the line segment and
|
||||
// substituting those values into the ellipse equation.
|
||||
// Results in a quadratic equation.
|
||||
double x1 = center.x();
|
||||
|
|
|
@ -41,7 +41,7 @@ public class LineSegment {
|
|||
|
||||
/**
|
||||
* Finds the distance of a specified point from the line segment and the
|
||||
* closest point on the segement to the specified point.
|
||||
* closest point on the segment to the specified point.
|
||||
*
|
||||
* @param P
|
||||
* Test point.
|
||||
|
|
|
@ -79,7 +79,7 @@ public class CartesianTierPlotter {
|
|||
|
||||
/**
|
||||
* TierBoxId is latitude box id + longitude box id
|
||||
* where latitude box id, and longitude box id are transposded in to position
|
||||
* where latitude box id, and longitude box id are transposed in to position
|
||||
* coordinates.
|
||||
*
|
||||
* @param latitude
|
||||
|
|
|
@ -19,7 +19,7 @@ package org.apache.lucene.spatial.tier.projections;
|
|||
|
||||
/**
|
||||
* Based on Sinusoidal Projections
|
||||
* Project a latitude / longitude on a 2D cartisian map
|
||||
* Project a latitude / longitude on a 2D cartesian map
|
||||
*
|
||||
* <p><font color="red"><b>NOTE:</b> This API is still in
|
||||
* flux and might change in incompatible ways in the next
|
||||
|
|
|
@ -103,7 +103,7 @@ public class JaroWinklerDistance implements StringDistance {
|
|||
|
||||
/**
|
||||
* Returns the current value of the threshold used for adding the Winkler bonus.
|
||||
* The deafult value is 0.7.
|
||||
* The default value is 0.7.
|
||||
* @return the current value of the threshold
|
||||
*/
|
||||
public float getThreshold() {
|
||||
|
|
|
@ -64,7 +64,7 @@ public class QueryParser {
|
|||
final char quote = '\"';
|
||||
final char fieldOperator = ':';
|
||||
final char comma = ','; /* prefix list separator */
|
||||
final char carat = '^'; /* weight oparator */
|
||||
final char carat = '^'; /* weight operator */
|
||||
|
||||
static public SrndQuery parse(String query) throws ParseException {
|
||||
QueryParser parser = new QueryParser();
|
||||
|
|
|
@ -118,7 +118,7 @@ public class ListSearcher extends AbstractListModel {
|
|||
//for each row make a new document
|
||||
Document document = new Document();
|
||||
//add the row number of this row in the decorated list model
|
||||
//this will allow us to retrive the results later
|
||||
//this will allow us to retrieve the results later
|
||||
//and map this list model's row to a row in the decorated
|
||||
//list model
|
||||
document.add(new Field(ROW_NUMBER, "" + row, Field.Store.YES, Field.Index.ANALYZED));
|
||||
|
@ -187,7 +187,7 @@ public class ListSearcher extends AbstractListModel {
|
|||
//iterate through the hits
|
||||
//get the row number stored at the index
|
||||
//that number is the row number of the decorated
|
||||
//tabble model row that we are mapping to
|
||||
//table model row that we are mapping to
|
||||
for (int t=0; t<hits.length(); t++){
|
||||
Document document = hits.doc(t);
|
||||
Fieldable field = document.getField(ROW_NUMBER);
|
||||
|
|
|
@ -43,7 +43,7 @@ import java.util.ArrayList;
|
|||
* a TableModel and provides sorting functionality. The benefit
|
||||
* of this architecture is that you can decorate any TableModel
|
||||
* implementation with this searching table model -- making it
|
||||
* easy to add searching functionaliy to existing JTables -- or
|
||||
* easy to add searching functionality to existing JTables -- or
|
||||
* making new search capable table lucene.
|
||||
*
|
||||
* <p>This decorator works by holding a reference to a decorated or inner
|
||||
|
@ -169,7 +169,7 @@ public class TableSearcher extends AbstractTableModel {
|
|||
//for each row make a new document
|
||||
Document document = new Document();
|
||||
//add the row number of this row in the decorated table model
|
||||
//this will allow us to retrive the results later
|
||||
//this will allow us to retrieve the results later
|
||||
//and map this table model's row to a row in the decorated
|
||||
//table model
|
||||
document.add(new Field(ROW_NUMBER, "" + row, Field.Store.YES, Field.Index.ANALYZED));
|
||||
|
@ -268,7 +268,7 @@ public class TableSearcher extends AbstractTableModel {
|
|||
//iterate through the hits
|
||||
//get the row number stored at the index
|
||||
//that number is the row number of the decorated
|
||||
//tabble model row that we are mapping to
|
||||
//table model row that we are mapping to
|
||||
for (int t=0; t<hits.length(); t++){
|
||||
Document document = hits.doc(t);
|
||||
Fieldable field = document.getField(ROW_NUMBER);
|
||||
|
|
|
@ -142,8 +142,8 @@ public class WikipediaTokenizer extends Tokenizer {
|
|||
}
|
||||
|
||||
/**
|
||||
* Createa a new instance of the {@link org.apache.lucene.wikipedia.analysis.WikipediaTokenizer}. Attaches the
|
||||
* <conde>input</code> to a the newly created JFlex scanner.
|
||||
* Creates a new instance of the {@link org.apache.lucene.wikipedia.analysis.WikipediaTokenizer}. Attaches the
|
||||
* <code>input</code> to the newly created JFlex scanner.
|
||||
*
|
||||
* @param input The input
|
||||
* @param tokenOutput One of {@link #TOKENS_ONLY}, {@link #UNTOKENIZED_ONLY}, {@link #BOTH}
|
||||
|
@ -156,8 +156,8 @@ public class WikipediaTokenizer extends Tokenizer {
|
|||
}
|
||||
|
||||
/**
|
||||
* Createa a new instance of the {@link org.apache.lucene.wikipedia.analysis.WikipediaTokenizer}. Attaches the
|
||||
* <conde>input</code> to a the newly created JFlex scanner. Uses the given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
|
||||
* Creates a new instance of the {@link org.apache.lucene.wikipedia.analysis.WikipediaTokenizer}. Attaches the
|
||||
* <code>input</code> to the newly created JFlex scanner. Uses the given {@link org.apache.lucene.util.AttributeSource.AttributeFactory}.
|
||||
*
|
||||
* @param input The input
|
||||
* @param tokenOutput One of {@link #TOKENS_ONLY}, {@link #UNTOKENIZED_ONLY}, {@link #BOTH}
|
||||
|
@ -170,8 +170,8 @@ public class WikipediaTokenizer extends Tokenizer {
|
|||
}
|
||||
|
||||
/**
|
||||
* Createa a new instance of the {@link org.apache.lucene.wikipedia.analysis.WikipediaTokenizer}. Attaches the
|
||||
* <conde>input</code> to a the newly created JFlex scanner. Uses the given {@link AttributeSource}.
|
||||
* Creates a new instance of the {@link org.apache.lucene.wikipedia.analysis.WikipediaTokenizer}. Attaches the
|
||||
* <code>input</code> to the newly created JFlex scanner. Uses the given {@link AttributeSource}.
|
||||
*
|
||||
* @param input The input
|
||||
* @param tokenOutput One of {@link #TOKENS_ONLY}, {@link #UNTOKENIZED_ONLY}, {@link #BOTH}
|
||||
|
|
|
@ -230,7 +230,7 @@ public class Syns2Index
|
|||
/**
|
||||
* Forms a Lucene index based on the 2 maps.
|
||||
*
|
||||
* @param indexDir the direcotry where the index should be created
|
||||
* @param indexDir the directory where the index should be created
|
||||
* @param word2Nums
|
||||
* @param num2Words
|
||||
*/
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
<ol>
|
||||
<li> Download the <a href="http://www.cogsci.princeton.edu/2.0/WNprolog-2.0.tar.gz">WordNet prolog database</a> , gunzip, untar etc.
|
||||
<li> Invoke Syns2Index as appropriate to build a synonym index.
|
||||
It'll take 2 arguments, the path to wn_s.pl from that WordNet downlaod, and the index name.
|
||||
It'll take 2 arguments, the path to wn_s.pl from that WordNet download, and the index name.
|
||||
|
||||
<li> Update your UI so that as appropriate you call SynExpand.expand(...) to expand user queries with synonyms.
|
||||
</ol>
|
||||
|
|
|
@ -24,12 +24,16 @@ import org.apache.lucene.xmlparser.builders.TermsFilterBuilder;
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class CorePlusExtensionsParser extends CoreParser
|
||||
{
|
||||
|
||||
/**
|
||||
* Construct an XML parser that uses a single instance QueryParser for handling
|
||||
* UserQuery tags - all parse operations are synchronised on this parser
|
||||
* UserQuery tags - all parse operations are synchronized on this parser
|
||||
* @param analyzer
|
||||
* @param parser A QueryParser which will be synchronized on during parse calls.
|
||||
*/
|
||||
|
|
|
@ -24,6 +24,10 @@ import org.xml.sax.InputSource;
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class DOMUtils
|
||||
{
|
||||
public static Element getChildByTagOrFail(Element e, String name) throws ParserException
|
||||
|
|
|
@ -22,6 +22,9 @@ import org.w3c.dom.Element;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public interface FilterBuilder {
|
||||
public Filter getFilter(Element e) throws ParserException;
|
||||
}
|
||||
|
|
|
@ -24,6 +24,9 @@ import org.w3c.dom.Element;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class FilterBuilderFactory implements FilterBuilder {
|
||||
|
||||
HashMap builders=new HashMap();
|
||||
|
|
|
@ -19,6 +19,9 @@ package org.apache.lucene.xmlparser;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class ParserException extends Exception {
|
||||
|
||||
/**
|
||||
|
|
|
@ -24,6 +24,9 @@ import org.w3c.dom.Element;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class QueryBuilderFactory implements QueryBuilder {
|
||||
|
||||
HashMap builders=new HashMap();
|
||||
|
|
|
@ -31,6 +31,9 @@ import org.w3c.dom.NodeList;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class BooleanFilterBuilder implements FilterBuilder {
|
||||
|
||||
private FilterBuilder factory;
|
||||
|
|
|
@ -29,7 +29,9 @@ import org.w3c.dom.NodeList;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class BooleanQueryBuilder implements QueryBuilder {
|
||||
|
||||
private QueryBuilder factory;
|
||||
|
|
|
@ -23,6 +23,9 @@ import org.w3c.dom.Element;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class BoostingQueryBuilder implements QueryBuilder
|
||||
{
|
||||
|
||||
|
|
|
@ -24,6 +24,10 @@ import org.w3c.dom.Element;
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class BoostingTermBuilder extends SpanBuilderBase
|
||||
{
|
||||
|
||||
|
|
|
@ -23,6 +23,10 @@ import org.w3c.dom.Element;
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class ConstantScoreQueryBuilder implements QueryBuilder
|
||||
{
|
||||
private FilterBuilderFactory filterFactory;
|
||||
|
|
|
@ -32,6 +32,9 @@ import org.w3c.dom.NodeList;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class DuplicateFilterBuilder implements FilterBuilder {
|
||||
|
||||
|
||||
|
|
|
@ -29,6 +29,9 @@ import org.w3c.dom.Element;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class FilteredQueryBuilder implements QueryBuilder {
|
||||
|
||||
private FilterBuilder filterFactory;
|
||||
|
|
|
@ -25,6 +25,10 @@ import org.w3c.dom.NodeList;
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class FuzzyLikeThisQueryBuilder implements QueryBuilder
|
||||
{
|
||||
int defaultMaxNumTerms=50;
|
||||
|
|
|
@ -34,7 +34,9 @@ import org.w3c.dom.Element;
|
|||
* limitations under the License.
|
||||
*/
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class LikeThisQueryBuilder implements QueryBuilder {
|
||||
|
||||
private Analyzer analyzer;
|
||||
|
|
|
@ -21,6 +21,10 @@ import org.w3c.dom.Element;
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class MatchAllDocsQueryBuilder implements QueryBuilder
|
||||
{
|
||||
public Query getQuery(Element e) throws ParserException
|
||||
|
|
|
@ -27,7 +27,9 @@ import org.w3c.dom.Element;
|
|||
*/
|
||||
|
||||
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class RangeFilterBuilder implements FilterBuilder {
|
||||
|
||||
|
||||
|
|
|
@ -19,6 +19,10 @@ import org.w3c.dom.Element;
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public abstract class SpanBuilderBase implements SpanQueryBuilder
|
||||
{
|
||||
public Query getQuery(Element e) throws ParserException
|
||||
|
|
|
@ -21,6 +21,10 @@ import org.w3c.dom.Element;
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class SpanFirstBuilder extends SpanBuilderBase
|
||||
{
|
||||
SpanQueryBuilder factory;
|
||||
|
|
|
@ -24,6 +24,10 @@ import org.w3c.dom.Node;
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class SpanNearBuilder extends SpanBuilderBase
|
||||
{
|
||||
SpanQueryBuilder factory;
|
||||
|
|
|
@ -21,6 +21,10 @@ import org.w3c.dom.Element;
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class SpanNotBuilder extends SpanBuilderBase
|
||||
{
|
||||
|
||||
|
|
|
@ -24,6 +24,10 @@ import org.w3c.dom.Node;
|
|||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
*
|
||||
*/
|
||||
public class SpanOrBuilder extends SpanBuilderBase
|
||||
{
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue