mirror of https://github.com/apache/lucene.git
#34331 - Add Paul Elschot's Surround query language parser
git-svn-id: https://svn.apache.org/repos/asf/lucene/java/trunk@209183 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
a8ea081a4d
commit
9d70229506
|
@ -0,0 +1,16 @@
|
||||||
|
<?xml version="1.0"?>

<!-- Ant build file for the "surround" contrib module. -->
<project name="surround" default="default">

  <description>
    Surround query parser
  </description>

  <!-- Shared contrib build logic; the "default" target and the invoke-javacc
       task used below are not defined in this file, so they presumably come
       from this import (confirm in contrib-build.xml). -->
  <import file="../contrib-build.xml"/>

  <!-- Runs JavaCC on the Surround grammar (QueryParser.jj) and writes the
       result below ${build.dir}/gen. -->
  <target name="javacc">
    <invoke-javacc target="src/java/org/apache/lucene/queryParser/surround/parser/QueryParser.jj"
                   outputDir="${build.dir}/gen/org/apache/lucene/queryParser/surround/parser"
    />
  </target>
</project>
|
|
@ -0,0 +1,110 @@
|
||||||
|
/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 3.0 */
|
||||||
|
package org.apache.lucene.queryParser.surround.parser;
|
||||||
|
|
||||||
|
/**
 * This interface describes a character stream that maintains line and
 * column number positions of the characters. It also has the capability
 * to backup the stream to some extent. An implementation of this
 * interface is used in the TokenManager implementation generated by
 * JavaCCParser.
 *
 * All the methods except backup can be implemented in any fashion. backup
 * needs to be implemented correctly for the correct operation of the lexer.
 * Rest of the methods are all used to get information like line number,
 * column number and the String that constitutes a token and are not used
 * by the lexer. Hence their implementation won't affect the generated lexer's
 * operation.
 */
public interface CharStream {

  /**
   * Returns the next character from the selected input. The method
   * of selecting the input is the responsibility of the class
   * implementing this interface. Can throw any java.io.IOException.
   */
  char readChar() throws java.io.IOException;

  /**
   * Returns the column position of the character last read.
   * @deprecated use {@link #getEndColumn} instead
   * @see #getEndColumn
   */
  int getColumn();

  /**
   * Returns the line number of the character last read.
   * @deprecated use {@link #getEndLine} instead
   * @see #getEndLine
   */
  int getLine();

  /**
   * Returns the column number of the last character for current token (being
   * matched after the last call to BeginToken).
   */
  int getEndColumn();

  /**
   * Returns the line number of the last character for current token (being
   * matched after the last call to BeginToken).
   */
  int getEndLine();

  /**
   * Returns the column number of the first character for current token (being
   * matched after the last call to BeginToken).
   */
  int getBeginColumn();

  /**
   * Returns the line number of the first character for current token (being
   * matched after the last call to BeginToken).
   */
  int getBeginLine();

  /**
   * Backs up the input stream by amount steps. Lexer calls this method if it
   * had already read some characters, but could not use them to match a
   * (longer) token. So, they will be used again as the prefix of the next
   * token and it is the implementation's responsibility to do this right.
   */
  void backup(int amount);

  /**
   * Returns the next character that marks the beginning of the next token.
   * All characters must remain in the buffer between two successive calls
   * to this method to implement backup correctly.
   */
  char BeginToken() throws java.io.IOException;

  /**
   * Returns a string made up of characters from the marked token beginning
   * to the current buffer position. Implementations have the choice of returning
   * anything that they want to. For example, for efficiency, one might decide
   * to just return null, which is a valid implementation.
   */
  String GetImage();

  /**
   * Returns an array of characters that make up the suffix of length 'len' for
   * the currently matched token. This is used to build up the matched string
   * for use in actions in the case of MORE. A simple and inefficient
   * implementation of this is as follows :
   *
   * <pre>
   *   {
   *     String t = GetImage();
   *     return t.substring(t.length() - len, t.length()).toCharArray();
   *   }
   * </pre>
   */
  char[] GetSuffix(int len);

  /**
   * The lexer calls this function to indicate that it is done with the stream
   * and hence implementations can free any resources held by this class.
   * Again, the body of this function can be just empty and it will not
   * affect the lexer's operation.
   */
  void Done();

}
|
|
@ -0,0 +1,120 @@
|
||||||
|
// FastCharStream.java
|
||||||
|
package org.apache.lucene.queryParser.surround.parser;
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.*;
|
||||||
|
|
||||||
|
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
|
||||||
|
* this does not do line-number counting, but instead keeps track of the
|
||||||
|
* character position of the token in the input, as required by Lucene's {@link
|
||||||
|
* org.apache.lucene.analysis.Token} API. */
|
||||||
|
public final class FastCharStream implements CharStream {
|
||||||
|
char[] buffer = null;
|
||||||
|
|
||||||
|
int bufferLength = 0; // end of valid chars
|
||||||
|
int bufferPosition = 0; // next char to read
|
||||||
|
|
||||||
|
int tokenStart = 0; // offset in buffer
|
||||||
|
int bufferStart = 0; // position in file of buffer
|
||||||
|
|
||||||
|
Reader input; // source of chars
|
||||||
|
|
||||||
|
/** Constructs from a Reader. */
|
||||||
|
public FastCharStream(Reader r) {
|
||||||
|
input = r;
|
||||||
|
}
|
||||||
|
|
||||||
|
public final char readChar() throws IOException {
|
||||||
|
if (bufferPosition >= bufferLength)
|
||||||
|
refill();
|
||||||
|
return buffer[bufferPosition++];
|
||||||
|
}
|
||||||
|
|
||||||
|
private final void refill() throws IOException {
|
||||||
|
int newPosition = bufferLength - tokenStart;
|
||||||
|
|
||||||
|
if (tokenStart == 0) { // token won't fit in buffer
|
||||||
|
if (buffer == null) { // first time: alloc buffer
|
||||||
|
buffer = new char[2048];
|
||||||
|
} else if (bufferLength == buffer.length) { // grow buffer
|
||||||
|
char[] newBuffer = new char[buffer.length*2];
|
||||||
|
System.arraycopy(buffer, 0, newBuffer, 0, bufferLength);
|
||||||
|
buffer = newBuffer;
|
||||||
|
}
|
||||||
|
} else { // shift token to front
|
||||||
|
System.arraycopy(buffer, tokenStart, buffer, 0, newPosition);
|
||||||
|
}
|
||||||
|
|
||||||
|
bufferLength = newPosition; // update state
|
||||||
|
bufferPosition = newPosition;
|
||||||
|
bufferStart += tokenStart;
|
||||||
|
tokenStart = 0;
|
||||||
|
|
||||||
|
int charsRead = // fill space in buffer
|
||||||
|
input.read(buffer, newPosition, buffer.length-newPosition);
|
||||||
|
if (charsRead == -1)
|
||||||
|
throw new IOException("read past eof");
|
||||||
|
else
|
||||||
|
bufferLength += charsRead;
|
||||||
|
}
|
||||||
|
|
||||||
|
public final char BeginToken() throws IOException {
|
||||||
|
tokenStart = bufferPosition;
|
||||||
|
return readChar();
|
||||||
|
}
|
||||||
|
|
||||||
|
public final void backup(int amount) {
|
||||||
|
bufferPosition -= amount;
|
||||||
|
}
|
||||||
|
|
||||||
|
public final String GetImage() {
|
||||||
|
return new String(buffer, tokenStart, bufferPosition - tokenStart);
|
||||||
|
}
|
||||||
|
|
||||||
|
public final char[] GetSuffix(int len) {
|
||||||
|
char[] value = new char[len];
|
||||||
|
System.arraycopy(buffer, bufferPosition - len, value, 0, len);
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
|
public final void Done() {
|
||||||
|
try {
|
||||||
|
input.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
System.err.println("Caught: " + e + "; ignoring.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public final int getColumn() {
|
||||||
|
return bufferStart + bufferPosition;
|
||||||
|
}
|
||||||
|
public final int getLine() {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
public final int getEndColumn() {
|
||||||
|
return bufferStart + bufferPosition;
|
||||||
|
}
|
||||||
|
public final int getEndLine() {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
public final int getBeginColumn() {
|
||||||
|
return bufferStart + tokenStart;
|
||||||
|
}
|
||||||
|
public final int getBeginLine() {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,193 @@
|
||||||
|
/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 3.0 */
|
||||||
|
|
||||||
|
package org.apache.lucene.queryParser.surround.parser;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This exception is thrown when parse errors are encountered.
|
||||||
|
* You can explicitly create objects of this exception type by
|
||||||
|
* calling the method generateParseException in the generated
|
||||||
|
* parser.
|
||||||
|
*
|
||||||
|
* You can modify this class to customize your error reporting
|
||||||
|
* mechanisms so long as you retain the public fields.
|
||||||
|
*/
|
||||||
|
public class ParseException extends Exception {
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This constructor is used by the method "generateParseException"
|
||||||
|
* in the generated parser. Calling this constructor generates
|
||||||
|
* a new object of this type with the fields "currentToken",
|
||||||
|
* "expectedTokenSequences", and "tokenImage" set. The boolean
|
||||||
|
* flag "specialConstructor" is also set to true to indicate that
|
||||||
|
* this constructor was used to create this object.
|
||||||
|
* This constructor calls its super class with the empty string
|
||||||
|
* to force the "toString" method of parent class "Throwable" to
|
||||||
|
* print the error message in the form:
|
||||||
|
* ParseException: <result of getMessage>
|
||||||
|
*/
|
||||||
|
public ParseException(Token currentTokenVal,
|
||||||
|
int[][] expectedTokenSequencesVal,
|
||||||
|
String[] tokenImageVal
|
||||||
|
)
|
||||||
|
{
|
||||||
|
super("");
|
||||||
|
specialConstructor = true;
|
||||||
|
currentToken = currentTokenVal;
|
||||||
|
expectedTokenSequences = expectedTokenSequencesVal;
|
||||||
|
tokenImage = tokenImageVal;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The following constructors are for use by you for whatever
|
||||||
|
* purpose you can think of. Constructing the exception in this
|
||||||
|
* manner makes the exception behave in the normal way - i.e., as
|
||||||
|
* documented in the class "Throwable". The fields "errorToken",
|
||||||
|
* "expectedTokenSequences", and "tokenImage" do not contain
|
||||||
|
* relevant information. The JavaCC generated code does not use
|
||||||
|
* these constructors.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public ParseException() {
|
||||||
|
super();
|
||||||
|
specialConstructor = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ParseException(String message) {
|
||||||
|
super(message);
|
||||||
|
specialConstructor = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This variable determines which constructor was used to create
|
||||||
|
* this object and thereby affects the semantics of the
|
||||||
|
* "getMessage" method (see below).
|
||||||
|
*/
|
||||||
|
protected boolean specialConstructor;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is the last token that has been consumed successfully. If
|
||||||
|
* this object has been created due to a parse error, the token
|
||||||
|
* followng this token will (therefore) be the first error token.
|
||||||
|
*/
|
||||||
|
public Token currentToken;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Each entry in this array is an array of integers. Each array
|
||||||
|
* of integers represents a sequence of tokens (by their ordinal
|
||||||
|
* values) that is expected at this point of the parse.
|
||||||
|
*/
|
||||||
|
public int[][] expectedTokenSequences;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This is a reference to the "tokenImage" array of the generated
|
||||||
|
* parser within which the parse error occurred. This array is
|
||||||
|
* defined in the generated ...Constants interface.
|
||||||
|
*/
|
||||||
|
public String[] tokenImage;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This method has the standard behavior when this object has been
|
||||||
|
* created using the standard constructors. Otherwise, it uses
|
||||||
|
* "currentToken" and "expectedTokenSequences" to generate a parse
|
||||||
|
* error message and returns it. If this object has been created
|
||||||
|
* due to a parse error, and you do not catch it (it gets thrown
|
||||||
|
* from the parser), then this method is called during the printing
|
||||||
|
* of the final stack trace, and hence the correct error message
|
||||||
|
* gets displayed.
|
||||||
|
*/
|
||||||
|
public String getMessage() {
|
||||||
|
if (!specialConstructor) {
|
||||||
|
return super.getMessage();
|
||||||
|
}
|
||||||
|
String expected = "";
|
||||||
|
int maxSize = 0;
|
||||||
|
for (int i = 0; i < expectedTokenSequences.length; i++) {
|
||||||
|
if (maxSize < expectedTokenSequences[i].length) {
|
||||||
|
maxSize = expectedTokenSequences[i].length;
|
||||||
|
}
|
||||||
|
for (int j = 0; j < expectedTokenSequences[i].length; j++) {
|
||||||
|
expected += tokenImage[expectedTokenSequences[i][j]] + " ";
|
||||||
|
}
|
||||||
|
if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) {
|
||||||
|
expected += "...";
|
||||||
|
}
|
||||||
|
expected += eol + " ";
|
||||||
|
}
|
||||||
|
String retval = "Encountered \"";
|
||||||
|
Token tok = currentToken.next;
|
||||||
|
for (int i = 0; i < maxSize; i++) {
|
||||||
|
if (i != 0) retval += " ";
|
||||||
|
if (tok.kind == 0) {
|
||||||
|
retval += tokenImage[0];
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
retval += add_escapes(tok.image);
|
||||||
|
tok = tok.next;
|
||||||
|
}
|
||||||
|
retval += "\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn;
|
||||||
|
retval += "." + eol;
|
||||||
|
if (expectedTokenSequences.length == 1) {
|
||||||
|
retval += "Was expecting:" + eol + " ";
|
||||||
|
} else {
|
||||||
|
retval += "Was expecting one of:" + eol + " ";
|
||||||
|
}
|
||||||
|
retval += expected;
|
||||||
|
return retval;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* The end of line string for this machine.
|
||||||
|
*/
|
||||||
|
protected String eol = System.getProperty("line.separator", "\n");
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Used to convert raw characters to their escaped version
|
||||||
|
* when these raw version cannot be used as part of an ASCII
|
||||||
|
* string literal.
|
||||||
|
*/
|
||||||
|
protected String add_escapes(String str) {
|
||||||
|
StringBuffer retval = new StringBuffer();
|
||||||
|
char ch;
|
||||||
|
for (int i = 0; i < str.length(); i++) {
|
||||||
|
switch (str.charAt(i))
|
||||||
|
{
|
||||||
|
case 0 :
|
||||||
|
continue;
|
||||||
|
case '\b':
|
||||||
|
retval.append("\\b");
|
||||||
|
continue;
|
||||||
|
case '\t':
|
||||||
|
retval.append("\\t");
|
||||||
|
continue;
|
||||||
|
case '\n':
|
||||||
|
retval.append("\\n");
|
||||||
|
continue;
|
||||||
|
case '\f':
|
||||||
|
retval.append("\\f");
|
||||||
|
continue;
|
||||||
|
case '\r':
|
||||||
|
retval.append("\\r");
|
||||||
|
continue;
|
||||||
|
case '\"':
|
||||||
|
retval.append("\\\"");
|
||||||
|
continue;
|
||||||
|
case '\'':
|
||||||
|
retval.append("\\\'");
|
||||||
|
continue;
|
||||||
|
case '\\':
|
||||||
|
retval.append("\\\\");
|
||||||
|
continue;
|
||||||
|
default:
|
||||||
|
if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) {
|
||||||
|
String s = "0000" + Integer.toString(ch, 16);
|
||||||
|
retval.append("\\u" + s.substring(s.length() - 4, s.length()));
|
||||||
|
} else {
|
||||||
|
retval.append(ch);
|
||||||
|
}
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return retval.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,727 @@
|
||||||
|
/* Generated By:JavaCC: Do not edit this line. QueryParser.java */
|
||||||
|
package org.apache.lucene.queryParser.surround.parser;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.io.StringReader;
|
||||||
|
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
|
||||||
|
import org.apache.lucene.queryParser.surround.query.SrndQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.FieldsQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.OrQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.AndQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.NotQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.DistanceQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.SrndTermQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.SrndPrefixQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.SrndTruncQuery;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class is generated by JavaCC. The only method that clients should need
|
||||||
|
* to call is <a href="#parse">parse()</a>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class QueryParser implements QueryParserConstants {
|
||||||
|
final int minimumPrefixLength = 3;
|
||||||
|
final int minimumCharsInTrunc = 3;
|
||||||
|
final String truncationErrorMessage = "Too unrestrictive truncation: ";
|
||||||
|
final String boostErrorMessage = "Cannot handle boost value: ";
|
||||||
|
|
||||||
|
/* CHECKME: These should be the same as for the tokenizer. How? */
|
||||||
|
final char truncator = '*';
|
||||||
|
final char anyChar = '?';
|
||||||
|
final char quote = '\"';
|
||||||
|
final char fieldOperator = ':';
|
||||||
|
final char comma = ','; /* prefix list separator */
|
||||||
|
final char carat = '^'; /* weight oparator */
|
||||||
|
|
||||||
|
static public SrndQuery parse(String query) throws ParseException {
|
||||||
|
QueryParser parser = new QueryParser();
|
||||||
|
return parser.parse2(query);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Creates a parser over an empty input; parse2 re-initialises it with the
 * real query text.
 */
public QueryParser() {
  this(new FastCharStream(new StringReader("")));
}
|
||||||
|
|
||||||
|
public SrndQuery parse2(String query) throws ParseException {
|
||||||
|
ReInit(new FastCharStream(new StringReader(query)));
|
||||||
|
try {
|
||||||
|
return TopSrndQuery();
|
||||||
|
} catch (TokenMgrError tme) {
|
||||||
|
throw new ParseException(tme.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected SrndQuery getFieldsQuery(
|
||||||
|
SrndQuery q, ArrayList fieldNames) {
|
||||||
|
/* FIXME: check acceptable subquery: at least one subquery should not be
|
||||||
|
* a fields query.
|
||||||
|
*/
|
||||||
|
return new FieldsQuery(q, fieldNames, fieldOperator);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected SrndQuery getOrQuery(List queries, boolean infix, Token orToken) {
|
||||||
|
return new OrQuery(queries, infix, orToken.image);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected SrndQuery getAndQuery(List queries, boolean infix, Token andToken) {
|
||||||
|
return new AndQuery( queries, infix, andToken.image);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected SrndQuery getNotQuery(List queries, Token notToken) {
|
||||||
|
return new NotQuery( queries, notToken.image);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static int getOpDistance(String distanceOp) {
|
||||||
|
/* W, 2W, 3W etc -> 1, 2 3, etc. Same for N, 2N ... */
|
||||||
|
return distanceOp.length() == 1
|
||||||
|
? 1
|
||||||
|
: Integer.parseInt( distanceOp.substring( 0, distanceOp.length() - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static void checkDistanceSubQueries(DistanceQuery distq, String opName)
|
||||||
|
throws ParseException {
|
||||||
|
String m = distq.distanceSubQueryNotAllowed();
|
||||||
|
if (m != null) {
|
||||||
|
throw new ParseException("Operator " + opName + ": " + m);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected SrndQuery getDistanceQuery(
|
||||||
|
List queries,
|
||||||
|
boolean infix,
|
||||||
|
Token dToken,
|
||||||
|
boolean ordered) throws ParseException {
|
||||||
|
DistanceQuery dq = new DistanceQuery(queries,
|
||||||
|
infix,
|
||||||
|
getOpDistance(dToken.image),
|
||||||
|
dToken.image,
|
||||||
|
ordered);
|
||||||
|
checkDistanceSubQueries(dq, dToken.image);
|
||||||
|
return dq;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected SrndQuery getTermQuery(
|
||||||
|
String term, boolean quoted) {
|
||||||
|
return new SrndTermQuery(term, quoted);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected boolean allowedSuffix(String suffixed) {
|
||||||
|
return (suffixed.length() - 1) >= minimumPrefixLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected SrndQuery getPrefixQuery(
|
||||||
|
String prefix, boolean quoted) {
|
||||||
|
return new SrndPrefixQuery(prefix, quoted, truncator);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected boolean allowedTruncation(String truncated) {
|
||||||
|
/* At least 3 normal characters needed. */
|
||||||
|
int nrNormalChars = 0;
|
||||||
|
for (int i = 0; i < truncated.length(); i++) {
|
||||||
|
char c = truncated.charAt(i);
|
||||||
|
if ((c != truncator) && (c != anyChar)) {
|
||||||
|
nrNormalChars++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nrNormalChars >= minimumCharsInTrunc;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected SrndQuery getTruncQuery(String truncated) {
|
||||||
|
return new SrndTruncQuery(truncated, truncator, anyChar);
|
||||||
|
}
|
||||||
|
|
||||||
|
final public SrndQuery TopSrndQuery() throws ParseException {
|
||||||
|
SrndQuery q;
|
||||||
|
q = FieldsQuery();
|
||||||
|
jj_consume_token(0);
|
||||||
|
{if (true) return q;}
|
||||||
|
throw new Error("Missing return statement in function");
|
||||||
|
}
|
||||||
|
|
||||||
|
final public SrndQuery FieldsQuery() throws ParseException {
|
||||||
|
SrndQuery q;
|
||||||
|
ArrayList fieldNames;
|
||||||
|
fieldNames = OptionalFields();
|
||||||
|
q = OrQuery();
|
||||||
|
{if (true) return (fieldNames == null) ? q : getFieldsQuery(q, fieldNames);}
|
||||||
|
throw new Error("Missing return statement in function");
|
||||||
|
}
|
||||||
|
|
||||||
|
final public ArrayList OptionalFields() throws ParseException {
|
||||||
|
Token fieldName;
|
||||||
|
ArrayList fieldNames = null;
|
||||||
|
label_1:
|
||||||
|
while (true) {
|
||||||
|
if (jj_2_1(2)) {
|
||||||
|
;
|
||||||
|
} else {
|
||||||
|
break label_1;
|
||||||
|
}
|
||||||
|
// to the colon
|
||||||
|
fieldName = jj_consume_token(TERM);
|
||||||
|
jj_consume_token(COLON);
|
||||||
|
if (fieldNames == null) {
|
||||||
|
fieldNames = new ArrayList();
|
||||||
|
}
|
||||||
|
fieldNames.add(fieldName.image);
|
||||||
|
}
|
||||||
|
{if (true) return fieldNames;}
|
||||||
|
throw new Error("Missing return statement in function");
|
||||||
|
}
|
||||||
|
|
||||||
|
final public SrndQuery OrQuery() throws ParseException {
|
||||||
|
SrndQuery q;
|
||||||
|
ArrayList queries = null;
|
||||||
|
Token oprt = null;
|
||||||
|
q = AndQuery();
|
||||||
|
label_2:
|
||||||
|
while (true) {
|
||||||
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
|
case OR:
|
||||||
|
;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
jj_la1[0] = jj_gen;
|
||||||
|
break label_2;
|
||||||
|
}
|
||||||
|
oprt = jj_consume_token(OR);
|
||||||
|
/* keep only last used operator */
|
||||||
|
if (queries == null) {
|
||||||
|
queries = new ArrayList();
|
||||||
|
queries.add(q);
|
||||||
|
}
|
||||||
|
q = AndQuery();
|
||||||
|
queries.add(q);
|
||||||
|
}
|
||||||
|
{if (true) return (queries == null) ? q : getOrQuery(queries, true /* infix */, oprt);}
|
||||||
|
throw new Error("Missing return statement in function");
|
||||||
|
}
|
||||||
|
|
||||||
|
final public SrndQuery AndQuery() throws ParseException {
|
||||||
|
SrndQuery q;
|
||||||
|
ArrayList queries = null;
|
||||||
|
Token oprt = null;
|
||||||
|
q = NotQuery();
|
||||||
|
label_3:
|
||||||
|
while (true) {
|
||||||
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
|
case AND:
|
||||||
|
;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
jj_la1[1] = jj_gen;
|
||||||
|
break label_3;
|
||||||
|
}
|
||||||
|
oprt = jj_consume_token(AND);
|
||||||
|
/* keep only last used operator */
|
||||||
|
if (queries == null) {
|
||||||
|
queries = new ArrayList();
|
||||||
|
queries.add(q);
|
||||||
|
}
|
||||||
|
q = NotQuery();
|
||||||
|
queries.add(q);
|
||||||
|
}
|
||||||
|
{if (true) return (queries == null) ? q : getAndQuery(queries, true /* infix */, oprt);}
|
||||||
|
throw new Error("Missing return statement in function");
|
||||||
|
}
|
||||||
|
|
||||||
|
final public SrndQuery NotQuery() throws ParseException {
|
||||||
|
SrndQuery q;
|
||||||
|
ArrayList queries = null;
|
||||||
|
Token oprt = null;
|
||||||
|
q = NQuery();
|
||||||
|
label_4:
|
||||||
|
while (true) {
|
||||||
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
|
case NOT:
|
||||||
|
;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
jj_la1[2] = jj_gen;
|
||||||
|
break label_4;
|
||||||
|
}
|
||||||
|
oprt = jj_consume_token(NOT);
|
||||||
|
/* keep only last used operator */
|
||||||
|
if (queries == null) {
|
||||||
|
queries = new ArrayList();
|
||||||
|
queries.add(q);
|
||||||
|
}
|
||||||
|
q = NQuery();
|
||||||
|
queries.add(q);
|
||||||
|
}
|
||||||
|
{if (true) return (queries == null) ? q : getNotQuery(queries, oprt);}
|
||||||
|
throw new Error("Missing return statement in function");
|
||||||
|
}
|
||||||
|
|
||||||
|
final public SrndQuery NQuery() throws ParseException {
|
||||||
|
SrndQuery q;
|
||||||
|
ArrayList queries;
|
||||||
|
Token dt;
|
||||||
|
q = WQuery();
|
||||||
|
label_5:
|
||||||
|
while (true) {
|
||||||
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
|
case N:
|
||||||
|
;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
jj_la1[3] = jj_gen;
|
||||||
|
break label_5;
|
||||||
|
}
|
||||||
|
dt = jj_consume_token(N);
|
||||||
|
queries = new ArrayList();
|
||||||
|
queries.add(q); /* left associative */
|
||||||
|
|
||||||
|
q = WQuery();
|
||||||
|
queries.add(q);
|
||||||
|
q = getDistanceQuery(queries, true /* infix */, dt, false /* not ordered */);
|
||||||
|
}
|
||||||
|
{if (true) return q;}
|
||||||
|
throw new Error("Missing return statement in function");
|
||||||
|
}
|
||||||
|
|
||||||
|
final public SrndQuery WQuery() throws ParseException {
|
||||||
|
SrndQuery q;
|
||||||
|
ArrayList queries;
|
||||||
|
Token wt;
|
||||||
|
q = PrimaryQuery();
|
||||||
|
label_6:
|
||||||
|
while (true) {
|
||||||
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
|
case W:
|
||||||
|
;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
jj_la1[4] = jj_gen;
|
||||||
|
break label_6;
|
||||||
|
}
|
||||||
|
wt = jj_consume_token(W);
|
||||||
|
queries = new ArrayList();
|
||||||
|
queries.add(q); /* left associative */
|
||||||
|
|
||||||
|
q = PrimaryQuery();
|
||||||
|
queries.add(q);
|
||||||
|
q = getDistanceQuery(queries, true /* infix */, wt, true /* ordered */);
|
||||||
|
}
|
||||||
|
{if (true) return q;}
|
||||||
|
throw new Error("Missing return statement in function");
|
||||||
|
}
|
||||||
|
|
||||||
|
final public SrndQuery PrimaryQuery() throws ParseException {
|
||||||
|
/* bracketed weighted query or weighted term */
|
||||||
|
SrndQuery q;
|
||||||
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
|
case LPAREN:
|
||||||
|
jj_consume_token(LPAREN);
|
||||||
|
q = FieldsQuery();
|
||||||
|
jj_consume_token(RPAREN);
|
||||||
|
break;
|
||||||
|
case OR:
|
||||||
|
case AND:
|
||||||
|
case W:
|
||||||
|
case N:
|
||||||
|
q = PrefixOperatorQuery();
|
||||||
|
break;
|
||||||
|
case TRUNCQUOTED:
|
||||||
|
case QUOTED:
|
||||||
|
case SUFFIXTERM:
|
||||||
|
case TRUNCTERM:
|
||||||
|
case TERM:
|
||||||
|
q = SimpleTerm();
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
jj_la1[5] = jj_gen;
|
||||||
|
jj_consume_token(-1);
|
||||||
|
throw new ParseException();
|
||||||
|
}
|
||||||
|
OptionalWeights(q);
|
||||||
|
{if (true) return q;}
|
||||||
|
throw new Error("Missing return statement in function");
|
||||||
|
}
|
||||||
|
|
||||||
|
final public SrndQuery PrefixOperatorQuery() throws ParseException {
|
||||||
|
Token oprt;
|
||||||
|
List queries;
|
||||||
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
|
case OR:
|
||||||
|
oprt = jj_consume_token(OR);
|
||||||
|
/* prefix OR */
|
||||||
|
queries = FieldsQueryList();
|
||||||
|
{if (true) return getOrQuery(queries, false /* not infix */, oprt);}
|
||||||
|
break;
|
||||||
|
case AND:
|
||||||
|
oprt = jj_consume_token(AND);
|
||||||
|
/* prefix AND */
|
||||||
|
queries = FieldsQueryList();
|
||||||
|
{if (true) return getAndQuery(queries, false /* not infix */, oprt);}
|
||||||
|
break;
|
||||||
|
case N:
|
||||||
|
oprt = jj_consume_token(N);
|
||||||
|
/* prefix N */
|
||||||
|
queries = FieldsQueryList();
|
||||||
|
{if (true) return getDistanceQuery(queries, false /* not infix */, oprt, false /* not ordered */);}
|
||||||
|
break;
|
||||||
|
case W:
|
||||||
|
oprt = jj_consume_token(W);
|
||||||
|
/* prefix W */
|
||||||
|
queries = FieldsQueryList();
|
||||||
|
{if (true) return getDistanceQuery(queries, false /* not infix */, oprt, true /* ordered */);}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
jj_la1[6] = jj_gen;
|
||||||
|
jj_consume_token(-1);
|
||||||
|
throw new ParseException();
|
||||||
|
}
|
||||||
|
throw new Error("Missing return statement in function");
|
||||||
|
}
|
||||||
|
|
||||||
|
final public List FieldsQueryList() throws ParseException {
|
||||||
|
SrndQuery q;
|
||||||
|
ArrayList queries = new ArrayList();
|
||||||
|
jj_consume_token(LPAREN);
|
||||||
|
q = FieldsQuery();
|
||||||
|
queries.add(q);
|
||||||
|
label_7:
|
||||||
|
while (true) {
|
||||||
|
jj_consume_token(COMMA);
|
||||||
|
q = FieldsQuery();
|
||||||
|
queries.add(q);
|
||||||
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
|
case COMMA:
|
||||||
|
;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
jj_la1[7] = jj_gen;
|
||||||
|
break label_7;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
jj_consume_token(RPAREN);
|
||||||
|
{if (true) return queries;}
|
||||||
|
throw new Error("Missing return statement in function");
|
||||||
|
}
|
||||||
|
|
||||||
|
final public SrndQuery SimpleTerm() throws ParseException {
|
||||||
|
Token term;
|
||||||
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
|
case TERM:
|
||||||
|
term = jj_consume_token(TERM);
|
||||||
|
{if (true) return getTermQuery(term.image, false /* not quoted */);}
|
||||||
|
break;
|
||||||
|
case QUOTED:
|
||||||
|
term = jj_consume_token(QUOTED);
|
||||||
|
{if (true) return getTermQuery(term.image.substring(1, term.image.length()-1), true /* quoted */);}
|
||||||
|
break;
|
||||||
|
case SUFFIXTERM:
|
||||||
|
term = jj_consume_token(SUFFIXTERM);
|
||||||
|
/* ending in * */
|
||||||
|
if (! allowedSuffix(term.image)) {
|
||||||
|
{if (true) throw new ParseException(truncationErrorMessage + term.image);}
|
||||||
|
}
|
||||||
|
{if (true) return getPrefixQuery(term.image.substring(0, term.image.length()-1), false /* not quoted */);}
|
||||||
|
break;
|
||||||
|
case TRUNCTERM:
|
||||||
|
term = jj_consume_token(TRUNCTERM);
|
||||||
|
/* with at least one * or ? */
|
||||||
|
if (! allowedTruncation(term.image)) {
|
||||||
|
{if (true) throw new ParseException(truncationErrorMessage + term.image);}
|
||||||
|
}
|
||||||
|
{if (true) return getTruncQuery(term.image);}
|
||||||
|
break;
|
||||||
|
case TRUNCQUOTED:
|
||||||
|
term = jj_consume_token(TRUNCQUOTED);
|
||||||
|
/* eg. "9b-b,m"* */
|
||||||
|
if ((term.image.length() - 3) < minimumPrefixLength) {
|
||||||
|
{if (true) throw new ParseException(truncationErrorMessage + term.image);}
|
||||||
|
}
|
||||||
|
{if (true) return getPrefixQuery(term.image.substring(1, term.image.length()-2), true /* quoted */);}
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
jj_la1[8] = jj_gen;
|
||||||
|
jj_consume_token(-1);
|
||||||
|
throw new ParseException();
|
||||||
|
}
|
||||||
|
throw new Error("Missing return statement in function");
|
||||||
|
}
|
||||||
|
|
||||||
|
final public void OptionalWeights(SrndQuery q) throws ParseException {
|
||||||
|
Token weight=null;
|
||||||
|
label_8:
|
||||||
|
while (true) {
|
||||||
|
switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
|
||||||
|
case CARAT:
|
||||||
|
;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
jj_la1[9] = jj_gen;
|
||||||
|
break label_8;
|
||||||
|
}
|
||||||
|
jj_consume_token(CARAT);
|
||||||
|
weight = jj_consume_token(NUMBER);
|
||||||
|
float f;
|
||||||
|
try {
|
||||||
|
f = Float.valueOf(weight.image).floatValue();
|
||||||
|
} catch (Exception floatExc) {
|
||||||
|
{if (true) throw new ParseException(boostErrorMessage + weight.image + " (" + floatExc + ")");}
|
||||||
|
}
|
||||||
|
if (f <= 0.0) {
|
||||||
|
{if (true) throw new ParseException(boostErrorMessage + weight.image);}
|
||||||
|
}
|
||||||
|
q.setWeight(f * q.getWeight()); /* left associative, fwiw */
|
||||||
|
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final private boolean jj_2_1(int xla) {
|
||||||
|
jj_la = xla; jj_lastpos = jj_scanpos = token;
|
||||||
|
try { return !jj_3_1(); }
|
||||||
|
catch(LookaheadSuccess ls) { return true; }
|
||||||
|
finally { jj_save(0, xla); }
|
||||||
|
}
|
||||||
|
|
||||||
|
final private boolean jj_3_1() {
|
||||||
|
if (jj_scan_token(TERM)) return true;
|
||||||
|
if (jj_scan_token(COLON)) return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Token manager that supplies tokens on demand. */
public QueryParserTokenManager token_source;

/** Most recently consumed token. */
public Token token;

/** Set by jj_ntk() to the token following {@link #token}, when already fetched. */
public Token jj_nt;

/** Kind of the next token, or -1 when it has not been determined yet. */
private int jj_ntk;

/** Current position of the lookahead scan. */
private Token jj_scanpos;

/** Furthest token reached by the lookahead scan. */
private Token jj_lastpos;

/** Number of tokens the running lookahead may still advance. */
private int jj_la;

/** When true, getToken() counts from the scan position instead of the current token. */
public boolean lookingAhead = false;

/* Not referenced by the visible parser code. */
private boolean jj_semLA;

/** Incremented for every token that is consumed. */
private int jj_gen;

/** Per choice point: value of jj_gen when the default branch was last taken. */
private final int[] jj_la1 = new int[10];

/** Per choice point: bit mask of the token kinds accepted there. */
private static int[] jj_la1_0;

static {
  jj_la1_0();
}

/** Fills {@link #jj_la1_0}; entry i belongs to choice point i of jj_la1. */
private static void jj_la1_0() {
  jj_la1_0 = new int[] {
    0x100,
    0x200,
    0x400,
    0x1000,
    0x800,
    0x7c3b00,
    0x1b00,
    0x8000,
    0x7c0000,
    0x20000,
  };
}

/** One chain of saved lookahead calls per syntactic lookahead (only one here). */
private final JJCalls[] jj_2_rtns = new JJCalls[1];

/** True only while jj_rescan_token() is running. */
private boolean jj_rescan = false;

/** Counts consumed tokens; stale lookahead records are pruned when it exceeds 100. */
private int jj_gc = 0;
|
||||||
|
|
||||||
|
public QueryParser(CharStream stream) {
|
||||||
|
token_source = new QueryParserTokenManager(stream);
|
||||||
|
token = new Token();
|
||||||
|
jj_ntk = -1;
|
||||||
|
jj_gen = 0;
|
||||||
|
for (int i = 0; i < 10; i++) jj_la1[i] = -1;
|
||||||
|
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void ReInit(CharStream stream) {
|
||||||
|
token_source.ReInit(stream);
|
||||||
|
token = new Token();
|
||||||
|
jj_ntk = -1;
|
||||||
|
jj_gen = 0;
|
||||||
|
for (int i = 0; i < 10; i++) jj_la1[i] = -1;
|
||||||
|
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
|
||||||
|
}
|
||||||
|
|
||||||
|
public QueryParser(QueryParserTokenManager tm) {
|
||||||
|
token_source = tm;
|
||||||
|
token = new Token();
|
||||||
|
jj_ntk = -1;
|
||||||
|
jj_gen = 0;
|
||||||
|
for (int i = 0; i < 10; i++) jj_la1[i] = -1;
|
||||||
|
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void ReInit(QueryParserTokenManager tm) {
|
||||||
|
token_source = tm;
|
||||||
|
token = new Token();
|
||||||
|
jj_ntk = -1;
|
||||||
|
jj_gen = 0;
|
||||||
|
for (int i = 0; i < 10; i++) jj_la1[i] = -1;
|
||||||
|
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
|
||||||
|
}
|
||||||
|
|
||||||
|
final private Token jj_consume_token(int kind) throws ParseException {
|
||||||
|
Token oldToken;
|
||||||
|
if ((oldToken = token).next != null) token = token.next;
|
||||||
|
else token = token.next = token_source.getNextToken();
|
||||||
|
jj_ntk = -1;
|
||||||
|
if (token.kind == kind) {
|
||||||
|
jj_gen++;
|
||||||
|
if (++jj_gc > 100) {
|
||||||
|
jj_gc = 0;
|
||||||
|
for (int i = 0; i < jj_2_rtns.length; i++) {
|
||||||
|
JJCalls c = jj_2_rtns[i];
|
||||||
|
while (c != null) {
|
||||||
|
if (c.gen < jj_gen) c.first = null;
|
||||||
|
c = c.next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return token;
|
||||||
|
}
|
||||||
|
token = oldToken;
|
||||||
|
jj_kind = kind;
|
||||||
|
throw generateParseException();
|
||||||
|
}
|
||||||
|
|
||||||
|
static private final class LookaheadSuccess extends java.lang.Error { }
|
||||||
|
final private LookaheadSuccess jj_ls = new LookaheadSuccess();
|
||||||
|
final private boolean jj_scan_token(int kind) {
|
||||||
|
if (jj_scanpos == jj_lastpos) {
|
||||||
|
jj_la--;
|
||||||
|
if (jj_scanpos.next == null) {
|
||||||
|
jj_lastpos = jj_scanpos = jj_scanpos.next = token_source.getNextToken();
|
||||||
|
} else {
|
||||||
|
jj_lastpos = jj_scanpos = jj_scanpos.next;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
jj_scanpos = jj_scanpos.next;
|
||||||
|
}
|
||||||
|
if (jj_rescan) {
|
||||||
|
int i = 0; Token tok = token;
|
||||||
|
while (tok != null && tok != jj_scanpos) { i++; tok = tok.next; }
|
||||||
|
if (tok != null) jj_add_error_token(kind, i);
|
||||||
|
}
|
||||||
|
if (jj_scanpos.kind != kind) return true;
|
||||||
|
if (jj_la == 0 && jj_scanpos == jj_lastpos) throw jj_ls;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
final public Token getNextToken() {
|
||||||
|
if (token.next != null) token = token.next;
|
||||||
|
else token = token.next = token_source.getNextToken();
|
||||||
|
jj_ntk = -1;
|
||||||
|
jj_gen++;
|
||||||
|
return token;
|
||||||
|
}
|
||||||
|
|
||||||
|
final public Token getToken(int index) {
|
||||||
|
Token t = lookingAhead ? jj_scanpos : token;
|
||||||
|
for (int i = 0; i < index; i++) {
|
||||||
|
if (t.next != null) t = t.next;
|
||||||
|
else t = t.next = token_source.getNextToken();
|
||||||
|
}
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
final private int jj_ntk() {
|
||||||
|
if ((jj_nt=token.next) == null)
|
||||||
|
return (jj_ntk = (token.next=token_source.getNextToken()).kind);
|
||||||
|
else
|
||||||
|
return (jj_ntk = jj_nt.kind);
|
||||||
|
}
|
||||||
|
|
||||||
|
private java.util.Vector jj_expentries = new java.util.Vector();
|
||||||
|
private int[] jj_expentry;
|
||||||
|
private int jj_kind = -1;
|
||||||
|
private int[] jj_lasttokens = new int[100];
|
||||||
|
private int jj_endpos;
|
||||||
|
|
||||||
|
private void jj_add_error_token(int kind, int pos) {
|
||||||
|
if (pos >= 100) return;
|
||||||
|
if (pos == jj_endpos + 1) {
|
||||||
|
jj_lasttokens[jj_endpos++] = kind;
|
||||||
|
} else if (jj_endpos != 0) {
|
||||||
|
jj_expentry = new int[jj_endpos];
|
||||||
|
for (int i = 0; i < jj_endpos; i++) {
|
||||||
|
jj_expentry[i] = jj_lasttokens[i];
|
||||||
|
}
|
||||||
|
boolean exists = false;
|
||||||
|
for (java.util.Enumeration e = jj_expentries.elements(); e.hasMoreElements();) {
|
||||||
|
int[] oldentry = (int[])(e.nextElement());
|
||||||
|
if (oldentry.length == jj_expentry.length) {
|
||||||
|
exists = true;
|
||||||
|
for (int i = 0; i < jj_expentry.length; i++) {
|
||||||
|
if (oldentry[i] != jj_expentry[i]) {
|
||||||
|
exists = false;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (exists) break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!exists) jj_expentries.addElement(jj_expentry);
|
||||||
|
if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public ParseException generateParseException() {
|
||||||
|
jj_expentries.removeAllElements();
|
||||||
|
boolean[] la1tokens = new boolean[24];
|
||||||
|
for (int i = 0; i < 24; i++) {
|
||||||
|
la1tokens[i] = false;
|
||||||
|
}
|
||||||
|
if (jj_kind >= 0) {
|
||||||
|
la1tokens[jj_kind] = true;
|
||||||
|
jj_kind = -1;
|
||||||
|
}
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
if (jj_la1[i] == jj_gen) {
|
||||||
|
for (int j = 0; j < 32; j++) {
|
||||||
|
if ((jj_la1_0[i] & (1<<j)) != 0) {
|
||||||
|
la1tokens[j] = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (int i = 0; i < 24; i++) {
|
||||||
|
if (la1tokens[i]) {
|
||||||
|
jj_expentry = new int[1];
|
||||||
|
jj_expentry[0] = i;
|
||||||
|
jj_expentries.addElement(jj_expentry);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
jj_endpos = 0;
|
||||||
|
jj_rescan_token();
|
||||||
|
jj_add_error_token(0, 0);
|
||||||
|
int[][] exptokseq = new int[jj_expentries.size()][];
|
||||||
|
for (int i = 0; i < jj_expentries.size(); i++) {
|
||||||
|
exptokseq[i] = (int[])jj_expentries.elementAt(i);
|
||||||
|
}
|
||||||
|
return new ParseException(token, exptokseq, tokenImage);
|
||||||
|
}
|
||||||
|
|
||||||
|
final public void enable_tracing() {
|
||||||
|
}
|
||||||
|
|
||||||
|
final public void disable_tracing() {
|
||||||
|
}
|
||||||
|
|
||||||
|
final private void jj_rescan_token() {
|
||||||
|
jj_rescan = true;
|
||||||
|
for (int i = 0; i < 1; i++) {
|
||||||
|
JJCalls p = jj_2_rtns[i];
|
||||||
|
do {
|
||||||
|
if (p.gen > jj_gen) {
|
||||||
|
jj_la = p.arg; jj_lastpos = jj_scanpos = p.first;
|
||||||
|
switch (i) {
|
||||||
|
case 0: jj_3_1(); break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
p = p.next;
|
||||||
|
} while (p != null);
|
||||||
|
}
|
||||||
|
jj_rescan = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
final private void jj_save(int index, int xla) {
|
||||||
|
JJCalls p = jj_2_rtns[index];
|
||||||
|
while (p.gen > jj_gen) {
|
||||||
|
if (p.next == null) { p = p.next = new JJCalls(); break; }
|
||||||
|
p = p.next;
|
||||||
|
}
|
||||||
|
p.gen = jj_gen + xla - jj_la; p.first = token; p.arg = xla;
|
||||||
|
}
|
||||||
|
|
||||||
|
static final class JJCalls {
|
||||||
|
int gen;
|
||||||
|
Token first;
|
||||||
|
int arg;
|
||||||
|
JJCalls next;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,453 @@
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Surround query language parser */
|
||||||
|
|
||||||
|
/* Query language operators: OR, AND, NOT, W, N, (, ), ^, *, ?, " and comma */
|
||||||
|
|
||||||
|
|
||||||
|
options {
|
||||||
|
STATIC=false;
|
||||||
|
JAVA_UNICODE_ESCAPE=true;
|
||||||
|
USER_CHAR_STREAM=true;
|
||||||
|
}
|
||||||
|
|
||||||
|
PARSER_BEGIN(QueryParser)
|
||||||
|
|
||||||
|
package org.apache.lucene.queryParser.surround.parser;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.io.StringReader;
|
||||||
|
|
||||||
|
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
|
||||||
|
import org.apache.lucene.queryParser.surround.query.SrndQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.FieldsQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.OrQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.AndQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.NotQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.DistanceQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.SrndTermQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.SrndPrefixQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.SrndTruncQuery;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* This class is generated by JavaCC. The only method that clients should need
|
||||||
|
* to call is <a href="#parse">parse()</a>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class QueryParser {
|
||||||
|
final int minimumPrefixLength = 3;
|
||||||
|
final int minimumCharsInTrunc = 3;
|
||||||
|
final String truncationErrorMessage = "Too unrestrictive truncation: ";
|
||||||
|
final String boostErrorMessage = "Cannot handle boost value: ";
|
||||||
|
|
||||||
|
/* CHECKME: These should be the same as for the tokenizer. How? */
|
||||||
|
final char truncator = '*';
|
||||||
|
final char anyChar = '?';
|
||||||
|
final char quote = '\"';
|
||||||
|
final char fieldOperator = ':';
|
||||||
|
final char comma = ','; /* prefix list separator */
|
||||||
|
final char carat = '^'; /* weight oparator */
|
||||||
|
|
||||||
|
static public SrndQuery parse(String query) throws ParseException {
|
||||||
|
QueryParser parser = new QueryParser();
|
||||||
|
return parser.parse2(query);
|
||||||
|
}
|
||||||
|
|
||||||
|
public QueryParser() {
|
||||||
|
this(new FastCharStream(new StringReader("")));
|
||||||
|
}
|
||||||
|
|
||||||
|
public SrndQuery parse2(String query) throws ParseException {
|
||||||
|
ReInit(new FastCharStream(new StringReader(query)));
|
||||||
|
try {
|
||||||
|
return TopSrndQuery();
|
||||||
|
} catch (TokenMgrError tme) {
|
||||||
|
throw new ParseException(tme.getMessage());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected SrndQuery getFieldsQuery(
|
||||||
|
SrndQuery q, ArrayList fieldNames) {
|
||||||
|
/* FIXME: check acceptable subquery: at least one subquery should not be
|
||||||
|
* a fields query.
|
||||||
|
*/
|
||||||
|
return new FieldsQuery(q, fieldNames, fieldOperator);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected SrndQuery getOrQuery(List queries, boolean infix, Token orToken) {
|
||||||
|
return new OrQuery(queries, infix, orToken.image);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected SrndQuery getAndQuery(List queries, boolean infix, Token andToken) {
|
||||||
|
return new AndQuery( queries, infix, andToken.image);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected SrndQuery getNotQuery(List queries, Token notToken) {
|
||||||
|
return new NotQuery( queries, notToken.image);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static int getOpDistance(String distanceOp) {
|
||||||
|
/* W, 2W, 3W etc -> 1, 2 3, etc. Same for N, 2N ... */
|
||||||
|
return distanceOp.length() == 1
|
||||||
|
? 1
|
||||||
|
: Integer.parseInt( distanceOp.substring( 0, distanceOp.length() - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static void checkDistanceSubQueries(DistanceQuery distq, String opName)
|
||||||
|
throws ParseException {
|
||||||
|
String m = distq.distanceSubQueryNotAllowed();
|
||||||
|
if (m != null) {
|
||||||
|
throw new ParseException("Operator " + opName + ": " + m);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected SrndQuery getDistanceQuery(
|
||||||
|
List queries,
|
||||||
|
boolean infix,
|
||||||
|
Token dToken,
|
||||||
|
boolean ordered) throws ParseException {
|
||||||
|
DistanceQuery dq = new DistanceQuery(queries,
|
||||||
|
infix,
|
||||||
|
getOpDistance(dToken.image),
|
||||||
|
dToken.image,
|
||||||
|
ordered);
|
||||||
|
checkDistanceSubQueries(dq, dToken.image);
|
||||||
|
return dq;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected SrndQuery getTermQuery(
|
||||||
|
String term, boolean quoted) {
|
||||||
|
return new SrndTermQuery(term, quoted);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected boolean allowedSuffix(String suffixed) {
|
||||||
|
return (suffixed.length() - 1) >= minimumPrefixLength;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected SrndQuery getPrefixQuery(
|
||||||
|
String prefix, boolean quoted) {
|
||||||
|
return new SrndPrefixQuery(prefix, quoted, truncator);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected boolean allowedTruncation(String truncated) {
|
||||||
|
/* At least 3 normal characters needed. */
|
||||||
|
int nrNormalChars = 0;
|
||||||
|
for (int i = 0; i < truncated.length(); i++) {
|
||||||
|
char c = truncated.charAt(i);
|
||||||
|
if ((c != truncator) && (c != anyChar)) {
|
||||||
|
nrNormalChars++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nrNormalChars >= minimumCharsInTrunc;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected SrndQuery getTruncQuery(String truncated) {
|
||||||
|
return new SrndTruncQuery(truncated, truncator, anyChar);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
PARSER_END(QueryParser)
|
||||||
|
|
||||||
|
/* ***************** */
|
||||||
|
/* Token Definitions */
|
||||||
|
/* ***************** */
|
||||||
|
|
||||||
|
<*> TOKEN : {
|
||||||
|
<#_NUM_CHAR: ["0"-"9"] >
|
||||||
|
| <#_TERM_CHAR: /* everything except whitespace and operators */
|
||||||
|
( ~[ " ", "\t", "\n", "\r",
|
||||||
|
",", "?", "*", "(", ")", ":", "^", "\""]
|
||||||
|
) >
|
||||||
|
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" ) >
|
||||||
|
| <#_STAR: "*" > /* term truncation */
|
||||||
|
| <#_ONE_CHAR: "?" > /* precisely one character in a term */
|
||||||
|
/* 2..99 prefix for distance operators */
|
||||||
|
| <#_DISTOP_NUM: ((["2"-"9"](["0"-"9"])?) | ("1" ["0"-"9"]))>
|
||||||
|
}
|
||||||
|
|
||||||
|
<DEFAULT> SKIP : {
|
||||||
|
<<_WHITESPACE>>
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Operator tokens (in increasing order of precedence): */
|
||||||
|
<DEFAULT> TOKEN :
|
||||||
|
{
|
||||||
|
<OR: "OR" | "or">
|
||||||
|
| <AND: "AND" | "and">
|
||||||
|
| <NOT: "NOT" | "not">
|
||||||
|
| <W: (<_DISTOP_NUM>)? ("W"|"w")>
|
||||||
|
| <N: (<_DISTOP_NUM>)? ("N"|"n")>
|
||||||
|
/* These are excluded in _TERM_CHAR: */
|
||||||
|
| <LPAREN: "(">
|
||||||
|
| <RPAREN: ")">
|
||||||
|
| <COMMA: ",">
|
||||||
|
| <COLON: ":">
|
||||||
|
| <CARAT: "^"> : Boost
|
||||||
|
/* Literal non-empty term between double quotes,
 * escape quoted quote or backslash by backslash.
 * Optionally truncated (followed by a star).
 */
|
||||||
|
| <TRUNCQUOTED: "\"" (~["\""])+ "\"" <_STAR>>
|
||||||
|
| <QUOTED: "\"" ( (~["\"", "\\"]) | ("\\" ["\\", "\""]))+ "\"">
|
||||||
|
| <SUFFIXTERM: (<_TERM_CHAR>)+ <_STAR>>
|
||||||
|
| <TRUNCTERM: (<_TERM_CHAR>)+
|
||||||
|
(<_STAR> | <_ONE_CHAR> )+ /* at least one * or ? */
|
||||||
|
(<_TERM_CHAR> | <_STAR> | <_ONE_CHAR> )*
|
||||||
|
>
|
||||||
|
| <TERM: (<_TERM_CHAR>)+>
|
||||||
|
}
|
||||||
|
|
||||||
|
<Boost> TOKEN : {
|
||||||
|
<NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )?> : DEFAULT
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Start production: one complete query followed by end of input. */
SrndQuery TopSrndQuery() :
{
  SrndQuery topQuery;
}
{
  topQuery = FieldsQuery() <EOF>
  { return topQuery; }
}
|
||||||
|
|
||||||
|
|
||||||
|
/* An OR level query, optionally preceded by one or more "field:" prefixes. */
SrndQuery FieldsQuery() :
{
  SrndQuery body;
  ArrayList fields;
}
{
  fields = OptionalFields()
  body = OrQuery()
  {
    if (fields == null) {
      return body;
    }
    return getFieldsQuery(body, fields);
  }
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Zero or more "field:" prefixes; returns null when there is none. */
ArrayList OptionalFields() :
{
  Token nameToken;
  ArrayList names = null;
}
{
  (
    LOOKAHEAD(2) /* a TERM is a field name only when a colon follows */
    nameToken = <TERM> <COLON>
    {
      if (names == null) {
        names = new ArrayList();
      }
      names.add(nameToken.image);
    }
  )*
  { return names; }
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Infix OR: one or more AND level queries separated by OR. */
SrndQuery OrQuery() :
{
  SrndQuery operand;
  ArrayList operands = null;
  Token orToken = null;
}
{
  operand = AndQuery()
  (
    orToken = <OR>
    {
      /* Only the last operator token seen is kept. */
      if (operands == null) {
        operands = new ArrayList();
        operands.add(operand);
      }
    }
    operand = AndQuery()
    { operands.add(operand); }
  )*
  {
    if (operands == null) {
      return operand;
    }
    return getOrQuery(operands, true /* infix */, orToken);
  }
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Infix AND: one or more NOT level queries separated by AND. */
SrndQuery AndQuery() :
{
  SrndQuery operand;
  ArrayList operands = null;
  Token andToken = null;
}
{
  operand = NotQuery()
  (
    andToken = <AND>
    {
      /* Only the last operator token seen is kept. */
      if (operands == null) {
        operands = new ArrayList();
        operands.add(operand);
      }
    }
    operand = NotQuery()
    { operands.add(operand); }
  )*
  {
    if (operands == null) {
      return operand;
    }
    return getAndQuery(operands, true /* infix */, andToken);
  }
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Infix NOT: one or more N level queries separated by NOT. */
SrndQuery NotQuery() :
{
  SrndQuery operand;
  ArrayList operands = null;
  Token notToken = null;
}
{
  operand = NQuery()
  (
    notToken = <NOT>
    {
      /* Only the last operator token seen is kept. */
      if (operands == null) {
        operands = new ArrayList();
        operands.add(operand);
      }
    }
    operand = NQuery()
    { operands.add(operand); }
  )*
  {
    if (operands == null) {
      return operand;
    }
    return getNotQuery(operands, notToken);
  }
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Infix N (unordered distance): W level queries separated by N operators,
 * combined pairwise from left to right.
 */
SrndQuery NQuery() :
{
  SrndQuery result;
  ArrayList pair;
  Token nToken;
}
{
  result = WQuery()
  (
    nToken = <N>
    {
      pair = new ArrayList();
      pair.add(result); /* left associative */
    }
    result = WQuery()
    {
      pair.add(result);
      result = getDistanceQuery(pair, true /* infix */, nToken, false /* not ordered */);
    }
  )*
  { return result; }
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Infix W (ordered distance): primary queries separated by W operators,
 * combined pairwise from left to right.
 */
SrndQuery WQuery() :
{
  SrndQuery result;
  ArrayList pair;
  Token wToken;
}
{
  result = PrimaryQuery()
  (
    wToken = <W>
    {
      pair = new ArrayList();
      pair.add(result); /* left associative */
    }
    result = PrimaryQuery()
    {
      pair.add(result);
      result = getDistanceQuery(pair, true /* infix */, wToken, true /* ordered */);
    }
  )*
  { return result; }
}
|
||||||
|
|
||||||
|
|
||||||
|
/* A bracketed query, a prefix operator query or a simple term,
 * each optionally followed by weights.
 */
SrndQuery PrimaryQuery() :
{
  SrndQuery primary;
}
{
  (
    <LPAREN> primary = FieldsQuery() <RPAREN>
  |
    primary = PrefixOperatorQuery()
  |
    primary = SimpleTerm()
  )
  OptionalWeights(primary)
  { return primary; }
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Prefix notation: an operator followed by a bracketed, comma separated
 * list of queries, for the operators OR, AND, N and W.
 */
SrndQuery PrefixOperatorQuery() :
{
  Token opToken;
  List operands;
}
{
  (
    opToken = <OR> operands = FieldsQueryList()
    { return getOrQuery(operands, false /* not infix */, opToken); }
  |
    opToken = <AND> operands = FieldsQueryList()
    { return getAndQuery(operands, false /* not infix */, opToken); }
  |
    opToken = <N> operands = FieldsQueryList()
    { return getDistanceQuery(operands, false /* not infix */, opToken, false /* not ordered */); }
  |
    opToken = <W> operands = FieldsQueryList()
    { return getDistanceQuery(operands, false /* not infix */, opToken, true /* ordered */); }
  )
}
|
||||||
|
|
||||||
|
|
||||||
|
/* A bracketed list of at least two queries separated by commas. */
List FieldsQueryList() :
{
  SrndQuery element;
  ArrayList elements = new ArrayList();
}
{
  <LPAREN>
  element = FieldsQuery() { elements.add(element); }
  (
    <COMMA> element = FieldsQuery() { elements.add(element); }
  )+
  <RPAREN>
  { return elements; }
}
|
||||||
|
|
||||||
|
|
||||||
|
/* A single term: plain, quoted, suffix truncated, generally truncated,
 * or quoted and suffix truncated. Truncations that leave too few normal
 * characters are rejected.
 */
SrndQuery SimpleTerm() :
{
  Token t;
}
{
  (
    t = <TERM>
    { return getTermQuery(t.image, false /* not quoted */); }
  |
    t = <QUOTED>
    { /* strip the surrounding quotes */
      return getTermQuery(t.image.substring(1, t.image.length()-1), true /* quoted */);
    }
  |
    t = <SUFFIXTERM>
    { /* ending in *; strip it */
      if (! allowedSuffix(t.image)) {
        throw new ParseException(truncationErrorMessage + t.image);
      }
      return getPrefixQuery(t.image.substring(0, t.image.length()-1), false /* not quoted */);
    }
  |
    t = <TRUNCTERM>
    { /* with at least one * or ? */
      if (! allowedTruncation(t.image)) {
        throw new ParseException(truncationErrorMessage + t.image);
      }
      return getTruncQuery(t.image);
    }
  |
    t = <TRUNCQUOTED>
    { /* eg. "9b-b,m"* ; the two quotes and the star account for 3 characters */
      if ((t.image.length() - 3) < minimumPrefixLength) {
        throw new ParseException(truncationErrorMessage + t.image);
      }
      return getPrefixQuery(t.image.substring(1, t.image.length()-2), true /* quoted */);
    }
  )
}
|
||||||
|
|
||||||
|
|
||||||
|
/* Zero or more "^number" boosts; each one is multiplied into the weight of q.
 * A boost that cannot be converted to a float, or is not strictly positive,
 * is rejected with a ParseException.
 */
void OptionalWeights(SrndQuery q) :
{
  Token boostToken = null;
}
{
  (
    <CARAT> boostToken = <NUMBER>
    {
      float factor;
      try {
        factor = Float.valueOf(boostToken.image).floatValue();
      } catch (Exception floatExc) {
        throw new ParseException(boostErrorMessage + boostToken.image + " (" + floatExc + ")");
      }
      if (factor <= 0.0) {
        throw new ParseException(boostErrorMessage + boostToken.image);
      }
      q.setWeight(factor * q.getWeight()); /* left associative, fwiw */
    }
  )*
}
|
||||||
|
|
|
@ -0,0 +1,60 @@
|
||||||
|
/* Generated By:JavaCC: Do not edit this line. QueryParserConstants.java */
|
||||||
|
package org.apache.lucene.queryParser.surround.parser;
|
||||||
|
|
||||||
|
/**
 * Token kinds, lexical states and token images of the surround query parser.
 */
public interface QueryParserConstants {

  /* Token kinds. Kind 7 has no named constant. */
  int EOF = 0;
  int _NUM_CHAR = 1;
  int _TERM_CHAR = 2;
  int _WHITESPACE = 3;
  int _STAR = 4;
  int _ONE_CHAR = 5;
  int _DISTOP_NUM = 6;
  int OR = 8;
  int AND = 9;
  int NOT = 10;
  int W = 11;
  int N = 12;
  int LPAREN = 13;
  int RPAREN = 14;
  int COMMA = 15;
  int COLON = 16;
  int CARAT = 17;
  int TRUNCQUOTED = 18;
  int QUOTED = 19;
  int SUFFIXTERM = 20;
  int TRUNCTERM = 21;
  int TERM = 22;
  int NUMBER = 23;

  /* Lexical states. */
  int Boost = 0;
  int DEFAULT = 1;

  /** Printable image per token kind, indexed by kind. */
  String[] tokenImage = {
    "<EOF>",              // 0
    "<_NUM_CHAR>",        // 1
    "<_TERM_CHAR>",       // 2
    "<_WHITESPACE>",      // 3
    "\"*\"",              // 4
    "\"?\"",              // 5
    "<_DISTOP_NUM>",      // 6
    "<token of kind 7>",  // 7
    "<OR>",               // 8
    "<AND>",              // 9
    "<NOT>",              // 10
    "<W>",                // 11
    "<N>",                // 12
    "\"(\"",              // 13
    "\")\"",              // 14
    "\",\"",              // 15
    "\":\"",              // 16
    "\"^\"",              // 17
    "<TRUNCQUOTED>",      // 18
    "<QUOTED>",           // 19
    "<SUFFIXTERM>",       // 20
    "<TRUNCTERM>",        // 21
    "<TERM>",             // 22
    "<NUMBER>",           // 23
  };

}
|
|
@ -0,0 +1,700 @@
|
||||||
|
/* Generated By:JavaCC: Do not edit this line. QueryParserTokenManager.java */
|
||||||
|
package org.apache.lucene.queryParser.surround.parser;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.io.StringReader;
|
||||||
|
import org.apache.lucene.analysis.TokenStream;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.SrndQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.FieldsQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.OrQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.AndQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.NotQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.DistanceQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.SrndTermQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.SrndPrefixQuery;
|
||||||
|
import org.apache.lucene.queryParser.surround.query.SrndTruncQuery;
|
||||||
|
|
||||||
|
public class QueryParserTokenManager implements QueryParserConstants
|
||||||
|
{
|
||||||
|
public java.io.PrintStream debugStream = System.out;
|
||||||
|
public void setDebugStream(java.io.PrintStream ds) { debugStream = ds; }
|
||||||
|
private final int jjStopStringLiteralDfa_1(int pos, long active0)
|
||||||
|
{
|
||||||
|
switch (pos)
|
||||||
|
{
|
||||||
|
default :
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private final int jjStartNfa_1(int pos, long active0)
|
||||||
|
{
|
||||||
|
return jjMoveNfa_1(jjStopStringLiteralDfa_1(pos, active0), pos + 1);
|
||||||
|
}
|
||||||
|
private final int jjStopAtPos(int pos, int kind)
|
||||||
|
{
|
||||||
|
jjmatchedKind = kind;
|
||||||
|
jjmatchedPos = pos;
|
||||||
|
return pos + 1;
|
||||||
|
}
|
||||||
|
private final int jjStartNfaWithStates_1(int pos, int kind, int state)
|
||||||
|
{
|
||||||
|
jjmatchedKind = kind;
|
||||||
|
jjmatchedPos = pos;
|
||||||
|
try { curChar = input_stream.readChar(); }
|
||||||
|
catch(java.io.IOException e) { return pos + 1; }
|
||||||
|
return jjMoveNfa_1(state, pos + 1);
|
||||||
|
}
|
||||||
|
private final int jjMoveStringLiteralDfa0_1()
|
||||||
|
{
|
||||||
|
switch(curChar)
|
||||||
|
{
|
||||||
|
case 40:
|
||||||
|
return jjStopAtPos(0, 13);
|
||||||
|
case 41:
|
||||||
|
return jjStopAtPos(0, 14);
|
||||||
|
case 44:
|
||||||
|
return jjStopAtPos(0, 15);
|
||||||
|
case 58:
|
||||||
|
return jjStopAtPos(0, 16);
|
||||||
|
case 94:
|
||||||
|
return jjStopAtPos(0, 17);
|
||||||
|
default :
|
||||||
|
return jjMoveNfa_1(0, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private final void jjCheckNAdd(int state)
|
||||||
|
{
|
||||||
|
if (jjrounds[state] != jjround)
|
||||||
|
{
|
||||||
|
jjstateSet[jjnewStateCnt++] = state;
|
||||||
|
jjrounds[state] = jjround;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private final void jjAddStates(int start, int end)
|
||||||
|
{
|
||||||
|
do {
|
||||||
|
jjstateSet[jjnewStateCnt++] = jjnextStates[start];
|
||||||
|
} while (start++ != end);
|
||||||
|
}
|
||||||
|
private final void jjCheckNAddTwoStates(int state1, int state2)
|
||||||
|
{
|
||||||
|
jjCheckNAdd(state1);
|
||||||
|
jjCheckNAdd(state2);
|
||||||
|
}
|
||||||
|
private final void jjCheckNAddStates(int start, int end)
|
||||||
|
{
|
||||||
|
do {
|
||||||
|
jjCheckNAdd(jjnextStates[start]);
|
||||||
|
} while (start++ != end);
|
||||||
|
}
|
||||||
|
private final void jjCheckNAddStates(int start)
|
||||||
|
{
|
||||||
|
jjCheckNAdd(jjnextStates[start]);
|
||||||
|
jjCheckNAdd(jjnextStates[start + 1]);
|
||||||
|
}
|
||||||
|
static final long[] jjbitVec0 = {
|
||||||
|
0xfffffffffffffffeL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL
|
||||||
|
};
|
||||||
|
static final long[] jjbitVec2 = {
|
||||||
|
0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL
|
||||||
|
};
|
||||||
|
private final int jjMoveNfa_1(int startState, int curPos)
|
||||||
|
{
|
||||||
|
int[] nextStates;
|
||||||
|
int startsAt = 0;
|
||||||
|
jjnewStateCnt = 38;
|
||||||
|
int i = 1;
|
||||||
|
jjstateSet[0] = startState;
|
||||||
|
int j, kind = 0x7fffffff;
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
if (++jjround == 0x7fffffff)
|
||||||
|
ReInitRounds();
|
||||||
|
if (curChar < 64)
|
||||||
|
{
|
||||||
|
long l = 1L << curChar;
|
||||||
|
MatchLoop: do
|
||||||
|
{
|
||||||
|
switch(jjstateSet[--i])
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
if ((0x7bffe8faffffd9ffL & l) != 0L)
|
||||||
|
{
|
||||||
|
if (kind > 22)
|
||||||
|
kind = 22;
|
||||||
|
jjCheckNAddStates(0, 4);
|
||||||
|
}
|
||||||
|
else if ((0x100002600L & l) != 0L)
|
||||||
|
{
|
||||||
|
if (kind > 7)
|
||||||
|
kind = 7;
|
||||||
|
}
|
||||||
|
else if (curChar == 34)
|
||||||
|
jjCheckNAddStates(5, 7);
|
||||||
|
if ((0x3fc000000000000L & l) != 0L)
|
||||||
|
jjCheckNAddStates(8, 11);
|
||||||
|
else if (curChar == 49)
|
||||||
|
jjCheckNAddTwoStates(20, 21);
|
||||||
|
break;
|
||||||
|
case 19:
|
||||||
|
if ((0x3fc000000000000L & l) != 0L)
|
||||||
|
jjCheckNAddStates(8, 11);
|
||||||
|
break;
|
||||||
|
case 20:
|
||||||
|
if ((0x3ff000000000000L & l) != 0L)
|
||||||
|
jjCheckNAdd(17);
|
||||||
|
break;
|
||||||
|
case 21:
|
||||||
|
if ((0x3ff000000000000L & l) != 0L)
|
||||||
|
jjCheckNAdd(18);
|
||||||
|
break;
|
||||||
|
case 22:
|
||||||
|
if (curChar == 49)
|
||||||
|
jjCheckNAddTwoStates(20, 21);
|
||||||
|
break;
|
||||||
|
case 23:
|
||||||
|
if (curChar == 34)
|
||||||
|
jjCheckNAddStates(5, 7);
|
||||||
|
break;
|
||||||
|
case 24:
|
||||||
|
if ((0xfffffffbffffffffL & l) != 0L)
|
||||||
|
jjCheckNAddTwoStates(24, 25);
|
||||||
|
break;
|
||||||
|
case 25:
|
||||||
|
if (curChar == 34)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 26;
|
||||||
|
break;
|
||||||
|
case 26:
|
||||||
|
if (curChar == 42 && kind > 18)
|
||||||
|
kind = 18;
|
||||||
|
break;
|
||||||
|
case 27:
|
||||||
|
if ((0xfffffffbffffffffL & l) != 0L)
|
||||||
|
jjCheckNAddStates(12, 14);
|
||||||
|
break;
|
||||||
|
case 29:
|
||||||
|
if (curChar == 34)
|
||||||
|
jjCheckNAddStates(12, 14);
|
||||||
|
break;
|
||||||
|
case 30:
|
||||||
|
if (curChar == 34 && kind > 19)
|
||||||
|
kind = 19;
|
||||||
|
break;
|
||||||
|
case 31:
|
||||||
|
if ((0x7bffe8faffffd9ffL & l) == 0L)
|
||||||
|
break;
|
||||||
|
if (kind > 22)
|
||||||
|
kind = 22;
|
||||||
|
jjCheckNAddStates(0, 4);
|
||||||
|
break;
|
||||||
|
case 32:
|
||||||
|
if ((0x7bffe8faffffd9ffL & l) != 0L)
|
||||||
|
jjCheckNAddTwoStates(32, 33);
|
||||||
|
break;
|
||||||
|
case 33:
|
||||||
|
if (curChar == 42 && kind > 20)
|
||||||
|
kind = 20;
|
||||||
|
break;
|
||||||
|
case 34:
|
||||||
|
if ((0x7bffe8faffffd9ffL & l) != 0L)
|
||||||
|
jjCheckNAddTwoStates(34, 35);
|
||||||
|
break;
|
||||||
|
case 35:
|
||||||
|
if ((0x8000040000000000L & l) == 0L)
|
||||||
|
break;
|
||||||
|
if (kind > 21)
|
||||||
|
kind = 21;
|
||||||
|
jjCheckNAddTwoStates(35, 36);
|
||||||
|
break;
|
||||||
|
case 36:
|
||||||
|
if ((0xfbffecfaffffd9ffL & l) == 0L)
|
||||||
|
break;
|
||||||
|
if (kind > 21)
|
||||||
|
kind = 21;
|
||||||
|
jjCheckNAdd(36);
|
||||||
|
break;
|
||||||
|
case 37:
|
||||||
|
if ((0x7bffe8faffffd9ffL & l) == 0L)
|
||||||
|
break;
|
||||||
|
if (kind > 22)
|
||||||
|
kind = 22;
|
||||||
|
jjCheckNAdd(37);
|
||||||
|
break;
|
||||||
|
default : break;
|
||||||
|
}
|
||||||
|
} while(i != startsAt);
|
||||||
|
}
|
||||||
|
else if (curChar < 128)
|
||||||
|
{
|
||||||
|
long l = 1L << (curChar & 077);
|
||||||
|
MatchLoop: do
|
||||||
|
{
|
||||||
|
switch(jjstateSet[--i])
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
if ((0xffffffffbfffffffL & l) != 0L)
|
||||||
|
{
|
||||||
|
if (kind > 22)
|
||||||
|
kind = 22;
|
||||||
|
jjCheckNAddStates(0, 4);
|
||||||
|
}
|
||||||
|
if ((0x400000004000L & l) != 0L)
|
||||||
|
{
|
||||||
|
if (kind > 12)
|
||||||
|
kind = 12;
|
||||||
|
}
|
||||||
|
else if ((0x80000000800000L & l) != 0L)
|
||||||
|
{
|
||||||
|
if (kind > 11)
|
||||||
|
kind = 11;
|
||||||
|
}
|
||||||
|
else if (curChar == 97)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 9;
|
||||||
|
else if (curChar == 65)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 6;
|
||||||
|
else if (curChar == 111)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 3;
|
||||||
|
else if (curChar == 79)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 1;
|
||||||
|
if (curChar == 110)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 15;
|
||||||
|
else if (curChar == 78)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 12;
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
if (curChar == 82 && kind > 8)
|
||||||
|
kind = 8;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
if (curChar == 79)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 1;
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
if (curChar == 114 && kind > 8)
|
||||||
|
kind = 8;
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
if (curChar == 111)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 3;
|
||||||
|
break;
|
||||||
|
case 5:
|
||||||
|
if (curChar == 68 && kind > 9)
|
||||||
|
kind = 9;
|
||||||
|
break;
|
||||||
|
case 6:
|
||||||
|
if (curChar == 78)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 5;
|
||||||
|
break;
|
||||||
|
case 7:
|
||||||
|
if (curChar == 65)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 6;
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
if (curChar == 100 && kind > 9)
|
||||||
|
kind = 9;
|
||||||
|
break;
|
||||||
|
case 9:
|
||||||
|
if (curChar == 110)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 8;
|
||||||
|
break;
|
||||||
|
case 10:
|
||||||
|
if (curChar == 97)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 9;
|
||||||
|
break;
|
||||||
|
case 11:
|
||||||
|
if (curChar == 84 && kind > 10)
|
||||||
|
kind = 10;
|
||||||
|
break;
|
||||||
|
case 12:
|
||||||
|
if (curChar == 79)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 11;
|
||||||
|
break;
|
||||||
|
case 13:
|
||||||
|
if (curChar == 78)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 12;
|
||||||
|
break;
|
||||||
|
case 14:
|
||||||
|
if (curChar == 116 && kind > 10)
|
||||||
|
kind = 10;
|
||||||
|
break;
|
||||||
|
case 15:
|
||||||
|
if (curChar == 111)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 14;
|
||||||
|
break;
|
||||||
|
case 16:
|
||||||
|
if (curChar == 110)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 15;
|
||||||
|
break;
|
||||||
|
case 17:
|
||||||
|
if ((0x80000000800000L & l) != 0L && kind > 11)
|
||||||
|
kind = 11;
|
||||||
|
break;
|
||||||
|
case 18:
|
||||||
|
if ((0x400000004000L & l) != 0L && kind > 12)
|
||||||
|
kind = 12;
|
||||||
|
break;
|
||||||
|
case 24:
|
||||||
|
jjAddStates(15, 16);
|
||||||
|
break;
|
||||||
|
case 27:
|
||||||
|
if ((0xffffffffefffffffL & l) != 0L)
|
||||||
|
jjCheckNAddStates(12, 14);
|
||||||
|
break;
|
||||||
|
case 28:
|
||||||
|
if (curChar == 92)
|
||||||
|
jjstateSet[jjnewStateCnt++] = 29;
|
||||||
|
break;
|
||||||
|
case 29:
|
||||||
|
if (curChar == 92)
|
||||||
|
jjCheckNAddStates(12, 14);
|
||||||
|
break;
|
||||||
|
case 31:
|
||||||
|
if ((0xffffffffbfffffffL & l) == 0L)
|
||||||
|
break;
|
||||||
|
if (kind > 22)
|
||||||
|
kind = 22;
|
||||||
|
jjCheckNAddStates(0, 4);
|
||||||
|
break;
|
||||||
|
case 32:
|
||||||
|
if ((0xffffffffbfffffffL & l) != 0L)
|
||||||
|
jjCheckNAddTwoStates(32, 33);
|
||||||
|
break;
|
||||||
|
case 34:
|
||||||
|
if ((0xffffffffbfffffffL & l) != 0L)
|
||||||
|
jjCheckNAddTwoStates(34, 35);
|
||||||
|
break;
|
||||||
|
case 36:
|
||||||
|
if ((0xffffffffbfffffffL & l) == 0L)
|
||||||
|
break;
|
||||||
|
if (kind > 21)
|
||||||
|
kind = 21;
|
||||||
|
jjstateSet[jjnewStateCnt++] = 36;
|
||||||
|
break;
|
||||||
|
case 37:
|
||||||
|
if ((0xffffffffbfffffffL & l) == 0L)
|
||||||
|
break;
|
||||||
|
if (kind > 22)
|
||||||
|
kind = 22;
|
||||||
|
jjCheckNAdd(37);
|
||||||
|
break;
|
||||||
|
default : break;
|
||||||
|
}
|
||||||
|
} while(i != startsAt);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int hiByte = (int)(curChar >> 8);
|
||||||
|
int i1 = hiByte >> 6;
|
||||||
|
long l1 = 1L << (hiByte & 077);
|
||||||
|
int i2 = (curChar & 0xff) >> 6;
|
||||||
|
long l2 = 1L << (curChar & 077);
|
||||||
|
MatchLoop: do
|
||||||
|
{
|
||||||
|
switch(jjstateSet[--i])
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
|
||||||
|
break;
|
||||||
|
if (kind > 22)
|
||||||
|
kind = 22;
|
||||||
|
jjCheckNAddStates(0, 4);
|
||||||
|
break;
|
||||||
|
case 24:
|
||||||
|
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
|
||||||
|
jjAddStates(15, 16);
|
||||||
|
break;
|
||||||
|
case 27:
|
||||||
|
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
|
||||||
|
jjAddStates(12, 14);
|
||||||
|
break;
|
||||||
|
case 32:
|
||||||
|
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
|
||||||
|
jjCheckNAddTwoStates(32, 33);
|
||||||
|
break;
|
||||||
|
case 34:
|
||||||
|
if (jjCanMove_0(hiByte, i1, i2, l1, l2))
|
||||||
|
jjCheckNAddTwoStates(34, 35);
|
||||||
|
break;
|
||||||
|
case 36:
|
||||||
|
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
|
||||||
|
break;
|
||||||
|
if (kind > 21)
|
||||||
|
kind = 21;
|
||||||
|
jjstateSet[jjnewStateCnt++] = 36;
|
||||||
|
break;
|
||||||
|
case 37:
|
||||||
|
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
|
||||||
|
break;
|
||||||
|
if (kind > 22)
|
||||||
|
kind = 22;
|
||||||
|
jjCheckNAdd(37);
|
||||||
|
break;
|
||||||
|
default : break;
|
||||||
|
}
|
||||||
|
} while(i != startsAt);
|
||||||
|
}
|
||||||
|
if (kind != 0x7fffffff)
|
||||||
|
{
|
||||||
|
jjmatchedKind = kind;
|
||||||
|
jjmatchedPos = curPos;
|
||||||
|
kind = 0x7fffffff;
|
||||||
|
}
|
||||||
|
++curPos;
|
||||||
|
if ((i = jjnewStateCnt) == (startsAt = 38 - (jjnewStateCnt = startsAt)))
|
||||||
|
return curPos;
|
||||||
|
try { curChar = input_stream.readChar(); }
|
||||||
|
catch(java.io.IOException e) { return curPos; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
private final int jjMoveStringLiteralDfa0_0()
|
||||||
|
{
|
||||||
|
return jjMoveNfa_0(0, 0);
|
||||||
|
}
|
||||||
|
private final int jjMoveNfa_0(int startState, int curPos)
|
||||||
|
{
|
||||||
|
int[] nextStates;
|
||||||
|
int startsAt = 0;
|
||||||
|
jjnewStateCnt = 3;
|
||||||
|
int i = 1;
|
||||||
|
jjstateSet[0] = startState;
|
||||||
|
int j, kind = 0x7fffffff;
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
if (++jjround == 0x7fffffff)
|
||||||
|
ReInitRounds();
|
||||||
|
if (curChar < 64)
|
||||||
|
{
|
||||||
|
long l = 1L << curChar;
|
||||||
|
MatchLoop: do
|
||||||
|
{
|
||||||
|
switch(jjstateSet[--i])
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
if ((0x3ff000000000000L & l) == 0L)
|
||||||
|
break;
|
||||||
|
if (kind > 23)
|
||||||
|
kind = 23;
|
||||||
|
jjAddStates(17, 18);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
if (curChar == 46)
|
||||||
|
jjCheckNAdd(2);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
if ((0x3ff000000000000L & l) == 0L)
|
||||||
|
break;
|
||||||
|
if (kind > 23)
|
||||||
|
kind = 23;
|
||||||
|
jjCheckNAdd(2);
|
||||||
|
break;
|
||||||
|
default : break;
|
||||||
|
}
|
||||||
|
} while(i != startsAt);
|
||||||
|
}
|
||||||
|
else if (curChar < 128)
|
||||||
|
{
|
||||||
|
long l = 1L << (curChar & 077);
|
||||||
|
MatchLoop: do
|
||||||
|
{
|
||||||
|
switch(jjstateSet[--i])
|
||||||
|
{
|
||||||
|
default : break;
|
||||||
|
}
|
||||||
|
} while(i != startsAt);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
int hiByte = (int)(curChar >> 8);
|
||||||
|
int i1 = hiByte >> 6;
|
||||||
|
long l1 = 1L << (hiByte & 077);
|
||||||
|
int i2 = (curChar & 0xff) >> 6;
|
||||||
|
long l2 = 1L << (curChar & 077);
|
||||||
|
MatchLoop: do
|
||||||
|
{
|
||||||
|
switch(jjstateSet[--i])
|
||||||
|
{
|
||||||
|
default : break;
|
||||||
|
}
|
||||||
|
} while(i != startsAt);
|
||||||
|
}
|
||||||
|
if (kind != 0x7fffffff)
|
||||||
|
{
|
||||||
|
jjmatchedKind = kind;
|
||||||
|
jjmatchedPos = curPos;
|
||||||
|
kind = 0x7fffffff;
|
||||||
|
}
|
||||||
|
++curPos;
|
||||||
|
if ((i = jjnewStateCnt) == (startsAt = 3 - (jjnewStateCnt = startsAt)))
|
||||||
|
return curPos;
|
||||||
|
try { curChar = input_stream.readChar(); }
|
||||||
|
catch(java.io.IOException e) { return curPos; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
static final int[] jjnextStates = {
|
||||||
|
32, 33, 34, 35, 37, 24, 27, 28, 20, 17, 21, 18, 27, 28, 30, 24,
|
||||||
|
25, 0, 1,
|
||||||
|
};
|
||||||
|
private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2)
|
||||||
|
{
|
||||||
|
switch(hiByte)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
return ((jjbitVec2[i2] & l2) != 0L);
|
||||||
|
default :
|
||||||
|
if ((jjbitVec0[i1] & l1) != 0L)
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
public static final String[] jjstrLiteralImages = {
|
||||||
|
"", null, null, null, null, null, null, null, null, null, null, null, null,
|
||||||
|
"\50", "\51", "\54", "\72", "\136", null, null, null, null, null, null, };
|
||||||
|
public static final String[] lexStateNames = {
|
||||||
|
"Boost",
|
||||||
|
"DEFAULT",
|
||||||
|
};
|
||||||
|
public static final int[] jjnewLexState = {
|
||||||
|
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 1,
|
||||||
|
};
|
||||||
|
static final long[] jjtoToken = {
|
||||||
|
0xffff01L,
|
||||||
|
};
|
||||||
|
static final long[] jjtoSkip = {
|
||||||
|
0x80L,
|
||||||
|
};
|
||||||
|
protected CharStream input_stream;
|
||||||
|
private final int[] jjrounds = new int[38];
|
||||||
|
private final int[] jjstateSet = new int[76];
|
||||||
|
protected char curChar;
|
||||||
|
/**
 * Creates a token manager reading from the given stream; the lexical state is
 * left at its initial value.
 *
 * @param stream character source to tokenize
 */
public QueryParserTokenManager(CharStream stream)
{
  this.input_stream = stream;
}
|
||||||
|
/**
 * Creates a token manager reading from the given stream and starting in the
 * given lexical state.
 *
 * @param stream   character source to tokenize
 * @param lexState lexical state to start in; validated by {@code SwitchTo}
 */
public QueryParserTokenManager(CharStream stream, int lexState)
{
  this(stream);
  this.SwitchTo(lexState);
}
|
||||||
|
public void ReInit(CharStream stream)
|
||||||
|
{
|
||||||
|
jjmatchedPos = jjnewStateCnt = 0;
|
||||||
|
curLexState = defaultLexState;
|
||||||
|
input_stream = stream;
|
||||||
|
ReInitRounds();
|
||||||
|
}
|
||||||
|
private final void ReInitRounds()
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
jjround = 0x80000001;
|
||||||
|
for (i = 38; i-- > 0;)
|
||||||
|
jjrounds[i] = 0x80000000;
|
||||||
|
}
|
||||||
|
public void ReInit(CharStream stream, int lexState)
|
||||||
|
{
|
||||||
|
ReInit(stream);
|
||||||
|
SwitchTo(lexState);
|
||||||
|
}
|
||||||
|
public void SwitchTo(int lexState)
|
||||||
|
{
|
||||||
|
if (lexState >= 2 || lexState < 0)
|
||||||
|
throw new TokenMgrError("Error: Ignoring invalid lexical state : " + lexState + ". State unchanged.", TokenMgrError.INVALID_LEXICAL_STATE);
|
||||||
|
else
|
||||||
|
curLexState = lexState;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected Token jjFillToken()
|
||||||
|
{
|
||||||
|
Token t = Token.newToken(jjmatchedKind);
|
||||||
|
t.kind = jjmatchedKind;
|
||||||
|
String im = jjstrLiteralImages[jjmatchedKind];
|
||||||
|
t.image = (im == null) ? input_stream.GetImage() : im;
|
||||||
|
t.beginLine = input_stream.getBeginLine();
|
||||||
|
t.beginColumn = input_stream.getBeginColumn();
|
||||||
|
t.endLine = input_stream.getEndLine();
|
||||||
|
t.endColumn = input_stream.getEndColumn();
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
|
||||||
|
int curLexState = 1;
|
||||||
|
int defaultLexState = 1;
|
||||||
|
int jjnewStateCnt;
|
||||||
|
int jjround;
|
||||||
|
int jjmatchedPos;
|
||||||
|
int jjmatchedKind;
|
||||||
|
|
||||||
|
public Token getNextToken()
|
||||||
|
{
|
||||||
|
int kind;
|
||||||
|
Token specialToken = null;
|
||||||
|
Token matchedToken;
|
||||||
|
int curPos = 0;
|
||||||
|
|
||||||
|
EOFLoop :
|
||||||
|
for (;;)
|
||||||
|
{
|
||||||
|
try
|
||||||
|
{
|
||||||
|
curChar = input_stream.BeginToken();
|
||||||
|
}
|
||||||
|
catch(java.io.IOException e)
|
||||||
|
{
|
||||||
|
jjmatchedKind = 0;
|
||||||
|
matchedToken = jjFillToken();
|
||||||
|
return matchedToken;
|
||||||
|
}
|
||||||
|
|
||||||
|
switch(curLexState)
|
||||||
|
{
|
||||||
|
case 0:
|
||||||
|
jjmatchedKind = 0x7fffffff;
|
||||||
|
jjmatchedPos = 0;
|
||||||
|
curPos = jjMoveStringLiteralDfa0_0();
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
jjmatchedKind = 0x7fffffff;
|
||||||
|
jjmatchedPos = 0;
|
||||||
|
curPos = jjMoveStringLiteralDfa0_1();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if (jjmatchedKind != 0x7fffffff)
|
||||||
|
{
|
||||||
|
if (jjmatchedPos + 1 < curPos)
|
||||||
|
input_stream.backup(curPos - jjmatchedPos - 1);
|
||||||
|
if ((jjtoToken[jjmatchedKind >> 6] & (1L << (jjmatchedKind & 077))) != 0L)
|
||||||
|
{
|
||||||
|
matchedToken = jjFillToken();
|
||||||
|
if (jjnewLexState[jjmatchedKind] != -1)
|
||||||
|
curLexState = jjnewLexState[jjmatchedKind];
|
||||||
|
return matchedToken;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
if (jjnewLexState[jjmatchedKind] != -1)
|
||||||
|
curLexState = jjnewLexState[jjmatchedKind];
|
||||||
|
continue EOFLoop;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int error_line = input_stream.getEndLine();
|
||||||
|
int error_column = input_stream.getEndColumn();
|
||||||
|
String error_after = null;
|
||||||
|
boolean EOFSeen = false;
|
||||||
|
try { input_stream.readChar(); input_stream.backup(1); }
|
||||||
|
catch (java.io.IOException e1) {
|
||||||
|
EOFSeen = true;
|
||||||
|
error_after = curPos <= 1 ? "" : input_stream.GetImage();
|
||||||
|
if (curChar == '\n' || curChar == '\r') {
|
||||||
|
error_line++;
|
||||||
|
error_column = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
error_column++;
|
||||||
|
}
|
||||||
|
if (!EOFSeen) {
|
||||||
|
input_stream.backup(1);
|
||||||
|
error_after = curPos <= 1 ? "" : input_stream.GetImage();
|
||||||
|
}
|
||||||
|
throw new TokenMgrError(EOFSeen, curLexState, error_line, error_column, error_after, curChar, TokenMgrError.LEXICAL_ERROR);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
|
@ -0,0 +1,81 @@
|
||||||
|
/* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */
|
||||||
|
package org.apache.lucene.queryParser.surround.parser;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Describes the input token stream.
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class Token {

  /**
   * Kind of this token, as an integer. The numbering is determined by
   * JavaCCParser; a table of these numbers is stored in the file
   * ...Constants.java.
   */
  public int kind;

  /**
   * Position of the first character of this token (beginLine, beginColumn)
   * and of its last character (endLine, endColumn).
   */
  public int beginLine, beginColumn, endLine, endColumn;

  /**
   * The string image of the token.
   */
  public String image;

  /**
   * For a regular (non-special) token: the next regular token from the input
   * stream, or null if this is the last token or the token manager has not
   * read beyond this one. For a special token, see {@link #specialToken}.
   */
  public Token next;

  /**
   * The last special token that occurs before this token but after the
   * immediately preceding regular (non-special) token, or null if there is
   * none. Earlier special tokens are reached by following the specialToken
   * field of that token, down to the first one, whose specialToken field is
   * null. The next field of a special token refers to the special token that
   * immediately follows it (without an intervening regular token), or is null
   * if there is no such token.
   */
  public Token specialToken;

  /**
   * Returns the image.
   */
  public String toString()
  {
    return this.image;
  }

  /**
   * Returns a new Token object. This default implementation ignores ofKind
   * and always creates a plain Token. To create subclass objects for certain
   * kinds, dispatch on ofKind here, for example by returning a new IDToken
   * when ofKind is MyParserConstants.ID. The matchedToken variable can then
   * be cast to the appropriate type in lexical actions.
   */
  public static final Token newToken(int ofKind)
  {
    return new Token();
  }

}
|
|
@ -0,0 +1,133 @@
|
||||||
|
/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 3.0 */
|
||||||
|
package org.apache.lucene.queryParser.surround.parser;
|
||||||
|
|
||||||
|
public class TokenMgrError extends Error
{
  /*
   * Ordinals for the reasons why an Error of this type can be thrown.
   */

  /**
   * A lexical error occurred.
   */
  static final int LEXICAL_ERROR = 0;

  /**
   * An attempt was made to create a second instance of a static token manager.
   */
  static final int STATIC_LEXER_ERROR = 1;

  /**
   * Tried to change to an invalid lexical state.
   */
  static final int INVALID_LEXICAL_STATE = 2;

  /**
   * Detected (and bailed out of) an infinite loop in the token manager.
   */
  static final int LOOP_DETECTED = 3;

  /**
   * The reason why the error was thrown; one of the four values above.
   */
  int errorCode;

  /**
   * Returns a copy of the given string in which control characters, quotes
   * and backslashes are replaced by their escaped form and other characters
   * outside the range 0x20..0x7e by a four-digit unicode escape. Characters
   * with code 0 are dropped.
   */
  protected static final String addEscapes(String str) {
    final StringBuffer escaped = new StringBuffer();
    final int length = str.length();
    for (int pos = 0; pos < length; pos++) {
      final char ch = str.charAt(pos);
      switch (ch) {
        case 0 :
          // dropped from the output
          break;
        case '\b':
          escaped.append("\\b");
          break;
        case '\t':
          escaped.append("\\t");
          break;
        case '\n':
          escaped.append("\\n");
          break;
        case '\f':
          escaped.append("\\f");
          break;
        case '\r':
          escaped.append("\\r");
          break;
        case '\"':
          escaped.append("\\\"");
          break;
        case '\'':
          escaped.append("\\\'");
          break;
        case '\\':
          escaped.append("\\\\");
          break;
        default:
          if (ch < 0x20 || ch > 0x7e) {
            // left-pad the hex code to four digits
            final String hex = "0000" + Integer.toString(ch, 16);
            escaped.append("\\u").append(hex.substring(hex.length() - 4));
          } else {
            escaped.append(ch);
          }
          break;
      }
    }
    return escaped.toString();
  }

  /**
   * Builds the detailed message for an Error thrown by the token manager to
   * indicate a lexical error.
   * Parameters :
   *    EOFSeen     : indicates if EOF caused the lexical error
   *    lexState    : lexical state in which this error occurred (not part of the message)
   *    errorLine   : line number when the error occurred
   *    errorColumn : column number when the error occurred
   *    errorAfter  : prefix that was seen before this error occurred
   *    curChar     : the offending character
   * Note: You can customize the lexical error message by modifying this method.
   */
  protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) {
    final String encountered;
    if (EOFSeen) {
      encountered = "<EOF> ";
    } else {
      encountered = "\"" + addEscapes(String.valueOf(curChar)) + "\"" + " (" + (int)curChar + "), ";
    }
    return "Lexical error at line " + errorLine
        + ", column " + errorColumn
        + ". Encountered: " + encountered
        + "after : \"" + addEscapes(errorAfter) + "\"";
  }

  /**
   * Returns the message of this error. The body of this method can be changed
   * to customize error messages. For example, cases like LOOP_DETECTED and
   * INVALID_LEXICAL_STATE are of no concern to end users, so a release
   * version of a parser could return something like
   *
   *     "Internal Error : Please file a bug report .... "
   *
   * for such cases.
   */
  public String getMessage() {
    return super.getMessage();
  }

  /*
   * Constructors of various flavors follow.
   */

  /** Creates an error without a message; errorCode keeps its default value. */
  public TokenMgrError() {
  }

  /** Creates an error with the given message and reason code. */
  public TokenMgrError(String message, int reason) {
    super(message);
    this.errorCode = reason;
  }

  /** Creates an error whose message is built by LexicalError from the given details. */
  public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) {
    this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
  }
}
|
|
@ -0,0 +1,32 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
|
||||||
|
public class AndQuery extends ComposedQuery {
|
||||||
|
public AndQuery(List queries, boolean inf, String opName) {
|
||||||
|
super(queries, inf, opName);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Query makeLuceneQueryFieldNoBoost(String fieldName, BasicQueryFactory qf) {
|
||||||
|
return SrndBooleanQuery.makeBooleanQuery( /* subqueries can be individually boosted */
|
||||||
|
makeLuceneSubQueriesField(fieldName, qf), BooleanClause.Occur.MUST);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,64 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* Create basic queries to be used during rewrite.
|
||||||
|
* The basic queries are TermQuery and SpanTermQuery.
|
||||||
|
* An exception can be thrown when too many of these are used.
|
||||||
|
* SpanTermQuery and TermQuery use IndexReader.termEnum(Term), which causes the buffer usage.
|
||||||
|
*
|
||||||
|
* Use this class to limit the buffer usage for reading terms from an index.
|
||||||
|
* Default is 1024, the same as the max. number of subqueries for a BooleanQuery.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
|
|
||||||
|
public class BasicQueryFactory {
|
||||||
|
public BasicQueryFactory(int maxBasicQueries) {
|
||||||
|
this.maxBasicQueries = maxBasicQueries;
|
||||||
|
this.queriesMade = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
public BasicQueryFactory() {
|
||||||
|
this(1024);
|
||||||
|
}
|
||||||
|
|
||||||
|
private int maxBasicQueries;
|
||||||
|
private int queriesMade;
|
||||||
|
|
||||||
|
public int getNrQueriesMade() {return queriesMade;}
|
||||||
|
public int getMaxBasicQueries() {return maxBasicQueries;}
|
||||||
|
|
||||||
|
private synchronized void checkMax() throws TooManyBasicQueries {
|
||||||
|
if (queriesMade >= maxBasicQueries)
|
||||||
|
throw new TooManyBasicQueries(getMaxBasicQueries());
|
||||||
|
queriesMade++;
|
||||||
|
}
|
||||||
|
|
||||||
|
public TermQuery newTermQuery(Term term) throws TooManyBasicQueries {
|
||||||
|
checkMax();
|
||||||
|
return new TermQuery(term);
|
||||||
|
}
|
||||||
|
|
||||||
|
public SpanTermQuery newSpanTermQuery(Term term) throws TooManyBasicQueries {
|
||||||
|
checkMax();
|
||||||
|
return new SpanTermQuery(term);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,116 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
public abstract class ComposedQuery extends SrndQuery {
|
||||||
|
|
||||||
|
public ComposedQuery(List qs, boolean operatorInfix, String opName) {
|
||||||
|
recompose(qs);
|
||||||
|
this.operatorInfix = operatorInfix;
|
||||||
|
this.opName = opName;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void recompose(List queries) {
|
||||||
|
if (queries.size() < 2) throw new AssertionError("Too few subqueries");
|
||||||
|
this.queries = queries;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String opName;
|
||||||
|
public String getOperatorName() {return opName;}
|
||||||
|
|
||||||
|
private List queries;
|
||||||
|
|
||||||
|
public Iterator getSubQueriesIterator() {return queries.listIterator();}
|
||||||
|
|
||||||
|
public int getNrSubQueries() {return queries.size();}
|
||||||
|
|
||||||
|
public SrndQuery getSubQuery(int qn) {return (SrndQuery) queries.get(qn);}
|
||||||
|
|
||||||
|
private boolean operatorInfix;
|
||||||
|
public boolean isOperatorInfix() { return operatorInfix; } /* else prefix operator */
|
||||||
|
|
||||||
|
public List makeLuceneSubQueriesField(String fn, BasicQueryFactory qf) {
|
||||||
|
ArrayList luceneSubQueries = new ArrayList();
|
||||||
|
Iterator sqi = getSubQueriesIterator();
|
||||||
|
while (sqi.hasNext()) {
|
||||||
|
luceneSubQueries.add( ((SrndQuery) sqi.next()).makeLuceneQueryField(fn, qf));
|
||||||
|
}
|
||||||
|
return luceneSubQueries;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
StringBuffer r = new StringBuffer();
|
||||||
|
if (isOperatorInfix()) {
|
||||||
|
infixToString(r);
|
||||||
|
} else {
|
||||||
|
prefixToString(r);
|
||||||
|
}
|
||||||
|
weightToString(r);
|
||||||
|
return r.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Override for different spacing */
|
||||||
|
protected String getPrefixSeparator() { return ", ";}
|
||||||
|
protected String getBracketOpen() { return "(";}
|
||||||
|
protected String getBracketClose() { return ")";}
|
||||||
|
|
||||||
|
protected void infixToString(StringBuffer r) {
|
||||||
|
/* Brackets are possibly redundant in the result. */
|
||||||
|
Iterator sqi = getSubQueriesIterator();
|
||||||
|
r.append(getBracketOpen());
|
||||||
|
if (sqi.hasNext()) {
|
||||||
|
r.append(sqi.next().toString());
|
||||||
|
while (sqi.hasNext()) {
|
||||||
|
r.append(" ");
|
||||||
|
r.append(getOperatorName()); /* infix operator */
|
||||||
|
r.append(" ");
|
||||||
|
r.append(sqi.next().toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
r.append(getBracketClose());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void prefixToString(StringBuffer r) {
|
||||||
|
Iterator sqi = getSubQueriesIterator();
|
||||||
|
r.append(getOperatorName()); /* prefix operator */
|
||||||
|
r.append(getBracketOpen());
|
||||||
|
if (sqi.hasNext()) {
|
||||||
|
r.append(sqi.next().toString());
|
||||||
|
while (sqi.hasNext()) {
|
||||||
|
r.append(getPrefixSeparator());
|
||||||
|
r.append(sqi.next().toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
r.append(getBracketClose());
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public boolean isFieldsSubQueryAcceptable() {
|
||||||
|
/* at least one subquery should be acceptable */
|
||||||
|
Iterator sqi = getSubQueriesIterator();
|
||||||
|
while (sqi.hasNext()) {
|
||||||
|
if (((SrndQuery) sqi.next()).isFieldsSubQueryAcceptable()) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,117 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanQuery;
|
||||||
|
|
||||||
|
public class DistanceQuery extends ComposedQuery implements DistanceSubQuery {
|
||||||
|
public DistanceQuery(
|
||||||
|
List queries,
|
||||||
|
boolean infix,
|
||||||
|
int opDistance,
|
||||||
|
String opName,
|
||||||
|
boolean ordered) {
|
||||||
|
super(queries, infix, opName);
|
||||||
|
this.opDistance = opDistance; /* the distance indicated in the operator */
|
||||||
|
this.ordered = ordered;
|
||||||
|
}
|
||||||
|
|
||||||
|
private int opDistance;
|
||||||
|
public int getOpDistance() {return opDistance;}
|
||||||
|
|
||||||
|
private boolean ordered;
|
||||||
|
public boolean subQueriesOrdered() {return ordered;}
|
||||||
|
|
||||||
|
public String distanceSubQueryNotAllowed() {
|
||||||
|
Iterator sqi = getSubQueriesIterator();
|
||||||
|
while (sqi.hasNext()) {
|
||||||
|
Object leq = sqi.next();
|
||||||
|
if (leq instanceof DistanceSubQuery) {
|
||||||
|
DistanceSubQuery dsq = (DistanceSubQuery) leq;
|
||||||
|
String m = dsq.distanceSubQueryNotAllowed();
|
||||||
|
if (m != null) {
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return "Operator " + getOperatorName() + " does not allow subquery " + leq.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null; /* subqueries acceptable */
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public void addSpanQueries(SpanNearClauseFactory sncf) throws IOException {
|
||||||
|
Query snq = getSpanNearQuery(sncf.getIndexReader(),
|
||||||
|
sncf.getFieldName(),
|
||||||
|
getWeight(),
|
||||||
|
sncf.getBasicQueryFactory());
|
||||||
|
sncf.addSpanNearQuery(snq);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Query makeLuceneQueryFieldNoBoost(final String fieldName, final BasicQueryFactory qf) {
|
||||||
|
return new Query () {
|
||||||
|
|
||||||
|
public String toString(String fn) {
|
||||||
|
return getClass().toString() + " " + fieldName + " (" + fn + "?)";
|
||||||
|
}
|
||||||
|
|
||||||
|
public Query rewrite(IndexReader reader) throws IOException {
|
||||||
|
return getSpanNearQuery(reader, fieldName, getBoost(), qf);
|
||||||
|
}
|
||||||
|
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
public Query getSpanNearQuery(
|
||||||
|
IndexReader reader,
|
||||||
|
String fieldName,
|
||||||
|
float boost,
|
||||||
|
BasicQueryFactory qf) throws IOException {
|
||||||
|
SpanQuery[] spanNearClauses = new SpanQuery[getNrSubQueries()];
|
||||||
|
Iterator sqi = getSubQueriesIterator();
|
||||||
|
int qi = 0;
|
||||||
|
while (sqi.hasNext()) {
|
||||||
|
SpanNearClauseFactory sncf = new SpanNearClauseFactory(reader, fieldName, qf);
|
||||||
|
|
||||||
|
((DistanceSubQuery)sqi.next()).addSpanQueries(sncf);
|
||||||
|
if (sncf.size() == 0) { /* distance operator requires all sub queries */
|
||||||
|
while (sqi.hasNext()) { /* produce evt. error messages but ignore results */
|
||||||
|
((DistanceSubQuery)sqi.next()).addSpanQueries(sncf);
|
||||||
|
sncf.clear();
|
||||||
|
}
|
||||||
|
return SrndQuery.theEmptyLcnQuery;
|
||||||
|
}
|
||||||
|
|
||||||
|
spanNearClauses[qi] = sncf.makeSpanNearClause();
|
||||||
|
|
||||||
|
qi++;
|
||||||
|
}
|
||||||
|
|
||||||
|
SpanNearQuery r = new SpanNearQuery(spanNearClauses, getOpDistance() - 1, subQueriesOrdered());
|
||||||
|
r.setBoost(boost);
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,30 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
public interface DistanceSubQuery {
|
||||||
|
/** When distanceSubQueryNotAllowed() returns non null, the reason why the subquery
|
||||||
|
* is not allowed as a distance subquery is returned.
|
||||||
|
* <br>When distanceSubQueryNotAllowed() returns null addSpanNearQueries() can be used
|
||||||
|
* in the creation of the span near clause for the subquery.
|
||||||
|
*/
|
||||||
|
String distanceSubQueryNotAllowed();
|
||||||
|
|
||||||
|
void addSpanQueries(SpanNearClauseFactory sncf) throws IOException;
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,93 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
|
||||||
|
public class FieldsQuery extends SrndQuery { /* mostly untested */
|
||||||
|
private SrndQuery q;
|
||||||
|
private ArrayList fieldNames;
|
||||||
|
private final char fieldOp;
|
||||||
|
private final String OrOperatorName = "OR"; /* for expanded queries, not normally visible */
|
||||||
|
|
||||||
|
public FieldsQuery(SrndQuery q, ArrayList fieldNames, char fieldOp) {
|
||||||
|
this.q = q;
|
||||||
|
this.fieldNames = fieldNames;
|
||||||
|
this.fieldOp = fieldOp;
|
||||||
|
}
|
||||||
|
|
||||||
|
public FieldsQuery(SrndQuery q, String fieldName, char fieldOp) {
|
||||||
|
this.q = q;
|
||||||
|
fieldNames = new ArrayList();
|
||||||
|
fieldNames.add(fieldName);
|
||||||
|
this.fieldOp = fieldOp;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isFieldsSubQueryAcceptable() {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Query makeLuceneQueryNoBoost(BasicQueryFactory qf) {
|
||||||
|
if (fieldNames.size() == 1) { /* single field name: no new queries needed */
|
||||||
|
return q.makeLuceneQueryFieldNoBoost((String) fieldNames.get(0), qf);
|
||||||
|
} else { /* OR query over the fields */
|
||||||
|
ArrayList queries = new ArrayList();
|
||||||
|
Iterator fni = getFieldNames().listIterator();
|
||||||
|
SrndQuery qc;
|
||||||
|
while (fni.hasNext()) {
|
||||||
|
qc = (SrndQuery) q.clone();
|
||||||
|
queries.add( new FieldsQuery( qc, (String) fni.next(), fieldOp));
|
||||||
|
}
|
||||||
|
boolean infix = true;
|
||||||
|
OrQuery oq = new OrQuery(queries,
|
||||||
|
true /* infix OR for field names */,
|
||||||
|
OrOperatorName);
|
||||||
|
System.out.println(getClass().toString() + ", fields expanded: " + oq.toString()); /* needs testing */
|
||||||
|
return oq.makeLuceneQueryField(null, qf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Query makeLuceneQueryFieldNoBoost(String fieldName, BasicQueryFactory qf) {
|
||||||
|
return makeLuceneQueryNoBoost(qf); /* use this.fieldNames instead of fieldName */
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
public List getFieldNames() {return fieldNames;}
|
||||||
|
|
||||||
|
public char getFieldOperator() { return fieldOp;}
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
StringBuffer r = new StringBuffer();
|
||||||
|
r.append("(");
|
||||||
|
fieldNamesToString(r);
|
||||||
|
r.append(q.toString());
|
||||||
|
r.append(")");
|
||||||
|
return r.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void fieldNamesToString(StringBuffer r) {
|
||||||
|
Iterator fni = getFieldNames().listIterator();
|
||||||
|
while (fni.hasNext()) {
|
||||||
|
r.append((String) fni.next());
|
||||||
|
r.append(getFieldOperator());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,37 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
|
||||||
|
public class NotQuery extends ComposedQuery {
|
||||||
|
public NotQuery(List queries, String opName) { super(queries, true /* infix */, opName); }
|
||||||
|
|
||||||
|
public Query makeLuceneQueryFieldNoBoost(String fieldName, BasicQueryFactory qf) {
|
||||||
|
List luceneSubQueries = makeLuceneSubQueriesField(fieldName, qf);
|
||||||
|
BooleanQuery bq = new BooleanQuery();
|
||||||
|
bq.add( (Query) luceneSubQueries.get(0), BooleanClause.Occur.MUST);
|
||||||
|
SrndBooleanQuery.addQueriesToBoolean(bq,
|
||||||
|
// FIXME: do not allow weights on prohibited subqueries.
|
||||||
|
luceneSubQueries.subList(1, luceneSubQueries.size()),
|
||||||
|
// later subqueries: not required, prohibited
|
||||||
|
BooleanClause.Occur.MUST_NOT);
|
||||||
|
return bq;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,59 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
public class OrQuery extends ComposedQuery implements DistanceSubQuery {
|
||||||
|
public OrQuery(List queries, boolean infix, String opName) {
|
||||||
|
super(queries, infix, opName);
|
||||||
|
}
|
||||||
|
|
||||||
|
public Query makeLuceneQueryFieldNoBoost(String fieldName, BasicQueryFactory qf) {
|
||||||
|
return SrndBooleanQuery.makeBooleanQuery(
|
||||||
|
/* subqueries can be individually boosted */
|
||||||
|
makeLuceneSubQueriesField(fieldName, qf), BooleanClause.Occur.SHOULD);
|
||||||
|
}
|
||||||
|
|
||||||
|
public String distanceSubQueryNotAllowed() {
|
||||||
|
Iterator sqi = getSubQueriesIterator();
|
||||||
|
while (sqi.hasNext()) {
|
||||||
|
SrndQuery leq = (SrndQuery) sqi.next();
|
||||||
|
if (leq instanceof DistanceSubQuery) {
|
||||||
|
String m = ((DistanceSubQuery)leq).distanceSubQueryNotAllowed();
|
||||||
|
if (m != null) {
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
return "subquery not allowed: " + leq.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addSpanQueries(SpanNearClauseFactory sncf) throws IOException {
|
||||||
|
Iterator sqi = getSubQueriesIterator();
|
||||||
|
while (sqi.hasNext()) {
|
||||||
|
((DistanceSubQuery)sqi.next()).addSpanQueries(sncf);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,109 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.io.IOException;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
|
||||||
|
public abstract class SimpleTerm
|
||||||
|
extends SrndQuery
|
||||||
|
implements DistanceSubQuery, Comparable
|
||||||
|
{
|
||||||
|
public SimpleTerm(boolean q) {quoted = q;}
|
||||||
|
|
||||||
|
private boolean quoted;
|
||||||
|
boolean isQuoted() {return quoted;}
|
||||||
|
|
||||||
|
public String getQuote() {return "\"";}
|
||||||
|
public String getFieldOperator() {return "/";}
|
||||||
|
|
||||||
|
public abstract String toStringUnquoted();
|
||||||
|
|
||||||
|
public int compareTo(Object o) {
|
||||||
|
/* for ordering terms and prefixes before using an index, not used */
|
||||||
|
SimpleTerm ost = (SimpleTerm) o;
|
||||||
|
return this.toStringUnquoted().compareTo( ost.toStringUnquoted());
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void suffixToString(StringBuffer r) {;} /* override for prefix query */
|
||||||
|
|
||||||
|
public String toString() {
|
||||||
|
StringBuffer r = new StringBuffer();
|
||||||
|
if (isQuoted()) {
|
||||||
|
r.append(getQuote());
|
||||||
|
}
|
||||||
|
r.append(toStringUnquoted());
|
||||||
|
if (isQuoted()) {
|
||||||
|
r.append(getQuote());
|
||||||
|
}
|
||||||
|
suffixToString(r);
|
||||||
|
weightToString(r);
|
||||||
|
return r.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
public abstract void visitMatchingTerms(
|
||||||
|
IndexReader reader,
|
||||||
|
String fieldName,
|
||||||
|
MatchingTermVisitor mtv) throws IOException;
|
||||||
|
|
||||||
|
public interface MatchingTermVisitor {
|
||||||
|
void visitMatchingTerm(Term t)throws IOException;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String distanceSubQueryNotAllowed() {return null;}
|
||||||
|
|
||||||
|
|
||||||
|
public Query makeLuceneQueryFieldNoBoost(final String fieldName, final BasicQueryFactory qf) {
|
||||||
|
return new Query() {
|
||||||
|
public String toString(String fn) {
|
||||||
|
return getClass().toString() + " " + fieldName + " (" + fn + "?)";
|
||||||
|
}
|
||||||
|
|
||||||
|
public Query rewrite(IndexReader reader) throws IOException {
|
||||||
|
final ArrayList luceneSubQueries = new ArrayList();
|
||||||
|
visitMatchingTerms( reader, fieldName,
|
||||||
|
new MatchingTermVisitor() {
|
||||||
|
public void visitMatchingTerm(Term term) throws IOException {
|
||||||
|
luceneSubQueries.add(qf.newTermQuery(term));
|
||||||
|
}
|
||||||
|
});
|
||||||
|
return (luceneSubQueries.size() == 0) ? SrndQuery.theEmptyLcnQuery
|
||||||
|
: (luceneSubQueries.size() == 1) ? (Query) luceneSubQueries.get(0)
|
||||||
|
: SrndBooleanQuery.makeBooleanQuery(
|
||||||
|
/* luceneSubQueries all have default weight */
|
||||||
|
luceneSubQueries, BooleanClause.Occur.SHOULD); /* OR the subquery terms */
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addSpanQueries(final SpanNearClauseFactory sncf) throws IOException {
|
||||||
|
visitMatchingTerms(
|
||||||
|
sncf.getIndexReader(),
|
||||||
|
sncf.getFieldName(),
|
||||||
|
new MatchingTermVisitor() {
|
||||||
|
public void visitMatchingTerm(Term term) throws IOException {
|
||||||
|
sncf.addTermWeighted(term, getWeight());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,153 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
SpanNearClauseFactory:
|
||||||
|
|
||||||
|
Operations:
|
||||||
|
|
||||||
|
- create for a field name and an indexreader.
|
||||||
|
|
||||||
|
- add a weighted Term
|
||||||
|
this should add a corresponding SpanTermQuery, or
|
||||||
|
increase the weight of an existing one.
|
||||||
|
|
||||||
|
- add a weighted subquery SpanNearQuery
|
||||||
|
|
||||||
|
- create a clause for SpanNearQuery from the things added above.
|
||||||
|
For this, create an array of SpanQuery's from the added ones.
|
||||||
|
The clause normally is a SpanOrQuery over the added subquery SpanNearQuery
|
||||||
|
the SpanTermQuery's for the added Term's
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* When it is necessary to suppress double subqueries as much as possible:
|
||||||
|
hashCode() and equals() on unweighted SpanQuery are needed (possibly via getTerms(),
|
||||||
|
the terms are individually hashable).
|
||||||
|
Likewise for SpanNearQuery: hash on the subqueries and the slop.
|
||||||
|
Possibly merge SpanNearQuery's by adding the weights of the corresponding subqueries.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/* To be determined:
|
||||||
|
Are SpanQuery weights handled correctly during search by Lucene?
|
||||||
|
Should the resulting SpanOrQuery be sorted?
|
||||||
|
Could other SpanQueries be added for use in this factory:
|
||||||
|
- SpanOrQuery: in principle yes, but it only has access to its terms
|
||||||
|
via getTerms(); are the corresponding weights available?
|
||||||
|
- SpanFirstQuery: treat similar to subquery SpanNearQuery. (ok?)
|
||||||
|
- SpanNotQuery: treat similar to subquery SpanNearQuery. (ok?)
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import java.util.Comparator;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.index.TermEnum;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.spans.SpanQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanNearQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanOrQuery;
|
||||||
|
import org.apache.lucene.search.spans.SpanTermQuery;
|
||||||
|
|
||||||
|
|
||||||
|
public class SpanNearClauseFactory {
|
||||||
|
public SpanNearClauseFactory(IndexReader reader, String fieldName, BasicQueryFactory qf) {
|
||||||
|
this.reader = reader;
|
||||||
|
this.fieldName = fieldName;
|
||||||
|
this.weightBySpanQuery = new HashMap();
|
||||||
|
this.qf = qf;
|
||||||
|
}
|
||||||
|
private IndexReader reader;
|
||||||
|
private String fieldName;
|
||||||
|
private HashMap weightBySpanQuery;
|
||||||
|
private BasicQueryFactory qf;
|
||||||
|
|
||||||
|
public IndexReader getIndexReader() {return reader;}
|
||||||
|
|
||||||
|
public String getFieldName() {return fieldName;}
|
||||||
|
|
||||||
|
public BasicQueryFactory getBasicQueryFactory() {return qf;}
|
||||||
|
|
||||||
|
public TermEnum getTermEnum(String termText) throws IOException {
|
||||||
|
return getIndexReader().terms(new Term(getFieldName(), termText));
|
||||||
|
}
|
||||||
|
|
||||||
|
public int size() {return weightBySpanQuery.size();}
|
||||||
|
|
||||||
|
public void clear() {weightBySpanQuery.clear();}
|
||||||
|
|
||||||
|
protected void addSpanQueryWeighted(SpanQuery sq, float weight) {
|
||||||
|
Float w = (Float) weightBySpanQuery.get(sq);
|
||||||
|
if (w != null)
|
||||||
|
w = new Float(w.floatValue() + weight);
|
||||||
|
else
|
||||||
|
w = new Float(weight);
|
||||||
|
weightBySpanQuery.put(sq, w);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addTermWeighted(Term t, float weight) throws IOException {
|
||||||
|
SpanTermQuery stq = qf.newSpanTermQuery(t);
|
||||||
|
/* CHECKME: wrap in Hashable...? */
|
||||||
|
addSpanQueryWeighted(stq, weight);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void addSpanNearQuery(Query q) {
|
||||||
|
if (q == SrndQuery.theEmptyLcnQuery)
|
||||||
|
return;
|
||||||
|
if (! (q instanceof SpanNearQuery))
|
||||||
|
throw new AssertionError("Expected SpanNearQuery: " + q.toString(getFieldName()));
|
||||||
|
/* CHECKME: wrap in Hashable...? */
|
||||||
|
addSpanQueryWeighted((SpanNearQuery)q, q.getBoost());
|
||||||
|
}
|
||||||
|
|
||||||
|
public SpanQuery makeSpanNearClause() {
|
||||||
|
SpanQuery [] spanQueries = new SpanQuery[size()];
|
||||||
|
Iterator sqi = weightBySpanQuery.keySet().iterator();
|
||||||
|
int i = 0;
|
||||||
|
while (sqi.hasNext()) {
|
||||||
|
SpanQuery sq = (SpanQuery) sqi.next();
|
||||||
|
sq.setBoost(((Float)weightBySpanQuery.get(sq)).floatValue());
|
||||||
|
spanQueries[i++] = sq;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* CHECKME: Does the underlying implementation of SpanQuery need sorting? */
|
||||||
|
if (false) /* true when sorting needed */
|
||||||
|
Arrays.sort(spanQueries, new Comparator() {
|
||||||
|
public int compare(Object o1, Object o2) {
|
||||||
|
SpanQuery sq1 = (SpanQuery) o1;
|
||||||
|
SpanQuery sq2 = (SpanQuery) o2;
|
||||||
|
/* compare the text of the first term of each SpanQuery */
|
||||||
|
return ((Term)sq1.getTerms().iterator().next()).text().compareTo(
|
||||||
|
((Term)sq2.getTerms().iterator().next()).text());
|
||||||
|
}
|
||||||
|
public boolean equals(Object o) {return false;}
|
||||||
|
});
|
||||||
|
|
||||||
|
if (spanQueries.length == 1)
|
||||||
|
return spanQueries[0];
|
||||||
|
else
|
||||||
|
return new SpanOrQuery(spanQueries);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,44 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
|
||||||
|
class SrndBooleanQuery {
|
||||||
|
public static void addQueriesToBoolean(
|
||||||
|
BooleanQuery bq,
|
||||||
|
List queries,
|
||||||
|
BooleanClause.Occur occur) {
|
||||||
|
for (int i = 0; i < queries.size(); i++) {
|
||||||
|
bq.add( (Query) queries.get(i), occur);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static Query makeBooleanQuery(
|
||||||
|
List queries,
|
||||||
|
BooleanClause.Occur occur) {
|
||||||
|
if (queries.size() <= 1) {
|
||||||
|
throw new AssertionError("Too few subqueries: " + queries.size());
|
||||||
|
}
|
||||||
|
BooleanQuery bq = new BooleanQuery();
|
||||||
|
addQueriesToBoolean(bq, queries.subList(0, queries.size()), occur);
|
||||||
|
return bq;
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,75 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.index.TermEnum;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
|
||||||
|
public class SrndPrefixQuery extends SimpleTerm {
|
||||||
|
public SrndPrefixQuery(String prefix, boolean quoted, char truncator) {
|
||||||
|
super(quoted);
|
||||||
|
this.prefix = prefix;
|
||||||
|
this.truncator = truncator;
|
||||||
|
}
|
||||||
|
|
||||||
|
private final String prefix;
|
||||||
|
public String getPrefix() {return prefix;}
|
||||||
|
|
||||||
|
private final char truncator;
|
||||||
|
public char getSuffixOperator() {return truncator;}
|
||||||
|
|
||||||
|
public Term getLucenePrefixTerm(String fieldName) {
|
||||||
|
return new Term(fieldName, getPrefix());
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toStringUnquoted() {return getPrefix();}
|
||||||
|
|
||||||
|
protected void suffixToString(StringBuffer r) {r.append(getSuffixOperator());}
|
||||||
|
|
||||||
|
public void visitMatchingTerms(
|
||||||
|
IndexReader reader,
|
||||||
|
String fieldName,
|
||||||
|
MatchingTermVisitor mtv) throws IOException
|
||||||
|
{
|
||||||
|
/* inspired by PrefixQuery.rewrite(): */
|
||||||
|
TermEnum enumerator = reader.terms(getLucenePrefixTerm(fieldName));
|
||||||
|
boolean expanded = false;
|
||||||
|
try {
|
||||||
|
do {
|
||||||
|
Term term = enumerator.term();
|
||||||
|
if ((term != null)
|
||||||
|
&& term.text().startsWith(getPrefix())
|
||||||
|
&& term.field().equals(fieldName)) {
|
||||||
|
mtv.visitMatchingTerm(term);
|
||||||
|
expanded = true;
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} while (enumerator.next());
|
||||||
|
} finally {
|
||||||
|
enumerator.close();
|
||||||
|
}
|
||||||
|
if (! expanded) {
|
||||||
|
System.out.println("No terms in " + fieldName + " field for: " + toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,86 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Iterator;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.BooleanQuery;
|
||||||
|
import org.apache.lucene.search.BooleanClause;
|
||||||
|
|
||||||
|
public abstract class SrndQuery implements Cloneable {
|
||||||
|
public SrndQuery() {}
|
||||||
|
|
||||||
|
private float weight = (float) 1.0;
|
||||||
|
private boolean weighted = false;
|
||||||
|
|
||||||
|
public void setWeight(float w) {
|
||||||
|
weight = w; /* as parsed from the query text */
|
||||||
|
weighted = true;
|
||||||
|
}
|
||||||
|
public boolean isWeighted() {return weighted;}
|
||||||
|
public float getWeight() { return weight; }
|
||||||
|
public String getWeightString() {return Float.toString(getWeight());}
|
||||||
|
|
||||||
|
public String getWeightOperator() {return "^";}
|
||||||
|
|
||||||
|
protected void weightToString(StringBuffer r) { /* append the weight part of a query */
|
||||||
|
if (isWeighted()) {
|
||||||
|
r.append(getWeightOperator());
|
||||||
|
r.append(getWeightString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public Query makeLuceneQueryField(String fieldName, BasicQueryFactory qf){
|
||||||
|
Query q = makeLuceneQueryFieldNoBoost(fieldName, qf);
|
||||||
|
if (isWeighted()) {
|
||||||
|
q.setBoost(getWeight() * q.getBoost()); /* weight may be at any level in a SrndQuery */
|
||||||
|
}
|
||||||
|
return q;
|
||||||
|
}
|
||||||
|
|
||||||
|
public abstract Query makeLuceneQueryFieldNoBoost(String fieldName, BasicQueryFactory qf);
|
||||||
|
|
||||||
|
public abstract String toString();
|
||||||
|
|
||||||
|
public boolean isFieldsSubQueryAcceptable() {return true;}
|
||||||
|
|
||||||
|
public Object clone() {
|
||||||
|
try {
|
||||||
|
return super.clone();
|
||||||
|
} catch (CloneNotSupportedException cns) {
|
||||||
|
throw new Error(cns);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* An empty Lucene query */
|
||||||
|
public final static Query theEmptyLcnQuery = new BooleanQuery() { /* no changes allowed */
|
||||||
|
public void setBoost(float boost) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
public void add(BooleanClause clause) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
public void add(Query query, BooleanClause.Occur occur) {
|
||||||
|
throw new UnsupportedOperationException();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,67 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.TermQuery;
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.index.TermEnum;
|
||||||
|
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
|
||||||
|
public class SrndTermQuery extends SimpleTerm {
|
||||||
|
public SrndTermQuery(String termText, boolean quoted) {
|
||||||
|
super(quoted);
|
||||||
|
this.termText = termText;
|
||||||
|
}
|
||||||
|
|
||||||
|
private final String termText;
|
||||||
|
public String getTermText() {return termText;}
|
||||||
|
|
||||||
|
public Term getLuceneTerm(String fieldName) {
|
||||||
|
return new Term(fieldName, getTermText());
|
||||||
|
}
|
||||||
|
|
||||||
|
public String toStringUnquoted() {return getTermText();}
|
||||||
|
|
||||||
|
public void visitMatchingTerms(
|
||||||
|
IndexReader reader,
|
||||||
|
String fieldName,
|
||||||
|
MatchingTermVisitor mtv) throws IOException
|
||||||
|
{
|
||||||
|
/* check term presence in index here for symmetry with other SimpleTerm's */
|
||||||
|
TermEnum enumerator = reader.terms(getLuceneTerm(fieldName));
|
||||||
|
try {
|
||||||
|
Term it= enumerator.term(); /* same or following index term */
|
||||||
|
if ((it != null)
|
||||||
|
&& it.text().equals(getTermText())
|
||||||
|
&& it.field().equals(fieldName)) {
|
||||||
|
mtv.visitMatchingTerm(it);
|
||||||
|
} else {
|
||||||
|
System.out.println("No term in " + fieldName + " field for: " + toString());
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
enumerator.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,111 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import org.apache.lucene.index.Term;
|
||||||
|
import org.apache.lucene.index.TermEnum;
|
||||||
|
import org.apache.lucene.index.IndexReader;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
|
||||||
|
import java.util.regex.Pattern;
|
||||||
|
import java.util.regex.Matcher;
|
||||||
|
|
||||||
|
|
||||||
|
public class SrndTruncQuery extends SimpleTerm {
|
||||||
|
public SrndTruncQuery(String truncated, char unlimited, char mask) {
|
||||||
|
super(false); /* not quoted */
|
||||||
|
this.truncated = truncated;
|
||||||
|
this.unlimited = unlimited;
|
||||||
|
this.mask = mask;
|
||||||
|
truncatedToPrefixAndPattern();
|
||||||
|
}
|
||||||
|
|
||||||
|
private final String truncated;
|
||||||
|
private final char unlimited;
|
||||||
|
private final char mask;
|
||||||
|
|
||||||
|
private String prefix;
|
||||||
|
private Pattern pattern;
|
||||||
|
|
||||||
|
|
||||||
|
public String getTruncated() {return truncated;}
|
||||||
|
|
||||||
|
public String toStringUnquoted() {return getTruncated();}
|
||||||
|
|
||||||
|
|
||||||
|
protected boolean matchingChar(char c) {
|
||||||
|
return (c != unlimited) && (c != mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void appendRegExpForChar(char c, StringBuffer re) {
|
||||||
|
if (c == unlimited)
|
||||||
|
re.append(".*");
|
||||||
|
else if (c == mask)
|
||||||
|
re.append(".");
|
||||||
|
else
|
||||||
|
re.append(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
protected void truncatedToPrefixAndPattern() {
|
||||||
|
int i = 0;
|
||||||
|
while ((i < truncated.length()) && matchingChar(truncated.charAt(i))) {
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
prefix = truncated.substring(0, i);
|
||||||
|
|
||||||
|
StringBuffer re = new StringBuffer();
|
||||||
|
while (i < truncated.length()) {
|
||||||
|
appendRegExpForChar(truncated.charAt(i), re);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
pattern = Pattern.compile(re.toString());
|
||||||
|
}
|
||||||
|
|
||||||
|
public void visitMatchingTerms(
|
||||||
|
IndexReader reader,
|
||||||
|
String fieldName,
|
||||||
|
MatchingTermVisitor mtv) throws IOException
|
||||||
|
{
|
||||||
|
boolean expanded = false;
|
||||||
|
int prefixLength = prefix.length();
|
||||||
|
TermEnum enumerator = reader.terms(new Term(fieldName, prefix));
|
||||||
|
Matcher matcher = pattern.matcher("");
|
||||||
|
try {
|
||||||
|
do {
|
||||||
|
Term term = enumerator.term();
|
||||||
|
if (term != null) {
|
||||||
|
String text = term.text();
|
||||||
|
if ((! text.startsWith(prefix)) || (! term.field().equals(fieldName))) {
|
||||||
|
break;
|
||||||
|
} else {
|
||||||
|
matcher.reset( text.substring(prefixLength));
|
||||||
|
if (matcher.matches()) {
|
||||||
|
mtv.visitMatchingTerm(term);
|
||||||
|
expanded = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while (enumerator.next());
|
||||||
|
} finally {
|
||||||
|
enumerator.close();
|
||||||
|
matcher.reset();
|
||||||
|
}
|
||||||
|
if (! expanded) {
|
||||||
|
System.out.println("No terms in " + fieldName + " field for: " + toString());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,26 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
/**
|
||||||
|
* Copyright 2005 The Apache Software Foundation
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
import java.io.IOException; /* subclass to be usable from within Query.rewrite() */
|
||||||
|
|
||||||
|
/**
 * Reports that a limit on the number of basic queries has been exceeded.
 * It is an IOException so that it is usable from within Query.rewrite().
 */
public class TooManyBasicQueries extends IOException {

  /**
   * @param maxBasicQueries the limit that was exceeded; it is included in the message
   */
  public TooManyBasicQueries(int maxBasicQueries) {
    super(describeLimit(maxBasicQueries));
  }

  /* Builds the exception message for the given limit. */
  private static String describeLimit(int limit) {
    StringBuffer text = new StringBuffer("Exceeded maximum of ");
    text.append(limit);
    text.append(" basic queries.");
    return text.toString();
  }
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,94 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.IndexSearcher;
|
||||||
|
import org.apache.lucene.search.Searcher;
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
import org.apache.lucene.search.HitCollector;
|
||||||
|
|
||||||
|
import org.apache.lucene.queryParser.surround.parser.QueryParser;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
public class BooleanQueryTest {
|
||||||
|
String queryText;
|
||||||
|
final int[] expectedDocNrs;
|
||||||
|
SingleFieldTestDb dBase;
|
||||||
|
String fieldName;
|
||||||
|
TestCase testCase;
|
||||||
|
BasicQueryFactory qf;
|
||||||
|
boolean verbose = true;
|
||||||
|
|
||||||
|
public BooleanQueryTest(
|
||||||
|
String queryText,
|
||||||
|
int[] expectedDocNrs,
|
||||||
|
SingleFieldTestDb dBase,
|
||||||
|
String fieldName,
|
||||||
|
TestCase testCase,
|
||||||
|
BasicQueryFactory qf) {
|
||||||
|
this.queryText = queryText;
|
||||||
|
this.expectedDocNrs = expectedDocNrs;
|
||||||
|
this.dBase = dBase;
|
||||||
|
this.fieldName = fieldName;
|
||||||
|
this.testCase = testCase;
|
||||||
|
this.qf = qf;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setVerbose(boolean verbose) {this.verbose = verbose;}
|
||||||
|
|
||||||
|
class TestCollector extends HitCollector { // FIXME: use check hits from Lucene tests
|
||||||
|
int totalMatched;
|
||||||
|
boolean[] encountered;
|
||||||
|
|
||||||
|
TestCollector() {
|
||||||
|
totalMatched = 0;
|
||||||
|
encountered = new boolean[expectedDocNrs.length];
|
||||||
|
}
|
||||||
|
|
||||||
|
public void collect(int docNr, float score) {
|
||||||
|
/* System.out.println(docNr + " '" + dBase.getDocs()[docNr] + "': " + score); */
|
||||||
|
testCase.assertTrue(queryText + ": positive score", score > 0.0);
|
||||||
|
testCase.assertTrue(queryText + ": too many hits", totalMatched < expectedDocNrs.length);
|
||||||
|
int i;
|
||||||
|
for (i = 0; i < expectedDocNrs.length; i++) {
|
||||||
|
if ((! encountered[i]) && (expectedDocNrs[i] == docNr)) {
|
||||||
|
encountered[i] = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (i == expectedDocNrs.length) {
|
||||||
|
testCase.assertTrue(queryText + ": doc nr for hit not expected: " + docNr, false);
|
||||||
|
}
|
||||||
|
totalMatched++;
|
||||||
|
}
|
||||||
|
|
||||||
|
void checkNrHits() {
|
||||||
|
testCase.assertEquals(queryText + ": nr of hits", expectedDocNrs.length, totalMatched);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public void doTest() throws Exception {
|
||||||
|
QueryParser parser = new QueryParser();
|
||||||
|
|
||||||
|
if (verbose) {
|
||||||
|
System.out.println("");
|
||||||
|
System.out.println("Query: " + queryText);
|
||||||
|
}
|
||||||
|
|
||||||
|
SrndQuery lq = parser.parse(queryText);
|
||||||
|
|
||||||
|
/* if (verbose) System.out.println("Srnd: " + lq.toString()); */
|
||||||
|
|
||||||
|
Query query = lq.makeLuceneQueryField(fieldName, qf);
|
||||||
|
/* if (verbose) System.out.println("Lucene: " + query.toString()); */
|
||||||
|
|
||||||
|
TestCollector tc = new TestCollector();
|
||||||
|
Searcher searcher = new IndexSearcher(dBase.getDb());
|
||||||
|
try {
|
||||||
|
searcher.search(query, tc);
|
||||||
|
} finally {
|
||||||
|
searcher.close();
|
||||||
|
}
|
||||||
|
tc.checkNrHits();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,55 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
|
||||||
|
import org.apache.lucene.search.Query;
|
||||||
|
|
||||||
|
import org.apache.lucene.queryParser.surround.parser.QueryParser;
|
||||||
|
import org.apache.lucene.queryParser.surround.parser.ParseException;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
|
||||||
|
public class ExceptionQueryTest {
|
||||||
|
private String queryText;
|
||||||
|
private boolean verbose;
|
||||||
|
private TestCase testCase;
|
||||||
|
|
||||||
|
public ExceptionQueryTest(String queryText, boolean verbose) {
|
||||||
|
this.queryText = queryText;
|
||||||
|
this.verbose = verbose;
|
||||||
|
this.testCase = testCase;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void doTest(StringBuffer failQueries) {
|
||||||
|
QueryParser parser = new QueryParser();
|
||||||
|
boolean pass = false;
|
||||||
|
SrndQuery lq = null;
|
||||||
|
try {
|
||||||
|
lq = parser.parse(queryText);
|
||||||
|
if (verbose) {
|
||||||
|
System.out.println("Query: " + queryText + "\nParsed as: " + lq.toString());
|
||||||
|
}
|
||||||
|
} catch (ParseException e) {
|
||||||
|
if (verbose) {
|
||||||
|
System.out.println("Parse exception for query:\n"
|
||||||
|
+ queryText + "\n"
|
||||||
|
+ e.getMessage());
|
||||||
|
}
|
||||||
|
pass = true;
|
||||||
|
}
|
||||||
|
if (! pass) {
|
||||||
|
failQueries.append(queryText);
|
||||||
|
failQueries.append("\nParsed as: ");
|
||||||
|
failQueries.append(lq.toString());
|
||||||
|
failQueries.append("\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public static String getFailQueries(String[] exceptionQueries, boolean verbose) {
|
||||||
|
StringBuffer failQueries = new StringBuffer();
|
||||||
|
for (int i = 0; i < exceptionQueries.length; i++ ) {
|
||||||
|
new ExceptionQueryTest( exceptionQueries[i], verbose).doTest(failQueries);
|
||||||
|
}
|
||||||
|
return failQueries.toString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,39 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
|
||||||
|
import org.apache.lucene.store.Directory;
|
||||||
|
import org.apache.lucene.store.RAMDirectory;
|
||||||
|
import org.apache.lucene.document.Document;
|
||||||
|
import org.apache.lucene.document.Field;
|
||||||
|
import org.apache.lucene.analysis.WhitespaceAnalyzer;
|
||||||
|
import org.apache.lucene.analysis.Analyzer;
|
||||||
|
import org.apache.lucene.index.IndexWriter;
|
||||||
|
|
||||||
|
public class SingleFieldTestDb {
|
||||||
|
private Directory db;
|
||||||
|
private String[] docs;
|
||||||
|
private String fieldName;
|
||||||
|
private String dbName = "testdb";
|
||||||
|
|
||||||
|
public SingleFieldTestDb(String[] documents, String fName) {
|
||||||
|
try {
|
||||||
|
db = new RAMDirectory();
|
||||||
|
docs = documents;
|
||||||
|
fieldName = fName;
|
||||||
|
Analyzer analyzer = new WhitespaceAnalyzer();
|
||||||
|
IndexWriter writer = new IndexWriter(db, analyzer, true);
|
||||||
|
for (int j = 0; j < docs.length; j++) {
|
||||||
|
Document d = new Document();
|
||||||
|
d.add(new Field(fieldName, docs[j], Field.Store.NO, Field.Index.TOKENIZED));
|
||||||
|
writer.addDocument(d);
|
||||||
|
}
|
||||||
|
writer.close();
|
||||||
|
} catch (java.io.IOException ioe) {
|
||||||
|
throw new Error(ioe);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Directory getDb() {return db;}
|
||||||
|
String[] getDocs() {return docs;}
|
||||||
|
String getFieldname() {return fieldName;}
|
||||||
|
}
|
||||||
|
|
|
@ -0,0 +1,51 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
import junit.framework.TestSuite;
|
||||||
|
import junit.textui.TestRunner;
|
||||||
|
|
||||||
|
public class Test01Exceptions extends TestCase {
|
||||||
|
/** Main for running test case by itself. */
|
||||||
|
public static void main(String args[]) {
|
||||||
|
TestRunner.run(new TestSuite(Test01Exceptions.class));
|
||||||
|
}
|
||||||
|
|
||||||
|
boolean verbose = false; /* to show actual parsing error messages */
|
||||||
|
final String fieldName = "bi";
|
||||||
|
|
||||||
|
String[] exceptionQueries = {
|
||||||
|
"*",
|
||||||
|
"a*",
|
||||||
|
"ab*",
|
||||||
|
"?",
|
||||||
|
"a?",
|
||||||
|
"ab?",
|
||||||
|
"a???b",
|
||||||
|
"a?",
|
||||||
|
"a*b?",
|
||||||
|
"word1 word2",
|
||||||
|
"word2 AND",
|
||||||
|
"word1 OR",
|
||||||
|
"AND(word2)",
|
||||||
|
"AND(word2,)",
|
||||||
|
"AND(word2,word1,)",
|
||||||
|
"OR(word2)",
|
||||||
|
"OR(word2 ,",
|
||||||
|
"OR(word2 , word1 ,)",
|
||||||
|
"xx NOT",
|
||||||
|
"xx (a AND b)",
|
||||||
|
"(a AND b",
|
||||||
|
"a OR b)",
|
||||||
|
"or(word2+ not ord+, and xyz,def)",
|
||||||
|
""
|
||||||
|
};
|
||||||
|
|
||||||
|
public void test01Exceptions() throws Exception {
|
||||||
|
String m = ExceptionQueryTest.getFailQueries(exceptionQueries, verbose);
|
||||||
|
if (m.length() > 0) {
|
||||||
|
fail("No ParseException for:\n" + m);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,105 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
import junit.framework.TestSuite;
|
||||||
|
import junit.textui.TestRunner;
|
||||||
|
|
||||||
|
|
||||||
|
public class Test02Boolean extends TestCase {
|
||||||
|
public static void main(String args[]) {
|
||||||
|
TestRunner.run(new TestSuite(Test02Boolean.class));
|
||||||
|
}
|
||||||
|
|
||||||
|
final String fieldName = "bi";
|
||||||
|
boolean verbose = false;
|
||||||
|
int maxBasicQueries = 16;
|
||||||
|
|
||||||
|
String[] docs1 = {
|
||||||
|
"word1 word2 word3",
|
||||||
|
"word4 word5",
|
||||||
|
"ord1 ord2 ord3",
|
||||||
|
"orda1 orda2 orda3 word2 worda3",
|
||||||
|
"a c e a b c"
|
||||||
|
};
|
||||||
|
|
||||||
|
SingleFieldTestDb db1 = new SingleFieldTestDb(docs1, fieldName);
|
||||||
|
|
||||||
|
public void normalTest1(String query, int[] expdnrs) throws Exception {
|
||||||
|
BooleanQueryTest bqt = new BooleanQueryTest( query, expdnrs, db1, fieldName, this,
|
||||||
|
new BasicQueryFactory(maxBasicQueries));
|
||||||
|
bqt.setVerbose(verbose);
|
||||||
|
bqt.doTest();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test02Terms01() throws Exception {
|
||||||
|
int[] expdnrs = {0}; normalTest1( "word1", expdnrs);
|
||||||
|
}
|
||||||
|
public void test02Terms02() throws Exception {
|
||||||
|
int[] expdnrs = {0, 1, 3}; normalTest1( "word*", expdnrs);
|
||||||
|
}
|
||||||
|
public void test02Terms03() throws Exception {
|
||||||
|
int[] expdnrs = {2}; normalTest1( "ord2", expdnrs);
|
||||||
|
}
|
||||||
|
public void test02Terms04() throws Exception {
|
||||||
|
int[] expdnrs = {}; normalTest1( "kxork*", expdnrs);
|
||||||
|
}
|
||||||
|
public void test02Terms05() throws Exception {
|
||||||
|
int[] expdnrs = {0, 1, 3}; normalTest1( "wor*", expdnrs);
|
||||||
|
}
|
||||||
|
public void test02Terms06() throws Exception {
|
||||||
|
int[] expdnrs = {}; normalTest1( "ab", expdnrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test02Terms10() throws Exception {
|
||||||
|
int[] expdnrs = {}; normalTest1( "abc?", expdnrs);
|
||||||
|
}
|
||||||
|
public void test02Terms13() throws Exception {
|
||||||
|
int[] expdnrs = {0,1,3}; normalTest1( "word?", expdnrs);
|
||||||
|
}
|
||||||
|
public void test02Terms14() throws Exception {
|
||||||
|
int[] expdnrs = {0,1,3}; normalTest1( "w?rd?", expdnrs);
|
||||||
|
}
|
||||||
|
public void test02Terms20() throws Exception {
|
||||||
|
int[] expdnrs = {0,1,3}; normalTest1( "w*rd?", expdnrs);
|
||||||
|
}
|
||||||
|
public void test02Terms21() throws Exception {
|
||||||
|
int[] expdnrs = {3}; normalTest1( "w*rd??", expdnrs);
|
||||||
|
}
|
||||||
|
public void test02Terms22() throws Exception {
|
||||||
|
int[] expdnrs = {3}; normalTest1( "w*?da?", expdnrs);
|
||||||
|
}
|
||||||
|
public void test02Terms23() throws Exception {
|
||||||
|
int[] expdnrs = {}; normalTest1( "w?da?", expdnrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test03And01() throws Exception {
|
||||||
|
int[] expdnrs = {0}; normalTest1( "word1 AND word2", expdnrs);
|
||||||
|
}
|
||||||
|
public void test03And02() throws Exception {
|
||||||
|
int[] expdnrs = {3}; normalTest1( "word* and ord*", expdnrs);
|
||||||
|
}
|
||||||
|
public void test03And03() throws Exception {
|
||||||
|
int[] expdnrs = {0}; normalTest1( "and(word1,word2)", expdnrs);
|
||||||
|
}
|
||||||
|
public void test04Or01() throws Exception {
|
||||||
|
int[] expdnrs = {0, 3}; normalTest1( "word1 or word2", expdnrs);
|
||||||
|
}
|
||||||
|
public void test04Or02() throws Exception {
|
||||||
|
int[] expdnrs = {0, 1, 2, 3}; normalTest1( "word* OR ord*", expdnrs);
|
||||||
|
}
|
||||||
|
public void test04Or03() throws Exception {
|
||||||
|
int[] expdnrs = {0, 3}; normalTest1( "OR (word1, word2)", expdnrs);
|
||||||
|
}
|
||||||
|
public void test05Not01() throws Exception {
|
||||||
|
int[] expdnrs = {3}; normalTest1( "word2 NOT word1", expdnrs);
|
||||||
|
}
|
||||||
|
public void test05Not02() throws Exception {
|
||||||
|
int[] expdnrs = {0}; normalTest1( "word2* not ord*", expdnrs);
|
||||||
|
}
|
||||||
|
public void test06AndOr01() throws Exception {
|
||||||
|
int[] expdnrs = {0}; normalTest1( "(word1 or ab)and or(word2,xyz, defg)", expdnrs);
|
||||||
|
}
|
||||||
|
public void test07AndOrNot02() throws Exception {
|
||||||
|
int[] expdnrs = {0}; normalTest1( "or( word2* not ord*, and(xyz,def))", expdnrs);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,203 @@
|
||||||
|
package org.apache.lucene.queryParser.surround.query;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
import junit.framework.TestSuite;
|
||||||
|
import junit.textui.TestRunner;
|
||||||
|
|
||||||
|
public class Test03Distance extends TestCase {
|
||||||
|
public static void main(String args[]) {
|
||||||
|
TestRunner.run(new TestSuite(Test03Distance.class));
|
||||||
|
}
|
||||||
|
boolean verbose = false;
|
||||||
|
int maxBasicQueries = 16;
|
||||||
|
|
||||||
|
String [] exceptionQueries = {
|
||||||
|
"(aa and bb) w cc",
|
||||||
|
"(aa or bb) w (cc and dd)",
|
||||||
|
"(aa opt bb) w cc",
|
||||||
|
"(aa not bb) w cc",
|
||||||
|
"(aa or bb) w (bi:cc)",
|
||||||
|
"(aa or bb) w bi:cc",
|
||||||
|
"(aa or bi:bb) w cc",
|
||||||
|
"(aa or (bi:bb)) w cc",
|
||||||
|
"(aa or (bb and dd)) w cc"
|
||||||
|
};
|
||||||
|
|
||||||
|
public void test00Exceptions() throws Exception {
|
||||||
|
String m = ExceptionQueryTest.getFailQueries(exceptionQueries, verbose);
|
||||||
|
if (m.length() > 0) {
|
||||||
|
fail("No ParseException for:\n" + m);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
final String fieldName = "bi";
|
||||||
|
|
||||||
|
String[] docs1 = {
|
||||||
|
"word1 word2 word3",
|
||||||
|
"word4 word5",
|
||||||
|
"ord1 ord2 ord3",
|
||||||
|
"orda1 orda2 orda3 word2 worda3",
|
||||||
|
"a c e a b c"
|
||||||
|
};
|
||||||
|
|
||||||
|
SingleFieldTestDb db1 = new SingleFieldTestDb(docs1, fieldName);
|
||||||
|
|
||||||
|
String[] docs2 = {
|
||||||
|
"w1 w2 w3 w4 w5",
|
||||||
|
"w1 w3 w2 w3",
|
||||||
|
""
|
||||||
|
};
|
||||||
|
|
||||||
|
SingleFieldTestDb db2 = new SingleFieldTestDb(docs2, fieldName);
|
||||||
|
|
||||||
|
public void distanceTest1(String query, int[] expdnrs) throws Exception {
|
||||||
|
BooleanQueryTest bqt = new BooleanQueryTest( query, expdnrs, db1, fieldName, this,
|
||||||
|
new BasicQueryFactory(maxBasicQueries));
|
||||||
|
bqt.setVerbose(verbose);
|
||||||
|
bqt.doTest();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void distanceTest2(String query, int[] expdnrs) throws Exception {
|
||||||
|
BooleanQueryTest bqt = new BooleanQueryTest( query, expdnrs, db2, fieldName, this,
|
||||||
|
new BasicQueryFactory(maxBasicQueries));
|
||||||
|
bqt.setVerbose(verbose);
|
||||||
|
bqt.doTest();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test0W01() throws Exception {
|
||||||
|
int[] expdnrs = {0}; distanceTest1( "word1 w word2", expdnrs);
|
||||||
|
}
|
||||||
|
public void test0N01() throws Exception {
|
||||||
|
int[] expdnrs = {0}; distanceTest1( "word1 n word2", expdnrs);
|
||||||
|
}
|
||||||
|
public void test0N01r() throws Exception { /* r reverse */
|
||||||
|
int[] expdnrs = {0}; distanceTest1( "word2 n word1", expdnrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test0W02() throws Exception {
|
||||||
|
int[] expdnrs = {}; distanceTest1( "word2 w word1", expdnrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test0W03() throws Exception {
|
||||||
|
int[] expdnrs = {}; distanceTest1( "word2 2W word1", expdnrs);
|
||||||
|
}
|
||||||
|
public void test0N03() throws Exception {
|
||||||
|
int[] expdnrs = {0}; distanceTest1( "word2 2N word1", expdnrs);
|
||||||
|
}
|
||||||
|
public void test0N03r() throws Exception {
|
||||||
|
int[] expdnrs = {0}; distanceTest1( "word1 2N word2", expdnrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test0W04() throws Exception {
|
||||||
|
int[] expdnrs = {}; distanceTest1( "word2 3w word1", expdnrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test0N04() throws Exception {
|
||||||
|
int[] expdnrs = {0}; distanceTest1( "word2 3n word1", expdnrs);
|
||||||
|
}
|
||||||
|
public void test0N04r() throws Exception {
|
||||||
|
int[] expdnrs = {0}; distanceTest1( "word1 3n word2", expdnrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test0W05() throws Exception {
|
||||||
|
int[] expdnrs = {}; distanceTest1( "orda1 w orda3", expdnrs);
|
||||||
|
}
|
||||||
|
public void test0W06() throws Exception {
|
||||||
|
int[] expdnrs = {3}; distanceTest1( "orda1 2w orda3", expdnrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test1Wtrunc01() throws Exception {
|
||||||
|
int[] expdnrs = {0}; distanceTest1( "word1* w word2", expdnrs);
|
||||||
|
}
|
||||||
|
public void test1Wtrunc02() throws Exception {
|
||||||
|
int[] expdnrs = {0}; distanceTest1( "word* w word2", expdnrs);
|
||||||
|
}
|
||||||
|
public void test1Wtrunc02r() throws Exception {
|
||||||
|
int[] expdnrs = {0,3}; distanceTest1( "word2 w word*", expdnrs);
|
||||||
|
}
|
||||||
|
public void test1Ntrunc02() throws Exception {
|
||||||
|
int[] expdnrs = {0,3}; distanceTest1( "word* n word2", expdnrs);
|
||||||
|
}
|
||||||
|
public void test1Ntrunc02r() throws Exception {
|
||||||
|
int[] expdnrs = {0,3}; distanceTest1( "word2 n word*", expdnrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test1Wtrunc03() throws Exception {
|
||||||
|
int[] expdnrs = {0}; distanceTest1( "word1* w word2*", expdnrs);
|
||||||
|
}
|
||||||
|
public void test1Ntrunc03() throws Exception {
|
||||||
|
int[] expdnrs = {0}; distanceTest1( "word1* N word2*", expdnrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test1Wtrunc04() throws Exception {
|
||||||
|
int[] expdnrs = {}; distanceTest1( "kxork* w kxor*", expdnrs);
|
||||||
|
}
|
||||||
|
public void test1Ntrunc04() throws Exception {
|
||||||
|
int[] expdnrs = {}; distanceTest1( "kxork* 99n kxor*", expdnrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test1Wtrunc05() throws Exception {
|
||||||
|
int[] expdnrs = {}; distanceTest1( "word2* 2W word1*", expdnrs);
|
||||||
|
}
|
||||||
|
public void test1Ntrunc05() throws Exception {
|
||||||
|
int[] expdnrs = {0}; distanceTest1( "word2* 2N word1*", expdnrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test1Wtrunc06() throws Exception {
|
||||||
|
int[] expdnrs = {3}; distanceTest1( "ord* W word*", expdnrs);
|
||||||
|
}
|
||||||
|
public void test1Ntrunc06() throws Exception {
|
||||||
|
int[] expdnrs = {3}; distanceTest1( "ord* N word*", expdnrs);
|
||||||
|
}
|
||||||
|
public void test1Ntrunc06r() throws Exception {
|
||||||
|
int[] expdnrs = {3}; distanceTest1( "word* N ord*", expdnrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test1Wtrunc07() throws Exception {
|
||||||
|
int[] expdnrs = {3}; distanceTest1( "(orda2 OR orda3) W word*", expdnrs);
|
||||||
|
}
|
||||||
|
public void test1Wtrunc08() throws Exception {
|
||||||
|
int[] expdnrs = {3}; distanceTest1( "(orda2 OR orda3) W (word2 OR worda3)", expdnrs);
|
||||||
|
}
|
||||||
|
public void test1Wtrunc09() throws Exception {
|
||||||
|
int[] expdnrs = {3}; distanceTest1( "(orda2 OR orda3) 2W (word2 OR worda3)", expdnrs);
|
||||||
|
}
|
||||||
|
public void test1Ntrunc09() throws Exception {
|
||||||
|
int[] expdnrs = {3}; distanceTest1( "(orda2 OR orda3) 2N (word2 OR worda3)", expdnrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test2Wprefix01() throws Exception {
|
||||||
|
int[] expdnrs = {0}; distanceTest2( "W (w1, w2, w3)", expdnrs);
|
||||||
|
}
|
||||||
|
public void test2Nprefix01a() throws Exception {
|
||||||
|
int[] expdnrs = {0,1}; distanceTest2( "N(w1, w2, w3)", expdnrs);
|
||||||
|
}
|
||||||
|
public void test2Nprefix01b() throws Exception {
|
||||||
|
int[] expdnrs = {0,1}; distanceTest2( "N(w3, w1, w2)", expdnrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test2Wprefix02() throws Exception {
|
||||||
|
int[] expdnrs = {0,1}; distanceTest2( "2W(w1,w2,w3)", expdnrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test2Nprefix02a() throws Exception {
|
||||||
|
int[] expdnrs = {0,1}; distanceTest2( "2N(w1,w2,w3)", expdnrs);
|
||||||
|
}
|
||||||
|
public void test2Nprefix02b() throws Exception {
|
||||||
|
int[] expdnrs = {0,1}; distanceTest2( "2N(w2,w3,w1)", expdnrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test2Wnested01() throws Exception {
|
||||||
|
int[] expdnrs = {0}; distanceTest2( "w1 W w2 W w3", expdnrs);
|
||||||
|
}
|
||||||
|
public void test2Nnested01() throws Exception {
|
||||||
|
int[] expdnrs = {0}; distanceTest2( "w1 N w2 N w3", expdnrs);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void test2Wnested02() throws Exception {
|
||||||
|
int[] expdnrs = {0,1}; distanceTest2( "w1 2W w2 2W w3", expdnrs);
|
||||||
|
}
|
||||||
|
public void test2Nnested02() throws Exception {
|
||||||
|
int[] expdnrs = {0,1}; distanceTest2( "w1 2N w2 2N w3", expdnrs);
|
||||||
|
}
|
||||||
|
}
|
|
@ -0,0 +1,75 @@
|
||||||
|
Description of Surround:
|
||||||
|
|
||||||
|
Surround consists of operators (uppercase/lowercase):
|
||||||
|
|
||||||
|
AND/OR/NOT/nW/nN/() as infix and
|
||||||
|
AND/OR/nW/nN as prefix.
|
||||||
|
|
||||||
|
Distance operators W and N have default n=1, max 99.
|
||||||
|
Implemented as SpanQuery with slop = (n - 1).
|
||||||
|
An example prefix form is:
|
||||||
|
|
||||||
|
20n(aa*, bb*, cc*)
|
||||||
|
|
||||||
|
The name Surround was chosen because of this prefix form
|
||||||
|
and because it uses the newly introduced span queries
|
||||||
|
to implement the proximity operators.
|
||||||
|
The names of the operators and the prefix and suffix
|
||||||
|
forms have been borrowed from various other query
|
||||||
|
languages described on the internet.
|
||||||
|
|
||||||
|
|
||||||
|
Query terms from the Lucene standard query parser:
|
||||||
|
|
||||||
|
field:termtext
|
||||||
|
^ boost
|
||||||
|
* internal and suffix truncation
|
||||||
|
? one character
|
||||||
|
|
||||||
|
|
||||||
|
Some examples:
|
||||||
|
|
||||||
|
aa
|
||||||
|
aa and bb
|
||||||
|
aa and bb or cc same effect as: (aa and bb) or cc
|
||||||
|
aa NOT bb NOT cc same effect as: (aa NOT bb) NOT cc
|
||||||
|
|
||||||
|
and(aa,bb,cc) aa and bb and cc
|
||||||
|
99w(aa,bb,cc) ordered span query with slop 98
|
||||||
|
99n(aa,bb,cc) unordered span query with slop 98
|
||||||
|
|
||||||
|
20n(aa*,bb*)
|
||||||
|
3w(a?a or bb?, cc+)
|
||||||
|
|
||||||
|
title: text: aa
|
||||||
|
title : text : aa or bb
|
||||||
|
title:text: aa not bb
|
||||||
|
title:aa not text:bb
|
||||||
|
|
||||||
|
cc 3w dd infix: dual.
|
||||||
|
|
||||||
|
cc N dd N ee same effect as: (cc N dd) N ee
|
||||||
|
|
||||||
|
text: aa 3d bb
|
||||||
|
|
||||||
|
For examples on using the Surround language, see the
|
||||||
|
test packages.
|
||||||
|
|
||||||
|
|
||||||
|
Development status
|
||||||
|
|
||||||
|
Not tested: multiple fields, internally mapped to OR queries,
|
||||||
|
not compared to Lucene's MultiFieldQueryParser.
|
||||||
|
|
||||||
|
* suffix truncation is implemented very similarly to Lucene's PrefixQuery.
|
||||||
|
|
||||||
|
Wildcards (? and internal *) are implemented with regular expressions
|
||||||
|
to allow further variations. A reimplementation using
|
||||||
|
WildcardTermEnum should be no problem.
|
||||||
|
|
||||||
|
Warnings about missing terms are sent to System.out; this might
|
||||||
|
be replaced by another stream.
|
||||||
|
|
||||||
|
BooleanQueryTest.TestCollector uses a results checking method that should
|
||||||
|
be replaced by the checking method from Lucene's TestBasics.java.
|
||||||
|
|
Loading…
Reference in New Issue