diff --git a/contrib/surround/build-old.xml b/contrib/surround/build-old.xml new file mode 100644 index 00000000000..e69de29bb2d diff --git a/contrib/surround/build.xml b/contrib/surround/build.xml new file mode 100644 index 00000000000..2e098a64384 --- /dev/null +++ b/contrib/surround/build.xml @@ -0,0 +1,16 @@ + + + + + + Surround query parser + + + + + + + + diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/CharStream.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/CharStream.java new file mode 100644 index 00000000000..341425f73ac --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/CharStream.java @@ -0,0 +1,110 @@ +/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 3.0 */ +package org.apache.lucene.queryParser.surround.parser; + +/** + * This interface describes a character stream that maintains line and + * column number positions of the characters. It also has the capability + * to backup the stream to some extent. An implementation of this + * interface is used in the TokenManager implementation generated by + * JavaCCParser. + * + * All the methods except backup can be implemented in any fashion. backup + * needs to be implemented correctly for the correct operation of the lexer. + * Rest of the methods are all used to get information like line number, + * column number and the String that constitutes a token and are not used + * by the lexer. Hence their implementation won't affect the generated lexer's + * operation. + */ + +public interface CharStream { + + /** + * Returns the next character from the selected input. The method + * of selecting the input is the responsibility of the class + * implementing this interface. Can throw any java.io.IOException. + */ + char readChar() throws java.io.IOException; + + /** + * Returns the column position of the character last read. 
+ * @deprecated + * @see #getEndColumn + */ + int getColumn(); + + /** + * Returns the line number of the character last read. + * @deprecated + * @see #getEndLine + */ + int getLine(); + + /** + * Returns the column number of the last character for current token (being + * matched after the last call to BeginToken). + */ + int getEndColumn(); + + /** + * Returns the line number of the last character for current token (being + * matched after the last call to BeginToken). + */ + int getEndLine(); + + /** + * Returns the column number of the first character for current token (being + * matched after the last call to BeginToken). + */ + int getBeginColumn(); + + /** + * Returns the line number of the first character for current token (being + * matched after the last call to BeginToken). + */ + int getBeginLine(); + + /** + * Backs up the input stream by amount steps. Lexer calls this method if it + * had already read some characters, but could not use them to match a + * (longer) token. So, they will be used again as the prefix of the next + * token and it is the implementation's responsibility to do this right. + */ + void backup(int amount); + + /** + * Returns the next character that marks the beginning of the next token. + * All characters must remain in the buffer between two successive calls + * to this method to implement backup correctly. + */ + char BeginToken() throws java.io.IOException; + + /** + * Returns a string made up of characters from the marked token beginning + * to the current buffer position. Implementations have the choice of returning + * anything that they want to. For example, for efficiency, one might decide + * to just return null, which is a valid implementation. + */ + String GetImage(); + + /** + * Returns an array of characters that make up the suffix of length 'len' for + * the currently matched token. This is used to build up the matched string + * for use in actions in the case of MORE. 
A simple and inefficient + * implementation of this is as follows : + * + * { + * String t = GetImage(); + * return t.substring(t.length() - len, t.length()).toCharArray(); + * } + */ + char[] GetSuffix(int len); + + /** + * The lexer calls this function to indicate that it is done with the stream + * and hence implementations can free any resources held by this class. + * Again, the body of this function can be just empty and it will not + * affect the lexer's operation. + */ + void Done(); + +} diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/FastCharStream.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/FastCharStream.java new file mode 100644 index 00000000000..bfa636c545e --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/FastCharStream.java @@ -0,0 +1,120 @@ +// FastCharStream.java +package org.apache.lucene.queryParser.surround.parser; +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.io.*; + +/** An efficient implementation of JavaCC's CharStream interface.

Note that + * this does not do line-number counting, but instead keeps track of the + * character position of the token in the input, as required by Lucene's {@link + * org.apache.lucene.analysis.Token} API. */ +public final class FastCharStream implements CharStream { + char[] buffer = null; + + int bufferLength = 0; // end of valid chars + int bufferPosition = 0; // next char to read + + int tokenStart = 0; // offset in buffer + int bufferStart = 0; // position in file of buffer + + Reader input; // source of chars + + /** Constructs from a Reader. */ + public FastCharStream(Reader r) { + input = r; + } + + /** Returns the next buffered char, calling refill() first when the read position has reached the end of the valid chars. */ + public final char readChar() throws IOException { + if (bufferPosition >= bufferLength) + refill(); + return buffer[bufferPosition++]; + } + + /** Makes room and reads more input: allocates the buffer on first use, doubles it when a token starting at offset 0 fills it, otherwise shifts the current token to the front; then reads from the Reader and throws IOException("read past eof") when the Reader reports end of input. */ + private final void refill() throws IOException { + int newPosition = bufferLength - tokenStart; + + if (tokenStart == 0) { // token won't fit in buffer + if (buffer == null) { // first time: alloc buffer + buffer = new char[2048]; + } else if (bufferLength == buffer.length) { // grow buffer + char[] newBuffer = new char[buffer.length*2]; + System.arraycopy(buffer, 0, newBuffer, 0, bufferLength); + buffer = newBuffer; + } + } else { // shift token to front + System.arraycopy(buffer, tokenStart, buffer, 0, newPosition); + } + + bufferLength = newPosition; // update state + bufferPosition = newPosition; + bufferStart += tokenStart; + tokenStart = 0; + + int charsRead = // fill space in buffer + input.read(buffer, newPosition, buffer.length-newPosition); + if (charsRead == -1) + throw new IOException("read past eof"); + else + bufferLength += charsRead; + } + + /** Marks the current read position as the start of a new token and returns its first char. */ + public final char BeginToken() throws IOException { + tokenStart = bufferPosition; + return readChar(); + } + + /** Moves the read position back by amount chars; no bounds check is made here. */ + public final void backup(int amount) { + bufferPosition -= amount; + } + + /** Returns the chars from the token start up to the current read position as a String. */ + public final String GetImage() { + return new String(buffer, tokenStart, bufferPosition - tokenStart); + } + + /** Returns a copy of the last len chars before the current read position. */ + public final char[] GetSuffix(int len) { + char[] value = new 
char[len]; + System.arraycopy(buffer, bufferPosition - len, value, 0, len); + return value; + } + + /** Closes the underlying Reader; an IOException from close() is reported on System.err and otherwise ignored. */ + public final void Done() { + try { + input.close(); + } catch (IOException e) { + System.err.println("Caught: " + e + "; ignoring."); + } + } + + /* Line numbers are not tracked: the get*Line methods always return 1, and the get*Column methods return character offsets (bufferStart plus a buffer offset). */ + public final int getColumn() { + return bufferStart + bufferPosition; + } + public final int getLine() { + return 1; + } + public final int getEndColumn() { + return bufferStart + bufferPosition; + } + public final int getEndLine() { + return 1; + } + public final int getBeginColumn() { + return bufferStart + tokenStart; + } + public final int getBeginLine() { + return 1; + } +} diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/ParseException.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/ParseException.java new file mode 100644 index 00000000000..6cf472bcdd1 --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/ParseException.java @@ -0,0 +1,193 @@ +/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 3.0 */ + +package org.apache.lucene.queryParser.surround.parser; + +/** + * This exception is thrown when parse errors are encountered. + * You can explicitly create objects of this exception type by + * calling the method generateParseException in the generated + * parser. + * + * You can modify this class to customize your error reporting + * mechanisms so long as you retain the public fields. + */ +public class ParseException extends Exception { + + /** + * This constructor is used by the method "generateParseException" + * in the generated parser. Calling this constructor generates + * a new object of this type with the fields "currentToken", + * "expectedTokenSequences", and "tokenImage" set. The boolean + * flag "specialConstructor" is also set to true to indicate that + * this constructor was used to create this object. 
+ * This constructor calls its super class with the empty string + * to force the "toString" method of parent class "Throwable" to + * print the error message in the form: + * ParseException: + */ + public ParseException(Token currentTokenVal, + int[][] expectedTokenSequencesVal, + String[] tokenImageVal + ) + { + super(""); + specialConstructor = true; + currentToken = currentTokenVal; + expectedTokenSequences = expectedTokenSequencesVal; + tokenImage = tokenImageVal; + } + + /** + * The following constructors are for use by you for whatever + * purpose you can think of. Constructing the exception in this + * manner makes the exception behave in the normal way - i.e., as + * documented in the class "Throwable". The fields "currentToken", + * "expectedTokenSequences", and "tokenImage" do not contain + * relevant information. The JavaCC generated code does not use + * these constructors. + */ + + public ParseException() { + super(); + specialConstructor = false; + } + + public ParseException(String message) { + super(message); + specialConstructor = false; + } + + /** + * This variable determines which constructor was used to create + * this object and thereby affects the semantics of the + * "getMessage" method (see below). + */ + protected boolean specialConstructor; + + /** + * This is the last token that has been consumed successfully. If + * this object has been created due to a parse error, the token + * following this token will (therefore) be the first error token. + */ + public Token currentToken; + + /** + * Each entry in this array is an array of integers. Each array + * of integers represents a sequence of tokens (by their ordinal + * values) that is expected at this point of the parse. + */ + public int[][] expectedTokenSequences; + + /** + * This is a reference to the "tokenImage" array of the generated + * parser within which the parse error occurred. This array is + * defined in the generated ...Constants interface. 
+ */ + public String[] tokenImage; + + /** + * This method has the standard behavior when this object has been + * created using the standard constructors. Otherwise, it uses + * "currentToken" and "expectedTokenSequences" to generate a parse + * error message and returns it. If this object has been created + * due to a parse error, and you do not catch it (it gets thrown + * from the parser), then this method is called during the printing + * of the final stack trace, and hence the correct error message + * gets displayed. + */ + public String getMessage() { + if (!specialConstructor) { + return super.getMessage(); + } + String expected = ""; + int maxSize = 0; + for (int i = 0; i < expectedTokenSequences.length; i++) { + if (maxSize < expectedTokenSequences[i].length) { + maxSize = expectedTokenSequences[i].length; + } + for (int j = 0; j < expectedTokenSequences[i].length; j++) { + expected += tokenImage[expectedTokenSequences[i][j]] + " "; + } + if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) { + expected += "..."; + } + expected += eol + " "; + } + String retval = "Encountered \""; + Token tok = currentToken.next; + for (int i = 0; i < maxSize; i++) { + if (i != 0) retval += " "; + if (tok.kind == 0) { + retval += tokenImage[0]; + break; + } + retval += add_escapes(tok.image); + tok = tok.next; + } + retval += "\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn; + retval += "." + eol; + if (expectedTokenSequences.length == 1) { + retval += "Was expecting:" + eol + " "; + } else { + retval += "Was expecting one of:" + eol + " "; + } + retval += expected; + return retval; + } + + /** + * The end of line string for this machine. + */ + protected String eol = System.getProperty("line.separator", "\n"); + + /** + * Used to convert raw characters to their escaped version + * when these raw versions cannot be used as part of an ASCII + * string literal. 
+ */ + protected String add_escapes(String str) { + StringBuffer retval = new StringBuffer(); + char ch; + for (int i = 0; i < str.length(); i++) { + switch (str.charAt(i)) + { + case 0 : + continue; + case '\b': + retval.append("\\b"); + continue; + case '\t': + retval.append("\\t"); + continue; + case '\n': + retval.append("\\n"); + continue; + case '\f': + retval.append("\\f"); + continue; + case '\r': + retval.append("\\r"); + continue; + case '\"': + retval.append("\\\""); + continue; + case '\'': + retval.append("\\\'"); + continue; + case '\\': + retval.append("\\\\"); + continue; + default: + if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { + String s = "0000" + Integer.toString(ch, 16); + retval.append("\\u" + s.substring(s.length() - 4, s.length())); + } else { + retval.append(ch); + } + continue; + } + } + return retval.toString(); + } + +} diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/QueryParser.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/QueryParser.java new file mode 100644 index 00000000000..87b1f14dd43 --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/QueryParser.java @@ -0,0 +1,727 @@ +/* Generated By:JavaCC: Do not edit this line. 
QueryParser.java */ +package org.apache.lucene.queryParser.surround.parser; + +import java.util.ArrayList; +import java.util.List; +import java.io.StringReader; + + +import org.apache.lucene.analysis.TokenStream; + +import org.apache.lucene.queryParser.surround.query.SrndQuery; +import org.apache.lucene.queryParser.surround.query.FieldsQuery; +import org.apache.lucene.queryParser.surround.query.OrQuery; +import org.apache.lucene.queryParser.surround.query.AndQuery; +import org.apache.lucene.queryParser.surround.query.NotQuery; +import org.apache.lucene.queryParser.surround.query.DistanceQuery; +import org.apache.lucene.queryParser.surround.query.SrndTermQuery; +import org.apache.lucene.queryParser.surround.query.SrndPrefixQuery; +import org.apache.lucene.queryParser.surround.query.SrndTruncQuery; + +/** + * This class is generated by JavaCC. The only method that clients should need + * to call is parse(). + */ + +public class QueryParser implements QueryParserConstants { + final int minimumPrefixLength = 3; + final int minimumCharsInTrunc = 3; + final String truncationErrorMessage = "Too unrestrictive truncation: "; + final String boostErrorMessage = "Cannot handle boost value: "; + + /* CHECKME: These should be the same as for the tokenizer. How? 
*/ + final char truncator = '*'; + final char anyChar = '?'; + final char quote = '\"'; + final char fieldOperator = ':'; + final char comma = ','; /* prefix list separator */ + final char carat = '^'; /* weight operator */ + + /** Convenience entry point: creates a fresh parser and parses query with parse2(). */ + static public SrndQuery parse(String query) throws ParseException { + QueryParser parser = new QueryParser(); + return parser.parse2(query); + } + + /** Creates a parser over an empty input; parse2() re-initializes it with the real query text. */ + public QueryParser() { + this(new FastCharStream(new StringReader(""))); + } + + /** Re-initializes this parser on query and parses it with TopSrndQuery(); a TokenMgrError is rethrown as a ParseException carrying only its message. */ + public SrndQuery parse2(String query) throws ParseException { + ReInit(new FastCharStream(new StringReader(query))); + try { + return TopSrndQuery(); + } catch (TokenMgrError tme) { + throw new ParseException(tme.getMessage()); + } + } + + protected SrndQuery getFieldsQuery( + SrndQuery q, ArrayList fieldNames) { + /* FIXME: check acceptable subquery: at least one subquery should not be + * a fields query. + */ + return new FieldsQuery(q, fieldNames, fieldOperator); + } + + protected SrndQuery getOrQuery(List queries, boolean infix, Token orToken) { + return new OrQuery(queries, infix, orToken.image); + } + + protected SrndQuery getAndQuery(List queries, boolean infix, Token andToken) { + return new AndQuery( queries, infix, andToken.image); + } + + protected SrndQuery getNotQuery(List queries, Token notToken) { + return new NotQuery( queries, notToken.image); + } + + protected static int getOpDistance(String distanceOp) { + /* W, 2W, 3W etc -> 1, 2, 3, etc. Same for N, 2N ... A one-char operator means distance 1; otherwise everything before the last char is parsed as the distance. */ + return distanceOp.length() == 1 + ? 
1 + : Integer.parseInt( distanceOp.substring( 0, distanceOp.length() - 1)); + } + + /** Throws a ParseException naming opName when distq.distanceSubQueryNotAllowed() returns a non-null message. */ + protected static void checkDistanceSubQueries(DistanceQuery distq, String opName) + throws ParseException { + String m = distq.distanceSubQueryNotAllowed(); + if (m != null) { + throw new ParseException("Operator " + opName + ": " + m); + } + } + + protected SrndQuery getDistanceQuery( + List queries, + boolean infix, + Token dToken, + boolean ordered) throws ParseException { + DistanceQuery dq = new DistanceQuery(queries, + infix, + getOpDistance(dToken.image), + dToken.image, + ordered); + checkDistanceSubQueries(dq, dToken.image); + return dq; + } + + protected SrndQuery getTermQuery( + String term, boolean quoted) { + return new SrndTermQuery(term, quoted); + } + + /** True when suffixed, not counting its final char, has at least minimumPrefixLength chars. */ + protected boolean allowedSuffix(String suffixed) { + return (suffixed.length() - 1) >= minimumPrefixLength; + } + + protected SrndQuery getPrefixQuery( + String prefix, boolean quoted) { + return new SrndPrefixQuery(prefix, quoted, truncator); + } + + protected boolean allowedTruncation(String truncated) { + /* At least minimumCharsInTrunc characters other than truncator and anyChar are needed. */ + int nrNormalChars = 0; + for (int i = 0; i < truncated.length(); i++) { + char c = truncated.charAt(i); + if ((c != truncator) && (c != anyChar)) { + nrNormalChars++; + } + } + return nrNormalChars >= minimumCharsInTrunc; + } + + protected SrndQuery getTruncQuery(String truncated) { + return new SrndTruncQuery(truncated, truncator, anyChar); + } + + /** Top-level production: one FieldsQuery followed by token kind 0, which must end the input. */ + final public SrndQuery TopSrndQuery() throws ParseException { + SrndQuery q; + q = FieldsQuery(); + jj_consume_token(0); + {if (true) return q;} + throw new Error("Missing return statement in function"); + } + + final public SrndQuery FieldsQuery() throws ParseException { + SrndQuery q; + ArrayList fieldNames; + fieldNames = OptionalFields(); + q = OrQuery(); + {if (true) return (fieldNames == null) ? 
q : getFieldsQuery(q, fieldNames);} + throw new Error("Missing return statement in function"); + } + + /** Collects leading field names, each a TERM followed by COLON (checked with a 2-token lookahead); returns null when there are none. */ + final public ArrayList OptionalFields() throws ParseException { + Token fieldName; + ArrayList fieldNames = null; + label_1: + while (true) { + if (jj_2_1(2)) { + ; + } else { + break label_1; + } + // to the colon + fieldName = jj_consume_token(TERM); + jj_consume_token(COLON); + if (fieldNames == null) { + fieldNames = new ArrayList(); + } + fieldNames.add(fieldName.image); + } + {if (true) return fieldNames;} + throw new Error("Missing return statement in function"); + } + + final public SrndQuery OrQuery() throws ParseException { + SrndQuery q; + ArrayList queries = null; + Token oprt = null; + q = AndQuery(); + label_2: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case OR: + ; + break; + default: + jj_la1[0] = jj_gen; + break label_2; + } + oprt = jj_consume_token(OR); + /* keep only last used operator */ + if (queries == null) { + queries = new ArrayList(); + queries.add(q); + } + q = AndQuery(); + queries.add(q); + } + {if (true) return (queries == null) ? q : getOrQuery(queries, true /* infix */, oprt);} + throw new Error("Missing return statement in function"); + } + + final public SrndQuery AndQuery() throws ParseException { + SrndQuery q; + ArrayList queries = null; + Token oprt = null; + q = NotQuery(); + label_3: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case AND: + ; + break; + default: + jj_la1[1] = jj_gen; + break label_3; + } + oprt = jj_consume_token(AND); + /* keep only last used operator */ + if (queries == null) { + queries = new ArrayList(); + queries.add(q); + } + q = NotQuery(); + queries.add(q); + } + {if (true) return (queries == null) ? 
q : getAndQuery(queries, true /* infix */, oprt);} + throw new Error("Missing return statement in function"); + } + + final public SrndQuery NotQuery() throws ParseException { + SrndQuery q; + ArrayList queries = null; + Token oprt = null; + q = NQuery(); + label_4: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case NOT: + ; + break; + default: + jj_la1[2] = jj_gen; + break label_4; + } + oprt = jj_consume_token(NOT); + /* keep only last used operator */ + if (queries == null) { + queries = new ArrayList(); + queries.add(q); + } + q = NQuery(); + queries.add(q); + } + {if (true) return (queries == null) ? q : getNotQuery(queries, oprt);} + throw new Error("Missing return statement in function"); + } + + final public SrndQuery NQuery() throws ParseException { + SrndQuery q; + ArrayList queries; + Token dt; + q = WQuery(); + label_5: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case N: + ; + break; + default: + jj_la1[3] = jj_gen; + break label_5; + } + dt = jj_consume_token(N); + queries = new ArrayList(); + queries.add(q); /* left associative */ + + q = WQuery(); + queries.add(q); + q = getDistanceQuery(queries, true /* infix */, dt, false /* not ordered */); + } + {if (true) return q;} + throw new Error("Missing return statement in function"); + } + + final public SrndQuery WQuery() throws ParseException { + SrndQuery q; + ArrayList queries; + Token wt; + q = PrimaryQuery(); + label_6: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case W: + ; + break; + default: + jj_la1[4] = jj_gen; + break label_6; + } + wt = jj_consume_token(W); + queries = new ArrayList(); + queries.add(q); /* left associative */ + + q = PrimaryQuery(); + queries.add(q); + q = getDistanceQuery(queries, true /* infix */, wt, true /* ordered */); + } + {if (true) return q;} + throw new Error("Missing return statement in function"); + } + + final public SrndQuery PrimaryQuery() throws ParseException { + /* bracketed weighted query or weighted term */ 
+ SrndQuery q; + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case LPAREN: + jj_consume_token(LPAREN); + q = FieldsQuery(); + jj_consume_token(RPAREN); + break; + case OR: + case AND: + case W: + case N: + q = PrefixOperatorQuery(); + break; + case TRUNCQUOTED: + case QUOTED: + case SUFFIXTERM: + case TRUNCTERM: + case TERM: + q = SimpleTerm(); + break; + default: + jj_la1[5] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + OptionalWeights(q); + {if (true) return q;} + throw new Error("Missing return statement in function"); + } + + final public SrndQuery PrefixOperatorQuery() throws ParseException { + Token oprt; + List queries; + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case OR: + oprt = jj_consume_token(OR); + /* prefix OR */ + queries = FieldsQueryList(); + {if (true) return getOrQuery(queries, false /* not infix */, oprt);} + break; + case AND: + oprt = jj_consume_token(AND); + /* prefix AND */ + queries = FieldsQueryList(); + {if (true) return getAndQuery(queries, false /* not infix */, oprt);} + break; + case N: + oprt = jj_consume_token(N); + /* prefix N */ + queries = FieldsQueryList(); + {if (true) return getDistanceQuery(queries, false /* not infix */, oprt, false /* not ordered */);} + break; + case W: + oprt = jj_consume_token(W); + /* prefix W */ + queries = FieldsQueryList(); + {if (true) return getDistanceQuery(queries, false /* not infix */, oprt, true /* ordered */);} + break; + default: + jj_la1[6] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + throw new Error("Missing return statement in function"); + } + + /** Parses LPAREN FieldsQuery (COMMA FieldsQuery)+ RPAREN: the loop consumes a COMMA before testing for another one, so at least two subqueries are required. */ + final public List FieldsQueryList() throws ParseException { + SrndQuery q; + ArrayList queries = new ArrayList(); + jj_consume_token(LPAREN); + q = FieldsQuery(); + queries.add(q); + label_7: + while (true) { + jj_consume_token(COMMA); + q = FieldsQuery(); + queries.add(q); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case COMMA: + ; + break; + default: + jj_la1[7] = jj_gen; + break label_7; + } + } + 
jj_consume_token(RPAREN); + {if (true) return queries;} + throw new Error("Missing return statement in function"); + } + + final public SrndQuery SimpleTerm() throws ParseException { + Token term; + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case TERM: + term = jj_consume_token(TERM); + {if (true) return getTermQuery(term.image, false /* not quoted */);} + break; + case QUOTED: + term = jj_consume_token(QUOTED); + {if (true) return getTermQuery(term.image.substring(1, term.image.length()-1), true /* quoted */);} + break; + case SUFFIXTERM: + term = jj_consume_token(SUFFIXTERM); + /* ending in * */ + if (! allowedSuffix(term.image)) { + {if (true) throw new ParseException(truncationErrorMessage + term.image);} + } + {if (true) return getPrefixQuery(term.image.substring(0, term.image.length()-1), false /* not quoted */);} + break; + case TRUNCTERM: + term = jj_consume_token(TRUNCTERM); + /* with at least one * or ? */ + if (! allowedTruncation(term.image)) { + {if (true) throw new ParseException(truncationErrorMessage + term.image);} + } + {if (true) return getTruncQuery(term.image);} + break; + case TRUNCQUOTED: + term = jj_consume_token(TRUNCQUOTED); + /* eg. 
"9b-b,m"* */ + if ((term.image.length() - 3) < minimumPrefixLength) { + {if (true) throw new ParseException(truncationErrorMessage + term.image);} + } + {if (true) return getPrefixQuery(term.image.substring(1, term.image.length()-2), true /* quoted */);} + break; + default: + jj_la1[8] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + throw new Error("Missing return statement in function"); + } + + /** Applies zero or more CARAT NUMBER boosts to q, multiplying its weight by each one; a boost that cannot be parsed as a float, or that is not greater than zero, raises a ParseException. */ + final public void OptionalWeights(SrndQuery q) throws ParseException { + Token weight=null; + label_8: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case CARAT: + ; + break; + default: + jj_la1[9] = jj_gen; + break label_8; + } + jj_consume_token(CARAT); + weight = jj_consume_token(NUMBER); + float f; + try { + f = Float.valueOf(weight.image).floatValue(); + } catch (Exception floatExc) { + {if (true) throw new ParseException(boostErrorMessage + weight.image + " (" + floatExc + ")");} + } + if (f <= 0.0) { + {if (true) throw new ParseException(boostErrorMessage + weight.image);} + } + q.setWeight(f * q.getWeight()); /* left associative, fwiw */ + + } + } + + final private boolean jj_2_1(int xla) { + jj_la = xla; jj_lastpos = jj_scanpos = token; + try { return !jj_3_1(); } + catch(LookaheadSuccess ls) { return true; } + finally { jj_save(0, xla); } + } + + final private boolean jj_3_1() { + if (jj_scan_token(TERM)) return true; + if (jj_scan_token(COLON)) return true; + return false; + } + + public QueryParserTokenManager token_source; + public Token token, jj_nt; + private int jj_ntk; + private Token jj_scanpos, jj_lastpos; + private int jj_la; + public boolean lookingAhead = false; + private boolean jj_semLA; + private int jj_gen; + final private int[] jj_la1 = new int[10]; + static private int[] jj_la1_0; + static { + jj_la1_0(); + } + private static void jj_la1_0() { + jj_la1_0 = new int[] {0x100,0x200,0x400,0x1000,0x800,0x7c3b00,0x1b00,0x8000,0x7c0000,0x20000,}; + } + final private JJCalls[] jj_2_rtns = new JJCalls[1]; + private 
boolean jj_rescan = false; + private int jj_gc = 0; + + public QueryParser(CharStream stream) { + token_source = new QueryParserTokenManager(stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 10; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + public void ReInit(CharStream stream) { + token_source.ReInit(stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 10; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + public QueryParser(QueryParserTokenManager tm) { + token_source = tm; + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 10; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + public void ReInit(QueryParserTokenManager tm) { + token_source = tm; + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 10; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + /** Advances to the next token and returns it when its kind matches; otherwise restores the previous token, records the expected kind in jj_kind and throws the generated ParseException. */ + final private Token jj_consume_token(int kind) throws ParseException { + Token oldToken; + if ((oldToken = token).next != null) token = token.next; + else token = token.next = token_source.getNextToken(); + jj_ntk = -1; + if (token.kind == kind) { + jj_gen++; + if (++jj_gc > 100) { + jj_gc = 0; + for (int i = 0; i < jj_2_rtns.length; i++) { + JJCalls c = jj_2_rtns[i]; + while (c != null) { + if (c.gen < jj_gen) c.first = null; + c = c.next; + } + } + } + return token; + } + token = oldToken; + jj_kind = kind; + throw generateParseException(); + } + + static private final class LookaheadSuccess extends java.lang.Error { } + final private LookaheadSuccess jj_ls = new LookaheadSuccess(); + final private boolean jj_scan_token(int kind) { + if (jj_scanpos == jj_lastpos) { + jj_la--; + if (jj_scanpos.next == null) { + jj_lastpos = jj_scanpos = jj_scanpos.next = token_source.getNextToken(); + } else { + 
jj_lastpos = jj_scanpos = jj_scanpos.next; + } + } else { + jj_scanpos = jj_scanpos.next; + } + if (jj_rescan) { + int i = 0; Token tok = token; + while (tok != null && tok != jj_scanpos) { i++; tok = tok.next; } + if (tok != null) jj_add_error_token(kind, i); + } + if (jj_scanpos.kind != kind) return true; + if (jj_la == 0 && jj_scanpos == jj_lastpos) throw jj_ls; + return false; + } + + final public Token getNextToken() { + if (token.next != null) token = token.next; + else token = token.next = token_source.getNextToken(); + jj_ntk = -1; + jj_gen++; + return token; + } + + final public Token getToken(int index) { + Token t = lookingAhead ? jj_scanpos : token; + for (int i = 0; i < index; i++) { + if (t.next != null) t = t.next; + else t = t.next = token_source.getNextToken(); + } + return t; + } + + final private int jj_ntk() { + if ((jj_nt=token.next) == null) + return (jj_ntk = (token.next=token_source.getNextToken()).kind); + else + return (jj_ntk = jj_nt.kind); + } + + private java.util.Vector jj_expentries = new java.util.Vector(); + private int[] jj_expentry; + private int jj_kind = -1; + private int[] jj_lasttokens = new int[100]; + private int jj_endpos; + + private void jj_add_error_token(int kind, int pos) { + if (pos >= 100) return; + if (pos == jj_endpos + 1) { + jj_lasttokens[jj_endpos++] = kind; + } else if (jj_endpos != 0) { + jj_expentry = new int[jj_endpos]; + for (int i = 0; i < jj_endpos; i++) { + jj_expentry[i] = jj_lasttokens[i]; + } + boolean exists = false; + for (java.util.Enumeration e = jj_expentries.elements(); e.hasMoreElements();) { + int[] oldentry = (int[])(e.nextElement()); + if (oldentry.length == jj_expentry.length) { + exists = true; + for (int i = 0; i < jj_expentry.length; i++) { + if (oldentry[i] != jj_expentry[i]) { + exists = false; + break; + } + } + if (exists) break; + } + } + if (!exists) jj_expentries.addElement(jj_expentry); + if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind; + } + } + + public ParseException 
generateParseException() { + /* NOTE(review): this copy looks truncated further down - the text jumps from "(1<" straight to "jj_gen) {", so the rest of this method and the start of the code that follows it appear to be missing. Restore them from the generated QueryParser.java before relying on this section. */ + jj_expentries.removeAllElements(); + boolean[] la1tokens = new boolean[24]; + for (int i = 0; i < 24; i++) { + la1tokens[i] = false; + } + if (jj_kind >= 0) { + la1tokens[jj_kind] = true; + jj_kind = -1; + } + for (int i = 0; i < 10; i++) { + if (jj_la1[i] == jj_gen) { + for (int j = 0; j < 32; j++) { + if ((jj_la1_0[i] & (1< jj_gen) { + jj_la = p.arg; jj_lastpos = jj_scanpos = p.first; + switch (i) { + case 0: jj_3_1(); break; + } + } + p = p.next; + } while (p != null); + } + jj_rescan = false; + } + + final private void jj_save(int index, int xla) { + JJCalls p = jj_2_rtns[index]; + while (p.gen > jj_gen) { + if (p.next == null) { p = p.next = new JJCalls(); break; } + p = p.next; + } + p.gen = jj_gen + xla - jj_la; p.first = token; p.arg = xla; + } + + static final class JJCalls { + int gen; + Token first; + int arg; + JJCalls next; + } + +} diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/QueryParser.jj b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/QueryParser.jj new file mode 100644 index 00000000000..98f687d8d5e --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/QueryParser.jj @@ -0,0 +1,453 @@ +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* Surround query language parser */ + +/* Query language operators: OR, AND, NOT, W, N, (, ), ^, *, ?, " and comma */ + + +options { + STATIC=false; + JAVA_UNICODE_ESCAPE=true; + USER_CHAR_STREAM=true; +} + +PARSER_BEGIN(QueryParser) + +package org.apache.lucene.queryParser.surround.parser; + +import java.util.ArrayList; +import java.util.List; +import java.io.StringReader; + + +import org.apache.lucene.analysis.TokenStream; + +import org.apache.lucene.queryParser.surround.query.SrndQuery; +import org.apache.lucene.queryParser.surround.query.FieldsQuery; +import org.apache.lucene.queryParser.surround.query.OrQuery; +import org.apache.lucene.queryParser.surround.query.AndQuery; +import org.apache.lucene.queryParser.surround.query.NotQuery; +import org.apache.lucene.queryParser.surround.query.DistanceQuery; +import org.apache.lucene.queryParser.surround.query.SrndTermQuery; +import org.apache.lucene.queryParser.surround.query.SrndPrefixQuery; +import org.apache.lucene.queryParser.surround.query.SrndTruncQuery; + +/** + * This class is generated by JavaCC. The only method that clients should need + * to call is parse(). + */ + +public class QueryParser { + final int minimumPrefixLength = 3; + final int minimumCharsInTrunc = 3; + final String truncationErrorMessage = "Too unrestrictive truncation: "; + final String boostErrorMessage = "Cannot handle boost value: "; + + /* CHECKME: These should be the same as for the tokenizer. How? 
*/ + final char truncator = '*'; + final char anyChar = '?'; + final char quote = '\"'; + final char fieldOperator = ':'; + final char comma = ','; /* prefix list separator */ + final char carat = '^'; /* weight oparator */ + + static public SrndQuery parse(String query) throws ParseException { + QueryParser parser = new QueryParser(); + return parser.parse2(query); + } + + public QueryParser() { + this(new FastCharStream(new StringReader(""))); + } + + public SrndQuery parse2(String query) throws ParseException { + ReInit(new FastCharStream(new StringReader(query))); + try { + return TopSrndQuery(); + } catch (TokenMgrError tme) { + throw new ParseException(tme.getMessage()); + } + } + + protected SrndQuery getFieldsQuery( + SrndQuery q, ArrayList fieldNames) { + /* FIXME: check acceptable subquery: at least one subquery should not be + * a fields query. + */ + return new FieldsQuery(q, fieldNames, fieldOperator); + } + + protected SrndQuery getOrQuery(List queries, boolean infix, Token orToken) { + return new OrQuery(queries, infix, orToken.image); + } + + protected SrndQuery getAndQuery(List queries, boolean infix, Token andToken) { + return new AndQuery( queries, infix, andToken.image); + } + + protected SrndQuery getNotQuery(List queries, Token notToken) { + return new NotQuery( queries, notToken.image); + } + + protected static int getOpDistance(String distanceOp) { + /* W, 2W, 3W etc -> 1, 2 3, etc. Same for N, 2N ... */ + return distanceOp.length() == 1 + ? 
1 + : Integer.parseInt( distanceOp.substring( 0, distanceOp.length() - 1)); + } + + protected static void checkDistanceSubQueries(DistanceQuery distq, String opName) + throws ParseException { + String m = distq.distanceSubQueryNotAllowed(); + if (m != null) { + throw new ParseException("Operator " + opName + ": " + m); + } + } + + protected SrndQuery getDistanceQuery( + List queries, + boolean infix, + Token dToken, + boolean ordered) throws ParseException { + DistanceQuery dq = new DistanceQuery(queries, + infix, + getOpDistance(dToken.image), + dToken.image, + ordered); + checkDistanceSubQueries(dq, dToken.image); + return dq; + } + + protected SrndQuery getTermQuery( + String term, boolean quoted) { + return new SrndTermQuery(term, quoted); + } + + protected boolean allowedSuffix(String suffixed) { + return (suffixed.length() - 1) >= minimumPrefixLength; + } + + protected SrndQuery getPrefixQuery( + String prefix, boolean quoted) { + return new SrndPrefixQuery(prefix, quoted, truncator); + } + + protected boolean allowedTruncation(String truncated) { + /* At least 3 normal characters needed. */ + int nrNormalChars = 0; + for (int i = 0; i < truncated.length(); i++) { + char c = truncated.charAt(i); + if ((c != truncator) && (c != anyChar)) { + nrNormalChars++; + } + } + return nrNormalChars >= minimumCharsInTrunc; + } + + protected SrndQuery getTruncQuery(String truncated) { + return new SrndTruncQuery(truncated, truncator, anyChar); + } +} + +PARSER_END(QueryParser) + +/* ***************** */ +/* Token Definitions */ +/* ***************** */ + +<*> TOKEN : { + <#_NUM_CHAR: ["0"-"9"] > +| <#_TERM_CHAR: /* everything except whitespace and operators */ + ( ~[ " ", "\t", "\n", "\r", + ",", "?", "*", "(", ")", ":", "^", "\""] + ) > +| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" ) > +| <#_STAR: "*" > /* term truncation */ +| <#_ONE_CHAR: "?" > /* precisely one character in a term */ +/* 2..99 prefix for distance operators */ +| <#_DISTOP_NUM: ((["2"-"9"](["0"-"9"])?) 
| ("1" ["0"-"9"]))> +} + + SKIP : { + <<_WHITESPACE>> +} + +/* Operator tokens (in increasing order of precedence): */ + TOKEN : +{ + +| +| +| )? ("W"|"w")> +| )? ("N"|"n")> +/* These are excluded in _TERM_CHAR: */ +| +| +| +| +| : Boost +/* Literal non empty term between single quotes, + * escape quoted quote or backslash by backslash. + * Evt. truncated. + */ +| > +| +| )+ <_STAR>> +| )+ + (<_STAR> | <_ONE_CHAR> )+ /* at least one * or ? */ + (<_TERM_CHAR> | <_STAR> | <_ONE_CHAR> )* + > +| )+> +} + + TOKEN : { +)+ ( "." (<_NUM_CHAR>)+ )?> : DEFAULT +} + + +SrndQuery TopSrndQuery() : { + SrndQuery q; +}{ + q = FieldsQuery() + + {return q;} +} + + +SrndQuery FieldsQuery() : { + SrndQuery q; + ArrayList fieldNames; +}{ + fieldNames = OptionalFields() + q = OrQuery() + {return (fieldNames == null) ? q : getFieldsQuery(q, fieldNames);} +} + + +ArrayList OptionalFields() : { + Token fieldName; + ArrayList fieldNames = null; +}{ + ( LOOKAHEAD(2) // to the colon + fieldName = + { + if (fieldNames == null) { + fieldNames = new ArrayList(); + } + fieldNames.add(fieldName.image); + } + )* + {return fieldNames;} +} + + +SrndQuery OrQuery() : { + SrndQuery q; + ArrayList queries = null; + Token oprt = null; +}{ + q = AndQuery() + ( oprt = { /* keep only last used operator */ + if (queries == null) { + queries = new ArrayList(); + queries.add(q); + } + } + q = AndQuery() { + queries.add(q); + } + )* + {return (queries == null) ? q : getOrQuery(queries, true /* infix */, oprt);} +} + + +SrndQuery AndQuery() : { + SrndQuery q; + ArrayList queries = null; + Token oprt = null; +}{ + q = NotQuery() + ( oprt = { /* keep only last used operator */ + if (queries == null) { + queries = new ArrayList(); + queries.add(q); + } + } + q = NotQuery() { + queries.add(q); + } + )* + {return (queries == null) ? 
q : getAndQuery(queries, true /* infix */, oprt);} +} + + +SrndQuery NotQuery() : { + SrndQuery q; + ArrayList queries = null; + Token oprt = null; +}{ + q = NQuery() + ( oprt = { /* keep only last used operator */ + if (queries == null) { + queries = new ArrayList(); + queries.add(q); + } + } + q = NQuery() { + queries.add(q); + } + )* + {return (queries == null) ? q : getNotQuery(queries, oprt);} +} + + +SrndQuery NQuery() : { + SrndQuery q; + ArrayList queries; + Token dt; +}{ + q = WQuery() + ( dt = { + queries = new ArrayList(); + queries.add(q); /* left associative */ + } + q = WQuery() { + queries.add(q); + q = getDistanceQuery(queries, true /* infix */, dt, false /* not ordered */); + } + )* + {return q;} +} + + +SrndQuery WQuery() : { + SrndQuery q; + ArrayList queries; + Token wt; +}{ + q = PrimaryQuery() + ( wt = { + queries = new ArrayList(); + queries.add(q); /* left associative */ + } + q = PrimaryQuery() { + queries.add(q); + q = getDistanceQuery(queries, true /* infix */, wt, true /* ordered */); + } + )* + {return q;} +} + + +SrndQuery PrimaryQuery() : { /* bracketed weighted query or weighted term */ + SrndQuery q; +}{ + ( q = FieldsQuery() + | q = PrefixOperatorQuery() + | q = SimpleTerm() + ) + OptionalWeights(q) + {return q;} +} + + +SrndQuery PrefixOperatorQuery() : { + Token oprt; + List queries; +}{ + ( oprt = /* prefix OR */ + queries = FieldsQueryList() + {return getOrQuery(queries, false /* not infix */, oprt);} + + | oprt = /* prefix AND */ + queries = FieldsQueryList() + {return getAndQuery(queries, false /* not infix */, oprt);} + + | oprt = /* prefix N */ + queries = FieldsQueryList() + {return getDistanceQuery(queries, false /* not infix */, oprt, false /* not ordered */);} + + | oprt = /* prefix W */ + queries = FieldsQueryList() + {return getDistanceQuery(queries, false /* not infix */, oprt, true /* ordered */);} + ) +} + + +List FieldsQueryList() : { + SrndQuery q; + ArrayList queries = new ArrayList(); +}{ + + q = FieldsQuery() 
{queries.add(q);} + ( q = FieldsQuery() {queries.add(q);})+ + + {return queries;} +} + + +SrndQuery SimpleTerm() : { + Token term; +}{ + ( term= + {return getTermQuery(term.image, false /* not quoted */);} + + | term= + {return getTermQuery(term.image.substring(1, term.image.length()-1), true /* quoted */);} + + | term= { /* ending in * */ + if (! allowedSuffix(term.image)) { + throw new ParseException(truncationErrorMessage + term.image); + } + return getPrefixQuery(term.image.substring(0, term.image.length()-1), false /* not quoted */); + } + + | term= { /* with at least one * or ? */ + if (! allowedTruncation(term.image)) { + throw new ParseException(truncationErrorMessage + term.image); + } + return getTruncQuery(term.image); + } + + | term= { /* eg. "9b-b,m"* */ + if ((term.image.length() - 3) < minimumPrefixLength) { + throw new ParseException(truncationErrorMessage + term.image); + } + return getPrefixQuery(term.image.substring(1, term.image.length()-2), true /* quoted */); + } + ) +} + + +void OptionalWeights(SrndQuery q) : { + Token weight=null; +}{ + ( weight= { + float f; + try { + f = Float.valueOf(weight.image).floatValue(); + } catch (Exception floatExc) { + throw new ParseException(boostErrorMessage + weight.image + " (" + floatExc + ")"); + } + if (f <= 0.0) { + throw new ParseException(boostErrorMessage + weight.image); + } + q.setWeight(f * q.getWeight()); /* left associative, fwiw */ + } + )* +} + diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/QueryParserConstants.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/QueryParserConstants.java new file mode 100644 index 00000000000..a537da25b11 --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/QueryParserConstants.java @@ -0,0 +1,60 @@ +/* Generated By:JavaCC: Do not edit this line. 
QueryParserConstants.java */ +package org.apache.lucene.queryParser.surround.parser; + +public interface QueryParserConstants { + + int EOF = 0; + int _NUM_CHAR = 1; + int _TERM_CHAR = 2; + int _WHITESPACE = 3; + int _STAR = 4; + int _ONE_CHAR = 5; + int _DISTOP_NUM = 6; + int OR = 8; + int AND = 9; + int NOT = 10; + int W = 11; + int N = 12; + int LPAREN = 13; + int RPAREN = 14; + int COMMA = 15; + int COLON = 16; + int CARAT = 17; + int TRUNCQUOTED = 18; + int QUOTED = 19; + int SUFFIXTERM = 20; + int TRUNCTERM = 21; + int TERM = 22; + int NUMBER = 23; + + int Boost = 0; + int DEFAULT = 1; + + String[] tokenImage = { + "", + "<_NUM_CHAR>", + "<_TERM_CHAR>", + "<_WHITESPACE>", + "\"*\"", + "\"?\"", + "<_DISTOP_NUM>", + "", + "", + "", + "", + "", + "", + "\"(\"", + "\")\"", + "\",\"", + "\":\"", + "\"^\"", + "", + "", + "", + "", + "", + "", + }; + +} diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/QueryParserTokenManager.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/QueryParserTokenManager.java new file mode 100644 index 00000000000..e34ef6acc32 --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/QueryParserTokenManager.java @@ -0,0 +1,700 @@ +/* Generated By:JavaCC: Do not edit this line. 
QueryParserTokenManager.java */ +package org.apache.lucene.queryParser.surround.parser; +import java.util.ArrayList; +import java.util.List; +import java.io.StringReader; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.queryParser.surround.query.SrndQuery; +import org.apache.lucene.queryParser.surround.query.FieldsQuery; +import org.apache.lucene.queryParser.surround.query.OrQuery; +import org.apache.lucene.queryParser.surround.query.AndQuery; +import org.apache.lucene.queryParser.surround.query.NotQuery; +import org.apache.lucene.queryParser.surround.query.DistanceQuery; +import org.apache.lucene.queryParser.surround.query.SrndTermQuery; +import org.apache.lucene.queryParser.surround.query.SrndPrefixQuery; +import org.apache.lucene.queryParser.surround.query.SrndTruncQuery; + +/* Token manager (lexer) of the surround query parser: reads characters from a CharStream and hands out Token objects through getNextToken(). */ public class QueryParserTokenManager implements QueryParserConstants +{ + public java.io.PrintStream debugStream = System.out; + public void setDebugStream(java.io.PrintStream ds) { debugStream = ds; } +/* Always answers -1, whatever pos and active0 are. */ private final int jjStopStringLiteralDfa_1(int pos, long active0) +{ + switch (pos) + { + default : + return -1; + } +} +private final int jjStartNfa_1(int pos, long active0) +{ + return jjMoveNfa_1(jjStopStringLiteralDfa_1(pos, active0), pos + 1); +} +/* Records a match of the given kind ending at pos and returns the position just after it. */ private final int jjStopAtPos(int pos, int kind) +{ + jjmatchedKind = kind; + jjmatchedPos = pos; + return pos + 1; +} +/* Records the match, reads one more character and continues in the NFA from the given state; when no character can be read the match stands as recorded. */ private final int jjStartNfaWithStates_1(int pos, int kind, int state) +{ + jjmatchedKind = kind; + jjmatchedPos = pos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return pos + 1; } + return jjMoveNfa_1(state, pos + 1); +} +/* First character in lexical state 1: the literals ( ) , : ^ (character codes 40, 41, 44, 58, 94) are matched directly as kinds 13 to 17; every other character is handed to the NFA. */ private final int jjMoveStringLiteralDfa0_1() +{ + switch(curChar) + { + case 40: + return jjStopAtPos(0, 13); + case 41: + return jjStopAtPos(0, 14); + case 44: + return jjStopAtPos(0, 15); + case 58: + return jjStopAtPos(0, 16); + case 94: + return jjStopAtPos(0, 17); + default : + return jjMoveNfa_1(0, 0); + } +} +private final void
jjCheckNAdd(int state) +{ /* Adds state to the pending state set unless it was already added during the current round. */ + if (jjrounds[state] != jjround) + { + jjstateSet[jjnewStateCnt++] = state; + jjrounds[state] = jjround; + } +} +/* Adds jjnextStates[start..end] (inclusive) to the pending state set without the duplicate check. */ private final void jjAddStates(int start, int end) +{ + do { + jjstateSet[jjnewStateCnt++] = jjnextStates[start]; + } while (start++ != end); +} +private final void jjCheckNAddTwoStates(int state1, int state2) +{ + jjCheckNAdd(state1); + jjCheckNAdd(state2); +} +/* Like jjAddStates, but each state goes through jjCheckNAdd. */ private final void jjCheckNAddStates(int start, int end) +{ + do { + jjCheckNAdd(jjnextStates[start]); + } while (start++ != end); +} +/* Adds the two states jjnextStates[start] and jjnextStates[start + 1] through jjCheckNAdd. */ private final void jjCheckNAddStates(int start) +{ + jjCheckNAdd(jjnextStates[start]); + jjCheckNAdd(jjnextStates[start + 1]); +} +static final long[] jjbitVec0 = { + 0xfffffffffffffffeL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL +}; +static final long[] jjbitVec2 = { + 0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL +}; +/* NFA for lexical state 1. Each pass of the outer loop consumes curChar, runs every pending state on it and remembers the smallest matching kind; it returns the number of characters examined once no state is left or no further character can be read. */ private final int jjMoveNfa_1(int startState, int curPos) +{ + int[] nextStates; + int startsAt = 0; + jjnewStateCnt = 38; + int i = 1; + jjstateSet[0] = startState; + int j, kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) /* characters 0..63: one bit per character in l */ + { + long l = 1L << curChar; + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 0: + if ((0x7bffe8faffffd9ffL & l) != 0L) /* a term character: kind 22 so far */ + { + if (kind > 22) + kind = 22; + jjCheckNAddStates(0, 4); + } + else if ((0x100002600L & l) != 0L) /* tab, LF, CR or space: kind 7 */ + { + if (kind > 7) + kind = 7; + } + else if (curChar == 34) /* opening double quote */ + jjCheckNAddStates(5, 7); + if ((0x3fc000000000000L & l) != 0L) /* digit 2..9 */ + jjCheckNAddStates(8, 11); + else if (curChar == 49) /* digit 1 */ + jjCheckNAddTwoStates(20, 21); + break; + case 19: + if ((0x3fc000000000000L & l) != 0L) + jjCheckNAddStates(8, 11); + break; + case 20: + if ((0x3ff000000000000L & l) != 0L) /* any digit */ + jjCheckNAdd(17); + break; + case 21: + if ((0x3ff000000000000L & l) != 0L) + jjCheckNAdd(18); + break; + case 22: + if (curChar == 49) + jjCheckNAddTwoStates(20, 21); + break; + case 23: + if (curChar == 34) + jjCheckNAddStates(5, 7); + break;
+ case 24: /* inside a quoted text: any character except the double quote */ + if ((0xfffffffbffffffffL & l) != 0L) + jjCheckNAddTwoStates(24, 25); + break; + case 25: + if (curChar == 34) + jjstateSet[jjnewStateCnt++] = 26; + break; + case 26: /* '*' right after the closing quote: kind 18 */ + if (curChar == 42 && kind > 18) + kind = 18; + break; + case 27: + if ((0xfffffffbffffffffL & l) != 0L) + jjCheckNAddStates(12, 14); + break; + case 29: /* double quote following a backslash */ + if (curChar == 34) + jjCheckNAddStates(12, 14); + break; + case 30: /* closing double quote: kind 19 */ + if (curChar == 34 && kind > 19) + kind = 19; + break; + case 31: + if ((0x7bffe8faffffd9ffL & l) == 0L) + break; + if (kind > 22) + kind = 22; + jjCheckNAddStates(0, 4); + break; + case 32: + if ((0x7bffe8faffffd9ffL & l) != 0L) + jjCheckNAddTwoStates(32, 33); + break; + case 33: /* term characters followed by '*': kind 20 */ + if (curChar == 42 && kind > 20) + kind = 20; + break; + case 34: + if ((0x7bffe8faffffd9ffL & l) != 0L) + jjCheckNAddTwoStates(34, 35); + break; + case 35: /* '*' or '?' after term characters: kind 21 */ + if ((0x8000040000000000L & l) == 0L) + break; + if (kind > 21) + kind = 21; + jjCheckNAddTwoStates(35, 36); + break; + case 36: /* term character, '*' or '?' keeps kind 21 going */ + if ((0xfbffecfaffffd9ffL & l) == 0L) + break; + if (kind > 21) + kind = 21; + jjCheckNAdd(36); + break; + case 37: + if ((0x7bffe8faffffd9ffL & l) == 0L) + break; + if (kind > 22) + kind = 22; + jjCheckNAdd(37); + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) /* characters 64..127: bit (curChar - 64) in l */ + { + long l = 1L << (curChar & 077); + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 0: + if ((0xffffffffbfffffffL & l) != 0L) /* anything in this range except '^' */ + { + if (kind > 22) + kind = 22; + jjCheckNAddStates(0, 4); + } + if ((0x400000004000L & l) != 0L) /* 'N' or 'n': kind 12 */ + { + if (kind > 12) + kind = 12; + } + else if ((0x80000000800000L & l) != 0L) /* 'W' or 'w': kind 11 */ + { + if (kind > 11) + kind = 11; + } + else if (curChar == 97) + jjstateSet[jjnewStateCnt++] = 9; + else if (curChar == 65) + jjstateSet[jjnewStateCnt++] = 6; + else if (curChar == 111) + jjstateSet[jjnewStateCnt++] = 3; + else if (curChar == 79) + jjstateSet[jjnewStateCnt++] = 1; + if (curChar == 110) + jjstateSet[jjnewStateCnt++] = 15; + else if (curChar == 78) + jjstateSet[jjnewStateCnt++] = 12; +
break; + case 1: /* 'R': kind 8 */ + if (curChar == 82 && kind > 8) + kind = 8; + break; + case 2: + if (curChar == 79) + jjstateSet[jjnewStateCnt++] = 1; + break; + case 3: /* 'r': kind 8 */ + if (curChar == 114 && kind > 8) + kind = 8; + break; + case 4: + if (curChar == 111) + jjstateSet[jjnewStateCnt++] = 3; + break; + case 5: /* 'D': kind 9 */ + if (curChar == 68 && kind > 9) + kind = 9; + break; + case 6: + if (curChar == 78) + jjstateSet[jjnewStateCnt++] = 5; + break; + case 7: + if (curChar == 65) + jjstateSet[jjnewStateCnt++] = 6; + break; + case 8: /* 'd': kind 9 */ + if (curChar == 100 && kind > 9) + kind = 9; + break; + case 9: + if (curChar == 110) + jjstateSet[jjnewStateCnt++] = 8; + break; + case 10: + if (curChar == 97) + jjstateSet[jjnewStateCnt++] = 9; + break; + case 11: /* 'T': kind 10 */ + if (curChar == 84 && kind > 10) + kind = 10; + break; + case 12: + if (curChar == 79) + jjstateSet[jjnewStateCnt++] = 11; + break; + case 13: + if (curChar == 78) + jjstateSet[jjnewStateCnt++] = 12; + break; + case 14: /* 't': kind 10 */ + if (curChar == 116 && kind > 10) + kind = 10; + break; + case 15: + if (curChar == 111) + jjstateSet[jjnewStateCnt++] = 14; + break; + case 16: + if (curChar == 110) + jjstateSet[jjnewStateCnt++] = 15; + break; + case 17: /* 'W' or 'w' after a digit prefix: kind 11 */ + if ((0x80000000800000L & l) != 0L && kind > 11) + kind = 11; + break; + case 18: /* 'N' or 'n' after a digit prefix: kind 12 */ + if ((0x400000004000L & l) != 0L && kind > 12) + kind = 12; + break; + case 24: + jjAddStates(15, 16); + break; + case 27: /* anything in this range except the backslash */ + if ((0xffffffffefffffffL & l) != 0L) + jjCheckNAddStates(12, 14); + break; + case 28: /* backslash inside a quoted text */ + if (curChar == 92) + jjstateSet[jjnewStateCnt++] = 29; + break; + case 29: /* backslash following a backslash */ + if (curChar == 92) + jjCheckNAddStates(12, 14); + break; + case 31: + if ((0xffffffffbfffffffL & l) == 0L) + break; + if (kind > 22) + kind = 22; + jjCheckNAddStates(0, 4); + break; + case 32: + if ((0xffffffffbfffffffL & l) != 0L) + jjCheckNAddTwoStates(32, 33); + break; + case 34: + if ((0xffffffffbfffffffL & l) != 0L) + jjCheckNAddTwoStates(34, 35); + break; + case 36: + if ((0xffffffffbfffffffL & l) == 0L) + break; + if (kind > 21) + kind =
21; + jjstateSet[jjnewStateCnt++] = 36; + break; + case 37: + if ((0xffffffffbfffffffL & l) == 0L) + break; + if (kind > 22) + kind = 22; + jjCheckNAdd(37); + break; + default : break; + } + } while(i != startsAt); + } + else /* characters from 128 up: membership is decided by jjCanMove_0 */ + { + int hiByte = (int)(curChar >> 8); + int i1 = hiByte >> 6; + long l1 = 1L << (hiByte & 077); + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 077); + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 0: + if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) + break; + if (kind > 22) + kind = 22; + jjCheckNAddStates(0, 4); + break; + case 24: + if (jjCanMove_0(hiByte, i1, i2, l1, l2)) + jjAddStates(15, 16); + break; + case 27: + if (jjCanMove_0(hiByte, i1, i2, l1, l2)) + jjAddStates(12, 14); + break; + case 32: + if (jjCanMove_0(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(32, 33); + break; + case 34: + if (jjCanMove_0(hiByte, i1, i2, l1, l2)) + jjCheckNAddTwoStates(34, 35); + break; + case 36: + if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) + break; + if (kind > 21) + kind = 21; + jjstateSet[jjnewStateCnt++] = 36; + break; + case 37: + if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) + break; + if (kind > 22) + kind = 22; + jjCheckNAdd(37); + break; + default : break; + } + } while(i != startsAt); + } + if (kind != 0x7fffffff) /* something matched on this character: remember kind and end position */ + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 38 - (jjnewStateCnt = startsAt))) /* no states were added for the next character */ + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } +} +private final int jjMoveStringLiteralDfa0_0() +{ + return jjMoveNfa_0(0, 0); +} +/* NFA for lexical state 0; same loop structure as jjMoveNfa_1 but with only 3 states. */ private final int jjMoveNfa_0(int startState, int curPos) +{ + int[] nextStates; + int startsAt = 0; + jjnewStateCnt = 3; + int i = 1; + jjstateSet[0] = startState; + int j, kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + long l = 1L << curChar; + MatchLoop: do + { + switch(jjstateSet[--i]) + { +
case 0: /* a digit: kind 23; more digits or a '.' may follow */ + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 23) + kind = 23; + jjAddStates(17, 18); + break; + case 1: /* '.' (character code 46) */ + if (curChar == 46) + jjCheckNAdd(2); + break; + case 2: /* digits after the '.': kind 23 */ + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 23) + kind = 23; + jjCheckNAdd(2); + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) /* no state accepts characters 64..127 */ + { + long l = 1L << (curChar & 077); + MatchLoop: do + { + switch(jjstateSet[--i]) + { + default : break; + } + } while(i != startsAt); + } + else /* no state accepts characters from 128 up */ + { + int hiByte = (int)(curChar >> 8); + int i1 = hiByte >> 6; + long l1 = 1L << (hiByte & 077); + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 077); + MatchLoop: do + { + switch(jjstateSet[--i]) + { + default : break; + } + } while(i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 3 - (jjnewStateCnt = startsAt))) + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } +} +/* State numbers referred to by index from jjAddStates and jjCheckNAddStates. */ static final int[] jjnextStates = { + 32, 33, 34, 35, 37, 24, 27, 28, 20, 17, 21, 18, 27, 28, 30, 24, + 25, 0, 1, +}; +/* Bit-table lookup for characters from 128 up: jjbitVec2 is used when the high byte is 0, jjbitVec0 otherwise. */ private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2) +{ + switch(hiByte) + { + case 0: + return ((jjbitVec2[i2] & l2) != 0L); + default : + if ((jjbitVec0[i1] & l1) != 0L) + return true; + return false; + } +} +/* Fixed image per token kind; null means the image is taken from the input (see jjFillToken). */ public static final String[] jjstrLiteralImages = { +"", null, null, null, null, null, null, null, null, null, null, null, null, +"\50", "\51", "\54", "\72", "\136", null, null, null, null, null, null, }; +public static final String[] lexStateNames = { + "Boost", + "DEFAULT", +}; +/* Lexical state to switch to after matching each kind; -1 means no change. Kind 17 switches to state 0, kind 23 to state 1. */ public static final int[] jjnewLexState = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 1, +}; +/* Bit set of the kinds that getNextToken returns as tokens. */ static final long[] jjtoToken = { + 0xffff01L, +}; +/* Bit set of the skipped kinds: only bit 7 is set. */ static final long[] jjtoSkip = { + 0x80L, +}; +protected
CharStream input_stream; +private final int[] jjrounds = new int[38]; +private final int[] jjstateSet = new int[76]; +protected char curChar; +public QueryParserTokenManager(CharStream stream) +{ + input_stream = stream; +} +public QueryParserTokenManager(CharStream stream, int lexState) +{ + this(stream); + SwitchTo(lexState); +} +public void ReInit(CharStream stream) +{ + jjmatchedPos = jjnewStateCnt = 0; + curLexState = defaultLexState; + input_stream = stream; + ReInitRounds(); +} +private final void ReInitRounds() +{ + int i; + jjround = 0x80000001; + for (i = 38; i-- > 0;) + jjrounds[i] = 0x80000000; +} +public void ReInit(CharStream stream, int lexState) +{ + ReInit(stream); + SwitchTo(lexState); +} +public void SwitchTo(int lexState) +{ + if (lexState >= 2 || lexState < 0) + throw new TokenMgrError("Error: Ignoring invalid lexical state : " + lexState + ". State unchanged.", TokenMgrError.INVALID_LEXICAL_STATE); + else + curLexState = lexState; +} + +protected Token jjFillToken() +{ + Token t = Token.newToken(jjmatchedKind); + t.kind = jjmatchedKind; + String im = jjstrLiteralImages[jjmatchedKind]; + t.image = (im == null) ? 
input_stream.GetImage() : im; /* kinds without a fixed literal image take their text from the stream */ + t.beginLine = input_stream.getBeginLine(); + t.beginColumn = input_stream.getBeginColumn(); + t.endLine = input_stream.getEndLine(); + t.endColumn = input_stream.getEndColumn(); + return t; +} + +int curLexState = 1; +int defaultLexState = 1; +int jjnewStateCnt; +int jjround; +int jjmatchedPos; +int jjmatchedKind; + +/* Returns the next token from the input. Skipped matches are consumed silently, an EOF token (kind 0) is returned when no character can be read at a token start, and a TokenMgrError is thrown when nothing matches. */ public Token getNextToken() +{ + int kind; + Token specialToken = null; + Token matchedToken; + int curPos = 0; + + EOFLoop : + for (;;) + { + try + { + curChar = input_stream.BeginToken(); + } + catch(java.io.IOException e) + { /* no more input: hand out a token of kind 0 (EOF) */ + jjmatchedKind = 0; + matchedToken = jjFillToken(); + return matchedToken; + } + + switch(curLexState) + { + case 0: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_0(); + break; + case 1: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_1(); + break; + } + if (jjmatchedKind != 0x7fffffff) + { + if (jjmatchedPos + 1 < curPos) /* give back the characters read beyond the end of the match */ + input_stream.backup(curPos - jjmatchedPos - 1); + if ((jjtoToken[jjmatchedKind >> 6] & (1L << (jjmatchedKind & 077))) != 0L) + { + matchedToken = jjFillToken(); + if (jjnewLexState[jjmatchedKind] != -1) + curLexState = jjnewLexState[jjmatchedKind]; + return matchedToken; + } + else + { /* not a token kind: apply any lexical state change and scan on */ + if (jjnewLexState[jjmatchedKind] != -1) + curLexState = jjnewLexState[jjmatchedKind]; + continue EOFLoop; + } + } + /* Nothing matched: gather position and context for the lexical error. */ int error_line = input_stream.getEndLine(); + int error_column = input_stream.getEndColumn(); + String error_after = null; + boolean EOFSeen = false; + try { input_stream.readChar(); input_stream.backup(1); } + catch (java.io.IOException e1) { + EOFSeen = true; + error_after = curPos <= 1 ? "" : input_stream.GetImage(); + if (curChar == '\n' || curChar == '\r') { + error_line++; + error_column = 0; + } + else + error_column++; + } + if (!EOFSeen) { + input_stream.backup(1); + error_after = curPos <= 1 ?
"" : input_stream.GetImage(); + } + throw new TokenMgrError(EOFSeen, curLexState, error_line, error_column, error_after, curChar, TokenMgrError.LEXICAL_ERROR); + } +} + +} diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/Token.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/Token.java new file mode 100644 index 00000000000..56825612b8f --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/Token.java @@ -0,0 +1,81 @@ +/* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */ +package org.apache.lucene.queryParser.surround.parser; + +/** + * Describes the input token stream. + */ + +public class Token { + + /** + * An integer that describes the kind of this token. This numbering + * system is determined by JavaCCParser, and a table of these numbers is + * stored in the file ...Constants.java. + */ + public int kind; + + /** + * beginLine and beginColumn describe the position of the first character + * of this token; endLine and endColumn describe the position of the + * last character of this token. + */ + public int beginLine, beginColumn, endLine, endColumn; + + /** + * The string image of the token. + */ + public String image; + + /** + * A reference to the next regular (non-special) token from the input + * stream. If this is the last token from the input stream, or if the + * token manager has not read tokens beyond this one, this field is + * set to null. This is true only if this token is also a regular + * token. Otherwise, see below for a description of the contents of + * this field. + */ + public Token next; + + /** + * This field is used to access special tokens that occur prior to this + * token, but after the immediately preceding regular (non-special) token. + * If there are no such special tokens, this field is set to null.
+ * When there is more than one such special token, this field refers + * to the last of these special tokens, which in turn refers to the next + * previous special token through its specialToken field, and so on + * until the first special token (whose specialToken field is null). + * The next fields of special tokens refer to other special tokens that + * immediately follow it (without an intervening regular token). If there + * is no such token, this field is null. + */ + public Token specialToken; + + /** + * Returns the image. + */ + public String toString() + { + return image; + } + + /** + * Returns a new Token object, by default. However, if you want, you + * can create and return subclass objects based on the value of ofKind. + * Simply add the cases to the switch for all those special cases. + * For example, if you have a subclass of Token called IDToken that + * you want to create if ofKind is ID, simply add something like : + * + * case MyParserConstants.ID : return new IDToken(); + * + * to the following switch statement. Then you can cast matchedToken + * variable to the appropriate type and use it in your lexical actions. + */ + public static final Token newToken(int ofKind) + { + switch(ofKind) + { /* no special cases are defined: every kind yields a plain Token */ + default : return new Token(); + } + } + +} diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/TokenMgrError.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/TokenMgrError.java new file mode 100644 index 00000000000..5ca88ef7006 --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/parser/TokenMgrError.java @@ -0,0 +1,133 @@ +/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 3.0 */ +package org.apache.lucene.queryParser.surround.parser; + +/* Error thrown by the token manager; errorCode tells which of the reasons listed below applies. */ public class TokenMgrError extends Error +{ + /* + * Ordinals for various reasons why an Error of this type can be thrown. + */ + + /** + * Lexical error occurred.
+ */ + static final int LEXICAL_ERROR = 0; + + /** + * An attempt wass made to create a second instance of a static token manager. + */ + static final int STATIC_LEXER_ERROR = 1; + + /** + * Tried to change to an invalid lexical state. + */ + static final int INVALID_LEXICAL_STATE = 2; + + /** + * Detected (and bailed out of) an infinite loop in the token manager. + */ + static final int LOOP_DETECTED = 3; + + /** + * Indicates the reason why the exception is thrown. It will have + * one of the above 4 values. + */ + int errorCode; + + /** + * Replaces unprintable characters by their espaced (or unicode escaped) + * equivalents in the given string + */ + protected static final String addEscapes(String str) { + StringBuffer retval = new StringBuffer(); + char ch; + for (int i = 0; i < str.length(); i++) { + switch (str.charAt(i)) + { + case 0 : + continue; + case '\b': + retval.append("\\b"); + continue; + case '\t': + retval.append("\\t"); + continue; + case '\n': + retval.append("\\n"); + continue; + case '\f': + retval.append("\\f"); + continue; + case '\r': + retval.append("\\r"); + continue; + case '\"': + retval.append("\\\""); + continue; + case '\'': + retval.append("\\\'"); + continue; + case '\\': + retval.append("\\\\"); + continue; + default: + if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { + String s = "0000" + Integer.toString(ch, 16); + retval.append("\\u" + s.substring(s.length() - 4, s.length())); + } else { + retval.append(ch); + } + continue; + } + } + return retval.toString(); + } + + /** + * Returns a detailed message for the Error when it is thrown by the + * token manager to indicate a lexical error. 
+ * Parameters : + * EOFSeen : indicates if EOF caused the lexicl error + * curLexState : lexical state in which this error occured + * errorLine : line number when the error occured + * errorColumn : column number when the error occured + * errorAfter : prefix that was seen before this error occured + * curchar : the offending character + * Note: You can customize the lexical error message by modifying this method. + */ + protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) { + return("Lexical error at line " + + errorLine + ", column " + + errorColumn + ". Encountered: " + + (EOFSeen ? " " : ("\"" + addEscapes(String.valueOf(curChar)) + "\"") + " (" + (int)curChar + "), ") + + "after : \"" + addEscapes(errorAfter) + "\""); + } + + /** + * You can also modify the body of this method to customize your error messages. + * For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not + * of end-users concern, so you can return something like : + * + * "Internal Error : Please file a bug report .... " + * + * from this method for such cases in the release version of your parser. + */ + public String getMessage() { + return super.getMessage(); + } + + /* + * Constructors of various flavors follow. 
+ */ + + public TokenMgrError() { + } + + public TokenMgrError(String message, int reason) { + super(message); + errorCode = reason; + } + + public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) { + this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason); + } +} diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/AndQuery.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/AndQuery.java new file mode 100644 index 00000000000..83bce99bc45 --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/AndQuery.java @@ -0,0 +1,32 @@ +package org.apache.lucene.queryParser.surround.query; +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +import java.util.List; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.BooleanClause; + +public class AndQuery extends ComposedQuery { + public AndQuery(List queries, boolean inf, String opName) { + super(queries, inf, opName); + } + + public Query makeLuceneQueryFieldNoBoost(String fieldName, BasicQueryFactory qf) { + return SrndBooleanQuery.makeBooleanQuery( /* subqueries can be individually boosted */ + makeLuceneSubQueriesField(fieldName, qf), BooleanClause.Occur.MUST); + } +} diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/BasicQueryFactory.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/BasicQueryFactory.java new file mode 100644 index 00000000000..bd005df21fd --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/BasicQueryFactory.java @@ -0,0 +1,64 @@ +package org.apache.lucene.queryParser.surround.query; +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* Create basic queries to be used during rewrite. + * The basic queries are TermQuery and SpanTermQuery. + * An exception can be thrown when too many of these are used. + * SpanTermQuery and TermQuery use IndexReader.termEnum(Term), which causes the buffer usage. + * + * Use this class to limit the buffer usage for reading terms from an index. + * Default is 1024, the same as the max. 
number of subqueries for a BooleanQuery. + */ + +import org.apache.lucene.index.Term; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.spans.SpanTermQuery; + +public class BasicQueryFactory { + public BasicQueryFactory(int maxBasicQueries) { + this.maxBasicQueries = maxBasicQueries; + this.queriesMade = 0; + } + + public BasicQueryFactory() { + this(1024); + } + + private int maxBasicQueries; + private int queriesMade; + + public int getNrQueriesMade() {return queriesMade;} + public int getMaxBasicQueries() {return maxBasicQueries;} + + private synchronized void checkMax() throws TooManyBasicQueries { + if (queriesMade >= maxBasicQueries) + throw new TooManyBasicQueries(getMaxBasicQueries()); + queriesMade++; + } + + public TermQuery newTermQuery(Term term) throws TooManyBasicQueries { + checkMax(); + return new TermQuery(term); + } + + public SpanTermQuery newSpanTermQuery(Term term) throws TooManyBasicQueries { + checkMax(); + return new SpanTermQuery(term); + } +} + + diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/ComposedQuery.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/ComposedQuery.java new file mode 100644 index 00000000000..3de99348582 --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/ComposedQuery.java @@ -0,0 +1,116 @@ +package org.apache.lucene.queryParser.surround.query; +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.List; +import java.util.ArrayList; +import java.util.Iterator; + +public abstract class ComposedQuery extends SrndQuery { + + public ComposedQuery(List qs, boolean operatorInfix, String opName) { + recompose(qs); + this.operatorInfix = operatorInfix; + this.opName = opName; + } + + protected void recompose(List queries) { + if (queries.size() < 2) throw new AssertionError("Too few subqueries"); + this.queries = queries; + } + + private String opName; + public String getOperatorName() {return opName;} + + private List queries; + + public Iterator getSubQueriesIterator() {return queries.listIterator();} + + public int getNrSubQueries() {return queries.size();} + + public SrndQuery getSubQuery(int qn) {return (SrndQuery) queries.get(qn);} + + private boolean operatorInfix; + public boolean isOperatorInfix() { return operatorInfix; } /* else prefix operator */ + + public List makeLuceneSubQueriesField(String fn, BasicQueryFactory qf) { + ArrayList luceneSubQueries = new ArrayList(); + Iterator sqi = getSubQueriesIterator(); + while (sqi.hasNext()) { + luceneSubQueries.add( ((SrndQuery) sqi.next()).makeLuceneQueryField(fn, qf)); + } + return luceneSubQueries; + } + + public String toString() { + StringBuffer r = new StringBuffer(); + if (isOperatorInfix()) { + infixToString(r); + } else { + prefixToString(r); + } + weightToString(r); + return r.toString(); + } + + /* Override for different spacing */ + protected String getPrefixSeparator() { return ", ";} + protected String getBracketOpen() { return "(";} + protected String getBracketClose() { return ")";} + + protected void infixToString(StringBuffer r) { + /* Brackets are possibly redundant in the result. 
*/ + Iterator sqi = getSubQueriesIterator(); + r.append(getBracketOpen()); + if (sqi.hasNext()) { + r.append(sqi.next().toString()); + while (sqi.hasNext()) { + r.append(" "); + r.append(getOperatorName()); /* infix operator */ + r.append(" "); + r.append(sqi.next().toString()); + } + } + r.append(getBracketClose()); + } + + protected void prefixToString(StringBuffer r) { + Iterator sqi = getSubQueriesIterator(); + r.append(getOperatorName()); /* prefix operator */ + r.append(getBracketOpen()); + if (sqi.hasNext()) { + r.append(sqi.next().toString()); + while (sqi.hasNext()) { + r.append(getPrefixSeparator()); + r.append(sqi.next().toString()); + } + } + r.append(getBracketClose()); + } + + + public boolean isFieldsSubQueryAcceptable() { + /* at least one subquery should be acceptable */ + Iterator sqi = getSubQueriesIterator(); + while (sqi.hasNext()) { + if (((SrndQuery) sqi.next()).isFieldsSubQueryAcceptable()) { + return true; + } + } + return false; + } +} + diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/DistanceQuery.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/DistanceQuery.java new file mode 100644 index 00000000000..608866b4154 --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/DistanceQuery.java @@ -0,0 +1,117 @@ +package org.apache.lucene.queryParser.surround.query; +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +import java.util.List; +import java.util.Iterator; + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanQuery; + +public class DistanceQuery extends ComposedQuery implements DistanceSubQuery { + public DistanceQuery( + List queries, + boolean infix, + int opDistance, + String opName, + boolean ordered) { + super(queries, infix, opName); + this.opDistance = opDistance; /* the distance indicated in the operator */ + this.ordered = ordered; + } + + private int opDistance; + public int getOpDistance() {return opDistance;} + + private boolean ordered; + public boolean subQueriesOrdered() {return ordered;} + + public String distanceSubQueryNotAllowed() { + Iterator sqi = getSubQueriesIterator(); + while (sqi.hasNext()) { + Object leq = sqi.next(); + if (leq instanceof DistanceSubQuery) { + DistanceSubQuery dsq = (DistanceSubQuery) leq; + String m = dsq.distanceSubQueryNotAllowed(); + if (m != null) { + return m; + } + } else { + return "Operator " + getOperatorName() + " does not allow subquery " + leq.toString(); + } + } + return null; /* subqueries acceptable */ + } + + + public void addSpanQueries(SpanNearClauseFactory sncf) throws IOException { + Query snq = getSpanNearQuery(sncf.getIndexReader(), + sncf.getFieldName(), + getWeight(), + sncf.getBasicQueryFactory()); + sncf.addSpanNearQuery(snq); + } + + public Query makeLuceneQueryFieldNoBoost(final String fieldName, final BasicQueryFactory qf) { + return new Query () { + + public String toString(String fn) { + return getClass().toString() + " " + fieldName + " (" + fn + "?)"; + } + + public Query rewrite(IndexReader reader) throws IOException { + return getSpanNearQuery(reader, fieldName, getBoost(), qf); + } + + }; + } + + public Query 
getSpanNearQuery( + IndexReader reader, + String fieldName, + float boost, + BasicQueryFactory qf) throws IOException { + SpanQuery[] spanNearClauses = new SpanQuery[getNrSubQueries()]; + Iterator sqi = getSubQueriesIterator(); + int qi = 0; + while (sqi.hasNext()) { + SpanNearClauseFactory sncf = new SpanNearClauseFactory(reader, fieldName, qf); + + ((DistanceSubQuery)sqi.next()).addSpanQueries(sncf); + if (sncf.size() == 0) { /* distance operator requires all sub queries */ + while (sqi.hasNext()) { /* produce evt. error messages but ignore results */ + ((DistanceSubQuery)sqi.next()).addSpanQueries(sncf); + sncf.clear(); + } + return SrndQuery.theEmptyLcnQuery; + } + + spanNearClauses[qi] = sncf.makeSpanNearClause(); + + qi++; + } + + SpanNearQuery r = new SpanNearQuery(spanNearClauses, getOpDistance() - 1, subQueriesOrdered()); + r.setBoost(boost); + return r; + } +} + diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/DistanceSubQuery.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/DistanceSubQuery.java new file mode 100644 index 00000000000..0c7f2d88d07 --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/DistanceSubQuery.java @@ -0,0 +1,30 @@ +package org.apache.lucene.queryParser.surround.query; +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+import java.io.IOException;
+
+public interface DistanceSubQuery {
+  /** When distanceSubQueryNotAllowed() returns non null, the returned string is
+   * the reason why the subquery is not allowed as a distance subquery.
+   * <br>When distanceSubQueryNotAllowed() returns null, addSpanQueries() can be used
+   * in the creation of the span near clause for the subquery.
+   */
+  String distanceSubQueryNotAllowed();
+
+  void addSpanQueries(SpanNearClauseFactory sncf) throws IOException;
+}
+
diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/FieldsQuery.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/FieldsQuery.java
new file mode 100644
index 00000000000..265b807a5d7
--- /dev/null
+++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/FieldsQuery.java
@@ -0,0 +1,93 @@
+package org.apache.lucene.queryParser.surround.query;
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +import java.util.ArrayList; +import java.util.List; +import java.util.Iterator; + +import org.apache.lucene.search.Query; + +public class FieldsQuery extends SrndQuery { /* mostly untested */ + private SrndQuery q; + private ArrayList fieldNames; + private final char fieldOp; + private final String OrOperatorName = "OR"; /* for expanded queries, not normally visible */ + + public FieldsQuery(SrndQuery q, ArrayList fieldNames, char fieldOp) { + this.q = q; + this.fieldNames = fieldNames; + this.fieldOp = fieldOp; + } + + public FieldsQuery(SrndQuery q, String fieldName, char fieldOp) { + this.q = q; + fieldNames = new ArrayList(); + fieldNames.add(fieldName); + this.fieldOp = fieldOp; + } + + public boolean isFieldsSubQueryAcceptable() { + return false; + } + + public Query makeLuceneQueryNoBoost(BasicQueryFactory qf) { + if (fieldNames.size() == 1) { /* single field name: no new queries needed */ + return q.makeLuceneQueryFieldNoBoost((String) fieldNames.get(0), qf); + } else { /* OR query over the fields */ + ArrayList queries = new ArrayList(); + Iterator fni = getFieldNames().listIterator(); + SrndQuery qc; + while (fni.hasNext()) { + qc = (SrndQuery) q.clone(); + queries.add( new FieldsQuery( qc, (String) fni.next(), fieldOp)); + } + boolean infix = true; + OrQuery oq = new OrQuery(queries, + true /* infix OR for field names */, + OrOperatorName); + System.out.println(getClass().toString() + ", fields expanded: " + oq.toString()); /* needs testing */ + return oq.makeLuceneQueryField(null, qf); + } + } + + public Query makeLuceneQueryFieldNoBoost(String fieldName, BasicQueryFactory qf) { + return makeLuceneQueryNoBoost(qf); /* use this.fieldNames instead of fieldName */ + } + + + public List getFieldNames() {return fieldNames;} + + public char getFieldOperator() { return fieldOp;} + + public String toString() { + StringBuffer r = new StringBuffer(); + r.append("("); + fieldNamesToString(r); + r.append(q.toString()); + r.append(")"); + return 
r.toString(); + } + + protected void fieldNamesToString(StringBuffer r) { + Iterator fni = getFieldNames().listIterator(); + while (fni.hasNext()) { + r.append((String) fni.next()); + r.append(getFieldOperator()); + } + } +} + diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/NotQuery.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/NotQuery.java new file mode 100644 index 00000000000..49ab4cb35fa --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/NotQuery.java @@ -0,0 +1,37 @@ +package org.apache.lucene.queryParser.surround.query; +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.List; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BooleanClause; + +public class NotQuery extends ComposedQuery { + public NotQuery(List queries, String opName) { super(queries, true /* infix */, opName); } + + public Query makeLuceneQueryFieldNoBoost(String fieldName, BasicQueryFactory qf) { + List luceneSubQueries = makeLuceneSubQueriesField(fieldName, qf); + BooleanQuery bq = new BooleanQuery(); + bq.add( (Query) luceneSubQueries.get(0), BooleanClause.Occur.MUST); + SrndBooleanQuery.addQueriesToBoolean(bq, + // FIXME: do not allow weights on prohibited subqueries. 
+ luceneSubQueries.subList(1, luceneSubQueries.size()), + // later subqueries: not required, prohibited + BooleanClause.Occur.MUST_NOT); + return bq; + } +} diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/OrQuery.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/OrQuery.java new file mode 100644 index 00000000000..5be438081c6 --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/OrQuery.java @@ -0,0 +1,59 @@ +package org.apache.lucene.queryParser.surround.query; +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.List; +import java.util.Iterator; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.BooleanClause; + +import java.io.IOException; + +public class OrQuery extends ComposedQuery implements DistanceSubQuery { + public OrQuery(List queries, boolean infix, String opName) { + super(queries, infix, opName); + } + + public Query makeLuceneQueryFieldNoBoost(String fieldName, BasicQueryFactory qf) { + return SrndBooleanQuery.makeBooleanQuery( + /* subqueries can be individually boosted */ + makeLuceneSubQueriesField(fieldName, qf), BooleanClause.Occur.SHOULD); + } + + public String distanceSubQueryNotAllowed() { + Iterator sqi = getSubQueriesIterator(); + while (sqi.hasNext()) { + SrndQuery leq = (SrndQuery) sqi.next(); + if (leq instanceof DistanceSubQuery) { + String m = ((DistanceSubQuery)leq).distanceSubQueryNotAllowed(); + if (m != null) { + return m; + } + } else { + return "subquery not allowed: " + leq.toString(); + } + } + return null; + } + + public void addSpanQueries(SpanNearClauseFactory sncf) throws IOException { + Iterator sqi = getSubQueriesIterator(); + while (sqi.hasNext()) { + ((DistanceSubQuery)sqi.next()).addSpanQueries(sncf); + } + } +} + diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SimpleTerm.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SimpleTerm.java new file mode 100644 index 00000000000..e370462ed3b --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SimpleTerm.java @@ -0,0 +1,109 @@ +package org.apache.lucene.queryParser.surround.query; +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.ArrayList; +import java.io.IOException; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.BooleanClause; + +public abstract class SimpleTerm + extends SrndQuery + implements DistanceSubQuery, Comparable +{ + public SimpleTerm(boolean q) {quoted = q;} + + private boolean quoted; + boolean isQuoted() {return quoted;} + + public String getQuote() {return "\"";} + public String getFieldOperator() {return "/";} + + public abstract String toStringUnquoted(); + + public int compareTo(Object o) { + /* for ordering terms and prefixes before using an index, not used */ + SimpleTerm ost = (SimpleTerm) o; + return this.toStringUnquoted().compareTo( ost.toStringUnquoted()); + } + + protected void suffixToString(StringBuffer r) {;} /* override for prefix query */ + + public String toString() { + StringBuffer r = new StringBuffer(); + if (isQuoted()) { + r.append(getQuote()); + } + r.append(toStringUnquoted()); + if (isQuoted()) { + r.append(getQuote()); + } + suffixToString(r); + weightToString(r); + return r.toString(); + } + + public abstract void visitMatchingTerms( + IndexReader reader, + String fieldName, + MatchingTermVisitor mtv) throws IOException; + + public interface MatchingTermVisitor { + void visitMatchingTerm(Term t)throws IOException; + } + + public String distanceSubQueryNotAllowed() {return null;} + + + public Query makeLuceneQueryFieldNoBoost(final String fieldName, final BasicQueryFactory qf) { + return new Query() 
{ + public String toString(String fn) { + return getClass().toString() + " " + fieldName + " (" + fn + "?)"; + } + + public Query rewrite(IndexReader reader) throws IOException { + final ArrayList luceneSubQueries = new ArrayList(); + visitMatchingTerms( reader, fieldName, + new MatchingTermVisitor() { + public void visitMatchingTerm(Term term) throws IOException { + luceneSubQueries.add(qf.newTermQuery(term)); + } + }); + return (luceneSubQueries.size() == 0) ? SrndQuery.theEmptyLcnQuery + : (luceneSubQueries.size() == 1) ? (Query) luceneSubQueries.get(0) + : SrndBooleanQuery.makeBooleanQuery( + /* luceneSubQueries all have default weight */ + luceneSubQueries, BooleanClause.Occur.SHOULD); /* OR the subquery terms */ + } + }; + } + + public void addSpanQueries(final SpanNearClauseFactory sncf) throws IOException { + visitMatchingTerms( + sncf.getIndexReader(), + sncf.getFieldName(), + new MatchingTermVisitor() { + public void visitMatchingTerm(Term term) throws IOException { + sncf.addTermWeighted(term, getWeight()); + } + }); + } +} + + + diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SpanNearClauseFactory.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SpanNearClauseFactory.java new file mode 100644 index 00000000000..e1ed30e2888 --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SpanNearClauseFactory.java @@ -0,0 +1,153 @@ +package org.apache.lucene.queryParser.surround.query; +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* +SpanNearClauseFactory: + +Operations: + +- create for a field name and an indexreader. + +- add a weighted Term + this should add a corresponding SpanTermQuery, or + increase the weight of an existing one. + +- add a weighted subquery SpanNearQuery + +- create a clause for SpanNearQuery from the things added above. + For this, create an array of SpanQuery's from the added ones. + The clause normally is a SpanOrQuery over the added subquery SpanNearQuery + the SpanTermQuery's for the added Term's +*/ + +/* When it is necessary to suppress double subqueries as much as possible: + hashCode() and equals() on unweighted SpanQuery are needed (possibly via getTerms(), + the terms are individually hashable). + Idem SpanNearQuery: hash on the subqueries and the slop. + Evt. merge SpanNearQuery's by adding the weights of the corresponding subqueries. + */ + +/* To be determined: + Are SpanQuery weights handled correctly during search by Lucene? + Should the resulting SpanOrQuery be sorted? + Could other SpanQueries be added for use in this factory: + - SpanOrQuery: in principle yes, but it only has access to it's terms + via getTerms(); are the corresponding weights available? + - SpanFirstQuery: treat similar to subquery SpanNearQuery. (ok?) + - SpanNotQuery: treat similar to subquery SpanNearQuery. (ok?) 
+ */ + +import java.util.HashMap; +import java.util.Iterator; + +import java.util.Comparator; +import java.util.Arrays; + +import java.io.IOException; + +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermEnum; + +import org.apache.lucene.search.Query; + +import org.apache.lucene.search.spans.SpanQuery; +import org.apache.lucene.search.spans.SpanNearQuery; +import org.apache.lucene.search.spans.SpanOrQuery; +import org.apache.lucene.search.spans.SpanTermQuery; + + +public class SpanNearClauseFactory { + public SpanNearClauseFactory(IndexReader reader, String fieldName, BasicQueryFactory qf) { + this.reader = reader; + this.fieldName = fieldName; + this.weightBySpanQuery = new HashMap(); + this.qf = qf; + } + private IndexReader reader; + private String fieldName; + private HashMap weightBySpanQuery; + private BasicQueryFactory qf; + + public IndexReader getIndexReader() {return reader;} + + public String getFieldName() {return fieldName;} + + public BasicQueryFactory getBasicQueryFactory() {return qf;} + + public TermEnum getTermEnum(String termText) throws IOException { + return getIndexReader().terms(new Term(getFieldName(), termText)); + } + + public int size() {return weightBySpanQuery.size();} + + public void clear() {weightBySpanQuery.clear();} + + protected void addSpanQueryWeighted(SpanQuery sq, float weight) { + Float w = (Float) weightBySpanQuery.get(sq); + if (w != null) + w = new Float(w.floatValue() + weight); + else + w = new Float(weight); + weightBySpanQuery.put(sq, w); + } + + public void addTermWeighted(Term t, float weight) throws IOException { + SpanTermQuery stq = qf.newSpanTermQuery(t); + /* CHECKME: wrap in Hashable...? */ + addSpanQueryWeighted(stq, weight); + } + + public void addSpanNearQuery(Query q) { + if (q == SrndQuery.theEmptyLcnQuery) + return; + if (! 
(q instanceof SpanNearQuery)) + throw new AssertionError("Expected SpanNearQuery: " + q.toString(getFieldName())); + /* CHECKME: wrap in Hashable...? */ + addSpanQueryWeighted((SpanNearQuery)q, q.getBoost()); + } + + public SpanQuery makeSpanNearClause() { + SpanQuery [] spanQueries = new SpanQuery[size()]; + Iterator sqi = weightBySpanQuery.keySet().iterator(); + int i = 0; + while (sqi.hasNext()) { + SpanQuery sq = (SpanQuery) sqi.next(); + sq.setBoost(((Float)weightBySpanQuery.get(sq)).floatValue()); + spanQueries[i++] = sq; + } + + /* CHECKME: Does the underlying implementation of SpanQuery need sorting? */ + if (false) /* true when sorting needed */ + Arrays.sort(spanQueries, new Comparator() { + public int compare(Object o1, Object o2) { + SpanQuery sq1 = (SpanQuery) o1; + SpanQuery sq2 = (SpanQuery) o2; + /* compare the text of the first term of each SpanQuery */ + return ((Term)sq1.getTerms().iterator().next()).text().compareTo( + ((Term)sq2.getTerms().iterator().next()).text()); + } + public boolean equals(Object o) {return false;} + }); + + if (spanQueries.length == 1) + return spanQueries[0]; + else + return new SpanOrQuery(spanQueries); + } +} + diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndBooleanQuery.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndBooleanQuery.java new file mode 100644 index 00000000000..b7f232cb6ca --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndBooleanQuery.java @@ -0,0 +1,44 @@ +package org.apache.lucene.queryParser.surround.query; +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.List; + +import org.apache.lucene.search.Query; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BooleanClause; + +class SrndBooleanQuery { + public static void addQueriesToBoolean( + BooleanQuery bq, + List queries, + BooleanClause.Occur occur) { + for (int i = 0; i < queries.size(); i++) { + bq.add( (Query) queries.get(i), occur); + } + } + + public static Query makeBooleanQuery( + List queries, + BooleanClause.Occur occur) { + if (queries.size() <= 1) { + throw new AssertionError("Too few subqueries: " + queries.size()); + } + BooleanQuery bq = new BooleanQuery(); + addQueriesToBoolean(bq, queries.subList(0, queries.size()), occur); + return bq; + } +} diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndPrefixQuery.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndPrefixQuery.java new file mode 100644 index 00000000000..8804ee7b148 --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndPrefixQuery.java @@ -0,0 +1,75 @@ +package org.apache.lucene.queryParser.surround.query; +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.ArrayList; + +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.IndexReader; + +import java.io.IOException; + + +public class SrndPrefixQuery extends SimpleTerm { + public SrndPrefixQuery(String prefix, boolean quoted, char truncator) { + super(quoted); + this.prefix = prefix; + this.truncator = truncator; + } + + private final String prefix; + public String getPrefix() {return prefix;} + + private final char truncator; + public char getSuffixOperator() {return truncator;} + + public Term getLucenePrefixTerm(String fieldName) { + return new Term(fieldName, getPrefix()); + } + + public String toStringUnquoted() {return getPrefix();} + + protected void suffixToString(StringBuffer r) {r.append(getSuffixOperator());} + + public void visitMatchingTerms( + IndexReader reader, + String fieldName, + MatchingTermVisitor mtv) throws IOException + { + /* inspired by PrefixQuery.rewrite(): */ + TermEnum enumerator = reader.terms(getLucenePrefixTerm(fieldName)); + boolean expanded = false; + try { + do { + Term term = enumerator.term(); + if ((term != null) + && term.text().startsWith(getPrefix()) + && term.field().equals(fieldName)) { + mtv.visitMatchingTerm(term); + expanded = true; + } else { + break; + } + } while (enumerator.next()); + } finally { + enumerator.close(); + } + if (! 
expanded) { + System.out.println("No terms in " + fieldName + " field for: " + toString()); + } + } +} diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndQuery.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndQuery.java new file mode 100644 index 00000000000..41ca1d24caa --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndQuery.java @@ -0,0 +1,86 @@ +package org.apache.lucene.queryParser.surround.query; +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.ArrayList; +import java.util.List; +import java.util.Iterator; + +import java.io.IOException; + +import org.apache.lucene.search.Query; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BooleanClause; + +public abstract class SrndQuery implements Cloneable { + public SrndQuery() {} + + private float weight = (float) 1.0; + private boolean weighted = false; + + public void setWeight(float w) { + weight = w; /* as parsed from the query text */ + weighted = true; + } + public boolean isWeighted() {return weighted;} + public float getWeight() { return weight; } + public String getWeightString() {return Float.toString(getWeight());} + + public String getWeightOperator() {return "^";} + + protected void weightToString(StringBuffer r) { /* append the weight part of a query */ + if (isWeighted()) { + r.append(getWeightOperator()); + r.append(getWeightString()); + } + } + + public Query makeLuceneQueryField(String fieldName, BasicQueryFactory qf){ + Query q = makeLuceneQueryFieldNoBoost(fieldName, qf); + if (isWeighted()) { + q.setBoost(getWeight() * q.getBoost()); /* weight may be at any level in a SrndQuery */ + } + return q; + } + + public abstract Query makeLuceneQueryFieldNoBoost(String fieldName, BasicQueryFactory qf); + + public abstract String toString(); + + public boolean isFieldsSubQueryAcceptable() {return true;} + + public Object clone() { + try { + return super.clone(); + } catch (CloneNotSupportedException cns) { + throw new Error(cns); + } + } + +/* An empty Lucene query */ + public final static Query theEmptyLcnQuery = new BooleanQuery() { /* no changes allowed */ + public void setBoost(float boost) { + throw new UnsupportedOperationException(); + } + public void add(BooleanClause clause) { + throw new UnsupportedOperationException(); + } + public void add(Query query, BooleanClause.Occur occur) { + throw new UnsupportedOperationException(); + } + }; +} + diff --git 
a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTermQuery.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTermQuery.java new file mode 100644 index 00000000000..4d190fca146 --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTermQuery.java @@ -0,0 +1,67 @@ +package org.apache.lucene.queryParser.surround.query; +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.util.List; + +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermEnum; + +import org.apache.lucene.index.IndexReader; +import java.io.IOException; + + +public class SrndTermQuery extends SimpleTerm { + public SrndTermQuery(String termText, boolean quoted) { + super(quoted); + this.termText = termText; + } + + private final String termText; + public String getTermText() {return termText;} + + public Term getLuceneTerm(String fieldName) { + return new Term(fieldName, getTermText()); + } + + public String toStringUnquoted() {return getTermText();} + + public void visitMatchingTerms( + IndexReader reader, + String fieldName, + MatchingTermVisitor mtv) throws IOException + { + /* check term presence in index here for symmetry with other SimpleTerm's */ + TermEnum enumerator = reader.terms(getLuceneTerm(fieldName)); + try { + Term it= enumerator.term(); /* same or following index term */ + if ((it != null) + && it.text().equals(getTermText()) + && it.field().equals(fieldName)) { + mtv.visitMatchingTerm(it); + } else { + System.out.println("No term in " + fieldName + " field for: " + toString()); + } + } finally { + enumerator.close(); + } + } +} + + + diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTruncQuery.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTruncQuery.java new file mode 100644 index 00000000000..4dfbf665bb3 --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/SrndTruncQuery.java @@ -0,0 +1,111 @@ +package org.apache.lucene.queryParser.surround.query; +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermEnum; +import org.apache.lucene.index.IndexReader; + +import java.io.IOException; + +import java.util.regex.Pattern; +import java.util.regex.Matcher; + + +public class SrndTruncQuery extends SimpleTerm { + public SrndTruncQuery(String truncated, char unlimited, char mask) { + super(false); /* not quoted */ + this.truncated = truncated; + this.unlimited = unlimited; + this.mask = mask; + truncatedToPrefixAndPattern(); + } + + private final String truncated; + private final char unlimited; + private final char mask; + + private String prefix; + private Pattern pattern; + + + public String getTruncated() {return truncated;} + + public String toStringUnquoted() {return getTruncated();} + + + protected boolean matchingChar(char c) { + return (c != unlimited) && (c != mask); + } + + protected void appendRegExpForChar(char c, StringBuffer re) { + if (c == unlimited) + re.append(".*"); + else if (c == mask) + re.append("."); + else + re.append(c); + } + + protected void truncatedToPrefixAndPattern() { + int i = 0; + while ((i < truncated.length()) && matchingChar(truncated.charAt(i))) { + i++; + } + prefix = truncated.substring(0, i); + + StringBuffer re = new StringBuffer(); + while (i < truncated.length()) { + appendRegExpForChar(truncated.charAt(i), re); + i++; + } + pattern = Pattern.compile(re.toString()); + } + + public void visitMatchingTerms( + IndexReader reader, + String fieldName, + MatchingTermVisitor mtv) throws IOException + { + boolean expanded = false; + int 
prefixLength = prefix.length(); + TermEnum enumerator = reader.terms(new Term(fieldName, prefix)); + Matcher matcher = pattern.matcher(""); + try { + do { + Term term = enumerator.term(); + if (term != null) { + String text = term.text(); + if ((! text.startsWith(prefix)) || (! term.field().equals(fieldName))) { + break; + } else { + matcher.reset( text.substring(prefixLength)); + if (matcher.matches()) { + mtv.visitMatchingTerm(term); + expanded = true; + } + } + } + } while (enumerator.next()); + } finally { + enumerator.close(); + matcher.reset(); + } + if (! expanded) { + System.out.println("No terms in " + fieldName + " field for: " + toString()); + } + } +} diff --git a/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/TooManyBasicQueries.java b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/TooManyBasicQueries.java new file mode 100644 index 00000000000..627c8d8c1d3 --- /dev/null +++ b/contrib/surround/src/java/org/apache/lucene/queryParser/surround/query/TooManyBasicQueries.java @@ -0,0 +1,26 @@ +package org.apache.lucene.queryParser.surround.query; +/** + * Copyright 2005 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
import java.io.IOException; /* subclass to be usable from within Query.rewrite() */

/**
 * Signals that more basic queries were needed than the given maximum allows.
 */
public class TooManyBasicQueries extends IOException {
  /**
   * @param maxBasicQueries the maximum that was exceeded, reported in the message
   */
  public TooManyBasicQueries(int maxBasicQueries) {
    super(describe(maxBasicQueries));
  }

  /* Builds the exception message for the given maximum. */
  private static String describe(int maxBasicQueries) {
    StringBuffer message = new StringBuffer("Exceeded maximum of ");
    message.append(maxBasicQueries);
    message.append(" basic queries.");
    return message.toString();
  }
}
testCase.assertTrue(queryText + ": positive score", score > 0.0); + testCase.assertTrue(queryText + ": too many hits", totalMatched < expectedDocNrs.length); + int i; + for (i = 0; i < expectedDocNrs.length; i++) { + if ((! encountered[i]) && (expectedDocNrs[i] == docNr)) { + encountered[i] = true; + break; + } + } + if (i == expectedDocNrs.length) { + testCase.assertTrue(queryText + ": doc nr for hit not expected: " + docNr, false); + } + totalMatched++; + } + + void checkNrHits() { + testCase.assertEquals(queryText + ": nr of hits", expectedDocNrs.length, totalMatched); + } + } + + public void doTest() throws Exception { + QueryParser parser = new QueryParser(); + + if (verbose) { + System.out.println(""); + System.out.println("Query: " + queryText); + } + + SrndQuery lq = parser.parse(queryText); + + /* if (verbose) System.out.println("Srnd: " + lq.toString()); */ + + Query query = lq.makeLuceneQueryField(fieldName, qf); + /* if (verbose) System.out.println("Lucene: " + query.toString()); */ + + TestCollector tc = new TestCollector(); + Searcher searcher = new IndexSearcher(dBase.getDb()); + try { + searcher.search(query, tc); + } finally { + searcher.close(); + } + tc.checkNrHits(); + } +} + diff --git a/contrib/surround/src/test/org/apache/lucene/queryParser/surround/query/ExceptionQueryTest.java b/contrib/surround/src/test/org/apache/lucene/queryParser/surround/query/ExceptionQueryTest.java new file mode 100644 index 00000000000..4c3bbfa85fc --- /dev/null +++ b/contrib/surround/src/test/org/apache/lucene/queryParser/surround/query/ExceptionQueryTest.java @@ -0,0 +1,55 @@ +package org.apache.lucene.queryParser.surround.query; + +import org.apache.lucene.search.Query; + +import org.apache.lucene.queryParser.surround.parser.QueryParser; +import org.apache.lucene.queryParser.surround.parser.ParseException; + +import junit.framework.TestCase; + + +public class ExceptionQueryTest { + private String queryText; + private boolean verbose; + private TestCase testCase; 
+ + public ExceptionQueryTest(String queryText, boolean verbose) { + this.queryText = queryText; + this.verbose = verbose; + this.testCase = testCase; + } + + public void doTest(StringBuffer failQueries) { + QueryParser parser = new QueryParser(); + boolean pass = false; + SrndQuery lq = null; + try { + lq = parser.parse(queryText); + if (verbose) { + System.out.println("Query: " + queryText + "\nParsed as: " + lq.toString()); + } + } catch (ParseException e) { + if (verbose) { + System.out.println("Parse exception for query:\n" + + queryText + "\n" + + e.getMessage()); + } + pass = true; + } + if (! pass) { + failQueries.append(queryText); + failQueries.append("\nParsed as: "); + failQueries.append(lq.toString()); + failQueries.append("\n"); + } + } + + public static String getFailQueries(String[] exceptionQueries, boolean verbose) { + StringBuffer failQueries = new StringBuffer(); + for (int i = 0; i < exceptionQueries.length; i++ ) { + new ExceptionQueryTest( exceptionQueries[i], verbose).doTest(failQueries); + } + return failQueries.toString(); + } +} + diff --git a/contrib/surround/src/test/org/apache/lucene/queryParser/surround/query/SingleFieldTestDb.java b/contrib/surround/src/test/org/apache/lucene/queryParser/surround/query/SingleFieldTestDb.java new file mode 100644 index 00000000000..e8474e63706 --- /dev/null +++ b/contrib/surround/src/test/org/apache/lucene/queryParser/surround/query/SingleFieldTestDb.java @@ -0,0 +1,39 @@ +package org.apache.lucene.queryParser.surround.query; + +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.RAMDirectory; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.analysis.WhitespaceAnalyzer; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.IndexWriter; + +public class SingleFieldTestDb { + private Directory db; + private String[] docs; + private String fieldName; + private String dbName = "testdb"; + + public 
SingleFieldTestDb(String[] documents, String fName) { + try { + db = new RAMDirectory(); + docs = documents; + fieldName = fName; + Analyzer analyzer = new WhitespaceAnalyzer(); + IndexWriter writer = new IndexWriter(db, analyzer, true); + for (int j = 0; j < docs.length; j++) { + Document d = new Document(); + d.add(new Field(fieldName, docs[j], Field.Store.NO, Field.Index.TOKENIZED)); + writer.addDocument(d); + } + writer.close(); + } catch (java.io.IOException ioe) { + throw new Error(ioe); + } + } + + Directory getDb() {return db;} + String[] getDocs() {return docs;} + String getFieldname() {return fieldName;} +} + diff --git a/contrib/surround/src/test/org/apache/lucene/queryParser/surround/query/Test01Exceptions.java b/contrib/surround/src/test/org/apache/lucene/queryParser/surround/query/Test01Exceptions.java new file mode 100644 index 00000000000..27976cf19f9 --- /dev/null +++ b/contrib/surround/src/test/org/apache/lucene/queryParser/surround/query/Test01Exceptions.java @@ -0,0 +1,51 @@ +package org.apache.lucene.queryParser.surround.query; + +import junit.framework.TestCase; +import junit.framework.TestSuite; +import junit.textui.TestRunner; + +public class Test01Exceptions extends TestCase { + /** Main for running test case by itself. 
*/ + public static void main(String args[]) { + TestRunner.run(new TestSuite(Test01Exceptions.class)); + } + + boolean verbose = false; /* to show actual parsing error messages */ + final String fieldName = "bi"; + + String[] exceptionQueries = { + "*", + "a*", + "ab*", + "?", + "a?", + "ab?", + "a???b", + "a?", + "a*b?", + "word1 word2", + "word2 AND", + "word1 OR", + "AND(word2)", + "AND(word2,)", + "AND(word2,word1,)", + "OR(word2)", + "OR(word2 ,", + "OR(word2 , word1 ,)", + "xx NOT", + "xx (a AND b)", + "(a AND b", + "a OR b)", + "or(word2+ not ord+, and xyz,def)", + "" + }; + + public void test01Exceptions() throws Exception { + String m = ExceptionQueryTest.getFailQueries(exceptionQueries, verbose); + if (m.length() > 0) { + fail("No ParseException for:\n" + m); + } + } +} + + diff --git a/contrib/surround/src/test/org/apache/lucene/queryParser/surround/query/Test02Boolean.java b/contrib/surround/src/test/org/apache/lucene/queryParser/surround/query/Test02Boolean.java new file mode 100644 index 00000000000..c79d5350f26 --- /dev/null +++ b/contrib/surround/src/test/org/apache/lucene/queryParser/surround/query/Test02Boolean.java @@ -0,0 +1,105 @@ +package org.apache.lucene.queryParser.surround.query; + +import junit.framework.TestCase; +import junit.framework.TestSuite; +import junit.textui.TestRunner; + + +public class Test02Boolean extends TestCase { + public static void main(String args[]) { + TestRunner.run(new TestSuite(Test02Boolean.class)); + } + + final String fieldName = "bi"; + boolean verbose = false; + int maxBasicQueries = 16; + + String[] docs1 = { + "word1 word2 word3", + "word4 word5", + "ord1 ord2 ord3", + "orda1 orda2 orda3 word2 worda3", + "a c e a b c" + }; + + SingleFieldTestDb db1 = new SingleFieldTestDb(docs1, fieldName); + + public void normalTest1(String query, int[] expdnrs) throws Exception { + BooleanQueryTest bqt = new BooleanQueryTest( query, expdnrs, db1, fieldName, this, + new BasicQueryFactory(maxBasicQueries)); + 
bqt.setVerbose(verbose); + bqt.doTest(); + } + + public void test02Terms01() throws Exception { + int[] expdnrs = {0}; normalTest1( "word1", expdnrs); + } + public void test02Terms02() throws Exception { + int[] expdnrs = {0, 1, 3}; normalTest1( "word*", expdnrs); + } + public void test02Terms03() throws Exception { + int[] expdnrs = {2}; normalTest1( "ord2", expdnrs); + } + public void test02Terms04() throws Exception { + int[] expdnrs = {}; normalTest1( "kxork*", expdnrs); + } + public void test02Terms05() throws Exception { + int[] expdnrs = {0, 1, 3}; normalTest1( "wor*", expdnrs); + } + public void test02Terms06() throws Exception { + int[] expdnrs = {}; normalTest1( "ab", expdnrs); + } + + public void test02Terms10() throws Exception { + int[] expdnrs = {}; normalTest1( "abc?", expdnrs); + } + public void test02Terms13() throws Exception { + int[] expdnrs = {0,1,3}; normalTest1( "word?", expdnrs); + } + public void test02Terms14() throws Exception { + int[] expdnrs = {0,1,3}; normalTest1( "w?rd?", expdnrs); + } + public void test02Terms20() throws Exception { + int[] expdnrs = {0,1,3}; normalTest1( "w*rd?", expdnrs); + } + public void test02Terms21() throws Exception { + int[] expdnrs = {3}; normalTest1( "w*rd??", expdnrs); + } + public void test02Terms22() throws Exception { + int[] expdnrs = {3}; normalTest1( "w*?da?", expdnrs); + } + public void test02Terms23() throws Exception { + int[] expdnrs = {}; normalTest1( "w?da?", expdnrs); + } + + public void test03And01() throws Exception { + int[] expdnrs = {0}; normalTest1( "word1 AND word2", expdnrs); + } + public void test03And02() throws Exception { + int[] expdnrs = {3}; normalTest1( "word* and ord*", expdnrs); + } + public void test03And03() throws Exception { + int[] expdnrs = {0}; normalTest1( "and(word1,word2)", expdnrs); + } + public void test04Or01() throws Exception { + int[] expdnrs = {0, 3}; normalTest1( "word1 or word2", expdnrs); + } + public void test04Or02() throws Exception { + int[] expdnrs 
= {0, 1, 2, 3}; normalTest1( "word* OR ord*", expdnrs); + } + public void test04Or03() throws Exception { + int[] expdnrs = {0, 3}; normalTest1( "OR (word1, word2)", expdnrs); + } + public void test05Not01() throws Exception { + int[] expdnrs = {3}; normalTest1( "word2 NOT word1", expdnrs); + } + public void test05Not02() throws Exception { + int[] expdnrs = {0}; normalTest1( "word2* not ord*", expdnrs); + } + public void test06AndOr01() throws Exception { + int[] expdnrs = {0}; normalTest1( "(word1 or ab)and or(word2,xyz, defg)", expdnrs); + } + public void test07AndOrNot02() throws Exception { + int[] expdnrs = {0}; normalTest1( "or( word2* not ord*, and(xyz,def))", expdnrs); + } +} diff --git a/contrib/surround/src/test/org/apache/lucene/queryParser/surround/query/Test03Distance.java b/contrib/surround/src/test/org/apache/lucene/queryParser/surround/query/Test03Distance.java new file mode 100644 index 00000000000..011046f01d2 --- /dev/null +++ b/contrib/surround/src/test/org/apache/lucene/queryParser/surround/query/Test03Distance.java @@ -0,0 +1,203 @@ +package org.apache.lucene.queryParser.surround.query; + +import junit.framework.TestCase; +import junit.framework.TestSuite; +import junit.textui.TestRunner; + +public class Test03Distance extends TestCase { + public static void main(String args[]) { + TestRunner.run(new TestSuite(Test03Distance.class)); + } + boolean verbose = false; + int maxBasicQueries = 16; + + String [] exceptionQueries = { + "(aa and bb) w cc", + "(aa or bb) w (cc and dd)", + "(aa opt bb) w cc", + "(aa not bb) w cc", + "(aa or bb) w (bi:cc)", + "(aa or bb) w bi:cc", + "(aa or bi:bb) w cc", + "(aa or (bi:bb)) w cc", + "(aa or (bb and dd)) w cc" + }; + + public void test00Exceptions() throws Exception { + String m = ExceptionQueryTest.getFailQueries(exceptionQueries, verbose); + if (m.length() > 0) { + fail("No ParseException for:\n" + m); + } + } + + final String fieldName = "bi"; + + String[] docs1 = { + "word1 word2 word3", + "word4 
word5", + "ord1 ord2 ord3", + "orda1 orda2 orda3 word2 worda3", + "a c e a b c" + }; + + SingleFieldTestDb db1 = new SingleFieldTestDb(docs1, fieldName); + + String[] docs2 = { + "w1 w2 w3 w4 w5", + "w1 w3 w2 w3", + "" + }; + + SingleFieldTestDb db2 = new SingleFieldTestDb(docs2, fieldName); + + public void distanceTest1(String query, int[] expdnrs) throws Exception { + BooleanQueryTest bqt = new BooleanQueryTest( query, expdnrs, db1, fieldName, this, + new BasicQueryFactory(maxBasicQueries)); + bqt.setVerbose(verbose); + bqt.doTest(); + } + + public void distanceTest2(String query, int[] expdnrs) throws Exception { + BooleanQueryTest bqt = new BooleanQueryTest( query, expdnrs, db2, fieldName, this, + new BasicQueryFactory(maxBasicQueries)); + bqt.setVerbose(verbose); + bqt.doTest(); + } + + public void test0W01() throws Exception { + int[] expdnrs = {0}; distanceTest1( "word1 w word2", expdnrs); + } + public void test0N01() throws Exception { + int[] expdnrs = {0}; distanceTest1( "word1 n word2", expdnrs); + } + public void test0N01r() throws Exception { /* r reverse */ + int[] expdnrs = {0}; distanceTest1( "word2 n word1", expdnrs); + } + + public void test0W02() throws Exception { + int[] expdnrs = {}; distanceTest1( "word2 w word1", expdnrs); + } + + public void test0W03() throws Exception { + int[] expdnrs = {}; distanceTest1( "word2 2W word1", expdnrs); + } + public void test0N03() throws Exception { + int[] expdnrs = {0}; distanceTest1( "word2 2N word1", expdnrs); + } + public void test0N03r() throws Exception { + int[] expdnrs = {0}; distanceTest1( "word1 2N word2", expdnrs); + } + + public void test0W04() throws Exception { + int[] expdnrs = {}; distanceTest1( "word2 3w word1", expdnrs); + } + + public void test0N04() throws Exception { + int[] expdnrs = {0}; distanceTest1( "word2 3n word1", expdnrs); + } + public void test0N04r() throws Exception { + int[] expdnrs = {0}; distanceTest1( "word1 3n word2", expdnrs); + } + + public void test0W05() throws 
Exception { + int[] expdnrs = {}; distanceTest1( "orda1 w orda3", expdnrs); + } + public void test0W06() throws Exception { + int[] expdnrs = {3}; distanceTest1( "orda1 2w orda3", expdnrs); + } + + public void test1Wtrunc01() throws Exception { + int[] expdnrs = {0}; distanceTest1( "word1* w word2", expdnrs); + } + public void test1Wtrunc02() throws Exception { + int[] expdnrs = {0}; distanceTest1( "word* w word2", expdnrs); + } + public void test1Wtrunc02r() throws Exception { + int[] expdnrs = {0,3}; distanceTest1( "word2 w word*", expdnrs); + } + public void test1Ntrunc02() throws Exception { + int[] expdnrs = {0,3}; distanceTest1( "word* n word2", expdnrs); + } + public void test1Ntrunc02r() throws Exception { + int[] expdnrs = {0,3}; distanceTest1( "word2 n word*", expdnrs); + } + + public void test1Wtrunc03() throws Exception { + int[] expdnrs = {0}; distanceTest1( "word1* w word2*", expdnrs); + } + public void test1Ntrunc03() throws Exception { + int[] expdnrs = {0}; distanceTest1( "word1* N word2*", expdnrs); + } + + public void test1Wtrunc04() throws Exception { + int[] expdnrs = {}; distanceTest1( "kxork* w kxor*", expdnrs); + } + public void test1Ntrunc04() throws Exception { + int[] expdnrs = {}; distanceTest1( "kxork* 99n kxor*", expdnrs); + } + + public void test1Wtrunc05() throws Exception { + int[] expdnrs = {}; distanceTest1( "word2* 2W word1*", expdnrs); + } + public void test1Ntrunc05() throws Exception { + int[] expdnrs = {0}; distanceTest1( "word2* 2N word1*", expdnrs); + } + + public void test1Wtrunc06() throws Exception { + int[] expdnrs = {3}; distanceTest1( "ord* W word*", expdnrs); + } + public void test1Ntrunc06() throws Exception { + int[] expdnrs = {3}; distanceTest1( "ord* N word*", expdnrs); + } + public void test1Ntrunc06r() throws Exception { + int[] expdnrs = {3}; distanceTest1( "word* N ord*", expdnrs); + } + + public void test1Wtrunc07() throws Exception { + int[] expdnrs = {3}; distanceTest1( "(orda2 OR orda3) W word*", 
expdnrs); + } + public void test1Wtrunc08() throws Exception { + int[] expdnrs = {3}; distanceTest1( "(orda2 OR orda3) W (word2 OR worda3)", expdnrs); + } + public void test1Wtrunc09() throws Exception { + int[] expdnrs = {3}; distanceTest1( "(orda2 OR orda3) 2W (word2 OR worda3)", expdnrs); + } + public void test1Ntrunc09() throws Exception { + int[] expdnrs = {3}; distanceTest1( "(orda2 OR orda3) 2N (word2 OR worda3)", expdnrs); + } + + public void test2Wprefix01() throws Exception { + int[] expdnrs = {0}; distanceTest2( "W (w1, w2, w3)", expdnrs); + } + public void test2Nprefix01a() throws Exception { + int[] expdnrs = {0,1}; distanceTest2( "N(w1, w2, w3)", expdnrs); + } + public void test2Nprefix01b() throws Exception { + int[] expdnrs = {0,1}; distanceTest2( "N(w3, w1, w2)", expdnrs); + } + + public void test2Wprefix02() throws Exception { + int[] expdnrs = {0,1}; distanceTest2( "2W(w1,w2,w3)", expdnrs); + } + + public void test2Nprefix02a() throws Exception { + int[] expdnrs = {0,1}; distanceTest2( "2N(w1,w2,w3)", expdnrs); + } + public void test2Nprefix02b() throws Exception { + int[] expdnrs = {0,1}; distanceTest2( "2N(w2,w3,w1)", expdnrs); + } + + public void test2Wnested01() throws Exception { + int[] expdnrs = {0}; distanceTest2( "w1 W w2 W w3", expdnrs); + } + public void test2Nnested01() throws Exception { + int[] expdnrs = {0}; distanceTest2( "w1 N w2 N w3", expdnrs); + } + + public void test2Wnested02() throws Exception { + int[] expdnrs = {0,1}; distanceTest2( "w1 2W w2 2W w3", expdnrs); + } + public void test2Nnested02() throws Exception { + int[] expdnrs = {0,1}; distanceTest2( "w1 2N w2 2N w3", expdnrs); + } +} diff --git a/contrib/surround/surround.txt b/contrib/surround/surround.txt new file mode 100644 index 00000000000..8341aa9ed3a --- /dev/null +++ b/contrib/surround/surround.txt @@ -0,0 +1,75 @@ +Description of Surround: + +Surround consists of operators (uppercase/lowercase): + +AND/OR/NOT/nW/nN/() as infix and +AND/OR/nW/nN as prefix. 
+ +Distance operators W and N have default n=1, max 99. +Implemented as SpanQuery with slop = (n - 1). +An example prefix form is: + +20n(aa*, bb*, cc*) + +The name Surround was chosen because of this prefix form +and because it uses the newly introduced span queries +to implement the proximity operators. +The names of the operators and the prefix and suffix +forms have been borrowed from various other query +languages described on the internet. + + +Query terms from the Lucene standard query parser: + +field:termtext +^ boost +* internal and suffix truncation +? one character + + +Some examples: + +aa +aa and bb +aa and bb or cc same effect as: (aa and bb) or cc +aa NOT bb NOT cc same effect as: (aa NOT bb) NOT cc + +and(aa,bb,cc) aa and bb and cc +99w(aa,bb,cc) ordered span query with slop 98 +99n(aa,bb,cc) unordered span query with slop 98 + +20n(aa*,bb*) +3w(a?a or bb?, cc+) + +title: text: aa +title : text : aa or bb +title:text: aa not bb +title:aa not text:bb + +cc 3w dd infix: dual. + +cc N dd N ee same effect as: (cc N dd) N ee + +text: aa 3d bb + +For examples on using the Surround language, see the +test packages. + + +Development status + +Not tested: multiple fields, internally mapped to OR queries, +not compared to Lucene's MultipleFieldQuery. + +* suffix truncation is implemented very similarly to Lucene's PrefixQuery. + +Wildcards (? and internal *) are implemented with regular expressions +to allow further variations. A reimplementation using +WildCardTermEnum (correct name?) should be no problem. + +Warnings about missing terms are sent to System.out; this might +be replaced by another stream. + +BooleanQueryTest.TestCollector uses a results checking method that should +be replaced by the checking method from Lucene's TestBasics.java. +