LUCENE-9531: Consolidate duplicated generated classes CharStream and FastCharStream (#1886)

This commit is contained in:
Dawid Weiss 2020-09-18 08:53:30 +02:00 committed by GitHub
parent fbf8e4f044
commit 5ec2bac91c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
30 changed files with 168 additions and 1015 deletions

View File

@ -73,6 +73,17 @@ def commonCleanups = { FileTree generatedFiles ->
generatedFiles.matching({ include "*TokenManager.java" }).each { file ->
modifyFile(file, { text ->
// Remove redundant imports.
text = text.replaceAll(
/(?m)^import .+/,
"")
// Add CharStream imports.
text = text.replaceAll(
/package (.+)/,
'''
package $1
import org.apache.lucene.queryparser.charstream.CharStream;
'''.trim())
// Eliminates redundant cast message.
text = text.replace(
"int hiByte = (int)(curChar >> 8);",
@ -112,16 +123,6 @@ configure(project(":lucene:queryparser")) {
return text
})
}
generatedFiles.matching { include "*TokenManager.java" }.each { file ->
modifyFile(file, { text ->
// Remove redundant imports.
text = text.replaceAll(
/(?m)^import .+/,
"")
return text
})
}
}
}
@ -144,16 +145,6 @@ configure(project(":lucene:queryparser")) {
return text
})
}
generatedFiles.matching { include "*TokenManager.java" }.each { file ->
modifyFile(file, { text ->
// Remove redundant imports.
text = text.replaceAll(
/(?m)^import .+/,
"")
return text
})
}
}
}
@ -230,16 +221,6 @@ configure(project(":lucene:queryparser")) {
return text
})
}
generatedFiles.matching { include "StandardSyntaxParserTokenManager.java" }.each { file ->
modifyFile(file, { text ->
// Remove redundant imports.
text = text.replaceAll(
/(?m)^import .+/,
"")
return text
})
}
}
}
@ -277,16 +258,6 @@ configure(project(":solr:core")) {
return text
})
}
generatedFiles.matching { include "*TokenManager.java" }.each { file ->
modifyFile(file, { text ->
// Remove redundant imports.
text = text.replaceAll(
/(?m)^import .+/,
"")
return text
})
}
}
}
}
@ -363,6 +334,9 @@ class JavaCCTask extends DefaultTask {
project.copy {
from tempDir
into targetDir
// We don't need CharStream interface as we redirect to our own.
exclude "CharStream.java"
}
}
}

View File

@ -134,6 +134,9 @@ Improvements
* LUCENE-9527: Upgrade javacc to 7.0.4, regenerate query parsers. (Dawid Weiss)
* LUCENE-9531: Consolidated CharStream and FastCharStream classes: these have been moved
from each query parser package to org.apache.lucene.queryparser.charstream (Dawid Weiss).
Bug fixes
* LUCENE-8663: NRTCachingDirectory.slowFileExists may open a file while

View File

@ -1,6 +1,20 @@
/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 7.0 */
/* JavaCCOptions:STATIC=false,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package org.apache.lucene.queryparser.classic;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.charstream;
/**
* This interface describes a character stream that maintains line and
@ -16,9 +30,7 @@ package org.apache.lucene.queryparser.classic;
* by the lexer. Hence their implementation won't affect the generated lexer's
* operation.
*/
public
interface CharStream {
public interface CharStream {
/**
* Returns the next character from the selected input. The method
@ -27,22 +39,6 @@ interface CharStream {
*/
char readChar() throws java.io.IOException;
@Deprecated
/**
* Returns the column position of the character last read.
* @deprecated
* @see #getEndColumn
*/
int getColumn();
@Deprecated
/**
* Returns the line number of the character last read.
* @deprecated
* @see #getEndLine
*/
int getLine();
/**
* Returns the column number of the last character for current token (being
* matched after the last call to BeginToken).
@ -110,11 +106,4 @@ interface CharStream {
* affect the lexer's operation.
*/
void Done();
void setTabSize(int i);
int getTabSize();
boolean getTrackLineColumn();
void setTrackLineColumn(boolean trackLineColumn);
}
/* (filtered)*/

View File

@ -14,15 +14,21 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.surround.parser;
package org.apache.lucene.queryparser.charstream;
import java.io.*;
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
* this does not do line-number counting, but instead keeps track of the
* character position of the token in the input, as required by Lucene's {@link
* org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API. */
/**
* An efficient implementation of JavaCC's CharStream interface.
* <p>
* Note that this does not do line-number counting, but instead keeps track of the
* character position of the token in the input, as required by Lucene's
* {@link org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API.
*/
public final class FastCharStream implements CharStream {
// See SOLR-11314
private final static IOException READ_PAST_EOF = new IOException("Read past EOF.");
char[] buffer = null;
int bufferLength = 0; // end of valid chars
@ -33,7 +39,9 @@ public final class FastCharStream implements CharStream {
Reader input; // source of chars
/** Constructs from a Reader. */
/**
* Constructs from a Reader.
*/
public FastCharStream(Reader r) {
input = r;
}
@ -45,7 +53,7 @@ public final class FastCharStream implements CharStream {
return buffer[bufferPosition++];
}
private final void refill() throws IOException {
private void refill() throws IOException {
int newPosition = bufferLength - tokenStart;
if (tokenStart == 0) { // token won't fit in buffer
@ -66,9 +74,9 @@ public final class FastCharStream implements CharStream {
tokenStart = 0;
int charsRead = // fill space in buffer
input.read(buffer, newPosition, buffer.length-newPosition);
input.read(buffer, newPosition, buffer.length - newPosition);
if (charsRead == -1)
throw new IOException("read past eof");
throw READ_PAST_EOF;
else
bufferLength += charsRead;
}
@ -101,60 +109,27 @@ public final class FastCharStream implements CharStream {
try {
input.close();
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
/**
* Returns the column position of the character last read.
* @deprecated see: #getEndColumn
*/
@Deprecated
@Override
public final int getColumn() {
return bufferStart + bufferPosition;
}
/**
* @deprecated see: #getEndLine
*/
@Deprecated
@Override
public final int getLine() {
return 1;
}
@Override
public final int getEndColumn() {
return bufferStart + bufferPosition;
}
@Override
public final int getEndLine() {
return 1;
}
@Override
public final int getBeginColumn() {
return bufferStart + tokenStart;
}
@Override
public final int getBeginLine() {
return 1;
}
@Override
public void setTabSize(int i) {
throw new RuntimeException("Tab size not implemented.");
}
@Override
public int getTabSize() {
throw new RuntimeException("Tab size not implemented.");
}
@Override
public boolean getTrackLineColumn() {
return false;
}
@Override
public void setTrackLineColumn(boolean trackLineColumn) {
throw new RuntimeException("Line/Column tracking not implemented.");
}
}

View File

@ -0,0 +1,26 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/**
* This package contains reusable parts for javacc-generated
* grammars (query parsers).
*
* @see org.apache.lucene.queryparser.charstream.CharStream
* @see org.apache.lucene.queryparser.charstream.FastCharStream
*/
package org.apache.lucene.queryparser.charstream;

View File

@ -1,161 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.classic;
import java.io.*;
/**
 * An efficient implementation of JavaCC's {@link CharStream} interface.
 *
 * <p>Note that this does not do line-number counting, but instead keeps track of the character
 * position of the token in the input, as required by Lucene's {@link
 * org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API. Consequently every line
 * accessor returns {@code 1} and every column accessor returns a character offset.
 */
public final class FastCharStream implements CharStream {
  char[] buffer = null;

  int bufferLength = 0; // end of valid chars
  int bufferPosition = 0; // next char to read

  int tokenStart = 0; // offset in buffer
  int bufferStart = 0; // position in file of buffer

  Reader input; // source of chars

  /**
   * Constructs from a Reader.
   *
   * @param r the source of characters; it is closed by {@link #Done()}
   */
  public FastCharStream(Reader r) {
    input = r;
  }

  /**
   * Returns the next character, refilling the buffer from the reader when it is exhausted.
   *
   * @throws IOException if the reader fails or the end of the input has been reached
   */
  @Override
  public char readChar() throws IOException {
    if (bufferPosition >= bufferLength) {
      refill();
    }
    return buffer[bufferPosition++];
  }

  /**
   * Makes room in the buffer and reads more characters from the reader.
   *
   * <p>Characters of the current token (from {@code tokenStart} on) are always retained: they are
   * shifted to the front of the buffer, or the buffer is doubled when the token already starts at
   * index 0 and the buffer is full.
   *
   * @throws IOException if the reader fails, or with the message "read past eof" when the reader
   *     reports end of stream
   */
  private void refill() throws IOException {
    int newPosition = bufferLength - tokenStart;

    if (tokenStart == 0) { // token won't fit in buffer
      if (buffer == null) { // first time: alloc buffer
        buffer = new char[2048];
      } else if (bufferLength == buffer.length) { // grow buffer
        char[] newBuffer = new char[buffer.length * 2];
        System.arraycopy(buffer, 0, newBuffer, 0, bufferLength);
        buffer = newBuffer;
      }
    } else { // shift token to front
      System.arraycopy(buffer, tokenStart, buffer, 0, newPosition);
    }

    // The bookkeeping is updated before reading, so it is already consistent with the
    // (possibly shifted) buffer contents even when the read below throws.
    bufferLength = newPosition; // update state
    bufferPosition = newPosition;
    bufferStart += tokenStart;
    tokenStart = 0;

    // fill space in buffer
    int charsRead = input.read(buffer, newPosition, buffer.length - newPosition);
    if (charsRead == -1) {
      throw new IOException("read past eof");
    }
    bufferLength += charsRead;
  }

  /** Marks the current position as the start of a new token and returns its first character. */
  @Override
  public char BeginToken() throws IOException {
    tokenStart = bufferPosition;
    return readChar();
  }

  /** Moves the read position back by {@code amount} characters; no bounds check is performed. */
  @Override
  public void backup(int amount) {
    bufferPosition -= amount;
  }

  /** Returns the characters from the token start up to the current read position. */
  @Override
  public String GetImage() {
    return new String(buffer, tokenStart, bufferPosition - tokenStart);
  }

  /** Returns a copy of the last {@code len} characters before the current read position. */
  @Override
  public char[] GetSuffix(int len) {
    char[] value = new char[len];
    System.arraycopy(buffer, bufferPosition - len, value, 0, len);
    return value;
  }

  /** Closes the underlying reader. */
  @Override
  public void Done() {
    try {
      input.close();
    } catch (IOException ignored) {
      // Deliberately best-effort: a failure to close the reader is not reported to the caller,
      // which keeps the existing behaviour of this method.
    }
  }

  /**
   * Returns the column position of the character last read.
   *
   * @deprecated Use {@link #getEndColumn()} instead.
   */
  @Deprecated
  @Override
  public int getColumn() {
    return bufferStart + bufferPosition;
  }

  /**
   * Returns the line number of the character last read; always {@code 1} because lines are not
   * tracked.
   *
   * @deprecated Use {@link #getEndLine()} instead.
   */
  @Deprecated
  @Override
  public int getLine() {
    return 1;
  }

  /** Returns the offset of the current read position ({@code bufferStart + bufferPosition}). */
  @Override
  public int getEndColumn() {
    return bufferStart + bufferPosition;
  }

  /** Always returns {@code 1}; lines are not tracked. */
  @Override
  public int getEndLine() {
    return 1;
  }

  /** Returns the offset of the current token start ({@code bufferStart + tokenStart}). */
  @Override
  public int getBeginColumn() {
    return bufferStart + tokenStart;
  }

  /** Always returns {@code 1}; lines are not tracked. */
  @Override
  public int getBeginLine() {
    return 1;
  }

  /** Not supported; always throws {@link RuntimeException}. */
  @Override
  public void setTabSize(int i) {
    throw new RuntimeException("Tab size not implemented.");
  }

  /** Not supported; always throws {@link RuntimeException}. */
  @Override
  public int getTabSize() {
    throw new RuntimeException("Tab size not implemented.");
  }

  /** Always returns {@code false}. */
  @Override
  public boolean getTrackLineColumn() {
    return false;
  }

  /** Not supported; always throws {@link RuntimeException}. */
  @Override
  public void setTrackLineColumn(boolean trackLineColumn) {
    throw new RuntimeException("Line/Column tracking not implemented.");
  }
}

View File

@ -15,6 +15,8 @@ import org.apache.lucene.document.DateTools;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.queryparser.charstream.CharStream;
import org.apache.lucene.queryparser.charstream.FastCharStream;
/**
* This class is generated by JavaCC. The most important method is
@ -706,22 +708,6 @@ if (splitOnWhitespace == false) {
finally { jj_save(2, xla); }
}
/**
 * JavaCC-generated lookahead routine.
 *
 * Returns true as soon as any of these returns true/fails: scanning a TERM token, the semantic
 * check (next token is TERM and allowedPostMultiTerm accepts the kind of the token after it),
 * jj_3R_6(), or the first jj_3R_7() call. Otherwise it keeps calling jj_3R_7() until one call
 * returns true, rewinds jj_scanpos to where that call started, and returns false.
 * NOTE(review): presumably "true" means "lookahead did not match" per JavaCC convention - confirm.
 */
private boolean jj_3R_3()
{
if (jj_scan_token(TERM)) return true;
// Evaluate the semantic lookahead with jj_lookingAhead set while getToken() is consulted.
jj_lookingAhead = true;
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
jj_lookingAhead = false;
if (!jj_semLA || jj_3R_6()) return true;
Token xsp;
// At least one jj_3R_7() must succeed (return false) ...
if (jj_3R_7()) return true;
// ... then repeat it until it returns true, restoring the scan position saved before that call.
while (true) {
xsp = jj_scanpos;
if (jj_3R_7()) { jj_scanpos = xsp; break; }
}
return false;
}
private boolean jj_3R_6()
{
return false;
@ -770,6 +756,22 @@ if (splitOnWhitespace == false) {
return false;
}
/**
 * JavaCC-generated lookahead routine.
 *
 * Returns true as soon as any of these returns true/fails: scanning a TERM token, the semantic
 * check (next token is TERM and allowedPostMultiTerm accepts the kind of the token after it),
 * jj_3R_6(), or the first jj_3R_7() call. Otherwise it keeps calling jj_3R_7() until one call
 * returns true, rewinds jj_scanpos to where that call started, and returns false.
 * NOTE(review): presumably "true" means "lookahead did not match" per JavaCC convention - confirm.
 */
private boolean jj_3R_3()
{
if (jj_scan_token(TERM)) return true;
// Evaluate the semantic lookahead with jj_lookingAhead set while getToken() is consulted.
jj_lookingAhead = true;
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
jj_lookingAhead = false;
if (!jj_semLA || jj_3R_6()) return true;
Token xsp;
// At least one jj_3R_7() must succeed (return false) ...
if (jj_3R_7()) return true;
// ... then repeat it until it returns true, restoring the scan position saved before that call.
while (true) {
xsp = jj_scanpos;
if (jj_3R_7()) { jj_scanpos = xsp; break; }
}
return false;
}
/** Generated Token Manager. */
public QueryParserTokenManager token_source;
/** Current token. */

View File

@ -38,6 +38,8 @@ import org.apache.lucene.document.DateTools;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.queryparser.charstream.CharStream;
import org.apache.lucene.queryparser.charstream.FastCharStream;
/**
* This class is generated by JavaCC. The most important method is

View File

@ -25,6 +25,8 @@ import java.util.regex.Pattern;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.charstream.CharStream;
import org.apache.lucene.queryparser.charstream.FastCharStream;
import org.apache.lucene.queryparser.classic.QueryParser.Operator;
import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration;
import org.apache.lucene.search.*;

View File

@ -1,6 +1,9 @@
/* QueryParserTokenManager.java */
/* Generated By:JavaCC: Do not edit this line. QueryParserTokenManager.java */
package org.apache.lucene.queryparser.classic;
import org.apache.lucene.queryparser.charstream.CharStream;

View File

@ -1,120 +0,0 @@
/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 7.0 */
/* JavaCCOptions:STATIC=false,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package org.apache.lucene.queryparser.flexible.standard.parser;
/**
 * This interface describes a character stream that maintains line and
 * column number positions of the characters. It also has the capability
 * to backup the stream to some extent. An implementation of this
 * interface is used in the TokenManager implementation generated by
 * JavaCCParser.
 *
 * <p>All the methods except {@link #backup(int)} can be implemented in any fashion.
 * {@code backup} needs to be implemented correctly for the correct operation of the lexer.
 * Rest of the methods are all used to get information like line number,
 * column number and the String that constitutes a token and are not used
 * by the lexer. Hence their implementation won't affect the generated lexer's
 * operation.
 */
public interface CharStream {

  /**
   * Returns the next character from the selected input. The method
   * of selecting the input is the responsibility of the class
   * implementing this interface.
   *
   * @throws java.io.IOException any I/O failure; implementations may throw it for any reason
   */
  char readChar() throws java.io.IOException;

  /**
   * Returns the column position of the character last read.
   *
   * @deprecated Use {@link #getEndColumn()} instead.
   * @see #getEndColumn
   */
  @Deprecated
  int getColumn();

  /**
   * Returns the line number of the character last read.
   *
   * @deprecated Use {@link #getEndLine()} instead.
   * @see #getEndLine
   */
  @Deprecated
  int getLine();

  /**
   * Returns the column number of the last character for current token (being
   * matched after the last call to {@link #BeginToken()}).
   */
  int getEndColumn();

  /**
   * Returns the line number of the last character for current token (being
   * matched after the last call to {@link #BeginToken()}).
   */
  int getEndLine();

  /**
   * Returns the column number of the first character for current token (being
   * matched after the last call to {@link #BeginToken()}).
   */
  int getBeginColumn();

  /**
   * Returns the line number of the first character for current token (being
   * matched after the last call to {@link #BeginToken()}).
   */
  int getBeginLine();

  /**
   * Backs up the input stream by amount steps. Lexer calls this method if it
   * had already read some characters, but could not use them to match a
   * (longer) token. So, they will be used again as the prefix of the next
   * token and it is the implementation's responsibility to do this right.
   *
   * @param amount the number of characters to step back
   */
  void backup(int amount);

  /**
   * Returns the next character that marks the beginning of the next token.
   * All characters must remain in the buffer between two successive calls
   * to this method to implement backup correctly.
   *
   * @throws java.io.IOException if the next character cannot be read
   */
  char BeginToken() throws java.io.IOException;

  /**
   * Returns a string made up of characters from the marked token beginning
   * to the current buffer position. Implementations have the choice of returning
   * anything that they want to. For example, for efficiency, one might decide
   * to just return null, which is a valid implementation.
   */
  String GetImage();

  /**
   * Returns an array of characters that make up the suffix of length 'len' for
   * the currently matched token. This is used to build up the matched string
   * for use in actions in the case of MORE. A simple and inefficient
   * implementation of this is as follows:
   *
   * <pre>{@code
   * {
   *   String t = GetImage();
   *   return t.substring(t.length() - len, t.length()).toCharArray();
   * }
   * }</pre>
   *
   * @param len the number of trailing characters to return
   */
  char[] GetSuffix(int len);

  /**
   * The lexer calls this function to indicate that it is done with the stream
   * and hence implementations can free any resources held by this class.
   * Again, the body of this function can be just empty and it will not
   * affect the lexer's operation.
   */
  void Done();

  /**
   * Sets the tab size.
   * NOTE(review): the generated source does not specify the semantics; presumably the tab width
   * used for column computation - confirm against the JavaCC documentation.
   *
   * @param i the tab size
   */
  void setTabSize(int i);

  /** Returns the tab size; see the review note on {@link #setTabSize(int)}. */
  int getTabSize();

  /**
   * Returns the line/column tracking flag.
   * NOTE(review): exact semantics are not specified by the generated source - confirm.
   */
  boolean getTrackLineColumn();

  /**
   * Sets the line/column tracking flag.
   *
   * @param trackLineColumn the new value of the flag
   */
  void setTrackLineColumn(boolean trackLineColumn);
}
/* (filtered)*/

View File

@ -1,161 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.flexible.standard.parser;
import java.io.*;
/**
 * An efficient implementation of JavaCC's {@link CharStream} interface.
 *
 * <p>Note that this does not do line-number counting, but instead keeps track of the character
 * position of the token in the input, as required by Lucene's {@link
 * org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API. Consequently every line
 * accessor returns {@code 1} and every column accessor returns a character offset.
 */
public final class FastCharStream implements CharStream {
  char[] buffer = null;

  int bufferLength = 0; // end of valid chars
  int bufferPosition = 0; // next char to read

  int tokenStart = 0; // offset in buffer
  int bufferStart = 0; // position in file of buffer

  Reader input; // source of chars

  /**
   * Constructs from a Reader.
   *
   * @param r the source of characters; it is closed by {@link #Done()}
   */
  public FastCharStream(Reader r) {
    input = r;
  }

  /**
   * Returns the next character, refilling the buffer from the reader when it is exhausted.
   *
   * @throws IOException if the reader fails or the end of the input has been reached
   */
  @Override
  public char readChar() throws IOException {
    if (bufferPosition >= bufferLength) {
      refill();
    }
    return buffer[bufferPosition++];
  }

  /**
   * Makes room in the buffer and reads more characters from the reader.
   *
   * <p>Characters of the current token (from {@code tokenStart} on) are always retained: they are
   * shifted to the front of the buffer, or the buffer is doubled when the token already starts at
   * index 0 and the buffer is full.
   *
   * @throws IOException if the reader fails, or with the message "read past eof" when the reader
   *     reports end of stream
   */
  private void refill() throws IOException {
    int newPosition = bufferLength - tokenStart;

    if (tokenStart == 0) { // token won't fit in buffer
      if (buffer == null) { // first time: alloc buffer
        buffer = new char[2048];
      } else if (bufferLength == buffer.length) { // grow buffer
        char[] newBuffer = new char[buffer.length * 2];
        System.arraycopy(buffer, 0, newBuffer, 0, bufferLength);
        buffer = newBuffer;
      }
    } else { // shift token to front
      System.arraycopy(buffer, tokenStart, buffer, 0, newPosition);
    }

    // The bookkeeping is updated before reading, so it is already consistent with the
    // (possibly shifted) buffer contents even when the read below throws.
    bufferLength = newPosition; // update state
    bufferPosition = newPosition;
    bufferStart += tokenStart;
    tokenStart = 0;

    // fill space in buffer
    int charsRead = input.read(buffer, newPosition, buffer.length - newPosition);
    if (charsRead == -1) {
      throw new IOException("read past eof");
    }
    bufferLength += charsRead;
  }

  /** Marks the current position as the start of a new token and returns its first character. */
  @Override
  public char BeginToken() throws IOException {
    tokenStart = bufferPosition;
    return readChar();
  }

  /** Moves the read position back by {@code amount} characters; no bounds check is performed. */
  @Override
  public void backup(int amount) {
    bufferPosition -= amount;
  }

  /** Returns the characters from the token start up to the current read position. */
  @Override
  public String GetImage() {
    return new String(buffer, tokenStart, bufferPosition - tokenStart);
  }

  /** Returns a copy of the last {@code len} characters before the current read position. */
  @Override
  public char[] GetSuffix(int len) {
    char[] value = new char[len];
    System.arraycopy(buffer, bufferPosition - len, value, 0, len);
    return value;
  }

  /** Closes the underlying reader. */
  @Override
  public void Done() {
    try {
      input.close();
    } catch (IOException ignored) {
      // Deliberately best-effort: a failure to close the reader is not reported to the caller,
      // which keeps the existing behaviour of this method.
    }
  }

  /**
   * Returns the column position of the character last read.
   *
   * @deprecated Use {@link #getEndColumn()} instead.
   */
  @Deprecated
  @Override
  public int getColumn() {
    return bufferStart + bufferPosition;
  }

  /**
   * Returns the line number of the character last read; always {@code 1} because lines are not
   * tracked.
   *
   * @deprecated Use {@link #getEndLine()} instead.
   */
  @Deprecated
  @Override
  public int getLine() {
    return 1;
  }

  /** Returns the offset of the current read position ({@code bufferStart + bufferPosition}). */
  @Override
  public int getEndColumn() {
    return bufferStart + bufferPosition;
  }

  /** Always returns {@code 1}; lines are not tracked. */
  @Override
  public int getEndLine() {
    return 1;
  }

  /** Returns the offset of the current token start ({@code bufferStart + tokenStart}). */
  @Override
  public int getBeginColumn() {
    return bufferStart + tokenStart;
  }

  /** Always returns {@code 1}; lines are not tracked. */
  @Override
  public int getBeginLine() {
    return 1;
  }

  /** Not supported; always throws {@link RuntimeException}. */
  @Override
  public void setTabSize(int i) {
    throw new RuntimeException("Tab size not implemented.");
  }

  /** Not supported; always throws {@link RuntimeException}. */
  @Override
  public int getTabSize() {
    throw new RuntimeException("Tab size not implemented.");
  }

  /** Always returns {@code false}. */
  @Override
  public boolean getTrackLineColumn() {
    return false;
  }

  /** Not supported; always throws {@link RuntimeException}. */
  @Override
  public void setTrackLineColumn(boolean trackLineColumn) {
    throw new RuntimeException("Line/Column tracking not implemented.");
  }
}

View File

@ -41,6 +41,8 @@ import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode;
import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser;
import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode;
import org.apache.lucene.queryparser.charstream.CharStream;
import org.apache.lucene.queryparser.charstream.FastCharStream;
/**
* Parser for the standard Lucene syntax
@ -747,17 +749,6 @@ if (boost != null) {
finally { jj_save(2, xla); }
}
/**
 * JavaCC-generated lookahead routine trying two alternatives in order.
 *
 * Calls jj_3_2(); if that returns true, rewinds jj_scanpos to the position saved on entry and
 * calls jj_3_3(). Returns true only when both calls return true, otherwise false.
 * NOTE(review): presumably "true" means "lookahead did not match" per JavaCC convention - confirm.
 */
private boolean jj_3R_26()
{
Token xsp;
// Remember the scan position so the second alternative starts from the same token.
xsp = jj_scanpos;
if (jj_3_2()) {
jj_scanpos = xsp;
if (jj_3_3()) return true;
}
return false;
}
private boolean jj_3R_22()
{
if (jj_scan_token(OR)) return true;
@ -1045,6 +1036,17 @@ if (boost != null) {
return false;
}
/**
 * JavaCC-generated lookahead routine trying two alternatives in order.
 *
 * Calls jj_3_2(); if that returns true, rewinds jj_scanpos to the position saved on entry and
 * calls jj_3_3(). Returns true only when both calls return true, otherwise false.
 * NOTE(review): presumably "true" means "lookahead did not match" per JavaCC convention - confirm.
 */
private boolean jj_3R_26()
{
Token xsp;
// Remember the scan position so the second alternative starts from the same token.
xsp = jj_scanpos;
if (jj_3_2()) {
jj_scanpos = xsp;
if (jj_3_3()) return true;
}
return false;
}
/** Generated Token Manager. */
public StandardSyntaxParserTokenManager token_source;
/** Current token. */

View File

@ -48,6 +48,8 @@ import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode;
import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode;
import org.apache.lucene.queryparser.flexible.core.parser.SyntaxParser;
import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode;
import org.apache.lucene.queryparser.charstream.CharStream;
import org.apache.lucene.queryparser.charstream.FastCharStream;
/**
* Parser for the standard Lucene syntax

View File

@ -1,6 +1,7 @@
/* StandardSyntaxParserTokenManager.java */
/* Generated By:JavaCC: Do not edit this line. StandardSyntaxParserTokenManager.java */
package org.apache.lucene.queryparser.flexible.standard.parser;
import org.apache.lucene.queryparser.charstream.CharStream;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@ -37,6 +38,8 @@ package org.apache.lucene.queryparser.flexible.standard.parser;

View File

@ -1,120 +0,0 @@
/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 7.0 */
/* JavaCCOptions:STATIC=false,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package org.apache.lucene.queryparser.surround.parser;
/**
 * This interface describes a character stream that maintains line and
 * column number positions of the characters. It also has the capability
 * to backup the stream to some extent. An implementation of this
 * interface is used in the TokenManager implementation generated by
 * JavaCCParser.
 *
 * <p>All the methods except {@link #backup(int)} can be implemented in any fashion.
 * {@code backup} needs to be implemented correctly for the correct operation of the lexer.
 * Rest of the methods are all used to get information like line number,
 * column number and the String that constitutes a token and are not used
 * by the lexer. Hence their implementation won't affect the generated lexer's
 * operation.
 */
public interface CharStream {

  /**
   * Returns the next character from the selected input. The method
   * of selecting the input is the responsibility of the class
   * implementing this interface.
   *
   * @throws java.io.IOException any I/O failure; implementations may throw it for any reason
   */
  char readChar() throws java.io.IOException;

  /**
   * Returns the column position of the character last read.
   *
   * @deprecated Use {@link #getEndColumn()} instead.
   * @see #getEndColumn
   */
  @Deprecated
  int getColumn();

  /**
   * Returns the line number of the character last read.
   *
   * @deprecated Use {@link #getEndLine()} instead.
   * @see #getEndLine
   */
  @Deprecated
  int getLine();

  /**
   * Returns the column number of the last character for current token (being
   * matched after the last call to {@link #BeginToken()}).
   */
  int getEndColumn();

  /**
   * Returns the line number of the last character for current token (being
   * matched after the last call to {@link #BeginToken()}).
   */
  int getEndLine();

  /**
   * Returns the column number of the first character for current token (being
   * matched after the last call to {@link #BeginToken()}).
   */
  int getBeginColumn();

  /**
   * Returns the line number of the first character for current token (being
   * matched after the last call to {@link #BeginToken()}).
   */
  int getBeginLine();

  /**
   * Backs up the input stream by amount steps. Lexer calls this method if it
   * had already read some characters, but could not use them to match a
   * (longer) token. So, they will be used again as the prefix of the next
   * token and it is the implementation's responsibility to do this right.
   *
   * @param amount the number of characters to step back
   */
  void backup(int amount);

  /**
   * Returns the next character that marks the beginning of the next token.
   * All characters must remain in the buffer between two successive calls
   * to this method to implement backup correctly.
   *
   * @throws java.io.IOException if the next character cannot be read
   */
  char BeginToken() throws java.io.IOException;

  /**
   * Returns a string made up of characters from the marked token beginning
   * to the current buffer position. Implementations have the choice of returning
   * anything that they want to. For example, for efficiency, one might decide
   * to just return null, which is a valid implementation.
   */
  String GetImage();

  /**
   * Returns an array of characters that make up the suffix of length 'len' for
   * the currently matched token. This is used to build up the matched string
   * for use in actions in the case of MORE. A simple and inefficient
   * implementation of this is as follows:
   *
   * <pre>{@code
   * {
   *   String t = GetImage();
   *   return t.substring(t.length() - len, t.length()).toCharArray();
   * }
   * }</pre>
   *
   * @param len the number of trailing characters to return
   */
  char[] GetSuffix(int len);

  /**
   * The lexer calls this function to indicate that it is done with the stream
   * and hence implementations can free any resources held by this class.
   * Again, the body of this function can be just empty and it will not
   * affect the lexer's operation.
   */
  void Done();

  /**
   * Sets the tab size.
   * NOTE(review): the generated source does not specify the semantics; presumably the tab width
   * used for column computation - confirm against the JavaCC documentation.
   *
   * @param i the tab size
   */
  void setTabSize(int i);

  /** Returns the tab size; see the review note on {@link #setTabSize(int)}. */
  int getTabSize();

  /**
   * Returns the line/column tracking flag.
   * NOTE(review): exact semantics are not specified by the generated source - confirm.
   */
  boolean getTrackLineColumn();

  /**
   * Sets the line/column tracking flag.
   *
   * @param trackLineColumn the new value of the flag
   */
  void setTrackLineColumn(boolean trackLineColumn);
}
/* (filtered)*/

View File

@ -18,6 +18,8 @@ import org.apache.lucene.queryparser.surround.query.DistanceQuery;
import org.apache.lucene.queryparser.surround.query.SrndTermQuery;
import org.apache.lucene.queryparser.surround.query.SrndPrefixQuery;
import org.apache.lucene.queryparser.surround.query.SrndTruncQuery;
import org.apache.lucene.queryparser.charstream.CharStream;
import org.apache.lucene.queryparser.charstream.FastCharStream;
/**
* This class is generated by JavaCC. The only method that clients should need

View File

@ -46,6 +46,8 @@ import org.apache.lucene.queryparser.surround.query.DistanceQuery;
import org.apache.lucene.queryparser.surround.query.SrndTermQuery;
import org.apache.lucene.queryparser.surround.query.SrndPrefixQuery;
import org.apache.lucene.queryparser.surround.query.SrndTruncQuery;
import org.apache.lucene.queryparser.charstream.CharStream;
import org.apache.lucene.queryparser.charstream.FastCharStream;
/**
* This class is generated by JavaCC. The only method that clients should need

View File

@ -1,6 +1,9 @@
/* QueryParserTokenManager.java */
/* Generated By:JavaCC: Do not edit this line. QueryParserTokenManager.java */
package org.apache.lucene.queryparser.surround.parser;
import org.apache.lucene.queryparser.charstream.CharStream;

View File

@ -37,6 +37,7 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.charstream.CharStream;
import org.apache.lucene.queryparser.classic.QueryParser.Operator;
import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration;
import org.apache.lucene.queryparser.util.QueryParserTestBase;

View File

@ -45,6 +45,7 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.charstream.FastCharStream;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.core.messages.QueryParserMessages;
import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode;
@ -54,7 +55,6 @@ import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessor
import org.apache.lucene.queryparser.flexible.messages.MessageImpl;
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler;
import org.apache.lucene.queryparser.flexible.standard.nodes.WildcardQueryNode;
import org.apache.lucene.queryparser.flexible.standard.parser.FastCharStream;
import org.apache.lucene.queryparser.flexible.standard.parser.ParseException;
import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser;
import org.apache.lucene.search.BooleanClause.Occur;

View File

@ -126,6 +126,8 @@ Other Changes
* SOLR-14846: Backup/Restore classes no longer take Optional method parameters and will accept nulls instead. (Mike Drob)
* LUCENE-9531: Consolidated CharStream and FastCharStream classes: these have been moved from each query parser
package to org.apache.lucene.queryparser.charstream (Dawid Weiss).
Bug Fixes
---------------------

View File

@ -1,120 +0,0 @@
/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 7.0 */
/* JavaCCOptions:STATIC=false,SUPPORT_CLASS_VISIBILITY_PUBLIC=true */
package org.apache.solr.parser;
/**
 * This interface describes a character stream that maintains line and
 * column number positions of the characters. It also has the capability
 * to back up the stream to some extent. An implementation of this
 * interface is used in the TokenManager implementation generated by
 * JavaCCParser.
 *
 * <p>All the methods except {@link #backup(int)} can be implemented in any
 * fashion. {@code backup} needs to be implemented correctly for the correct
 * operation of the lexer. The rest of the methods are all used to get
 * information like line number, column number and the String that
 * constitutes a token and are not used by the lexer. Hence their
 * implementation won't affect the generated lexer's operation.
 */
public interface CharStream {

  /**
   * Returns the next character from the selected input. The method
   * of selecting the input is the responsibility of the class
   * implementing this interface.
   *
   * @return the next character of the input
   * @throws java.io.IOException any I/O failure raised by the implementation
   */
  char readChar() throws java.io.IOException;

  /**
   * Returns the column position of the character last read.
   *
   * @return the column position of the character last read
   * @deprecated use {@link #getEndColumn()} instead
   * @see #getEndColumn
   */
  @Deprecated
  int getColumn();

  /**
   * Returns the line number of the character last read.
   *
   * @return the line number of the character last read
   * @deprecated use {@link #getEndLine()} instead
   * @see #getEndLine
   */
  @Deprecated
  int getLine();

  /**
   * Returns the column number of the last character for the current token
   * (being matched after the last call to {@link #BeginToken()}).
   *
   * @return the end column of the current token
   */
  int getEndColumn();

  /**
   * Returns the line number of the last character for the current token
   * (being matched after the last call to {@link #BeginToken()}).
   *
   * @return the end line of the current token
   */
  int getEndLine();

  /**
   * Returns the column number of the first character for the current token
   * (being matched after the last call to {@link #BeginToken()}).
   *
   * @return the begin column of the current token
   */
  int getBeginColumn();

  /**
   * Returns the line number of the first character for the current token
   * (being matched after the last call to {@link #BeginToken()}).
   *
   * @return the begin line of the current token
   */
  int getBeginLine();

  /**
   * Backs up the input stream by {@code amount} steps. The lexer calls this
   * method if it had already read some characters, but could not use them to
   * match a (longer) token. So, they will be used again as the prefix of the
   * next token and it is the implementation's responsibility to do this right.
   *
   * @param amount number of characters to step back
   */
  void backup(int amount);

  /**
   * Returns the next character that marks the beginning of the next token.
   * All characters must remain in the buffer between two successive calls
   * to this method to implement backup correctly.
   *
   * @return the first character of the next token
   * @throws java.io.IOException any I/O failure raised by the implementation
   */
  char BeginToken() throws java.io.IOException;

  /**
   * Returns a string made up of characters from the marked token beginning
   * to the current buffer position. Implementations have the choice of
   * returning anything that they want to. For example, for efficiency, one
   * might decide to just return null, which is a valid implementation.
   *
   * @return the image of the current token, or whatever the implementation chooses
   */
  String GetImage();

  /**
   * Returns an array of characters that make up the suffix of length
   * {@code len} for the currently matched token. This is used to build up
   * the matched string for use in actions in the case of MORE. A simple and
   * inefficient implementation of this is as follows:
   *
   * <pre>{@code
   * {
   *   String t = GetImage();
   *   return t.substring(t.length() - len, t.length()).toCharArray();
   * }
   * }</pre>
   *
   * @param len length of the requested suffix
   * @return the last {@code len} characters of the currently matched token
   */
  char[] GetSuffix(int len);

  /**
   * The lexer calls this function to indicate that it is done with the stream
   * and hence implementations can free any resources held by this class.
   * Again, the body of this function can be just empty and it will not
   * affect the lexer's operation.
   */
  void Done();

  /**
   * Sets the tab size.
   * NOTE(review): the contract is not specified in this file; presumably the
   * value is used when computing column positions -- confirm against JavaCC.
   *
   * @param i the new tab size
   */
  void setTabSize(int i);

  /**
   * Returns the tab size.
   * NOTE(review): see {@link #setTabSize(int)}; semantics not specified here.
   *
   * @return the current tab size
   */
  int getTabSize();

  /**
   * Reports whether line/column tracking is enabled.
   * NOTE(review): exact semantics are not specified in this file -- confirm
   * against JavaCC.
   *
   * @return the current line/column tracking flag
   */
  boolean getTrackLineColumn();

  /**
   * Enables or disables line/column tracking.
   * NOTE(review): exact semantics are not specified in this file -- confirm
   * against JavaCC.
   *
   * @param trackLineColumn the new value of the tracking flag
   */
  void setTrackLineColumn(boolean trackLineColumn);
}
/* (filtered)*/

View File

@ -1,168 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.parser;
import java.io.*;
/**
 * An efficient implementation of JavaCC's CharStream interface.
 *
 * <p>Note that this does not do line-number counting, but instead keeps track
 * of the character position of the token in the input, as required by Lucene's
 * {@link org.apache.lucene.analysis.tokenattributes.OffsetAttribute} API.
 */
public final class FastCharStream implements CharStream {

  /**
   * This Exception is used as a signal rather than an exceptional state:
   * a single shared instance is thrown whenever the reader is exhausted.
   */
  private static final IOException READ_PAST_EOF = new IOException("read past eof");

  /** Size of the buffer allocated on the first refill. */
  private static final int INITIAL_BUFFER_SIZE = 2048;

  /** Backing storage; allocated lazily by the first refill. */
  char[] buffer = null;

  /** End of valid chars in {@link #buffer}. */
  int bufferLength = 0;

  /** Index in {@link #buffer} of the next char to read. */
  int bufferPosition = 0;

  /** Offset in {@link #buffer} at which the current token starts. */
  int tokenStart = 0;

  /** Position in the input of the first char held in {@link #buffer}. */
  int bufferStart = 0;

  /** Source of chars. */
  Reader input;

  /**
   * Constructs from a Reader.
   *
   * @param r the reader supplying the characters
   */
  public FastCharStream(Reader r) {
    input = r;
  }

  /**
   * Returns the next character, refilling the buffer from the reader first
   * when every buffered character has already been consumed.
   *
   * @throws IOException if the reader fails or has no more characters
   */
  @Override
  public final char readChar() throws IOException {
    if (bufferPosition >= bufferLength) {
      refill();
    }
    return buffer[bufferPosition++];
  }

  /**
   * Makes room in the buffer and reads more characters into it. Characters
   * belonging to the current token (from {@code tokenStart} onwards) are kept.
   *
   * @throws IOException if the reader fails, or the shared end-of-input signal
   *     when the reader reports no more characters
   */
  private final void refill() throws IOException {
    final int retained = bufferLength - tokenStart;

    if (tokenStart != 0) {
      // The token does not start at the front: slide it down to index 0.
      System.arraycopy(buffer, tokenStart, buffer, 0, retained);
    } else if (buffer == null) {
      // Very first refill: allocate the buffer.
      buffer = new char[INITIAL_BUFFER_SIZE];
    } else if (bufferLength == buffer.length) {
      // The token already fills the whole buffer: double its capacity.
      final char[] grown = new char[buffer.length * 2];
      System.arraycopy(buffer, 0, grown, 0, bufferLength);
      buffer = grown;
    }

    // Re-base all offsets on the (possibly shifted) buffer contents.
    bufferStart += tokenStart;
    tokenStart = 0;
    bufferLength = retained;
    bufferPosition = retained;

    // Fill the free space behind the retained characters.
    final int charsRead = input.read(buffer, retained, buffer.length - retained);
    if (charsRead == -1) {
      throw READ_PAST_EOF;
    }
    bufferLength += charsRead;
  }

  /**
   * Marks the current read position as the start of a new token and returns
   * the character found there.
   *
   * @throws IOException if the reader fails or has no more characters
   */
  @Override
  public final char BeginToken() throws IOException {
    tokenStart = bufferPosition;
    return readChar();
  }

  /** Moves the read position back by {@code amount} characters. */
  @Override
  public final void backup(int amount) {
    bufferPosition -= amount;
  }

  /** Returns the characters from the token start up to the read position. */
  @Override
  public final String GetImage() {
    final int imageLength = bufferPosition - tokenStart;
    return String.valueOf(buffer, tokenStart, imageLength);
  }

  /** Returns a copy of the {@code len} characters preceding the read position. */
  @Override
  public final char[] GetSuffix(int len) {
    final char[] suffix = new char[len];
    final int from = bufferPosition - len;
    System.arraycopy(buffer, from, suffix, 0, len);
    return suffix;
  }

  /** Closes the underlying reader; a failure while closing is ignored. */
  @Override
  public final void Done() {
    try {
      input.close();
    } catch (IOException ignored) {
      // Best effort: nothing is reported if the reader cannot be closed.
    }
  }

  /**
   * Returns the line number of the character last read, which is always 1
   * because this stream does not count lines.
   *
   * @deprecated use {@link #getEndLine()} instead
   * @see #getEndLine
   */
  @Override
  @Deprecated
  public final int getLine() {
    return getEndLine();
  }

  /**
   * Returns the column position of the character last read.
   *
   * @deprecated use {@link #getEndColumn()} instead
   * @see #getEndColumn
   */
  @Override
  @Deprecated
  public final int getColumn() {
    return getEndColumn();
  }

  /** Returns the input position of the read pointer. */
  @Override
  public final int getEndColumn() {
    return bufferStart + bufferPosition;
  }

  /** Always 1: this stream does not count lines. */
  @Override
  public final int getEndLine() {
    return 1;
  }

  /** Returns the input position at which the current token starts. */
  @Override
  public final int getBeginColumn() {
    return bufferStart + tokenStart;
  }

  /** Always 1: this stream does not count lines. */
  @Override
  public final int getBeginLine() {
    return 1;
  }

  /** Not supported; always throws a {@link RuntimeException}. */
  @Override
  public void setTabSize(int i) {
    throw new RuntimeException("Tab size not implemented.");
  }

  /** Not supported; always throws a {@link RuntimeException}. */
  @Override
  public int getTabSize() {
    throw new RuntimeException("Tab size not implemented.");
  }

  /** Always {@code false}: line/column tracking is not implemented. */
  @Override
  public boolean getTrackLineColumn() {
    return false;
  }

  /** Not supported; always throws a {@link RuntimeException}. */
  @Override
  public void setTrackLineColumn(boolean trackLineColumn) {
    throw new RuntimeException("Line/Column tracking not implemented.");
  }
}

View File

@ -15,6 +15,8 @@ import org.apache.lucene.search.Query;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.QParser;
import org.apache.lucene.queryparser.charstream.CharStream;
import org.apache.lucene.queryparser.charstream.FastCharStream;
public class QueryParser extends SolrQueryParserBase implements QueryParserConstants {
/** The default operator for parsing queries.
@ -666,27 +668,6 @@ if (splitOnWhitespace == false) {
finally { jj_save(2, xla); }
}
// Lookahead helper that unconditionally returns false.
// NOTE(review): the jj_ prefix suggests JavaCC-generated code, where false
// presumably means "lookahead matched" -- confirm against the grammar file.
private boolean jj_3R_6()
{
return false;
}
// Lookahead helper combining a token scan, a semantic check and a repeated sub-scan.
// NOTE(review): the jj_ prefix suggests JavaCC-generated code, where a true return
// presumably signals that the lookahead failed -- confirm against the grammar file.
private boolean jj_3R_3()
{
// Give up as soon as jj_scan_token(TERM) reports true.
if (jj_scan_token(TERM)) return true;
// jj_lookingAhead is raised only while the semantic condition is evaluated.
jj_lookingAhead = true;
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
jj_lookingAhead = false;
if (!jj_semLA || jj_3R_6()) return true;
Token xsp;
// jj_3R_7 has to return false at least once ...
if (jj_3R_7()) return true;
// ... and is then retried until it returns true; the scan position saved just
// before that last attempt is restored so the attempt leaves jj_scanpos unchanged.
while (true) {
xsp = jj_scanpos;
if (jj_3R_7()) { jj_scanpos = xsp; break; }
}
return false;
}
private boolean jj_3_3()
{
Token xsp;
@ -730,6 +711,27 @@ if (splitOnWhitespace == false) {
return false;
}
// Lookahead helper that unconditionally returns false.
// NOTE(review): the jj_ prefix suggests JavaCC-generated code, where false
// presumably means "lookahead matched" -- confirm against the grammar file.
private boolean jj_3R_6()
{
return false;
}
// Lookahead helper combining a token scan, a semantic check and a repeated sub-scan.
// NOTE(review): the jj_ prefix suggests JavaCC-generated code, where a true return
// presumably signals that the lookahead failed -- confirm against the grammar file.
private boolean jj_3R_3()
{
// Give up as soon as jj_scan_token(TERM) reports true.
if (jj_scan_token(TERM)) return true;
// jj_lookingAhead is raised only while the semantic condition is evaluated.
jj_lookingAhead = true;
jj_semLA = getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind);
jj_lookingAhead = false;
if (!jj_semLA || jj_3R_6()) return true;
Token xsp;
// jj_3R_7 has to return false at least once ...
if (jj_3R_7()) return true;
// ... and is then retried until it returns true; the scan position saved just
// before that last attempt is restored so the attempt leaves jj_scanpos unchanged.
while (true) {
xsp = jj_scanpos;
if (jj_3R_7()) { jj_scanpos = xsp; break; }
}
return false;
}
/** Generated Token Manager. */
public QueryParserTokenManager token_source;
/** Current token. */

View File

@ -38,6 +38,8 @@ import org.apache.lucene.search.Query;
import org.apache.solr.search.SyntaxError;
import org.apache.solr.search.QParser;
import org.apache.lucene.queryparser.charstream.CharStream;
import org.apache.lucene.queryparser.charstream.FastCharStream;
public class QueryParser extends SolrQueryParserBase {
/** The default operator for parsing queries.

View File

@ -1,6 +1,9 @@
/* QueryParserTokenManager.java */
/* Generated By:JavaCC: Do not edit this line. QueryParserTokenManager.java */
package org.apache.solr.parser;
import org.apache.lucene.queryparser.charstream.CharStream;

View File

@ -66,6 +66,8 @@ import org.apache.solr.search.QParser;
import org.apache.solr.search.QueryUtils;
import org.apache.solr.search.SolrConstantScoreQuery;
import org.apache.solr.search.SyntaxError;
import org.apache.lucene.queryparser.charstream.CharStream;
import org.apache.lucene.queryparser.charstream.FastCharStream;
import static org.apache.solr.parser.SolrQueryParserBase.SynonymQueryStyle.AS_SAME_TERM;

View File

@ -26,12 +26,12 @@ import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.queryparser.charstream.CharStream;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.parser.CharStream;
import org.apache.solr.parser.ParseException;
import org.apache.solr.parser.SolrQueryParserBase;
import org.apache.solr.request.SolrQueryRequest;

View File

@ -16,6 +16,7 @@
*/
package org.apache.solr.parser;
import org.apache.lucene.queryparser.charstream.CharStream;
import org.apache.lucene.search.Query;
import org.apache.solr.common.SolrException;
import org.apache.solr.request.SolrQueryRequest;