mirror of https://github.com/apache/lucene.git
LUCENE-4199: Next usecase of FastCharStream for HTMLParser
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/branches/lucene4199@1358760 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
88da536d39
commit
700f867c7e
|
@ -262,9 +262,11 @@
|
|||
<target name="init" depends="module-build.init,resolve-icu,jar-memory,jar-highlighter,jar-analyzers-common,jar-queryparser,jar-facet"/>
|
||||
|
||||
<target name="clean-javacc">
|
||||
<fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">
|
||||
<containsregexp expression="Generated.*By.*JavaCC"/>
|
||||
</fileset>
|
||||
<delete>
|
||||
<fileset dir="src/java/org/apache/lucene/benchmark/byTask/feeds/demohtml" includes="*.java">
|
||||
<containsregexp expression="Generated.*By.*JavaCC"/>
|
||||
</fileset>
|
||||
</delete>
|
||||
</target>
|
||||
|
||||
<target name="javacc" depends="init,javacc-check" if="javacc.present">
|
||||
|
|
|
@ -0,0 +1,112 @@
|
|||
/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 4.1 */
|
||||
/* JavaCCOptions:STATIC=false */
|
||||
package org.apache.lucene.benchmark.byTask.feeds.demohtml;
|
||||
|
||||
/**
|
||||
* This interface describes a character stream that maintains line and
|
||||
* column number positions of the characters. It also has the capability
|
||||
* to backup the stream to some extent. An implementation of this
|
||||
* interface is used in the TokenManager implementation generated by
|
||||
* JavaCCParser.
|
||||
*
|
||||
* All the methods except backup can be implemented in any fashion. backup
|
||||
* needs to be implemented correctly for the correct operation of the lexer.
|
||||
* Rest of the methods are all used to get information like line number,
|
||||
* column number and the String that constitutes a token and are not used
|
||||
* by the lexer. Hence their implementation won't affect the generated lexer's
|
||||
* operation.
|
||||
*/
|
||||
|
||||
public interface CharStream {
|
||||
|
||||
/**
|
||||
* Returns the next character from the selected input. The method
|
||||
* of selecting the input is the responsibility of the class
|
||||
* implementing this interface. Can throw any java.io.IOException.
|
||||
*/
|
||||
char readChar() throws java.io.IOException;
|
||||
|
||||
/**
|
||||
* Returns the column position of the character last read.
|
||||
* @deprecated
|
||||
* @see #getEndColumn
|
||||
*/
|
||||
int getColumn();
|
||||
|
||||
/**
|
||||
* Returns the line number of the character last read.
|
||||
* @deprecated
|
||||
* @see #getEndLine
|
||||
*/
|
||||
int getLine();
|
||||
|
||||
/**
|
||||
* Returns the column number of the last character for current token (being
|
||||
* matched after the last call to BeginTOken).
|
||||
*/
|
||||
int getEndColumn();
|
||||
|
||||
/**
|
||||
* Returns the line number of the last character for current token (being
|
||||
* matched after the last call to BeginTOken).
|
||||
*/
|
||||
int getEndLine();
|
||||
|
||||
/**
|
||||
* Returns the column number of the first character for current token (being
|
||||
* matched after the last call to BeginTOken).
|
||||
*/
|
||||
int getBeginColumn();
|
||||
|
||||
/**
|
||||
* Returns the line number of the first character for current token (being
|
||||
* matched after the last call to BeginTOken).
|
||||
*/
|
||||
int getBeginLine();
|
||||
|
||||
/**
|
||||
* Backs up the input stream by amount steps. Lexer calls this method if it
|
||||
* had already read some characters, but could not use them to match a
|
||||
* (longer) token. So, they will be used again as the prefix of the next
|
||||
* token and it is the implemetation's responsibility to do this right.
|
||||
*/
|
||||
void backup(int amount);
|
||||
|
||||
/**
|
||||
* Returns the next character that marks the beginning of the next token.
|
||||
* All characters must remain in the buffer between two successive calls
|
||||
* to this method to implement backup correctly.
|
||||
*/
|
||||
char BeginToken() throws java.io.IOException;
|
||||
|
||||
/**
|
||||
* Returns a string made up of characters from the marked token beginning
|
||||
* to the current buffer position. Implementations have the choice of returning
|
||||
* anything that they want to. For example, for efficiency, one might decide
|
||||
* to just return null, which is a valid implementation.
|
||||
*/
|
||||
String GetImage();
|
||||
|
||||
/**
|
||||
* Returns an array of characters that make up the suffix of length 'len' for
|
||||
* the currently matched token. This is used to build up the matched string
|
||||
* for use in actions in the case of MORE. A simple and inefficient
|
||||
* implementation of this is as follows :
|
||||
*
|
||||
* {
|
||||
* String t = GetImage();
|
||||
* return t.substring(t.length() - len, t.length()).toCharArray();
|
||||
* }
|
||||
*/
|
||||
char[] GetSuffix(int len);
|
||||
|
||||
/**
|
||||
* The lexer calls this function to indicate that it is done with the stream
|
||||
* and hence implementations can free any resources held by this class.
|
||||
* Again, the body of this function can be just empty and it will not
|
||||
* affect the lexer's operation.
|
||||
*/
|
||||
void Done();
|
||||
|
||||
}
|
||||
/* JavaCC - OriginalChecksum=e26d9399cd34335f985e19c1fa86c11b (do not edit this line) */
|
|
@ -0,0 +1,123 @@
|
|||
// FastCharStream.java
|
||||
package org.apache.lucene.benchmark.byTask.feeds.demohtml;
|
||||
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
* (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*
|
||||
*/
|
||||
|
||||
import java.io.*;
|
||||
|
||||
/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
|
||||
* this does not do line-number counting, but instead keeps track of the
|
||||
* character position of the token in the input, as required by Lucene's {@link
|
||||
* org.apache.lucene.analysis.Token} API.
|
||||
* */
|
||||
public final class FastCharStream implements CharStream {
|
||||
char[] buffer = null;
|
||||
|
||||
int bufferLength = 0; // end of valid chars
|
||||
int bufferPosition = 0; // next char to read
|
||||
|
||||
int tokenStart = 0; // offset in buffer
|
||||
int bufferStart = 0; // position in file of buffer
|
||||
|
||||
Reader input; // source of chars
|
||||
|
||||
/** Constructs from a Reader. */
|
||||
public FastCharStream(Reader r) {
|
||||
input = r;
|
||||
}
|
||||
|
||||
public final char readChar() throws IOException {
|
||||
if (bufferPosition >= bufferLength)
|
||||
refill();
|
||||
return buffer[bufferPosition++];
|
||||
}
|
||||
|
||||
private final void refill() throws IOException {
|
||||
int newPosition = bufferLength - tokenStart;
|
||||
|
||||
if (tokenStart == 0) { // token won't fit in buffer
|
||||
if (buffer == null) { // first time: alloc buffer
|
||||
buffer = new char[2048];
|
||||
} else if (bufferLength == buffer.length) { // grow buffer
|
||||
char[] newBuffer = new char[buffer.length*2];
|
||||
System.arraycopy(buffer, 0, newBuffer, 0, bufferLength);
|
||||
buffer = newBuffer;
|
||||
}
|
||||
} else { // shift token to front
|
||||
System.arraycopy(buffer, tokenStart, buffer, 0, newPosition);
|
||||
}
|
||||
|
||||
bufferLength = newPosition; // update state
|
||||
bufferPosition = newPosition;
|
||||
bufferStart += tokenStart;
|
||||
tokenStart = 0;
|
||||
|
||||
int charsRead = // fill space in buffer
|
||||
input.read(buffer, newPosition, buffer.length-newPosition);
|
||||
if (charsRead == -1)
|
||||
throw new IOException("read past eof");
|
||||
else
|
||||
bufferLength += charsRead;
|
||||
}
|
||||
|
||||
public final char BeginToken() throws IOException {
|
||||
tokenStart = bufferPosition;
|
||||
return readChar();
|
||||
}
|
||||
|
||||
public final void backup(int amount) {
|
||||
bufferPosition -= amount;
|
||||
}
|
||||
|
||||
public final String GetImage() {
|
||||
return new String(buffer, tokenStart, bufferPosition - tokenStart);
|
||||
}
|
||||
|
||||
public final char[] GetSuffix(int len) {
|
||||
char[] value = new char[len];
|
||||
System.arraycopy(buffer, bufferPosition - len, value, 0, len);
|
||||
return value;
|
||||
}
|
||||
|
||||
public final void Done() {
|
||||
try {
|
||||
input.close();
|
||||
} catch (IOException e) {
|
||||
}
|
||||
}
|
||||
|
||||
public final int getColumn() {
|
||||
return bufferStart + bufferPosition;
|
||||
}
|
||||
public final int getLine() {
|
||||
return 1;
|
||||
}
|
||||
public final int getEndColumn() {
|
||||
return bufferStart + bufferPosition;
|
||||
}
|
||||
public final int getEndLine() {
|
||||
return 1;
|
||||
}
|
||||
public final int getBeginColumn() {
|
||||
return bufferStart + tokenStart;
|
||||
}
|
||||
public final int getBeginLine() {
|
||||
return 1;
|
||||
}
|
||||
}
|
|
@ -29,6 +29,10 @@ public class HTMLParser implements HTMLParserConstants {
|
|||
private MyPipedInputStream pipeInStream = null;
|
||||
private PipedOutputStream pipeOutStream = null;
|
||||
|
||||
public HTMLParser(Reader reader) {
|
||||
this(new FastCharStream(reader));
|
||||
}
|
||||
|
||||
private class MyPipedInputStream extends PipedInputStream{
|
||||
|
||||
public MyPipedInputStream(){
|
||||
|
@ -464,7 +468,6 @@ null)
|
|||
|
||||
/** Generated Token Manager. */
|
||||
public HTMLParserTokenManager token_source;
|
||||
SimpleCharStream jj_input_stream;
|
||||
/** Current token. */
|
||||
public Token token;
|
||||
/** Next token. */
|
||||
|
@ -485,14 +488,9 @@ null)
|
|||
private boolean jj_rescan = false;
|
||||
private int jj_gc = 0;
|
||||
|
||||
/** Constructor with InputStream. */
|
||||
public HTMLParser(java.io.InputStream stream) {
|
||||
this(stream, null);
|
||||
}
|
||||
/** Constructor with InputStream and supplied encoding */
|
||||
public HTMLParser(java.io.InputStream stream, String encoding) {
|
||||
try { jj_input_stream = new SimpleCharStream(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
|
||||
token_source = new HTMLParserTokenManager(jj_input_stream);
|
||||
/** Constructor with user supplied CharStream. */
|
||||
public HTMLParser(CharStream stream) {
|
||||
token_source = new HTMLParserTokenManager(stream);
|
||||
token = new Token();
|
||||
jj_ntk = -1;
|
||||
jj_gen = 0;
|
||||
|
@ -501,35 +499,8 @@ null)
|
|||
}
|
||||
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.InputStream stream) {
|
||||
ReInit(stream, null);
|
||||
}
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.InputStream stream, String encoding) {
|
||||
try { jj_input_stream.ReInit(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); }
|
||||
token_source.ReInit(jj_input_stream);
|
||||
token = new Token();
|
||||
jj_ntk = -1;
|
||||
jj_gen = 0;
|
||||
for (int i = 0; i < 14; i++) jj_la1[i] = -1;
|
||||
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
|
||||
}
|
||||
|
||||
/** Constructor. */
|
||||
public HTMLParser(java.io.Reader stream) {
|
||||
jj_input_stream = new SimpleCharStream(stream, 1, 1);
|
||||
token_source = new HTMLParserTokenManager(jj_input_stream);
|
||||
token = new Token();
|
||||
jj_ntk = -1;
|
||||
jj_gen = 0;
|
||||
for (int i = 0; i < 14; i++) jj_la1[i] = -1;
|
||||
for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
|
||||
}
|
||||
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.Reader stream) {
|
||||
jj_input_stream.ReInit(stream, 1, 1);
|
||||
token_source.ReInit(jj_input_stream);
|
||||
public void ReInit(CharStream stream) {
|
||||
token_source.ReInit(stream);
|
||||
token = new Token();
|
||||
jj_ntk = -1;
|
||||
jj_gen = 0;
|
||||
|
@ -631,7 +602,7 @@ null)
|
|||
return (jj_ntk = jj_nt.kind);
|
||||
}
|
||||
|
||||
private java.util.List<int[]> jj_expentries = new java.util.ArrayList<int[]>();
|
||||
private java.util.List jj_expentries = new java.util.ArrayList();
|
||||
private int[] jj_expentry;
|
||||
private int jj_kind = -1;
|
||||
private int[] jj_lasttokens = new int[100];
|
||||
|
@ -691,7 +662,7 @@ null)
|
|||
jj_add_error_token(0, 0);
|
||||
int[][] exptokseq = new int[jj_expentries.size()][];
|
||||
for (int i = 0; i < jj_expentries.size(); i++) {
|
||||
exptokseq[i] = jj_expentries.get(i);
|
||||
exptokseq[i] = (int[])jj_expentries.get(i);
|
||||
}
|
||||
return new ParseException(token, exptokseq, tokenImage);
|
||||
}
|
||||
|
|
|
@ -22,6 +22,7 @@ options {
|
|||
//DEBUG_LOOKAHEAD = true;
|
||||
//DEBUG_TOKEN_MANAGER = true;
|
||||
UNICODE_INPUT = true;
|
||||
USER_CHAR_STREAM=true;
|
||||
}
|
||||
|
||||
PARSER_BEGIN(HTMLParser)
|
||||
|
@ -56,6 +57,10 @@ public class HTMLParser {
|
|||
private MyPipedInputStream pipeInStream = null;
|
||||
private PipedOutputStream pipeOutStream = null;
|
||||
|
||||
public HTMLParser(Reader reader) {
|
||||
this(new FastCharStream(reader));
|
||||
}
|
||||
|
||||
private class MyPipedInputStream extends PipedInputStream{
|
||||
|
||||
public MyPipedInputStream(){
|
||||
|
|
|
@ -464,7 +464,7 @@ private int jjMoveNfa_0(int startState, int curPos)
|
|||
}
|
||||
else
|
||||
{
|
||||
int hiByte = (curChar >> 8);
|
||||
int hiByte = (int)(curChar >> 8);
|
||||
int i1 = hiByte >> 6;
|
||||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
|
@ -569,7 +569,7 @@ private int jjMoveNfa_5(int startState, int curPos)
|
|||
}
|
||||
else
|
||||
{
|
||||
int hiByte = (curChar >> 8);
|
||||
int hiByte = (int)(curChar >> 8);
|
||||
int i1 = hiByte >> 6;
|
||||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
|
@ -670,7 +670,7 @@ private int jjMoveNfa_7(int startState, int curPos)
|
|||
}
|
||||
else
|
||||
{
|
||||
int hiByte = (curChar >> 8);
|
||||
int hiByte = (int)(curChar >> 8);
|
||||
int i1 = hiByte >> 6;
|
||||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
|
@ -766,7 +766,7 @@ private int jjMoveNfa_4(int startState, int curPos)
|
|||
}
|
||||
else
|
||||
{
|
||||
int hiByte = (curChar >> 8);
|
||||
int hiByte = (int)(curChar >> 8);
|
||||
int i1 = hiByte >> 6;
|
||||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
|
@ -892,7 +892,7 @@ private int jjMoveNfa_3(int startState, int curPos)
|
|||
}
|
||||
else
|
||||
{
|
||||
int hiByte = (curChar >> 8);
|
||||
int hiByte = (int)(curChar >> 8);
|
||||
int i1 = hiByte >> 6;
|
||||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
|
@ -1061,7 +1061,7 @@ private int jjMoveNfa_6(int startState, int curPos)
|
|||
}
|
||||
else
|
||||
{
|
||||
int hiByte = (curChar >> 8);
|
||||
int hiByte = (int)(curChar >> 8);
|
||||
int i1 = hiByte >> 6;
|
||||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
|
@ -1205,7 +1205,7 @@ private int jjMoveNfa_1(int startState, int curPos)
|
|||
}
|
||||
else
|
||||
{
|
||||
int hiByte = (curChar >> 8);
|
||||
int hiByte = (int)(curChar >> 8);
|
||||
int i1 = hiByte >> 6;
|
||||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
|
@ -1361,7 +1361,7 @@ private int jjMoveNfa_2(int startState, int curPos)
|
|||
}
|
||||
else
|
||||
{
|
||||
int hiByte = (curChar >> 8);
|
||||
int hiByte = (int)(curChar >> 8);
|
||||
int i1 = hiByte >> 6;
|
||||
long l1 = 1L << (hiByte & 077);
|
||||
int i2 = (curChar & 0xff) >> 6;
|
||||
|
@ -1441,25 +1441,23 @@ static final long[] jjtoToken = {
|
|||
static final long[] jjtoSkip = {
|
||||
0x400000L,
|
||||
};
|
||||
protected SimpleCharStream input_stream;
|
||||
protected CharStream input_stream;
|
||||
private final int[] jjrounds = new int[28];
|
||||
private final int[] jjstateSet = new int[56];
|
||||
protected char curChar;
|
||||
/** Constructor. */
|
||||
public HTMLParserTokenManager(SimpleCharStream stream){
|
||||
if (SimpleCharStream.staticFlag)
|
||||
throw new Error("ERROR: Cannot use a static CharStream class with a non-static lexical analyzer.");
|
||||
public HTMLParserTokenManager(CharStream stream){
|
||||
input_stream = stream;
|
||||
}
|
||||
|
||||
/** Constructor. */
|
||||
public HTMLParserTokenManager(SimpleCharStream stream, int lexState){
|
||||
public HTMLParserTokenManager(CharStream stream, int lexState){
|
||||
this(stream);
|
||||
SwitchTo(lexState);
|
||||
}
|
||||
|
||||
/** Reinitialise parser. */
|
||||
public void ReInit(SimpleCharStream stream)
|
||||
public void ReInit(CharStream stream)
|
||||
{
|
||||
jjmatchedPos = jjnewStateCnt = 0;
|
||||
curLexState = defaultLexState;
|
||||
|
@ -1475,7 +1473,7 @@ private void ReInitRounds()
|
|||
}
|
||||
|
||||
/** Reinitialise parser. */
|
||||
public void ReInit(SimpleCharStream stream, int lexState)
|
||||
public void ReInit(CharStream stream, int lexState)
|
||||
{
|
||||
ReInit(stream);
|
||||
SwitchTo(lexState);
|
||||
|
|
|
@ -195,4 +195,4 @@ public class ParseException extends Exception {
|
|||
}
|
||||
|
||||
}
|
||||
/* JavaCC - OriginalChecksum=e5376178619291bc9d2c0c6647dc3cef (do not edit this line) */
|
||||
/* JavaCC - OriginalChecksum=e449d0e43f3d85deb1260a88b7e90fcd (do not edit this line) */
|
||||
|
|
|
@ -1,472 +0,0 @@
|
|||
/* Generated By:JavaCC: Do not edit this line. SimpleCharStream.java Version 4.1 */
|
||||
/* JavaCCOptions:STATIC=false */
|
||||
package org.apache.lucene.benchmark.byTask.feeds.demohtml;
|
||||
|
||||
/**
|
||||
* An implementation of interface CharStream, where the stream is assumed to
|
||||
* contain only ASCII characters (without unicode processing).
|
||||
*/
|
||||
|
||||
public class SimpleCharStream
|
||||
{
|
||||
/** Whether parser is static. */
|
||||
public static final boolean staticFlag = false;
|
||||
int bufsize;
|
||||
int available;
|
||||
int tokenBegin;
|
||||
/** Position in buffer. */
|
||||
public int bufpos = -1;
|
||||
protected int bufline[];
|
||||
protected int bufcolumn[];
|
||||
|
||||
protected int column = 0;
|
||||
protected int line = 1;
|
||||
|
||||
protected boolean prevCharIsCR = false;
|
||||
protected boolean prevCharIsLF = false;
|
||||
|
||||
protected java.io.Reader inputStream;
|
||||
|
||||
protected char[] buffer;
|
||||
protected int maxNextCharInd = 0;
|
||||
protected int inBuf = 0;
|
||||
protected int tabSize = 8;
|
||||
|
||||
protected void setTabSize(int i) { tabSize = i; }
|
||||
protected int getTabSize(int i) { return tabSize; }
|
||||
|
||||
|
||||
protected void ExpandBuff(boolean wrapAround)
|
||||
{
|
||||
char[] newbuffer = new char[bufsize + 2048];
|
||||
int newbufline[] = new int[bufsize + 2048];
|
||||
int newbufcolumn[] = new int[bufsize + 2048];
|
||||
|
||||
try
|
||||
{
|
||||
if (wrapAround)
|
||||
{
|
||||
System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
|
||||
System.arraycopy(buffer, 0, newbuffer,
|
||||
bufsize - tokenBegin, bufpos);
|
||||
buffer = newbuffer;
|
||||
|
||||
System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
|
||||
System.arraycopy(bufline, 0, newbufline, bufsize - tokenBegin, bufpos);
|
||||
bufline = newbufline;
|
||||
|
||||
System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
|
||||
System.arraycopy(bufcolumn, 0, newbufcolumn, bufsize - tokenBegin, bufpos);
|
||||
bufcolumn = newbufcolumn;
|
||||
|
||||
maxNextCharInd = (bufpos += (bufsize - tokenBegin));
|
||||
}
|
||||
else
|
||||
{
|
||||
System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
|
||||
buffer = newbuffer;
|
||||
|
||||
System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
|
||||
bufline = newbufline;
|
||||
|
||||
System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
|
||||
bufcolumn = newbufcolumn;
|
||||
|
||||
maxNextCharInd = (bufpos -= tokenBegin);
|
||||
}
|
||||
}
|
||||
catch (Throwable t)
|
||||
{
|
||||
throw new Error(t.getMessage());
|
||||
}
|
||||
|
||||
|
||||
bufsize += 2048;
|
||||
available = bufsize;
|
||||
tokenBegin = 0;
|
||||
}
|
||||
|
||||
protected void FillBuff() throws java.io.IOException
|
||||
{
|
||||
if (maxNextCharInd == available)
|
||||
{
|
||||
if (available == bufsize)
|
||||
{
|
||||
if (tokenBegin > 2048)
|
||||
{
|
||||
bufpos = maxNextCharInd = 0;
|
||||
available = tokenBegin;
|
||||
}
|
||||
else if (tokenBegin < 0)
|
||||
bufpos = maxNextCharInd = 0;
|
||||
else
|
||||
ExpandBuff(false);
|
||||
}
|
||||
else if (available > tokenBegin)
|
||||
available = bufsize;
|
||||
else if ((tokenBegin - available) < 2048)
|
||||
ExpandBuff(true);
|
||||
else
|
||||
available = tokenBegin;
|
||||
}
|
||||
|
||||
int i;
|
||||
try {
|
||||
if ((i = inputStream.read(buffer, maxNextCharInd,
|
||||
available - maxNextCharInd)) == -1)
|
||||
{
|
||||
inputStream.close();
|
||||
throw new java.io.IOException();
|
||||
}
|
||||
else
|
||||
maxNextCharInd += i;
|
||||
return;
|
||||
}
|
||||
catch(java.io.IOException e) {
|
||||
--bufpos;
|
||||
backup(0);
|
||||
if (tokenBegin == -1)
|
||||
tokenBegin = bufpos;
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
|
||||
/** Start. */
|
||||
public char BeginToken() throws java.io.IOException
|
||||
{
|
||||
tokenBegin = -1;
|
||||
char c = readChar();
|
||||
tokenBegin = bufpos;
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
protected void UpdateLineColumn(char c)
|
||||
{
|
||||
column++;
|
||||
|
||||
if (prevCharIsLF)
|
||||
{
|
||||
prevCharIsLF = false;
|
||||
line += (column = 1);
|
||||
}
|
||||
else if (prevCharIsCR)
|
||||
{
|
||||
prevCharIsCR = false;
|
||||
if (c == '\n')
|
||||
{
|
||||
prevCharIsLF = true;
|
||||
}
|
||||
else
|
||||
line += (column = 1);
|
||||
}
|
||||
|
||||
switch (c)
|
||||
{
|
||||
case '\r' :
|
||||
prevCharIsCR = true;
|
||||
break;
|
||||
case '\n' :
|
||||
prevCharIsLF = true;
|
||||
break;
|
||||
case '\t' :
|
||||
column--;
|
||||
column += (tabSize - (column % tabSize));
|
||||
break;
|
||||
default :
|
||||
break;
|
||||
}
|
||||
|
||||
bufline[bufpos] = line;
|
||||
bufcolumn[bufpos] = column;
|
||||
}
|
||||
|
||||
/** Read a character. */
|
||||
public char readChar() throws java.io.IOException
|
||||
{
|
||||
if (inBuf > 0)
|
||||
{
|
||||
--inBuf;
|
||||
|
||||
if (++bufpos == bufsize)
|
||||
bufpos = 0;
|
||||
|
||||
return buffer[bufpos];
|
||||
}
|
||||
|
||||
if (++bufpos >= maxNextCharInd)
|
||||
FillBuff();
|
||||
|
||||
char c = buffer[bufpos];
|
||||
|
||||
UpdateLineColumn(c);
|
||||
return c;
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated
|
||||
* @see #getEndColumn
|
||||
*/
|
||||
|
||||
public int getColumn() {
|
||||
return bufcolumn[bufpos];
|
||||
}
|
||||
|
||||
/**
|
||||
* @deprecated
|
||||
* @see #getEndLine
|
||||
*/
|
||||
|
||||
public int getLine() {
|
||||
return bufline[bufpos];
|
||||
}
|
||||
|
||||
/** Get token end column number. */
|
||||
public int getEndColumn() {
|
||||
return bufcolumn[bufpos];
|
||||
}
|
||||
|
||||
/** Get token end line number. */
|
||||
public int getEndLine() {
|
||||
return bufline[bufpos];
|
||||
}
|
||||
|
||||
/** Get token beginning column number. */
|
||||
public int getBeginColumn() {
|
||||
return bufcolumn[tokenBegin];
|
||||
}
|
||||
|
||||
/** Get token beginning line number. */
|
||||
public int getBeginLine() {
|
||||
return bufline[tokenBegin];
|
||||
}
|
||||
|
||||
/** Backup a number of characters. */
|
||||
public void backup(int amount) {
|
||||
|
||||
inBuf += amount;
|
||||
if ((bufpos -= amount) < 0)
|
||||
bufpos += bufsize;
|
||||
}
|
||||
|
||||
/** Constructor. */
|
||||
public SimpleCharStream(java.io.Reader dstream, int startline,
|
||||
int startcolumn, int buffersize)
|
||||
{
|
||||
inputStream = dstream;
|
||||
line = startline;
|
||||
column = startcolumn - 1;
|
||||
|
||||
available = bufsize = buffersize;
|
||||
buffer = new char[buffersize];
|
||||
bufline = new int[buffersize];
|
||||
bufcolumn = new int[buffersize];
|
||||
}
|
||||
|
||||
/** Constructor. */
|
||||
public SimpleCharStream(java.io.Reader dstream, int startline,
|
||||
int startcolumn)
|
||||
{
|
||||
this(dstream, startline, startcolumn, 4096);
|
||||
}
|
||||
|
||||
/** Constructor. */
|
||||
public SimpleCharStream(java.io.Reader dstream)
|
||||
{
|
||||
this(dstream, 1, 1, 4096);
|
||||
}
|
||||
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.Reader dstream, int startline,
|
||||
int startcolumn, int buffersize)
|
||||
{
|
||||
inputStream = dstream;
|
||||
line = startline;
|
||||
column = startcolumn - 1;
|
||||
|
||||
if (buffer == null || buffersize != buffer.length)
|
||||
{
|
||||
available = bufsize = buffersize;
|
||||
buffer = new char[buffersize];
|
||||
bufline = new int[buffersize];
|
||||
bufcolumn = new int[buffersize];
|
||||
}
|
||||
prevCharIsLF = prevCharIsCR = false;
|
||||
tokenBegin = inBuf = maxNextCharInd = 0;
|
||||
bufpos = -1;
|
||||
}
|
||||
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.Reader dstream, int startline,
|
||||
int startcolumn)
|
||||
{
|
||||
ReInit(dstream, startline, startcolumn, 4096);
|
||||
}
|
||||
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.Reader dstream)
|
||||
{
|
||||
ReInit(dstream, 1, 1, 4096);
|
||||
}
|
||||
/** Constructor. */
|
||||
public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
|
||||
int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
|
||||
{
|
||||
this(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
|
||||
}
|
||||
|
||||
/** Constructor. */
|
||||
public SimpleCharStream(java.io.InputStream dstream, int startline,
|
||||
int startcolumn, int buffersize)
|
||||
{
|
||||
this(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
|
||||
}
|
||||
|
||||
/** Constructor. */
|
||||
public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
|
||||
int startcolumn) throws java.io.UnsupportedEncodingException
|
||||
{
|
||||
this(dstream, encoding, startline, startcolumn, 4096);
|
||||
}
|
||||
|
||||
/** Constructor. */
|
||||
public SimpleCharStream(java.io.InputStream dstream, int startline,
|
||||
int startcolumn)
|
||||
{
|
||||
this(dstream, startline, startcolumn, 4096);
|
||||
}
|
||||
|
||||
/** Constructor. */
|
||||
public SimpleCharStream(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
|
||||
{
|
||||
this(dstream, encoding, 1, 1, 4096);
|
||||
}
|
||||
|
||||
/** Constructor. */
|
||||
public SimpleCharStream(java.io.InputStream dstream)
|
||||
{
|
||||
this(dstream, 1, 1, 4096);
|
||||
}
|
||||
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.InputStream dstream, String encoding, int startline,
|
||||
int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
|
||||
{
|
||||
ReInit(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
|
||||
}
|
||||
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.InputStream dstream, int startline,
|
||||
int startcolumn, int buffersize)
|
||||
{
|
||||
ReInit(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
|
||||
}
|
||||
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
|
||||
{
|
||||
ReInit(dstream, encoding, 1, 1, 4096);
|
||||
}
|
||||
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.InputStream dstream)
|
||||
{
|
||||
ReInit(dstream, 1, 1, 4096);
|
||||
}
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.InputStream dstream, String encoding, int startline,
|
||||
int startcolumn) throws java.io.UnsupportedEncodingException
|
||||
{
|
||||
ReInit(dstream, encoding, startline, startcolumn, 4096);
|
||||
}
|
||||
/** Reinitialise. */
|
||||
public void ReInit(java.io.InputStream dstream, int startline,
|
||||
int startcolumn)
|
||||
{
|
||||
ReInit(dstream, startline, startcolumn, 4096);
|
||||
}
|
||||
/** Get token literal value. */
|
||||
public String GetImage()
|
||||
{
|
||||
if (bufpos >= tokenBegin)
|
||||
return new String(buffer, tokenBegin, bufpos - tokenBegin + 1);
|
||||
else
|
||||
return new String(buffer, tokenBegin, bufsize - tokenBegin) +
|
||||
new String(buffer, 0, bufpos + 1);
|
||||
}
|
||||
|
||||
/** Get the suffix. */
|
||||
public char[] GetSuffix(int len)
|
||||
{
|
||||
char[] ret = new char[len];
|
||||
|
||||
if ((bufpos + 1) >= len)
|
||||
System.arraycopy(buffer, bufpos - len + 1, ret, 0, len);
|
||||
else
|
||||
{
|
||||
System.arraycopy(buffer, bufsize - (len - bufpos - 1), ret, 0,
|
||||
len - bufpos - 1);
|
||||
System.arraycopy(buffer, 0, ret, len - bufpos - 1, bufpos + 1);
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/** Reset buffer when finished. */
|
||||
public void Done()
|
||||
{
|
||||
buffer = null;
|
||||
bufline = null;
|
||||
bufcolumn = null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Method to adjust line and column numbers for the start of a token.
|
||||
*/
|
||||
public void adjustBeginLineColumn(int newLine, int newCol)
|
||||
{
|
||||
int start = tokenBegin;
|
||||
int len;
|
||||
|
||||
if (bufpos >= tokenBegin)
|
||||
{
|
||||
len = bufpos - tokenBegin + inBuf + 1;
|
||||
}
|
||||
else
|
||||
{
|
||||
len = bufsize - tokenBegin + bufpos + 1 + inBuf;
|
||||
}
|
||||
|
||||
int i = 0, j = 0, k = 0;
|
||||
int nextColDiff = 0, columnDiff = 0;
|
||||
|
||||
while (i < len &&
|
||||
bufline[j = start % bufsize] == bufline[k = ++start % bufsize])
|
||||
{
|
||||
bufline[j] = newLine;
|
||||
nextColDiff = columnDiff + bufcolumn[k] - bufcolumn[j];
|
||||
bufcolumn[j] = newCol + columnDiff;
|
||||
columnDiff = nextColDiff;
|
||||
i++;
|
||||
}
|
||||
|
||||
if (i < len)
|
||||
{
|
||||
bufline[j] = newLine++;
|
||||
bufcolumn[j] = newCol + columnDiff;
|
||||
|
||||
while (i++ < len)
|
||||
{
|
||||
if (bufline[j = start % bufsize] != bufline[++start % bufsize])
|
||||
bufline[j] = newLine++;
|
||||
else
|
||||
bufline[j] = newLine;
|
||||
}
|
||||
}
|
||||
|
||||
line = bufline[j];
|
||||
column = bufcolumn[j];
|
||||
}
|
||||
|
||||
}
|
||||
/* JavaCC - OriginalChecksum=7c2e625567f11c3058995b779d0149ad (do not edit this line) */
|
|
@ -121,4 +121,4 @@ public class Token {
|
|||
}
|
||||
|
||||
}
|
||||
/* JavaCC - OriginalChecksum=e49c2a0c10d50ff2ebd0639552330ce7 (do not edit this line) */
|
||||
/* JavaCC - OriginalChecksum=24643dc85fd6daeec42ceba20b46ee61 (do not edit this line) */
|
||||
|
|
|
@ -138,4 +138,4 @@ public class TokenMgrError extends Error
|
|||
this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
|
||||
}
|
||||
}
|
||||
/* JavaCC - OriginalChecksum=3aee554f696e5d7a18b1ad330c1de53f (do not edit this line) */
|
||||
/* JavaCC - OriginalChecksum=538f0da130356fcc0bc7db621ab0389d (do not edit this line) */
|
||||
|
|
Loading…
Reference in New Issue