vtigerossez/include/antlr/AntlrLexer.php

<?php
/**
 * Base class for lexers.
 *
 * Holds the character stream being tokenized together with the shared
 * recognizer state, and provides the token loop (nextToken()), the character
 * matching primitives (matchChar(), matchString(), matchRange(), matchAny()),
 * token emission, and lexer-flavoured error messages and recovery.
 * Concrete lexers implement mTokens().
 */
abstract class AntlrLexer extends BaseRecognizer{
	/** Channel number used as the default token channel. */
	public static $DEFAULT_TOKEN_CHANNEL = 0;

	/** Character stream this lexer reads from (null while setCharStream() resets). */
	protected $input;

	/**
	 * @param mixed $input Character stream to tokenize.
	 * @param mixed $state Recognizer state to share; when omitted (null) a
	 *                     new RecognizerSharedState is created.
	 */
	public function __construct($input, $state=null) {
		if ( $state==null ) {
			$state = new RecognizerSharedState();
		}
		$this->state = $state;
		$this->input = $input;
	}

	/**
	 * Reset the recognizer state, rewind the input (if any) to index 0 and
	 * clear all per-token lexer fields held in the shared state.
	 */
	public function reset() {
		parent::reset(); // reset all recognizer state variables

		if ( $this->input!=null ) {
			$this->input->seek(0); // rewind the input
		}
		if ( $this->state==null ) {
			return; // no shared state work to do
		}

		// Clear the lexer-specific part of the shared state.
		$this->state->token = null;
		$this->state->type = TokenConst::$INVALID_TOKEN_TYPE;
		$this->state->channel = TokenConst::$DEFAULT_CHANNEL;
		$this->state->tokenStartCharIndex = -1;
		$this->state->tokenStartCharPositionInLine = -1;
		$this->state->tokenStartLine = -1;
		$this->state->text = null;
	}

	/**
	 * Return a token from this source; i.e., match a token on the char
	 * stream.
	 *
	 * Loops until a token can be returned: rules that called skip() and
	 * rules that raised a recognition error (which is reported) cause
	 * another attempt. Returns TokenConst::$EOF_TOKEN when the next
	 * character is CharStreamConst::$EOF.
	 *
	 * @return mixed The token stored in $this->state->token, or the EOF token.
	 */
	public function nextToken() {
		while (true) {
			// Start-of-token bookkeeping; emit() reads these values back.
			$this->state->token = null;
			$this->state->channel = 0;//Token::DEFAULT_CHANNEL;
			$this->state->tokenStartCharIndex = $this->input->index();
			$this->state->tokenStartCharPositionInLine = $this->input->getCharPositionInLine();
			$this->state->tokenStartLine = $this->input->getLine();
			$this->state->text = null;

			if ( $this->input->LA(1)==CharStreamConst::$EOF ) {
				return TokenConst::$EOF_TOKEN;
			}

			try {
				$this->mTokens();
				if ( $this->state->token==null ) {
					// The rule did not emit anything itself: build the token for it.
					$this->emit();
				}
				else if ( $this->state->token===TokenConst::$SKIP_TOKEN ) {
					// Same sentinel object that skip() stores; identity check so
					// that only the sentinel (not a look-alike token) is skipped.
					continue;
				}
				return $this->state->token;
			}
			catch (NoViableAltException $nva) {
				$this->reportError($nva);
				$this->recover($nva); // throw out current char and try again
			}
			catch (RecognitionException $re) {
				$this->reportError($re);
				// the match*() routine that threw has already called recover()
			}
		}
	}

	/**
	 * Instruct the lexer to skip creating a token for the current lexer rule
	 * and look for another token. nextToken() keeps looking when a lexer
	 * rule finishes with the token set to SKIP_TOKEN. Recall that if the
	 * token is null at the end of a token rule, nextToken() creates one via
	 * emit().
	 */
	public function skip() {
		$this->state->token = TokenConst::$SKIP_TOKEN;
	}

	/** This is the lexer entry point that sets instance var 'token' */
	public abstract function mTokens();

	/**
	 * Set the char stream and reset the lexer.
	 *
	 * The input is cleared before reset() so that reset() does not seek on
	 * the previous stream; the new stream is installed afterwards.
	 *
	 * @param mixed $input New character stream.
	 */
	public function setCharStream($input) {
		$this->input = null;
		$this->reset();
		$this->input = $input;
	}

	/** @return mixed The character stream currently being tokenized. */
	public function getCharStream() {
		return $this->input;
	}

	/** @return mixed Whatever the input stream reports as its source name. */
	public function getSourceName() {
		return $this->input->getSourceName();
	}

	/**
	 * The standard method called to automatically emit a token at the
	 * outermost lexical rule. The token object should point into the
	 * char buffer start..stop. If there is a text override in 'text',
	 * use that to set the token's text. Override this method to emit
	 * custom Token objects.
	 *
	 * Currently does not support multiple emits per nextToken invocation
	 * for efficiency reasons. Subclass and override this method and
	 * nextToken (to push tokens into a list and pull from that list rather
	 * than a single variable as this implementation does).
	 *
	 * If you are building trees, then you should also override
	 * Parser or TreeParser.getMissingSymbol().
	 *
	 * @param mixed $token Token to emit; when omitted (null) one is built
	 *                     from the current shared state.
	 * @return mixed The token that was stored in $this->state->token.
	 */
	public function emit($token=null) {
		if ( $token==null ) {
			// The token spans from the recorded start index up to the last
			// consumed character (current index - 1).
			$token = CommonToken::forInput(
				$this->input,
				$this->state->type,
				$this->state->channel,
				$this->state->tokenStartCharIndex,
				$this->getCharIndex()-1
			);
			$token->setLine($this->state->tokenStartLine);
			$token->setText($this->state->text);
			$token->setCharPositionInLine($this->state->tokenStartCharPositionInLine);
		}
		$this->state->token = $token;
		return $token;
	}

	/**
	 * Match the characters of $s, in order, against the input, consuming
	 * one input character per matched character.
	 *
	 * On a mismatch while backtracking ($this->state->backtracking > 0) the
	 * failed flag is set and the method returns; otherwise recover() is
	 * called and a MismatchedTokenException is thrown.
	 *
	 * @param string $s Text to match.
	 * @throws MismatchedTokenException
	 */
	public function matchString($s){
		$length = strlen($s);
		for ( $i = 0; $i<$length; $i++ ) {
			// charAt() is a helper defined outside this file; presumably it
			// yields the character code at offset $i -- confirm against its definition.
			$expected = charAt($s, $i);
			if ( $this->input->LA(1)!=$expected ) {
				if ( $this->state->backtracking>0 ) {
					$this->state->failed = true;
					return;
				}
				$mte = new MismatchedTokenException($expected, $this->input);
				$this->recover($mte);
				throw $mte;
			}
			$this->input->consume();
			// Must target the shared state; a bare $state is an undefined local.
			$this->state->failed = false;
		}
	}

	/** Consume one input character unconditionally. */
	public function matchAny() {
		$this->input->consume();
	}

	/**
	 * Match a single character.
	 *
	 * On a mismatch while backtracking the failed flag is set and the method
	 * returns; otherwise recover() is called and a MismatchedTokenException
	 * is thrown. On success the character is consumed and the failed flag
	 * is cleared.
	 *
	 * @param mixed $c Expected character value, compared against LA(1).
	 * @throws MismatchedTokenException
	 */
	public function matchChar($c) {
		if ( $this->input->LA(1)!=$c ) {
			if ( $this->state->backtracking>0 ) {
				$this->state->failed = true;
				return;
			}
			$mte = new MismatchedTokenException($c, $this->input);
			$this->recover($mte);  // don't really recover; just consume in lexer
			throw $mte;
		}
		$this->input->consume();
		$this->state->failed = false;
	}

	/**
	 * Match a single character lying in the inclusive range $a..$b.
	 *
	 * On a mismatch while backtracking the failed flag is set and the method
	 * returns; otherwise recover() is called and a MismatchedRangeException
	 * is thrown. On success the character is consumed and the failed flag
	 * is cleared.
	 *
	 * @param mixed $a Lower bound of the range.
	 * @param mixed $b Upper bound of the range.
	 * @throws MismatchedRangeException
	 */
	public function matchRange($a, $b) {
		$current = $this->input->LA(1);
		if ( $current<$a || $current>$b ) {
			if ( $this->state->backtracking>0 ) {
				$this->state->failed = true;
				return;
			}
			$mre = new MismatchedRangeException($a, $b, $this->input);
			$this->recover($mre);
			throw $mre;
		}
		$this->input->consume();
		$this->state->failed = false;
	}

	/** @return mixed Line reported by the input stream. */
	public function getLine() {
		return $this->input->getLine();
	}

	/** @return mixed Position within the line reported by the input stream. */
	public function getCharPositionInLine() {
		return $this->input->getCharPositionInLine();
	}

	/** What is the index of the current character of lookahead? */
	public function getCharIndex() {
		return $this->input->index();
	}

	/**
	 * Return the text matched so far for the current token or any
	 * text override.
	 *
	 * The override is tested with a strict null check so that an override
	 * set to the empty string is honoured rather than treated as "unset".
	 */
	public function getText() {
		if ( $this->state->text!==null ) {
			return $this->state->text;
		}
		return $this->input->substring($this->state->tokenStartCharIndex, $this->getCharIndex()-1);
	}

	/**
	 * Set the complete text of this token; it wipes any previous
	 * changes to the text.
	 *
	 * @param mixed $text Replacement text.
	 */
	public function setText($text) {
		$this->state->text = $text;
	}

	/**
	 * Report a recognition error through displayRecognitionError().
	 *
	 * @param mixed $e The exception describing the error.
	 */
	public function reportError($e) {
		// TODO: not thought about recovery in lexer yet. The intended (not
		// implemented) logic: if an error has already been reported and no
		// token has been matched successfully since, do not report again:
		//   if ( errorRecovery ) { return; }
		//   errorRecovery = true;

		$this->displayRecognitionError($this->getTokenNames(), $e);
	}

	/**
	 * Build the message for a recognition error, phrased in terms of
	 * characters. Exception types not handled here are delegated to the
	 * parent implementation.
	 *
	 * @param mixed $e          The exception to describe.
	 * @param mixed $tokenNames Token names, forwarded to the parent implementation.
	 * @return string|null
	 */
	public function getErrorMessage($e, $tokenNames) {
		$msg = null;
		if ( $e instanceof MismatchedTokenException ) {
			$msg = "mismatched character ".$this->getCharErrorDisplay($e->c).
				" expecting ".$this->getCharErrorDisplay($e->expecting);
		}
		else if ( $e instanceof NoViableAltException ) {
			// for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"
			// and "(decision="+nvae.decisionNumber+") and
			// "state "+nvae.stateNumber
			$msg = "no viable alternative at character ".$this->getCharErrorDisplay($e->c);
		}
		else if ( $e instanceof EarlyExitException ) {
			// for development, can add "(decision="+eee.decisionNumber+")"
			$msg = "required (...)+ loop did not match anything at character ".$this->getCharErrorDisplay($e->c);
		}
		else if ( $e instanceof MismatchedNotSetException ) {
			$msg = "mismatched character ".$this->getCharErrorDisplay($e->c)." expecting set ".$e->expecting;
		}
		else if ( $e instanceof MismatchedSetException ) {
			$msg = "mismatched character ".$this->getCharErrorDisplay($e->c)." expecting set ".$e->expecting;
		}
		else if ( $e instanceof MismatchedRangeException ) {
			$msg = "mismatched character ".$this->getCharErrorDisplay($e->c)." expecting set ".
				$this->getCharErrorDisplay($e->a)."..".$this->getCharErrorDisplay($e->b);
		}
		else {
			$msg = parent::getErrorMessage($e, $tokenNames);
		}
		return $msg;
	}

	/**
	 * Render a character value for use in an error message, wrapped in
	 * single quotes. Newline, tab and carriage return are shown as the
	 * escapes \n, \t and \r; TokenConst::$EOF is shown as <EOF>.
	 *
	 * @param mixed $c Character value as passed to chr().
	 * @return string
	 */
	public function getCharErrorDisplay($c) {
		if ( $c==TokenConst::$EOF ) {
			return "'<EOF>'";
		}
		$s = chr($c);
		// The case labels must be double-quoted: in single quotes '\n' is the
		// two characters backslash + n and could never equal chr()'s result.
		switch ( $s ) {
			case "\n" :
				$s = "\\n";
				break;
			case "\t" :
				$s = "\\t";
				break;
			case "\r" :
				$s = "\\r";
				break;
		}
		return "'".$s."'";
	}

	/**
	 * Lexers can normally match any char in its vocabulary after matching
	 * a token, so do the easy thing and just kill a character and hope
	 * it all works out. You can instead use the rule invocation stack
	 * to do sophisticated error recovery if you are in a fragment rule.
	 *
	 * @param mixed $re The exception being recovered from (not used here).
	 */
	public function recover($re) {
		$this->input->consume();
	}

	/**
	 * Forward a rule-entry trace event to the parent, describing the current
	 * input symbol together with the line and position in line.
	 */
	public function traceIn($ruleName, $ruleIndex)  {
		$inputSymbol = $this->input->LT(1)." line=".$this->getLine().":".$this->getCharPositionInLine();
		parent::traceIn($ruleName, $ruleIndex, $inputSymbol);
	}

	/**
	 * Forward a rule-exit trace event to the parent, describing the current
	 * input symbol together with the line and position in line.
	 */
	public function traceOut($ruleName, $ruleIndex)  {
		$inputSymbol = $this->input->LT(1)." line=".$this->getLine().":".$this->getCharPositionInLine();
		parent::traceOut($ruleName, $ruleIndex, $inputSymbol);
	}
}

?>
include 文件夹。 2013-01-30 21:51:28 -05:00			`<?php`
			`/**`
			`* Base class for lexers`
			`*/`
			`abstract class AntlrLexer extends BaseRecognizer{`
			`public static $DEFAULT_TOKEN_CHANNEL = 0;`
			`protected $input;`

			`public function __construct($input, $state=null) {`
			`if($state==null){`
			`$state = new RecognizerSharedState();`
			`}`
			`$this->state = $state;`
			`$this->input = $input;`
			`}`

			`public function reset() {`
			`parent::reset(); // reset all recognizer state variables`
			`// wack Lexer state variables`
			`if ( $this->input!=null ) {`
			`$this->input->seek(0); // rewind the input`
			`}`
			`if ( $this->state==null ) {`
			`return; // no shared state work to do`
			`}`
			`$this->state->token = null;`
			`$this->state->type = TokenConst::$INVALID_TOKEN_TYPE;`
			`$this->state->channel = TokenConst::$DEFAULT_CHANNEL;`
			`$this->state->tokenStartCharIndex = -1;`
			`$this->state->tokenStartCharPositionInLine = -1;`
			`$this->state->tokenStartLine = -1;`
			`$this->state->text = null;`
			`}`


			`/** Return a token from this source; i.e., match a token on the char`
			`* stream.`
			`*/`
			`public function nextToken() {`
			`while (true) {`
			`$this->state->token = null;`
			`$this->state->channel = 0;//Token::DEFAULT_CHANNEL;`
			`$this->state->tokenStartCharIndex = $this->input->index();`
			`$this->state->tokenStartCharPositionInLine = $this->input->getCharPositionInLine();`
			`$this->state->tokenStartLine = $this->input->getLine();`
			`$this->state->text = null;`
			`if ( $this->input->LA(1)==CharStreamConst::$EOF ) {`
			`return TokenConst::$EOF_TOKEN;`
			`}`
			`try {`
			`$this->mTokens();`
			`if ( $this->state->token==null ) {`
			`$this->emit();`
			`}`
			`else if ( $this->state->token==Token::$SKIP_TOKEN ) {`
			`continue;`
			`}`
			`return $this->state->token;`
			`}`
			`catch (NoViableAltException $nva) {`
			`$this->reportError($nva);`
			`$this->recover($nva); // throw out current char and try again`
			`}`
			`catch (RecognitionException $re) {`
			`$this->reportError($re);`
			`// match() routine has already called recover()`
			`}`
			`}`
			`}`

			`/** Instruct the lexer to skip creating a token for current lexer rule`
			`* and look for another token. nextToken() knows to keep looking when`
			`* a lexer rule finishes with token set to SKIP_TOKEN. Recall that`
			`* if token==null at end of any token rule, it creates one for you`
			`* and emits it.`
			`*/`
			`public function skip() {`
			`$this->state->token = TokenConst::$SKIP_TOKEN;`
			`}`

			`/** This is the lexer entry point that sets instance var 'token' */`
			`public abstract function mTokens();`

			`/** Set the char stream and reset the lexer */`
			`public function setCharStream($input) {`
			`$this->input = null;`
			`$this->reset();`
			`$this->input = $input;`
			`}`

			`public function getCharStream() {`
			`return $this->input;`
			`}`

			`public function getSourceName() {`
			`return $this->input->getSourceName();`
			`}`

			`/** Currently does not support multiple emits per nextToken invocation`
			`* for efficiency reasons. Subclass and override this method and`
			`* nextToken (to push tokens into a list and pull from that list rather`
			`* than a single variable as this implementation does).`
			`*/`
			`/** The standard method called to automatically emit a token at the`
			`* outermost lexical rule. The token object should point into the`
			`* char buffer start..stop. If there is a text override in 'text',`
			`* use that to set the token's text. Override this method to emit`
			`* custom Token objects.`
			`*`
			`* If you are building trees, then you should also override`
			`* Parser or TreeParser.getMissingSymbol().`
			`*/`
			`public function emit($token=null) {`
			`if($token==null){`
			`$token = CommonToken::forInput($this->input, $this->state->type, $this->state->channel,`
			`$this->state->tokenStartCharIndex, $this->getCharIndex()-1);`
			`$token->setLine($this->state->tokenStartLine);`
			`$token->setText($this->state->text);`
			`$token->setCharPositionInLine($this->state->tokenStartCharPositionInLine);`
			`}`
			`$this->state->token = $token;`
			`return $token;`
			`}`

			`function matchString($s){`
			`$i = 0;`
			`while ( $i<strlen($s)) {`
			`if ( $this->input->LA(1)!=charAt($s, $i) ) {`
			`if ( $this->state->backtracking>0 ) {`
			`$this->state->failed = true;`
			`return;`
			`}`
			`$mte = new MismatchedTokenException(charAt($s, $i), $this->input);`
			`$this->recover($mte);`
			`throw $mte;`
			`}`
			`$i++;`
			`$this->input->consume();`
			`$state->failed = false;`
			`}`
			`}`

			`public function matchAny() {`
			`$this->input->consume();`
			`}`

			`public function matchChar($c) {`
			`if ($this->input->LA(1)!=$c ) {`
			`if ( $this->state->backtracking>0 ) {`
			`$this->state->failed = true;`
			`return;`
			`}`
			`$mte = new MismatchedTokenException($c, $this->input);`
			`$this->recover($mte); // don't really recover; just consume in lexer`
			`throw $mte;`
			`}`
			`$this->input->consume();`
			`$this->state->failed = false;`
			`}`

			`public function matchRange($a, $b) {`
			`if ( $this->input->LA(1)<$a \|\| $this->input->LA(1)>$b ) {`
			`if ( $this->state->backtracking>0 ) {`
			`$this->state->failed = true;`
			`return;`
			`}`
			`$mre = new MismatchedRangeException($a, $b, $this->input);`
			`$this->recover($mre);`
			`throw $mre;`
			`}`
			`$this->input->consume();`
			`$this->state->failed = false;`
			`}`

			`public function getLine() {`
			`return $this->input->getLine();`
			`}`

			`public function getCharPositionInLine() {`
			`return $this->input->getCharPositionInLine();`
			`}`

			`/** What is the index of the current character of lookahead? */`
			`public function getCharIndex() {`
			`return $this->input->index();`
			`}`


			`/** Return the text matched so far for the current token or any`
			`* text override.`
			`*/`
			`public function getText() {`
			`if ( $this->state->text!=null ) {`
			`return $this->state->text;`
			`}`
			`return $this->input->substring($this->state->tokenStartCharIndex,$this->getCharIndex()-1);`
			`}`

			`/** Set the complete text of this token; it wipes any previous`
			`* changes to the text.`
			`*/`
			`public function setText($text) {`
			`$this->state->text = $text;`
			`}`

			`public function reportError($e) {`
			`/** TODO: not thought about recovery in lexer yet.`
			`*`
			`// if we've already reported an error and have not matched a token`
			`// yet successfully, don't report any errors.`
			`if ( errorRecovery ) {`
			`//System.err.print("[SPURIOUS] ");`
			`return;`
			`}`
			`errorRecovery = true;`
			`*/`

			`$this->displayRecognitionError($this->getTokenNames(), $e);`
			`}`

			`public function getErrorMessage($e, $tokenNames) {`
			`$msg = null;`
			`if ( $e instanceof MismatchedTokenException ) {`
			`$mte = $e;`
			`$msg = "mismatched character ".$this->getCharErrorDisplay($e->c).`
			`" expecting ".$this->getCharErrorDisplay($mte->expecting);`
			`}`
			`else if ( $e instanceof NoViableAltException ) {`
			`$nvae = $e;`
			`// for development, can add "decision=<<"+nvae.grammarDecisionDescription+">>"`
			`// and "(decision="+nvae.decisionNumber+") and`
			`// "state "+nvae.stateNumber`
			`$msg = "no viable alternative at character ".$this->getCharErrorDisplay($e->c);`
			`}`
			`else if ( $e instanceof EarlyExitException ) {`
			`$eee = $e;`
			`// for development, can add "(decision="+eee.decisionNumber+")"`
			`$msg = "required (...)+ loop did not match anything at character ".$this->getCharErrorDisplay($e->c);`
			`}`
			`else if ( $e instanceof MismatchedNotSetException ) {`
			`$mse = $e;`
			`$msg = "mismatched character ".$this->getCharErrorDisplay($e->c)." expecting set ".$mse->expecting;`
			`}`
			`else if ( $e instanceof MismatchedSetException ) {`
			`$mse = $e;`
			`$msg = "mismatched character ".$this->getCharErrorDisplay($e->c)." expecting set ".$mse->expecting;`
			`}`
			`else if ( $e instanceof MismatchedRangeException ) {`
			`$mre = $e;`
			`$msg = "mismatched character ".$this->getCharErrorDisplay($e->c)." expecting set ".`
			`$this->getCharErrorDisplay($mre->a)."..".$this->getCharErrorDisplay($mre->b);`
			`}`
			`else {`
			`$msg = parent::getErrorMessage($e, $tokenNames);`
			`}`
			`return $msg;`
			`}`

			`public function getCharErrorDisplay($c) {`
			`$s = chr($c);`
			`switch ( $s ) {`
			`case '\n' :`
			`$s = "\\n";`
			`break;`
			`case '\t' :`
			`$s = "\\t";`
			`break;`
			`case '\r' :`
			`$s = "\\r";`
			`break;`
			`}`
			`if ($c==TokenConst::$EOF){`
			`$s = "<EOF>";`
			`}`
			`return "'".$s."'";`
			`}`

			`/** Lexers can normally match any char in it's vocabulary after matching`
			`* a token, so do the easy thing and just kill a character and hope`
			`* it all works out. You can instead use the rule invocation stack`
			`* to do sophisticated error recovery if you are in a fragment rule.`
			`*/`
			`public function recover($re) {`
			`$this->input->consume();`
			`}`


			`public function traceIn($ruleName, $ruleIndex) {`
			`$inputSymbol = $this->input->LT(1)." line=".$this->getLine().":".$this->getCharPositionInLine();`
			`parent::traceIn($ruleName, $ruleIndex, $inputSymbol);`
			`}`

			`public function traceOut($ruleName, $ruleIndex) {`
			`$inputSymbol = $this->input->LT(1)." line=".$this->getLine().":".$this->getCharPositionInLine();`
			`parent::traceOut($ruleName, $ruleIndex, $inputSymbol);`
			`}`
			`}`

			`?>`