* @copyright 2006 Gregory Beaver * @license http://www.php.net/license/3_01.txt PHP License 3.01 * @version CVS: $Id$ * @since File available since Release 0.1.0 */ /** * Symbols (terminals and nonterminals) of the grammar are stored in this class * * @package PHP_ParserGenerator * @author Gregory Beaver * @copyright 2006 Gregory Beaver * @license http://www.php.net/license/3_01.txt PHP License 3.01 * @version 0.1.0 * @since Class available since Release 0.1.0 */ class PHP_ParserGenerator_Symbol { /** * Symbols that start with a capital letter like FOO. * * These are tokens directly from the lexer */ const TERMINAL = 1; /** * Symbols that start with a lower-case letter like foo. * * These are grammar rules like "foo ::= BLAH." */ const NONTERMINAL = 2; /** * Multiple terminal symbols. * * These are a grammar rule that consists of several terminals like * FOO|BAR|BAZ. Note that non-terminals cannot be in a multi-terminal, * and a multi-terminal acts like a single terminal. * * "FOO|BAR FOO|BAZ" is actually two multi-terminals, FOO|BAR and FOO|BAZ. */ const MULTITERMINAL = 3; const LEFT = 1; const RIGHT = 2; const NONE = 3; const UNK = 4; /** * Name of the symbol * * @var string */ public $name; /** * Index of this symbol. * * This will ultimately end up representing the symbol in the generated * parser * @var int */ public $index; /** * Symbol type * * One of PHP_ParserGenerator_Symbol::TERMINAL, * PHP_ParserGenerator_Symbol::NONTERMINAL or * PHP_ParserGenerator_Symbol::MULTITERMINAL * @var int */ public $type; /** * Linked list of rules that use this symbol, if it is a non-terminal. * @var PHP_ParserGenerator_Rule */ public $rule; /** * Fallback token in case this token doesn't parse * @var PHP_ParserGenerator_Symbol */ public $fallback; /** * Precendence, if defined. * * -1 if no unusual precedence * @var int */ public $prec = -1; /** * Associativity if precedence is defined. * * One of PHP_ParserGenerator_Symbol::LEFT, * PHP_ParserGenerator_Symbol::RIGHT, PHP_ParserGenerator_Symbol::NONE * or PHP_ParserGenerator_Symbol::UNK * @var unknown_type */ public $assoc; /** * First-set for all rules of this symbol * * @var array */ public $firstset; /** * True if this symbol is a non-terminal and can generate an empty * result. * * For instance "foo ::= ." * @var boolean */ public $lambda; /** * Code that executes whenever this symbol is popped from the stack during * error processing. * * @var string|0 */ public $destructor = 0; /** * Line number of destructor code * @var int */ public $destructorln; /** * Unused relic of the C version of Lemon. * * The data type of information held by this object. Only used * if this is a non-terminal * @var string */ public $datatype; /** * Unused relic of the C version of Lemon. * * The data type number. In the parser, the value * stack is a union. The .yy%d element of this * union is the correct data type for this object * @var string */ public $dtnum; /**#@+ * The following fields are used by MULTITERMINALs only */ /** * Number of terminal symbols in the MULTITERMINAL * * This is of course the same as count($this->subsym) * @var int */ public $nsubsym; /** * Array of terminal symbols in the MULTITERMINAL * @var array an array of {@link PHP_ParserGenerator_Symbol} objects */ public $subsym = array(); /**#@-*/ /** * Singleton storage of symbols * * @var array an array of PHP_ParserGenerator_Symbol objects */ private static $symbol_table = array(); /** * Return a pointer to the (terminal or nonterminal) symbol "x". * Create a new symbol if this is the first time "x" has been seen. * (this is a singleton) * @param string * @return PHP_ParserGenerator_Symbol */ public static function Symbol_new($x) { if (isset(self::$symbol_table[$x])) { return self::$symbol_table[$x]; } $sp = new PHP_ParserGenerator_Symbol; $sp->name = $x; $sp->type = preg_match('/[A-Z]/', $x[0]) ? self::TERMINAL : self::NONTERMINAL; $sp->rule = 0; $sp->fallback = 0; $sp->prec = -1; $sp->assoc = self::UNK; $sp->firstset = array(); $sp->lambda = false; $sp->destructor = 0; $sp->datatype = 0; self::$symbol_table[$sp->name] = $sp; return $sp; } /** * Return the number of unique symbols * @return int */ public static function Symbol_count() { return count(self::$symbol_table); } public static function Symbol_arrayof() { return array_values(self::$symbol_table); } public static function Symbol_find($x) { if (isset(self::$symbol_table[$x])) { return self::$symbol_table[$x]; } return 0; } /** * Sort function helper for symbols * * Symbols that begin with upper case letters (terminals or tokens) * must sort before symbols that begin with lower case letters * (non-terminals). Other than that, the order does not matter. * * We find experimentally that leaving the symbols in their original * order (the order they appeared in the grammar file) gives the * smallest parser tables in SQLite. * @param PHP_ParserGenerator_Symbol * @param PHP_ParserGenerator_Symbol */ public static function sortSymbols($a, $b) { $i1 = $a->index + 10000000*(ord($a->name[0]) > ord('Z')); $i2 = $b->index + 10000000*(ord($b->name[0]) > ord('Z')); return $i1 - $i2; } /** * Return true if two symbols are the same. */ public static function same_symbol(PHP_ParserGenerator_Symbol $a, PHP_ParserGenerator_Symbol $b) { if ($a === $b) return 1; if ($a->type != self::MULTITERMINAL) return 0; if ($b->type != self::MULTITERMINAL) return 0; if ($a->nsubsym != $b->nsubsym) return 0; for ($i = 0; $i < $a->nsubsym; $i++) { if ($a->subsym[$i] != $b->subsym[$i]) return 0; } return 1; } }