546 lines
16 KiB
PHP
546 lines
16 KiB
PHP
<?php
|
|
/**
|
|
* PHP_ParserGenerator, a php 5 parser generator.
|
|
*
|
|
* This is a direct port of the Lemon parser generator, found at
|
|
* {@link http://www.hwaci.com/sw/lemon/}
|
|
*
|
|
* PHP version 5
|
|
*
|
|
* LICENSE: This source file is subject to version 3.01 of the PHP license
|
|
* that is available through the world-wide-web at the following URI:
|
|
* http://www.php.net/license/3_01.txt. If you did not receive a copy of
|
|
* the PHP License and are unable to obtain it through the web, please
|
|
* send a note to license@php.net so we can mail you a copy immediately.
|
|
*
|
|
* @category php
|
|
* @package PHP_ParserGenerator
|
|
* @author Gregory Beaver <cellog@php.net>
|
|
* @copyright 2006 Gregory Beaver
|
|
* @license http://www.php.net/license/3_01.txt PHP License 3.01
|
|
* @version CVS: $Id$
|
|
* @since File available since Release 0.1.0
|
|
*/
|
|
/**
|
|
/** A configuration is a production rule of the grammar together with
|
|
* a mark (dot) showing how much of that rule has been processed so far.
|
|
*
|
|
* Configurations also contain a follow-set which is a list of terminal
|
|
* symbols which are allowed to immediately follow the end of the rule.
|
|
* Every configuration is recorded as an instance of the following class.
|
|
*
|
|
* @package PHP_ParserGenerator
|
|
* @author Gregory Beaver <cellog@php.net>
|
|
* @copyright 2006 Gregory Beaver
|
|
* @license http://www.php.net/license/3_01.txt PHP License 3.01
|
|
* @version 0.1.0
|
|
* @since Class available since Release 0.1.0
|
|
*/
|
|
class PHP_ParserGenerator_Config {
|
|
const COMPLETE = 1;
|
|
const INCOMPLETE = 2;
|
|
/**
|
|
* The parser rule upon with the configuration is based.
|
|
*
|
|
* A parser rule is something like:
|
|
* <pre>
|
|
* blah ::= FOO bar.
|
|
* </pre>
|
|
* @var PHP_ParserGenerator_Rule
|
|
*/
|
|
public $rp;
|
|
/**
|
|
* The parse point.
|
|
*
|
|
* This is the index into the right-hand side of a rule that is
|
|
* represented by this configuration. In other words, possible
|
|
* dots for this rule:
|
|
*
|
|
* <pre>
|
|
* blah ::= FOO bar.
|
|
* </pre>
|
|
*
|
|
* are (represented by "[here]"):
|
|
*
|
|
* <pre>
|
|
* blah ::= [here] FOO bar.
|
|
* blah ::= FOO [here] bar.
|
|
* blah ::= FOO bar [here].
|
|
* </pre>
|
|
* @var int
|
|
*/
|
|
public $dot;
|
|
/**
|
|
* Follow-set for this configuration only
|
|
*
|
|
* This is the list of terminals and non-terminals that
|
|
* can follow this configuration.
|
|
* @var array
|
|
*/
|
|
public $fws;
|
|
/**
|
|
* Follow-set forward propagation links.
|
|
* @var PHP_ParserGenerator_PropagationLink
|
|
*/
|
|
public $fplp;
|
|
/**
|
|
* Follow-set backwards propagation links
|
|
* @var PHP_ParserGenerator_PropagationLink
|
|
*/
|
|
public $bplp;
|
|
/**
|
|
* State that contains this configuration
|
|
* @var PHP_ParserGenerator_State
|
|
*/
|
|
public $stp;
|
|
/* enum {
|
|
COMPLETE, /* The status is used during followset and
|
|
INCOMPLETE /* shift computations
|
|
} */
|
|
/**
|
|
* Status during followset and shift computations.
|
|
*
|
|
* One of PHP_ParserGenerator_Config::COMPLETE or
|
|
* PHP_ParserGenerator_Config::INCOMPLETE.
|
|
* @var int
|
|
*/
|
|
public $status;
|
|
/**
|
|
* Next configuration in the state.
|
|
*
|
|
* Index of next PHP_ParserGenerator_Config object.
|
|
* @var int
|
|
*/
|
|
public $next;
|
|
/**
|
|
* Index of the next basis configuration PHP_ParserGenerator_Config object
|
|
* @var int
|
|
*/
|
|
public $bp;
|
|
|
|
/**
|
|
* Top of the list of configurations for the current state.
|
|
* @var PHP_ParserGenerator_Config
|
|
*/
|
|
static public $current;
|
|
/**
|
|
* Last on the list of configurations for the current state.
|
|
* @var PHP_ParserGenerator_Config
|
|
*/
|
|
static public $currentend;
|
|
|
|
/**
|
|
* Top of the list of basis configurations for the current state.
|
|
* @var PHP_ParserGenerator_Config
|
|
*/
|
|
static public $basis;
|
|
/**
|
|
* Last on the list of basis configurations for the current state.
|
|
* @var PHP_ParserGenerator_Config
|
|
*/
|
|
static public $basisend;
|
|
|
|
/**
|
|
* Associative array representation of the linked list of configurations
|
|
* found in {@link $current}
|
|
*
|
|
* @var array
|
|
*/
|
|
static public $x4a = array();
|
|
|
|
/**
|
|
* Return a pointer to a new configuration
|
|
* @return PHP_ParserGenerator_Config
|
|
*/
|
|
private static function newconfig()
|
|
{
|
|
return new PHP_ParserGenerator_Config;
|
|
}
|
|
|
|
/**
|
|
* Display the current configuration for the .out file
|
|
*
|
|
* @param PHP_ParserGenerator_Config $cfp
|
|
* @see PHP_ParserGenerator_Data::ReportOutput()
|
|
*/
|
|
static function Configshow(PHP_ParserGenerator_Config $cfp)
|
|
{
|
|
$fp = fopen('php://output', 'w');
|
|
while ($cfp) {
|
|
if ($cfp->dot == $cfp->rp->nrhs) {
|
|
$buf = sprintf('(%d)', $cfp->rp->index);
|
|
fprintf($fp, ' %5s ', $buf);
|
|
} else {
|
|
fwrite($fp,' ');
|
|
}
|
|
$cfp->ConfigPrint($fp);
|
|
fwrite($fp, "\n");
|
|
if (0) {
|
|
//SetPrint(fp,cfp->fws,$this);
|
|
//PlinkPrint(fp,cfp->fplp,"To ");
|
|
//PlinkPrint(fp,cfp->bplp,"From");
|
|
}
|
|
$cfp = $cfp->next;
|
|
}
|
|
fwrite($fp, "\n");
|
|
fclose($fp);
|
|
}
|
|
|
|
/**
|
|
* Initialize the configuration list builder for a new state.
|
|
*/
|
|
static function Configlist_init()
|
|
{
|
|
self::$current = 0;
|
|
self::$currentend = &self::$current;
|
|
self::$basis = 0;
|
|
self::$basisend = &self::$basis;
|
|
self::$x4a = array();
|
|
}
|
|
|
|
/**
|
|
* Remove all data from the table.
|
|
*
|
|
* Pass each data to the function $f as it is removed if
|
|
* $f is a valid callback.
|
|
* @param callback|null
|
|
* @see Configtable_clear()
|
|
*/
|
|
static function Configtable_reset($f)
|
|
{
|
|
self::$current = 0;
|
|
self::$currentend = &self::$current;
|
|
self::$basis = 0;
|
|
self::$basisend = &self::$basis;
|
|
self::Configtable_clear(0);
|
|
}
|
|
|
|
/**
|
|
* Remove all data from the associative array representation
|
|
* of configurations.
|
|
*
|
|
* Pass each data to the function $f as it is removed if
|
|
* $f is a valid callback.
|
|
* @param callback|null
|
|
*/
|
|
static function Configtable_clear($f)
|
|
{
|
|
if (!count(self::$x4a)) {
|
|
return;
|
|
}
|
|
if ($f) {
|
|
for ($i = 0; $i < count(self::$x4a); $i++) {
|
|
call_user_func($f, self::$x4a[$i]->data);
|
|
}
|
|
}
|
|
self::$x4a = array();
|
|
}
|
|
|
|
/**
|
|
* Reset the configuration list builder for a new state.
|
|
* @see Configtable_clear()
|
|
*/
|
|
static function Configlist_reset()
|
|
{
|
|
self::Configtable_clear(0);
|
|
}
|
|
|
|
/**
|
|
* Add another configuration to the configuration list for this parser state.
|
|
* @param PHP_ParserGenerator_Rule the rule
|
|
* @param int Index into the right-hand side of the rule where the dot goes
|
|
* @return PHP_ParserGenerator_Config
|
|
*/
|
|
static function Configlist_add($rp, $dot)
|
|
{
|
|
$model = new PHP_ParserGenerator_Config;
|
|
$model->rp = $rp;
|
|
$model->dot = $dot;
|
|
$cfp = self::Configtable_find($model);
|
|
if ($cfp === 0) {
|
|
$cfp = self::newconfig();
|
|
$cfp->rp = $rp;
|
|
$cfp->dot = $dot;
|
|
$cfp->fws = array();
|
|
$cfp->stp = 0;
|
|
$cfp->fplp = $cfp->bplp = 0;
|
|
$cfp->next = 0;
|
|
$cfp->bp = 0;
|
|
self::$currentend = $cfp;
|
|
self::$currentend = &$cfp->next;
|
|
self::Configtable_insert($cfp);
|
|
}
|
|
return $cfp;
|
|
}
|
|
|
|
/**
|
|
* Add a basis configuration to the configuration list for this parser state.
|
|
*
|
|
* Basis configurations are the root for a configuration. This method also
|
|
* inserts the configuration into the regular list of configurations for this
|
|
* reason.
|
|
* @param PHP_ParserGenerator_Rule the rule
|
|
* @param int Index into the right-hand side of the rule where the dot goes
|
|
* @return PHP_ParserGenerator_Config
|
|
*/
|
|
static function Configlist_addbasis($rp, $dot)
|
|
{
|
|
$model = new PHP_ParserGenerator_Config;
|
|
$model->rp = $rp;
|
|
$model->dot = $dot;
|
|
$cfp = self::Configtable_find($model);
|
|
if ($cfp === 0) {
|
|
$cfp = self::newconfig();
|
|
$cfp->rp = $rp;
|
|
$cfp->dot = $dot;
|
|
$cfp->fws = array();
|
|
$cfp->stp = 0;
|
|
$cfp->fplp = $cfp->bplp = 0;
|
|
$cfp->next = 0;
|
|
$cfp->bp = 0;
|
|
self::$currentend = $cfp;
|
|
self::$currentend = &$cfp->next;
|
|
self::$basisend = $cfp;
|
|
self::$basisend = &$cfp->bp;
|
|
self::Configtable_insert($cfp);
|
|
}
|
|
return $cfp;
|
|
}
|
|
|
|
/**
|
|
* Compute the closure of the configuration list.
|
|
*
|
|
* This calculates all of the possible continuations of
|
|
* each configuration, ensuring that each state accounts
|
|
* for every configuration that could arrive at that state.
|
|
*/
|
|
static function Configlist_closure(PHP_ParserGenerator_Data $lemp)
|
|
{
|
|
for ($cfp = self::$current; $cfp; $cfp = $cfp->next) {
|
|
$rp = $cfp->rp;
|
|
$dot = $cfp->dot;
|
|
if ($dot >= $rp->nrhs) {
|
|
continue;
|
|
}
|
|
$sp = $rp->rhs[$dot];
|
|
if ($sp->type == PHP_ParserGenerator_Symbol::NONTERMINAL) {
|
|
if ($sp->rule === 0 && $sp !== $lemp->errsym) {
|
|
PHP_ParserGenerator::ErrorMsg($lemp->filename, $rp->line,
|
|
"Nonterminal \"%s\" has no rules.", $sp->name);
|
|
$lemp->errorcnt++;
|
|
}
|
|
for ($newrp = $sp->rule; $newrp; $newrp = $newrp->nextlhs) {
|
|
$newcfp = self::Configlist_add($newrp, 0);
|
|
for ($i = $dot + 1; $i < $rp->nrhs; $i++) {
|
|
$xsp = $rp->rhs[$i];
|
|
if ($xsp->type == PHP_ParserGenerator_Symbol::TERMINAL) {
|
|
$newcfp->fws[$xsp->index] = 1;
|
|
break;
|
|
} elseif ($xsp->type == PHP_ParserGenerator_Symbol::MULTITERMINAL) {
|
|
for ($k = 0; $k < $xsp->nsubsym; $k++) {
|
|
$newcfp->fws[$xsp->subsym[$k]->index] = 1;
|
|
}
|
|
break;
|
|
} else {
|
|
$a = array_diff_key($xsp->firstset, $newcfp->fws);
|
|
$newcfp->fws += $a;
|
|
if ($xsp->lambda === false) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
if ($i == $rp->nrhs) {
|
|
PHP_ParserGenerator_PropagationLink::Plink_add($cfp->fplp, $newcfp);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Sort the configuration list
|
|
* @uses Configcmp()
|
|
*/
|
|
static function Configlist_sort()
|
|
{
|
|
$a = 0;
|
|
//self::Configshow(self::$current);
|
|
self::$current = PHP_ParserGenerator::msort(self::$current,'next', array('PHP_ParserGenerator_Config', 'Configcmp'));
|
|
//self::Configshow(self::$current);
|
|
self::$currentend = &$a;
|
|
self::$currentend = 0;
|
|
}
|
|
|
|
/**
|
|
* Sort the configuration list
|
|
* @uses Configcmp
|
|
*/
|
|
static function Configlist_sortbasis()
|
|
{
|
|
$a = 0;
|
|
self::$basis = PHP_ParserGenerator::msort(self::$current,'bp', array('PHP_ParserGenerator_Config', 'Configcmp'));
|
|
self::$basisend = &$a;
|
|
self::$basisend = 0;
|
|
}
|
|
|
|
/**
|
|
* Return a pointer to the head of the configuration list and
|
|
* reset the list
|
|
* @see $current
|
|
* @return PHP_ParserGenerator_Config
|
|
*/
|
|
static function Configlist_return()
|
|
{
|
|
$old = self::$current;
|
|
self::$current = 0;
|
|
self::$currentend = &self::$current;
|
|
return $old;
|
|
}
|
|
|
|
/**
|
|
* Return a pointer to the head of the basis list and
|
|
* reset the list
|
|
* @see $basis
|
|
* @return PHP_ParserGenerator_Config
|
|
*/
|
|
static function Configlist_basis()
|
|
{
|
|
$old = self::$basis;
|
|
self::$basis = 0;
|
|
self::$basisend = &self::$basis;
|
|
return $old;
|
|
}
|
|
|
|
/**
|
|
* Free all elements of the given configuration list
|
|
* @param PHP_ParserGenerator_Config
|
|
*/
|
|
static function Configlist_eat($cfp)
|
|
{
|
|
for(; $cfp; $cfp = $nextcfp){
|
|
$nextcfp = $cfp->next;
|
|
if ($cfp->fplp !=0) {
|
|
throw new Exception('fplp of configuration non-zero?');
|
|
}
|
|
if ($cfp->bplp !=0) {
|
|
throw new Exception('bplp of configuration non-zero?');
|
|
}
|
|
if ($cfp->fws) {
|
|
$cfp->fws = array();
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Compare two configurations for sorting purposes.
|
|
*
|
|
* Configurations based on higher precedence rules
|
|
* (those earlier in the file) are chosen first. Two
|
|
* configurations that are the same rule are sorted by
|
|
* dot (see {@link $dot}), and those configurations
|
|
* with a dot closer to the left-hand side are chosen first.
|
|
* @param unknown_type $a
|
|
* @param unknown_type $b
|
|
* @return unknown
|
|
*/
|
|
static function Configcmp($a, $b)
|
|
{
|
|
$x = $a->rp->index - $b->rp->index;
|
|
if (!$x) {
|
|
$x = $a->dot - $b->dot;
|
|
}
|
|
return $x;
|
|
}
|
|
|
|
/**
|
|
* Print out information on this configuration.
|
|
*
|
|
* @param resource $fp
|
|
* @see PHP_ParserGenerator_Data::ReportOutput()
|
|
*/
|
|
function ConfigPrint($fp)
|
|
{
|
|
$rp = $this->rp;
|
|
fprintf($fp, "%s ::=", $rp->lhs->name);
|
|
for ($i = 0; $i <= $rp->nrhs; $i++) {
|
|
if ($i === $this->dot) {
|
|
fwrite($fp,' *');
|
|
}
|
|
if ($i === $rp->nrhs) {
|
|
break;
|
|
}
|
|
$sp = $rp->rhs[$i];
|
|
fprintf($fp,' %s', $sp->name);
|
|
if ($sp->type == PHP_ParserGenerator_Symbol::MULTITERMINAL) {
|
|
for ($j = 1; $j < $sp->nsubsym; $j++) {
|
|
fprintf($fp, '|%s', $sp->subsym[$j]->name);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/**
|
|
* Hash a configuration for the associative array {@link $x4a}
|
|
*/
|
|
private static function confighash(PHP_ParserGenerator_Config $a)
|
|
{
|
|
$h = 0;
|
|
$h = $h * 571 + $a->rp->index * 37 + $a->dot;
|
|
return $h;
|
|
}
|
|
|
|
/**
|
|
* Insert a new record into the array. Return TRUE if successful.
|
|
* Prior data with the same key is NOT overwritten
|
|
*/
|
|
static function Configtable_insert(PHP_ParserGenerator_Config $data)
|
|
{
|
|
$h = self::confighash($data);
|
|
if (isset(self::$x4a[$h])) {
|
|
$np = self::$x4a[$h];
|
|
} else {
|
|
$np = 0;
|
|
}
|
|
while ($np) {
|
|
if (self::Configcmp($np->data, $data) == 0) {
|
|
/* An existing entry with the same key is found. */
|
|
/* Fail because overwrite is not allows. */
|
|
return 0;
|
|
}
|
|
$np = $np->next;
|
|
}
|
|
/* Insert the new data */
|
|
$np = array('data' => $data, 'next' => 0, 'from' => 0);
|
|
$np = new PHP_ParserGenerator_StateNode;
|
|
$np->data = $data;
|
|
if (isset(self::$x4a[$h])) {
|
|
self::$x4a[$h]->from = $np->next;
|
|
$np->next = self::$x4a[$h];
|
|
}
|
|
$np->from = $np;
|
|
self::$x4a[$h] = $np;
|
|
return 1;
|
|
}
|
|
|
|
/**
|
|
* Return a pointer to data assigned to the given key. Return NULL
|
|
* if no such key.
|
|
* @return PHP_ParserGenerator_Config|0
|
|
*/
|
|
static function Configtable_find(PHP_ParserGenerator_Config $key)
|
|
{
|
|
$h = self::confighash($key);
|
|
if (!isset(self::$x4a[$h])) {
|
|
return 0;
|
|
}
|
|
$np = self::$x4a[$h];
|
|
while ($np) {
|
|
if (self::Configcmp($np->data, $key) == 0) {
|
|
break;
|
|
}
|
|
$np = $np->next;
|
|
}
|
|
return $np ? $np->data : 0;
|
|
}
|
|
}
|
|
?>
|