I18N: Introduce the `Plural_Forms` class.

Historically, we've evaluated the plural forms for each language using `create_function()`. That function is deprecated as of PHP 7.2, so it needs to be replaced.

The `Plural_Forms` class parses the `Plural-Forms` header from the PO file, and internally caches the result of all subsequent plural form tests, allowing it to match the performance of the existing code.

Props rmccue.
Fixes #41562.


Built from https://develop.svn.wordpress.org/trunk@41722


git-svn-id: http://core.svn.wordpress.org/trunk@41556 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
Gary Pendergast 2017-10-04 01:30:46 +00:00
parent 5f7a5c1246
commit 0a3b7d8e31
3 changed files with 353 additions and 7 deletions

View File

@ -0,0 +1,343 @@
<?php
/**
 * A gettext Plural-Forms parser.
 *
 * Parses the C-style expression found after `plural=` in a Plural-Forms header
 * into Reverse Polish Notation once, then evaluates those tokens for any given
 * value of `n`. Results are cached per value of `n`.
 *
 * @since 4.9.0
 */
class Plural_Forms {
	/**
	 * Operator characters.
	 *
	 * Every character that may appear inside an operator; used to measure how
	 * long the operator at the current position is.
	 *
	 * @since 4.9.0
	 * @var string OP_CHARS Operator characters.
	 */
	const OP_CHARS = '|&><!=%?:';

	/**
	 * Valid number characters.
	 *
	 * @since 4.9.0
	 * @var string NUM_CHARS Valid number characters.
	 */
	const NUM_CHARS = '0123456789';

	/**
	 * Operator precedence.
	 *
	 * Operator precedence from highest to lowest. Higher numbers indicate
	 * higher precedence, and are executed first.
	 *
	 * @see https://en.wikipedia.org/wiki/Operators_in_C_and_C%2B%2B#Operator_precedence
	 *
	 * @since 4.9.0
	 * @var array $op_precedence Operator precedence from highest to lowest.
	 */
	protected static $op_precedence = array(
		'%'  => 6,

		'<'  => 5,
		'<=' => 5,
		'>'  => 5,
		'>=' => 5,

		'==' => 4,
		'!=' => 4,

		'&&' => 3,

		'||' => 2,

		'?:' => 1,
		'?'  => 1,

		'('  => 0,
		')'  => 0,
	);

	/**
	 * Tokens generated from the string.
	 *
	 * Each token is one of `array( 'var' )`, `array( 'value', int )` or
	 * `array( 'op', string )`, stored in Reverse Polish Notation order.
	 *
	 * @since 4.9.0
	 * @var array $tokens List of tokens.
	 */
	protected $tokens = array();

	/**
	 * Cache for repeated calls to the function.
	 *
	 * @since 4.9.0
	 * @var array $cache Map of $n => $result
	 */
	protected $cache = array();

	/**
	 * Constructor.
	 *
	 * @since 4.9.0
	 *
	 * @throws Exception If the expression cannot be parsed or is malformed.
	 *
	 * @param string $str Plural function (just the bit after `plural=` from Plural-Forms)
	 */
	public function __construct( $str ) {
		$this->parse( $str );
	}

	/**
	 * Parse a Plural-Forms string into tokens.
	 *
	 * Uses the shunting-yard algorithm to convert the string to Reverse Polish
	 * Notation tokens, then checks that the resulting token list is well formed
	 * so that problems surface here rather than when the expression is executed.
	 *
	 * @since 4.9.0
	 *
	 * @throws Exception If the string contains an unknown symbol or operator,
	 *                   has mismatched parentheses or ternary parts, or does
	 *                   not reduce to exactly one value.
	 *
	 * @param string $str String to parse.
	 */
	protected function parse( $str ) {
		$pos = 0;
		$len = strlen( $str );

		// Convert infix operators to postfix using the shunting-yard algorithm.
		$output = array();
		$stack  = array();

		while ( $pos < $len ) {
			$next = substr( $str, $pos, 1 );

			switch ( $next ) {
				// Ignore whitespace
				case ' ':
				case "\t":
					$pos++;
					break;

				// Variable (n)
				case 'n':
					$output[] = array( 'var' );
					$pos++;
					break;

				// Parentheses
				case '(':
					$stack[] = $next;
					$pos++;
					break;

				case ')':
					$found = false;
					while ( ! empty( $stack ) ) {
						$o2 = $stack[ count( $stack ) - 1 ];
						if ( $o2 !== '(' ) {
							$output[] = array( 'op', array_pop( $stack ) );
							continue;
						}

						// Discard open paren.
						array_pop( $stack );
						$found = true;
						break;
					}

					if ( ! $found ) {
						throw new Exception( 'Mismatched parentheses' );
					}

					$pos++;
					break;

				// Operators
				case '|':
				case '&':
				case '>':
				case '<':
				case '!':
				case '=':
				case '%':
				case '?':
					// Take the whole run of operator characters as one operator.
					$end_operator = strspn( $str, self::OP_CHARS, $pos );
					$operator     = substr( $str, $pos, $end_operator );
					if ( ! array_key_exists( $operator, self::$op_precedence ) ) {
						throw new Exception( sprintf( 'Unknown operator "%s"', $operator ) );
					}

					while ( ! empty( $stack ) ) {
						$o2 = $stack[ count( $stack ) - 1 ];

						// Ternary is right-associative in C
						if ( $operator === '?:' || $operator === '?' ) {
							if ( self::$op_precedence[ $operator ] >= self::$op_precedence[ $o2 ] ) {
								break;
							}
						} elseif ( self::$op_precedence[ $operator ] > self::$op_precedence[ $o2 ] ) {
							break;
						}

						$output[] = array( 'op', array_pop( $stack ) );
					}
					$stack[] = $operator;

					$pos += $end_operator;
					break;

				// Ternary "else"
				case ':':
					$found = false;
					$s_pos = count( $stack ) - 1;
					while ( $s_pos >= 0 ) {
						$o2 = $stack[ $s_pos ];
						if ( $o2 !== '?' ) {
							$output[] = array( 'op', array_pop( $stack ) );
							$s_pos--;
							continue;
						}

						// Replace the pending "?" with the complete ternary operator.
						$stack[ $s_pos ] = '?:';
						$found           = true;
						break;
					}

					if ( ! $found ) {
						throw new Exception( 'Missing starting "?" ternary operator' );
					}
					$pos++;
					break;

				// Default - number or invalid
				default:
					if ( $next >= '0' && $next <= '9' ) {
						$span     = strspn( $str, self::NUM_CHARS, $pos );
						$output[] = array( 'value', intval( substr( $str, $pos, $span ) ) );
						$pos     += $span;
						// `break`, not `continue`: inside a switch PHP treats
						// `continue` as `break` and warns about it since 7.3.
						break;
					}

					throw new Exception( sprintf( 'Unknown symbol "%s"', $next ) );
			}
		}

		while ( ! empty( $stack ) ) {
			$o2 = array_pop( $stack );
			if ( $o2 === '(' || $o2 === ')' ) {
				throw new Exception( 'Mismatched parentheses' );
			}

			$output[] = array( 'op', $o2 );
		}

		$this->validate_tokens( $output );

		$this->tokens = $output;
	}

	/**
	 * Check that a Reverse Polish Notation token list is well formed.
	 *
	 * Simulates the depth of the evaluation stack without computing anything:
	 * every operator must find enough operands, and exactly one value must be
	 * left at the end.
	 *
	 * @since 4.9.0
	 *
	 * @throws Exception If the token list is not a valid expression.
	 *
	 * @param array $tokens Tokens in Reverse Polish Notation order.
	 */
	protected function validate_tokens( $tokens ) {
		$depth = 0;

		foreach ( $tokens as $token ) {
			if ( 'op' !== $token[0] ) {
				// A variable or a literal pushes one value.
				$depth++;
				continue;
			}

			if ( '?' === $token[1] ) {
				// A "?" that never met its ":" was emitted unchanged.
				throw new Exception( 'Missing ":" for "?" ternary operator' );
			}

			if ( '(' === $token[1] || ')' === $token[1] ) {
				throw new Exception( 'Mismatched parentheses' );
			}

			$operands = ( '?:' === $token[1] ) ? 3 : 2;
			if ( $depth < $operands ) {
				throw new Exception( sprintf( 'Missing operand for operator "%s"', $token[1] ) );
			}

			// The operands are consumed and a single result is pushed back.
			$depth -= $operands - 1;
		}

		if ( 1 !== $depth ) {
			throw new Exception( 'Expression does not reduce to a single value' );
		}
	}

	/**
	 * Get the plural form for a number.
	 *
	 * Caches the value for repeated calls.
	 *
	 * @since 4.9.0
	 *
	 * @param int $num Number to get plural form for.
	 * @return int Plural form value.
	 */
	public function get( $num ) {
		if ( isset( $this->cache[ $num ] ) ) {
			return $this->cache[ $num ];
		}
		return $this->cache[ $num ] = $this->execute( $num );
	}

	/**
	 * Execute the plural form function.
	 *
	 * Evaluates the stored Reverse Polish Notation tokens with a value stack.
	 * The result is not cached; use get() for cached lookups.
	 *
	 * @since 4.9.0
	 *
	 * @throws Exception If an unknown operator is encountered, or if the
	 *                   tokens do not leave exactly one value on the stack.
	 *
	 * @param int $n Variable "n" to substitute.
	 * @return int Plural form value.
	 */
	public function execute( $n ) {
		$stack = array();

		foreach ( $this->tokens as $token ) {
			if ( 'var' === $token[0] ) {
				$stack[] = $n;
				continue;
			}

			if ( 'value' === $token[0] ) {
				$stack[] = $token[1];
				continue;
			}

			// Only operators left.
			if ( '?:' === $token[1] ) {
				$v3      = array_pop( $stack );
				$v2      = array_pop( $stack );
				$v1      = array_pop( $stack );
				$stack[] = $v1 ? $v2 : $v3;
				continue;
			}

			// Every remaining operator is binary; the right operand is on top.
			$v2 = array_pop( $stack );
			$v1 = array_pop( $stack );

			switch ( $token[1] ) {
				case '%':
					$stack[] = $v1 % $v2;
					break;

				case '||':
					$stack[] = $v1 || $v2;
					break;

				case '&&':
					$stack[] = $v1 && $v2;
					break;

				case '<':
					$stack[] = $v1 < $v2;
					break;

				case '<=':
					$stack[] = $v1 <= $v2;
					break;

				case '>':
					$stack[] = $v1 > $v2;
					break;

				case '>=':
					$stack[] = $v1 >= $v2;
					break;

				case '!=':
					$stack[] = $v1 != $v2;
					break;

				case '==':
					$stack[] = $v1 == $v2;
					break;

				default:
					throw new Exception( sprintf( 'Unknown operator "%s"', $token[1] ) );
			}
		}

		if ( count( $stack ) !== 1 ) {
			throw new Exception( 'Too many values remaining on the stack' );
		}

		return (int) $stack[0];
	}
}

View File

@ -7,6 +7,7 @@
* @subpackage translations
*/
require_once dirname(__FILE__) . '/plural-forms.php';
require_once dirname(__FILE__) . '/entry.php';
if ( ! class_exists( 'Translations', false ) ):
@ -187,7 +188,7 @@ class Gettext_Translations extends Translations {
function nplurals_and_expression_from_header($header) {
if (preg_match('/^\s*nplurals\s*=\s*(\d+)\s*;\s+plural\s*=\s*(.+)$/', $header, $matches)) {
$nplurals = (int)$matches[1];
$expression = trim($this->parenthesize_plural_exression($matches[2]));
$expression = trim( $matches[2] );
return array($nplurals, $expression);
} else {
return array(2, 'n != 1');
@ -201,11 +202,13 @@ class Gettext_Translations extends Translations {
* @param string $expression
*/
function make_plural_form_function($nplurals, $expression) {
$expression = str_replace('n', '$n', $expression);
$func_body = "
\$index = (int)($expression);
return (\$index < $nplurals)? \$index : $nplurals - 1;";
return create_function('$n', $func_body);
try {
$handler = new Plural_Forms( rtrim( $expression, ';' ) );
return array( $handler, 'get' );
} catch ( Exception $e ) {
// Fall back to default plural-form function.
return $this->make_plural_form_function( 2, 'n != 1' );
}
}
/**

View File

@ -4,7 +4,7 @@
*
* @global string $wp_version
*/
$wp_version = '4.9-alpha-41721';
$wp_version = '4.9-alpha-41722';
/**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.