pomo performance improvements. Props nbachiyski. fixes #10165
git-svn-id: http://svn.automattic.com/wordpress/trunk@12174 1a063a9b-81f0-0310-95a4-ce76da25c4cd
This commit is contained in:
parent
742349b291
commit
e5a98d208a
|
@ -2,7 +2,7 @@
|
|||
/**
|
||||
* Class for working with MO files
|
||||
*
|
||||
* @version $Id: mo.php 221 2009-09-07 21:08:21Z nbachiyski $
|
||||
* @version $Id: mo.php 293 2009-11-12 15:43:50Z nbachiyski $
|
||||
* @package pomo
|
||||
* @subpackage mo
|
||||
*/
|
||||
|
@ -21,10 +21,9 @@ class MO extends Gettext_Translations {
|
|||
* @param string $filename MO file to load
|
||||
*/
|
||||
function import_from_file($filename) {
|
||||
$reader = new POMO_CachedIntFileReader($filename);
|
||||
if (isset($reader->error)) {
|
||||
$reader = new POMO_FileReader($filename);
|
||||
if (!$reader->is_resource())
|
||||
return false;
|
||||
}
|
||||
return $this->import_from_reader($reader);
|
||||
}
|
||||
|
||||
|
@ -113,61 +112,111 @@ class MO extends Gettext_Translations {
|
|||
}
|
||||
|
||||
function import_from_reader($reader) {
|
||||
$reader->setEndian('little');
|
||||
$endian = MO::get_byteorder($reader->readint32());
|
||||
if (false === $endian) {
|
||||
$endian_string = MO::get_byteorder($reader->readint32());
|
||||
if (false === $endian_string) {
|
||||
return false;
|
||||
}
|
||||
$reader->setEndian($endian);
|
||||
$reader->setEndian($endian_string);
|
||||
|
||||
$revision = $reader->readint32();
|
||||
$total = $reader->readint32();
|
||||
// get the addresses of the arrays of lengths and offsets for the original strings and the translations
|
||||
$originals_lenghts_addr = $reader->readint32();
|
||||
$translations_lenghts_addr = $reader->readint32();
|
||||
$endian = ('big' == $endian_string)? 'N' : 'V';
|
||||
|
||||
$header = $reader->read(24);
|
||||
if ($reader->strlen($header) != 24)
|
||||
return false;
|
||||
|
||||
// parse header
|
||||
$header = unpack("{$endian}revision/{$endian}total/{$endian}originals_lenghts_addr/{$endian}translations_lenghts_addr/{$endian}hash_length/{$endian}hash_addr", $header);
|
||||
if (!is_array($header))
|
||||
return false;
|
||||
|
||||
extract( $header );
|
||||
|
||||
// support revision 0 of MO format specs, only
|
||||
if ($revision != 0)
|
||||
return false;
|
||||
|
||||
// seek to data blocks
|
||||
$reader->seekto($originals_lenghts_addr);
|
||||
$originals_lenghts = $reader->readint32array($total * 2); // each of
|
||||
$reader->seekto($translations_lenghts_addr);
|
||||
$translations_lenghts = $reader->readint32array($total * 2);
|
||||
|
||||
$length = create_function('$i', 'return $i * 2 + 1;');
|
||||
$offset = create_function('$i', 'return $i * 2 + 2;');
|
||||
// read originals' indices
|
||||
$originals_lengths_length = $translations_lenghts_addr - $originals_lenghts_addr;
|
||||
if ( $originals_lengths_length != $total * 8 )
|
||||
return false;
|
||||
|
||||
for ($i = 0; $i < $total; ++$i) {
|
||||
$reader->seekto($originals_lenghts[$offset($i)]);
|
||||
$original = $reader->read($originals_lenghts[$length($i)]);
|
||||
$reader->seekto($translations_lenghts[$offset($i)]);
|
||||
$translation = $reader->read($translations_lenghts[$length($i)]);
|
||||
if ('' == $original) {
|
||||
$originals = $reader->read($originals_lengths_length);
|
||||
if ( $reader->strlen( $originals ) != $originals_lengths_length )
|
||||
return false;
|
||||
|
||||
// read translations' indices
|
||||
$translations_lenghts_length = $hash_addr - $translations_lenghts_addr;
|
||||
if ( $translations_lenghts_length != $total * 8 )
|
||||
return false;
|
||||
|
||||
$translations = $reader->read($translations_lenghts_length);
|
||||
if ( $reader->strlen( $translations ) != $translations_lenghts_length )
|
||||
return false;
|
||||
|
||||
// transform raw data into set of indices
|
||||
$originals = $reader->str_split( $originals, 8 );
|
||||
$translations = $reader->str_split( $translations, 8 );
|
||||
|
||||
// skip hash table
|
||||
$strings_addr = $hash_addr + $hash_length * 4;
|
||||
|
||||
$reader->seekto($strings_addr);
|
||||
|
||||
$strings = $reader->read_all();
|
||||
$reader->close();
|
||||
|
||||
for ( $i = 0; $i < $total; $i++ ) {
|
||||
$o = unpack( "{$endian}length/{$endian}pos", $originals[$i] );
|
||||
$t = unpack( "{$endian}length/{$endian}pos", $translations[$i] );
|
||||
if ( !$o || !$t ) return false;
|
||||
|
||||
// offsets stored in the file are absolute; make them relative to the strings block, which was read into its own buffer starting at $strings_addr
|
||||
$o['pos'] -= $strings_addr;
|
||||
$t['pos'] -= $strings_addr;
|
||||
|
||||
$original = $reader->substr( $strings, $o['pos'], $o['length'] );
|
||||
$translation = $reader->substr( $strings, $t['pos'], $t['length'] );
|
||||
|
||||
if ('' === $original) {
|
||||
$this->set_headers($this->make_headers($translation));
|
||||
} else {
|
||||
$this->add_entry($this->make_entry($original, $translation));
|
||||
$entry = &$this->make_entry($original, $translation);
|
||||
$this->entries[$entry->key()] = &$entry;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a Translation_Entry from original string and translation strings,
|
||||
* found in a MO file
|
||||
*
|
||||
* @static
|
||||
* @param string $original original string to translate from MO file. Might contain
|
||||
* 0x04 as context separator or 0x00 as singular/plural separator
|
||||
* @param string $translation translation string from MO file. Might contain
|
||||
* 0x00 as a plural translations separator
|
||||
*/
|
||||
function &make_entry($original, $translation) {
|
||||
$args = array();
|
||||
$entry = & new Translation_Entry();
|
||||
// look for context
|
||||
$parts = explode(chr(4), $original);
|
||||
if (isset($parts[1])) {
|
||||
$original = $parts[1];
|
||||
$args['context'] = $parts[0];
|
||||
$entry->context = $parts[0];
|
||||
}
|
||||
// look for plural original
|
||||
$parts = explode(chr(0), $original);
|
||||
$args['singular'] = $parts[0];
|
||||
$entry->singular = $parts[0];
|
||||
if (isset($parts[1])) {
|
||||
$args['plural'] = $parts[1];
|
||||
$entry->is_plural = true;
|
||||
$entry->plural = $parts[1];
|
||||
}
|
||||
// plural translations are also separated by \0
|
||||
$args['translations'] = explode(chr(0), $translation);
|
||||
$entry = & new Translation_Entry($args);
|
||||
$entry->translations = explode(chr(0), $translation);
|
||||
return $entry;
|
||||
}
|
||||
|
||||
|
@ -178,7 +227,5 @@ class MO extends Gettext_Translations {
|
|||
function get_plural_forms_count() {
|
||||
return $this->_nplurals;
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
endif;
|
|
@ -3,64 +3,182 @@
|
|||
* Classes, which help reading streams of data from files.
|
||||
* Based on the classes from Danilo Segan <danilo@kvota.net>
|
||||
*
|
||||
* @version $Id: streams.php 223 2009-09-07 21:20:13Z nbachiyski $
|
||||
* @version $Id: streams.php 293 2009-11-12 15:43:50Z nbachiyski $
|
||||
* @package pomo
|
||||
* @subpackage streams
|
||||
*/
|
||||
|
||||
if ( !class_exists( 'POMO_Reader' ) ):
|
||||
/**
 * Base class for the POMO stream readers.
 *
 * Provides integer decoding and byte-wise string helpers on top of a
 * read($bytes) method, which this class itself does not define; the
 * integer readers call $this->read(), so they are only usable on a
 * subclass that supplies it.
 */
class POMO_Reader {

	/**
	 * Byte order used when decoding integers: 'big' or 'little'.
	 */
	var $endian = 'little';

	/**
	 * NOTE(review): looks like a misspelling of $_pos -- nothing in this
	 * class reads or writes it. Kept only so that outside code touching it
	 * keeps working.
	 */
	var $_post = '';

	/**
	 * Position reported by pos(). Declared here so that it always exists,
	 * even before the constructor has run.
	 */
	var $_pos = 0;

	/**
	 * True when mbstring overloads the single-byte string functions, in
	 * which case the helpers below switch to the mb_* variants.
	 */
	var $is_overloaded = false;

	/**
	 * Detects mbstring function overloading and resets the position.
	 */
	function POMO_Reader() {
		// Bit 2 of mbstring.func_overload covers the string functions.
		// The cast keeps the bitwise test well-defined when the setting
		// is missing and ini_get() does not return a numeric string.
		$overload = (int) ini_get("mbstring.func_overload");
		$this->is_overloaded = (($overload & 2) != 0) && function_exists('mb_substr');
		$this->_pos = 0;
	}

	/**
	 * Sets the endianness of the file.
	 *
	 * @param $endian string 'big' or 'little'
	 */
	function setEndian($endian) {
		$this->endian = $endian;
	}

	/**
	 * Reads a 32bit Integer from the Stream
	 *
	 * @return mixed The integer, corresponding to the next 32 bits from
	 * 	the stream or false if there are not enough bytes or on error
	 */
	function readint32() {
		$bytes = $this->read(4);
		if (4 != $this->strlen($bytes))
			return false;
		// 'N' = unsigned 32 bit big endian, 'V' = unsigned 32 bit little endian
		$endian_letter = ('big' == $this->endian)? 'N' : 'V';
		$int = unpack($endian_letter, $bytes);
		return array_shift($int);
	}

	/**
	 * Reads an array of 32-bit Integers from the Stream
	 *
	 * @param integer count How many elements should be read
	 * @return mixed Array of integers (as returned by unpack(), so keyed
	 * 	from 1) or false if there isn't enough data or on error
	 */
	function readint32array($count) {
		$bytes = $this->read(4 * $count);
		if (4*$count != $this->strlen($bytes))
			return false;
		$endian_letter = ('big' == $this->endian)? 'N' : 'V';
		return unpack($endian_letter.$count, $bytes);
	}

	/**
	 * Byte-wise substr(), also when mbstring overloads substr().
	 *
	 * @param string $string
	 * @param integer $start
	 * @param integer $length
	 * @return string
	 */
	function substr($string, $start, $length) {
		if ($this->is_overloaded) {
			return mb_substr($string, $start, $length, 'ascii');
		} else {
			return substr($string, $start, $length);
		}
	}

	/**
	 * Byte-wise strlen(), also when mbstring overloads strlen().
	 *
	 * @param string $string
	 * @return integer
	 */
	function strlen($string) {
		if ($this->is_overloaded) {
			return mb_strlen($string, 'ascii');
		} else {
			return strlen($string);
		}
	}

	/**
	 * Splits a string into chunks of $chunk_size bytes, using the native
	 * str_split() when it is available.
	 *
	 * @param string $string
	 * @param integer $chunk_size
	 * @return mixed Array of chunks; the fallback returns false for a
	 * 	chunk size smaller than 1
	 */
	function str_split($string, $chunk_size) {
		if (!function_exists('str_split')) {
			// a step smaller than 1 would never advance the loop below
			if ($chunk_size < 1)
				return false;
			$length = $this->strlen($string);
			$out = array();
			for ($i = 0; $i < $length; $i += $chunk_size)
				$out[] = $this->substr($string, $i, $chunk_size);
			return $out;
		} else {
			return str_split( $string, $chunk_size );
		}
	}

	/**
	 * @return integer The stored position
	 */
	function pos() {
		return $this->_pos;
	}

	/**
	 * The base reader has no underlying handle, so it always reports true.
	 *
	 * @return bool
	 */
	function is_resource() {
		return true;
	}

	/**
	 * Nothing to release in the base reader.
	 *
	 * @return bool
	 */
	function close() {
		return true;
	}
}
|
||||
endif;
|
||||
|
||||
if ( !class_exists( 'POMO_FileReader' ) ):
|
||||
/**
 * Reader backed by a file handle opened with fopen().
 */
class POMO_FileReader extends POMO_Reader {

	/**
	 * File handle returned by fopen(), or false when opening failed.
	 */
	var $_f = false;

	/**
	 * Opens the file for reading. Callers should check is_resource()
	 * afterwards, because a failed fopen() leaves the handle as false.
	 *
	 * @param string $filename path of the file to open
	 */
	function POMO_FileReader($filename) {
		parent::POMO_Reader();
		// 'b' keeps the read binary-safe on platforms that distinguish
		// text and binary modes
		$this->_f = fopen($filename, 'rb');
	}

	/**
	 * Reads up to $bytes bytes from the current file position.
	 * Note that this does not update the position returned by pos();
	 * only seekto() does.
	 *
	 * @param integer $bytes
	 * @return mixed whatever fread() returns
	 */
	function read($bytes) {
		return fread($this->_f, $bytes);
	}

	/**
	 * Moves to an absolute offset in the file.
	 *
	 * @param integer $pos offset from the beginning of the file
	 * @return bool false if fseek() failed, true otherwise
	 */
	function seekto($pos) {
		if ( -1 == fseek($this->_f, $pos, SEEK_SET)) {
			return false;
		}
		$this->_pos = $pos;
		return true;
	}

	/**
	 * @return bool whether the file was opened successfully and is still open
	 */
	function is_resource() {
		return is_resource($this->_f);
	}

	/**
	 * @return bool whether the end of the file has been reached
	 */
	function feof() {
		return feof($this->_f);
	}

	/**
	 * Closes the file handle.
	 *
	 * @return bool result of fclose()
	 */
	function close() {
		return fclose($this->_f);
	}

	/**
	 * Reads everything from the current position to the end of the file.
	 *
	 * @return string the remaining contents; empty string if there is no
	 * 	usable handle
	 */
	function read_all() {
		$all = '';
		// without a valid handle feof() never becomes true
		if ( !$this->is_resource() )
			return $all;
		while ( !$this->feof() ) {
			$chunk = $this->read(4096);
			// stop on a read error instead of looping forever
			if ( false === $chunk )
				break;
			$all .= $chunk;
		}
		return $all;
	}
}
|
||||
endif;
|
||||
|
||||
if ( !class_exists( 'POMO_StringReader' ) ):
|
||||
/**
|
||||
* Provides file-like methods for manipulating a string instead
|
||||
* of a physical file.
|
||||
*/
|
||||
class POMO_StringReader {
|
||||
var $_pos;
|
||||
var $_str;
|
||||
|
||||
class POMO_StringReader extends POMO_Reader {
|
||||
|
||||
var $_str = '';
|
||||
|
||||
function POMO_StringReader($str = '') {
|
||||
parent::POMO_Reader();
|
||||
$this->_str = $str;
|
||||
$this->_pos = 0;
|
||||
$this->is_overloaded = ((ini_get("mbstring.func_overload") & 2) != 0) && function_exists('mb_substr');
|
||||
}
|
||||
|
||||
function _substr($string, $start, $length) {
|
||||
if ($this->is_overloaded) {
|
||||
return mb_substr($string,$start,$length,'ascii');
|
||||
} else {
|
||||
return substr($string,$start,$length);
|
||||
}
|
||||
}
|
||||
|
||||
function _strlen($string) {
|
||||
if ($this->is_overloaded) {
|
||||
return mb_strlen($string,'ascii');
|
||||
} else {
|
||||
return strlen($string);
|
||||
}
|
||||
}
|
||||
|
||||
function read($bytes) {
|
||||
$data = $this->_substr($this->_str, $this->_pos, $bytes);
|
||||
$data = $this->substr($this->_str, $this->_pos, $bytes);
|
||||
$this->_pos += $bytes;
|
||||
if ($this->_strlen($this->_str) < $this->_pos) $this->_pos = $this->_strlen($this->_str);
|
||||
if ($this->strlen($this->_str) < $this->_pos) $this->_pos = $this->strlen($this->_str);
|
||||
return $data;
|
||||
}
|
||||
|
||||
function seekto($pos) {
|
||||
$this->_pos = $pos;
|
||||
if ($this->_strlen($this->_str) < $this->_pos) $this->_pos = $this->_strlen($this->_str);
|
||||
return $this->_pos;
|
||||
}
|
||||
|
||||
function pos() {
|
||||
if ($this->strlen($this->_str) < $this->_pos) $this->_pos = $this->strlen($this->_str);
|
||||
return $this->_pos;
|
||||
}
|
||||
|
||||
function length() {
|
||||
return $this->_strlen($this->_str);
|
||||
return $this->strlen($this->_str);
|
||||
}
|
||||
|
||||
function read_all() {
|
||||
return $this->substr($this->_str, $this->_pos, $this->strlen($this->_str));
|
||||
}
|
||||
|
||||
}
|
||||
endif;
|
||||
|
||||
|
@ -81,61 +199,11 @@ endif;
|
|||
|
||||
if ( !class_exists( 'POMO_CachedIntFileReader' ) ):
|
||||
/**
|
||||
* Allows reading integers from a file.
|
||||
* Reads the contents of the file in the beginning.
|
||||
*/
|
||||
class POMO_CachedIntFileReader extends POMO_CachedFileReader {
|
||||
|
||||
var $endian = 'little';
|
||||
|
||||
/**
|
||||
* Opens a file and caches it.
|
||||
*
|
||||
* @param $filename string name of the file to be opened
|
||||
* @param $endian string endianness of the words in the file, allowed
|
||||
* values are 'little' or 'big'. Default value is 'little'
|
||||
*/
|
||||
function POMO_CachedIntFileReader($filename, $endian = 'little') {
|
||||
$this->endian = $endian;
|
||||
function POMO_CachedIntFileReader($filename) {
|
||||
parent::POMO_CachedFileReader($filename);
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the endianness of the file.
|
||||
*
|
||||
* @param $endian string 'big' or 'little'
|
||||
*/
|
||||
function setEndian($endian) {
|
||||
$this->endian = $endian;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a 32bit Integer from the Stream
|
||||
*
|
||||
* @return mixed The integer, corresponding to the next 32 bits from
|
||||
* the stream or false if there are not enough bytes or on error
|
||||
*/
|
||||
function readint32() {
|
||||
$bytes = $this->read(4);
|
||||
if (4 != $this->_strlen($bytes))
|
||||
return false;
|
||||
$endian_letter = ('big' == $this->endian)? 'N' : 'V';
|
||||
$int = unpack($endian_letter, $bytes);
|
||||
return array_shift($int);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads an array of 32-bit Integers from the Stream
|
||||
*
|
||||
* @param integer count How many elements should be read
|
||||
* @return mixed Array of integers or false if there isn't
|
||||
* enough data or on error
|
||||
*/
|
||||
function readint32array($count) {
|
||||
$bytes = $this->read(4 * $count);
|
||||
if (4*$count != $this->_strlen($bytes))
|
||||
return false;
|
||||
$endian_letter = ('big' == $this->endian)? 'N' : 'V';
|
||||
return unpack($endian_letter.$count, $bytes);
|
||||
}
|
||||
}
|
||||
endif;
|
Loading…
Reference in New Issue