247 lines
7.5 KiB
PHP
247 lines
7.5 KiB
PHP
<?php
|
|
|
|
/**
 *      [Discuz!] (C)2001-2099 Comsenz Inc.
 *      This is NOT a freeware, use is subject to license terms
 *
 *      $Id: class_chinese.php 6757 2010-03-25 09:01:29Z cnteacher $
 */
|
|
|
|
// Refuse direct access: this file must be loaded by a script that has
// already defined IN_DISCUZ.
defined('IN_DISCUZ') or exit('Access Denied');

// Directory holding the binary code tables read by the Chinese class.
define('CODETABLE_DIR', DISCUZ_ROOT.'./source/include/table/');
|
|
|
|
/**
 * Charset converter between GBK, BIG5, UTF-8 and "UNICODE" (HTML hexadecimal
 * numeric entities such as &#x554a;).
 *
 * Conversion is delegated to iconv() when the ICONV_ENABLE constant is truthy,
 * the target is not BIG5 and table mode is not forced. Otherwise binary lookup
 * tables under CODETABLE_DIR are used.
 *
 * Table mode: a character that has no table entry, or a multi-byte sequence
 * cut off by the end of the input, is emitted as '?'.
 *
 * NOTE(review): in table mode a BIG5 target is rewritten to GBK by
 * OpenTable(); a GBK source then equals the target and Convert() returns the
 * input untouched (GB2312toBIG5() is only applied for a UTF-8 source).
 * Confirm whether GBK -> BIG5 is expected to work.
 */
class Chinese {

	// Path of the lookup table selected by OpenTable().
	var $table = '';
	// TRUE when Convert() delegates to iconv().
	var $iconv_enabled = false;
	// TRUE when the requested BIG5 target is produced via GBK + GB2312toBIG5().
	var $convertbig5 = false;
	// Lookup table built by OpenTable(); values are hexadecimal strings
	// ('0x'-prefixed except for the UNICODE target).
	var $unicode_table = array();
	var $config = array(
		'SourceLang' => '',
		'TargetLang' => '',
		'GBtoUnicode_table' => 'gb-unicode.table',
		'BIG5toUnicode_table' => 'big5-unicode.table',
		'GBtoBIG5_table' => 'gb-big5.table',
	);

	/**
	 * Set up a converter.
	 *
	 * @param string $SourceLang charset name of the input (normalised by _lang())
	 * @param string $TargetLang charset name of the output (normalised by _lang())
	 * @param bool   $ForceTable TRUE to use the lookup tables even if iconv is enabled
	 */
	function __construct($SourceLang, $TargetLang, $ForceTable = FALSE) {
		$this->config['SourceLang'] = $this->_lang($SourceLang);
		$this->config['TargetLang'] = $this->_lang($TargetLang);

		if(ICONV_ENABLE && $this->config['TargetLang'] != 'BIG5' && !$ForceTable) {
			$this->iconv_enabled = true;
		} else {
			$this->iconv_enabled = false;
			$this->OpenTable();
		}
	}

	/**
	 * PHP 4-style constructor, kept so existing explicit calls keep working.
	 * PHP 8 no longer treats a method named after the class as a constructor,
	 * so the real initialisation lives in __construct().
	 */
	function Chinese($SourceLang, $TargetLang, $ForceTable = FALSE) {
		$this->__construct($SourceLang, $TargetLang, $ForceTable);
	}

	/**
	 * Normalise a charset name to GBK, BIG5, UTF-8 or UNICODE by its prefix.
	 *
	 * @param string $LangCode charset name in any letter case
	 * @return string normalised name, or '' when the prefix is not recognised
	 */
	function _lang($LangCode) {
		$LangCode = strtoupper((string)$LangCode);

		if(substr($LangCode, 0, 2) === 'GB') {
			return 'GBK';
		}
		$prefix = substr($LangCode, 0, 3);
		if($prefix === 'BIG') {
			return 'BIG5';
		}
		if($prefix === 'UTF') {
			return 'UTF-8';
		}
		if($prefix === 'UNI') {
			return 'UNICODE';
		}
		return '';
	}

	/**
	 * Turn a string of hexadecimal digit pairs into raw bytes.
	 *
	 * @param string $hexdata hexadecimal digits, two per output byte
	 * @return string decoded bytes ('' for empty input)
	 */
	function _hex2bin($hexdata) {
		$hexdata = (string)$hexdata;
		$bindata = '';
		$length = strlen($hexdata);
		for($i = 0; $i < $length; $i += 2) {
			$bindata .= chr(hexdec(substr($hexdata, $i, 2)));
		}
		return $bindata;
	}

	/**
	 * Select the lookup table for the configured charsets and load it into
	 * $this->unicode_table.
	 *
	 * The table file is read as consecutive 4-byte records, each holding two
	 * big-endian unsigned 16-bit numbers (key, value). Nothing is loaded when
	 * no table path has been selected; an unreadable table raises an
	 * E_USER_WARNING and leaves the table empty.
	 */
	function OpenTable() {
		$this->unicode_table = array();
		if(!$this->iconv_enabled && $this->config['TargetLang'] == 'BIG5') {
			// BIG5 output is produced as GBK first, then mapped by GB2312toBIG5().
			$this->config['TargetLang'] = 'GBK';
			$this->convertbig5 = TRUE;
		}
		if($this->config['SourceLang'] == 'GBK' || $this->config['TargetLang'] == 'GBK') {
			$this->table = CODETABLE_DIR.$this->config['GBtoUnicode_table'];
		} elseif($this->config['SourceLang'] == 'BIG5' || $this->config['TargetLang'] == 'BIG5') {
			$this->table = CODETABLE_DIR.$this->config['BIG5toUnicode_table'];
		}

		if((string)$this->table === '') {
			// Neither side is GBK/BIG5: there is no table to read.
			return;
		}
		$tabletmp = is_file($this->table) ? file_get_contents($this->table) : false;
		if($tabletmp === false) {
			trigger_error('Chinese: unable to read code table '.$this->table, E_USER_WARNING);
			return;
		}

		$source = $this->config['SourceLang'];
		$target = $this->config['TargetLang'];
		// Stop before a trailing partial record so unpack() always gets 4 bytes.
		$limit = strlen($tabletmp) - 3;
		for($i = 0; $i < $limit; $i += 4) {
			$tmp = unpack('nkey/nvalue', substr($tabletmp, $i, 4));
			if($target == 'UTF-8') {
				$this->unicode_table[$tmp['key']] = '0x'.dechex($tmp['value']);
			} elseif($source == 'UTF-8') {
				$this->unicode_table[$tmp['value']] = '0x'.dechex($tmp['key']);
			} elseif($target == 'UNICODE') {
				$this->unicode_table[$tmp['key']] = dechex($tmp['value']);
			}
		}
	}

	/**
	 * Encode a code point as UTF-8, returned as the DECIMAL values of the
	 * bytes concatenated into one string (e.g. 0x554A gives '229149138').
	 * Values below 0x80 are returned as their own decimal representation and
	 * values of 0x200000 or more give ''. This format is kept for callers.
	 *
	 * @param int $c code point
	 * @return string concatenated decimal byte values
	 */
	function CHSUtoUTF8($c) {
		if($c < 0x80) {
			return ''.$c;
		}
		$str = '';
		$bytes = $this->_utf8Encode($c);
		$count = strlen($bytes);
		for($i = 0; $i < $count; $i++) {
			$str .= ord($bytes[$i]);
		}
		return $str;
	}

	/**
	 * Map GB double-byte characters in a string to BIG5 using the GB-to-BIG5
	 * table file. Every byte >= 160 that has a following byte is treated as
	 * the lead byte of a pair; the pair is replaced by the two bytes stored at
	 * offset (lead - 160) * 510 + (trail - 1) * 2 of the table.
	 *
	 * @param string $c text to convert
	 * @return string converted text; returned unchanged (with an
	 *                E_USER_WARNING) when the table cannot be opened
	 */
	function GB2312toBIG5($c) {
		$c = (string)$c;
		$max = strlen($c) - 1;
		if($max < 1) {
			return $c;
		}

		$path = CODETABLE_DIR.$this->config['GBtoBIG5_table'];
		$f = is_file($path) ? fopen($path, 'rb') : false;
		if(!$f) {
			trigger_error('Chinese: unable to open code table '.$path, E_USER_WARNING);
			return $c;
		}

		for($i = 0; $i < $max; $i++) {
			$h = ord($c[$i]);
			if($h < 160) {
				continue;
			}
			$l = ord($c[$i + 1]);
			if($h == 161 && $l == 64) {
				// Two bytes are written below, so the replacement must be two spaces.
				$gb = "\x20\x20";
			} else {
				$gb = '';
				if(fseek($f, ($h - 160) * 510 + ($l - 1) * 2) === 0) {
					$gb = fread($f, 2);
				}
			}
			// Leave the pair untouched when the table has no full entry for it.
			if(is_string($gb) && strlen($gb) == 2) {
				$c[$i] = $gb[0];
				$c[$i + 1] = $gb[1];
			}
			$i++;
		}
		fclose($f);

		return $c;
	}

	/**
	 * Convert text from the configured source charset to the target charset.
	 *
	 * @param string $SourceText text in the source charset
	 * @return string|false converted text; the input itself when source and
	 *                      target are equal; whatever iconv() returns
	 *                      (FALSE on failure) on the plain iconv path
	 */
	function Convert($SourceText) {
		if($this->config['SourceLang'] == $this->config['TargetLang']) {
			return $SourceText;
		}
		if($this->iconv_enabled) {
			if($this->config['TargetLang'] != 'UNICODE') {
				return iconv($this->config['SourceLang'], $this->config['TargetLang'], $SourceText);
			}
			return $this->_iconvToEntities((string)$SourceText);
		}

		$SourceText = (string)$SourceText;
		if($this->config['TargetLang'] == 'UNICODE') {
			return $this->_tableToEntities($SourceText);
		}
		if($this->config['SourceLang'] == 'UTF-8') {
			$out = $this->_utf8ToLegacy($SourceText);
			return $this->convertbig5 ? $this->GB2312toBIG5($out) : $out;
		}
		return $this->_legacyToUtf8($SourceText);
	}

	/**
	 * Decode one UTF-8 sequence of 1 to 4 bytes into its code point.
	 * The lead-byte masks are the canonical ones; for well-formed sequences
	 * the result is unchanged from the previous wider masks.
	 *
	 * @param string $char a single UTF-8 encoded character
	 * @return int|null code point, or NULL when the length is not 1..4
	 */
	function Utf8_Unicode($char) {
		$char = (string)$char;
		switch(strlen($char)) {
			case 1:
				return ord($char);
			case 2:
				return ((ord($char[0]) & 0x1f) << 6)
					| (ord($char[1]) & 0x3f);
			case 3:
				return ((ord($char[0]) & 0x0f) << 12)
					| ((ord($char[1]) & 0x3f) << 6)
					| (ord($char[2]) & 0x3f);
			case 4:
				return ((ord($char[0]) & 0x07) << 18)
					| ((ord($char[1]) & 0x3f) << 12)
					| ((ord($char[2]) & 0x3f) << 6)
					| (ord($char[3]) & 0x3f);
		}
		return null;
	}

	/**
	 * Encode a code point as raw UTF-8 bytes ('' for values >= 0x200000).
	 */
	function _utf8Encode($c) {
		if($c < 0x80) {
			return chr($c);
		}
		if($c < 0x800) {
			return chr(0xC0 | ($c >> 6)).chr(0x80 | ($c & 0x3F));
		}
		if($c < 0x10000) {
			return chr(0xE0 | ($c >> 12))
				.chr(0x80 | (($c >> 6) & 0x3F))
				.chr(0x80 | ($c & 0x3F));
		}
		if($c < 0x200000) {
			return chr(0xF0 | ($c >> 18))
				.chr(0x80 | (($c >> 12) & 0x3F))
				.chr(0x80 | (($c >> 6) & 0x3F))
				.chr(0x80 | ($c & 0x3F));
		}
		return '';
	}

	/**
	 * Number of bytes announced by a UTF-8 lead byte above 127.
	 */
	function _utf8Width($lead) {
		if($lead >= 0xF0) {
			return 4;
		}
		if($lead >= 0xE0) {
			return 3;
		}
		return 2;
	}

	/**
	 * Look up a table entry and return it as an integer.
	 * Table values are hexadecimal strings, optionally '0x'-prefixed; they are
	 * parsed explicitly because PHP 7+ no longer treats '0x..' strings as
	 * numbers in arithmetic.
	 *
	 * @return int|float|false numeric value, or FALSE when there is no entry
	 */
	function _lookup($key) {
		if(!isset($this->unicode_table[$key])) {
			return false;
		}
		$hex = (string)$this->unicode_table[$key];
		if(strlen($hex) > 2 && $hex[0] == '0' && ($hex[1] == 'x' || $hex[1] == 'X')) {
			$hex = substr($hex, 2);
		}
		return hexdec($hex);
	}

	/**
	 * Read the two bytes at $pos as one big-endian number, or FALSE when
	 * fewer than two bytes remain.
	 */
	function _pairCode($text, $pos, $len) {
		if($pos + 1 >= $len) {
			return false;
		}
		return (ord($text[$pos]) << 8) | ord($text[$pos + 1]);
	}

	/**
	 * Byte value at $pos, or 0 when $pos is past the end of the text.
	 */
	function _byteAt($text, $pos, $len) {
		return $pos < $len ? ord($text[$pos]) : 0;
	}

	/**
	 * iconv mode, UNICODE target: ASCII bytes are copied, every other
	 * character is converted to UTF-8 by iconv() and written as &#x..;.
	 * A character is 2 bytes wide, except for a UTF-8 source where the width
	 * comes from the lead byte. Characters iconv() cannot convert become '?'.
	 */
	function _iconvToEntities($text) {
		$isUtf8 = $this->config['SourceLang'] == 'UTF-8';
		$result = '';
		$len = strlen($text);
		$i = 0;
		while($i < $len) {
			$lead = ord($text[$i]);
			if($lead <= 127) {
				$result .= $text[$i];
				$i++;
				continue;
			}
			$width = $isUtf8 ? $this->_utf8Width($lead) : 2;
			$utf8 = iconv($this->config['SourceLang'], 'UTF-8', substr($text, $i, $width));
			$code = ($utf8 === false || $utf8 === '') ? null : $this->Utf8_Unicode($utf8);
			$result .= $code === null ? '?' : '&#x'.dechex($code).';';
			$i += $width;
		}
		return $result;
	}

	/**
	 * Table mode, UNICODE target: GBK/BIG5 double-byte characters are looked
	 * up and written as &#x..;. For any other source charset the two bytes
	 * are consumed without output, as before.
	 */
	function _tableToEntities($text) {
		$source = $this->config['SourceLang'];
		$known = $source == 'GBK' || $source == 'BIG5';
		// GBK table keys are stored with 0x8080 subtracted.
		$bias = $source == 'GBK' ? 0x8080 : 0;
		$result = '';
		$len = strlen($text);
		$i = 0;
		while($i < $len) {
			if(ord($text[$i]) <= 127) {
				$result .= $text[$i];
				$i++;
				continue;
			}
			if($known) {
				$pair = $this->_pairCode($text, $i, $len);
				$code = $pair === false ? false : $this->_lookup($pair - $bias);
				$result .= $code === false ? '?' : '&#x'.dechex($code).';';
			}
			$i += 2;
		}
		return $result;
	}

	/**
	 * Table mode, UTF-8 source: decode 1- to 3-byte UTF-8 sequences and map
	 * them to GBK (table value + 0x8080) or BIG5 (table value). Bytes that do
	 * not start such a sequence (continuation bytes, 4-byte lead bytes) are
	 * skipped, as before. Nothing is written for multi-byte characters when
	 * the target is neither GBK nor BIG5.
	 */
	function _utf8ToLegacy($text) {
		$target = $this->config['TargetLang'];
		$out = '';
		$len = strlen($text);
		$i = 0;
		while($i < $len) {
			$lead = ord($text[$i++]);
			if($lead < 0x80) {
				$out .= chr($lead);
				continue;
			}
			$high = $lead >> 4;
			if($high == 12 || $high == 13) {
				$b2 = $this->_byteAt($text, $i++, $len);
				$codepoint = (($lead & 0x1F) << 6) | ($b2 & 0x3F);
			} elseif($high == 14) {
				$b2 = $this->_byteAt($text, $i++, $len);
				$b3 = $this->_byteAt($text, $i++, $len);
				$codepoint = (($lead & 0x0F) << 12) | (($b2 & 0x3F) << 6) | ($b3 & 0x3F);
			} else {
				continue;
			}
			if($target != 'GBK' && $target != 'BIG5') {
				continue;
			}
			$mapped = $this->_lookup($codepoint);
			if($mapped === false) {
				$out .= '?';
			} elseif($target == 'GBK') {
				$out .= $this->_hex2bin(dechex($mapped + 0x8080));
			} else {
				$out .= $this->_hex2bin(dechex($mapped));
			}
		}
		return $out;
	}

	/**
	 * Table mode, GBK/BIG5 source: map every double-byte character to its
	 * code point and emit it as UTF-8. For any other source charset the two
	 * bytes are consumed without output, as before.
	 */
	function _legacyToUtf8($text) {
		$source = $this->config['SourceLang'];
		$known = $source == 'GBK' || $source == 'BIG5';
		// GBK table keys are stored with 0x8080 subtracted.
		$bias = $source == 'GBK' ? 0x8080 : 0;
		$ret = '';
		$len = strlen($text);
		$i = 0;
		while($i < $len) {
			if(ord($text[$i]) <= 127) {
				$ret .= $text[$i];
				$i++;
				continue;
			}
			if($known) {
				$pair = $this->_pairCode($text, $i, $len);
				$code = $pair === false ? false : $this->_lookup($pair - $bias);
				$ret .= $code === false ? '?' : $this->_utf8Encode($code);
			}
			$i += 2;
		}
		return $ret;
	}

}
|
|
|
|
?>
|