您的位置:首页 > 编程语言 > PHP开发

php字符串截取,检测字符串编码方法

2012-06-28 16:30 519 查看
<?php
/**
 * Cut a slice out of a UTF-8 string without splitting a multi-byte character.
 *
 * Both $start and $length are measured in BYTES, not characters:
 *  - scanning begins at the first character boundary at or after byte $start;
 *  - whole characters are copied as long as the current character begins
 *    before byte ($start + $length), so the result may run a few bytes past
 *    that limit rather than cut a character in half.
 * If any bytes of $str remain after the copied slice, '...' is appended.
 *
 * @param string $str    UTF-8 encoded text.
 * @param int    $start  Byte offset at which to start looking for a character.
 * @param int    $length Byte budget for the slice.
 * @return string The slice, followed by '...' when the input was truncated.
 */
function subString($str, $start, $length) {
    $str   = (string) $str;
    $total = strlen($str);
    $end   = $start + $length;

    // Walk whole characters until the requested start offset is reached.
    // The bound on $total keeps us from reading past the end of the string.
    $i = 0;
    while ($i < $start && $i < $total) {
        $ord = ord($str[$i]);
        // Sequence width from the lead byte; stray continuation bytes
        // (128-191) are stepped over one at a time.
        $i += $ord < 192 ? 1 : ($ord < 224 ? 2 : ($ord < 240 ? 3 : 4));
    }

    // Copy whole characters until the byte budget is used up.
    $result = '';
    while ($i < $end && $i < $total) {
        $ord   = ord($str[$i]);
        $width = $ord < 192 ? 1 : ($ord < 224 ? 2 : ($ord < 240 ? 3 : 4));
        // substr() clamps at the end of the string, so a truncated trailing
        // sequence does not trigger an out-of-range offset warning.
        $result .= substr($str, $i, $width);
        $i += $width;
    }

    // Anything left over means the text was shortened.
    if ($i < $total) {
        $result .= '...';
    }
    return $result;
}

/**
 * Detect whether a string is UTF-8 or GB2312 and return it in $outEncoding.
 *
 * Detection validates the whole string: if it is well-formed UTF-8 (pure
 * ASCII included) it is treated as UTF-8, otherwise it is assumed to be
 * GB2312. Looking only at the first non-ASCII bytes is not reliable, because
 * 2-byte UTF-8 sequences and GB2312 pairs overlap in their byte ranges.
 *
 * @param string $string      Text in either UTF-8 or GB2312.
 * @param string $outEncoding Target encoding name understood by iconv().
 * @return string|false The input unchanged when it is already in the target
 *                      encoding, otherwise the result of iconv(), which is
 *                      false when the conversion fails.
 */
function safeEncoding($string, $outEncoding = 'UTF-8') {
    // With the /u modifier PCRE refuses subjects that are not valid UTF-8,
    // so an empty pattern matches (returns 1) only for well-formed input.
    $encoding = preg_match('//u', (string) $string) === 1 ? 'UTF-8' : 'GB2312';

    // Encoding names are case-insensitive; skip the conversion when no work
    // is needed.
    if (strtoupper($encoding) === strtoupper($outEncoding)) {
        return $string;
    }
    return iconv($encoding, $outEncoding, $string);
}

/**
 * Character-aware counting and slicing of UTF-8 strings using only byte
 * functions (no mbstring dependency).
 *
 * A "word" in the method names means one UTF-8 character. Character widths
 * are taken from the lead byte alone; the sequence is not validated.
 */
class splite_utf8
{
    /**
     * Byte width of the UTF-8 sequence announced by the byte at $offset.
     *
     * Bytes below 192 (ASCII and stray continuation bytes) count as width 1.
     *
     * @param string $str    Text being scanned.
     * @param int    $offset Byte offset of a lead byte; must be < strlen($str).
     * @return int 1, 2, 3 or 4.
     */
    private function sequence_width($str, $offset)
    {
        $lead = ord($str[$offset]);

        if ($lead >= 240) {
            return 4;
        }
        if ($lead >= 224) {
            return 3;
        }
        if ($lead >= 192) {
            return 2;
        }
        return 1;
    }

    /**
     * Count the UTF-8 characters in $str.
     *
     * @param string $str    UTF-8 encoded text.
     * @param int    $length Starting value added to the count (defaults to 0).
     * @return int $length plus the number of characters; an empty string
     *             contributes 0.
     */
    public function count_word($str, $length = 0)
    {
        $str   = (string) $str;
        $bytes = strlen($str);

        // Iterate over byte offsets instead of recursing on shrinking copies:
        // linear time, constant stack depth, and correct for ''.
        $offset = 0;
        while ($offset < $bytes) {
            $offset += $this->sequence_width($str, $offset);
            $length++;
        }

        return $length;
    }

    /**
     * Extract a run of characters from a UTF-8 string.
     *
     * @param string $str    UTF-8 encoded text.
     * @param int    $start  Index of the first character to take; a negative
     *                       value counts back from the end of the string.
     * @param int    $length Number of characters to take; -1 (the default)
     *                       means everything from $start to the end. Zero or
     *                       any other negative value yields ''.
     * @return string The selected characters.
     */
    public function splite_mulit_utf8_word($str, $start = 0, $length = -1)
    {
        $str   = (string) $str;
        $bytes = strlen($str);

        if ($start < 0) {
            $start = $this->count_word($str) + $start;
        }

        // Skip $start characters to find the byte offset where the slice begins.
        $offset = 0;
        for ($i = 0; $i < $start && $offset < $bytes; $i++) {
            $offset += $this->sequence_width($str, $offset);
        }

        // Loose comparison on purpose so a numeric string "-1" is accepted too.
        if ($length == -1) {
            return (string) substr($str, $offset);
        }

        // Advance over $length more characters and cut the byte range once.
        $begin = $offset;
        for ($i = 0; $i < $length && $offset < $bytes; $i++) {
            $offset += $this->sequence_width($str, $offset);
        }

        // substr() clamps the length, so a truncated final sequence is
        // returned as-is rather than reading past the end.
        return (string) substr($str, $begin, $offset - $begin);
    }
}

// Demo: count the characters of a mixed Chinese/ASCII string, then extract
// two characters starting at character index 3.
$utf = new splite_utf8();
$text = '的萨芬dfdf!@#$%^&*I()';

$length = $utf->count_word($text);
// "\n" is the newline escape; the former "/n" printed a literal slash and n.
echo $length . "\n";

$word = $utf->splite_mulit_utf8_word($text, 3, 2);
var_dump($word);
?>
内容来自用户分享和网络整理,不保证内容的准确性,如有侵权内容,可联系管理员处理 点击这里给我发消息
标签: