动网论坛,站长建站首选,国内使用量最多的论坛软件 动网论坛官方技术讨论区 站长工具 申请属于您自己的免费论坛
首页 | 新闻资讯 | 网站运营 | 网络编程 | 数据库 | 服务器 | 网页设计 | 图像媒体 | 网络应用 | 搜索优化 | 资源下载 | 动网主机 | DVBOX
    本站内  互联网 ASP论坛  ASP.Net论坛  PHP论坛
  
   PHP → 阅读文章

 介绍一组中文处理工具函数

作者来源: 
阅读 数 187 人次 , 2006-3-29 4:10:00 


<?
/* 中文处理工具函数
--- 空格 ---
string gbspace(string) --------- 每个中文字之间加空格
string gbunspace(string) ------- 每个中文字之间的空格清除
string clear_space(string) ------- 用来清除多余的空格

--- 转换 ---
string gbcase(string,offset) --- 将字符串内的中英文字转换大小写
offset : "upper"  - 字符串全转为大写 (strtoupper)
 "lower"  - 字符串全转为小写 (strtolower)
 "ucwords" - 将字符串每个字第一个字母改大写 (ucwords)
 "ucfirst" - 将字符串第一个字母改大写 (ucfirst)
string gbrev(string) ----------- 颠倒字符串

--- 文字检查 ---
int gb_check(string) ----------- 检查字符串内是否有 gb 字,有会返回 true,
 否则会返回false
int gb_all(string) ------------- 检查字符串内所有字是否有 gb 字,是会返回 true,
 否则会返回false
int gb_non(string) ------------- 检查字符串内所有字并不是 gb 字,是会返回 true,
 否则会返回false
int gblen(string) -------------- 返回字符串长度(中文字只计一字母)

--- 查找、取代、提取 ---
int/array gbpos(haystack,needle,[offset]) ---- 查找字符串 (strpos)
offset : 留空 - 查找第一个出现的位置
 int - 由该位置搜索出现的第一个位置
 "r" - 查找最后一次出现的位置 (strrpos)
 "a" - 将所有查找到的字储存为数组(返回 array)

string gb_replace(needle,str,haystack) -- 查找与取代字符串 (str_replace)
string gb_replace_i(needle,str_f,str_b,haystack) -- 不检查大小写查找与取代字符串
 needle - 查找字母
 str - 取代字母 ( str_f - 该字母前, str_b 该字母后)
 haystack - 字符串

string gbsubstr(string,start,[length]) -- 从string提取出由开始到结尾或长度
length的字符串。
中文字只计一字母,可使用正负数。
string gbstrnear(string,length)  -- 从 string提取最接近 length的字符串。
 length 中 中文字计2个字母。

--- 注意 ---
使用由 form 提交的字符串前,请先用 stripslashes() 处理该字符串,以除去多余的 \ 。

用法:在原 php 代码内加上:
include ("gb.inc");
即可使用以上工具函数。
*/

/**
 * Count the characters in a GB-encoded string.
 *
 * A byte >= 0x81 that is followed by a byte >= 0x40 is treated as one
 * double-byte character; every other byte counts as one character.
 *
 * @param string $string Text to measure.
 * @return int Number of characters (double-byte pairs count once).
 */
function gblen($string) {
    $byteCount = strlen($string);
    $charCount = 0;

    for ($pos = 0; $pos < $byteCount; $charCount++) {
        $lead  = ord(substr($string, $pos, 1));
        $trail = ord(substr($string, $pos + 1, 1));

        // Step over two bytes for a double-byte pair, otherwise one.
        $pos += ($lead >= 0x81 && $trail >= 0x40) ? 2 : 1;
    }

    return $charCount;
}

/**
 * substr() counterpart that counts a GB double-byte character as one unit.
 *
 * A byte >= 0x81 followed by a byte >= 0x40 is treated as one character.
 * $start and $length follow substr() conventions (negative values count
 * from the end), but are expressed in characters rather than bytes.
 *
 * @param string     $string Source text.
 * @param int        $start  Character offset; negative counts from the end.
 * @param int|string $length Optional character count; "" (the default) means
 *                           "to the end of the string". Negative values stop
 *                           that many characters before the end.
 * @return string|null The extracted text, an error message string when
 *                     $length is neither an integer nor empty, or null when
 *                     $start lies beyond the string in either direction.
 */
function gbsubstr($string, $start, $length = "") {
    if (!is_int($length) && $length != "") {
        return "错误:length 值错误(必须为数值)。<br>";
    }
    if ($length == "0") {
        return "";
    }

    // Split the byte string into whole characters.
    $chars = array();
    $byteCount = strlen($string);
    $pos = 0;
    while ($pos < $byteCount) {
        $lead  = ord(substr($string, $pos, 1));
        $trail = ord(substr($string, $pos + 1, 1));
        $width = ($lead >= 0x81 && $trail >= 0x40) ? 2 : 1;
        $chars[] = substr($string, $pos, $width);
        $pos += $width;
    }

    $charCount = count($chars);
    if ($start > $charCount || -$start > $charCount) {
        return;
    }
    $start = (int) $start;

    // After validation a non-integer length can only be the "to the end"
    // marker. A start equal to the character count yields "" (previously it
    // read an undefined offset and returned the whole string).
    if (!is_int($length)) {
        return implode("", array_slice($chars, $start));
    }

    // array_slice() applies the same sign rules to offset and length that
    // the original hand-written branches implemented.
    return implode("", array_slice($chars, $start, $length));
}

/**
 * str_replace() counterpart that only matches on GB character boundaries.
 *
 * The haystack is walked character by character (a byte >= 0x81 followed by
 * a byte >= 0x40 is one double-byte character), so a needle can never match
 * the second byte of one character joined with the first byte of the next.
 *
 * @param string $needle   Text to search for.
 * @param string $string   Replacement text.
 * @param string $haystack Text to search in.
 * @return string The haystack with every aligned occurrence replaced. An
 *                empty needle returns the haystack unchanged.
 */
function gb_replace($needle, $string, $haystack) {
    $needle = (string) $needle;
    $needleLen = strlen($needle);
    if ($needleLen === 0) {
        // An empty needle would match everywhere without advancing.
        return $haystack;
    }

    $haystackLen = strlen($haystack);
    $result = "";
    $pos = 0;
    while ($pos < $haystackLen) {
        // Strict comparison: loose == treats numeric-looking strings such as
        // "1e1" and "10" as equal.
        if (substr($haystack, $pos, $needleLen) === $needle) {
            $result .= $string;
            $pos += $needleLen;
            continue;
        }

        $lead  = ord(substr($haystack, $pos, 1));
        $trail = ord(substr($haystack, $pos + 1, 1));
        $width = ($lead >= 0x81 && $trail >= 0x40) ? 2 : 1;
        $result .= substr($haystack, $pos, $width);
        $pos += $width;
    }

    return $result;
}

/**
 * Case-insensitive, GB-boundary-aware "wrap" of every occurrence of a needle.
 *
 * Each aligned occurrence of $needle (compared after lower-casing both sides
 * with gbcase(), which is defined elsewhere in this file) is kept as found
 * in the haystack and surrounded by $str_f and $str_b.
 *
 * @param string $needle   Text to search for.
 * @param string $str_f    Text inserted before each match.
 * @param string $str_b    Text inserted after each match.
 * @param string $haystack Text to search in.
 * @return string The haystack with every match wrapped. An empty needle
 *                returns the haystack unchanged.
 */
function gb_replace_i($needle, $str_f, $str_b, $haystack) {
    $needleLen = strlen($needle);
    if ($needleLen === 0) {
        // An empty needle would match everywhere without advancing.
        return $haystack;
    }

    // Lower-case the needle once instead of on every loop iteration.
    $needleLower = gbcase($needle, "lower");

    $haystackLen = strlen($haystack);
    $result = "";
    $pos = 0;
    while ($pos < $haystackLen) {
        $candidate = substr($haystack, $pos, $needleLen);
        if (gbcase($candidate, "lower") === $needleLower) {
            $result .= $str_f . $candidate . $str_b;
            $pos += $needleLen;
            continue;
        }

        $lead  = ord(substr($haystack, $pos, 1));
        $trail = ord(substr($haystack, $pos + 1, 1));
        $width = ($lead >= 0x81 && $trail >= 0x40) ? 2 : 1;
        $result .= substr($haystack, $pos, $width);
        $pos += $width;
    }

    return $result;
}



/**
 * strpos() counterpart that works in GB characters instead of bytes.
 *
 * The haystack is walked character by character (a byte >= 0x81 followed by
 * a byte >= 0x40 is one double-byte character); matches are only tested at
 * character boundaries and positions are reported as character indexes.
 *
 * @param string     $haystack Text to search in.
 * @param string     $needle   Text to search for.
 * @param int|string $offset   "" (default): first occurrence;
 *                             int: first occurrence at or after that
 *                             character index;
 *                             "r": last occurrence;
 *                             "a": every occurrence, returned as an array.
 * @return int|array|string|false Character index, array of indexes for "a",
 *                                an error message string for an invalid
 *                                $offset, or false when nothing matches.
 */
function gbpos($haystack, $needle, $offset = "") {
    $mode = "first";
    $atLeast = 0;
    if (is_int($offset)) {
        $atLeast = $offset;
    } else {
        // Strict comparisons: on PHP < 8 a loose 0 == "r" is true, which
        // used to send integer offsets down the wrong branch.
        $offset = strtolower((string) $offset);
        if ($offset === "r") {
            $mode = "last";
        } elseif ($offset === "a") {
            $mode = "all";
        } elseif ($offset !== "") {
            return "错误:offset 值错误。<br>";
        }
    }

    $needle = (string) $needle;
    $needleLen = strlen($needle);
    $haystackLen = strlen($haystack);

    $matches = array();
    $charIndex = 0;
    $pos = 0;
    while ($pos < $haystackLen) {
        if ($charIndex >= $atLeast
            && substr($haystack, $pos, $needleLen) === $needle) {
            if ($mode === "first") {
                // Return at once; a first match at index 0 must not be
                // overwritten by a later one.
                return $charIndex;
            }
            $matches[] = $charIndex;
        }

        $lead  = ord(substr($haystack, $pos, 1));
        $trail = ord(substr($haystack, $pos + 1, 1));
        $pos += ($lead >= 0x81 && $trail >= 0x40) ? 2 : 1;
        $charIndex++;
    }

    if (count($matches) === 0) {
        return false;
    }
    return ($mode === "all") ? $matches : $matches[count($matches) - 1];
}

/**
 * Reverse a GB-encoded string character by character.
 *
 * A byte >= 0x81 followed by a byte >= 0x40 is kept together as one
 * double-byte character so its two bytes are not swapped.
 *
 * @param string $text Text to reverse.
 * @return string The reversed text.
 */
function gbrev($text) {
    $reversed = "";
    $byteCount = strlen($text);
    $pos = 0;
    while ($pos < $byteCount) {
        $lead  = ord(substr($text, $pos, 1));
        $trail = ord(substr($text, $pos + 1, 1));
        $width = ($lead >= 0x81 && $trail >= 0x40) ? 2 : 1;

        // Prepend each whole character to build the reversed string.
        $reversed = substr($text, $pos, $width) . $reversed;
        $pos += $width;
    }
    return $reversed;
}

/**
 * Report whether a string contains at least one GB double-byte character.
 *
 * A double-byte character is a byte >= 0x81 followed by a byte >= 0x40.
 * The whole string is scanned; previously the function returned during the
 * first loop iteration, so only the first character was ever inspected and
 * an empty string produced no return value at all.
 *
 * @param string $text Text to inspect.
 * @return bool True if any double-byte character is present, else false.
 */
function gb_check($text) {
    $byteCount = strlen($text);
    for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead  = ord(substr($text, $pos, 1));
        $trail = ord(substr($text, $pos + 1, 1));
        if ($lead >= 0x81 && $trail >= 0x40) {
            return true;
        }
    }
    return false;
}

/**
 * Report whether a string consists solely of GB double-byte characters.
 *
 * A double-byte character is a byte >= 0x81 followed by a byte >= 0x40.
 * An empty string yields true.
 *
 * @param string $text Text to inspect.
 * @return bool True if every character is double-byte, else false.
 */
function gb_all ($text) {
    $byteCount = strlen($text);
    $pos = 0;
    while ($pos < $byteCount) {
        $lead  = ord(substr($text, $pos, 1));
        $trail = ord(substr($text, $pos + 1, 1));
        if (!($lead >= 0x81 && $trail >= 0x40)) {
            // One single-byte character is enough to decide the answer.
            return false;
        }
        $pos += 2;
    }
    return true;
}

/**
 * Report whether a string contains no GB double-byte characters at all.
 *
 * A double-byte character is a byte >= 0x81 followed by a byte >= 0x40.
 * An empty string yields true.
 *
 * @param string $text Text to inspect.
 * @return bool True if no double-byte character is present, else false.
 */
function gb_non ($text) {
    $byteCount = strlen($text);
    for ($pos = 0; $pos < $byteCount; $pos++) {
        $lead  = ord(substr($text, $pos, 1));
        $trail = ord(substr($text, $pos + 1, 1));
        if ($lead >= 0x81 && $trail >= 0x40) {
            // One double-byte character is enough to decide the answer.
            return false;
        }
    }
    return true;
}


/**
 * Change the letter case of a GB-encoded string without touching the bytes
 * of double-byte characters.
 *
 * A byte >= 0x81 starts a double-byte character; if the next byte is
 * >= 0x40 it is copied through unchanged as the second byte. All other
 * bytes are converted according to $case:
 *   "upper"   - strtoupper() on every byte
 *   "lower"   - strtolower() on every byte
 *   "ucwords" - upper-case the first byte after a space, a newline or a
 *               double-byte character; lower-case the rest
 *   "ucfirst" - upper-case the first converted byte; lower-case the rest
 *
 * @param string $text Text to convert.
 * @param string $case Conversion mode (matched case-insensitively).
 * @return string The converted text, or an error message string when $case
 *                is not one of the four modes.
 */
function gbcase ($text,$case) {
    $case = strtolower($case);
    if (!in_array($case, array("upper", "lower", "ucwords", "ucfirst"), true)) {
        return "函数用法错误。 $case";
    }

    $result = "";
    $expectTrail = false;  // previous byte was the first half of a pair
    $insideWord  = false;  // "ucwords": a byte of the current word was emitted
    $firstDone   = false;  // "ucfirst": the first byte was already converted

    $byteCount = strlen($text);
    for ($pos = 0; $pos < $byteCount; $pos++) {
        $byte = substr($text, $pos, 1);
        $code = ord($byte);

        // First byte of a double-byte character: copy verbatim.
        if (!$expectTrail && $code >= 0x81) {
            $expectTrail = true;
            $insideWord = false;
            $result .= $byte;
            continue;
        }

        // Second byte of a double-byte character: copy verbatim.
        if ($expectTrail && $code >= 0x40) {
            $expectTrail = false;
            $insideWord = false;
            $result .= $byte;
            continue;
        }

        // Any other byte is subject to case conversion.
        switch ($case) {
            case "upper":
                $result .= strtoupper($byte);
                break;
            case "lower":
                $result .= strtolower($byte);
                break;
            case "ucwords":
                $result .= $insideWord ? strtolower($byte) : strtoupper($byte);
                $insideWord = true;
                break;
            case "ucfirst":
                $result .= $firstDone ? strtolower($byte) : strtoupper($byte);
                $firstDone = true;
                break;
        }

        // A space or newline ends the current word.
        if ($byte === " " || $byte === "\n") {
            $insideWord = false;
        }
        $expectTrail = false;
    }

    return $result;
}



// Returns $text with a space inserted around double-byte characters.
// The text is scanned one byte at a time with two state flags:
//   $gb      - 1 when the previous byte was taken as the first byte
//              (>= 0x81) of a double-byte character
//   $english - 1 when the most recent byte was handled as a single-byte one
// A space that is directly followed by another space is dropped. After the
// scan, one leading and one trailing space are removed from the result.
function gbspace ($text) {

$news = "";
$l = strlen($text);
$gb = 0;
$english = 0;

$a = 0;
while ($a < $l) {


$ch = substr($text,$a,1);
$ch2 = substr($text,$a+1,1);
// Skip this byte entirely when it is a space followed by another space.
if (!($ch == " " && $ch2 == " ")) {
if ($gb == 0) {
// Not in the middle of a double-byte character.
if (ord($ch) >= hexdec("0x81")) {

// First byte of a double-byte character.
if ($english == 1) {
// Coming from single-byte text: put a space in front unless the previous
// byte already is a space or a newline.
if ((substr($text,$a-1,1) == " ") || (substr($text,$a-1,1) == "\n")) {
$news .= "$ch";
} else {
$news .= " $ch";
}
$english = 0;
$gb = 1;
} else {
$gb = 1;
$english = 0;
$news .= $ch;
}
} else {
// Ordinary single-byte character: copy as is.
$english = 1;
$gb = 0;
$news .= $ch;
}

} else {
// Previous byte was the first byte of a double-byte character.
if (ord($ch) >= hexdec("0x40")) {
if ($english == 0) {
// Second byte: append a space after it unless the next byte already is a
// space or a newline.
if ((substr($text,$a+1,1) == " ")|| (substr($text,$a+1,1) == "\n")) {
$news .= "$ch";
} else {
$news .= "$ch ";
}
} else {
// NOTE(review): $english is reset to 0 whenever $gb is set to 1, so this
// branch looks unreachable - confirm before relying on it.
$news .= " $ch";
}
} else {
// Byte below 0x40 cannot be a second byte; treat it as single-byte text.
$english = 1;
$news .= "$ch";
}
$gb = 0;
}
}
$a++;
} // end of while

// Strip one leading and one trailing space from the result.

$l = strlen($news);
if (substr($news,0,1) == " ") {
$news = substr($news,1);
}
$l = strlen($news);
if (substr($news,$l-1,1) == " ") {
$news = substr($news,0,$l-1);
}
return $news;
}

// Returns $text with spaces removed where they sit between double-byte
// characters. A double-byte character is a byte >= 0x81 followed by a byte
// >= 0x40. State kept while scanning:
//   $last_space - 1 when the last thing written to the output was a space
//                 (also 1 at the start, so leading spaces are dropped)
//   $chi        - 1 when the last character seen was double-byte, or a
//                 space in front of a double-byte character was handled
// After the scan, one leading and one trailing space are removed.
function gbunspace($text) {
$news = "";
$l = strlen($text);
$a = 0;
$last_space = 1;
while ($a < $l) {

$ch = substr($text,$a,1);
$ch2 = substr($text,$a+1,1);
$ch3 = substr($text,$a+2,1);
// On the final byte, pretend a space was just written so that a trailing
// space is not copied.
if (($a + 1) == $l ) {
$last_space = 1;
}
if ($ch == " ") {
// A space is only considered when the output does not already end in one.
if ($last_space == 0) {
if (ord($ch2) >= hexdec("0x81") && ord($ch3) >= hexdec("0x40")) {
// Space followed by a double-byte character: keep it only when the
// preceding character was not double-byte.
if ($chi == 0) {
$news .= " ";
$last_space = 1;
}
$chi=1;


} elseif ($ch2 != " ") {
// Space followed by a single-byte, non-space character: keep it.
$news .= " ";
$chi = 0;
$last_space = 1;
}
// Otherwise the next byte is another space: drop this one.
}
} else {
if (ord($ch) >= hexdec("0x81") && ord($ch2) >= hexdec("0x40")) {
// Double-byte character: copy both bytes and skip the second one.
$chi = 1;
$a++;
$news .= $ch . $ch2;
$last_space = 0;

} else {
// Single-byte character: copy as is.
$chi = 0;
$news .= $ch;
$last_space = 0;
}

}
$a++;
}
// Strip one leading and one trailing space from the result.

$l = strlen($news);
if (substr($news,0,1) == " ") {
$news = substr($news,1);
}
$l = strlen($news);
if (substr($news,$l-1,1) == " ") {
$news = substr($news,0,$l-1);
}
return $news;



} // end of function

/**
 * Truncate a GB-encoded string to at most $length bytes without splitting
 * a double-byte character.
 *
 * A double-byte character (a byte >= 0x81 followed by a byte >= 0x40)
 * counts as two towards $length; every other byte counts as one. Whole
 * characters are taken from the start of $text for as long as they fit.
 *
 * The previous stop test compared the byte offset to $length and
 * $length - 1 for equality. That test could be stepped over (for example
 * $length 0, or $length 1 with a leading double-byte character), which
 * returned the entire string, and it also stopped one byte early when a
 * single-byte character would still have fitted.
 *
 * @param string $text   Text to truncate.
 * @param int    $length Maximum size in bytes.
 * @return string The longest prefix of whole characters that fits.
 */
function gbstrnear($text,$length) {
    $byteCount = strlen($text);
    $result = "";
    $pos = 0;
    while ($pos < $byteCount) {
        $lead  = ord(substr($text, $pos, 1));
        $trail = ord(substr($text, $pos + 1, 1));
        $width = ($lead >= 0x81 && $trail >= 0x40) ? 2 : 1;

        // Stop before the first character that would exceed the limit.
        if ($pos + $width > $length) {
            break;
        }
        $result .= substr($text, $pos, $width);
        $pos += $width;
    }
    return $result;
}

/**
 * Collapse every run of consecutive spaces into a single space.
 *
 * Only the space character (0x20) is affected; leading and trailing
 * spaces are kept, reduced to one each if they were runs.
 *
 * @param string $text Text to clean.
 * @return string The text with repeated spaces squeezed.
 */
function clear_space($text) {
    // Dropping each space that is followed by another space is the same as
    // replacing each run of two or more spaces with one space.
    return preg_replace('/ {2,}/', ' ', $text);
}


?>

 
 收藏本文  打印本文  论坛讨论  关闭窗口
· 上一篇:PHP中一个文档中没有的特色
· 下一篇:在PHP中以root身份运行外部命令
· 生成Excel文件范例(1):客户端
· 一个模仿oso的论坛程序(之三)
· PHP 4.06正式版发布,修正了许多BUG,更加稳定
· PHP 4.0.0中session.save_path的bug
· 使用 php4 加速 web 传输   


关于本站 | 联系我们 | 业务合作 | 客户案例 | 诚聘英才 | 广告合作 | 收藏本站
海口动网先锋网络科技有限公司版权所有
Copyright © 2000 - 2006 Cndw.Com
中华人民共和国电信与信息服务业务经营许可证编号 琼 ICP 020077