- 刚写的一个 PHP 拼音分词的东西~
- Post by
yaohuaq
2011-06-23 21:06:44 Thursday Tags:PHP,拼音,分词
-
采用正向最大匹配算法:
PHP代码
- <?php
-
-
-
-
/**
 * Splits a run of concatenated pinyin letters into space-separated syllables
 * using forward maximum matching.
 *
 * The input is scanned left to right. At each position the longest entry of
 * the initials table or the finals table that prefixes the remaining text is
 * consumed. A single space is written after every final, so "nihao" becomes
 * "ni hao". No dictionary of valid syllables is consulted, so ambiguous input
 * is always resolved in favour of the longest final (e.g. "xian" stays one
 * syllable).
 */
class PinyinSplit
{
    /** Initials (shengmu) recognised at the start of a syllable. */
    private $sms = array('b','c','d','f','g','h','j','k','l','m','n','p','q','r','s','t','w','x','y','z','sh','zh','ch');

    /** Finals (yunmu); a space is emitted after each one that is consumed. */
    private $yms = array('a','e','i','o','u','v','ai','au','ao','ei','er','ou','ue','ua','an','en','in','un','ie','uv','uo','ui','iu','ia','ang','ing','ong','eng','uan','uai','ian','iao','uang','ueng','iong','iang');

    /** Segmented text produced by the constructor. */
    private $result = '';

    /**
     * Segments $string immediately; the outcome is available through get().
     *
     * Everything except ASCII letters is discarded and the remainder is
     * lower-cased, because both lookup tables contain lowercase entries only.
     *
     * @param mixed $string Raw pinyin text. Non-scalar values are treated as
     *                      an empty string.
     */
    public function __construct($string)
    {
        $string = is_scalar($string) ? (string) $string : '';
        $string = strtolower((string) preg_replace('/[^a-z]/i', '', $string));

        // Only trailing spaces are trimmed: input that ends on an initial has
        // no trailing space, and cutting a fixed character would eat a letter.
        $this->result = rtrim($this->findsm($string), ' ');
    }

    /**
     * @return string Syllables separated by single spaces, without a
     *                trailing space.
     */
    public function get()
    {
        return $this->result;
    }

    /**
     * Segments $string, trying an initial first at the current position.
     *
     * @param string $string Text still to be segmented.
     * @param string $result Output accumulated so far; new text is appended.
     * @return string $result followed by the segmentation of $string. Every
     *                consumed final is followed by one space.
     */
    public function findsm($string, $result = '')
    {
        return $this->segment((string) $string, (string) $result, true);
    }

    /**
     * Segments $string, trying a final first at the current position.
     *
     * @param string $string Text still to be segmented.
     * @param string $result Output accumulated so far; new text is appended.
     * @return string $result followed by the segmentation of $string. Every
     *                consumed final is followed by one space.
     */
    public function findym($string, $result = '')
    {
        return $this->segment((string) $string, (string) $result, false);
    }

    /**
     * Iterative scanner shared by findsm() and findym().
     *
     * Each pass consumes at least one byte, so the loop terminates for any
     * input, including empty strings and characters found in neither table.
     *
     * @param string $string        Text to segment.
     * @param string $result        Output accumulated so far.
     * @param bool   $expectInitial True to prefer an initial at the current
     *                              position, false to prefer a final.
     * @return string
     */
    private function segment($string, $result, $expectInitial)
    {
        $offset = 0;
        $length = strlen($string);

        while ($offset < $length) {
            $smLen = $this->longestPrefixLength($string, $offset, $this->sms);
            $ymLen = $this->longestPrefixLength($string, $offset, $this->yms);

            if ($smLen > 0 && ($expectInitial || $ymLen === 0)) {
                // An initial opens a syllable; a final is expected next.
                $result .= substr($string, $offset, $smLen);
                $offset += $smLen;
                $expectInitial = false;
            } elseif ($ymLen > 0) {
                // A final closes the syllable, so a separator follows it.
                $result .= substr($string, $offset, $ymLen) . ' ';
                $offset += $ymLen;
                $expectInitial = true;
            } else {
                // Unknown character: copy it through so the scan advances.
                $result .= $string[$offset];
                $offset++;
            }
        }

        return $result;
    }

    /**
     * Returns the length of the longest candidate that occurs in $string at
     * $offset, or 0 when none does.
     *
     * @param string   $string     Text being scanned.
     * @param int      $offset     Position to match at; must be less than
     *                             strlen($string).
     * @param string[] $candidates Table of initials or finals.
     * @return int
     */
    private function longestPrefixLength($string, $offset, array $candidates)
    {
        $best = 0;

        foreach ($candidates as $candidate) {
            $len = strlen($candidate);
            if ($len > $best && substr_compare($string, $candidate, $offset, $len) === 0) {
                $best = $len;
            }
        }

        return $best;
    }
}
-
// Pick the text to segment: the first command-line argument when one was
// given, otherwise the "string" query parameter, otherwise an empty string.
$string = '';
if (isset($argc) && $argc > 1) {
    $string = $argv[1];
} elseif (isset($_GET['string'])) {
    $string = $_GET['string'];
}

// Segment the text and print the result followed by a blank line.
$p = new PinyinSplit($string);
$rs = $p->get();
echo $rs, "\n\n";
- ?>
- 评论:
-
-
- baiyuxiong 2011-06-27 23:17:04
- 给讲讲原理么。
- 个人信息
- 迎接
- 昵称:C7_yaohuaq
- QQ:88523499
- Email:c77cc#vip.qq.com
- 居住地:北京 海淀
- 星座:天蝎
- 毕业院校:广西民族大学
- 工作点:喜讯无限