PHP制作百度词典查词采集器
网络编程 2021-07-05 09:08 www.168986.cn 编程入门
这篇文章主要介绍了PHP制作百度词典查词采集器的相关资料,需要的朋友可以参考下
百度dict 采集样本
这是我写的一个采集器,用于采集百度dict词典翻译后的所有结果数据,附带了13.5w单词库和一个简单的采集案例。这里我把主要的类 dict.class.php 放出来,项目地址 https://github.com/widuu/baidu_dict ,有需要的直接fork就可以了~么么哒,这东西用的人很少,所以有用的兄弟拿走了哈~
<?php
/**
 * dict.class.php - scrapes the translation page of Baidu Dict for one word.
 *
 * @copyright  (C) 2014 widuu
 * @license    http://www.widuu.com
 * @lastmodify 2014-2-15
 */
header("content-type:text/html;charset=utf8");

class Dict
{
    /** @var string Word currently being looked up (set by content()). */
    private $word;

    /** @var int Maximum number of phrase entries returned by getPhrase(). */
    private static $num = 10;

    /**
     * @var array Successfully fetched page bodies keyed by word, so that the
     *            six extractors used by content() share one HTTP request.
     */
    private $pages = array();

    public function __construct()
    {
    }

    /**
     * Public entry point: look a word up and return everything that was scraped.
     *
     * @param string $word English word to look up
     * @return array array(
     *     "symbol"  => phonetic symbols,   array('en' => ..., 'us' => ...)
     *     "pro"     => pronunciation URLs, array('en' => ..., 'us' => ...)
     *     "example" => example sentences
     *     "explain" => concise meanings
     *     "synonym" => synonyms / antonyms
     *     "phrase"  => phrases
     * )
     */
    public function content($word)
    {
        $this->word = $word;

        return array(
            "symbol"  => $this->Pronounced(),  // phonetic symbols
            "pro"     => $this->getSay(),      // pronunciation
            "example" => $this->getExample(),  // example sentences
            "explain" => $this->getExplain(),  // concise meanings
            "synonym" => $this->getSynonym(),  // synonyms / antonyms
            "phrase"  => $this->getPhrase(),   // phrases
        );
    }

    /**
     * Fetch the Baidu Dict result page for the current word with cURL (HTTP GET).
     *
     * A non-empty response is cached per word. On a cURL error the message is
     * echoed (as the original code did) and an empty string is returned, so the
     * extractors simply find nothing.
     *
     * @return string page body, or '' when the request failed
     */
    private function getContent()
    {
        $word = (string) $this->word;
        if (isset($this->pages[$word])) {
            return $this->pages[$word];
        }

        $useragent = "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:23.0) Gecko/20100101 Firefox/23.0";
        // urlencode() keeps words containing spaces or non-ASCII characters from breaking the query string.
        $url = "http://dict.baidu.com/s?wd=" . urlencode($word);

        $ch = curl_init();
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_USERAGENT, $useragent);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, TRUE);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);
        curl_setopt($ch, CURLOPT_HTTPGET, 1);
        curl_setopt($ch, CURLOPT_AUTOREFERER, 1);
        curl_setopt($ch, CURLOPT_HEADER, 0);
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);
        $result = curl_exec($ch);
        if (curl_errno($ch)) {
            echo 'Errno' . curl_error($ch);
        }
        curl_close($ch);

        if (!is_string($result)) {
            return '';
        }
        if ($result !== '') {
            // Only successful, non-empty bodies are cached so a failed fetch can be retried.
            $this->pages[$word] = $result;
        }
        return $result;
    }

    /**
     * Extract the phonetic symbols.
     *
     * @return array array('en' => British, 'us' => American); an entry is null when not found
     */
    private function Pronounced()
    {
        $data = $this->getContent();
        preg_match_all('/"EN-US">(.*)<\/b>/Ui', $data, $pronounced);

        return array(
            'en' => isset($pronounced[1][0]) ? $pronounced[1][0] : null,
            'us' => isset($pronounced[1][1]) ? $pronounced[1][1] : null,
        );
    }

    /**
     * Extract the pronunciation audio URLs (the first two url="..." attributes on the page).
     *
     * @return array array('en' => British, 'us' => American); an entry is null when not found
     */
    private function getSay()
    {
        $data = $this->getContent();
        preg_match_all('/url="(.*)"/Ui', $data, $pronounced);

        return array(
            'en' => isset($pronounced[1][0]) ? $pronounced[1][0] : null,
            'us' => isset($pronounced[1][1]) ? $pronounced[1][1] : null,
        );
    }

    /**
     * Extract the example sentences from the inline "var example_data = ...];" script.
     *
     * The first string of every innermost array is taken as one token; tokens of
     * one sentence are joined with spaces and consecutive sentences are paired.
     *
     * @return array list of array(sentence, next sentence) pairs; empty when the page has no example data
     */
    private function getExample()
    {
        $data = $this->getContent();
        preg_match_all('/var example_data = (.*)\];/Us', $data, $example);
        if (!isset($example[1][0]) || $example[1][0] === '') {
            return array();
        }

        $sentences = array();
        $groups = explode("[[[", "[[[" . ltrim($example[1][0], "["));
        foreach ($groups as $group) {
            foreach (explode("[[", "[[" . $group) as $chunk) {
                preg_match_all('/\["(.*)",/Us', "[" . $chunk, $match);
                if (!empty($match[1])) {
                    // implode(separator, array): the reversed legacy order is rejected by PHP 8.
                    $sentences[] = implode(" ", $match[1]);
                }
            }
        }

        if (empty($sentences)) {
            return array();
        }

        // Round-trip through "@" exactly like the original so pairing stays identical.
        $flat = explode("@", trim(implode("@", $sentences), "@"));
        return array_chunk($flat, 2);
    }

    /**
     * Extract the concise meanings.
     *
     * @return array array("x" => array(part of speech => meaning), "b" => array(label => related word form))
     */
    private function getExplain()
    {
        $data = $this->getContent();
        preg_match_all('/id="en-simple-means">(.*)<div(\s+)class="source">/Us', $data, $explain);
        $section = isset($explain[1][0]) ? $explain[1][0] : '';

        preg_match_all(
            '/<p><strong>(?P<adj>.*)<\/strong><span>(?P<name>.*)<\/span><\/p>/Us',
            $section,
            $meanings
        );
        preg_match_all(
            '/<span>(?P<tag>[^>]+)<a(\s+)href="(.*)">(?P<word>.*)<\/a><\/span>/Us',
            $section,
            $forms
        );

        $byPartOfSpeech = array();
        foreach ($meanings["adj"] as $i => $partOfSpeech) {
            $byPartOfSpeech[$partOfSpeech] = $meanings["name"][$i];
        }

        $byLabel = array();
        foreach ($forms["tag"] as $i => $label) {
            $byLabel[$label] = strip_tags($forms["word"][$i]);
        }

        return array("x" => $byPartOfSpeech, "b" => $byLabel);
    }

    /**
     * Extract synonyms and antonyms.
     *
     * @return array nested array: group index => part of speech => title => words
     *               (the original notes index 0 as synonyms and 1 as antonyms)
     */
    private function getSynonym()
    {
        $data = $this->getContent();
        preg_match_all('/id="en-syn-ant">(.*)<div(\s+)class="source">/Us', $data, $synonym);
        $section = isset($synonym[1][0]) ? $synonym[1][0] : '';

        $groups = array();
        foreach (explode("</dl>", $section) as $index => $block) {
            // The page markup puts a literal "&nbsp;" entity right after the part of speech.
            preg_match_all(
                '/<strong>(?P<adj>.*)&nbsp;<\/strong><\/div><div(\s+)class="syn-ant-list"><ul>(?<content>.*)<\/ul>/Us',
                $block,
                $found
            );
            $groups[$index] = array(
                "adj"     => $found["adj"],
                "content" => $found["content"],
            );
        }

        $result = array();
        foreach ($groups as $index => $group) {
            foreach ($group["content"] as $k => $list) {
                if (empty($list)) {
                    continue;
                }
                preg_match_all('/<li><p>(?P<title>.*)<\/p>(?P<value>.*)<\/li>/Us', $list, $items);
                foreach ($items['title'] as $m => $title) {
                    // Each closing anchor becomes a space so adjacent words stay separated.
                    $words = strip_tags(str_replace("</a>", " ", $items["value"][$m]));
                    $result[$index][$group["adj"][$k]][$title] = $words;
                }
            }
        }
        return $result;
    }

    /**
     * Extract phrases (at most self::$num entries).
     *
     * @return array phrase => meaning; when a phrase has example lines the value
     *               is a nested array wrapping the meaning and the example pairs
     */
    private function getPhrase()
    {
        $data = $this->getContent();
        preg_match_all('/id="en-phrase">(.*)<div class="source">/Us', $data, $phrase);
        if (!isset($phrase[1][0])) {
            return array();
        }

        $entries = array_slice(explode("</dd>", $phrase[1][0]), 0, self::$num);
        $result = array();
        foreach ($entries as $entry) {
            $parts = explode("</p>", $entry);
            $n = count($parts);
            if ($n < 2) {
                // Trailing markup after the last </dd> carries no phrase/meaning pair.
                continue;
            }

            $phraseKey = trim(str_replace("&nbsp;", "", strip_tags($parts[0])));
            $result[$phraseKey] = strip_tags($parts[1]);
            if ($n <= 3) {
                continue;
            }

            // Everything between the phrase/meaning head and the trailing fragment
            // is example text, grouped two lines at a time.
            $examples = array_slice($parts, 2, $n - 3);
            foreach (array_chunk($examples, 2) as $pair) {
                foreach ($pair as $k => $line) {
                    $pair[$k] = strip_tags($line);
                }
                // Each pair wraps the previous value, matching the original nested shape.
                $result[$phraseKey] = array($result[$phraseKey], $pair);
            }
        }
        return $result;
    }

    /**
     * Convert an array to a string.
     *
     * @param array $data       array to convert
     * @param bool  $isformdata when 0, new_stripslashes() is not applied; optional, defaults to 1
     * @return string exported string, or '' when $data loosely equals ''
     */
    private function array2string($data, $isformdata = 1)
    {
        if ($data == '') {
            return '';
        }
        if ($isformdata) {
            $data = $this->new_stripslashes($data);
        }
        return addslashes(var_export($data, TRUE));
    }

    /**
     * Apply stripslashes() to a string, or recursively to every element of an array.
     *
     * @param mixed $string string or array to process
     * @return mixed
     */
    private function new_stripslashes($string)
    {
        if (!is_array($string)) {
            return stripslashes($string);
        }
        foreach ($string as $key => $val) {
            $string[$key] = $this->new_stripslashes($val);
        }
        return $string;
    }
}

// Usage:
// $dict = new Dict();
// $result = $dict->content("express");
以上就是本文的全部内容了,非常实用的功能,希望小伙伴们能够喜欢。
编程语言
- 如何快速学会编程 如何快速学会ug编程
- 免费学编程的app 推荐12个免费学编程的好网站
- 电脑怎么编程:电脑怎么编程网咯游戏菜单图标
- 如何写代码新手教学 如何写代码新手教学手机
- 基础编程入门教程视频 基础编程入门教程视频华
- 编程演示:编程演示浦丰投针过程
- 乐高编程加盟 乐高积木编程加盟
- 跟我学plc编程 plc编程自学入门视频教程
- ug编程成航林总 ug编程实战视频
- 孩子学编程的好处和坏处
- 初学者学编程该从哪里开始 新手学编程从哪里入
- 慢走丝编程 慢走丝编程难学吗
- 国内十强少儿编程机构 中国少儿编程机构十强有
- 成人计算机速成培训班 成人计算机速成培训班办
- 孩子学编程网上课程哪家好 儿童学编程比较好的
- 代码编程教学入门软件 代码编程教程