PHP抓取各大搜索引擎收录量
使用代码时请注意抓取页面的字符集问题，蘑菇展示的代码均为UTF-8编码。
// Strip HTML markup: every "<...>" span in $str is replaced by an empty string.
// Returns whatever preg_replace() produces for the input.
function removehtml($str){
    $tagPattern = "/<[^>]*>/";
    $withoutTags = preg_replace($tagPattern, "", $str);
    return $withoutTags;
}
// Normalize a URL for use in a "site:" query.
// Steps: trim surrounding whitespace, remove a leading "http://" or "https://"
// scheme (case-insensitive), then remove a leading "www." (case-insensitive).
// Only the prefix is touched, so a "http://" that appears later in the string
// (for example inside a query parameter) is preserved.
// Returns the remaining string; any path or query part is left as is.
function format_url($url){
    $url = trim($url);
    // Anchored at the start so that only the scheme prefix is stripped.
    $url = preg_replace('#^https?://#i', '', $url);
    if (strncasecmp($url, 'www.', 4) === 0) {
        $url = substr($url, 4);
    }
    return $url;
}
// Look up the page count Baidu reports for a "site:" query.
// $url: the site to query, without scheme (e.g. the output of format_url()).
// Returns the text captured from the result page, or 0 when the page cannot be
// fetched, cannot be converted, or does not contain the expected phrase.
function baidu($url) {
    // Encode the caller-supplied part so characters such as "&" or spaces
    // cannot alter the query string.
    $url = 'http://www.baidu.com/s?wd=site:'.urlencode($url);
    $content = file_get_contents($url);
    if ($content === false) {
        return 0;
    }
    // The pattern below is written in UTF-8, so the page is converted from GBK first.
    $content = iconv('GBK','UTF-8',$content);
    if ($content === false) {
        return 0;
    }
    // Guard the match: reading $out[1] without a match is an undefined index.
    if (!preg_match('/百度一下,找到相关网页(.*?)篇/', $content, $out)) {
        return 0;
    }
    $num = $out[1];
    return $num?$num:0;
}
// Look up the result count Google reports for a "site:" query.
// $url: the site to query, without scheme (e.g. the output of format_url()).
// Returns the text found between "about" and "from" on the result page with
// HTML tags removed, or 0 when the page cannot be fetched or the text is absent.
function google($url) {
    // Encode the caller-supplied part so characters such as "&" or spaces
    // cannot alter the query string.
    $url = 'http://www.google.com.hk/search?hl=en&q=site:'.urlencode($url);
    $content = file_get_contents($url);
    if ($content === false) {
        return 0;
    }
    // Guard the match: reading $out[1] without a match is an undefined index.
    if (!preg_match('/about(.*?)from/', $content, $out)) {
        return 0;
    }
    // The captured fragment may contain markup, so tags are stripped from it.
    $num = removehtml($out[1]);
    return $num?$num:0;
}
// Look up the result count Soso reports for a "site:" query.
// $url: the site to query, without scheme (e.g. the output of format_url()).
// Returns the text captured from the result page, or 0 when the page cannot be
// fetched, cannot be converted, or does not contain the expected phrase.
function soso($url) {
    // Encode the caller-supplied part so characters such as "&" or spaces
    // cannot alter the query string.
    $url = 'http://www.soso.com/q?pid=s.idx&w=site:'.urlencode($url);
    $content = file_get_contents($url);
    if ($content === false) {
        return 0;
    }
    // The pattern below is written in UTF-8, so the page is converted from GBK first.
    $content = iconv('GBK','UTF-8',$content);
    if ($content === false) {
        return 0;
    }
    // Guard the match: reading $out[1] without a match is an undefined index.
    if (!preg_match('/搜索到约(.*?)项结果/', $content, $out)) {
        return 0;
    }
    $num = $out[1];
    return $num?$num:0;
}
// Look up the page count Sogou reports for a "site:" query.
// $url: the site to query, without scheme (e.g. the output of format_url()).
// Returns the text captured from the result page, or 0 when the page cannot be
// fetched, cannot be converted, or does not contain the expected phrase.
function sogou($url) {
    // Encode the caller-supplied part so characters such as "&" or spaces
    // cannot alter the query string.
    $url = 'http://www.sogou.com/web?query=site:'.urlencode($url);
    $content = file_get_contents($url);
    if ($content === false) {
        return 0;
    }
    // The pattern below is written in UTF-8, so the page is converted from GBK first.
    $content = iconv('GBK','UTF-8',$content);
    if ($content === false) {
        return 0;
    }
    // Guard the match: reading $out[1] without a match is an undefined index.
    if (!preg_match('/找到(.*?)个网页/', $content, $out)) {
        return 0;
    }
    $num = $out[1];
    return $num?$num:0;
}
// Look up the result count Bing (cn.bing.com) reports for a "site:" query.
// $url: the site to query, without scheme (e.g. the output of format_url()).
// Returns the text captured from the result page, or 0 when the page cannot be
// fetched or does not contain the expected phrase.
function bing($url) {
    // Encode the caller-supplied part so characters such as "&" or spaces
    // cannot alter the query string.
    $url = 'http://cn.bing.com/search?q=site:'.urlencode($url);
    $content = file_get_contents($url);
    if ($content === false) {
        return 0;
    }
    // Guard the match: reading $out[1] without a match is an undefined index.
    if (!preg_match('/共(.*?) 条/', $content, $out)) {
        return 0;
    }
    $num = $out[1];
    return $num?$num:0;
}
// Look up the result count Yahoo reports for a "site:" query.
// $url: the site to query, without scheme (e.g. the output of format_url()).
// Returns the contents of the <strong id="resultCount"> element on the result
// page, or 0 when the page cannot be fetched or the element is absent.
function yahoo($url) {
    // Encode the caller-supplied part so characters such as "&" or spaces
    // cannot alter the query string.
    $url = 'http://search.yahoo.com/search?fr=siteexplorer&p=site:'.urlencode($url);
    $content = file_get_contents($url);
    if ($content === false) {
        return 0;
    }
    // Match against the fetched page ($content). Searching an undefined
    // variable here would never match and the function would always return 0.
    if (!preg_match('/\<strong id\=\"resultCount\"\>([\s\S]*?)\<\/strong\>/', $content, $out)) {
        return 0;
    }
    $num = $out[1];
    return $num?$num:0;
}
// Look up the result count Youdao reports for a "site:" query.
// $url: the site to query, without scheme (e.g. the output of format_url()).
// Returns the text captured from the result page, or 0 when the page cannot be
// fetched or does not contain the expected phrase.
function youdao($url) {
    // Encode the caller-supplied part so characters such as "&" or spaces
    // cannot alter the query string.
    $url = 'http://www.youdao.com/search?q=site:'.urlencode($url);
    $content = file_get_contents($url);
    if ($content === false) {
        return 0;
    }
    // Guard the match: reading $out[1] without a match is an undefined index.
    if (!preg_match('/共约(.*?)条结果/', $content, $out)) {
        return 0;
    }
    $num = $out[1];
    return $num?$num:0;
}
// Demo run: normalize the target site once, then print what each engine
// function returns, in the listed order and with no separator between values.
$url = format_url('http://www.365coding.com');
foreach (array('baidu', 'google', 'soso', 'sogou', 'bing', 'yahoo', 'youdao') as $engine) {
    echo $engine($url);
}
?>



