小偷程序(PHP 搜狗)

时间:2019-11-29 | 作者:klpeng | 分类:IT综合 | 浏览:387 | 评论:0
<?php
// Require a non-empty string keyword. A strict check replaces empty() so the
// literal keyword "0" is still accepted, and an array-valued parameter
// (?key[]=x) is rejected here instead of reaching urlencode().
if (!isset($_GET['key']) || !is_string($_GET['key']) || $_GET['key'] === '') {
    echo '请输入关键词,域名上加?key=';exit;
}

/************************** Fetch the Sogou result page ***********************************/
$key = $_GET['key'];
// The keyword is forwarded exactly as received. The earlier
// mb_convert_encoding() calls discarded their return value, so they never
// changed anything and have been removed.
$url = 'https://www.sogou.com/sogou?query='. urlencode($key) .'&insite=wenwen.sogou.com';

// httpRequest() hands back curl_exec()'s result, which is false on a failed
// transfer; the cast makes the "empty page" fallback explicit.
$content = (string) httpRequest($url);

// Turn root-relative asset references into absolute Sogou URLs. The attribute
// prefix (href=" / src=") is part of the search string, so it must also be part
// of the replacement, otherwise the attribute itself is deleted from the tag.
$assetPrefixes = array(
    'href="/css/'       => 'href="https://www.sogou.com/css/',
    'src="/images/'     => 'src="https://www.sogou.com/images/',
    'src="/js/'         => 'src="https://www.sogou.com/js/',
    'href="/web/css/'   => 'href="https://www.sogou.com/web/css/',
    'src="/web/images/' => 'src="https://www.sogou.com/web/images/',
    'src="/web/js/'     => 'src="https://www.sogou.com/web/js/',
    'src="/web/img/'    => 'src="https://www.sogou.com/web/img/',
);
$str = strtr($content, $assetPrefixes);

// Runs one regex replacement; preg_replace() returns null on a PCRE error, in
// which case the markup is kept as it was instead of being wiped out.
$scrub = function ($pattern, $replacement, $subject) {
    $result = preg_replace($pattern, $replacement, $subject);
    return $result === null ? $subject : $result;
};

// Strip page fragments and wording
// 1) the <div class="header"> block
$str = $scrub('/<div class="header" style="">.*?<\/p>(\t|\r\n|\n|\s)*<\/div>/is', '', $str);

// 2) fixed phrases; the longer "搜狗搜索" is removed before the shorter "搜狗"
$str = str_replace(array('搜狗搜索', '搜狗', '站内搜索', '全网搜索', '- 问问'), '', $str);
$str = str_replace('最佳答案', '用户评价', $str);

// 3) further blocks, identified by their class / id attributes
$blockPatterns = array(
    '/<div class="biz_fb">.*?<\/a>(\t|\r\n|\n|\s)*<\/div>/is',
    '/<p class="str_time".*?>.*?<\/p>/ism',
    '/<p class="num-tips".*?>.*?<\/p>/ism',
    '/<div class="p" id="pagebar_container">.*?<\/a>(\t|\r\n|\n|\s)*<\/div>/is',
    '/<div class="cr" id="s_footer">.*?<\/div>(\t|\r\n|\n|\s)*<\/div>/is',
    '/<div class="fb".*?>.*?<\/div>/ism',
    '/<div class="biz_risk_alert".*?>.*?<\/div>/ism',
);
foreach ($blockPatterns as $pattern) {
    $str = $scrub($pattern, '', $str);
}

// Point every anchor that carries an href at one fixed target.
$str = $scrub('/<a .*?href="(.*?)".*?>/is', "<a href='http://www.cctv.com'>", $str);


echo $str;

// Style override appended after the rewritten page.
echo '
 <style>
 .wrapper {
    padding: 0;
}
</style>
';

/************************** Fetch related keywords ***********************************/

$url = 'https://www.sogou.com/tx?query='. urlencode($key);

// httpRequest() hands back curl_exec()'s result, which is false on a failed
// transfer; the cast turns that into an empty document so the matches below
// simply find nothing. (The former mb_convert_encoding() call discarded its
// result and had no effect, so it was dropped.)
$content = (string) httpRequest($url);

// Rewrites every <a>…</a> in $html so that it links to /test.php?key=<text>
// (adjust test.php to the name of your own script). The link text is kept as
// is; the key is the text with tags stripped and entities decoded, then
// URL-encoded so characters such as & # ' or spaces cannot break the link.
$keywordLinks = function ($html) {
    $rewritten = preg_replace_callback(
        "(<a[^>]*>(.+?)<\/a>)",
        function ($m) {
            $keyword = html_entity_decode(strip_tags($m[1]), ENT_QUOTES, 'UTF-8');
            return "<a href='/test.php?key=" . urlencode($keyword) . "'>" . $m[1] . "</a>";
        },
        $html
    );
    // null signals a PCRE error; fall back to the unmodified fragment.
    return $rewritten === null ? $html : $rewritten;
};

// Wrapper div for the related-search section
echo '<div style="padding-left:38em;" class="content_re">';
echo '<h2>相关搜索</h2>';

// Each fragment is only emitted when its pattern actually matched; reading
// index 0 of an empty match array would raise a warning.

//1  <div class="hint-mid"> fragment
if (preg_match('/<div class="hint-mid">.*?<\/a><\/div>/ism', $content, $hintMid) === 1) {
    echo $keywordLinks($hintMid[0]);
}

//3  <div class="hintBox"> fragment; its own "相关搜索" caption is removed
//   (before relinking, so link text and key stay identical)
if (preg_match('/<div class="hintBox">.*?<\/table>(\t|\r\n|\n|\s)*<\/div>/is', $content, $hintBox) === 1) {
    echo $keywordLinks(str_replace('相关搜索', '', $hintBox[0]));
}

//2  first <p> that starts with a link
if (preg_match('/<p>(\t|\r\n|\n|\s)<a.*?<\/p>/ism', $content, $linkParagraph) === 1) {
    echo $keywordLinks($linkParagraph[0]);
}

echo '</div>';



// Styles for the related-search section go here
echo '
  <style>
  .hint-mid {
    font-size: 0;
    line-height: 0;
    max-height: 102px;
    overflow: hidden;
    margin-bottom: 0;
    padding: 5px 0;
}
.hint-mid a {
    display: inline-block;
    font-size: 13px;
    line-height: 28px;
    height: 28px;
    padding: 0 7px;
    margin-right: 8px;
    text-decoration: none;
    border: 1px solid #e6e6e6;
    margin-bottom: 8px;
    border-radius: 1px;
}
.content_re{

}
p a {
    white-space: nowrap;
    padding: 4px 8px;
}
.hintBox{
margin-left: 0;
}
</style>
';


/**
 * Fetches a URL with cURL and returns the response body.
 *
 * Sends a GET request by default; when $postData is non-empty the request is
 * sent as POST with $postData as its fields. Redirects are followed, gzip
 * encoding is requested, and a fixed referer plus a randomly chosen
 * User-Agent string are attached.
 *
 * @param string $url      Address to request.
 * @param mixed  $postData POST fields handed to CURLOPT_POSTFIELDS; an empty
 *                         value (the default) keeps the request a GET.
 *
 * @return string|false Response body, or false when the transfer failed.
 */
function httpRequest($url, $postData = array())
{
    // Pool of User-Agent strings; one is picked at random per request.
    $useragent = array(
        'Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0)',
        'Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.2)',
        'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)',
        'Mozilla/5.0 (Windows; U; Windows NT 5.2) Gecko/2008070208 Firefox/3.0.1',
        'Opera/9.27 (Windows NT 5.2; U; zh-cn)',
        'Opera/8.0 (Macintosh; PPC Mac OS X; U; en)',
        'Mozilla/5.0 (Windows; U; Windows NT 5.2) AppleWebKit/525.13 (KHTML, like Gecko) Chrome/0.2.149.27 Safari/525.13 ',
        'Mozilla/5.0 (Windows; U; Windows NT 5.2) AppleWebKit/525.13 (KHTML, like Gecko) Version/3.1 Safari/525.13'
    );

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    if (!empty($postData)) {
        // Switch the request method to POST
        curl_setopt($ch, CURLOPT_POST, true);
        curl_setopt($ch, CURLOPT_POSTFIELDS, $postData);
    }

    // NOTE(review): certificate and host-name verification are switched off,
    // so the HTTPS peer is not authenticated. Left as in the original to avoid
    // breaking hosts without a CA bundle; enable both if the server has one.
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);

    curl_setopt($ch, CURLOPT_ENCODING , 'gzip');

    // Referer and User-Agent headers
    curl_setopt($ch, CURLOPT_REFERER, "http://h5.eqxiu.com/s/Rwg9jFlv");
    // array_rand() returns a random KEY, so it has to be used as an index;
    // passing its result directly would send "0".."7" as the User-Agent.
    curl_setopt($ch, CURLOPT_USERAGENT, $useragent[array_rand($useragent)]);

    // Follow HTTP redirects
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

    // Execute the transfer
    $result = curl_exec($ch);
    // Release the handle
    curl_close($ch);
    return $result;
}
打赏
文章版权声明:除非注明,否则均为彭超的博客原创文章,转载或复制请以超链接形式注明出处。
相关推荐

发表评论:

◎欢迎参与讨论,请在这里发表您的看法、交流您的观点。

猜你喜欢