PHP cURL + XPath 示例

2019-07-11  本文已影响0人  水电梁师傅
    // Page to scrape and the XPath expression selecting the headings to print.
    $url = "https://so.gushiwen.org/shiwenv_94e9aad7a0d2.aspx";
    $html = curlget($url);
    // Example expression selecting every link target; not used further in this snippet.
    $regular = "/html/body//a//@href";
    $titlexpath = "//h1";

    // curlget() returns false when the transfer fails; only parse a real, non-empty document
    // so a network error does not turn into a parser failure.
    if (is_string($html) && $html !== '') {
        $titles = xpathregular($html, $titlexpath);
        // Print the text content of every matched <h1>, back to back with no separator.
        foreach ($titles as $title) {
            echo $title->nodeValue;
        }
    }
    
    
    /**
     * Fetch a URL with cURL and return the response body.
     *
     * @param string $url Address to request.
     * @return string|false Response body without headers, or false when the
     *                      transfer fails (the cURL error message is echoed).
     */
    function curlget($url)
    {
        $ch = curl_init();
        // Use the caller's URL; it must not be overwritten by a fixed address.
        curl_setopt($ch, CURLOPT_URL, $url);
        // NOTE(review): certificate and host-name verification are switched off,
        // which makes HTTPS requests open to interception. Kept as in the original
        // example; enable both options before using this outside a demo.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, 0);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 0);
        // Return the body from curl_exec() instead of printing it, without headers.
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        curl_setopt($ch, CURLOPT_HEADER, 0);

        $output = curl_exec($ch);
        if ($output === false) {
            // Read the error before the handle is released.
            echo "CURL Error:" . curl_error($ch);
        }

        // Release the handle before returning; after the return it would never run.
        curl_close($ch);

        return $output;
    }

    /**
     * Parse an HTML string and evaluate an XPath expression against it.
     *
     * @param string|false $html    HTML markup to parse. An empty string or a
     *                              non-string (such as the false returned by a
     *                              failed download) is treated as an empty document.
     * @param string       $regular XPath expression to evaluate.
     * @return DOMNodeList|false The result of DOMXPath::query(): the matching
     *                           nodes (none for an empty document), or false if
     *                           the expression is malformed.
     */
    function xpathregular($html, $regular)
    {
        $dom = new DOMDocument();

        // Loading an empty string fails, so only parse when there is markup.
        if (is_string($html) && $html !== '') {
            // Real-world HTML is rarely valid; collect parser errors internally
            // instead of silencing them with the @ operator, then restore the
            // previous libxml error mode.
            $previous = libxml_use_internal_errors(true);
            $dom->loadHTML($html);
            libxml_clear_errors();
            libxml_use_internal_errors($previous);

            // Normalize the parsed document (merges adjacent text nodes).
            $dom->normalize();
        }

        // Wrap the document in a DOMXPath object so it can be queried.
        $xpath = new DOMXPath($dom);

        // Return every node matched by the caller's expression.
        return $xpath->query($regular);
    }

上一篇下一篇

猜你喜欢

热点阅读