爬取百度logo

2018-05-21 本文已影响30人 Danny001

#!/usr/bin/env python3

# _*_ coding:utf-8 _*_

import logging
import os
import shutil
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from selenium.webdriver.support import expected_conditions

# Fetch the Baidu home page, locate the logo <img> element and save the image
# into the current working directory as logo.png.

url = "https://www.baidu.com"

logging.basicConfig(level=logging.INFO)

# A browser-like User-Agent is sent with every request.
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36",
}

try:
    # The request lives inside the try block so that connection failures are
    # logged instead of escaping as an unhandled traceback.
    res = requests.get(url, headers=headers, timeout=10)

    if res.status_code == 200:
        # Build a BeautifulSoup tree from the HTML text (built-in parser).
        soup = BeautifulSoup(res.text, 'html.parser')

        # Dump the page content.
        print(soup.prettify())

        # select_one returns the first matching element, or None.
        img_logo = soup.select_one('#lg > img.index-logo-src')
        print("img_logo" + str(img_logo))

        if img_logo:
            print("aaa")

            # Resolve the src attribute against the page URL; this handles
            # absolute, protocol-relative ("//host/x.png") and path-relative
            # values alike.
            img_url = urljoin(url, img_logo["src"])
            print(img_url)

            file_name = "logo.png"

            # Stream the image; fail early on a non-2xx status so an error
            # page is never saved as the logo.
            res = requests.get(img_url, headers=headers, stream=True, timeout=10)
            res.raise_for_status()

            # Remove a previous copy only once the download is known to be
            # available, so a failed request does not destroy the old file.
            if os.path.isfile(file_name):
                os.remove(file_name)

            # Have urllib3 undo any gzip/deflate transfer encoding; otherwise
            # the raw stream would be written to disk still compressed.
            res.raw.decode_content = True
            with open(file_name, "wb") as out_file:
                shutil.copyfileobj(res.raw, out_file)

            print("ssd")
        else:
            logging.info("查找元素失败")
    else:
        logging.info("请求失败")
except OSError:
    # OSError is the portable spelling of WindowsError (an alias of it on
    # Windows) and is also the base class of requests' exceptions.
    logging.info("连接错误")

#注意url正确，且请求头正确；

#可使用命令行直接运行 python pc2.py；若没有安装相关包，使用 pip install bs4 命令安装

#注释一大块代码可使用快捷键 Ctrl+/

爬取百度logo

猜你喜欢

热点阅读