Python 学习笔记之二 [BeautifulSoup4]
2017-04-08 本文已影响86人
王滕辉
下载安装
https://pypi.python.org/pypi?%3Aaction=search&term=BeautifulSoup&submit=search
下载完成开始安装
解压压缩包
python setup.py install
C:\Python34\beautifulsoup4-4.5.3>python setup.py install
running install
running bdist_egg
running egg_info
writing beautifulsoup4.egg-info\PKG-INFO
writing dependency_links to beautifulsoup4.egg-info\dependency_links.txt
writing top-level names to beautifulsoup4.egg-info\top_level.txt
writing requirements to beautifulsoup4.egg-info\requires.txt
reading manifest file 'beautifulsoup4.egg-info\SOURCES.txt'
reading manifest template 'MANIFEST.in'
writing manifest file 'beautifulsoup4.egg-info\SOURCES.txt'
installing library code to build\bdist.win-amd64\egg
running install_lib
running build_py
creating build
creating build\lib
creating build\lib\bs4
copying bs4\1631353.py -> build\lib\bs4
copying bs4\dammit.py -> build\lib\bs4
copying bs4\diagnose.py -> build\lib\bs4
copying bs4\element.py -> build\lib\bs4
copying bs4\testing.py -> build\lib\bs4
copying bs4\__init__.py -> build\lib\bs4
creating build\lib\bs4\builder
copying bs4\builder\_html5lib.py -> build\lib\bs4\builder
copying bs4\builder\_htmlparser.py -> build\lib\bs4\builder
copying bs4\builder\_lxml.py -> build\lib\bs4\builder
copying bs4\builder\__init__.py -> build\lib\bs4\builder
creating build\lib\bs4\tests
copying bs4\tests\test_builder_registry.py -> build\lib\bs4\tests
copying bs4\tests\test_docs.py -> build\lib\bs4\tests
copying bs4\tests\test_html5lib.py -> build\lib\bs4\tests
copying bs4\tests\test_htmlparser.py -> build\lib\bs4\tests
copying bs4\tests\test_lxml.py -> build\lib\bs4\tests
copying bs4\tests\test_soup.py -> build\lib\bs4\tests
copying bs4\tests\test_tree.py -> build\lib\bs4\tests
copying bs4\tests\__init__.py -> build\lib\bs4\tests
Fixing build\lib\bs4\1631353.py build\lib\bs4\dammit.py build\lib\bs4\diagnose.p
y build\lib\bs4\element.py build\lib\bs4\testing.py build\lib\bs4\__init__.py bu
ild\lib\bs4\builder\_html5lib.py build\lib\bs4\builder\_htmlparser.py build\lib\
bs4\builder\_lxml.py build\lib\bs4\builder\__init__.py build\lib\bs4\tests\test_
builder_registry.py build\lib\bs4\tests\test_docs.py build\lib\bs4\tests\test_ht
ml5lib.py build\lib\bs4\tests\test_htmlparser.py build\lib\bs4\tests\test_lxml.p
y build\lib\bs4\tests\test_soup.py build\lib\bs4\tests\test_tree.py build\lib\bs
4\tests\__init__.py
Skipping optional fixer: buffer
Skipping optional fixer: idioms
Skipping optional fixer: set_literal
Skipping optional fixer: ws_comma
Fixing build\lib\bs4\1631353.py build\lib\bs4\dammit.py build\lib\bs4\diagnose.p
y build\lib\bs4\element.py build\lib\bs4\testing.py build\lib\bs4\__init__.py bu
ild\lib\bs4\builder\_html5lib.py build\lib\bs4\builder\_htmlparser.py build\lib\
bs4\builder\_lxml.py build\lib\bs4\builder\__init__.py build\lib\bs4\tests\test_
builder_registry.py build\lib\bs4\tests\test_docs.py build\lib\bs4\tests\test_ht
ml5lib.py build\lib\bs4\tests\test_htmlparser.py build\lib\bs4\tests\test_lxml.p
y build\lib\bs4\tests\test_soup.py build\lib\bs4\tests\test_tree.py build\lib\bs
4\tests\__init__.py
Skipping optional fixer: buffer
Skipping optional fixer: idioms
Skipping optional fixer: set_literal
Skipping optional fixer: ws_comma
creating build\bdist.win-amd64
creating build\bdist.win-amd64\egg
creating build\bdist.win-amd64\egg\bs4
copying build\lib\bs4\1631353.py -> build\bdist.win-amd64\egg\bs4
creating build\bdist.win-amd64\egg\bs4\builder
copying build\lib\bs4\builder\_html5lib.py -> build\bdist.win-amd64\egg\bs4\buil
der
copying build\lib\bs4\builder\_htmlparser.py -> build\bdist.win-amd64\egg\bs4\bu
ilder
copying build\lib\bs4\builder\_lxml.py -> build\bdist.win-amd64\egg\bs4\builder
copying build\lib\bs4\builder\__init__.py -> build\bdist.win-amd64\egg\bs4\build
er
copying build\lib\bs4\dammit.py -> build\bdist.win-amd64\egg\bs4
copying build\lib\bs4\diagnose.py -> build\bdist.win-amd64\egg\bs4
copying build\lib\bs4\element.py -> build\bdist.win-amd64\egg\bs4
copying build\lib\bs4\testing.py -> build\bdist.win-amd64\egg\bs4
creating build\bdist.win-amd64\egg\bs4\tests
copying build\lib\bs4\tests\test_builder_registry.py -> build\bdist.win-amd64\eg
g\bs4\tests
copying build\lib\bs4\tests\test_docs.py -> build\bdist.win-amd64\egg\bs4\tests
copying build\lib\bs4\tests\test_html5lib.py -> build\bdist.win-amd64\egg\bs4\te
sts
copying build\lib\bs4\tests\test_htmlparser.py -> build\bdist.win-amd64\egg\bs4\
tests
copying build\lib\bs4\tests\test_lxml.py -> build\bdist.win-amd64\egg\bs4\tests
copying build\lib\bs4\tests\test_soup.py -> build\bdist.win-amd64\egg\bs4\tests
copying build\lib\bs4\tests\test_tree.py -> build\bdist.win-amd64\egg\bs4\tests
copying build\lib\bs4\tests\__init__.py -> build\bdist.win-amd64\egg\bs4\tests
copying build\lib\bs4\__init__.py -> build\bdist.win-amd64\egg\bs4
byte-compiling build\bdist.win-amd64\egg\bs4\1631353.py to 1631353.cpython-34.py
c
byte-compiling build\bdist.win-amd64\egg\bs4\builder\_html5lib.py to _html5lib.c
python-34.pyc
byte-compiling build\bdist.win-amd64\egg\bs4\builder\_htmlparser.py to _htmlpars
er.cpython-34.pyc
byte-compiling build\bdist.win-amd64\egg\bs4\builder\_lxml.py to _lxml.cpython-3
4.pyc
byte-compiling build\bdist.win-amd64\egg\bs4\builder\__init__.py to __init__.cpy
thon-34.pyc
byte-compiling build\bdist.win-amd64\egg\bs4\dammit.py to dammit.cpython-34.pyc
byte-compiling build\bdist.win-amd64\egg\bs4\diagnose.py to diagnose.cpython-34.
pyc
byte-compiling build\bdist.win-amd64\egg\bs4\element.py to element.cpython-34.py
c
byte-compiling build\bdist.win-amd64\egg\bs4\testing.py to testing.cpython-34.py
c
byte-compiling build\bdist.win-amd64\egg\bs4\tests\test_builder_registry.py to t
est_builder_registry.cpython-34.pyc
byte-compiling build\bdist.win-amd64\egg\bs4\tests\test_docs.py to test_docs.cpy
thon-34.pyc
byte-compiling build\bdist.win-amd64\egg\bs4\tests\test_html5lib.py to test_html
5lib.cpython-34.pyc
byte-compiling build\bdist.win-amd64\egg\bs4\tests\test_htmlparser.py to test_ht
mlparser.cpython-34.pyc
byte-compiling build\bdist.win-amd64\egg\bs4\tests\test_lxml.py to test_lxml.cpy
thon-34.pyc
byte-compiling build\bdist.win-amd64\egg\bs4\tests\test_soup.py to test_soup.cpy
thon-34.pyc
byte-compiling build\bdist.win-amd64\egg\bs4\tests\test_tree.py to test_tree.cpy
thon-34.pyc
byte-compiling build\bdist.win-amd64\egg\bs4\tests\__init__.py to __init__.cpyth
on-34.pyc
byte-compiling build\bdist.win-amd64\egg\bs4\__init__.py to __init__.cpython-34.
pyc
creating build\bdist.win-amd64\egg\EGG-INFO
copying beautifulsoup4.egg-info\PKG-INFO -> build\bdist.win-amd64\egg\EGG-INFO
copying beautifulsoup4.egg-info\SOURCES.txt -> build\bdist.win-amd64\egg\EGG-INF
O
copying beautifulsoup4.egg-info\dependency_links.txt -> build\bdist.win-amd64\eg
g\EGG-INFO
copying beautifulsoup4.egg-info\requires.txt -> build\bdist.win-amd64\egg\EGG-IN
FO
copying beautifulsoup4.egg-info\top_level.txt -> build\bdist.win-amd64\egg\EGG-I
NFO
zip_safe flag not set; analyzing archive contents...
creating dist
creating 'dist\beautifulsoup4-4.5.3-py3.4.egg' and adding 'build\bdist.win-amd64
\egg' to it
removing 'build\bdist.win-amd64\egg' (and everything under it)
Processing beautifulsoup4-4.5.3-py3.4.egg
Copying beautifulsoup4-4.5.3-py3.4.egg to c:\python34\lib\site-packages
Adding beautifulsoup4 4.5.3 to easy-install.pth file
Installed c:\python34\lib\site-packages\beautifulsoup4-4.5.3-py3.4.egg
Processing dependencies for beautifulsoup4==4.5.3
Finished processing dependencies for beautifulsoup4==4.5.3
C:\Python34\beautifulsoup4-4.5.3>
看到 “Finished processing dependencies” 说明 BeautifulSoup 已安装完成
可以愉快的测试了
# coding=utf-8
__author__ = 'zdz8207'
from bs4 import BeautifulSoup
# Standard-library modules used by getHtml() below.
import urllib.request
import urllib.parse
import re
import urllib.request, urllib.parse, http.cookiejar
def getHtml(url):
    """Download *url* and return the response body as text.

    A cookie-aware opener carrying a browser-like ``User-Agent`` and a fixed
    ``Cookie`` header is built and installed as urllib's global default
    opener, so later ``urllib.request.urlopen`` calls in this process use it
    as well.

    Args:
        url: Address to fetch; any scheme the default urllib handlers accept.

    Returns:
        The body decoded with the charset declared in the response's
        Content-Type header. UTF-8 is used when no charset is declared or
        when the declared name is not a codec Python knows.

    Raises:
        urllib.error.URLError: If the request fails.
        UnicodeDecodeError: If the body is not valid in the chosen charset.
    """
    cj = http.cookiejar.CookieJar()
    opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
    opener.addheaders = [('User-Agent', 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/41.0.2272.101 Safari/537.36'),('Cookie', '4564564564564564565646540')]
    urllib.request.install_opener(opener)

    # Close the response deterministically instead of leaking the connection.
    with urllib.request.urlopen(url) as response:
        html_bytes = response.read()
        declared_charset = response.headers.get_content_charset()

    try:
        return html_bytes.decode(declared_charset or 'utf-8')
    except LookupError:
        # The server named a codec Python does not have; fall back to UTF-8.
        return html_bytes.decode('utf-8')
# Fetch the lottery results page and parse it with the stdlib-backed parser.
html_doc = getHtml("http://zst.aicai.com/ssq/openInfo/")
soup = BeautifulSoup(html_doc, 'html.parser')

# Debugging notes kept from the original experiments:
# print(soup.title)
# table = soup.find_all('table', class_='fzTab')
# print(table)  # rows of the form <tr onmouseout="this.style.background=''" were lost here
# Calling soup.strip() often made find_all('tr') return only the first <tr>.

# Select the first row whose onmouseout attribute has exactly this value.
tr = soup.find('tr', attrs={"onmouseout": "this.style.background=''"})
if tr is None:
    # soup.find returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise SystemExit("result row not found: the page layout may have changed")
# print(tr)

tds = tr.find_all('td')
if len(tds) < 9:
    # Cells 0 and 2..8 are read below.
    raise SystemExit("result row has %d cells, expected at least 9" % len(tds))

opennum = tds[0].get_text()  # used as the draw number in the summary line below
# print(opennum)

# Cells 2..7 are collected as the red numbers and cell 8 as the blue number
# (naming taken from the summary format below).
blue = tds[8].get_text()
reds = []
for i in range(2, 8):
    reds.append(tds[i].get_text())
    # Progress output: the list so far, then the blue number, on every step.
    print(reds)
    print(blue)

# Convert a list to a string with ','.join(list).
# A one-line summary such as "2015075期开奖号码:6,11,13,19,21,32, 蓝球:4"
# can be printed with:
# print(opennum+'期开奖号码:'+ (',').join(reds)+", 蓝球:"+blue)
测试
C:\Users\wang\python>python xinlangshuangseqiu.py
['02']
15
['02', '04']
15
['02', '04', '12']
15
['02', '04', '12', '14']
15
['02', '04', '12', '14', '17']
15
['02', '04', '12', '14', '17', '24']
15
C:\Users\wang\python>
成功