初见

Python爬虫 || 基于Jupyter工具

2020-05-08  本文已影响0人  南山以南ddl

import requests

from lxml import etree

import pandas as pd

# Browser-like User-Agent sent with every request.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/64.0.3282.186 Safari/537.36'}

# The ranking is paginated through the ``start`` query parameter.
TOP250_URL = 'https://movie.douban.com/top250?start={start}&filter='

# First text node of the first <p> in every list item; one query returns the
# info line of every movie on the page instead of one query per index.
INFO_XPATH = '//*[@id="content"]/div/div[1]/ol/li/div/div[2]/div[2]/p[1]/text()[1]'


def extract_director(info):
    """Return the director part of a movie info line.

    The line is expected to look like ``"导演: <director>\\xa0\\xa0\\xa0主演: <cast>"``.
    The cast label may be cut short (``"主..."``) or missing entirely, so the
    director is terminated at whichever comes first: the ``"主演"`` label or
    the first non-breaking space.

    Args:
        info: Raw text of the info line.

    Returns:
        The director text with surrounding whitespace removed, or ``""``
        when the line has no ``"导演"`` label.
    """
    _, label, rest = info.partition("导演")
    if not label:
        return ""
    # Drop the colon (ASCII or full-width) and padding that follow the label.
    rest = rest.lstrip(":： \xa0")
    rest = rest.partition("主演")[0]
    # Director names contain ordinary spaces, so only a non-breaking space
    # can be treated as the separator before the cast.
    rest = rest.split("\xa0", 1)[0]
    return rest.strip()


def fetch_directors(total=250, page_size=25, timeout=10):
    """Download the ranking pages and collect one director entry per movie.

    Args:
        total: Number of ranking entries to walk through.
        page_size: Number of entries per page (step of the ``start`` parameter).
        timeout: Seconds to wait for each HTTP response.

    Returns:
        A list of director strings in ranking order; lines without a
        director label are skipped.

    Raises:
        requests.HTTPError: If a page answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    directors = []
    for start in range(0, total, page_size):
        page = requests.get(TOP250_URL.format(start=start), headers=headers, timeout=timeout)
        # Fail loudly instead of silently parsing an error page.
        page.raise_for_status()
        tree = etree.HTML(page.text)
        for info in tree.xpath(INFO_XPATH):
            director = extract_director(info)
            if director:
                directors.append(director)
    return directors


if __name__ == "__main__":
    directors = fetch_directors()

    for director in directors:
        print(director)

    print(directors)

    # An explicit column label keeps ``fr[0]`` valid even when nothing was scraped.
    fr = pd.DataFrame(directors, columns=[0])

    # Number of ranked movies per director, most frequent first.
    pinlv = fr[0].value_counts()

    print(pinlv.head(20))

    print(type(pinlv))

上一篇下一篇

猜你喜欢

热点阅读