数据蛙数据分析每周作业

爬取拉勾招聘职位

2019-04-28  本文已影响17人  夜希辰

爬取拉勾招聘职位

import json

import pymysql

import requests

from bs4 import BeautifulSoup

from requests.exceptions import RequestException

# HTTP headers sent with every request to www.lagou.com.
# Header names are kept exactly as written (including the "Anit" spelling);
# presumably this is what the site expects -- verify before "correcting" it.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/73.0.3683.103 Safari/537.36'
    ),
    'Host': 'www.lagou.com',
    # Referer is the job-list page URL (keyword and city are percent-encoded).
    'Referer': (
        'https://www.lagou.com/jobs/'
        'list_%E6%95%B0%E6%8D%AE%E5%88%86%E6%9E%90'
        '?px=default&city=%E6%88%90%E9%83%BD'
    ),
    'X-Anit-Forge-Code': '0',
    'X-Anit-Forge-Token': 'None',
    'X-Requested-With': 'XMLHttpRequest',
    'Origin': 'https://www.lagou.com',
}

# Form fields posted with the search request: 'pn' is sent as '1' and
# 'kd' carries the search keyword. All values are strings.
data = dict(first='true', pn='1', kd='数据分析')

def get_one_page(url):
    """POST the module-level ``data`` form to *url* and return the body text.

    The request is sent with the module-level ``headers``.

    Args:
        url: Address the POST request is sent to.

    Returns:
        The response body decoded as UTF-8 when the server answers with
        HTTP 200. ``None`` when the status code is anything else, or when
        the request raises ``RequestException`` (a message is printed in
        both failure cases).
    """
    try:
        # The timeout keeps an unresponsive server from blocking forever;
        # a timeout surfaces as a RequestException and is handled below.
        response = requests.post(url, headers=headers, data=data, timeout=10)
    except RequestException as e:
        print('请求错误')
        print(e)
        return None

    if response.status_code != 200:
        # status_code is an int: it must be converted before concatenation,
        # otherwise this line raises TypeError instead of reporting the code.
        print("访问异常,状态码:" + str(response.status_code))
        return None

    response.encoding = 'utf-8'
    return response.text

def main():
    """Request the Lagou position-search JSON endpoint and print the result.

    Prints whatever ``get_one_page`` returns, so ``None`` is printed when the
    fetch did not succeed.
    """
    # The city parameter is the percent-encoded form of 成都.
    ajax_url = (
        'https://www.lagou.com/jobs/positionAjax.json'
        '?px=default&city=%E6%88%90%E9%83%BD&needAddtionalResult=false'
    )
    print(get_one_page(ajax_url))

# if main() == '__main()__':

#    main()

#我写的错代码,很崩溃……

# Run the scraper only when this file is executed as a script, not on import.
if '__main__' == __name__:
    main()

上一篇 下一篇

猜你喜欢

热点阅读