爬取企查查数据

2019-06-26  本文已影响0人  Magic_小灰灰

# -*- coding: utf-8 -*-

import requests

import lxml

import sys

from bs4 import BeautifulSoup

import xlwt

import time

import urllib

def craw(url,key_word,x):

    User_Agent = 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:56.0) Gecko/20100101 Firefox/56.0'

#    if x == 0:

#        re = 'http://www.qichacha.com/search?key='+key_word

#    else:

#        re = 'https://www.qichacha.com/search?key={}#p:{}&'.format(key_word,x-1)

    re = r'https://www.qichacha.com/search?key='+key_word

    headers = {

            'Host':'www.qichacha.com',

            'Connection': 'keep-alive',

            'Accept':r'text/html, */*; q=0.01',

            'X-Requested-With': 'XMLHttpRequest',

            'User-Agent':r'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36',

            'Referer': re,

            'Accept-Encoding':'gzip, deflate, br',

            'Accept-Language':'zh-CN,zh;q=0.9',

            'Cookie':r'xxxxxxxxx这里换成你的cookiexxxxxxxx这里换成你的cooki

上一篇下一篇

猜你喜欢

热点阅读