爬取京东商品评论

2018-07-09  本文已影响0人  叁咪

爬取京东商品评论

# -*- coding: utf-8 -*-

import requests

import json

import os

import sys

import random

import time

# Optional proxy settings, currently disabled:
# proxies = {
#   "http": "proxy.xxcom:911",
#   "https": "proxy.xx.com:911",
# }

# HTTP headers passed to every requests.get() call in this script.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 6.1) '
        'AppleWebKit/537.11 (KHTML, like Gecko) '
        'Chrome/23.0.1271.64 Safari/537.11'
    ),
    'Accept': 'text/html;q=0.9,*/*;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Connection': 'close',
    'Referer': 'https://www.jd.com/',
}

# Cookie jar passed alongside the headers.
cookie = dict(__jdu='10846')

# Output file handle; append mode, so an existing file is extended rather
# than overwritten.
f = open(
    file='c:/users/ffan2/desktop/jd.txt',
    mode='a',
    encoding='utf-8',
)

# A page URL is built as: url1 + <page number> + url2.
url1 = (
    'https://sclub.jd.com/comment/productPageComments.action'
    '?callback=fetchJSON_comment98vv33573'
    '&productId=5118016'
    '&score=0'
    '&sortType=5'
    '&page='
)
url2 = (
    '&pageSize=10'
    '&isShadowSku=0'
    '&fold=1'
)

# Page numbers 0..29, each exactly once, in random order.
ran_num = random.sample(range(30), k=30)

# Disabled earlier draft, kept for reference only. It requested page 0
# (passing `proxies`), decoded the response as GBK and wrote it to `f`.
#
# for i in range(0,1):
#     #url='https://club.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98vv17182&productId=4554969&score=0&sortType=5&page='+str(i)+'&pageSize=10&isShadowSku=0&fold=1'
#     url='https://sclub.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98vv33573&productId=5118016&score=0&sortType=5&page='+str(i)+'&pageSize=10&isShadowSku=0&fold=1'
#     # crawl multiple pages
#     print (url)
#     r = requests.get(url,headers=headers,cookies=cookie,proxies=proxies)
#     #print(r.status_code)
#     html=str(r.content, encoding = "GBK")
#     f.write(html)
# print('done')
#     #print(html)

# Download every comment page in the shuffled order given by `ran_num`.
# Each page is appended to the output file `f` as soon as it arrives, and the
# raw bytes of all pages are also kept, concatenated, in `html`.
page_bodies = []
try:
    for page in ran_num:
        url = url1 + str(page) + url2
        # A timeout keeps one stalled connection from hanging the whole crawl.
        r = requests.get(url=url, headers=headers, cookies=cookie, timeout=10)  # ,proxies=proxies
        page_bodies.append(r.content)
        # `f` is a text-mode file, so the bytes must be decoded before writing.
        # NOTE(review): the endpoint is assumed to answer in GBK - confirm;
        # undecodable bytes are replaced instead of aborting the run.
        f.write(r.content.decode('GBK', errors='replace'))
        print("当前抓取页面:",url,"状态:",r)
        # Pause 5 seconds after each request before fetching the next page.
        time.sleep(5)
finally:
    # Flush and release the output file even if a request fails midway.
    f.close()

# Joined once at the end instead of re-copying the buffer on every page.
html = b''.join(page_bodies)

print('done--------------------')

上一篇下一篇

猜你喜欢

热点阅读