上海Python

爬虫作业3(京东商品)

2019-07-25  本文已影响0人  56f82a501045

import requests

import json

import pymysql

import time

import re

# Open the MySQL connection that the scraped comments are written to.
# `password` / `database` are PyMySQL's canonical keyword names; the
# `passwd` / `db` spellings are deprecated compatibility aliases.
# NOTE(review): the credentials are hard-coded in source — consider loading
# them from the environment or a config file instead.
# NOTE(review): MySQL's 'utf8' charset stores at most 3 bytes per character;
# comments containing emoji may need 'utf8mb4' — confirm against the table.
conn = pymysql.connect(
    host='192.168.112.157',
    user='python',
    password='Python.123456',
    database='python',
    port=3306,
    charset='utf8',
)

# Cursor used for the INSERT statements issued while crawling.
cursor = conn.cursor()

# Request headers sent with every comment-API call: a desktop Chrome
# User-Agent string and the product page as the referer.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/75.0.3770.100 Safari/537.36'
    ),
    'referer': 'https://item.jd.com/100004325476.html',
}

# Comment-API URLs for result pages 0 through 49 of product 100004325476,
# requesting 10 comments per page. `str.format` stringifies the page number
# itself, so no explicit str() conversion is needed.
urls = [
    'https://sclub.jd.com/comment/productPageComments.action?&productId=100004325476&score=0&sortType=5&page={}&pageSize=10'.format(page)
    for page in range(0, 50)
]

# Fetch every comment page, insert its comments into the `comment_info`
# table, and always release the database handles when finished — even if a
# request, the JSON parse, or an insert raises part-way through.
try:
    for url in urls:
        # A timeout keeps one stalled request from hanging the whole crawl.
        res = requests.get(url, headers=headers, timeout=10)
        # Fail loudly on HTTP errors instead of trying to parse an error page.
        res.raise_for_status()
        json_data = json.loads(res.text)

        # Treat a missing or null 'comments' entry as an empty page.
        comments = json_data.get('comments') or []

        for comment in comments:
            content = comment['content']
            creationTime = comment['creationTime']
            nickname = comment['nickname']
            productColor = comment['productColor']
            # Strip the fixed prefix, brand and suffix words from the client
            # description so only the client name itself is stored.
            userClientShow = (
                comment['userClientShow']
                .replace('来自', '')
                .replace('京东', '')
                .replace('客户端', '')
            )
            userLevelName = comment['userLevelName']

            # Parameterized query: the driver escapes every value.
            cursor.execute(
                "insert into comment_info (userClientShow, userLevelName,content,creationTime,nickname,productColor) "
                "values(%s,%s,%s,%s,%s,%s)",
                (userClientShow, userLevelName, content, creationTime, nickname, productColor),
            )

        # Persist each page as soon as it has been processed.
        conn.commit()

        # Pause between pages to avoid hammering the server.
        time.sleep(5)
finally:
    cursor.close()
    conn.close()

上一篇 | 下一篇

猜你喜欢

热点阅读