双色球历史数据爬取
2022-06-27 本文已影响0人
Lonelyroots
import pymysql
from pymongo import MongoClient
from requests_html import HTMLSession
class Spider:
    """Scrape historical Double Color Ball lottery draws and persist them.

    The history page at ``self.url`` is fetched once, every row of the
    ``tdata`` table body is parsed, and each draw is written to MySQL
    (table ``dual_colored_ball``). A MongoDB writer is also available via
    :meth:`saveMongoDB` but is not called by :meth:`parse`.
    """

    def __init__(self):
        """Create the HTTP session and open the MongoDB and MySQL connections."""
        self.url = "https://datachart.500.com/ssq/history/newinc/history.php?start=19000&end=21018"
        self.session = HTMLSession()

        # Connect exactly once here; never open connections inside the row loop.
        # MongoDB connection. The client is kept on the instance so that run()
        # can close it when the crawl is finished.
        self.mongo_client = MongoClient('localhost', 8881)
        mongo_db = self.mongo_client['dual_colored_ball']  # the database is created if it does not exist
        self.my_set = mongo_db['words']  # the collection is created if it does not exist

        # MySQL connection.
        # NOTE(review): credentials are hard-coded; consider loading them from
        # configuration or the environment.
        host = "localhost"
        port = 8001
        db = "spider"
        user = "admin"
        password = "qwe123"
        self.conn = pymysql.connect(host=host, port=port, db=db, user=user, password=password)
        self.cursor = self.conn.cursor()  # cursor used for every insert

    def parse(self):
        """Fetch the history page and store every draw row in MySQL.

        Each row is turned into the 8-tuple
        ``(number, red, blue, prizePool, FirstPrize, SecondPrize, Total_bet, Date)``
        where ``red``, ``FirstPrize`` and ``SecondPrize`` are lists of strings
        and the remaining fields are single strings. The tuple is passed to
        :meth:`saveMySQL` and then printed.

        Raises:
            IndexError: if a single-valued cell of a row has no text node.
        """
        response = self.session.get(url=self.url)
        for tr in response.html.xpath('//tbody[@id="tdata"]/tr'):
            number = tr.xpath('//td[1]/text()')[0]  # draw number
            red = tr.xpath('//td[2]/text()|//td[3]/text()|//td[4]/text()|//td[5]/text()|//td[6]/text()|//td[7]/text()')  # red balls
            blue = tr.xpath('//td[8]/text()')[0]  # blue ball
            prizePool = tr.xpath('//td[10]/text()')[0]  # prize pool (yuan)
            FirstPrize = tr.xpath('//td[11]/text()|//td[12]/text()')  # first prize
            SecondPrize = tr.xpath('//td[13]/text()|//td[14]/text()')  # second prize
            Total_bet = tr.xpath('//td[15]/text()')[0]  # total amount bet
            Date = tr.xpath('//td[16]/text()')[0]  # draw date
            data = (number, red, blue, prizePool, FirstPrize, SecondPrize, Total_bet, Date)
            # self.saveMongoDB(data)
            self.saveMySQL(data)
            print(data)

    def saveMySQL(self, data):
        """Insert one draw into the ``dual_colored_ball`` MySQL table and commit.

        Args:
            data: the 8-tuple built by :meth:`parse`. List-valued fields
                (indexes 1, 4 and 5) are joined with ``-`` before storage;
                index 0 is converted to ``int``.

        Raises:
            ValueError: if ``data[0]`` is not an integer string.
        """
        # Let the driver bind the values instead of formatting them into the
        # statement: the text comes from a web page, so interpolating it would
        # allow SQL injection and break on any value containing a quote.
        self.cursor.execute(
            "insert into dual_colored_ball values (%s,%s,%s,%s,%s,%s,%s,%s);",
            (
                int(data[0]),
                '-'.join(data[1]),  # flatten the list into one string
                data[2],
                data[3],
                '-'.join(data[4]),
                '-'.join(data[5]),
                data[6],
                data[7],
            ),
        )
        self.conn.commit()  # commit explicitly so the row is actually persisted

    def saveMongoDB(self, data):
        """Insert one draw into the MongoDB collection held in ``self.my_set``.

        Args:
            data: the 8-tuple built by :meth:`parse`; values are stored as-is.
        """
        # insert_many expects a list of documents, hence the surrounding [].
        self.my_set.insert_many([{
            "number": data[0],
            "red": data[1],
            "blue": data[2],
            "prizePool": data[3],
            "FirstPrize": data[4],
            "SecondPrize": data[5],
            "Total_bet": data[6],
            "Date": data[7],
        }])

    def run(self):
        """Run the crawl, then release the database resources.

        The cursor and both database connections are closed even when
        :meth:`parse` raises, so a failed crawl does not leak them.
        """
        try:
            self.parse()
        finally:
            self.cursor.close()
            self.conn.close()
            self.mongo_client.close()
# Script entry point: build the spider and run one full crawl.
if __name__ == '__main__':
    Spider().run()
文章到这里就结束了!希望大家多多支持本 Python 系列!我会用六个月的时间带大家学会 Python;如果对本文有任何疑问,欢迎私信我。今后每天都会发布新的文章,喜欢的话请点个关注!我是一个陪伴你学习 Python 的新青年,不管多忙都会坚持更新,一起加油!
Editor:Lonelyroots
注:仅用于学习!