双色球历史数据爬取

2022-06-27  本文已影响0人  Lonelyroots
import pymysql
from pymongo import MongoClient
from requests_html import HTMLSession

class Spider:
    """Scrape historical Double Color Ball lottery draws and persist them.

    The history table is fetched from datachart.500.com with a single request;
    each table row becomes one record that is written to MySQL (the MongoDB
    writer is available as an alternative, see ``parse``).
    """

    def __init__(self):
        """Create the HTTP session and open the MongoDB and MySQL connections."""
        self.url = "https://datachart.500.com/ssq/history/newinc/history.php?start=19000&end=21018"
        self.session = HTMLSession()

        # Connect exactly once here -- never open connections inside the
        # scraping loop.
        # MongoDB connection. The client is kept on the instance so that
        # run() can close it when the scrape is finished.
        self.mongo_client = MongoClient('localhost', 8881)
        mongo_db = self.mongo_client['dual_colored_ball']  # created if it does not exist
        self.my_set = mongo_db['words']  # collection, created if it does not exist

        # MySQL connection.
        # NOTE(review): connection settings and credentials are hard-coded;
        # consider loading them from configuration or the environment.
        host = "localhost"
        port = 8001
        database = "spider"
        user = "admin"
        password = "qwe123"
        self.conn = pymysql.connect(host=host, port=port, db=database, user=user, password=password)
        self.cursor = self.conn.cursor()  # cursor reused for every insert

    def parse(self):
        """Download the history page and store every row of the result table.

        Each row is turned into the tuple
        ``(number, red, blue, prizePool, FirstPrize, SecondPrize, Total_bet, Date)``
        where ``red``, ``FirstPrize`` and ``SecondPrize`` are lists of strings
        and the remaining fields are single strings.

        Raises:
            IndexError: if a single-valued cell of a row has no text.
        """
        response = self.session.get(url=self.url)
        for tr in response.html.xpath('//tbody[@id="tdata"]/tr'):
            number = tr.xpath('//td[1]/text()')[0]     # draw number
            red = tr.xpath('//td[2]/text()|//td[3]/text()|//td[4]/text()|//td[5]/text()|//td[6]/text()|//td[7]/text()')       # red balls
            blue = tr.xpath('//td[8]/text()')[0]      # blue ball
            prizePool = tr.xpath('//td[10]/text()')[0]       # prize pool (yuan)
            FirstPrize = tr.xpath('//td[11]/text()|//td[12]/text()')       # first prize columns
            SecondPrize = tr.xpath('//td[13]/text()|//td[14]/text()')       # second prize columns
            Total_bet = tr.xpath('//td[15]/text()')[0]       # total bet amount
            Date = tr.xpath('//td[16]/text()')[0]     # draw date
            data = (number, red, blue, prizePool, FirstPrize, SecondPrize, Total_bet, Date)
            # To store in MongoDB instead, call the Mongo writer here:
            # self.saveMongoDB(data)
            self.saveMySQL(data)
            # Example output:
            # ('19077', ['09', '11', '13', '18', '21', '22'], '15', '928,983,242',
            #  ['1', '10,000,000'], ['118', '221,011'], '331,156,004', '2019-07-04')
            print(data)

    def saveMySQL(self, data):
        """Insert one draw record into the ``dual_colored_ball`` MySQL table.

        Args:
            data: the 8-tuple built by ``parse``. List-valued fields (red
                balls, first prize, second prize) are joined with ``-``.

        Raises:
            ValueError: if the draw number is not an integer string.
        """
        row = (
            int(data[0]),
            '-'.join(data[1]),      # join the list into one string
            data[2],
            data[3],
            '-'.join(data[4]),
            '-'.join(data[5]),
            data[6],
            data[7],
        )
        # Pass the values as query parameters so the driver escapes them;
        # formatting scraped text straight into the SQL string breaks on
        # quotes and allows SQL injection.
        self.cursor.execute(
            "insert into dual_colored_ball values (%s,%s,%s,%s,%s,%s,%s,%s);",
            row,
        )
        self.conn.commit()       # commit, otherwise the insert is not persisted

    def saveMongoDB(self, data):
        """Insert one draw record into the MongoDB collection.

        Args:
            data: the 8-tuple built by ``parse``.
        """
        # insert_many expects a list of documents, hence the surrounding [].
        self.my_set.insert_many([{
            "number": data[0],
            "red": data[1],
            "blue": data[2],
            "prizePool": data[3],
            "FirstPrize": data[4],
            "SecondPrize": data[5],
            "Total_bet": data[6],
            "Date": data[7],
        }])

    def run(self):
        """Run the scrape, then release the database resources.

        The cursor and both connections are closed even when ``parse`` raises.
        """
        try:
            self.parse()
        finally:
            self.cursor.close()
            self.conn.close()
            self.mongo_client.close()

# Script entry point: build the spider and run one full scrape.
if __name__ == "__main__":
    Spider().run()

文章到这里就结束了!希望大家能多多支持Python(系列)!六个月带大家学会Python,私聊我,可以问关于本文章的问题!以后每天都会发布新的文章,喜欢的点点关注!一个陪伴你学习Python的新青年!不管多忙都会更新下去,一起加油!

Editor:Lonelyroots

注:仅用于学习!

上一篇 | 下一篇

猜你喜欢

热点阅读