Python

Python基础爬取数据存数据库

2020-01-27  本文已影响0人  elijah777

Python爬虫操作

一、基础爬取数据存数据库

数据库中预先存有初始数据,其中包含用于搜索的 key 信息。整体流程如下:

1、读取数据库内容

2、遍历key

3、发送get请求

4、获取并解析数据:从返回的 JSON 中解析出所需字段

5、更新数据库:将获取到的数据内容写回对应记录

import json

import pymysql
import requests
import datetime
import  time
# Table that process_item() reads from and updates.
TABLE_NAME = 'snow'

# MySQL connection settings passed to pymysql.connect().
host = '127.0.0.1'
port = 3306
user = 'root'
psd = '123456'  # password
db = 'test'  # database (schema) name
c = 'utf8'  # connection charset
 
def getBrand(symbol, company_name, timeout=10):
    """Fetch the ``net_increase_in_cce`` values reported for one stock symbol.

    Sends a GET request to the balance endpoint and reads ``data.quote_name``
    and ``data.list`` from the JSON response.

    Args:
        symbol: Value sent as the ``symbol`` query parameter.
        company_name: Expected company name; compared with the returned
            ``quote_name``.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        ``None`` when the returned ``quote_name`` differs from
        ``company_name``. Otherwise a list whose first element is
        ``quote_name``, followed by the first element of
        ``net_increase_in_cce`` for each entry of ``data.list``, in
        response order.

    Raises:
        requests.RequestException: On timeout, connection failure or an
            HTTP error status.
        ValueError: When the response body is not valid JSON.
        KeyError: When the response JSON lacks one of the expected keys.
    """
    url = "https://stock.xxxxx.com/v5/stock/finance/cn/balance.json"
    # Passing the query as ``params`` lets requests URL-encode ``symbol``
    # instead of splicing it into the URL verbatim.
    params = {
        "symbol": symbol,
        "type": "Q4",
        "is_detail": "true",
        "count": 6,
        "timestamp": 1819955200001,
    }
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36",
        "Cookie":"xq_a_token=xxxxx;"
    }
    # Without a timeout requests waits indefinitely on a stalled connection.
    response = requests.get(url, params=params, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx rather than trying to parse an error page.
    response.raise_for_status()

    result = json.loads(response.text)
    payload = result['data']
    reports = payload['list']
    quote_name = payload['quote_name']
    if quote_name != company_name:
        # The symbol resolved to a different company: signal "no data".
        return None

    pdata = [quote_name]
    for report in reports:
        item_info = {
            'report_name': report['report_name'],
            'net_increase_in_cce': report['net_increase_in_cce'][0],
        }
        pdata.append(item_info['net_increase_in_cce'])
        print(item_info)
    return pdata


def process_item():
    """Refresh the CASH_FLOW13..18 columns of every eligible row in TABLE_NAME.

    Selects all rows whose COMPANY_NAME and STOCK_CODE are not null, builds a
    symbol for each row, calls ``getBrand`` for it and writes the six returned
    values (plus the symbol, stored in DESCRIPTION) back to the row by ID.
    Each row update is committed on success and rolled back on failure.

    Any other exception (query failure, HTTP error, malformed row) is printed
    and ends the run; the cursor and the connection are always closed.
    """
    # Select-list order matters: rows are read by position below
    # (0 = ID, 2 = DATA_ID, 3 = COMPANY_NAME).
    query_sql = ("select  ID,  HEBING,       DATA_ID,       COMPANY_NAME,       STOCK_CODE, "
                 "   ASSETS_TOTAL18, ASSETS_TOTAL17, ASSETS_TOTAL16, ASSETS_TOTAL15,  ASSETS_TOTAL14, ASSETS_TOTAL13,   "
                 "    CASH_FLOW18,    CASH_FLOW17,   CASH_FLOW16,   CASH_FLOW15,   CASH_FLOW14,    CASH_FLOW13,   "
                 "    DESCRIPTION,       CREATE_DATE,       LAST_UP_DATE "
                 " from " + TABLE_NAME + "  where COMPANY_NAME is not null and STOCK_CODE  is not null ")
    # Built once: the statement text is identical for every row.
    update_sql = ("update " + TABLE_NAME + "  set CASH_FLOW18=%s "
                  ", CASH_FLOW17=%s, CASH_FLOW16=%s, CASH_FLOW15=%s"
                  ", CASH_FLOW14=%s, CASH_FLOW13=%s, DESCRIPTION=%s "
                  " where ID=%s")

    # Database connection
    con = pymysql.connect(host=host, user=user, passwd=psd, db=db, charset=c, port=port)
    try:
        # Database cursor
        cue = con.cursor()
        try:
            cue.execute(query_sql)

            # fetchall() returns the rows as tuples, so they can be iterated.
            for row in cue.fetchall():
                # NOTE(review): the symbol is derived from column 2 (DATA_ID)
                # although the query filters on STOCK_CODE - confirm DATA_ID is
                # the intended source and is never NULL.
                parts = row[2].split('.')
                sc = parts[1] + parts[0]  # "<code>.<suffix>" -> "<suffix><code>"

                pdata = getBrand(sc, row[3])
                if pdata is None:
                    continue
                if len(pdata) < 7:
                    # Fewer than six values came back; skip this row instead of
                    # letting an IndexError abort the remaining rows.
                    print('Skip', sc, '- expected 6 values, got', len(pdata) - 1)
                    continue

                data = (pdata[1], pdata[2], pdata[3], pdata[4], pdata[5], pdata[6], sc, row[0])
                try:
                    cue.execute(update_sql, data)
                    print(data)
                except Exception as e:
                    print('Insert error:', e)
                    con.rollback()
                else:
                    con.commit()
        except Exception as e:
            print('Insert error:', e)
            con.rollback()
        else:
            con.commit()
        finally:
            cue.close()  # close the cursor
    finally:
        con.close()  # release the connection even when the run failed
   

if __name__ == '__main__':
    # Script entry point: run one read -> fetch -> update pass over the table.
    process_item()


此段代码仅用于数据获取和爬虫学习,不做任何商业用途。

2020/01/27 春节新型冠状病毒期间于许昌

上一篇下一篇

猜你喜欢

热点阅读