# Crawler: fetch one news article -- 2018-11-03
# (scraped blog-post header; original byline: 画奴)

import pymysql

import requests as re

from bs4 import BeautifulSoup

# Scrape one news article from cuc.edu.cn and store it via saverec().
#
# NOTE(review): at module top level this block runs BEFORE the
# `saverec` def below has executed, so the call raises NameError
# (silently caught by the except). Move this block below the function
# definitions, or wrap it in a main() entry point -- confirm intent.
try:

    url = "http://www.cuc.edu.cn/zcyw/11569.html"

    # 're' is the requests module (aliased in the import block).
    r = re.get(url)

    soup = BeautifulSoup(r.text, 'html.parser')

    title = soup.find_all('h1')

    # Bug fix: the original searched for '<sapn>' (typo), which matches
    # nothing and made newsfrom[0] / newsdate[0] raise IndexError.
    newsfrom = soup.find_all('span')

    newsdate = soup.find_all('span')

    viewcount = soup.find_all('span', attrs={'id': 'hits'})

    # Bug fix: attrs must be a dict; {'class','con-area'} was a set
    # literal, which is not a valid attribute filter.
    newscontent = soup.find_all('article', attrs={'class': 'con-area'})

    ntitle = title[0].get_text()

    # Hard-coded slice offsets assume the exact 2018 page layout of the
    # matched <span> text -- fragile; verify against the live page.
    nfrom = newsfrom[0].get_text()[27:30]

    ndate = newsdate[0].get_text()[67:77]

    ncount = int(viewcount[0].get_text())

    ncontent = newscontent[0].get_text()

    saverec(url, ntitle, nfrom, ndate, ncount, ncontent)

except Exception as exc:

    # Top-level boundary: report WHAT failed instead of a bare "error"
    # from a bare except (which also swallowed SystemExit/KeyboardInterrupt).
    print("error:", exc)

def getUrl():
    """Placeholder for news-URL discovery; not implemented yet."""
    return None

def saverec(url, ntitle, nfrom, ndate, ncount, ncontent):
    """Insert one scraped news record into the `cucnews` table.

    Args:
        url: source page URL.
        ntitle: article title.
        nfrom: news source string.
        ndate: publication date string.
        ncount: view count (int).
        ncontent: full article text.

    Commits on success; rolls back and prints the error on failure.
    Returns None.
    """
    # pymysql.connect(host, user, password, database)
    db = pymysql.connect("localhost", "root", "2017", "engword", charset='utf8')
    try:
        cursor = db.cursor()
        try:
            cursor.execute(
                "INSERT INTO cucnews(newsurl,title,newsfrom,newsdate,contents,newscount) "
                "VALUES(%s,%s,%s,%s,%s,%s)",
                (url, ntitle, nfrom, ndate, ncontent, ncount),
            )
            db.commit()
        except pymysql.MySQLError as exc:
            # Bug fix: the original called db.error(), but a pymysql
            # Connection has no .error() method -- the except handler
            # itself raised AttributeError. Print the exception instead.
            print(exc)
            db.rollback()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even if the insert/rollback
        # path raised (the original leaked it on unhandled errors).
        db.close()

# (scraped page footer removed from code path: prev/next navigation,
#  "you may also like", "hot reads")