CSDN博客标题抓取

2020-07-05  本文已影响0人  成功在于实践
import json
import time
from selenium import webdriver
class CsdnBlog():
    """Scrape link titles and URLs from one blog category page into a file.

    A Chrome WebDriver session loads ``self.url``; ``run()`` collects every
    anchor matched by a fixed XPath and appends the resulting
    ``{title: href}`` mapping to ``./123.json``.

    The instance owns two resources (the output file and the browser).
    Release them with ``close()`` or by using the instance as a context
    manager.

    NOTE(review): despite the class name, the hard-coded URL points at
    cnblogs.com, not CSDN - confirm which site is intended.
    """

    def __init__(self):
        """Start Chrome, load the category page and open the output file."""
        self._closed = False
        self.url = 'https://www.cnblogs.com/imyalost/category/1040462.html'
        self.driver = webdriver.Chrome()
        try:
            self.driver.get(self.url)
            # Explicit UTF-8: save_data() writes non-ASCII text verbatim
            # (ensure_ascii=False), so relying on the platform default
            # encoding can fail or produce a file other tools cannot read.
            self.file = open('./123.json', 'w', encoding='utf-8')
        except Exception:
            # Do not leave an orphaned browser behind when setup fails.
            self.driver.quit()
            raise

    def parse_data(self):
        """Return a dict mapping each matched link's text to its ``href``.

        Links that share the same text overwrite one another; the last one
        found on the page wins.
        """
        # find_elements(by, value) exists in both Selenium 3 and 4, whereas
        # find_elements_by_xpath was removed in Selenium 4.3. The literal
        # 'xpath' is the value of selenium's By.XPATH constant.
        anchors = self.driver.find_elements(
            'xpath', '//*[@id="mainContent"]/div/div/div/div[1]/a')
        return {anchor.text: anchor.get_attribute('href') for anchor in anchors}

    def save_data(self, data):
        """Append ``data`` to the output file as one JSON object per line.

        Each record is terminated by ``',\\n'``, so the file as a whole is a
        sequence of comma-terminated JSON objects rather than a single valid
        JSON document.
        """
        record = json.dumps(data, ensure_ascii=False) + ',\n'
        self.file.write(record)
        # Flush so the record reaches disk even if close() is never called.
        self.file.flush()

    def run(self):
        """Reload the page, scrape it and persist the result."""
        # Reloading here means repeated run() calls see fresh page content.
        self.driver.get(self.url)
        data = self.parse_data()
        self.save_data(data)

    def close(self):
        """Close the output file and quit the browser. Safe to call twice."""
        if getattr(self, '_closed', False):
            return
        self._closed = True
        try:
            self.file.close()
        finally:
            # quit() ends the whole WebDriver session, not just the window.
            self.driver.quit()

    def __enter__(self):
        """Return the instance itself for use in a ``with`` statement."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Release the file and the browser; never suppresses exceptions."""
        self.close()
        return False
if __name__ == '__main__':
    # Script entry point: scrape once, then always release the output file
    # and the browser, even if scraping raises.
    csdn = CsdnBlog()
    try:
        csdn.run()
    finally:
        try:
            # Closing flushes buffered records to ./123.json.
            csdn.file.close()
        finally:
            # quit() ends the WebDriver session so no browser is left running.
            csdn.driver.quit()
上一篇 | 下一篇

猜你喜欢

热点阅读