【Python】asyncio协程异步批量下载视频

2022-12-20  本文已影响0人  奶茶不要奶不要茶
#!/usr/bin/env python3
# coding: utf-8

import os, time
import pathlib
import aiohttp
import asyncio
from urllib import parse

def to_video_name(url):
    """Create the local storage directory for *url* and return the file path.

    The directory part of the URL path is mirrored beneath the current
    working directory and created if it does not exist yet (an already
    existing directory is not an error).

    Args:
        url: Full download URL, e.g. ``http://www.example.com/video/to1.mp4``.

    Returns:
        The absolute path of the target file as a string, e.g.
        ``/root/video/to1.mp4`` when the working directory is ``/root``.
        If the URL path has no final component (it is empty or ends with
        ``/``), the returned path is the directory itself.
    """
    import posixpath  # URL paths are "/"-separated whatever the host OS is

    url_path = parse.urlparse(url).path
    dir_part, file_part = posixpath.split(url_path)

    # Normalise against a virtual root so that ".." segments in the URL
    # cannot climb above it; the result always stays inside the working
    # directory. This also removes the doubled "/" that plain string
    # concatenation produced for files located at the URL root.
    safe_dir = posixpath.normpath("/" + dir_part).lstrip("/")

    video_dir = pathlib.Path(os.getcwd(), safe_dir)
    video_dir.mkdir(parents=True, exist_ok=True)
    return str(video_dir / file_part)
    
async def fetch(session, url):
    """Download *url* to a local file, retrying on failure.

    The target file name is derived from the URL by ``to_video_name``. Each
    attempt streams the response body into ``<target>.part`` and renames it
    to the final name only after the whole body has been written, so an
    interrupted transfer never leaves a truncated file under the final name.

    Args:
        session: An open ``aiohttp.ClientSession`` used to issue the request.
        url: Full URL to download, e.g.
            ``http://www.example.com/video/to1.mp4``.

    Returns:
        The response object of the successful (status 200) request. If no
        attempt succeeded, the last response that was received, or ``None``
        when every attempt raised before a response was obtained.
    """
    import logging

    log = logging.getLogger(__name__)

    # Request headers
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.1.4753.73 Safari/527.56 Edg/102.1.4753.73",
        "Accept-Encoding": "identity"
    }
    # Maximum number of attempts
    retry = 5
    # Size of each chunk read from the response body (4 MiB)
    chunk_size = 4194304

    # Build the local file name (and create its directory) from the full URL
    video_name = to_video_name(url)
    part_name = video_name + ".part"

    r = None
    for attempt in range(1, retry + 1):
        try:
            async with session.get(url=url, headers=headers) as r:
                if r.status == 200:
                    # NOTE: fp.write() is a synchronous call made from
                    # within the coroutine.
                    with open(part_name, 'wb') as fp:
                        async for chunk in r.content.iter_chunked(chunk_size):
                            fp.write(chunk)
                    # Publish the file only once it is complete.
                    os.replace(part_name, video_name)
                    return r
                log.warning("attempt %d/%d for %s: unexpected status %s",
                            attempt, retry, url, r.status)
        except Exception as e:
            # Best effort: keep retrying, but report why the attempt failed
            # instead of discarding the error silently.
            log.warning("attempt %d/%d for %s failed: %r",
                        attempt, retry, url, e)
            try:
                os.remove(part_name)
            except OSError:
                pass  # nothing was written, or it cannot be removed
    return r
            
async def main(url_list):
    """Download every URL in *url_list* concurrently and print a summary.

    One ``fetch`` task is started per URL on a shared client session whose
    connector allows at most 10 simultaneous connections. After all tasks
    have finished, one line per URL is printed (``<status> <url>`` or
    ``FAILED <url>``), followed by the total elapsed time in seconds.

    Args:
        url_list: Iterable of URL strings to download.
    """
    urls = list(url_list)
    # perf_counter() is monotonic, so the measured duration is not affected
    # by system clock adjustments.
    start_time = time.perf_counter()
    timeout = aiohttp.ClientTimeout(connect=5)
    # NOTE(review): ssl=False switches off TLS certificate verification for
    # https URLs; confirm this is intended before using on untrusted networks.
    conn = aiohttp.TCPConnector(ssl=False, limit=10)
    async with aiohttp.ClientSession(timeout=timeout, connector=conn, raise_for_status=True) as session:
        tasks = [asyncio.create_task(fetch(session, url)) for url in urls]
        # gather() returns results in the same order as the tasks.
        results = await asyncio.gather(*tasks)
        for url, result in zip(urls, results):
            if result is None:
                # fetch() returns None when no attempt produced a response.
                print("FAILED", url)
            else:
                print(result.status, result.url)
    print("Total time:", time.perf_counter() - start_time)

if __name__ == "__main__":
    # Script entry point: the first command-line argument is a text file
    # that lists one download URL per line.
    import sys

    if len(sys.argv) < 2:
        sys.exit("Usage: {} URL_FILE".format(sys.argv[0]))
    url_file = sys.argv[1]
    with open(url_file, "r") as fp:
        # Strip surrounding whitespace (including "\r" from CRLF files) and
        # drop blank lines so that they are not treated as URLs.
        url_list = [line.strip() for line in fp if line.strip()]
    asyncio.run(main(url_list))
root@ubuntu:/tmp/py# cat url_list.txt 
http://192.168.3.120/video/01a.mkv
http://192.168.3.120/video/02a.mkv
http://192.168.3.120/video/03a.mkv
root@ubuntu:/tmp/py# 
root@ubuntu:/tmp/py# ./down2video.py url_list.txt 
200 http://192.168.3.120/video/01a.mkv
200 http://192.168.3.120/video/02a.mkv
200 http://192.168.3.120/video/03a.mkv
Total time: 0.17106056213378906
root@ubuntu:/tmp/py# 
root@ubuntu:/tmp/py# tree 
.
├── down2video.py
├── url_list.txt
└── video
    ├── 01a.mkv
    ├── 02a.mkv
    └── 03a.mkv

1 directory, 5 files
root@ubuntu:/tmp/py# 
上一篇下一篇

猜你喜欢

热点阅读