【Python】asyncio协程异步批量下载视频
2022-12-20 本文已影响0人
奶茶不要奶不要茶
#!/usr/bin/env python3
# coding: utf-8
import asyncio
import logging
import os
import pathlib
import time
from urllib import parse

import aiohttp
def to_video_name(url):
    """Create the storage directory for ``url`` and return the local file path.

    The path component of the URL is mirrored beneath the current working
    directory, e.g. ``http://www.example.com/video/to1.mp4`` maps to
    ``<cwd>/video/to1.mp4``. Missing directories are created; directories
    that already exist are left alone without raising.

    Args:
        url: Full URL of the file to download.

    Returns:
        Absolute path, as a ``str``, of the file the download should be
        written to. When the URL does not name a file (empty path, or a
        path ending in ``/``, ``.`` or ``..``), ``index.html`` is used as
        the file name so the result never points at a directory.
    """
    url_path = parse.urlparse(url).path

    # Resolve empty, "." and ".." segments by hand. ".." can never climb
    # above the root of the URL path, so the resulting file always stays
    # inside the current working directory.
    segments = []
    for part in url_path.split("/"):
        if part in ("", "."):
            continue
        if part == "..":
            if segments:
                segments.pop()
            continue
        segments.append(part)

    # A path whose last component is empty, "." or ".." refers to a
    # directory, not a file.
    last_component = url_path.rsplit("/", 1)[-1]
    if segments and last_component not in ("", ".", ".."):
        file_name = segments.pop()
    else:
        file_name = "index.html"

    # Storage directory, absolute, e.g. /root/video
    video_dir = pathlib.Path(os.getcwd()).joinpath(*segments)
    video_dir.mkdir(parents=True, exist_ok=True)

    # File name, absolute, e.g. /root/video/to1.mp4
    return str(video_dir / file_name)
async def fetch(session, url):
    """Download ``url`` to a local file, retrying on failure.

    The target path is derived from the URL by ``to_video_name``. The body
    is streamed in chunks into ``<target>.part`` and renamed to the final
    name only after the whole body has been written, so an interrupted
    download never leaves a truncated file under the final name.

    Args:
        session: The ``aiohttp.ClientSession`` used to issue the request.
        url: Full URL to download, e.g. http://www.example.com/video/to1.mp4

    Returns:
        The response object of a successful (HTTP 200) download. If every
        attempt fails, the response from the most recent request that
        produced one, or ``None`` if no request ever produced a response.
        Callers must therefore be prepared for ``None``.
    """
    # Request headers: a browser-like User-Agent, and "identity" so that
    # no content compression is requested.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/102.1.4753.73 Safari/527.56 Edg/102.1.4753.73",
        "Accept-Encoding": "identity"
    }
    # Maximum number of attempts before giving up
    retry = 5
    # Read the body 4 MiB at a time
    chunk_size = 4194304

    # Build the local file name from the full URL
    video_name = to_video_name(url)
    part_name = video_name + ".part"

    r = None
    for attempt in range(1, retry + 1):
        try:
            async with session.get(url=url, headers=headers) as r:
                if r.status == 200:
                    # NOTE: fp.write() is a blocking call made from the
                    # event loop; kept as in the original implementation.
                    with open(part_name, 'wb') as fp:
                        async for chunk in r.content.iter_chunked(chunk_size):
                            fp.write(chunk)
                    # Publish the file only once it is complete.
                    os.replace(part_name, video_name)
                    return r
                logging.warning(
                    "Attempt %d/%d for %s: unexpected HTTP status %s",
                    attempt, retry, url, r.status,
                )
        except Exception as e:
            # Retry boundary: any failure (network, HTTP error raised by
            # the session, disk I/O) triggers another attempt, but it is
            # logged rather than silently discarded.
            logging.warning(
                "Attempt %d/%d for %s failed: %r", attempt, retry, url, e
            )

    # Every attempt failed: drop whatever partial data was written.
    try:
        os.remove(part_name)
    except OSError:
        # Usually the partial file was never created; either way there is
        # nothing more to clean up.
        pass
    return r
async def main(url_list):
    """Download every URL in ``url_list`` concurrently and print a summary.

    One ``fetch`` task is started per URL on a shared client session. When
    all tasks have finished, one line per URL is printed: the HTTP status
    and URL for a download that returned a response, or ``FAILED`` and the
    URL otherwise. The total elapsed time is printed last.

    Args:
        url_list: Iterable of URL strings to download.
    """
    start_time = time.perf_counter()
    # Materialise once: the URLs are iterated twice (task creation and
    # result reporting).
    urls = list(url_list)

    timeout = aiohttp.ClientTimeout(connect=5)
    # NOTE(security): ssl=False turns off TLS certificate verification.
    # Kept for backward compatibility; reconsider before downloading from
    # hosts that are not trusted.
    conn = aiohttp.TCPConnector(ssl=False, limit=10)
    async with aiohttp.ClientSession(timeout=timeout, connector=conn, raise_for_status=True) as session:
        tasks = [asyncio.create_task(fetch(session, url)) for url in urls]
        # return_exceptions=True: a task that raises is reported for its
        # own URL instead of aborting the whole batch.
        results = await asyncio.gather(*tasks, return_exceptions=True)

    for url, result in zip(urls, results):
        if isinstance(result, BaseException):
            print("FAILED", url, repr(result))
        elif result is None:
            # fetch() returns None when no attempt produced a response.
            print("FAILED", url)
        else:
            print(result.status, result.url)

    end_time = time.perf_counter()
    print("Total time:", end_time-start_time)
def read_url_list(url_file):
    """Read the URLs to download from a text file, one URL per line.

    Leading and trailing whitespace is removed from every line and blank
    lines are skipped, so stray spaces or empty lines in the file do not
    turn into invalid URLs. A UTF-8 byte-order mark at the start of the
    file, if present, is ignored.

    Args:
        url_file: Path of the text file to read.

    Returns:
        List of non-empty URL strings, in file order.
    """
    with open(url_file, "r", encoding="utf-8-sig") as fp:
        stripped = (line.strip() for line in fp)
        return [url for url in stripped if url]


if __name__ == "__main__":
    import sys

    # Fail with a usage message instead of a bare IndexError.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: {sys.argv[0]} URL_LIST_FILE")

    url_list = read_url_list(sys.argv[1])
    asyncio.run(main(url_list))
root@ubuntu:/tmp/py# cat url_list.txt
http://192.168.3.120/video/01a.mkv
http://192.168.3.120/video/02a.mkv
http://192.168.3.120/video/03a.mkv
root@ubuntu:/tmp/py#
root@ubuntu:/tmp/py# ./down2video.py url_list.txt
200 http://192.168.3.120/video/01a.mkv
200 http://192.168.3.120/video/02a.mkv
200 http://192.168.3.120/video/03a.mkv
Total time: 0.17106056213378906
root@ubuntu:/tmp/py#
root@ubuntu:/tmp/py# tree
.
├── down2video.py
├── url_list.txt
└── video
    ├── 01a.mkv
    ├── 02a.mkv
    └── 03a.mkv
1 directory, 5 files
root@ubuntu:/tmp/py#