线程池

2019-06-04  本文已影响0人  forjie

线程池 concurrent.futures 使用心得

from concurrent.futures import ThreadPoolExecutor, as_completed

# 下载图片
def save_image(self,
               img_url,
               meida_path,
               ):
    """
    Download one image into the local media directory.

    :param img_url:    URL of the image to fetch
    :param meida_path: media directory, passed through to ``get_img_path``
                       (the misspelled name is kept so keyword callers keep working)
    :return: ``(file_path, whole_path)`` -- note the order is swapped relative
             to what ``get_img_path`` returns. Both are whatever
             ``get_img_path`` produced; nothing is downloaded when either
             one is empty.
    """
    # get_img_path takes ``self`` like the other helpers, so it must be called
    # through the instance; a bare call shifts every argument by one position.
    whole_path, file_path = self.get_img_path(meida_path, img_url)

    if whole_path and file_path:
        self.download_img(img_url, whole_path)
    return file_path, whole_path

# 对拿到的url 进行分解,创建本地下载的路径
def get_img_path(self,
                 media_path,
                 img_url,
                 ):
    """
    Work out where an image should be stored locally, mirroring its URL path.

    The target directory is created if it does not exist yet.

    :param media_path: media directory, joined onto the module-level ``base_dir``
    :param img_url:    URL of the image
           eg: https://***/doctor/20190315/4/ca47fcbe8231ac60b152b51ff7183113_100_100.png
    :return: ``(whole_path, file_path)``
        whole_path: full local path of the file
        file_path:  path component of the URL,
                    eg: /doctor/20190315/4/ca47fcbe8231ac60b152b51ff7183113_100_100.png
        ``(None, None)`` when there is nothing to download: the file is
        already on disk, the URL has no file name, or the target directory
        cannot be created.
    """
    parse_path = urlparse(img_url).path
    url_dir, file_name = os.path.split(parse_path)
    if not file_name:
        # A URL ending in "/" (or with no path) gives no file to write to.
        return None, None

    # Strip every leading slash: os.path.join discards all earlier components
    # when a later one is absolute, so a path like "//x/y.png" would otherwise
    # escape base_dir/media_path.
    target_dir = os.path.join(base_dir, media_path, url_dir.lstrip('/'))
    whole_path = os.path.join(target_dir, file_name)

    # Already on disk: skip the download. Note: the file may exist without
    # having been recorded in the database.
    if os.path.exists(whole_path):
        # TODO: check the URL before getting this far
        return None, None

    try:
        os.makedirs(target_dir, exist_ok=True)
    except OSError as e:
        # Return the same "skip" pair as above so the caller's tuple
        # unpacking keeps working instead of failing on a bare None.
        print(e)
        return None, None
    return whole_path, parse_path

# 拿到路径进行本地下载
def download_img(self,
                 img_url,
                 whole_path,
                 timeout=10):
    """
    Download an image to ``whole_path``; does nothing if the file already exists.

    Failures are reported with ``print`` and otherwise swallowed (best effort),
    so callers that need the file should check that it exists afterwards.

    :param img_url:    URL of the image
    :param whole_path: full local path to write to; its directory must exist
    :param timeout:    seconds to wait for the server, passed to ``requests.get``
    :return: None
    """
    if os.path.exists(whole_path):
        return

    # Write to a side file and rename on success, so an interrupted download
    # never leaves a truncated image that the exists() check above would
    # treat as complete on the next run.
    tmp_path = whole_path + '.part'
    try:
        # stream=True is what lets iter_content read the body in chunks
        # rather than after it has been loaded into memory; the timeout keeps
        # a stalled server from tying up a pool worker forever.
        with requests.get(img_url, stream=True, timeout=timeout) as res:
            # Do not save an HTTP error page as if it were the image.
            res.raise_for_status()
            with open(tmp_path, 'wb') as f:
                for chunk in res.iter_content(chunk_size=8192):
                    f.write(chunk)
        os.replace(tmp_path, whole_path)
    except (requests.RequestException, OSError) as e:
        print('download_exception:%s' % img_url)
        print(e)
        try:
            os.remove(tmp_path)
        except OSError:
            # Nothing was written, or it is already gone.
            pass

# 利用线程池进行多线程下载
def no_name_insert_media(self,
                         url_lst,
                         sql_colums,
                         ):
    """
    Download every URL in ``url_lst`` through a thread pool and collect one
    row per downloaded image.

    All tasks are submitted up front and results are consumed with
    ``as_completed``, so the downloads overlap instead of running one by one.

    :param url_lst:    iterable of image URLs
    :param sql_colums: not used by the code shown here
    :return: list of ``[file_path, uid, md5sum]`` rows, in completion order.
             NOTE(review): the original snippet built this list and then
             dropped it; returning it is assumed to be what the caller
             needs -- confirm.
    """
    col_data = []
    # ``max_workers`` and ``media_path`` are module-level names defined
    # outside this snippet.
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        # Submit the bound method: a bare ``save_image`` would receive the
        # URL as ``self``.
        all_task = [executor.submit(self.save_image, url, media_path)
                    for url in url_lst]
        for future in as_completed(all_task):
            file_path, whole_path = future.result()
            # Skip anything without both paths (already on disk, or the
            # path could not be prepared).
            if not (file_path and whole_path):
                continue
            uid, md5sum = self.md5_uuid(whole_path)
            col_data.append([file_path, uid, md5sum])
    return col_data

PS:
1,在创建线程池的时候尽量用 with 语句,这样任务执行完之后线程池会自动关闭

with ThreadPoolExecutor(max_workers=max_workers) as executor

2,带参数的函数:在 submit 里把参数依次写在函数名的后面,有多个参数就继续往后追加

executor.submit(save_image, url, media_path)

这里的url和media_path都是参数
3,有时候你用了多线程,却感觉效率没有提升,那多半是写法有问题

all_task = [executor.submit(save_image, url, media_path) for url in url_lst]
            col_data = []
            for future in as_completed(all_task):

上面这样写,才真正利用了线程池:先把所有任务一次性 submit 进去,再用 as_completed 按完成的先后顺序取结果.
下面这么写就没有:submit 之后立刻调用 result(),而 result() 会一直阻塞到该任务完成,于是下一个任务要等上一个结束才会提交,实际上是串行执行.

  # Anti-pattern: .result() is called on each future right after submit(),
  # so the loop waits for that download to finish before it submits the
  # next one.
  for item in url_lst:
          img_url = item.get('img_url') if item.get('img_url') else item.get('img')
          if not img_url or check_url(img_url): continue
          file_path, whole_path = executor.submit(self.save_image, img_url, media_path).result()
上一篇 下一篇

猜你喜欢

热点阅读