Commonly Used Python Packages

2020-03-21  BrianHsu
Debugging
import pdb

a = range(2000)
for i in a:
    pdb.set_trace()  # drop into the interactive debugger on each iteration
    if i % 2 == 0:
        continue
    print(i + 1)
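Since Python 3.7 the built-in breakpoint() does the same job without an explicit import; by default it calls pdb.set_trace():

for i in range(5):
    breakpoint()  # drops into pdb by default (Python 3.7+, PEP 553)
    print(i)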
import pysnooper

# pysnooper logs every executed line and variable change;
# prefix= tags each line of its output
@pysnooper.snoop(prefix="com.pioneer.bc")
def run_test():
    total = sum(range(3))
    return total
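snoop() can also write its trace to a file instead of stderr by passing a path as the first argument, e.g. @pysnooper.snoop('/path/to/log') (the path here is illustrative).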
Object serialization
import pickle


# serialize an object and write it to a file
def pickle_dumps(obj, file):
    pickle.dump(obj, file)


# read from a file and deserialize
def pickle_load(file):
    print(pickle.load(file))


if __name__ == '__main__':
    a = '["a", "b", "c"]'
    with open("t.txt", 'wb') as f:
        pickle_dumps(a, f)

    with open("t.txt", 'rb') as fr:
        pickle_load(fr)
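For an in-memory round trip without a file, pickle.dumps/pickle.loads operate on bytes directly:

import pickle

data = {"a": 1, "b": [2, 3]}
blob = pickle.dumps(data)  # serialize to a bytes object
assert pickle.loads(blob) == data  # deserialize and compare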
The same example using _pickle, the C implementation:

import _pickle as cPickle


# serialize an object and write it to a file
def pickle_dumps(obj, file):
    cPickle.dump(obj, file)


# read from a file and deserialize
def pickle_load(file):
    print(cPickle.load(file))


if __name__ == '__main__':
    a = '["a", "b", "c"]'
    with open("t.txt", 'wb') as f:
        pickle_dumps(a, f)

    with open("t.txt", 'rb') as fr:
        pickle_load(fr)
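Note: on Python 3 the plain pickle module already uses the C implementation (_pickle) under the hood when it is available, so importing it directly rarely buys anything.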
Data processing
import os


# read an Excel sheet with pandas
def read_excel(file_path, file_name):
    import pandas as pds
    return pds.read_excel(os.path.join(file_path, file_name), sheet_name="user")


if __name__ == '__main__':
    df = read_excel(".", "t.xlsx")
    for i in df.index:
        print(df['user'][i])
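Writing data back is symmetric; a minimal sketch (the file name, sheet name, and column are assumptions; xlsx output needs openpyxl or xlsxwriter installed):

import pandas as pds

df = pds.DataFrame({"user": ["li", "wang"]})
df.to_excel("out.xlsx", sheet_name="user", index=False)  # index=False drops the row-index column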
HTTP requests
# fetch a web page with requests
def request_get():
    import requests
    res = requests.get("http://m.baidu.com", timeout=10)  # always set a timeout
    return res.text


if __name__ == '__main__':
    print(request_get())
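POST requests with a JSON body follow the same pattern; a minimal sketch (httpbin.org is just a public echo service used for illustration):

import requests

res = requests.post("http://httpbin.org/post", json={"key": "value"}, timeout=10)
print(res.json())  # httpbin echoes the request back as JSON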
import ujson
from aiohttp import ClientSession, TCPConnector


class Client(object):
    client = None
    connector = None

    @staticmethod
    def init_client():
        if not Client.connector:
            Client.init_connector()
        Client.client = ClientSession(connector=Client.connector, json_serialize=ujson.dumps)

    @staticmethod
    def get_client():
        if not Client.client or Client.client.closed:
            Client.init_client()
        return Client.client

    @staticmethod
    def init_connector():
        # shared pool: skip certificate verification, allow at most
        # 1000 connections, keep idle connections alive for 30 s
        Client.connector = TCPConnector(ssl=False,
                                        limit=1000,
                                        keepalive_timeout=30)


async def aio_http():
    async with Client.get_client().get("https://m.baidu.com", timeout=30) as response:
        return await response.text()  # .text() is a coroutine and must be awaited


if __name__ == '__main__':
    import asyncio

    async def main():
        print(await aio_http())
        await Client.get_client().close()  # close the session explicitly

    asyncio.run(main())
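The pooled connector pays off when several requests run concurrently over the shared session; a minimal sketch building on the Client class above (the URLs are placeholders):

async def fetch(url):
    async with Client.get_client().get(url, timeout=30) as resp:
        return resp.status

async def fetch_all():
    # the connector multiplexes these requests over its connection pool
    return await asyncio.gather(*(fetch(u) for u in ["https://m.baidu.com"] * 5))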
Short links or random strings
# generate a random string
def gen_random():
    import shortuuid
    return shortuuid.uuid()


if __name__ == '__main__':
    print(gen_random())


# generate a short-link token
def short_url():
    import shortuuid
    scheme = "http://"
    return scheme + shortuuid.uuid()


if __name__ == '__main__':
    print(short_url())
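By itself this only yields a random token; a real shortener must also remember which token maps to which target URL. A minimal in-memory sketch (the dict and the short.example domain are stand-ins):

import shortuuid

url_map = {}  # token -> original URL; a real service would use a datastore


def shorten(long_url):
    token = shortuuid.uuid()
    url_map[token] = long_url
    return "http://short.example/" + token


def resolve(token):
    return url_map.get(token)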
Storing data in different formats
# build a tablib Dataset and export it in several formats
def gen_table():
    import tablib
    d = tablib.Dataset(headers=["First_name", "Second_name"])
    origin_data = [('li', 'san'), ('wang', 'wu')]
    for row in origin_data:
        d.append(row)
    print(d.export('json'))
    print(d.export('yaml'))  # needs the tablib[yaml] extra
    print(d.export('df'))    # pandas DataFrame; needs pandas installed

if __name__ == '__main__':
    gen_table()
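The exports are plain strings, so persisting one is a one-liner; csv needs no extra dependency:

import tablib

d = tablib.Dataset(headers=["First_name", "Second_name"])
d.append(('li', 'san'))
with open("t.csv", "w") as f:
    f.write(d.export('csv'))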
Web scraping and data extraction
# scrape a page with requests and extract data with BeautifulSoup
def get_web_page():
    import requests
    from bs4 import BeautifulSoup

    res = requests.get("https://www.liepin.com/", timeout=10)
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'html.parser')
    for img in soup.find_all("img"):
        if img.get("src"):
            print(img.get("alt"))  # <img> tags have no text; alt is the readable label
            print(img.get("src"))


if __name__ == '__main__':
    get_web_page()
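BeautifulSoup also understands CSS selectors, which can replace the find_all/attribute-check pattern; continuing from the soup object above:

for img in soup.select("img[src]"):  # only <img> tags that actually carry a src
    print(img["src"])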
Documentation generation tools
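A minimal sketch with the standard library's pydoc (one option among many; the module name is a made-up example): with docstrings in place, pydoc renders documentation with no extra dependency.

# mymodule.py (hypothetical module)
def add(a, b):
    """Return the sum of a and b."""
    return a + b

# write mymodule.html into the current directory:
#   python -m pydoc -w mymodule
# or browse docs for all importable modules in a local web server:
#   python -m pydoc -b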