Commonly Used Python Packages
2020-03-21
BrianHsu
Debug
- pdb: standard-library module; set a breakpoint with pdb.set_trace(), then step through at the prompt with commands such as n (next) and c (continue)
import pdb

a = range(2000)
pdb.set_trace()  # execution pauses here; inspect state at the (Pdb) prompt
for i in a:
    pdb.set_trace()
    if i % 2 == 0:
        continue
    print(i + 1)
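Since Python 3.7 the built-in breakpoint() does the same thing without an explicit import; a minimal sketch:

# breakpoint() is equivalent to "import pdb; pdb.set_trace()" by default (Python 3.7+)
for i in range(3):
    if i == 2:
        breakpoint()  # at the (Pdb) prompt: n = next line, c = continue, p i = print i, q = quit
    print(i)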
- pysnooper: third-party package; its decorator traces a function and prints every executed line and variable change to the console
import pysnooper

@pysnooper.snoop(prefix="com.pioneer.bc")  # prefix is prepended to each log line
def run_test():
    total = sum(range(3))
    return total

run_test()
Object serialization
- pickle: standard-library module for serializing and deserializing Python objects: pickle.dump(obj, file) / pickle.load(file)
import pickle

# Serialize and write to a file
def pickle_dumps(obj, file):
    pickle.dump(obj, file)

# Read back and deserialize
def pickle_load(file):
    print(pickle.load(file))

if __name__ == '__main__':
    a = '["a", "b", "c"]'
    with open("t.txt", 'wb') as f:
        pickle_dumps(a, f)
    with open("t.txt", 'rb') as fr:
        pickle_load(fr)
- cPickle: the C implementation of pickle, available as _pickle in Python 3 (the plain pickle module already uses it automatically when present); same API as above, and reportedly 20x or more faster than the pure-Python implementation. A rough benchmark sketch follows the example.
import _pickle as cPickle

# Serialize and write to a file
def pickle_dumps(obj, file):
    cPickle.dump(obj, file)

# Read back and deserialize
def pickle_load(file):
    print(cPickle.load(file))

if __name__ == '__main__':
    a = '["a", "b", "c"]'
    with open("t.txt", 'wb') as f:
        pickle_dumps(a, f)
    with open("t.txt", 'rb') as fr:
        pickle_load(fr)
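The speed claim can be checked with timeit; a rough sketch assuming CPython, where pickle._Pickler is the pure-Python pickler (an internal name, so treat the comparison as illustrative only):

import io
import pickle
import timeit

data = list(range(100000))

def c_dumps():
    pickle.dumps(data)  # uses the C accelerator when available

def py_dumps():
    buf = io.BytesIO()
    # pickle._Pickler is the pure-Python implementation (a CPython internal detail)
    pickle._Pickler(buf, protocol=pickle.HIGHEST_PROTOCOL).dump(data)

print("C pickler:", timeit.timeit(c_dumps, number=20))
print("pure-Python pickler:", timeit.timeit(py_dumps, number=20))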
Data processing
- pandas: third-party package that reads tabular data and exposes it as rows and columns (a DataFrame)
import os

# Read an Excel sheet with pandas
def read_excel(file_path, file_name):
    import pandas as pds
    # os.path.join avoids the broken concatenation "." + "t.xlsx" -> ".t.xlsx"
    return pds.read_excel(os.path.join(file_path, file_name), sheet_name="user")

if __name__ == '__main__':
    df = read_excel(".", "t.xlsx")
    for i in df.index:
        print(df['user'][i])
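Indexing column-by-row as above works, but itertuples() is the more idiomatic (and faster) way to walk rows; a sketch assuming the same 'user' column:

import pandas as pds

df = pds.read_excel("t.xlsx", sheet_name="user")
for row in df.itertuples(index=False):
    print(row.user)  # attribute access works because 'user' is a valid identifier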
HTTP requests
- requests: third-party package for synchronous HTTP requests
# Fetch a page with requests
def request_get():
    import requests
    res = requests.get("http://m.baidu.com")
    return res.text

if __name__ == '__main__':
    print(request_get())
- aiohttp: third-party package for asynchronous requests, driven with the async/await keywords; a concurrency sketch with asyncio.gather follows the example
import asyncio

import ujson
from aiohttp import ClientSession, ClientTimeout
from aiohttp.connector import TCPConnector

class Client(object):
    client = None
    connector = None

    @staticmethod
    def init_client():
        if not Client.connector:
            Client.init_connector()
        Client.client = ClientSession(connector=Client.connector,
                                      json_serialize=ujson.dumps)

    @staticmethod
    def get_client():
        if not Client.client or Client.client.closed:
            Client.init_client()
        return Client.client

    @staticmethod
    def init_connector():
        # ssl=False replaces the deprecated verify_ssl=False
        Client.connector = TCPConnector(ssl=False,
                                        limit=1000,
                                        keepalive_timeout=30)

async def aio_http():
    async with Client.get_client().get("https://m.baidu.com",
                                       timeout=ClientTimeout(total=30)) as response:
        # response.text() is a coroutine and must be awaited
        return await response.text()

if __name__ == '__main__':
    loop = asyncio.get_event_loop()
    print(loop.run_until_complete(aio_http()))
    loop.run_until_complete(Client.client.close())  # avoid the "unclosed session" warning
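The payoff of the async client is concurrency: asyncio.gather runs several requests on one event loop. A minimal sketch reusing the Client class above (the repeated URL is just a placeholder):

import asyncio

# Assumes the Client class defined above is in scope
async def fetch_status(url):
    async with Client.get_client().get(url) as response:
        return response.status

async def fetch_all():
    urls = ["https://m.baidu.com"] * 3  # placeholder list of targets
    return await asyncio.gather(*(fetch_status(u) for u in urls))

if __name__ == '__main__':
    print(asyncio.get_event_loop().run_until_complete(fetch_all()))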
Short links and random strings
- shortuuid: third-party package for generating short IDs and random strings with a low collision rate
import shortuuid

# Generate a random string
def gen_random():
    return shortuuid.uuid()

# Build a short link from a random ID
def short_url():
    pro = "http://"
    return pro + shortuuid.uuid()

if __name__ == '__main__':
    print(gen_random())
    print(short_url())
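shortuuid can also be pinned to a custom alphabet and length, which gives predictable slugs for short links; a sketch (the alphabet below is just an example that drops look-alike characters):

import shortuuid

# Custom alphabet without look-alikes (0/O, 1/l/I), fixed 8-character output
su = shortuuid.ShortUUID(alphabet="23456789abcdefghjkmnpqrstuvwxyz")
print(su.random(length=8))

# Deterministic ID derived from a name: the same input always yields the same string
print(shortuuid.uuid(name="http://example.com"))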
Storing data in different formats
- tablib: third-party package for converting tabular data between formats, including XLS, CSV, JSON, and YAML; a CSV round-trip sketch follows the example
# Build a Dataset and export it in several formats
def gen_table():
    import tablib
    d = tablib.Dataset(headers=["First_name", "Second_name"])
    origin_data = [('li', 'san'), ('wang', 'wu')]
    for row in origin_data:
        d.append(row)
    print(d.export('json'))
    print(d.export('yaml'))  # newer tablib versions need the yaml extra installed
    print(d.export('df'))    # the DataFrame export requires pandas

if __name__ == '__main__':
    gen_table()
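The same Dataset round-trips through text formats such as CSV; a short sketch:

import tablib

d = tablib.Dataset(headers=["First_name", "Second_name"])
d.append(("li", "san"))

csv_text = d.export("csv")                              # serialize to CSV text
loaded = tablib.Dataset().load(csv_text, format="csv")  # parse it back
print(loaded.headers, loaded[0])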
Extracting data from scraped web pages
- BeautifulSoup: third-party package (bs4) for parsing fetched HTML and pulling data out with its query methods: soup = BeautifulSoup(html_doc, "html.parser") (or the faster "lxml" parser)
# Fetch a page with requests and extract image URLs with BeautifulSoup
def get_web_page():
    import requests
    from bs4 import BeautifulSoup
    res = requests.get("https://www.liepin.com/")
    res.encoding = 'utf-8'
    soup = BeautifulSoup(res.text, 'html.parser')
    for img in soup.find_all("img"):
        if img.get("src"):
            print(img.get("alt"))  # <img> tags carry no text content, so read alt instead
            print(img.get("src"))

if __name__ == '__main__':
    get_web_page()
- re: standard-library module for extracting data with regular expressions
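A minimal sketch pulling img src attributes out of raw HTML, mirroring the BeautifulSoup example above (the pattern is illustrative only, not a robust HTML parser):

import re

html = '<img src="/a.png" alt="a"><img src="/b.png" alt="b">'
# Capture the value of each src attribute
for src in re.findall(r'<img[^>]+src="([^"]+)"', html):
    print(src)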
Documentation generators
- sphinx: typical workflow: run sphinx-quickstart, edit source/conf.py, generate API stubs with sphinx-apidoc -o sphinx_demo/source ../src/, then build with make html
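The conf.py edit usually means enabling autodoc and putting the source tree on sys.path so Sphinx can import the code; a minimal sketch (the ../../src path is an assumption matching the sphinx-apidoc call above):

# source/conf.py (excerpt)
import os
import sys

# Make the project importable for autodoc; the relative path is an assumption
sys.path.insert(0, os.path.abspath('../../src'))

extensions = [
    'sphinx.ext.autodoc',  # pull docstrings from the modules listed by sphinx-apidoc
]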