On this special 520 day, here are a few Python snippets you can actually use
2018-05-20
python分享者
A Python 520 confession tool, for the one you love!
from turtle import *
from time import sleep

def go_to(x, y):
    up()
    goto(x, y)
    down()

def big_Circle(size):  # draw the large arc of the heart
    speed(1)
    for i in range(150):
        forward(size)
        right(0.3)

def small_Circle(size):  # draw the small arc of the heart
    speed(1)
    for i in range(210):
        forward(size)
        right(0.786)

def line(size):
    speed(1)
    forward(51 * size)

def heart(x, y, size):
    go_to(x, y)
    left(150)
    begin_fill()
    line(size)
    big_Circle(size)
    small_Circle(size)
    left(120)
    small_Circle(size)
    big_Circle(size)
    line(size)
    end_fill()

def arrow():
    pensize(10)
    setheading(0)
    go_to(-400, 0)
    left(15)
    forward(150)
    go_to(339, 178)
    forward(150)

def arrowHead():
    pensize(1)
    speed(1)
    color('red', 'red')
    begin_fill()
    left(120)
    forward(20)
    right(150)
    forward(35)
    right(120)
    forward(35)
    right(150)
    forward(20)
    end_fill()

def main():
    pensize(2)
    color('red', 'pink')
    # getscreen().tracer(30, 0)  # uncomment to draw the picture quickly
    heart(200, 0, 1)       # first heart: the first two arguments set the position, the last one the size
    setheading(0)          # point the pen along the positive x axis
    heart(-80, -100, 1.5)  # second heart
    arrow()                # the line passing through both hearts
    arrowHead()            # the arrowhead
    go_to(400, -300)
    write("author:520Python", move=True, align="left", font=("宋体", 30, "normal"))
    done()

main()
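If you want to keep the finished drawing, one option is to export turtle's underlying Tk canvas to a file before the window closes. This is only a minimal sketch, assuming the standard Tk backend and an example filename heart.eps; the call has to run inside main() before done(), because done() blocks until the window is closed.

getcanvas().postscript(file="heart.eps")  # export the Tk canvas as an EPS file (assumed filename; place before done())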
And if the confession fails... well...
Scraping girl photos with Python
Something to soothe your tender little heart. After all, everyone is showing off today, so keep your mind on the "sacred books" and your ears shut to the world outside.
#!/usr/bin/env python
# coding=utf-8
import os
import time
import threading
from multiprocessing import Pool, cpu_count

import requests
from bs4 import BeautifulSoup

headers = {
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/56.0.2924.87 Safari/537.36',
    'Referer': "http://www.mmjpg.com"
}
dir_path = r"E:\mmjpg"  # local folder the downloaded images are saved to

def save_pic(pic_src, pic_cnt):
    """Download one image into the current folder."""
    try:
        img = requests.get(pic_src, headers=headers, timeout=10)
        imgname = "pic_cnt_{}.jpg".format(pic_cnt + 1)
        with open(imgname, 'ab') as f:
            f.write(img.content)
            print(imgname)
    except Exception as e:
        print(e)

def make_dir(folder_name):
    """Create the folder for one gallery and chdir into it."""
    path = os.path.join(dir_path, folder_name)
    # If the folder already exists the gallery was crawled before, so skip it
    # (simple de-duplication). Returns True for a new folder, False otherwise.
    if not os.path.exists(path):
        os.makedirs(path)
        print(path)
        os.chdir(path)
        return True
    print("Folder has existed!")
    return False

def delete_empty_dir(dir):
    """If the program was interrupted, a gallery folder may have been created
    with no images downloaded into it yet. Since existing folders are skipped,
    such empty folders must be removed before the next run."""
    if os.path.exists(dir):
        if os.path.isdir(dir):
            for d in os.listdir(dir):
                path = os.path.join(dir, d)  # build the child path
                if os.path.isdir(path):
                    delete_empty_dir(path)   # recurse into subfolders
        if not os.listdir(dir):
            os.rmdir(dir)
            print("remove the empty dir: {}".format(dir))
    else:
        print("Please start your performance!")

lock = threading.Lock()  # global resource lock

def urls_crawler(url):
    """Crawler entry point: download one whole gallery."""
    try:
        r = requests.get(url, headers=headers, timeout=10).text
        # gallery title, also used as the folder name
        folder_name = BeautifulSoup(r, 'lxml').find('h2').text.encode('ISO-8859-1').decode('utf-8')
        with lock:
            if make_dir(folder_name):
                # number of images in the gallery
                max_count = BeautifulSoup(r, 'lxml').find('div', class_='page').find_all('a')[-2].get_text()
                # per-image page URLs
                page_urls = [url + "/" + str(i) for i in range(1, int(max_count) + 1)]
                # image URLs
                img_urls = []
                for index, page_url in enumerate(page_urls):
                    result = requests.get(page_url, headers=headers, timeout=10).text
                    # the last image page has no <a> wrapper, only an <img>, so parse it separately
                    if index + 1 < len(page_urls):
                        img_url = BeautifulSoup(result, 'lxml').find('div', class_='content').find('a').img['src']
                        img_urls.append(img_url)
                    else:
                        img_url = BeautifulSoup(result, 'lxml').find('div', class_='content').find('img')['src']
                        img_urls.append(img_url)
                for cnt, url in enumerate(img_urls):
                    save_pic(url, cnt)
    except Exception as e:
        print(e)

if __name__ == "__main__":
    urls = ['http://mmjpg.com/mm/{cnt}'.format(cnt=cnt) for cnt in range(1, 953)]
    pool = Pool(processes=cpu_count())
    try:
        delete_empty_dir(dir_path)
        pool.map(urls_crawler, urls)
    except Exception as e:
        time.sleep(30)
        delete_empty_dir(dir_path)
        pool.map(urls_crawler, urls)
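Before launching the full pool over nearly a thousand galleries, it can be worth smoke-testing the crawler on a single gallery. A minimal sketch using only the functions defined above; the gallery id 1 is just an assumed example:

if __name__ == "__main__":
    # crawl one gallery as a quick test (hypothetical gallery id)
    urls_crawler('http://mmjpg.com/mm/1')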
Scraping a novel with Python
import urllib.request
import re

# 1. fetch the source of the book's index page
# 2. extract the chapter links
# 3. fetch the source of each chapter page
# 4. extract the chapter text
# 5. save it to a file
# (functions below use camelCase naming)

# fetch the novel content
def getNovertContent():
    # urlopen returns an http.client.HTTPResponse object
    html = urllib.request.urlopen("http://www.quanshuwang.com/book/0/269").read()
    html = html.decode("gbk")
    # the parentheses define the capture groups; without them nothing is captured
    # .*? matches any characters, non-greedily
    reg = r'<li><a href="(.*?)" title=".*?">(.*?)</a></li>'
    # compiling the pattern up front makes repeated matching faster
    reg = re.compile(reg)
    urls = re.findall(reg, html)
    # print(urls)
    # urls is a list of (chapter URL, chapter title) tuples, e.g.
    # [('http://www.quanshuwang.com/book/0/269/78850.html', '第一章 山边小村'),
    #  ('http://www.quanshuwang.com/book/0/269/78854.html', '第二章 青牛镇')]
    for url in urls:
        # chapter URL
        novel_url = url[0]
        # chapter title
        novel_title = url[1]
        chapt = urllib.request.urlopen(novel_url).read()
        chapt_html = chapt.decode("gbk")
        # r marks a raw string, so backslashes need no escaping: r"\d" instead of "\\d"
        reg = r'</script> (.*?)<script type="text/javascript">'
        # re.S lets . match newlines as well, so the captured text can span several lines
        reg = re.compile(reg, re.S)
        chapt_content = re.findall(reg, chapt_html)
        # print(chapt_content)
        # a list containing a single string, e.g.
        # ["  二愣子睁大着双眼,直直望着茅草和烂泥糊成的<br />"]
        # replace() takes the substring to remove and what to put in its place
        chapt_content = chapt_content[0].replace(" ", "")
        # print(chapt_content)  # now a plain string ending in "<br />"
        chapt_content = chapt_content.replace("<br />", "")
        print("正在保存 %s" % novel_title)
        # 'w' opens the file for text writing; 'wb' would be binary mode
        # without a context manager this would be:
        # f = open("{}.txt".format(novel_title), 'w')
        # f.write(chapt_content)
        # f.close()
        with open("{}.txt".format(novel_title), 'w') as f:
            f.write(chapt_content)

getNovertContent()
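Two regex details in the comments above do most of the work: .*? matches as little as possible, and re.S lets the dot also match newlines. A small self-contained illustration; the sample string is made up, but it mirrors the chapter-page pattern used above:

import re

page = '</script>  第一段\n第二段<script type="text/javascript">'
# without re.S the dot stops at the newline, so the non-greedy group cannot span both lines
print(re.findall(r'</script>(.*?)<script', page))        # []
# with re.S the group crosses the newline and the chapter text is captured
print(re.findall(r'</script>(.*?)<script', page, re.S))  # ['  第一段\n第二段']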
But all of that is just comfort for the soul; what we really need is to better ourselves!
Scraping Zhaopin job listings with Python
Find a high-paying job, climb to the peak of life, and marry the fair, rich, and beautiful. All the more reason to learn Python well!
# -*- coding: utf-8 -*-
import re
import csv
import requests
from tqdm import tqdm
from urllib.parse import urlencode
from requests.exceptions import RequestException

def get_one_page(city, keyword, region, page):
    '''
    Fetch one search-result page and return its HTML.
    '''
    paras = {
        'jl': city,      # city to search in
        'kw': keyword,   # search keyword
        'isadv': 0,      # whether to enable the advanced search options
        'isfilter': 1,   # whether to filter the results
        'p': page,       # page number
        're': region     # short for "region"; 2005 stands for Haidian
    }
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        'Host': 'sou.zhaopin.com',
        'Referer': 'https://www.zhaopin.com/',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate, br',
        'Accept-Language': 'zh-CN,zh;q=0.9'
    }
    url = 'https://sou.zhaopin.com/jobs/searchresult.ashx?' + urlencode(paras)
    try:
        # fetch the page content and return the HTML
        response = requests.get(url, headers=headers)
        # a 200 status code means the request succeeded
        if response.status_code == 200:
            return response.text
        return None
    except RequestException as e:
        return None

def parse_one_page(html):
    '''
    Parse the HTML, extract the useful fields and yield them.
    '''
    # parse with a regular expression
    pattern = re.compile('<a style=.*? target="_blank">(.*?)</a>.*?'                       # job title
                         '<td class="gsmc"><a href="(.*?)" target="_blank">(.*?)</a>.*?'   # company URL and name
                         '<td class="zwyx">(.*?)</td>', re.S)                              # monthly salary
    # find every match in the page
    items = re.findall(pattern, html)
    for item in items:
        job_name = item[0]
        job_name = job_name.replace('<b>', '')
        job_name = job_name.replace('</b>', '')
        yield {
            'job': job_name,
            'website': item[1],
            'company': item[2],
            'salary': item[3]
        }

def write_csv_file(path, headers, rows):
    '''
    Write the header and the rows to a csv file.
    '''
    # the encoding avoids errors when writing Chinese text
    # newline='' avoids an extra blank line after every row
    with open(path, 'a', encoding='gb18030', newline='') as f:
        f_csv = csv.DictWriter(f, headers)
        f_csv.writeheader()
        f_csv.writerows(rows)

def write_csv_headers(path, headers):
    '''
    Write the header row.
    '''
    with open(path, 'a', encoding='gb18030', newline='') as f:
        f_csv = csv.DictWriter(f, headers)
        f_csv.writeheader()

def write_csv_rows(path, headers, rows):
    '''
    Write the data rows.
    '''
    with open(path, 'a', encoding='gb18030', newline='') as f:
        f_csv = csv.DictWriter(f, headers)
        f_csv.writerows(rows)

def main(city, keyword, region, pages):
    '''
    Entry point.
    '''
    filename = 'zl_' + city + '_' + keyword + '.csv'
    headers = ['job', 'website', 'company', 'salary']
    write_csv_headers(filename, headers)
    for i in tqdm(range(pages)):
        # collect every job on this page and append the rows to the csv file
        jobs = []
        html = get_one_page(city, keyword, region, i)
        items = parse_one_page(html)
        for item in items:
            jobs.append(item)
        write_csv_rows(filename, headers, jobs)

if __name__ == '__main__':
    main('北京', 'python工程师', 2005, 10)
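To sanity-check the output, the rows can be read back with csv.DictReader using the same gb18030 encoding. A minimal sketch; the filename is the one that main('北京', 'python工程师', 2005, 10) above would produce:

import csv

with open('zl_北京_python工程师.csv', encoding='gb18030', newline='') as f:
    for row in csv.DictReader(f):
        print(row['job'], row['company'], row['salary'])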