
On this special 520 day, here are a few Python snippets you can actually use.

2018-05-20  python分享者

A Python 520 confession tool! For the one you love

from turtle import *

def go_to(x, y):
   up()
   goto(x, y)
   down()


def big_Circle(size):  # draw the large arc of the heart
   speed(1)
   for i in range(150):
       forward(size)
       right(0.3)

def small_Circle(size):  # draw the small arc of the heart
   speed(1)
   for i in range(210):
       forward(size)
       right(0.786)

def line(size):
   speed(1)
   forward(51*size)

def heart( x, y, size):
   go_to(x, y)
   left(150)
   begin_fill()
   line(size)
   big_Circle(size)
   small_Circle(size)
   left(120)
   small_Circle(size)
   big_Circle(size)
   line(size)
   end_fill()

def arrow():
   pensize(10)
   setheading(0)
   go_to(-400, 0)
   left(15)
   forward(150)
   go_to(339, 178)
   forward(150)

def arrowHead():
   pensize(1)
   speed(1)
   color('red', 'red')
   begin_fill()
   left(120)
   forward(20)
   right(150)
   forward(35)
   right(120)
   forward(35)
   right(150)
   forward(20)
   end_fill()


def main():
   pensize(2)
   color('red', 'pink')
   #getscreen().tracer(30, 0)  # uncomment this line to draw the picture much faster
   heart(200, 0, 1)          # draw the first heart; the first two arguments set its position, the last one its size
   setheading(0)             # point the pen along the positive x axis
   heart(-80, -100, 1.5)     # draw the second heart
   arrow()                   # draw the shaft of the arrow through both hearts
   arrowHead()               # draw the arrowhead
   go_to(400, -300)
   write("author:520Python", move=True, align="left", font=("宋体", 30, "normal"))
   done()

main()
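
To tweak the drawing, note that heart(x, y, size) takes the position in its first two arguments and a scale factor in the last one. As a quick variation of my own (not part of the original script, and assuming the functions above are already defined in the same file), you could replace main() with something like this to draw a single large heart:

def main():
    pensize(2)
    color('red', 'pink')
    # getscreen().tracer(30, 0)  # uncomment to skip the slow drawing animation
    heart(0, -150, 2)            # one larger heart, roughly centred on the canvas
    done()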

And if the confession fails? Then...

Scraping girl photos with Python

Some comfort for your bruised little heart: everyone is busy showing off today, so keep your nose in the "sacred books" and ignore the world outside.

#!/usr/bin/env python
# coding=utf-8
import os
import time
import threading
from multiprocessing import Pool, cpu_count

import requests
from bs4 import BeautifulSoup

headers = {
    'X-Requested-With': 'XMLHttpRequest',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/56.0.2924.87 Safari/537.36',
    'Referer': "http://www.mmjpg.com"
}
dir_path = r"E:\mmjpg"      # local folder where downloaded images are saved
def save_pic(pic_src, pic_cnt):
    """ 将图片下载到本地文件夹 """
    try:
        img = requests.get(pic_src, headers=headers, timeout=10)
        imgname = "pic_cnt_{}.jpg".format(pic_cnt + 1)
        with open(imgname, 'ab') as f:
            f.write(img.content)
            print(imgname)
    except Exception as e:
        print(e)
def make_dir(folder_name):
    """ 新建套图文件夹并切换到该目录下 """
    path = os.path.join(dir_path, folder_name)
    # If the folder already exists, the album has been crawled before, so skip it for efficiency.
    # Returns True if the folder was created, False if it already existed.
    if not os.path.exists(path):
        os.makedirs(path)
        print(path)
        os.chdir(path)
        return True
    print("Folder has existed!")
    return False
def delete_empty_dir(dir):
    """ 如果程序半路中断的话,可能存在已经新建好文件夹但是仍没有下载的图片的情况
    但此时文件夹已经存在所以会忽略该套图的下载,此时要删除空文件夹 """
    if os.path.exists(dir):
        if os.path.isdir(dir):
            for d in os.listdir(dir):
                path = os.path.join(dir, d)     # build the path one level down
                if os.path.isdir(path):
                    delete_empty_dir(path)      # recurse into subfolders
        if not os.listdir(dir):
            os.rmdir(dir)
            print("remove the empty dir: {}".format(dir))
    else:
        print("Please start your performance!") # 请开始你的表演

lock = threading.Lock()     # global lock
def urls_crawler(url):
    """ 爬虫入口,主要爬取操作 """
    try:
        r = requests.get(url, headers=headers, timeout=10).text
        # album title, also used as the folder name
        folder_name = BeautifulSoup(r, 'lxml').find('h2').text.encode('ISO-8859-1').decode('utf-8')
        with lock:
            if make_dir(folder_name):
                # number of images in the album
                max_count = BeautifulSoup(r, 'lxml').find('div', class_='page').find_all('a')[-2].get_text()
                # one page URL per image
                page_urls = [url + "/" + str(i) for i in range(1, int(max_count) + 1)]
                # image URLs
                img_urls = []
                for index, page_url in enumerate(page_urls):
                    result = requests.get(page_url, headers=headers, timeout=10).text
                    # the last page has a bare <img> with no <a> wrapper, so parse it separately
                    if index + 1 < len(page_urls):
                        img_url = BeautifulSoup(result, 'lxml').find('div', class_='content').find('a').img['src']
                        img_urls.append(img_url)
                    else:
                        img_url = BeautifulSoup(result, 'lxml').find('div', class_='content').find('img')['src']
                        img_urls.append(img_url)

                for cnt, img_url in enumerate(img_urls):
                    save_pic(img_url, cnt)
    except Exception as e:
        print(e)
if __name__ == "__main__":
    urls = ['http://mmjpg.com/mm/{cnt}'.format(cnt=cnt) for cnt in range(1, 953)]
    pool = Pool(processes=cpu_count())
    try:
        delete_empty_dir(dir_path)
        pool.map(urls_crawler, urls)
    except Exception as e:
        time.sleep(30)
        delete_empty_dir(dir_path)
        pool.map(urls_crawler, urls)
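
One caveat: lock = threading.Lock() lives at module level, so with multiprocessing.Pool every worker process gets its own independent copy and the folder check is not actually synchronized across processes. A minimal sketch of one way to share a real lock between workers (my own illustration, not part of the original script) is to hand out a multiprocessing.Manager lock through the pool initializer:

from multiprocessing import Pool, Manager

def init_worker(shared_lock):
    # remember the proxy lock under a module-level name inside each worker process
    global lock
    lock = shared_lock

def crawl(n):
    # stand-in for urls_crawler(url); the lock is now genuinely shared
    with lock:
        print("crawling item", n)

if __name__ == "__main__":
    manager = Manager()
    with Pool(processes=4, initializer=init_worker, initargs=(manager.Lock(),)) as pool:
        pool.map(crawl, range(8))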

Scraping a novel with Python

import urllib.request
import re
# 1. fetch the index page source
# 2. extract the chapter links
# 3. fetch each chapter page
# 4. extract the chapter text
# 5. save it to a file

# camelCase naming
# fetch the novel content
def getNovertContent():
    # <http.client.HTTPResponse object at 0x000001DFD017F400>
    html = urllib.request.urlopen("http://www.quanshuwang.com/book/0/269").read()
    html = html.decode("gbk")
    # the parentheses create the capture groups; without them nothing is captured
    # .*? is a lazy (non-greedy) match
    reg = r'<li><a href="(.*?)" title=".*?">(.*?)</a></li>'
    # compile the pattern once for efficiency
    reg = re.compile(reg)
    urls = re.findall(reg,html)
    # print(urls)
    # a list of (url, title) tuples, e.g.
    # [(http://www.quanshuwang.com/book/0/269/78850.html,第一章 山边小村),
    # (http://www.quanshuwang.com/book/0/269/78854.html,第二章 青牛镇)]
    for url in urls:
        # chapter URL
        novel_url = url[0]
        # chapter title
        novel_title = url[1]

        chapt = urllib.request.urlopen(novel_url).read()
        chapt_html = chapt.decode("gbk")
        # r marks a raw string, e.g. r"\d" instead of "\\d"
        reg = r'</script>&nbsp;&nbsp;&nbsp;&nbsp;(.*?)<script type="text/javascript">'
        # re.S (DOTALL) lets . match newlines as well
        reg = re.compile(reg,re.S)
        chapt_content = re.findall(reg,chapt_html)
        # print(chapt_content)
        # a list like ["&nbsp;&nbsp;&nbsp;&nbsp二愣子睁大着双眼,直直望着茅草和烂泥糊成的<br />"]

        # first argument: the substring to replace; second argument: its replacement
        chapt_content = chapt_content[0].replace("&nbsp;&nbsp;&nbsp;&nbsp;","")
        # print(chapt_content)  -> the string: 二愣子睁大着双眼,直直望着茅草和烂泥糊成的<br />
        chapt_content = chapt_content.replace("<br />","")

        print("正在保存 %s"%novel_title)
        # w 读写模式  wb
        # f = open("{}.txt".format(novel_title),'w')
        # f.write(chapt_content)

        with open("{}.txt".format(novel_title),'w') as f:
            f.write(chapt_content)

        # f.close()

getNovertContent()
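
A side note on the re.S flag used above: it is the DOTALL flag, which makes . match newline characters as well (it is not multi-line matching). Since a chapter body spans many lines, the pattern would not match without it. A tiny standalone illustration:

import re

text = "<p>first line\nsecond line</p>"
print(re.findall(r"<p>(.*?)</p>", text))        # [] because '.' stops at the newline
print(re.findall(r"<p>(.*?)</p>", text, re.S))  # ['first line\nsecond line']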

But all of this is just comfort for the soul; we still need to better ourselves!

Scraping Zhaopin (智联招聘) job listings with Python

Find a high-paying job, climb to the peak of life, and win the heart of your dream partner. All the more reason to learn Python well!


#-*- coding: utf-8 -*-
import re
import csv
import requests
from tqdm import tqdm
from urllib.parse import urlencode
from requests.exceptions import RequestException

def get_one_page(city, keyword, region, page):
   '''
   Fetch the page HTML and return it
   '''
   paras = {
       'jl': city,         # city to search in
       'kw': keyword,      # search keyword
       'isadv': 0,         # whether to enable advanced search options
       'isfilter': 1,      # whether to filter the results
       'p': page,          # page number
       're': region        # short for region; 2005 is Haidian district
   }

   headers = {
       'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
       'Host': 'sou.zhaopin.com',
       'Referer': 'https://www.zhaopin.com/',
       'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
       'Accept-Encoding': 'gzip, deflate, br',
       'Accept-Language': 'zh-CN,zh;q=0.9'
   }

   url = 'https://sou.zhaopin.com/jobs/searchresult.ashx?' + urlencode(paras)
   try:
       # fetch the page and return the HTML
       response = requests.get(url, headers=headers)
       # use the status code to check whether the request succeeded
       if response.status_code == 200:
           return response.text
       return None
   except RequestException:
       return None

def parse_one_page(html):
   '''
   Parse the HTML and yield the useful fields
   '''
   # parse with a regular expression
   pattern = re.compile('<a style=.*? target="_blank">(.*?)</a>.*?'        # job title
       '<td class="gsmc"><a href="(.*?)" target="_blank">(.*?)</a>.*?'     # company URL and company name
       '<td class="zwyx">(.*?)</td>', re.S)                                # monthly salary

   # find every match in the page
   items = re.findall(pattern, html)

   for item in items:
       job_name = item[0]
       job_name = job_name.replace('<b>', '')
       job_name = job_name.replace('</b>', '')
       yield {
           'job': job_name,
           'website': item[1],
           'company': item[2],
           'salary': item[3]
       }

def write_csv_file(path, headers, rows):
   '''
   Write the header row and the data rows to a csv file
   '''
   # the encoding argument avoids errors when writing Chinese text
   # newline='' prevents an extra blank line after every row
   with open(path, 'a', encoding='gb18030', newline='') as f:
       f_csv = csv.DictWriter(f, headers)
       f_csv.writeheader()
       f_csv.writerows(rows)

def write_csv_headers(path, headers):
   '''
   Write the header row
   '''
   with open(path, 'a', encoding='gb18030', newline='') as f:
       f_csv = csv.DictWriter(f, headers)
       f_csv.writeheader()

def write_csv_rows(path, headers, rows):
   '''
   Write the data rows
   '''
   with open(path, 'a', encoding='gb18030', newline='') as f:
       f_csv = csv.DictWriter(f, headers)
       f_csv.writerows(rows)

def main(city, keyword, region, pages):
   '''
   Main entry point
   '''
   filename = 'zl_' + city + '_' + keyword + '.csv'
   headers = ['job', 'website', 'company', 'salary']
   write_csv_headers(filename, headers)
   for i in tqdm(range(pages)):
       '''
       Fetch every job posting on this page and write them to the csv file
       '''
       jobs = []
       html = get_one_page(city, keyword, region, i)
       items = parse_one_page(html)
       for item in items:
           jobs.append(item)
       write_csv_rows(filename, headers, jobs)

if __name__ == '__main__':
   main('北京', 'python工程师', 2005, 10)
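
Once the crawl finishes, the result can be sanity-checked with csv.DictReader. A small sketch of my own, reusing the filename pattern and gb18030 encoding from the script above:

import csv

with open('zl_北京_python工程师.csv', encoding='gb18030', newline='') as f:
    for row in csv.DictReader(f):
        print(row['job'], row['company'], row['salary'])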