Python 相关学习
2020-09-18 本文已影响0人
温柔vs先生
运算符:
- 逻辑运算符:
(以下示例假设 a = 10,b = 20)
and x and y 布尔"与" - 如果 x 为 False,x and y 返回 x 的值,否则它返回 y 的计算值。 (a and b) 返回 20。
or x or y 布尔"或" - 如果 x 为 True(非 0、非空),它返回 x 的值,否则它返回 y 的计算值。 (a or b) 返回 10。
not not x 布尔"非" - 如果 x 为 True,返回 False 。如果 x 为 False,它返回 True。 not(a and b) 返回 False
- 成员运算符:
in 如果在指定的序列中找到值返回 True,否则返回 False。 x 在 y 序列中 , 如果 x 在 y 序列中返回 True。
not in 如果在指定的序列中没有找到值返回 True,否则返回 False。 x 不在 y 序列中 , 如果 x 不在 y 序列中返回 True。
- 身份运算符:
is is 是判断两个标识符是不是引用自一个对象 x is y, 类似 id(x) == id(y) , 如果引用的是同一个对象则返回 True,否则返回 False
is not is not 是判断两个标识符是不是引用自不同对象 x is not y , 类似 id(x) != id(y)。如果引用的不是同一个对象则返回结果 True,否则返回 False。
is 与 == 区别:
is 用于判断两个变量引用对象是否为同一个(同一块内存空间), == 用于判断引用变量的值是否相等。
其他
- Python的元组与列表类似,不同之处在于元组的元素不能修改。
- 可更改(mutable)与不可更改(immutable)对象,Python的函数传参,如果是可变类型,在函数中对传入的可变参数进行修改,外部的参数也会发生相应的改变。
- 模块导入:
from…import 语句:Python 的 from 语句让你从模块中导入一个指定的部分到当前命名空间中
例如,要导入模块 fib 的 fibonacci 函数,使用如下语句:
from fib import fibonacci
- 把一个对象(例如列表或字典)保存为一个文件(.pkl),可以导入pickle
>>> import pickle
>>> my_list = [123, 12, "wbb"]
>>> pickle_file = open('my_list.pkl','wb')
>>> pickle.dump(my_list, pickle_file)
>>> pickle_file.close()
>>> pickle_file = open('my_list.pkl', 'rb')
>>> my_list2 = pickle.load(pickle_file)
>>> print(my_list2)
[123, 12, 'wbb']
- open 前面添加with ,后面可以省略close()
- 注意类里面的私有变量:
python里面,在属性前面添加__后就可以作为一个私有变量(即在外部是不可以直接调用),就像下面会直接报错,因为python默认在其前面添加了下划线加类名
>>> class Person:
__name = 'wbb'
def getName(self):
return self.__name
>>> p = Person()
>>> p.__name
Traceback (most recent call last):
File "<pyshell#81>", line 1, in <module>
p.__name
AttributeError: 'Person' object has no attribute '__name'
>>> p.name
Traceback (most recent call last):
File "<pyshell#82>", line 1, in <module>
p.name
AttributeError: 'Person' object has no attribute 'name'
>>> p.getName()
'wbb'
>>> p._Person__name
'wbb'
>>>
- python变量不需要声明,可以直接赋值使用。
- 如果其属性名和方法名相同会覆盖方法名。
- 属性名用名词,方法名用动词
- 实例对象创建后,方法里面的属性会一直存在直到程序关闭,就算把类对象del依然存在。
- property
>>> class C:
def __init__(self, size = 10):
self.size = size
def getSize(self):
return self.size
def setSize(self, value):
self.size = value
def delSize(self):
del self.size
x = property(getSize, setSize, delSize)
>>> c1 = C()
>>> c1.x
10
>>> c1.x = 20
>>> c1.x
20
>>> c1.size
20
>>> del c1.x
>>> c1.x
Traceback (most recent call last):
File "<pyshell#118>", line 1, in <module>
c1.x
File "<pyshell#111>", line 5, in getSize
return self.size
AttributeError: 'C' object has no attribute 'size'
>>>
>>> c = C(1)
>>> c
<__main__.C object at 0x0711C778>
>>> c.x
1
image
- 自定义一个property(就是一个描述符类),必须要实现 __get__、__set__、__delete__ 方法,我们要注意方法中参数的含义(其中 __set__ 的第三个参数是被赋的值 value,而不是 owner):
下面是一个描述符:
>>> class MyDecriptor:
def __get__(self, instance, owner):
print('getting...',self, instance, owner)
def __set__ (self, instance, owner):
print('setting...',self, instance, owner)
def __delete__ (self, instance):
print('deleting...',self, instance)
>>> class Test:
x = MyDecriptor()
>>> test = Test()
>>> del test.x
deleting... <__main__.MyDecriptor object at 0x0149FB38> <__main__.Test object at 0x0149FAF0>
>>> test.x
getting... <__main__.MyDecriptor object at 0x0149FB38> <__main__.Test object at 0x0149FAF0> <class '__main__.Test'>
>>> test.x = '123'
setting... <__main__.MyDecriptor object at 0x0149FB38> <__main__.Test object at 0x0149FAF0> 123
>>>
- 模块!模块!
如果我们想在模块中写测试代码,但是又不想在其他模块中引用时调用测试代码,此时我们可以把测试代码放在以下条件中:
if __name__ == '__main__':
# 这里是测试代码
test()
- 包:
为了避免把项目中所有的文件放在同一个文件夹下,python提供了包的概念,就是创建一个文件夹把对应的模块放在这个文件夹下,并创建一个 __init__.py 的模块,内容可以为空,主要是为了告诉python这是一个包,导入的时候
import 文件夹名字.模块名字 as 自定义的名字
-
placekitten.com
-
爬虫(抓取豆瓣中的相关图片):
import urllib.request
import re
import os
import ssl
# Replace urllib's default HTTPS context factory with the unverified one, so
# HTTPS requests made through urllib skip certificate verification.
# NOTE(review): this is process-wide and removes TLS certificate checks —
# presumably a workaround for local certificate errors; confirm before reuse.
ssl._create_default_https_context = ssl._create_unverified_context
def openHtml(url):
    """Fetch ``url`` and return the raw response body.

    A desktop-browser User-Agent header is attached to the request.

    Args:
        url: Address to request.

    Returns:
        The undecoded response body as ``bytes``.
    """
    request = urllib.request.Request(url)
    request.add_header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36")
    # The context manager closes the response (and its socket) even when
    # read() raises; the original never closed it.
    with urllib.request.urlopen(request) as response:
        return response.read()
def find_imgs(url):
    """Return every ``.jpg`` URL found in the page at ``url``.

    The page is fetched with ``openHtml``, decoded as UTF-8, and scanned
    with a regular expression. The page source and the resulting list are
    printed for inspection.

    Args:
        url: Address of the page to scan.

    Returns:
        A list of image URL strings (possibly empty).
    """
    html = openHtml(url).decode("utf-8")
    print(html)
    # The original pattern ``http.+\.jpg`` was greedy: with several images on
    # one line it matched from the first "http" to the last ".jpg" as a single
    # bogus URL. Match lazily and stop at whitespace, quotes and angle brackets.
    img_list = re.findall(r"https?://[^\s\"'<>]+?\.jpg", html)
    print(img_list)
    return img_list
def download_mm(folder = "xxoo"):
    """Download every ``.jpg`` found on the Douban Top 250 page into ``folder``.

    The process working directory is changed to ``folder``; each image is
    saved under the last path segment of its URL.

    Args:
        folder: Directory to save into; created if it does not exist.
    """
    url = "https://movie.douban.com/top250"
    # exist_ok=True: the original os.mkdir raised FileExistsError on a re-run.
    os.makedirs(folder, exist_ok=True)
    os.chdir(folder)
    for img_url in find_imgs(url):
        # Download before opening the file so a failed request does not
        # leave an empty file behind.
        img = openHtml(img_url)
        with open(img_url.split("/")[-1], "wb") as f:
            f.write(img)


if __name__ == "__main__":
    download_mm()
- 设置IP代理:
import urllib.request
# Fetch a "what is my IP" page through an HTTP proxy and print the HTML.
url = "http://www.whatismyip.com.tw"
# Route plain-HTTP requests through the given proxy address.
proxy_support = urllib.request.ProxyHandler({"http":"123.160.68.44:9999"})
opener = urllib.request.build_opener(proxy_support)
# OpenerDirector takes its default headers from ``addheaders``. The original
# assigned ``add_headers``, which only created an unused attribute, so the
# User-Agent was never sent.
opener.addheaders = [("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36")]
# Make this opener the one used by urllib.request.urlopen().
urllib.request.install_opener(opener)
# Close the response deterministically once the body has been read.
with urllib.request.urlopen(url) as response:
    html = response.read().decode("utf-8")
print(html)
- 用requests库进行请求
import requests
def download_mm():
    """Request the Douban Top 250 page and print its HTML decoded as UTF-8."""
    user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
    page = requests.get(
        "https://movie.douban.com/top250",
        headers={"User-Agent": user_agent},
    )
    # Set the decoding explicitly before reading .text.
    page.encoding = "utf-8"
    print(page.text)


if __name__ == "__main__":
    download_mm()
- 抓取豆瓣图片(采用lxml,requests)
import requests
import os
from lxml import etree
def get_response(url):
    """Send a GET request to ``url`` with a browser User-Agent.

    Returns the ``requests`` response object unchanged.
    """
    user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
    return requests.get(url, headers={"User-Agent": user_agent})
def find_imgs(url):
    """Return the poster image URLs extracted from the page at ``url``.

    The page is fetched with ``get_response``, decoded as UTF-8, parsed with
    lxml, and queried with an XPath expression for the ``src`` attributes.
    The resulting list is printed and returned.
    """
    page = get_response(url)
    page.encoding = "utf-8"
    # Parse the HTML text with lxml
    tree = etree.HTML(page.text, etree.HTMLParser())
    img_list = tree.xpath("//div[@id = 'wrapper']/div/div/div/ol/li/div/div[1]/a/img/@src")
    print(img_list)
    return img_list
def download_mm(folder = "xxoo"):
    """Download the Douban Top 250 poster images into ``folder``.

    The process working directory is changed to ``folder``; each image is
    saved under the last path segment of its URL.

    Args:
        folder: Directory to save into; created if it does not exist.
    """
    url = "https://movie.douban.com/top250"
    if not os.path.exists(folder):
        os.mkdir(folder)
    # Both branches of the original ended in the same chdir; do it once.
    os.chdir(folder)
    for img_url in find_imgs(url):
        # Download first so a failed request does not leave an empty file.
        img = get_response(img_url).content
        # ``with`` closes the file; the original's trailing ``f.close``
        # (no parentheses) was a no-op expression and has been dropped.
        with open(img_url.split("/")[-1], "wb") as f:
            f.write(img)


if __name__ == "__main__":
    download_mm()
- 抓取妹子图片:
import requests
import os
from lxml import etree
import json
# Module-level result lists. find_elements() rebinds all three via ``global``
# with the XPath results of the listing page; save_elements() reads them.
href_list = []
img_list = []
title_list = []
def save_image(url, f):
    """Download ``url`` and write the response bytes to the open file ``f``.

    ``f`` must be writable in binary mode; it is not closed here.
    """
    user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"
    reply = requests.get(url = url, headers = {"User-Agent": user_agent})
    f.write(reply.content)
# Fetch the main listing and collect its thumbnail, link and title lists
def find_elements(url, data):
    """POST ``data`` as JSON to ``url`` and extract the listing entries.

    The JSON response's ``"data"`` field is parsed as HTML with lxml. The
    module-level ``img_list``, ``href_list`` and ``title_list`` are rebound
    to the XPath results, which are also printed.
    """
    global img_list, href_list, title_list
    request_headers = {
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36",
        "Content-Type": "application/json",
    }
    reply = requests.post(url = url, data = json.dumps(data), headers = request_headers)
    reply.encoding = "utf-8"
    payload = json.loads(reply.text)
    # Parse the HTML fragment carried in the "data" field with lxml
    tree = etree.HTML(payload["data"], etree.HTMLParser())
    img_list = tree.xpath("//div[@class = 'post-module-thumb b2-radius']/a/img/@data-src")
    href_list = tree.xpath("//div[@class = 'post-module-thumb b2-radius']/a/@href")
    title_list = tree.xpath("//div[@class = 'post-info']/h2/a/text()")
    print(img_list,"\n--------",href_list,"\n--------",title_list)
def save_elements():
    """Save the listing thumbnails and each post's detail images.

    Reads the module-level ``img_list``, ``href_list`` and ``title_list``.
    Thumbnails go into the current directory; each post's detail images go
    into a sub-directory named after the post title.
    """
    header = {"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.135 Safari/537.36"}

    # Thumbnails: file name is the last path segment of the URL.
    for img_url in img_list:
        with open(img_url.split("/")[-1], "wb") as f:
            save_image(img_url, f)

    # zip() pairs each link with its title and stops at the shorter list,
    # instead of indexing title_list and risking IndexError.
    for href, title in zip(href_list, title_list):
        response = requests.get(url = href, headers = header)
        response.encoding = "utf-8"
        # Parse the detail page with lxml
        html = etree.HTML(response.text, etree.HTMLParser())
        os.makedirs(title, exist_ok=True)
        detail_img_list = html.xpath("//div[@class = 'entry-content']/div[2]/p/img/@src")
        for detail_img_url in detail_img_list:
            # Drop the query string, then keep the last path segment.
            file_name = detail_img_url.split("?")[0].split("/")[-1]
            # Write via a joined path rather than chdir-ing into the title
            # directory and back: the working directory stays put even if a
            # download raises part-way through.
            with open(os.path.join(title, file_name), "wb") as f:
                save_image(detail_img_url, f)
def download_mm(folder = "xxoo"):
    """Fetch one page of the post listing and save its images under ``folder``.

    The process working directory is changed to ``folder``; the listing is
    requested with ``find_elements`` and saved with ``save_elements``.

    Args:
        folder: Directory to save into; created if it does not exist.
    """
    if not os.path.exists(folder):
        os.mkdir(folder)
    # Both branches of the original ended in the same chdir; do it once.
    os.chdir(folder)
    url = "https://www.jder.net/wp-json/b2/v1/getPostList"
    data = {
        "post_type": "post-1",
        "post_order": "new",
        # The original key began with a stray U+FFFD replacement character
        # (mis-encoded paste), so the server received a garbage field name.
        "post_row_count": "4",
        "post_count": "24",
        "post_thumb_ratio": "4/6",
        "post_open_type": "0",
        "post_paged": "1",
        "post_load_more": "0",
        "post_cat[0]": "meizi",
        "show_sidebar": "0",
        "width": "1100",
        "paged": "2",
    }
    find_elements(url, data)
    save_elements()


if __name__ == "__main__":
    download_mm()
- python 读写xls:
获取单元格内容的数据类型
>>> print sheet2.cell(1,0).ctype #第2行第1列:xiaoming2 为string类型 1
>>> print sheet2.cell(1,1).ctype #第2行第2列:12 为number类型 2
>>> print sheet2.cell(1,2).ctype #第2行第3列:2015/5/5 为date类型 3
说明:ctype : 0 empty, 1 string, 2 number, 3 date, 4 boolean, 5 error
- 字符串拼接的几种方式: python字符串拼接.png
TODO:
- 你可能需要一个函数能处理比当初声明时更多的参数。这些参数叫做不定长参数,和上述2种参数不同,声明时不会命名。基本语法如下:
#!/usr/bin/python
# -*- coding: UTF-8 -*-
# 可写函数说明
def printinfo( arg1, *vartuple ):
    """Print the first argument, then every extra positional argument.

    Args:
        arg1: Required value, printed first.
        *vartuple: Any number of further values, printed one per line.
    """
    # print() calls instead of Python 2 print statements, which are a
    # SyntaxError under the Python 3 used by the rest of these notes.
    print("输出: ")
    print(arg1)
    for var in vartuple:
        print(var)


# Call printinfo with one argument, then with extra arguments
printinfo( 10 )
printinfo( 70, 60, 50 )
下面是汉诺塔的方法,不太理解,待以后研究:
汉诺塔python.png
-
easygui 学习
-
容器类型协议:
如果你希望定制的容器是不可变的话,你只需要定义 __len__() 和 __getitem__() 方法。如果你希望定制的容器是可变的话,除了 __len__() 和 __getitem__() 方法,你还需要定义 __setitem__() 和 __delitem__() 两个方法。 -
next yield
-
列表推导式,字典表达式(推导式),生成器推导式
>>> a = [i for i in range(100) if not(i%2) and i%3]
>>> a
[2, 4, 8, 10, 14, 16, 20, 22, 26, 28, 32, 34, 38, 40, 44, 46, 50, 52, 56, 58, 62, 64, 68, 70, 74, 76, 80, 82, 86, 88, 92, 94, 98]
>>> b = {i:i%2==0 for i in range(10)}
>>> b
{0: True, 1: False, 2: True, 3: False, 4: True, 5: False, 6: True, 7: False, 8: True, 9: False}
>>>
- 命名空间
- Python Tkinter (python 处理UI的框架)
- Python scrapy
- 控制台输出带颜色的字体:\033(不知道为什么设置了没反应,把\033以字符串输出了)