Python容器
2019-02-13 本文已影响0人
davidic
列表[]
列表是可变的,这是它区别于字符串和元组的最重要的特点,一句话概括即:列表可以修改,而字符串和元组不能。
创建
直接创建
list1 = ['a','b']
list2 = [1,2]
list函数创建
list3 = list("hello")
print list3
输出
['h', 'e', 'l', 'l', 'o']
lista = [0] * 6
过滤
[elem for elem in li if len(elem) > 1]
# 用filter过滤
list(filter(lambda x: x[1]<0, exp.local_exp[1]))
划分
a=[1,2,3,4]
#取索引1之前的元素(不包括索引1)
a[:1]
#输出[1]
#取索引2及之后的元素(包括索引2)
a[2:]
#输出[3, 4]
判断数组为空
if not nums:
return None
遍历索引和元素
#遍历列表, 打印索引和元素
names = ['Tom', 'Jerry', 'Marry']
for index, name in enumerate(names):
print('names[{}] = {}'.format(index, name))
打印结果:
names[0] = Tom
names[1] = Jerry
names[2] = Marry
把列表中某个值划分出去
if featureVec[axis] == value:
reducedFeatVec = featureVec[:axis]
reducedFeatVec.extend(featureVec[axis+1:])
二维列表
dataSet = [[1, 1, 'yes'],
[1, 1, 'yes'],
[1, 0, 'no'],
[0, 1, 'no'],
[0, 1, 'no']]
定义一个5×4的都是0的二维数组
c=[[0 for i in range(4)] for j in range(5)]
合并
circle_file = glob.glob(os.path.join(self.resource_dir, 'circle/*.png'))
table_file = glob.glob(os.path.join(self.resource_dir, 'table/*.png'))
# 直接相加
self.jump_file = [cv2.imread(name, 0) for name in circle_file + table_file]
generator转list
import jieba
# jieba的cut返回的是一个generator
a = jieba.cut('我喜欢吃土豆')
b = list(a)
列表扩展的两种方式
a=[1,2,3]
b=[4,5,6]
a.append(b)
# a 变为 [1,2,3,[4,5,6]]:append 把 b 整体作为一个元素追加
a.extend(b)
# 若对原始的 a=[1,2,3] 改用 extend,a 变为 [1,2,3,4,5,6]:extend 把 b 中的元素逐个追加
保存为csv
元组()
元组与列表一样,也是一种序列,唯一不同的是元组不能被修改(字符串其实也有这种特点)。
创建
t1=1,2,3
t2="jeffreyzhao","cnblogs"
t3=(1,2,3,4)
t4=()
t5=(1,)
print t1,t2,t3,t4,t5
输出:
(1, 2, 3) ('jeffreyzhao', 'cnblogs') (1, 2, 3, 4) () (1,)
从上面我们可以分析得出:
a、用逗号分隔一些值,元组自动创建完成;
b、元组大部分时候是通过圆括号括起来的;
c、空元组可以用没有包含内容的圆括号来表示;
d、只含一个值的元组,必须加个逗号(,),否则 (1) 只是整数1而不是元组。
list转元组
tuple函数和序列的list函数几乎一样:以一个序列(或其他可迭代对象)作为参数并把它转换为元组。如果参数就是元组,那么该参数就会原样返回。
t1=tuple([1,2,3])
t2=tuple("jeff")
t3=tuple((1,2,3))
print t1
print t2
print t3
t4=tuple(123)
print t4
输出:
(1, 2, 3)
('j', 'e', 'f', 'f')
(1, 2, 3)
t4=tuple(123)
TypeError: 'int' object is not iterable
词典{}
prices = {
'A':123,
'B':450.1,
'C':12,
'E':444,
}
prices['A']
创建词典
>>>dict() # 创建空字典
{}
>>> dict(a='a', b='b', t='t') # 传入关键字
{'a': 'a', 'b': 'b', 't': 't'}
>>> dict(zip(['one', 'two', 'three'], [1, 2, 3])) # 映射函数方式来构造字典
{'three': 3, 'two': 2, 'one': 1}
>>> dict([('one', 1), ('two', 2), ('three', 3)]) # 可迭代对象方式来构造字典
{'three': 3, 'two': 2, 'one': 1}
>>>
读取文件创建词典
#读取代码
fr = open('dic.txt','r')
dic = {}
keys = [] #用来存储读取的顺序
for line in fr:
v = line.strip().split(':')
dic[v[0]] = v[1]
keys.append(v[0])
fr.close()
print(dic)
#写入文件代码 通过keys的顺序写入
fw = open('wdic.txt','w')
for k in keys:
fw.write(k+':'+dic[k]+'\n')
fw.close()
转list
li = dict.items()
结果类似于(Python 2 中 items() 直接返回列表;在 3.x 里返回的是视图,需要写成 list(d.items()) 才能得到列表)
[(u'11', 50808340), (u'1101', 9842378)]
排序
转为list后再排序,例如 sorted(d.items(), key=lambda kv: kv[1], reverse=True) 可以按值从大到小排序。
判断key是否存在
#生成一个字典
d = {'name':{},'age':{},'sex':{}}
#打印返回值
print d.has_key('name')
#结果返回True(has_key 只存在于 Python 2;在 3.x 里改用 'name' in d)
判断词典是否包含某个元素
labelCount={}
for feature in dataSet:
label = feature[-1]
if label not in labelCount: labelCount[label] = 0
词典的遍历
iteritems
sentences = ["我喜欢吃土豆","土豆是个百搭的东西","我不喜欢今天雾霾的北京"]
words = []
for doc in sentences:
words.append(list(jieba.cut(doc)))
dic = corpora.Dictionary(words)
for word,index in dic.token2id.iteritems():
print word + ', index: ' + str(index)
在 Python 3.x 里用 items() 替换 iteritems(),同时 print 要写成函数调用的形式 print(...)。
增加元素
#比如有个词典
action = {
"_index": elastic_urls_index,
"_type": doc_type_name,
"_id": data[0],
"_source": {
"iclick_id": data[0],
"onsite_id": data[1],
"create_time": self.today_2
}
}
#要增加元素
action['_source']['age'] = 'aa'
提取文本的高频词
documents = ["Human machine interface for lab abc computer applications",
"A survey of user opinion of computer system response time"]
stoplist = set('for in and'.split())
texts = [ [word for word in document.lower().split() if word not in stoplist ] for document in documents]
from collections import defaultdict
frequency = defaultdict(int)
for text in texts:
for word in text:
frequency[word]+=1
texts = [ [word for word in text if frequency[word]>1] for text in texts ]
映射mapping
集合set
定义
aaa = set()
增加
aaa.add(1)
判断是否在集合
if 1 in aaa:
数组转集合
a = [11,22,33,44,11,22]
b = set(a)
通过set去除停用词
documents = ["Human machine interface for lab abc computer applications",
"A survey of user opinion of computer system response time"]
stoplist = set('for in and'.split())
texts = [ [word for word in document.lower().split() if word not in stoplist ] for document in documents]
set增加数据
vocabSet = set([])
for document in dataSet:
vocabSet = vocabSet | set(document)