python_cookbook记录

字符串和文本(cookbook笔记)

2017-07-23  本文已影响0人  buildbody_coder

字符串和文本

针对任意多的分隔符拆分字符串

>>> import re
>>> line = 'asdf fjdk; afed, fjek,asdf, foo'
#[]匹配其中的任意一个字符,*(匹配前面的子表达式0次或多次)
#注意: [;,\s]* 可以匹配空字符串,Python 3.7+ 的 re.split 会在空匹配处也进行拆分;原书写法为 r'[;,\s]\s*'
>>> re.split(r'[;,\s]*', line)

字符串开头或结尾文本匹配

>>> filename = 'spam.txt'
>>> filename.endswith('.txt')
True
>>> filename.startswith('file:')
False
>>> url = 'http://www.python.org'
>>> url.startswith('http:')
True
>>>
>>> import os
>>> filenames = os.listdir('.')
>>> filenames
[ 'Makefile', 'foo.c', 'bar.py', 'spam.c', 'spam.h' ]
>>> [name for name in filenames if name.endswith(('.c', '.h')) ]
['foo.c', 'spam.c', 'spam.h']
#可以用于检查某个文件夹下是否存在某种类型的文件
>>> any(name.endswith('.py') for name in filenames)
True
>>>
>>> import re
>>> url = 'http://www.python.org'
>>> re.match('http:|https:|ftp:', url)
<_sre.SRE_Match object at 0x101253098>
>>>

用Shell通配符匹配字符串

>>> from fnmatch import fnmatch, fnmatchcase
>>> fnmatch('foo.txt', '*.txt')
True
>>> fnmatch('foo.txt', '?oo.txt')
True
>>> fnmatch('Dat45.csv', 'Dat[0-9]*')
True
>>> names = ['Dat1.csv', 'Dat2.csv', 'config.ini', 'foo.py']
>>> [name for name in names if fnmatch(name, 'Dat*.csv')]
['Dat1.csv', 'Dat2.csv']
>>>
>>> fnmatchcase('foo.txt', '*.TXT')
False
>>>

字符串匹配和搜索

>>> text1 = '11/27/2012'
>>> import re
>>> if re.match(r'\d+/\d+/\d+', text1):
...     print('yes')
... else:
...     print('no')
...
yes
>>> datepat = re.compile(r'\d+/\d+/\d+')
>>> if datepat.match(text1):
...     print('yes')
... else:
...     print('no')
...
yes
>>> text = 'Today is 11/27/2012. PyCon starts 3/13/2013.'
>>> datepat.findall(text)
['11/27/2012', '3/13/2013']
>>>
>>> datepat = re.compile(r'(\d+)/(\d+)/(\d+)')
>>> m = datepat.match('11/27/2012')
>>> m
<_sre.SRE_Match object at 0x1005d2750>
>>> # Extract the contents of each group
>>> m.group(0)
'11/27/2012'
>>> m.group(1)
'11'
>>> m.group(2)
'27'
>>> m.group(3)
'2012'
>>> m.groups()
('11', '27', '2012')
>>> month, day, year = m.groups()
>>> text
'Today is 11/27/2012. PyCon starts 3/13/2013.'
#在正则中使用了捕获分组以后,就会在findall中匹配到数据时按照规则组成元组
>>> datepat.findall(text)
[('11', '27', '2012'), ('3', '13', '2013')]
>>> for month, day, year in datepat.findall(text):
...     print('{}-{}-{}'.format(year, month, day))
...
2012-11-27
2013-3-13
>>> datepat = re.compile(r'(\d+)/(\d+)/(\d+)$')
>>> datepat.match('11/27/2012abcdef')
>>> datepat.match('11/27/2012')
<_sre.SRE_Match object at 0x1005d2750>

字符串搜索和替换

>>> import re
>>> datepat = re.compile(r'(\d+)/(\d+)/(\d+)')
>>> datepat.sub(r'\3-\1-\2', text)

字符串忽略大小写的搜索替换

>>> text = 'UPPER PYTHON, lower python, Mixed Python'
>>> re.findall('python', text, flags=re.IGNORECASE)
['PYTHON', 'python', 'Python']
>>> re.sub('python', 'snake', text, flags=re.IGNORECASE)
'UPPER snake, lower snake, Mixed snake'

最短匹配模式(?非贪婪模式)

>>> str_pat = re.compile(r'\"(.*)\"')
>>> text1 = 'Computer says "no."'
>>> str_pat.findall(text1)
['no.']
>>> text2 = 'Computer says "no." Phone says "yes."'
>>> str_pat.findall(text2)
['no." Phone says "yes.']
>>>
>>> str_pat = re.compile(r'\"(.*?)\"')
>>> str_pat.findall(text2)
['no.', 'yes.']
>>>

多行匹配模式

>>> text2 = '''/* this is a
... multiline comment */
... '''
>>> comment = re.compile(r'/\*(.*?)\*/', re.DOTALL)
>>> comment.findall(text2)
[' this is a\n multiline comment ']

删除字符串中不需要的字符

>>> s = ' hello world \n'
>>> s.strip()
'hello world'
>>> s.lstrip()
'hello world \n'
>>> s.rstrip()
' hello world'
>>> t = '-----hello====='
>>> t.lstrip('-')
'hello====='
>>> t.strip('-=')
'hello'
>>> s = ' hello     world \n'
>>> s = s.strip()
>>> s
'hello     world'
>>>
>>> s.replace(' ', '')
'helloworld'
>>> import re
>>> re.sub('\s+', ' ', s)
'hello world'
>>>
def open_file(filename):
    """Print each line of the file *filename* with surrounding whitespace removed.

    The file is read lazily, one line at a time, so the whole content is
    never held in memory. Nothing is returned.
    """
    with open(filename) as handle:
        for raw in handle:
            # strip() drops leading/trailing whitespace, including the newline.
            print(raw.strip())

字符串对齐

>>> text = 'Hello World'
#右对齐20宽度
>>> format(text, '>20')
'         Hello World'
#左对齐20宽度
>>> format(text, '<20')
'Hello World         '
#中间对齐20宽度
>>> format(text, '^20')
'    Hello World     '
#指定填充字符串
>>> format(text, '=>20')
'=========Hello World'
>>> format(text, '*^20')
'****Hello World*****'
#格式化多个值
>>> '{:>10} {:>10}'.format('Hello', 'World')
'     Hello      World'
#格式化数字
>>> x = 1.2345
>>> format(x, '>10')
'    1.2345'
#保留2位小数
>>> format(x, '^10.2f')
'   1.23   '

合并拼接字符串

>>> parts = ['Is', 'Chicago', 'Not', 'Chicago?']
>>> ' '.join(parts)
'Is Chicago Not Chicago?'
>>> ','.join(parts)
'Is,Chicago,Not,Chicago?'
>>> ''.join(parts)
'IsChicagoNotChicago?'
>>> data = ['ACME', 50, 91.1]
#有非str类型的值,使用生成器表达式
>>> ','.join(str(d) for d in data)
'ACME,50,91.1'
def sample():
    """Yield the fragments 'Is', 'Chicago', 'Not', 'Chicago?' one at a time."""
    for fragment in ('Is', 'Chicago', 'Not', 'Chicago?'):
        yield fragment
if __name__ == '__main__':
    import sys

    # Approach 1: collect every fragment into a single string in one pass.
    text = ''.join(sample())

    # Approach 2: stream the fragments to a file-like object one by one,
    # without building the joined string first. The original wrote to an
    # undefined name `f` (NameError); standard output is used as the target.
    out = sys.stdout
    for part in sample():
        out.write(part)

字符串中插入变量

>>> s = '{name} has {n} messages.'
>>> s.format(name='Guido', n=37)
上一篇下一篇

猜你喜欢

热点阅读