2018-11-29 关于python对docx文件的操作

2018-11-29  本文已影响0人  煮茶温酒曲终人散

先上一段代码

import docx
from docx import Document
from docx.shared import RGBColor
from docx.shared import Pt
from docx.enum.text import WD_ALIGN_PARAGRAPH
import re

# Read every line of the word list up front. The context manager closes the
# file handle as soon as reading is done instead of leaving it open for the
# lifetime of the script.
with open('F:\\vscode\\.vscode\\python\\python课\\文件操作\\结果汇总\\newans.txt', encoding='utf-8') as source_file:
    all_data = source_file.readlines()

document = Document()
# Level-0 heading acts as the document title.
document.add_heading('四六级单词', 0)
# Placeholder paragraph placed directly under the title.
document.add_paragraph('test paragraphs')

# Cover block (snippet adapted from the net): default font, then a large
# coloured "START" banner followed by a page break.
# NOTE(review): python-docx applies font.name to the Latin font slots; CJK
# text may additionally need the w:eastAsia attribute set on rFonts for this
# typeface to take effect - confirm against the rendered document.
document.styles['Normal'].font.name = u'黑体'
p = document.add_paragraph()
# Left-align the banner paragraph.
p.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.LEFT
# The run holds the banner text; colour and size are set on the run's font.
run = p.add_run(u'START')
run.font.color.rgb = RGBColor(54, 95, 145)
run.font.size = Pt(26)
# Word entries start on the page after the banner.
document.add_page_break()
# Compiled once, outside the loop. Meanings are the double-quoted fragments of
# the third field; example sentences are the single-quoted fragments of the
# fourth field.
_MEANING_PATTERN = re.compile(r'"(.*?)"')
_SENTENCE_PATTERN = re.compile(r"'(.*?)'")


def _add_label(paragraph, text, color):
    """Append a bold, 17pt run with the given text and colour; return the run."""
    label = paragraph.add_run(text)
    label.bold = True
    label.font.size = Pt(17)
    label.font.color.rgb = color
    return label


def _add_numbered_items(paragraph, items):
    """Append items longer than 20 characters as numbered lines; echo every item."""
    written = 0
    for item in items:
        if len(item) > 20:
            written += 1
            paragraph.add_run(str(written) + '. ' + item + '\n')
        print(item)


# Each input line is one record: word|frequency|meanings|examples, with any
# '>>' markers stripped before splitting.
for line in all_data:
    fields = line.replace('>>', '').split('|')
    # print('单词'+fields[0])
    # print('词频'+fields[1])
    # print('词义'+fields[2])
    # print('例句'+fields[3])

    # Blank or truncated lines have no meaning field; skip them instead of
    # crashing with an IndexError.
    if len(fields) < 3:
        print("skip")
        continue

    # Records without a single quoted meaning are skipped as well. The result
    # is reused below, so the pattern only runs once per record.
    meanings = _MEANING_PATTERN.findall(fields[2])
    if not meanings:
        print("skip")
        continue

    word = fields[0]
    frequency = fields[1]
    # A missing example field is treated as "no example sentences" rather than
    # aborting after the paragraph has already been partly written.
    raw_sentences = fields[3] if len(fields) > 3 else ''
    sentences = _SENTENCE_PATTERN.findall(raw_sentences)

    # One left-aligned paragraph per word; every piece below is a run in it.
    entry = document.add_paragraph()
    entry.paragraph_format.alignment = WD_ALIGN_PARAGRAPH.LEFT

    # Word
    _add_label(entry, '单词:', RGBColor(54, 95, 145))
    entry.add_run(word + '\n')

    # Frequency (the value itself stays in the default, non-bold style)
    _add_label(entry, '词频:', RGBColor(54, 100, 100))
    entry.add_run(frequency + '\n')

    # Meanings: guaranteed non-empty by the guard above
    _add_label(entry, '词义:\n', RGBColor(15, 15, 145))
    _add_numbered_items(entry, meanings)

    # Example sentences
    _add_label(entry, '例句:\n', RGBColor(54, 95, 45))
    if sentences:
        _add_numbered_items(entry, sentences)
    else:
        print(word + ' has no sentence')

    # Each word gets its own page.
    document.add_page_break()


# Write the finished document into the current working directory.
document.save('result.docx')

这段代码是我把一个分词结果写入docx文件所使用的,总结一下docx怎么用

概述

个人理解,为自己而作

docx这个模块的操作方式比较符合主流习惯。我认为在此模块中,文字是以段落(paragraph)和段落内的文字片段(run)为单位来组织的;这里只讨论文字,不包括图片和表格,不过图片和表格的操作也大同小异。
除了标题(heading)之外,其他的文字一般都是写入一个paragraph中:先新建一个段落,然后再用 add_run('text') 在这个段落中插入文字。如果我们把 add_run 的返回值存入一个变量,如

# add_paragraph is a method of a document object, so create the document first
document = Document()
p = document.add_paragraph()
# add_run returns the new run; keep it so the text can be styled afterwards
content = p.add_run("words")

我们就可以对content操作,然后改变'words'的显示形式
如果我们只需要设置一个属性,比如粗体显示

# set the bold property on the run returned by add_run
content.bold = True

这样就会粗体显示了
其他操作也是大同小异,当然了,基本操作,具体可以看文档

上一篇下一篇

猜你喜欢

热点阅读