python源码大全汉字GBK与Unicode

Python代码库之解析unicode部首

2019-08-12  本文已影响2人  iCloudEnd
import sqlite3
import json
import os

# Usage example, kept as an inert string literal: it is never executed.
# NOTE(review): it presumably assumes this file is importable as module ``cjk`` -- confirm.
# Flow shown: parse the data file with u2para(), then load the rows with mdx2db().
'''
from cjk import *
dbname='cjk_kangxi_bushou.db'
filepath='Unihan12/a.txt'
paraList=u2para(filepath)
mdx2db(dbname,paraList)
'''

def u2para(filepath, encoding=None):
    """Parse a semicolon-delimited CJK data file into a list of row tuples.

    Blank lines and lines whose first non-blank character is ``#`` are
    skipped.  Each remaining line is split on ``;``; its first three fields,
    stripped of surrounding whitespace, become ``cjk_id``, ``cjk_code`` and
    ``cjk_uni``.  ``cjk_uni`` is passed to ``u2w`` to obtain the character
    stored as ``mkey``.

    Three counters (raw lines, data lines, produced rows) are printed as a
    quick sanity check before returning.

    Args:
        filepath: Path of the text file to read.
        encoding: Text encoding handed to ``open``.  ``None`` (the default)
            keeps the platform default, matching the previous behaviour;
            pass e.g. ``'utf-8'`` when the locale default cannot decode
            the file.

    Returns:
        A list of ``(cjk_id, cjk_code, cjk_uni, mkey)`` tuples in file order.

    Raises:
        IndexError: If a data line has fewer than three ``;``-separated
            fields.
        ValueError: Propagated from ``u2w`` when the third field is not a
            valid hexadecimal code point.
    """
    # ``with`` closes the handle even when decoding fails part-way through;
    # the original bare ``open()`` left it to the garbage collector.
    with open(filepath, encoding=encoding) as handle:
        filelist = list(handle)

    # Keep only data lines: drop blanks and '#' comment lines.
    realist = []
    for raw in filelist:
        item = raw.strip()
        if not item or item.startswith('#'):
            continue
        realist.append(item)

    paralist = []
    for item in realist:
        slist = item.split(';')
        cjk_id = slist[0].strip()
        cjk_code = slist[1].strip()
        cjk_uni = slist[2].strip()
        mkey = u2w(cjk_uni)
        paralist.append((cjk_id, cjk_code, cjk_uni, mkey))

    print('filelist',len(filelist))
    print('realist',len(realist))
    print('paralist',len(paralist))
    return paralist


def u2w(cstr):
    """Return the character whose code point is the hexadecimal string *cstr*.

    For example ``u2w('4E00')`` returns the character U+4E00.
    """
    # The explicit '0x' prefix is accepted by int() when the base is 16.
    code_point = int('0x' + cstr, 16)
    return chr(code_point)

def mdx2db(dbname,paraList):
    """Rebuild the SQLite file *dbname* and load *paraList* into it.

    Any existing file at *dbname* is deleted, the ``cjk_kangxi_bushou``
    table is created through ``createdb``, and all rows are inserted in a
    single transaction.

    Args:
        dbname: Path of the SQLite database file to (re)create.
        paraList: Iterable of ``(cjk_id, cjk_code, cjk_uni, mkey)``
            4-item sequences, e.g. the list returned by ``u2para``.

    Raises:
        sqlite3.Error: If an insert fails.  The transaction is rolled back
            and the connection is still closed.
    """
    # createdb() issues a plain CREATE TABLE, which would fail against an
    # existing table, so always start from a fresh file.
    if os.path.isfile(dbname):
        os.remove(dbname)

    createdb(dbname)

    insert_sql = '''INSERT INTO cjk_kangxi_bushou(cjk_id,cjk_code,cjk_uni ,mkey) VALUES (?,?,?,?)'''
    conn = sqlite3.connect(dbname)
    try:
        # ``with conn`` commits on success and rolls back on an exception;
        # it does not close the connection, hence the outer ``finally``.
        with conn:
            conn.executemany(insert_sql, paraList)
    finally:
        conn.close()


def createdb(dbname):
    """Create the ``cjk_kangxi_bushou`` table in the SQLite file *dbname*.

    The table has an auto-incrementing integer primary key ``ID`` plus four
    text columns: ``cjk_id`` (NOT NULL), ``cjk_code``, ``cjk_uni`` and
    ``mkey``.

    Args:
        dbname: Path of the SQLite database file; ``sqlite3.connect``
            creates it when it does not exist.

    Returns:
        The string ``'ok'`` once the table has been created and committed.

    Raises:
        sqlite3.OperationalError: If the statement fails, e.g. because the
            table already exists (there is no ``IF NOT EXISTS`` clause).
    """
    conn = sqlite3.connect(dbname)
    try:
        conn.execute('''CREATE TABLE cjk_kangxi_bushou
        (ID INTEGER PRIMARY KEY  AUTOINCREMENT,
        cjk_id          TEXT    NOT NULL,
        cjk_code          TEXT ,
        cjk_uni         TEXT  ,
        mkey            TEXT

         );''')
        conn.commit()
    finally:
        # Close even when CREATE TABLE raises, so the handle is not leaked.
        conn.close()
    return 'ok'

更多精彩代码请关注我的专栏

上一篇下一篇

猜你喜欢

热点阅读