OCR数据集生成

2020-08-10  本文已影响0人  1037号森林里一段干木头

本文介绍如何自动生成OCR数据集。目前收集到的已标注字符图片有6300多张,但其中很多字符缺失。这些缺失的字符在该场景下出现的概率很小,但仍然希望能够识别它们,所以生成一些字符图片用于训练。

背景图共4张:先用PIL在背景图上绘制字符,转为numpy数组后用skimage添加噪声并做小角度旋转,最后按字符分别保存到不同的文件夹下。

文件夹结构
图1.PNG
效果如下
image.png
背景图
image.png
from PIL import Image, ImageDraw, ImageFont
import numpy as np
import cv2
import os
import random
import skimage
from skimage import io,transform

# Paths of the four background images.
img1 = r"E:\imagedata\back1.png"
img2 = r"E:\imagedata\back2.png"
img3 = r"E:\imagedata\back3.png"
img4 = r"E:\imagedata\back4.png"

# Crop box handed to Image.crop: (left, upper, right, lower).
region = (40, 20, 85, 95)

# Open every background, then cut the same region out of each one.
image1, image2, image3, image4 = (
    Image.open(path) for path in (img1, img2, img3, img4)
)
back1, back2, back3, back4 = (
    opened.crop(region) for opened in (image1, image2, image3, image4)
)

# The cropped patches that characters are later drawn on.
back = [back1, back2, back3, back4]

len(back)
4
# Build the lookup table from a character to the name of its output folder.
dic = {}
num = list(range(48, 58))      # code points of '0'..'9'
Chara = list(range(65, 91))    # code points of 'A'..'Z'
chara = list(range(97, 123))   # code points of 'a'..'z'

# Characters that are not generated.
clip = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
        'x', 'o', 'c', 'z', 'q']
num += Chara
num += chara

# Folder names are numbered from 1 in the order digits, upper case, lower case.
for i, code_point in enumerate(num, start=1):
    dic[chr(code_point)] = "character-" + str(i).zfill(3)

# Drop the excluded characters from the list of code points to generate.
for item in clip:
    num.remove(ord(item))
print('len:', len(num))


len: 47
# Create one output folder per character class (001..062).
save_path=r"C:\Users\bxzyz\Desktop\OCV\img-gen"
for i in range(1,63):
    # The folder name must use the same "character-" prefix as the values
    # stored in the character -> folder dictionary, because images are saved
    # to the folder name taken from that dictionary.
    name="character-"+str(i).zfill(3)
    chpath=os.path.join(save_path,name)
    # exist_ok=True lets the script be re-run without failing on folders
    # that were created by a previous run.
    os.makedirs(chpath, exist_ok=True)
# Generate the synthetic character images: one image is written for every
# (character, font file, background, noise mode, rotation angle) combination.
types_path=r"E:\imagedata\letter_type"
save_path=r"C:\Users\bxzyz\Desktop\OCV\img-gen"
# Pixel position passed to draw.text when drawing the character.
x=8
y=3
# Number of images written so far; also the numeric suffix of each file name.
n=0
# Noise modes passed to skimage.util.random_noise (Gaussian and Poisson).
# The original note also mentions salt-and-pepper noise (pixels at 0 and 255),
# but that mode is not in the list.
noise=["gaussian","poisson"]
# Rotation angles passed to skimage.transform.rotate.
angle=[-10,-5,0,5,10]
for item in num:
    # NOTE(review): n grows by one per written image, so after the first
    # character has been processed this condition is normally true and the
    # loop stops. This looks like a leftover debugging limit -- confirm
    # whether it should be removed to generate every character in `num`.
    if n>20:
        break
    ch=chr(item)
    sub_folder=dic[ch]
    # Make sure the destination folder exists before anything is saved.
    out_dir=os.path.join(save_path,sub_folder)
    os.makedirs(out_dir, exist_ok=True)
    # Every entry of types_path is treated as a font file.
    for type_i in os.listdir(types_path):
        types=os.path.join(types_path,type_i)
        # Load the font once per font file instead of once per image.
        setFont=ImageFont.truetype(types, 50)
        # Different backgrounds.
        for background in back:
            # Different noise modes.
            for noise_i in noise:
                # Different rotation angles.
                for angle_i in angle:
                    # Work on a copy so the shared background stays clean.
                    copy=background.copy()
                    n+=1

                    # Draw the character on the background in a random grey.
                    draw=ImageDraw.Draw(copy)
                    value=random.randrange(50,110,5)
                    fillColor=(value,value,value)
                    draw.text((x, y), ch, font=setFont, fill=fillColor)

                    # Convert the PIL image to a numpy array and add noise.
                    copy=np.array(copy)
                    copy=skimage.util.random_noise(copy,mode=noise_i)
                    # Apply the rotation.
                    copy=skimage.transform.rotate(copy,angle_i)
                    imgname=os.path.join(out_dir,"%s_%d.png"%(sub_folder,n))

                    # random_noise/rotate produce a floating-point image;
                    # convert it to 8-bit explicitly before writing the PNG
                    # rather than relying on an implicit lossy conversion.
                    # `copy` itself is left as the float image.
                    skimage.io.imsave(imgname, skimage.util.img_as_ubyte(copy))
# Preview: rotate the most recently generated image by 10 degrees and show it.
im = transform.rotate(copy, 10)
io.imshow(im)

<matplotlib.image.AxesImage at 0x1834827de48>
output_4_1.png

上一篇 下一篇

猜你喜欢

热点阅读