医疗影像文件读取方法
2019-10-11 本文已影响0人
SnorlaxSE
Medical Image File Formats
Medical Image File Formats 2014 Apr
DCM
Relevant Software
SimpleITK读取并显示dcm文件
Presentation
读取单张dcm文件
import SimpleITK as sitk
import numpy as np
import matplotlib.pyplot as plt
image = sitk.ReadImage(r"./729427_20181010_CT_5_249_011.dcm") # type(image) <class 'SimpleITK.SimpleITK.Image'>
image_array = np.squeeze(sitk.GetArrayFromImage(image)) # type(image_array) ->> <class 'numpy.ndarray'> image_array.shape ->> (512, 512)
plt.imshow(image_array)
plt.show()
读取dcm文件夹
import SimpleITK as sitk
def read_dicom(PathDicom):
# new ImageSeriesReader object
series_reader = sitk.ImageSeriesReader()
# GetGDCMSeriesFileNames
dicom_names = series_reader.GetGDCMSeriesFileNames(PathDicom)
# 通过之前获取到的序列的切片路径来读取该序列
series_reader.SetFileNames(dicom_names)
# 获取该序列对应的3D图像
3D_image = series_reader.Execute()
# sitk.GetArrayFromImage
image_array = sitk.GetArrayFromImage(3D_image)
return image_array, 3D_image
DICOM 数据处理 SimpleITK 需要注意的是,SimpleITK读取的图像数据的坐标顺序为zyx,即从多少张切片到单张切片的宽和高;而据SimpleITK Image获取的origin和spacing的坐标顺序则是xyz。这些需要特别注意。
PyQt5_DcmViewer_3d
-
scipy.misc.imresize(gray,(200,200))、os.walk、SimpleITK、FigureCanvas - Reference Linking: Python获取指定文件夹下的文件名、PyQt5 Matplotlib
- DcmViewer_3d.py code
pydicom库
读取患者信息
import pydicom
from pydicom.data import get_testdata_files
# filename = "./dcom_sample/729427_20181010_CT_5_249_001.dcm"
filename = get_testdata_files("MR_small.dcm")[0]
ds = pydicom.dcmread(filename) # plan dataset
详情见 👇
>>> import pydicom
>>> from pydicom.data import get_testdata_files
>>> filename = "100151270_20180808_CT_1_0000.dcm"
>>> ds = pydicom.dcmread(filename)
>>> ds
(0008, 0008) Image Type CS: ['DERIVED', 'PRIMARY', 'AXIAL']
(0008, 0016) SOP Class UID UI: CT Image Storage
(0008, 0018) SOP Instance UID UI: 1.2.840.113770.2.1.1374404734.2110896889.550030876
(0008, 0020) Study Date DA: '20180808'
...
(0010, 0010) Patient's Name PN: 'SUN^WEI^(??¨¬?^)'
(0010, 0020) Patient ID LO: '100151270'
(0010, 0030) Patient's Birth Date DA: '19670329'
(0010, 0040) Patient's Sex CS: 'M'
...
(0020, 0011) Series Number IS: "1"
(0020, 0012) Acquisition Number IS: "0"
(0020, 0013) Instance Number IS: "0"
(0020, 0032) Image Position (Patient) DS: ['-162.1999969', '-165.0000000', '-281.5000000']
...
(0028, 0010) Rows US: 512
(0028, 0011) Columns US: 512
(0028, 0030) Pixel Spacing DS: ['0.64453101', '0.64453101']
...
>>> ds['00100020'] # 使用TagID读取文件(患者)信息
(0010, 0020) Patient ID LO: '100151270'
>>> ds['00100020'].value
'100151270'
>>>
修改dcm_InstanceNumber
import pydicom
def resetInstanceNumber(file_dir):
for root, dirs, files in os.walk(file_dir):
for file in sorted(files):
file_root_path = os.path.join(root, file)
ds = pydicom.dcmread(file_root_path)
ds.InstanceNumber = len(files) - ds.InstanceNumber - 1
ds.save_as(file_root_path)
- 常用信息:姓名(ds.PatientName)、病人ID(ds.PatientID)、出生日期(ds.PatientBirthDate)、性别(ds.PatientSex)、治疗日期(StudyDate)、数据模态(Modality)、序列数(SeriesNumber)、机构名称(ds.InstitutionName)、(设备)生产厂商(ds.Manufacturer)、切片厚度(ds.SliceThickness)、像素间距(ds.PixelSpacing)、切片序号(InstanceNumber) 可阅读 DICOM之常用Tag
设置窗口窗位
参考:对于CT图像设置窗宽窗位
def setDicomWinWidthWinCenter(img_data, winwidth, wincenter, rows, cols):
img_temp = img_data
img_temp.flags.writeable = True
min = (2 * wincenter - winwidth) / 2.0 + 0.5
max = (2 * wincenter + winwidth) / 2.0 + 0.5
dFactor = 255.0 / (max - min)
for i in numpy.arange(rows):
for j in numpy.arange(cols):
img_temp[i, j] = int((img_temp[i, j]-min)*dFactor)
min_index = img_temp < 0
img_temp[min_index] = 0
max_index = img_temp > 255
img_temp[max_index] = 255
return img_temp
INTERESTING CODE 👇
Others
- 文件命名不规范,以
PatientID_StudyDate_Modality_SeriesNumber_InstanceNumber为例进行重命名
# 依据InstanceNumber重命名文件
for root, dirs, files in os.walk(file_dir):
for file in files:
file_root_path = os.path.join(root, file)
ds = pydicom.dcmread(file_root_path)
new_name = str(ds.PatientID) + "_" + str(ds.StudyDate) + "_" + str(ds.Modality) + "_" + str(ds.SeriesNumber) + "_" + str(ds.InstanceNumber).zfill(4) + ".dcm"
os.rename(file_root_path,os.path.join(root,new_name))
NRRD
Relevant Software
-
Slicer4
- 注意事项:无法打开路径中含中文的.nrrd文件
pynrrd库
pynrrd_description 👉 Example usage 注:when write.data.shape(512,512,115) 耗时近310s
import numpy as np
import nrrd
# Some sample numpy data
data = np.zeros((5,4,3,2))
filename = 'testdata.nrrd'
# Write to a NRRD file
nrrd.write(filename, data)
# Read the data back from file
readdata, header = nrrd.read(filename)
print(readdata.shape)
print(header)
visualization 👇
from PIL import Image
import numpy as np
import nrrd
nrrd_filename = './testdata_even.nrrd'
nrrd_data, nrrd_options = nrrd.read(nrrd_filename)
# visualization
nrrd_image = Image.fromarray(nrrd_data[:,:,29]*1.5) #nrrd_data[:,:,29] 表示截取第30张切片
nrrd_image.show() # 显示这图片
# save nrrd_image to "image.png"
nrrd_image = nrrd_image.convert("RGB")
nrrd_array = np.asarray(nrrd_image)
nrrd_image.save("./nrrd_image.png", "PNG")
发现visualization的图片呈左旋转显示效果,现使其正常显示
nrrd_data_29 = nrrd_data[:,:,29]*1.5
nrrd_data_29 = np.transpose(nrrd_data_29,(1,0))
nrrd_image = Image.fromarray(nrrd_data_29) #nrrd_data[:,:,29] 表示截取第30张切片
nrrd_image.show() # 显示这图片
write
- 方式一:
nrrd.write('save_filename.nrrd', write_array) - 方式二:
sitk.WriteImage(new_image, save_filename)
nifti数据(nii)
nibabel库
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt
# show the nii_data.shape
nii_file = "ADNI_011_S_0010_MR_MPR__GradWarp__B1_Correction__N3__Scaled_Br_20061208114538147_S8800_I32270.nii"
data = nib.load(nii_file) # data.shape (192, 192, 160) # data.affine.shape (4, 4)
img = data.get_fdata()
img = np.squeeze(img) # img.shape (192, 192, 160)
# print(data.header) #数据头信息
# 获取slice信息生成图像
def show_img(slices):
fig, axes = plt.subplots(1, len(slices))
for i, slice in enumerate(slices):
axes[i].imshow(slice.T, cmap="gray", origin="lower")
#读取nifti文件中的slice数据
data = nib.load(nii_file)
img = data.get_fdata()
#获取单张slice数据
slice_0 = img[26, :, :]
slice_1 = img[:, 30, :]
slice_2 = img[:, :, 16]
#生成图表
show_img([slice_0, slice_1, slice_2])
plt.suptitle("show slice image")
plt.show()
-
nib.load()读取文件,会将图像向左旋转90° (一般不推荐使用,使用sitk.ReadImage()即可# z,y,x)
data.header 👇
<class 'nibabel.nifti1.Nifti1Header'> object, endian='>'
sizeof_hdr : 348
data_type : b''
db_name : b'011_S_0010'
...
quatern_b : 0.70710677
quatern_c : -1.0713779e-09
quatern_d : -0.70710677
qoffset_x : 94.87749
qoffset_y : 165.8339
qoffset_z : 115.27711
...
Convert Format
dcm2nrrd
file_path = 'xxx' # Dicom序列所在的文件夹路径
# assign series_file_names
series_file_names = sitk.ImageSeriesReader.GetGDCMSeriesFileNames(file_path)
series_file_names_list = list(series_file_names)
series_file_names_list.sort()
# assign file_names
file_names = tuple(series_file_names_list)
# new ImageSeriesReader object
series_reader = sitk.ImageSeriesReader()
# 通过之前获取到的序列的切片路径来读取该序列
series_reader.SetFileNames(file_names)
# 获取该序列对应的3D图像
image3D = series_reader.Execute()
# 查看该3D图像的尺寸
print("image3D.GetSize()",image3D.GetSize())
# 将序列保存为单个的DCM或者NRRD文件
# sitk.WriteImage(image3D, 'img3D.dcm')
save_filename = os.path.join(save_filepath, 'xxx.nrrd')
sitk.WriteImage(image3D, save_filename)
nrrd2nrrd
import SimpleITK as sitk
filename = '7 Vein 1.5 B30f.nrrd'
save_filename = 'tt.nrrd'
image = sitk.ReadImage(filename)
image_array = sitk.GetArrayFromImage(image) # z,y,x
new_image = sitk.GetImageFromArray(image_array)
# Set new_image_Direction_Info
new_Direction = list(image.GetDirection())
new_Direction[-1] *= 2
new_image.SetDirection(tuple(new_Direction))
# Set new_image Other Space_Info
new_image.SetOrigin(image.GetOrigin())
new_image.SetSpacing(image.GetSpacing())
sitk.WriteImage(new_image, save_filename)
dcm2nii
import SimpleITK as sitk
import numpy as np
def load_dcm_array(dicom):
if dicom is None:
raise Exception('dicom is %s' % str(dicom))
dcm_array = sitk.GetArrayFromImage(dicom)
if '0028|1050' in dicom.GetMetaDataKeys():
wnd_center = float(dicom.GetMetaData('0028|1050')) # 窗宽
wnd_width = float(dicom.GetMetaData('0028|1051')) # 窗位
else:
wnd_center = 32768
wnd_width = 65535
gH = wnd_center + wnd_width / 2
gL = wnd_center - wnd_width / 2
# HU值 # "归一"至0-255
dcm_array = (254 * (dcm_array - gL) / wnd_width) * (gH >= dcm_array) * (gL <= dcm_array) + 255 * (gH < dcm_array)
dcm_array = np.squeeze(dcm_array) # (1, H, W) → (H, W)
return dcm_array.astype(int)
def read_dicom(dicom_dir):
# 1. 使用正确的顺序读取dcm文件
series_reader = sitk.ImageSeriesReader()
dicom_names = series_reader.GetGDCMSeriesFileNames(dicom_dir)
dcm_img_list = [sitk.ReadImage(f) for f in dicom_names]
# 2. 使用窗宽、窗位,计算HU,消除灰色背景
image_array = np.array([load_dcm_array(dcm_img) for dcm_img in dcm_img_list], dtype='float')
return image_array
if __name__ == '__main__':
dcm_dir = '605/P00057229/CT'
array = read_dicom(dcm_dir)
new_image = sitk.GetImageFromArray(array)
sitk.WriteImage(new_image, 'CT_HU.nii')
Summary
读取
dcm
注意:文件序列名称是否与InstanceNumber一致,且检查是否为有效排序(是否需要文件名补0)
方式一:(不推荐)
import pydicom
ds = pydicom.dcmread('xxxx.dcm')
方式二:
import SimpleITK as sitk
image = sitk.ReadImage(file_path)
image_array = sitk.GetArrayFromImage(image) # z,y,x # Ex.(1,512,512)
方式三:(批量读取)
import SimpleITK as sitk
def read_dicom(PathDicom):
# new ImageSeriesReader object
series_reader = sitk.ImageSeriesReader()
# GetGDCMSeriesFileNames
dicom_names = series_reader.GetGDCMSeriesFileNames(PathDicom)
# 通过之前获取到的序列的切片路径来读取该序列
series_reader.SetFileNames(dicom_names)
# 获取该序列对应的3D图像
image_3D = series_reader.Execute()
# sitk.GetArrayFromImage
image_array = sitk.GetArrayFromImage(image_3D)
return image_array, image_3D
nrrd
方式一:
import nrrd
ori_nrrd, nrrd_header = nrrd.read('xxxx.nrrd')
方式二:
import SimpleITK as sitk
image = sitk.ReadImage(file_path)
image_array = sitk.GetArrayFromImage(image) # z,y,x # Ex. (230,512,512) # (N, H, W)
写入
dcm
import pydicom
ds = pydicom.dcmread(file_root_path)
ds.save_as(file_root_path)
nrrd
方式一: (慢)
import nrrd
nrrd.write(filename, data, header)
方式二:
import SimpleITK as sitk
sitk.WriteImage(new_image, save_filename)
可视化
方式一:
from PIL import Image
nrrd_image = Image.fromarray(image_array) # image_array is 2D
nrrd_image.show()
方式二:
import matplotlib.pyplot as plt
plt.imshow(image_array) # image_array is 2D
plt.show()