pyradiomics官方文档学习(7)--FeatureVis
2021-02-06 本文已影响0人
北欧森林
pyradiomics
官方文档里有几个示例文件,里面涉及了包括yaml文件设置、feature extraction、可视化等一系列影像组学常规操作,是非常好的学习资料。源文件链接:https://github.com/AIM-Harvard/pyradiomics
今天学习名称为''FeatureVisualizationWithClustering"这份文档。
pyRadiomics feature visualization using multi-dimensional scaling and heatmaps
- 准备数据
# Download the zip file if it does not exist
import os, zipfile
import pandas as pd
import seaborn as sns
from six.moves import urllib
url = "http://www.spl.harvard.edu/publications/bitstream/download/5270"
filename = 'example_data/Tumorbase.zip'
if not os.path.isfile(filename):
if not os.path.isdir('example_data'):
os.mkdir('example_data')
print ("retrieving")
urllib.request.urlretrieve(url, filename)
else:
print ("file already downloaded")
extracted_path = 'example_data/tumorbase'
if not os.path.isdir(extracted_path):
print ("unzipping")
z = zipfile.ZipFile(filename)
z.extractall('example_data')
print ("done unzipping")
- 加载包及函数
# Import some libraries
import SimpleITK as sitk
from radiomics import featureextractor
- 提取特征(Extract features)
# Load up the segmentations, 1 to 10 and extract the features
params = os.path.join(os.getcwd(), '..', 'examples', 'exampleSettings', 'Params.yaml')
extractor = featureextractor.RadiomicsFeatureExtractor(params)
# hang on to all our features
features = {}
for case_id in range(1,11):
path = 'example_data/tumorbase/AutomatedSegmentation/case{}/'.format(case_id)
image = sitk.ReadImage(path + "grayscale.nrrd")
mask = sitk.ReadImage(path + "segmented.nrrd")
# Tumor is in label value 6
features[case_id] = extractor.execute ( image, mask, label=6 )
# A list of the valid features, sorted
feature_names = list(sorted(filter ( lambda k: k.startswith("original_"), features[1] )))
# Make a numpy array of all the values
import numpy as np
samples = np.zeros((10,len(feature_names)))
for case_id in range(1,11):
a = np.array([])
for feature_name in feature_names:
a = np.append(a, features[case_id][feature_name])
samples[case_id-1,:] = a
# May have NaNs
samples = np.nan_to_num(samples)
- Multidimensional scaling
from sklearn import manifold
from sklearn.metrics import euclidean_distances
from sklearn.decomposition import PCA
similarities = euclidean_distances(samples)
seed = np.random.RandomState(seed=3)
mds = manifold.MDS(n_components=2, max_iter=5000, eps=1e-12, random_state=seed,
n_init=10,
dissimilarity="precomputed", n_jobs=1, metric=False)
pos = mds.fit_transform(similarities)
- 作图(Plot)
# Plot
from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
import matplotlib.cm as cm
fig = plt.figure(1)
ax = plt.axes([0., 0., 1., 1.])
s = 100
# Type of tumor
meningioma = [0, 1, 2]
glioma = [3,5,9]
astrocytoma = [4, 6, 7, 8]
plt.scatter(pos[meningioma, 0], pos[meningioma, 1], color='navy', alpha=1.0, s=s, lw=1, label='meningioma')
plt.scatter(pos[glioma, 0], pos[glioma, 1], color='turquoise', alpha=1.0, s=s, lw=1, label='glioma')
plt.scatter(pos[astrocytoma, 0], pos[astrocytoma, 1], color='darkorange', alpha=0.5, s=s, lw=1, label='astrocytoma')
plt.legend(scatterpoints=1, loc=5, shadow=False)
similarities = similarities.max() / similarities * 100
similarities[np.isinf(similarities)] = 0
plt.show()
image.png
- 绘制热图(Plot features as a heatmap)
import pandas as pd
import seaborn as sns
# type of each tumor
types =['meningioma', 'meningioma', 'meningioma', 'glioma', 'astrocytoma', 'glioma', 'astrocytoma', 'astrocytoma', 'astrocytoma', 'glioma']
# Construct a pandas dataframe from the samples
d = pd.DataFrame(data=samples, columns=feature_names, index=types)
corr = d.corr()
# Set up the matplotlib figure, make it big!
f, ax = plt.subplots(figsize=(15, 10))
# Draw the heatmap using seaborn
sns.heatmap(corr, vmax=.8, square=True)
- 聚类热图(Cluster the heatmap)
# Choose a subset of features for clustering
dd = d.iloc[:,1:50]
# sns.clustermap(d, linewidths=.5, figsize=(13,13))
m = d.as_matrix()
from scipy.cluster.hierarchy import dendrogram, linkage
Z = linkage(m,'ward')
plt.figure(figsize=(25, 10))
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('sample index')
plt.ylabel('distance')
dendrogram(
Z,
leaf_rotation=90., # rotates the x axis labels
leaf_font_size=8., # font size for the x axis labels
)
plt.show()
image.png
image.png
d.head()
image.png
pp = sns.clustermap(d, col_cluster=False, metric='chebyshev', z_score=1)
_ = plt.setp(pp.ax_heatmap.get_yticklabels(), rotation=0)
plt.show()
image.png