pyradiomics官方文档学习(7)--FeatureVis

2021-02-06  本文已影响0人  北欧森林

pyradiomics 官方文档里有几个示例文件,里面涉及了包括yaml文件设置、feature extraction、可视化等一系列影像组学常规操作,是非常好的学习资料。源文件链接:https://github.com/AIM-Harvard/pyradiomics

今天学习名称为"FeatureVisualizationWithClustering"的这份文档。

pyRadiomics feature visualization using multi-dimensional scaling and heatmaps
  1. 准备数据
# Download the zip file if it does not exist
import os, zipfile
import pandas as pd
import seaborn as sns

from six.moves import urllib

# Fetch the Tumorbase example archive once, then unpack it under example_data/.
url = "http://www.spl.harvard.edu/publications/bitstream/download/5270"
filename = 'example_data/Tumorbase.zip'
if not os.path.isfile(filename):
    # Create the download directory on first use.
    if not os.path.isdir('example_data'):
        os.mkdir('example_data')
    print ("retrieving")
    urllib.request.urlretrieve(url, filename)
else:
    print ("file already downloaded")

# Only extract when the unpacked directory is not there yet.
extracted_path = 'example_data/tumorbase'
if not os.path.isdir(extracted_path):
    print ("unzipping")
    # The context manager closes the archive handle even if extraction fails.
    with zipfile.ZipFile(filename) as z:
        z.extractall('example_data')
    print ("done unzipping")
  2. 加载包及函数
# Import some libraries
import SimpleITK as sitk
from radiomics import featureextractor
  3. 提取特征(Extract features)
# Extract radiomics features for cases 1 to 10 using the example settings file.
settings_parts = ('..', 'examples', 'exampleSettings', 'Params.yaml')
params = os.path.join(os.getcwd(), *settings_parts)

extractor = featureextractor.RadiomicsFeatureExtractor(params)

# Maps case id -> the result returned by extractor.execute for that case.
features = {}
for case_id in range(1, 11):
    path = 'example_data/tumorbase/AutomatedSegmentation/case{}/'.format(case_id)
    # Read the grayscale volume and its segmentation from the case directory.
    image, mask = (
        sitk.ReadImage(path + name)
        for name in ("grayscale.nrrd", "segmented.nrrd")
    )
    # Tumor is in label value 6
    features[case_id] = extractor.execute(image, mask, label=6)
    

# Names of the features to keep: every key of case 1 that starts with
# "original_", in sorted order so the column order is deterministic.
feature_names = sorted(k for k in features[1] if k.startswith("original_"))

# Make a numpy array of all the values: one row per case (1..10), one column
# per feature name. Building it in one call avoids re-copying the row on every
# np.append.
import numpy as np

samples = np.array(
    [[features[case_id][feature_name] for feature_name in feature_names]
     for case_id in range(1, 11)],
    dtype=float,
)

# May have NaNs; nan_to_num replaces them with finite numbers.
samples = np.nan_to_num(samples)
  4. Multidimensional scaling
from sklearn import manifold
from sklearn.metrics import euclidean_distances
from sklearn.decomposition import PCA

# Pairwise Euclidean distances between the per-case feature rows.
similarities = euclidean_distances(samples)

# Fixed random state so repeated runs start from the same initialisation.
seed = np.random.RandomState(seed=3)

# Two-component MDS fitted directly on the precomputed distance matrix.
mds = manifold.MDS(
    n_components=2,
    metric=False,
    dissimilarity="precomputed",
    n_init=10,
    max_iter=5000,
    eps=1e-12,
    random_state=seed,
    n_jobs=1,
)
pos = mds.fit_transform(similarities)
  5. 作图(Plot)
# Plot the 2-D MDS positions, one colour per tumor type.

from matplotlib import pyplot as plt
from matplotlib.collections import LineCollection
import matplotlib.cm as cm


fig = plt.figure(1)
ax = plt.axes([0., 0., 1., 1.])

# Marker size shared by all three scatter calls.
s = 100

# Type of tumor: row indices into `pos` for each group.
meningioma = [0, 1, 2]
glioma = [3,5,9]
astrocytoma = [4, 6, 7, 8]

plt.scatter(pos[meningioma, 0], pos[meningioma, 1], color='navy', alpha=1.0, s=s, lw=1, label='meningioma')
plt.scatter(pos[glioma, 0], pos[glioma, 1], color='turquoise', alpha=1.0, s=s, lw=1, label='glioma')
plt.scatter(pos[astrocytoma, 0], pos[astrocytoma, 1], color='darkorange', alpha=0.5, s=s, lw=1, label='astrocytoma')

plt.legend(scatterpoints=1, loc=5, shadow=False)

# Rescale the distances so that smaller distances get larger values. Zero
# entries divide to inf, which is expected and zeroed on the next line, so the
# divide-by-zero warning is silenced just for this statement.
# NOTE(review): the rescaled matrix is not used again in the visible part of
# this script - confirm whether it is still needed.
with np.errstate(divide='ignore'):
    similarities = similarities.max() / similarities * 100
similarities[np.isinf(similarities)] = 0
plt.show()
image.png
  6. 绘制热图(Plot features as a heatmap)
import pandas as pd
import seaborn as sns

# Tumor type of each sample row, in the same order as the rows of `samples`.
types = [
    'meningioma', 'meningioma', 'meningioma',
    'glioma',
    'astrocytoma',
    'glioma',
    'astrocytoma', 'astrocytoma', 'astrocytoma',
    'glioma',
]

# Wrap the sample matrix in a DataFrame: rows labelled by tumor type,
# columns labelled by feature name.
d = pd.DataFrame(samples, index=types, columns=feature_names)

# Pairwise correlation between the feature columns.
corr = d.corr()

# Large figure so the feature labels have room.
f, ax = plt.subplots(figsize=(15, 10))

# Draw the correlation matrix on the axes created above.
sns.heatmap(corr, vmax=0.8, square=True, ax=ax)
  7. 聚类热图(Cluster the heatmap)
# Choose a subset of features for clustering (columns 1..49).
# NOTE(review): `dd` is not used below - the linkage is computed on the full
# frame `d`; confirm which one is intended.
dd = d.iloc[:,1:50]

# sns.clustermap(d, linewidths=.5, figsize=(13,13))
# DataFrame.as_matrix() was removed in pandas 1.0; to_numpy() is the
# replacement and returns the same 2-D array of values.
m = d.to_numpy()

from scipy.cluster.hierarchy import dendrogram, linkage

# Ward-linkage hierarchical clustering of the sample rows.
Z = linkage(m,'ward')
plt.figure(figsize=(25, 10))
plt.title('Hierarchical Clustering Dendrogram')
plt.xlabel('sample index')
plt.ylabel('distance')
dendrogram(
    Z,
    leaf_rotation=90.,  # rotates the x axis labels
    leaf_font_size=8.,  # font size for the x axis labels
)
plt.show()
image.png
image.png
# Notebook cell: display the first rows of the feature DataFrame.
d.head()
image.png
# Clustered heatmap of the feature frame: column clustering is disabled, the
# Chebyshev metric is used, and z_score=1 is passed for standardisation.
pp = sns.clustermap(
    d,
    z_score=1,
    metric='chebyshev',
    col_cluster=False,
)
# Keep the row tick labels horizontal.
row_labels = pp.ax_heatmap.get_yticklabels()
_ = plt.setp(row_labels, rotation=0)
plt.show()
image.png
上一篇下一篇

猜你喜欢

热点阅读