SIFT + SVM/KMeans 图像分类

2018-06-28  本文已影响0人  缪斯muse

先贴出代码:

import cv2
import numpy as np
import os, codecs
from sklearn.cluster import KMeans
from sklearn import svm, metrics
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV
from sklearn.metrics import confusion_matrix

def get_sift(img_path):
    """Compute SIFT descriptors for a single image.

    The image is loaded with OpenCV, resized to 32x32 pixels using bicubic
    interpolation, converted to grayscale and handed to a SIFT detector.

    Args:
        img_path: Path of the image file to load.

    Returns:
        The descriptor object returned by ``detectAndCompute``. It may be
        ``None`` (the caller in this file checks for that) -- presumably
        when no keypoints are detected in the image.

    Raises:
        IOError: If OpenCV cannot read or decode ``img_path``.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with the offending path rather than with an opaque
        # error from the resize call below.
        raise IOError("Cannot read image: " + str(img_path))
    img = cv2.resize(img, (32, 32), interpolation=cv2.INTER_CUBIC)
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

    # Newer OpenCV builds expose SIFT on the main module; older builds only
    # provide it through the contrib ``xfeatures2d`` namespace.
    sift_factory = getattr(cv2, "SIFT_create", None)
    if sift_factory is None:
        sift_factory = cv2.xfeatures2d.SIFT_create
    detector = sift_factory()

    _, des = detector.detectAndCompute(gray, None)
    return des

def k_clusters(data_list, k, randomState = None):
    """Cluster images with KMeans on SIFT features, then run the SVM step.

    For every key of ``data_list`` (an image path) the SIFT descriptors are
    computed with ``get_sift`` and pooled into one fixed-length vector by
    averaging over the descriptors. KMeans is fitted on those vectors, and
    ``svm_classify`` is then called on the same features for its printed
    report (its return value is not used).

    Side effect: entries of ``data_list`` for which no descriptors were
    obtained are deleted from ``data_list`` in place, so that its remaining
    keys line up one-to-one with the rows of the feature matrix (and with
    ``kmeans.labels_``).

    Args:
        data_list: Mutable mapping whose keys are image paths; its values
            are read by ``svm_classify`` as labels.
        k: Number of KMeans clusters.
        randomState: Forwarded to ``KMeans(random_state=...)``.

    Returns:
        The fitted ``KMeans`` estimator.
    """
    print("Generating SIFT features")

    feat_vec = []
    none_type = []  # paths that produced no descriptors
    for count, data in enumerate(data_list, start=1):
        sift_feat = get_sift(data)
        if sift_feat is None or len(sift_feat) == 0:
            none_type.append(data)
            continue
        # The previous ``reshape(-1, 1)[0]`` kept only the very first scalar
        # of the first descriptor as the whole feature. Images yield a
        # varying number of descriptors, so pool them into a single vector
        # of constant length by averaging.
        # NOTE(review): assumes descriptors form a 2-D array with one row
        # per keypoint -- confirm against the OpenCV build in use.
        feat_vec.append(np.asarray(sift_feat).mean(axis=0).tolist())
        if count % 50 == 0:
            print("The current state: " + str(count))
    features = np.array(feat_vec)
    kmeans = KMeans(n_clusters = k, random_state = randomState).fit(features)

    # Drop images without features so data_list stays aligned with `features`.
    for data in none_type:
        del data_list[data]
    svm_classify(data_list, features)

    return kmeans

def svm_classify(data_list, x):
    """Grid-search an SVM on the given features and print a report.

    Labels are taken from the values of ``data_list`` in iteration order, so
    the rows of ``x`` must be in that same order. The data is split 80/20
    into train and test parts (``random_state=1``); a ``GridSearchCV`` over
    kernel, ``C``, ``gamma`` and ``degree`` is fitted on the training part,
    and the best cross-validation score, the chosen parameters and the
    accuracy on the held-out test part are printed.

    Args:
        data_list: Mapping whose values are the class labels.
        x: Feature matrix with one row per entry of ``data_list``.

    Returns:
        The fitted ``GridSearchCV`` object.
    """
    print("Doing SVM")

    # Single source of truth for the fold count, so the printed report
    # cannot disagree with the value actually passed to GridSearchCV
    # (the message used to claim 10 folds while cv was 5).
    n_folds = 5

    y = np.array([data_list[key] for key in data_list]).flatten()
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2, random_state = 1)
    svc = svm.SVC(decision_function_shape="ovo")
    grid = GridSearchCV(svc,
         param_grid=[ { "kernel":["rbf", "linear", "poly"],
                        "C":[0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4],
                        "gamma": [1/0.1**2, 1/0.5**2, 1, 1/2**2, 1/4**2],
                        "degree": [2, 3] } ],
        cv=n_folds, n_jobs=-1).fit(x_train, y_train)

    best = grid.best_estimator_
    print("**********************")
    print("The best accuracy during validation is: " + str(grid.best_score_) +
          "\nKernel : " + str(best.kernel) +
          "\nUsing " + str(n_folds) + "-fold cross-validation" +
          ", mean score for cross-validation is " + str(grid.best_score_) +
          "\nPenalty parameter C = " + str(best.C) +
          "\nGamma = " + str(best.gamma) +
          "\nDegree = " + str(best.degree))
    print("The test accuracy is: " + str(metrics.accuracy_score(y_test, grid.predict(x_test))))
    return grid


def label_dict(data_list, labels):
    """Map each file's base name to its label.

    ``data_list`` and ``labels`` are paired positionally with ``zip``, so
    they must be in the same order; extra items on either side are ignored.
    If two paths share a base name, the later one overwrites the earlier.

    Args:
        data_list: Iterable of file paths (iterating a dict yields its keys).
        labels: Iterable of labels, one per path.

    Returns:
        A dict from file base name to label.
    """
    # os.path.basename understands the platform's separators, whereas
    # splitting on '/' leaves the directory attached to paths that were
    # built with os.path.join on Windows.
    files = [os.path.basename(data) for data in data_list]
    return dict(zip(files, labels))

def read_file(path):
    """Scan a dataset directory laid out as ``path/<class>/<image>``.

    Every sub-directory of ``path`` is treated as one class, and every entry
    inside it as an image of that class. ``.DS_Store`` entries are skipped
    at both levels, and top-level entries that are not directories are
    skipped too (listing them would raise).

    Args:
        path: Root directory of the dataset.

    Returns:
        A tuple ``(data_list, test_list)`` of dicts:
        ``data_list`` maps the full image path to its class directory name;
        ``test_list`` maps the bare image file name to its class directory
        name (a file name that occurs in several classes keeps the class
        visited last).
    """
    print("Generating data_list")

    data_list = {}
    test_list = {}
    # os.listdir returns entries in arbitrary order; sort so that the sample
    # order is the same from run to run.
    for class_name in sorted(os.listdir(path)):
        class_dir = os.path.join(path, class_name)
        if class_name == '.DS_Store' or not os.path.isdir(class_dir):
            continue
        for img in sorted(os.listdir(class_dir)):
            if img == '.DS_Store':
                continue
            data_list[os.path.join(class_dir, img)] = class_name
            test_list[img] = class_name
    return data_list, test_list

def calculate_accuracy(real, result):
    """Return the fraction of entries whose predicted label matches.

    An entry counts as correct when ``str(result[key] + 1)`` equals the last
    character of ``real[key]``; i.e. the zero-based predicted label, shifted
    by one, is compared with the final character of the true class name.

    Args:
        real: Mapping from key to the true class name (a non-empty string).
        result: Mapping from key to the predicted integer label; every key
            must also be present in ``real``.

    Returns:
        The share of matching entries as a float, or ``0.0`` when ``result``
        is empty (instead of dividing by zero).
    """
    total = len(result)
    if total == 0:
        return 0.0
    right = 0
    for key in result:
        if str(result[key] + 1) == real[key][-1]:
            right += 1
    return right / total

if __name__ == "__main__":
    # Script entry point: cluster the images under the given dataset
    # directory into 4 groups and report how well the clusters match the
    # class folders.
    import sys

    # read_file needs the dataset root; it used to be called with no
    # argument, which fails immediately with a TypeError.
    if len(sys.argv) != 2:
        sys.exit("Usage: python " + sys.argv[0] + " <dataset_dir>")
    data_list, test_list = read_file(sys.argv[1])

    # k_clusters removes images without SIFT features from data_list in
    # place, so the remaining keys line up with km.labels_.
    km = k_clusters(data_list, 4)
    result = label_dict(data_list, km.labels_)

    print("**********************")
    print("The KMeans accuracy is: " + str(calculate_accuracy(test_list, result)))
    print("**********************")
上一篇 下一篇

猜你喜欢

热点阅读