图像分类

2017-04-15  本文已影响0人  Fruit_初

Above All

机器学习的大作业是写图像分类。这里我整理一些有用的参考资料,以便后来提交报告的时候逻辑比较清晰。

主要想用的特征还是SIFT和SURF,当然我觉得数据集给我的感觉是颜色直方图也是可以用的。


一、简单粗暴的提取SIFT特征

源码:https://github.com/SimGuo/ImageProcessing/blob/master/main.cpp


二、Bag-of-words方法

作者:Savitch
出处:http://blog.csdn.net/assiduousknight/article/details/16901427
什么是BOW

first step then

Bag-of-words模型应用三步

接下来,我们通过上述图像展示如何通过Bag-of-words模型,将图像表示成数值向量。现在有三个目标类,分别是人脸、自行车和吉他。

kmeans 每张图根据词表转化为一个向量

代码(还没看懂)

  1. 配置环境
  2. 创建c++类CSIFTDiscriptor
    为了方便使用,我们将SIFT库用C++类CSIFTDiscriptor封装,该类可以计算并获取指定图像的特征点向量集合。类的声明在SIFTDiscriptor.h文件中,内容如下:
    #ifndef _SIFT_DISCRIPTOR_H_  
    #define _SIFT_DISCRIPTOR_H_  
    #include <string>  
    #include <highgui.h>  
    #include <cv.h>  
      
    extern "C"  
    {     
    #include "../sift/sift.h"     
    #include "../sift/imgfeatures.h"      
    #include "../sift/utils.h"    
    };  
      
    /**
     * Wraps the C SIFT library for one image: it remembers the image name,
     * runs the extraction on request and exposes the resulting feature array
     * together with the number of interest points.
     *
     * NOTE(review): the class holds a raw feature pointer and declares a
     * destructor but no copy operations; copying an instance that already
     * holds features looks unsafe - confirm before storing it by value.
     */
    class CSIFTDiscriptor
    {
    public:
        // Creates an empty descriptor: no image name, zero interest points,
        // no feature array.
        CSIFTDiscriptor()
            : m_nInterestPointNumber(0),
              m_pFeatureArray(NULL)
        {
        }

        // Declared here, defined outside the class body.
        CSIFTDiscriptor(const std::string &strImgName);
        ~CSIFTDiscriptor();

        // Stores the name of the image that CalculateSIFT() will work on.
        void SetImgName(const std::string &strImgName)
        {
            m_strInputImgName = strImgName;
        }

        // Declared here, defined outside the class body.
        int CalculateSIFT();

        // Number of interest points currently recorded.
        int GetInterestPointNumber()
        {
            return m_nInterestPointNumber;
        }

        // Feature array currently recorded (NULL after default construction).
        struct feature *GetFeatureArray()
        {
            return m_pFeatureArray;
        }

    private:
        std::string m_strInputImgName;
        int m_nInterestPointNumber;
        feature *m_pFeatureArray;
    };
    #endif  
    
    成员函数实现在SIFTDiscriptor.cpp文件中,其中,CalculateSIFT函数完成特征点的提取和计算,其主要内部流程如下:
  1. 调用OpenCV函数cvLoadImage加载输入图像;
  2. 为了统一输入图像的尺寸,CalculateSIFT函数的第二步是调整输入图像的尺寸,这通过调用cvResize函数实现;
  3. 如果输入图像是彩色图像,我们需要首先将其转化成灰度图,这通过调用cvCvtColor函数实现;
  4. 调用SIFT库函数sift_features获取输入图像的特征点向量集合和特征点个数。
    #include "SIFTDiscriptor.h"  
    

int CSIFTDiscriptor::CalculateSIFT()
{
IplImage *pInputImg = cvLoadImage(m_strInputImgName.c_str());
if (!pInputImg)
{
return -1;
}
int nImgWidth = 320; //训练用标准图像大小
double dbScaleFactor = pInputImg->width / 300.0; //缩放因子
IplImage *pTmpImg = cvCreateImage(cvSize(pInputImg->width / dbScaleFactor, pInputImg->height / dbScaleFactor),
pInputImg->depth, pInputImg->nChannels);
cvResize(pInputImg, pTmpImg); //缩放
cvReleaseImage(&pInputImg);

if (pTmpImg->nChannels != 1)    //非灰度图  
{  
    IplImage *pGrayImg = cvCreateImage(cvSize(pTmpImg->width, pTmpImg->height),  
        pTmpImg->depth, 1);  
    cvCvtColor(pTmpImg, pGrayImg, CV_RGB2GRAY);  
    m_nInterestPointNumber = sift_features(pGrayImg, &m_pFeatureArray);  
    cvReleaseImage(&pGrayImg);  
}  
else  
{  
    m_nInterestPointNumber = sift_features(pTmpImg, &m_pFeatureArray);  
}  
cvReleaseImage(&pTmpImg);  
return m_nInterestPointNumber;  

}
// Builds a descriptor for strImgName and immediately runs CalculateSIFT() on
// it; the value returned by that call is discarded.
CSIFTDiscriptor::CSIFTDiscriptor(const std::string &strImgName)
    : m_strInputImgName(strImgName),
      m_nInterestPointNumber(0),
      m_pFeatureArray(NULL)
{
    CalculateSIFT();
}
// Hands the feature array, if one is held, back with free().
CSIFTDiscriptor::~CSIFTDiscriptor()
{
    if (!m_pFeatureArray)
    {
        return;
    }
    free(m_pFeatureArray);
}
```

  3. 创建c++类CImgSet,管理实验图像集合
    Bag-of-words模型需要从多个目标类图像中提取视觉词汇,不同目标类的图像存储在不同子文件夹中,为了方便操作,我们设计了一个专门的类CImgSet用来管理图像集合,声明在文件ImgSet.h中:
    #ifndef _IMG_SET_H_  
    #define _IMG_SET_H_  
    #include <vector>  
    #include <string>  
    #pragma comment(lib, "shlwapi.lib")  
    /**
     * Holds the list of image file names gathered by LoadImgsFromDir() for
     * one root directory.
     */
    class CImgSet
    {
    public:
        // Remembers the root directory with "//" appended; nothing is scanned
        // until LoadImgsFromDir() is called.
        CImgSet(const std::string &strImgDirName)
            : m_szImgs(),
              m_nImgNumber(0),
              m_strImgDirName(strImgDirName + "//")
        {
        }

        // Runs the private overload with an empty sub-directory name and
        // returns its result.
        int LoadImgsFromDir()
        {
            const std::string strNoSubDir;
            return LoadImgsFromDir(strNoSubDir);
        }

        // Value of the stored image counter.
        int GetTotalImageNumber()
        {
            return m_nImgNumber;
        }

        // Name stored at nIndex; vector::at() throws std::out_of_range for an
        // invalid index.
        std::string GetImgName(int nIndex)
        {
            return m_szImgs.at(nIndex);
        }

    private:
        int LoadImgsFromDir(const std::string &strDirName);

    private:
        typedef std::vector<std::string> IMG_SET;

        IMG_SET m_szImgs;
        int m_nImgNumber;
        const std::string m_strImgDirName;
    };
    #endif  
    
    //成员函数实现在文件ImgSet.cpp中:  
    #include "ImgSet.h"  
    #include <windows.h>  
    #include <Shlwapi.h>  
    /** 
    strSubDirName:子文件夹名 
    */  
    int CImgSet::LoadImgsFromDir(const std::string &strSubDirName)  
    {  
        WIN32_FIND_DATAA stFD = {0};  
        std::string strDirName;  
        if ("" == strSubDirName)  
        {  
            strDirName = m_strImgDirName;  
        }    
        else  
        {    
            strDirName = strSubDirName;  
        }    
        std::string strFindName = strDirName + "//*";  
        HANDLE hFile = FindFirstFileA(strFindName.c_str(), &stFD);  
        BOOL bExist = FindNextFileA(hFile, &stFD);  
      
        for (;bExist;)  
        {  
            std::string strTmpName = strDirName + stFD.cFileName;  
            if (strDirName + "." == strTmpName || strDirName + ".." == strTmpName)  
            {  
                bExist = FindNextFileA(hFile, &stFD);  
                continue;  
            }  
            if (PathIsDirectoryA(strTmpName.c_str()))  
            {  
                strTmpName += "//";  
                LoadImgsFromDir(strTmpName);  
                bExist = FindNextFileA(hFile, &stFD);  
                continue;  
            }     
            std::string strSubImg = strDirName + stFD.cFileName;  
            m_szImgs.push_back(strSubImg);  
            bExist = FindNextFileA(hFile, &stFD);  
        }  
        m_nImgNumber = m_szImgs.size();  
        return m_nImgNumber;  
    }  
    

LoadImgsFromDir递归地从图像文件夹中获取所有实验用图像名,包括子文件夹。该函数内部通过循环调用windows API函数FindFirstFile和FindNextFile来找到文件夹中所有图像的名称。

  4. 创建CHistogram,生成图像的直方图表示
//ImgHistogram.h  
#ifndef _IMG_HISTOGRAM_H_  
#define _IMG_HISTOGRAM_H_    
#include <string>  
#include "SIFTDiscriptor.h"  
#include "ImgSet.h"  
const int cnClusterNumber = 1500;  
const int ciMax_D = FEATURE_MAX_D;  

/**
 * Builds bag-of-words histograms: one row of cnClusterNumber counters per
 * training image, plus the codebook the counters refer to.
 *
 * Ownership as implemented by the destructor: the histogram array and the
 * observed-data matrix are always released; the codebook is released only
 * when it was not supplied through SetCodebook().
 */
class CHistogram
{
public:
    // Sets the directory name handed to the image set when the histograms are formed.
    void SetTrainingImgSetName(const std::string &strTrainingImgSet)
    {
        m_strTrainingImgSetName = strTrainingImgSet;
    }
    int FormHistogram();
    CvMat CalculateImgHistogram(const std::string strImgName, int pszImgHistogram[]);
    CvMat *GetObservedData();
    // Returns the current codebook pointer (0 when none has been built or set).
    CvMat *GetCodebook()
    {
        return m_pCodebook;
    }
    // Installs an externally supplied codebook and marks it as such, so the
    // destructor leaves it alone.
    void SetCodebook(CvMat *pCodebook)
    {
        m_pCodebook = pCodebook;
        m_bSet = true;
    }
public:
    // Initialisers are listed in member declaration order, which is the order
    // in which they actually run.
    CHistogram()
        : m_bSet(false),
          m_pCodebook(0),
          m_pObservedData(0),
          m_pszHistogram(0),
          m_nImgNumber(0)
    {
    }
    ~CHistogram()
    {
        if (m_pszHistogram)
        {
            delete []m_pszHistogram;    // allocated with new[], so array delete is required
            m_pszHistogram = 0;
        }
        if (m_pObservedData)
        {
            cvReleaseMat(&m_pObservedData);
            m_pObservedData = 0;
        }
        if (m_pCodebook && !m_bSet)
        {
            cvReleaseMat(&m_pCodebook);
            m_pCodebook = 0;
        }
    }
private :
    bool m_bSet;                            // true once SetCodebook() has been called
    CvMat *m_pCodebook;
    CvMat *m_pObservedData;
    std::string m_strTrainingImgSetName;
    int (*m_pszHistogram)[cnClusterNumber]; // one row of counters per image
    int m_nImgNumber;
};
#endif  

#include "ImgHistogram.h"  
int CHistogram::FormHistogram()  
{  
    int nRet = 0;  
    CImgSet iImgSet(m_strTrainingImgSetName);  
    nRet = iImgSet.LoadImgsFromDir();  
      
    const int cnTrainingImgNumber = iImgSet.GetTotalImageNumber();  
    m_nImgNumber = cnTrainingImgNumber;  
    CSIFTDiscriptor *pDiscriptor = new CSIFTDiscriptor[cnTrainingImgNumber];  
    int nIPNumber(0) ;  
    for (int i = 0; i < cnTrainingImgNumber; ++i)  //计算每一幅训练图像的SIFT描述符  
    {  
        const string strImgName = iImgSet.GetImgName(i);  
        pDiscriptor[i].SetImgName(strImgName);  
        pDiscriptor[i].CalculateSIFT();  
        nIPNumber += pDiscriptor[i].GetInterestPointNumber();  
    }  
      
    double (*pszDiscriptor)[FEATURE_MAX_D] = new double[nIPNumber][FEATURE_MAX_D];  //存储所有描述符的数组。每一行代表一个IP的描述符  
    ZeroMemory(pszDiscriptor, sizeof(int) * nIPNumber * FEATURE_MAX_D);  
    int nIndex = 0;  
    for (int i = 0; i < cnTrainingImgNumber; ++i)  //遍历所有图像  
    {  
        struct feature *pFeatureArray = pDiscriptor[i].GetFeatureArray();  
        int nFeatureNumber = pDiscriptor[i].GetInterestPointNumber();  
        for (int j = 0; j < nFeatureNumber; ++j)  //遍历一幅图像中所有的IP(Interesting Point兴趣点  
        {  
            for (int k = 0; k < FEATURE_MAX_D; k++)//初始化一个IP描述符  
            {  
                pszDiscriptor[nIndex][k] = pFeatureArray[j].descr[k];  
            }  
            ++nIndex;  
        }  
    }  
    CvMat *pszLabels = cvCreateMat(nIPNumber, 1, CV_32SC1);  
      
    //对所有IP的描述符,执行KMeans算法,找到cnClusterNumber个聚类中心,存储在pszClusterCenters中  
    if (!m_pCodebook)   //构造码元表  
    {  
        CvMat szSamples,   
            *pszClusterCenters = cvCreateMat(cnClusterNumber, FEATURE_MAX_D, CV_32FC1);  
        cvInitMatHeader(&szSamples, nIPNumber, FEATURE_MAX_D, CV_32FC1, pszDiscriptor);  
        cvKMeans2(&szSamples, cnClusterNumber, pszLabels,   
            cvTermCriteria( CV_TERMCRIT_EPS+CV_TERMCRIT_ITER, 10, 1.0 ),  
            1, (CvRNG *)0, 0, pszClusterCenters);  //  
        m_pCodebook = pszClusterCenters;  
    }  
      
    m_pszHistogram = new int[cnTrainingImgNumber][cnClusterNumber];  //存储每幅图像的直方图表示,每一行对应一幅图像  
    ZeroMemory(m_pszHistogram, sizeof(int) * cnTrainingImgNumber * cnClusterNumber);  
      
    //计算每幅图像的直方图  
    nIndex = 0;  
    for (int i = 0; i < cnTrainingImgNumber; ++i)  
    {  
        struct feature *pFeatureArray = pDiscriptor[i].GetFeatureArray();  
        int nFeatureNumber = pDiscriptor[i].GetInterestPointNumber();  
        //      int nIndex = 0;  
        for (int j = 0; j < nFeatureNumber; ++j)  
        {  
            //          CvMat szFeature;  
            //          cvInitMatHeader(&szFeature, 1, FEATURE_MAX_D, CV_32FC1, pszDiscriptor[nIndex++]);  
            //          double dbMinimum = 1.79769e308;  
            //          int nCodebookIndex = 0;  
            //          for (int k = 0; k < m_pCodebook->rows; ++k)//找到距离最小的码元,用最小码元代替原//来的词汇  
            //          {  
            //              CvMat szCode = cvMat(1, m_pCodebook->cols, m_pCodebook->type);  
            //              cvGetRow(m_pCodebook, &szCode, k);  
            //              double dbDistance = cvNorm(&szFeature, &szCode, CV_L2);  
            //              if (dbDistance < dbMinimum)  
            //              {  
            //                  dbMinimum = dbDistance;  
            //                  nCodebookIndex = k;  
            //              }  
            //          }  
            int nCodebookIndex = pszLabels->data.i[nIndex++];   //找到第i幅图像中第j个IP在Codebook中的索引值nCodebookIndex  
            ++m_pszHistogram[i][nCodebookIndex];   //0<nCodebookIndex<cnClusterNumber;   
        }  
    }  
      
    //资源清理,函数返回  
    //  delete []m_pszHistogram;  
    //  m_pszHistogram = 0;  
      
    cvReleaseMat(&pszLabels);     
    //  cvReleaseMat(&pszClusterCenters);  
    delete []pszDiscriptor;  
    delete []pDiscriptor;  
      
    return nRet;  
}  
  
//double descr_dist_sq( struct feature* f1, struct feature* f2 );  
/**
 * Adds the bag-of-words counts of one image to a caller-supplied histogram.
 *
 * Every interest point of the image is assigned to the codebook row with the
 * smallest L2 distance (the first such row on ties), and the matching counter
 * is incremented.
 *
 * @param strImgName      name of the image to describe.
 * @param pszImgHistogram buffer of cnClusterNumber counters; it is NOT cleared
 *                        here, counts are added to whatever it already holds.
 * @return a cnClusterNumber x 1 CV_32SC1 matrix header that points at
 *         pszImgHistogram (no copy is made), or a default-constructed CvMat
 *         when the name is empty, no codebook exists or the buffer is null.
 */
CvMat CHistogram::CalculateImgHistogram(const std::string strImgName, int pszImgHistogram[])
{
    if (strImgName.empty() || !m_pCodebook || !pszImgHistogram)
    {
        return CvMat();
    }

    CSIFTDiscriptor iImgDisp;
    iImgDisp.SetImgName(strImgName);
    iImgDisp.CalculateSIFT();
    struct feature *pImgFeature = iImgDisp.GetFeatureArray();
    const int cnIPNumber = iImgDisp.GetInterestPointNumber();

    // Only codebook rows that have a histogram bin can be chosen.
    const int nCodes = (m_pCodebook->rows < cnClusterNumber) ? m_pCodebook->rows : cnClusterNumber;

    // The library stores descriptors as double; they are converted into this
    // float scratch row so the CV_32FC1 header below describes real floats.
    // NOTE(review): assumes the codebook is CV_32FC1 with FEATURE_MAX_D columns - confirm.
    float szDescriptor[FEATURE_MAX_D];
    CvMat iIP = cvMat(1, FEATURE_MAX_D, CV_32FC1, szDescriptor);

    for (int i = 0; nCodes > 0 && i < cnIPNumber; ++i)
    {
        for (int k = 0; k < FEATURE_MAX_D; ++k)
        {
            szDescriptor[k] = static_cast<float>(pImgFeature[i].descr[k]);
        }

        int nCodebookIndex = 0;     // index of the code nearest to the i-th IP
        double dbMinDistance = 0.0;
        for (int j = 0; j < nCodes; ++j)
        {
            CvMat iCode;
            cvGetRow(m_pCodebook, &iCode, j);
            // Row against row: no transposed temporary is needed.
            const double dbDistance = cvNorm(&iIP, &iCode);
            if (0 == j || dbDistance < dbMinDistance)
            {
                dbMinDistance = dbDistance;
                nCodebookIndex = j;
            }
        }
        ++pszImgHistogram[nCodebookIndex];
    }

    return cvMat(cnClusterNumber, 1, CV_32SC1, pszImgHistogram);
}


/**
 * Returns the training histograms transposed into a
 * cnClusterNumber x m_nImgNumber CV_32SC1 matrix (one column per image).
 *
 * The matrix is stored in the m_pObservedData member, which the destructor
 * releases; a matrix produced by an earlier call is released before the new
 * one is created.
 * NOTE(review): the result used to be kept only in a local variable that hid
 * the member, so it was never released by this class. Callers that release
 * the returned matrix themselves must stop doing so - verify against callers.
 *
 * @return the transposed matrix, or 0 when no histogram has been formed yet.
 */
CvMat *CHistogram::GetObservedData()
{
    if (!m_pszHistogram || m_nImgNumber <= 0)
    {
        return 0;
    }

    // Header only: it views the histogram rows without copying them.
    CvMat iHistogram;
    cvInitMatHeader(&iHistogram, m_nImgNumber, cnClusterNumber, CV_32SC1, m_pszHistogram);

    if (m_pObservedData)
    {
        cvReleaseMat(&m_pObservedData);
    }
    m_pObservedData = cvCreateMat(iHistogram.cols, iHistogram.rows, CV_32SC1);
    cvTranspose(&iHistogram, m_pObservedData);
    return m_pObservedData;
}
上一篇下一篇

猜你喜欢

热点阅读