C++实现朴素贝叶斯算法

2021-03-21  本文已影响0人  一路向后

1.源码实现

#include <cmath>
#include <cstdio>
#include <cstring>

#include <iostream>
#include <map>
#include <string>
#include <vector>

using namespace std;

/**
 * Names of the two labels the classifier can assign to a statement.
 *
 * The labels are read-only: they are used as lookup keys, so they are
 * declared const to prevent accidental reassignment at run time.
 */
class Type {
public:
    static const std::string POSITIVE;
    static const std::string NEGATIVE;
};

const std::string Type::POSITIVE = "positive";
const std::string Type::NEGATIVE = "negative";

/**
 * Word-count based text classifier.
 *
 * Learn() accumulates per-label word counts from labelled statements and
 * Guess() returns the label whose accumulated counts best match a new
 * statement.
 */
class Classifier {
public:
    Classifier();
    ~Classifier();

    // Adds the words of `statement` to the counts kept for label `type`.
    void Learn(std::string statement, std::string type);

    // Returns the best-scoring label for `statement`.
    std::string Guess(std::string statement);

private:
    // Splits `statement` into words, appends them to `vocabulary` and
    // returns how many words were appended.
    int getWords(std::string statement);

    // Smoothed share of learned statements that carry label `type`.
    double totalP(std::string type);

    // Smoothed share of the word occurrences under `type` that are `word`.
    double p(std::string word, std::string type);

    std::vector<std::string> vocabulary;                       // words of the statement being processed
    std::vector<std::string> types;                            // labels evaluated by Guess()
    std::map<std::string, std::map<std::string, double> > words;  // label -> word -> occurrence count
    std::map<std::string, double> documents;                   // label -> number of learned statements
};

// Seeds the label list with the two known labels; Guess() evaluates them in
// this order.
Classifier::Classifier()
{
    const std::string known_labels[] = { Type::POSITIVE, Type::NEGATIVE };

    types.assign(known_labels, known_labels + 2);
}

// Nothing to release by hand: every member is a standard container that
// frees its own storage when the object is destroyed.
Classifier::~Classifier()
{
}

int Classifier::getWords(string statement)
{
    char seps[] = " ,.!?\n";
    char *substring;
    int len = statement.length();
    char *text = new char[len+1];
    int i = 0;

    strcpy(text, statement.c_str());

    substring = strtok(text, seps);
    while(substring != NULL)
    {
        vocabulary.push_back(substring);
        substring = strtok(NULL, seps);
        i++;
    }

    delete text;

    return i;
}

double Classifier::totalP(string type)
{
    map<string, double>::iterator it;
    double total;

    for(it=documents.begin(); it!=documents.end(); it++)
    {
        total += (*it).second;
    }

    return (documents[type] + 1) / (total + 1);
}

double Classifier::p(string word, string type)
{
    map<string, double>::iterator it;
    double count;
    double total;

    if(words.count(type) == 0)
    {
        count = 0;
    }
    else if(words[type].count(word) == 0)
    {
        count = 0;
    }
    else
    {
        count = words[type][word];
    }

    for(it=words[type].begin(); it!=words[type].end(); it++)
    {
        total += (*it).second;
    }

    return (count + 1) / (total + 1);
}

void Classifier::Learn(string statement, string type)
{
    vector<string>::iterator it;
    string word;

    vocabulary.clear();

    this->getWords(statement);

    for(it=vocabulary.begin(); it!=vocabulary.end(); it++)
    {
        word = (*it);

        if(words.count(type) == 0 || words[type].count(word) == 0)
        {
            words[type][word] = 0;
        }

        this->words[type][word]++;
    }

    vocabulary.clear();

    this->documents[type]++;
}

string Classifier::Guess(string statement)
{
    vector<string>::iterator it1;
    vector<string>::iterator it2;
    string type;

    vocabulary.clear();

    this->getWords(statement);  //得到单词

    double best_likelihood = 0.0;
    double likelihood = 0.0;
    string best_type = "";

    for(it1=types.begin(); it1!=types.end(); it1++)
    {
        likelihood = this->totalP((*it1));  //计算 P(Type)

        for(it2=vocabulary.begin(); it2!=vocabulary.end(); it2++)
        {
            likelihood *= this->p((*it2), (*it1));  // 计算 P(word, Type)
        }

        if(likelihood > best_likelihood)
        {
            best_likelihood = likelihood;
            best_type = (*it1);
        }
    }

    vocabulary.clear();

    return best_type;
}

// Demo: trains the classifier on four labelled statements and prints the
// predicted label for two new statements.
int main()
{
    // Automatic storage: the classifier is destroyed on every exit path,
    // with no manual delete to forget.
    Classifier classifier;

    classifier.Learn("Symfony is the best", Type::POSITIVE);
    classifier.Learn("PhpStorm is great", Type::POSITIVE);
    classifier.Learn("Iltar complains a lot", Type::NEGATIVE);
    classifier.Learn("No Symfony is bad", Type::NEGATIVE);

    std::cout << classifier.Guess("Symfony is great") << std::endl;      // prints "positive"
    std::cout << classifier.Guess("I complain a lot") << std::endl;      // prints "negative"

    return 0;
}

2.编译源码

$ g++ -o example example.cpp

3.运行及其结果

$ ./example
positive
negative
上一篇下一篇

猜你喜欢

热点阅读