C++实现朴素贝叶斯算法
2021-03-21 本文已影响0人
一路向后
1.源码实现
#include <cmath>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <map>
#include <string>
#include <vector>
using namespace std;
// Holder for the two label strings the classifier distinguishes between.
struct Type {
    static string POSITIVE;
    static string NEGATIVE;
};

// Out-of-class definitions giving each label its text.
string Type::POSITIVE("positive");
string Type::NEGATIVE("negative");
// Text classifier over a fixed set of labels: Learn() accumulates per-label
// word counts and statement counts, and Guess() picks the label that scores
// highest when the smoothed label share is combined with the smoothed share
// of every word in the statement.
class Classifier {
public:
// Registers Type::POSITIVE and Type::NEGATIVE as the candidate labels.
Classifier();
~Classifier();
// Tokenizes `statement` and adds its words to the counts kept for `type`;
// also counts one more statement for `type`.
void Learn(string statement, string type);
// Tokenizes `statement` and returns the best-scoring candidate label.
string Guess(string statement);
private:
// Scratch token list: getWords() appends to it, Learn()/Guess() clear it
// before and after use.
vector<string> vocabulary;
// Candidate labels, filled in by the constructor.
vector<string> types;
// words[type][word] = how many times `word` appeared in statements learned as `type`.
map<string, map<string, double> > words;
// documents[type] = how many statements were learned as `type`.
map<string, double> documents;
// Splits `statement` into tokens, appends them to `vocabulary`, and returns
// the number of tokens appended.
int getWords(string statement);
// Smoothed share of learned statements labelled `type`:
// (documents[type] + 1) / (sum of all document counts + 1).
double totalP(string type);
// Smoothed share of `word` among the words counted for `type`:
// (count of word + 1) / (sum of all word counts for type + 1).
double p(string word, string type);
};
// Sets up the candidate label list: positive first, then negative.
Classifier::Classifier()
{
    const string labels[] = { Type::POSITIVE, Type::NEGATIVE };
    const int label_count = sizeof(labels) / sizeof(labels[0]);
    for (int i = 0; i < label_count; ++i)
    {
        types.push_back(labels[i]);
    }
}
// Empties the label list on destruction; the member containers release
// their own storage when they are destroyed right after this body runs.
Classifier::~Classifier()
{
    types.erase(types.begin(), types.end());
}
// Splits `statement` into tokens and appends them to `vocabulary`.
//
// A token is a maximal run of characters that contains none of the
// separators: space, comma, period, '!', '?' or newline. Existing contents of
// `vocabulary` are left in place; callers clear it themselves.
//
// Returns the number of tokens appended (0 for an empty or separator-only
// statement).
int Classifier::getWords(string statement)
{
    static const char seps[] = " ,.!?\n";

    // Scan the std::string directly instead of copying it into a new[] buffer
    // for strtok: the old buffer was released with scalar `delete` (undefined
    // behaviour for an array allocation) and strtok relies on hidden global
    // state.
    int count = 0;
    string::size_type start = statement.find_first_not_of(seps);
    while (start != string::npos)
    {
        const string::size_type stop = statement.find_first_of(seps, start);
        if (stop == string::npos)
        {
            // Last token runs through the end of the statement.
            vocabulary.push_back(statement.substr(start));
            ++count;
            break;
        }
        vocabulary.push_back(statement.substr(start, stop - start));
        ++count;
        start = statement.find_first_not_of(seps, stop);
    }
    return count;
}
// Smoothed share of learned statements that carry the label `type`.
//
// Returns (documents[type] + 1) / (sum of all document counts + 1); a label
// that has never been learned contributes a count of 0.
double Classifier::totalP(string type)
{
    // Must start at zero: the accumulator was previously left uninitialized,
    // which made the result indeterminate.
    double total = 0.0;
    for (map<string, double>::const_iterator it = documents.begin(); it != documents.end(); ++it)
    {
        total += it->second;
    }

    // find() instead of operator[] so a lookup never inserts a new entry.
    map<string, double>::const_iterator found = documents.find(type);
    const double count = (found == documents.end()) ? 0.0 : found->second;

    return (count + 1) / (total + 1);
}
// Smoothed share of `word` among all words counted for the label `type`.
//
// Returns (count + 1) / (total + 1), where `count` is how often `word` was
// seen under `type` and `total` is the sum of every word count under `type`.
// Both are 0 when the label or the word has never been learned.
// NOTE(review): the denominator adds 1 rather than the vocabulary size, so
// this is not textbook Laplace smoothing — kept as-is to preserve results.
double Classifier::p(string word, string type)
{
    // Both accumulators start at zero; `total` was previously read without
    // being initialized, which made the result indeterminate.
    double count = 0.0;
    double total = 0.0;

    // find() instead of operator[] so that querying an unseen label does not
    // insert an empty word table for it.
    map<string, map<string, double> >::const_iterator bucket = words.find(type);
    if (bucket != words.end())
    {
        const map<string, double>& tally = bucket->second;
        for (map<string, double>::const_iterator it = tally.begin(); it != tally.end(); ++it)
        {
            total += it->second;
        }

        map<string, double>::const_iterator hit = tally.find(word);
        if (hit != tally.end())
        {
            count = hit->second;
        }
    }

    return (count + 1) / (total + 1);
}
void Classifier::Learn(string statement, string type)
{
vector<string>::iterator it;
string word;
vocabulary.clear();
this->getWords(statement);
for(it=vocabulary.begin(); it!=vocabulary.end(); it++)
{
word = (*it);
if(words.count(type) == 0 || words[type].count(word) == 0)
{
words[type][word] = 0;
}
this->words[type][word]++;
}
vocabulary.clear();
this->documents[type]++;
}
string Classifier::Guess(string statement)
{
vector<string>::iterator it1;
vector<string>::iterator it2;
string type;
vocabulary.clear();
this->getWords(statement); //得到单词
double best_likelihood = 0.0;
double likelihood = 0.0;
string best_type = "";
for(it1=types.begin(); it1!=types.end(); it1++)
{
likelihood = this->totalP((*it1)); //计算 P(Type)
for(it2=vocabulary.begin(); it2!=vocabulary.end(); it2++)
{
likelihood *= this->p((*it2), (*it1)); // 计算 P(word, Type)
}
if(likelihood > best_likelihood)
{
best_likelihood = likelihood;
best_type = (*it1);
}
}
vocabulary.clear();
return best_type;
}
// Demo: trains the classifier on four labelled sentences and prints the
// guessed label for two unseen sentences, one per line.
int main()
{
    // Automatic storage: the classifier is destroyed on every exit path, so
    // no manual delete is needed and nothing leaks if a call throws.
    Classifier classifier;

    classifier.Learn("Symfony is the best", Type::POSITIVE);
    classifier.Learn("PhpStorm is great", Type::POSITIVE);
    classifier.Learn("Iltar complains a lot", Type::NEGATIVE);
    classifier.Learn("No Symfony is bad", Type::NEGATIVE);

    cout << classifier.Guess("Symfony is great") << endl; // expected output: positive
    cout << classifier.Guess("I complain a lot") << endl; // expected output: negative

    return 0;
}
2.编译源码
$ g++ -o example example.cpp
3.运行及结果
$ ./example
positive
negative