关于整理moore数据集的一些操作

2019-06-21  本文已影响0人  XDgbh
#include<vector>
#include<fstream>
#include<string>
using namespace std;
// Splits `str` on any character contained in `delim`, converts every
// non-empty field that is FOLLOWED by a delimiter to double, and appends
// the values to `v_double`.
//
// - The text after the last delimiter is intentionally discarded: for the
//   Moore data set the final column is the class label, not a number.
// - Empty fields (two adjacent delimiters) are skipped, not stored as 0.
// - std::stod throws std::invalid_argument / std::out_of_range when a
//   field is not a parsable number; that exception is propagated.
void string_split(const string &str, vector<double> &v_double, const string &delim)
{
    // Positions are kept as size_type so the comparison with string::npos
    // is exact and long lines are not truncated to int.
    string::size_type start = 0;
    while (start < str.length())
    {
        const string::size_type stop = str.find_first_of(delim, start);
        if (stop == string::npos)
        {
            // No further delimiter: what remains is the trailing field,
            // which is dropped on purpose.
            break;
        }
        if (stop > start)
        {
            // Convert the field [start, stop) to double and store it.
            v_double.push_back(stod(str.substr(start, stop - start)));
        }
        start = stop + 1;
    }
}

// Number of per-column statistics tracked and written to the output file.
const int columns = 256;

// Scans the comma-separated input file and writes the per-column maximum
// (first output line) and minimum (second output line) to find_max_min.txt.
// Returns 1 when the input or the output file cannot be opened, 0 otherwise.
int main()
{
    ifstream in("total——将其他字符都替换成了0或1.txt");
    if (!in.is_open())
    {
        return 1;
    }
    // Running maximum per column, seeded with 0.
    // NOTE(review): the seed assumes no column is entirely negative - confirm.
    vector<double> v_double_max(columns, 0.0);
    // Running minimum per column, seeded with 1e8.
    // NOTE(review): the seed assumes every value is below 1e8 - confirm.
    vector<double> v_double_min(columns, 100000000.0);

    // Read line by line; testing getline itself avoids the extra pass that
    // a `while (!in.eof())` loop makes after the last line.
    string line;
    while (getline(in, line))
    {
        vector<double> line_double;
        string_split(line, line_double, ",");

        // Never index past the statistics vectors: values beyond `columns`
        // in an over-long row are ignored.
        size_t length = line_double.size();
        if (length > v_double_max.size())
        {
            length = v_double_max.size();
        }

        for (size_t i = 0; i < length; i++)
        {
            // Update the maximum.
            if (line_double[i] > v_double_max[i])
            {
                v_double_max[i] = line_double[i];
            }
            // Update the minimum. This must be an independent check: a value
            // that raises the maximum can also be the smallest seen so far
            // (e.g. the first value of a column).
            if (line_double[i] < v_double_min[i])
            {
                v_double_min[i] = line_double[i];
            }
        }
    }
    in.close();

    // Write the collected statistics.
    ofstream out("find_max_min.txt");
    if (!out.is_open())
    {
        return 1;
    }

    // First line: maximum of every column, each value followed by a comma.
    for (double d : v_double_max)
    {
        out << d << ",";
    }
    out << '\n';
    // Second line: minimum of every column, each value followed by a comma.
    for (double d : v_double_min)
    {
        out << d << ",";
    }
    out << '\n';
    out.close();
    return 0;
}
上面程序的输出文件 find_max_min.txt 中,第一行为各列最大值,第二行为各列最小值。
#include<vector>
#include<fstream>
#include<string>
#include<iostream>
using namespace std;

// Splits `str` on any character contained in `delim`, converts every
// non-empty field that is FOLLOWED by a delimiter to double, and appends
// the values to `v_double`.
//
// - The text after the last delimiter is discarded (count is forced to 0
//   for it in the original logic, so a trailing field is never stored).
// - Empty fields (two adjacent delimiters) are skipped, not stored as 0.
// - std::stod throws std::invalid_argument / std::out_of_range when a
//   field is not a parsable number; that exception is propagated.
void string_split(const string &str, vector<double> &v_double, const string &delim)
{
    // size_type positions make the string::npos comparison exact instead of
    // relying on an int <-> size_t round trip.
    string::size_type field_begin = 0;
    const string::size_type total = str.length();
    while (field_begin < total)
    {
        const string::size_type field_end = str.find_first_of(delim, field_begin);
        if (field_end == string::npos)
        {
            // Reached the tail of the string: the remaining text is dropped.
            break;
        }
        if (field_end > field_begin)
        {
            // Convert the field [field_begin, field_end) to double and store it.
            const string token = str.substr(field_begin, field_end - field_begin);
            v_double.push_back(stod(token));
        }
        field_begin = field_end + 1;
    }
}

// Number of values every normalized output row is padded to.
const int columns = 256;

// Normalizes every row of one class file to the 0-1 range using the
// per-column statistics stored in find_max_min_diff.txt
// (line 1: maxima, line 2: minima, line 3: max-min differences) and
// writes the result, zero-padded to `columns` values per row.
// Returns 1 when any of the three files cannot be opened, 0 otherwise.
int main()
{
    string file1 = "find_max_min_diff.txt";
    string file2 = "WWW.txt";
    string out_file = "WWW_normalize0-1.txt";
    ifstream in(file1);
    if (!in.is_open())
    {
        cout << "error to read file: " << file1 << endl;
        return 1;
    }
    vector<double> v_double_max;    // per-column maximum (read, not used below)
    string line1;
    getline(in, line1);
    string_split(line1, v_double_max, ",");

    vector<double> v_double_min;    // per-column minimum
    string line2;
    getline(in, line2);
    string_split(line2, v_double_min, ",");

    vector<double> v_double_diff;   // per-column (maximum - minimum)
    string line3;
    getline(in, line3);
    string_split(line3, v_double_diff, ",");
    in.close();

    // Open the class file whose rows are to be normalized.
    ifstream in_class(file2);
    if (!in_class.is_open())
    {
        cout << "error to read file: " << file2 << endl;
        return 1;
    }

    // Open the file that receives the normalized rows.
    ofstream out(out_file);
    if (!out.is_open())
    {
        cout << "error to write file: " << out_file << endl;
        return 1;
    }

    // Process the input row by row. Testing getline itself prevents the
    // spurious all-zero row that a `while (!eof())` loop emits after the
    // final newline.
    string line;
    while (getline(in_class, line))
    {
        vector<double> line_double;
        string_split(line, line_double, ",");
        const size_t length = line_double.size();
        for (size_t i = 0; i < length; i++)
        {
            // Normalize the value; it stays 0 when the column has no spread
            // or when the statistics file has no entry for this column.
            double result = 0;
            if (i < v_double_min.size() && i < v_double_diff.size() && 0 != v_double_diff[i])
            {
                result = (line_double[i] - v_double_min[i]) / v_double_diff[i];
            }
            if (i > 0)
            {
                out << ",";
            }
            out << result;
        }
        // Pad with zeros until the row holds `columns` values. Rows that
        // already have at least `columns` values get no extra zero.
        for (size_t i = length; i < static_cast<size_t>(columns); i++)
        {
            if (i > 0)
            {
                out << ",";
            }
            out << 0;
        }
        out << '\n';   // end of row; no per-row flush needed
    }
    in_class.close();
    out.close();
    return 0;
}
上一篇 下一篇

猜你喜欢

热点阅读