对于图片分类-数据处理
2019-06-19 本文已影响0人
狼无雨雪
import pandas as pd
# Load the tab-separated ID list. The file has no header row, so columns are
# addressed by their integer position.
data = pd.read_csv("ID.txt", sep="\t", header=None, index_col=False)
data.head()  # interactive preview; the result is discarded when run as a script
print("all data length = {}".format(len(data)))

# Column 1 holds the status string that decides the class of each row.
# Rows whose status is neither "Received" nor "Finished" land in neither set.
status = data[1]
is_received = status == "Received"
is_finished = status == "Finished"

# "Received" rows form the positive ("true") class.
true_data = data[is_received]
print("true data length = {}".format(len(true_data)))
true_data.head()  # interactive preview

# "Finished" rows form the negative ("false") class.
false_data = data[is_finished]
print("false data length = {}".format(len(false_data)))
false_data.head()  # interactive preview
import random
# Fixed seed so the two shuffles below give the same order on every run.
random.seed(2019)

prefix_replace = "https://imagecreation.blob.core.chinacloudapi.cn/"
local_root = "/home/t-huch/"

# Take column 2 of each class and swap the blob-storage URL prefix for the
# local root, turning each entry into a local file path.
# NOTE(review): presumably every column-2 value is an image URL under
# prefix_replace -- confirm against ID.txt.
true_data = [url.replace(prefix_replace, local_root) for url in true_data[2].values]
false_data = [url.replace(prefix_replace, local_root) for url in false_data[2].values]

# Shuffle in place, positives first, so the random stream is consumed in a
# fixed order.
for paths in (true_data, false_data):
    random.shuffle(paths)

length_true_data, length_false_data = len(true_data), len(false_data)
from math import floor
# Positive class: first 80% for training, next 10% for validation, and the
# remainder for testing. Boundaries are rounded down to whole items.
train_cut = floor(0.8*length_true_data)
validation_cut = floor(0.9*length_true_data)
train_true, validation_true, test_true = (
    true_data[:train_cut],
    true_data[train_cut:validation_cut],
    true_data[validation_cut:],
)
length_train_true, length_validation_true = len(train_true), len(validation_true)
length_train_true_and_validation_true = length_train_true + length_validation_true
print("length of train true = {}".format(length_train_true), "length of validation true = {}".format(length_validation_true))

# Negative class: the train and validation slices reuse the positive-class
# counts, and everything left over becomes the negative test set.
# NOTE(review): this looks like deliberate class balancing; if false_data is
# shorter than the positive counts, the later slices are silently short or
# empty -- confirm that is acceptable.
train_false = false_data[:length_train_true]
validation_false = false_data[length_train_true:length_train_true_and_validation_true]
test_false = false_data[length_train_true_and_validation_true:]
length_train_false, length_validation_false = len(train_false), len(validation_false)
print("length of train false = {}".format(length_train_false), "length of validation false = {}".format(length_validation_false))
print("length of test true = {}".format(len(test_true)), "length of test false = {}".format(len(test_false)))
import os
import shutil

# Root of the split dataset; each split has one sub-directory per class.
output_path = "/home/t-huch/imagecreation/poemimage_split"

train_true_path = os.path.join(output_path, "train/true")
train_false_path = os.path.join(output_path, "train/false")
validation_true_path = os.path.join(output_path, "validation/true")
validation_false_path = os.path.join(output_path, "validation/false")
test_true_path = os.path.join(output_path, "test/true")
test_false_path = os.path.join(output_path, "test/false")

# Create the root and every split/class directory; exist_ok makes re-runs
# safe when the directories are already present.
for directory in (
    output_path,
    train_true_path,
    train_false_path,
    validation_true_path,
    validation_false_path,
    test_true_path,
    test_false_path,
):
    os.makedirs(directory, exist_ok=True)
# Copy every file into the directory for its split and class.
# Plain loops are used instead of list comprehensions because the copies are
# pure side effects; there is no need to build lists of returned paths.
# The copy order is unchanged: train, validation, test, positives before
# negatives within each split.
# NOTE(review): shutil.copy into a directory keeps the source file's base
# name, so two sources with the same base name would overwrite each other
# in the destination -- confirm base names are unique within each set.
copy_plan = (
    (train_true, train_true_path),
    (train_false, train_false_path),
    (validation_true, validation_true_path),
    (validation_false, validation_false_path),
    (test_true, test_true_path),
    (test_false, test_false_path),
)
for sources, destination in copy_plan:
    for value in sources:
        # A missing source file raises and aborts the run, as before.
        shutil.copy(value, destination)