generate_data.ipynb @master — view markup · raw · history · blame
In [1]:
import pandas as pd
# Folders that hold the "Yes" and "No" CSV data.
yes_data_path = './mydata/Yes/'
no_data_path = './mydata/No/'

# Load one template CSV from each folder; these frames are the starting
# point for the generated files.
yes_root_csv = pd.read_csv(f'{yes_data_path}长乐区.csv')
no_root_csv = pd.read_csv(f'{no_data_path}闽清县.csv')

# Last expression of the cell: previews the first rows of the "No" template.
no_root_csv.head()
Out[1]:
In [3]:
import random
# Number of perturbed CSV files generate_csv writes for each template.
gen_num = 1000
def generate_csv(input_csv, gen_num, data_path):
    """Write ``gen_num`` randomly perturbed copies of ``input_csv`` as CSV files.

    Every output file starts from an untouched copy of ``input_csv``. In each
    row, one column is picked at random from index 1 through the last column
    (column 0 is never modified) and its value is multiplied by a factor
    drawn from 0.900 to 1.100 in steps of 0.001. Files are written without
    the index and are named ``0.csv``, ``1.csv``, ... with ``data_path``
    used as a plain string prefix.

    Randomness comes from the module-level ``random`` generator, so calling
    ``random.seed`` beforehand makes the output reproducible.

    Args:
        input_csv: pandas DataFrame used as the template. It is not modified.
            Values in columns 1 onward must support multiplication by a
            float.
        gen_num: Number of files to write.
        data_path: String prepended to each file name; include the trailing
            path separator when it names a directory.

    Raises:
        ValueError: If ``input_csv`` has rows but fewer than two columns,
            since ``random.randint`` then has no column to choose from.
    """
    # The template's shape never changes, so read it once.
    num_row, num_col = input_csv.shape

    for i in range(gen_num):
        # Perturb a fresh copy for every file. Editing ``input_csv`` in place
        # would corrupt the caller's frame and make the noise compound from
        # one file to the next instead of staying within +/-10% of the
        # template.
        sample = input_csv.copy()
        for row in range(num_row):
            col = random.randint(1, num_col - 1)
            factor = 1.0 + random.randint(-100, 100) / 1000.0
            sample.iloc[row, col] = sample.iloc[row, col] * factor
        sample.to_csv(data_path + str(i) + '.csv', index=False)
# Produce gen_num perturbed files per template, written next to the
# template inside its own data folder.
generate_csv(input_csv=yes_root_csv, gen_num=gen_num, data_path=yes_data_path)
generate_csv(input_csv=no_root_csv, gen_num=gen_num, data_path=no_data_path)