dataset.py
import torch
from torch.utils.data import Dataset
import joblib as jb
import numpy as np
import pandas as pd
from PIL import Image as pi  # used below to read the DEM and VCD rasters
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

class dataset(Dataset):
    """Wraps pre-split feature and target arrays as a torch Dataset."""
    def __init__(self, x, y):
        super(dataset, self).__init__()
        self.x = x
        self.y = y

    def __getitem__(self, idx):
        return self.x[idx, :], self.y[idx]

    def __len__(self):
        return len(self.y)

def get_datasets(path, test_size):
    """Read the CSV at `path`, build the feature matrix (first column
    log-transformed) and the log- and MinMax-scaled target, and return
    train/test torch Datasets plus the fitted target scaler."""
    # Drop the ID and day-index (日序) columns; the last column is the target.
    table = pd.read_csv(path).drop(["ID", "日序"], axis=1)
    x = table.values[:, :-1]
    X = np.hstack([np.log(x[:, 0]).reshape(-1, 1),
                   x[:, 1].reshape(-1, 1),
                   x[:, 2].reshape(-1, 1)])
    y = table.values[:, -1].reshape(-1, 1)
    y = np.log(y)
    y_scaler = MinMaxScaler()
    y_scaled = y_scaler.fit_transform(y)
    # Persist the fitted scaler: you need it later to inverse-transform predictions.
    # Since the scaler varies between different splits of the training and testing
    # sets, we strongly recommend saving every scaler together with the model trained on it.
    jb.dump(y_scaler, "y_scaler")
    x_train, x_test, y_train, y_test = train_test_split(X, y_scaled, test_size=test_size)
    training_set = dataset(x_train, y_train)
    testing_set = dataset(x_test, y_test)
    return training_set, testing_set, y_scaler
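
# Usage sketch (a hedged example, not part of the original module: "data.csv",
# test_size=0.2 and batch_size=32 are placeholder values):
#
#   training_set, testing_set, y_scaler = get_datasets("data.csv", test_size=0.2)
#   train_loader = torch.utils.data.DataLoader(training_set, batch_size=32, shuffle=True)
#   # Predictions come back in log/MinMax space; recover the original scale with
#   #   y_pred = np.exp(y_scaler.inverse_transform(scaled_pred))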

class eva_dataset(Dataset):
    """Feature-only Dataset used at evaluation/inference time."""
    def __init__(self, x):
        super(eva_dataset, self).__init__()
        self.x = x

    def __getitem__(self, idx):
        # Return each sample as a column vector.
        return self.x[idx, :].reshape(-1, 1)

    def __len__(self):
        return len(self.x)

def get_month_img(DEM_path, VCD_path):
    """Build the per-pixel feature matrix for one month from a DEM raster and a
    VCD raster, and return it together with a mask of non-positive VCD pixels."""
    # Placeholder feature column of zeros, one entry per raster pixel.
    h = np.zeros(25920000)
    alt = np.array(pi.open(DEM_path).getdata())
    raw_img = np.array([alt, h]).transpose()
    column = np.array(pi.open(VCD_path).getdata())
    # Flag non-positive pixels and add 1 to them so the logarithm is defined;
    # those entries become log(1) = 0 and can be masked out later via zero_tag.
    zero_tag = (column <= 0)
    log_column = np.log(column + zero_tag)
    return np.hstack([log_column.reshape(-1, 1), raw_img]), zero_tag
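
if __name__ == "__main__":
    # Minimal inference sketch; "dem.tif" and "vcd.tif" are placeholder paths,
    # not files shipped with this module.
    features, zero_tag = get_month_img("dem.tif", "vcd.tif")
    eval_loader = torch.utils.data.DataLoader(eva_dataset(features), batch_size=4096)
    batch = next(iter(eval_loader))
    # Each sample is a (3, 1) column vector: log VCD, altitude, zero placeholder.
    print(batch.shape)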