-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
114 lines (88 loc) · 3.47 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
def main(root_dir, train_mode, test_mode):
    """Run the airport-pipeline: clean, merge, preprocess, then train and/or test.

    Parameters
    ----------
    root_dir : str
        Project root; resources are read from ``root_dir + '/resources'`` and
        the fitted model is stored under ``root_dir + '/models/model.pkl'``.
    train_mode : bool
        When truthy, clean/merge/preprocess the training data, fit the model
        and pickle it to disk.
    test_mode : bool
        When truthy, load the pickled model and build the preprocessed test
        set.  NOTE(review): the loaded model is not applied to X here —
        presumably prediction happens elsewhere; confirm with the caller.
    """
    # Only the imports this function actually uses are kept
    # (the original also imported sklearn/numpy/matplotlib/seaborn/lightgbm,
    # all unused here).
    import importlib
    import pickle
    import pandas as pd

    # Project-local modules resolved at call time.
    dataset_cleaning = importlib.import_module("utils.dataset_cleaning")
    dataset_joining = importlib.import_module("utils.dataset_joining")
    preprocessing = importlib.import_module("utils.preprocessing")
    train = importlib.import_module("utils.train")

    model_path = root_dir + '/models/model.pkl'

    if train_mode:
        # The datasets that are returned are cleaned.
        print('Cleaning the datasets...')
        df_aircraft = dataset_cleaning.clean_aircraft(
            root_dir + '/resources/ACchar.xlsx'
        )
        df_airport = dataset_cleaning.clean_airport(
            root_dir + '/resources/training_set_airport_data.csv',
            mode='train'
        )
        df_geography = dataset_cleaning.clean_geography(
            root_dir + '/resources/geographic_data.csv'
        )
        df_weather = dataset_cleaning.clean_weather(
            root_dir + '/resources/Weather_data.csv',
            mode='train'
        )
        print('Merging the datasets...')
        merged_df = dataset_joining.join_datasets(
            df_airport, df_weather=df_weather,
            df_geography=df_geography, df_aircraft=df_aircraft
        )
        pairs = pd.read_csv(root_dir + '/resources/new_pairs.csv')
        pairs.columns = ['index', 'stand', 'runway', 'distance']
        print('Preprocessing the data...')
        X, y = preprocessing.preprocessing(merged_df, pairs, 'train', root_dir)
        print('Fitting the model...')
        fitted_model = train.train(X, y)
        print('Saving the model...')
        # Pickle requires a *binary* file handle; the original used text mode
        # ('w'), which raises TypeError.  `with` guarantees the handle closes.
        with open(model_path, 'wb') as filehandler:
            pickle.dump(fitted_model, filehandler)

    if test_mode:
        # Load the previously trained model.  The original referenced an
        # undefined `path_model` (NameError) and opened in text mode.
        with open(model_path, 'rb') as filehandler:
            fitted_model = pickle.load(filehandler)
        # The datasets that are returned are cleaned.
        df_aircraft = dataset_cleaning.clean_aircraft(
            root_dir + '/resources/ACchar.xlsx'
        )
        df_airport = dataset_cleaning.clean_airport(
            root_dir + '/resources/test_set_airport_data.xlsx',
            mode='test'
        )
        df_geography = dataset_cleaning.clean_geography(
            root_dir + '/resources/geographic_data.csv'
        )
        df_weather = dataset_cleaning.clean_weather(
            root_dir + '/resources/test_set_weather_data.xlsx',
            mode='test'
        )
        merged_df = dataset_joining.join_datasets(
            df_airport, df_weather=df_weather,
            df_geography=df_geography, df_aircraft=df_aircraft
        )
        pairs = pd.read_csv(root_dir + '/resources/new_pairs.csv')
        pairs.columns = ['index', 'stand', 'runway', 'distance']
        X, y = preprocessing.preprocessing(merged_df, pairs, 'test', root_dir)
main('/Users/damienchambon/Desktop/ETUDES/M2 2020-2021/Hackathon/Hackathon-Eleven',train_mode=True,test_mode=False)