-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtraining_Validation_Insertion.py
84 lines (68 loc) · 4.23 KB
/
training_Validation_Insertion.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
from datetime import datetime
from Training_Raw_data_validation.rawValidation import Raw_Data_validation
from DataTypeValidation_Insertion_Training.DataTypeValidation import dBOperation
from DataTransform_Training.DataTransformation import dataTransform
from application_logging import logger
from AzureBlobStorage.azureBlobStorage import AzureBlobStorage
from Email_Trigger.send_email import email
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
class train_validation:
    """Drive the training-data validation pipeline and mail a summary.

    The class wires together the raw-data validator, the data transformer,
    the database helper, the application logger, the Azure blob storage
    helper and the e-mail helper, and exposes a single entry point,
    :meth:`train_validation`, that calls them in a fixed order.
    """

    # Database name handed to every dBOperation call made by the pipeline.
    DB_NAME = 'fitbitDB'
    # Directory listed through AzureBlobStorage to build the "fault file" list.
    BAD_FILES_DIR = 'Training_Bad_Raw_Files_Validated'
    # NOTE(review): this address looks like a redaction placeholder rather
    # than a deliverable mailbox -- confirm the real recipient list.
    MAIL_TO = ('[email protected]',)

    def __init__(self, path):
        """Create the collaborators used by the pipeline.

        Args:
            path: Forwarded unchanged to ``Raw_Data_validation``.
        """
        self.raw_data = Raw_Data_validation(path)
        self.dataTransform = dataTransform()
        self.dBOperation = dBOperation()
        self.file_object = 'Training_Main_Log'
        self.log_writer = logger.App_Logger()
        self.azureObj = AzureBlobStorage()
        self.emailObj = email()

    def train_validation(self):
        """Validate raw files, load them into the database and send a report.

        Steps run strictly in this order: raw-file validation, data
        transformation, table creation and insertion, clean-up of the good
        data folder, export of the table to CSV, and finally the e-mail.

        Raises:
            Exception: Whatever any step raises. The failure is written to
                the training log first and then re-raised unchanged.
        """
        try:
            self._log('Start of Validation on files!!')
            column_names = self._validate_raw_files()
            self._log("Raw Data Validation Complete!!")

            self._log("Starting Data Transforamtion!!")
            # Per the original author's note: adds quotes to the '?' values
            # in some columns.
            self.dataTransform.addQuotesToStringValuesInColumn()
            self._log("DataTransformation Completed!!!")

            self._load_into_database(column_names)

            self._log("Deleting Good Data Folder!!!")
            # Delete the good data folder after loading files in table.
            self.raw_data.deleteExistingGoodDataTrainingFolder()
            self._log("Good_Data folder deleted!!!")

            # NOTE(review): the archive step is disabled, yet the two log
            # lines around it still report that bad files were moved and the
            # folder deleted. Messages are kept as-is; confirm whether the
            # step should be re-enabled or the log wording changed.
            self._log("Moving bad files to Archive and deleting Bad_Data folder!!!")
            # self.raw_data.moveBadFilesToArchiveBad()
            self._log("Bad files moved to archive!! Bad folder Deleted!!")
            self._log("Validation Operation completed!!")

            self._log("Extracting csv file from table")
            # Export data in table to csv file.
            self.dBOperation.selectingDatafromtableintocsv(self.DB_NAME)

            self._send_summary_mail()
        except Exception as exc:
            # Record the failure before propagating it; a bare ``raise``
            # keeps the original traceback intact.
            self._log('Training validation failed: ' + str(exc))
            raise

    def _log(self, message):
        """Write *message* to the training log via the application logger."""
        self.log_writer.log(self.file_object, message)

    def _validate_raw_files(self):
        """Run the raw-file checks and return the schema's column names."""
        # Extracting values from the training schema.
        (length_of_date_stamp, length_of_time_stamp,
         column_names, no_of_columns) = self.raw_data.valuesFromSchema()
        # Getting the regex defined to validate the file name.
        regex = self.raw_data.manualRegexCreation()
        # Validating file names of the training files.
        self.raw_data.validationFileNameRaw(
            regex, length_of_date_stamp, length_of_time_stamp)
        # Validating column length in the file.
        self.raw_data.validateColumnLength(no_of_columns)
        # Validating if any column has all values missing.
        self.raw_data.validateMissingValuesInWholeColumn()
        return column_names

    def _load_into_database(self, column_names):
        """Create the table for *column_names* and insert the good data."""
        self._log("Creating Training_Database and tables on the basis of given schema!!!")
        # Create database with given name, if present open the connection.
        # Create table with the columns given in the schema.
        self.dBOperation.createTableDb(self.DB_NAME, column_names)
        self._log("Table creation Completed!!")
        self._log("Insertion of Data into Table started!!!!")
        # Insert csv files in the table.
        self.dBOperation.insertIntoTableGoodData(self.DB_NAME)
        self._log("Insertion in Table completed!!!")

    def _send_summary_mail(self):
        """Build the HTML summary listing the fault files and send it."""
        msg = MIMEMultipart()
        msg['Subject'] = 'FitBit Calories - Train Validation | ' + str(datetime.now())
        file_list = self.azureObj.listDirFiles(self.BAD_FILES_DIR)
        # Tolerate a missing listing and non-string entries so the report
        # itself cannot fail on the join.
        file_str = ','.join(str(name) for name in (file_list or []))
        body = ('Model Train Validation Done Successfully... <br><br> Fault File List: <br>'
                + file_str + '<br><br>Thanks and Regards, <br> Rahul Garg')
        msg.attach(MIMEText(body, 'html'))
        # Pass a fresh list so the mail helper cannot mutate the constant.
        self.emailObj.trigger_mail(list(self.MAIL_TO), [], msg)