Skip to content

[RUIN 296] Write script to export to database #190

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 3 commits into
base: dev
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions src/database/cloud_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
import cloudconvert
# Load the CloudConvert API key from a local file. Keep that file out of
# version control.
with open('./api_key.txt') as f:
    # Only the first line is used. strip() removes the trailing newline that
    # readlines()[0] would have left inside the key.
    api_key = f.readline().strip()
if not api_key:
    raise ValueError("./api_key.txt is empty; expected the API key on its first line")

cloudconvert.configure(api_key=api_key, sandbox=False)

# Create a three-step job: import a file from a URL, convert it to PDF, and
# export the result as a URL.
# NOTE(review): 'https://my-url' and 'some_other_option' look like placeholder
# values -- confirm before running against the live (non-sandbox) API.
cloudconvert.Job.create(payload={
    "tasks": {
        'import-my-file': {
            'operation': 'import/url',
            'url': 'https://my-url'
        },
        'convert-my-file': {
            'operation': 'convert',
            'input': 'import-my-file',
            'output_format': 'pdf',
            'some_other_option': 'value'
        },
        'export-my-file': {
            'operation': 'export/url',
            'input': 'convert-my-file'
        }
    }
})
152 changes: 152 additions & 0 deletions src/database/insertion_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,152 @@
import pandas as pd
import pyodbc
import csv
import sqlalchemy
import random
from datetime import datetime


# Database connection settings.
# NOTE(review): dialect, driver, database_alc and port are not referenced by
# the code visible in this script -- presumably intended for a SQLAlchemy URL.
dialect = 'mssql'
# driver = "{ODBC Driver 17 for SQL Server}"
driver = 'pyodbc'
# The backslash is escaped explicitly: "\S" is not a valid escape sequence and
# triggers a warning on current Python versions. The resulting string is the
# same as before.
server = "DESKTOP-DCBRNA3\\SQLEXPRESS"
database_alc = "TribalTools?driver=ODBC+Driver+17+for+SQL+Server&authentication=ActiveDirectoryIntegrated"
database = "TribalTools"
# NOTE(review): the login and password are hard-coded in source control; they
# should be moved to environment variables or a secret store.
user = "test_login"
password = "Zxcft^7890"
# Lookup CSV files read by lookup_table().
FILE_VEHICLE = './resources/lookups/vehicle.csv'
FILE_ROAD = './resources/lookups/road.csv'
# Master workbook. sheet_name=None loads every sheet into a dict keyed by
# sheet name; the 'vehicle' and 'road' sheets are used below.
excel = "./resources/master.xlsx"
excel_data = pd.read_excel(excel, sheet_name = None)
port = '1433'

# needs to be changed to the correct format
# A random ID is used for now; it is written to the tblCrashID column of every
# generated INSERT statement.
CRASH_ID = random.randint(0, 99999)

def lookup_table(file):
    """Build a lookup dictionary from a two-column, semicolon-delimited CSV.

    Args:
        file: Path of the CSV file to read.

    Returns:
        A dict mapping the first column to the second column for every row
        whose second column is non-empty.

    Raises:
        ValueError: If a row has more than two columns.
    """
    table = {}
    # The with-block guarantees the file is closed; newline='' is the mode the
    # csv module expects.
    with open(file, 'r', newline='') as handle:
        for row in csv.reader(handle, delimiter=';'):
            # csv yields [] for a blank line; skip such short rows as well as
            # rows that have no mapped value.
            if len(row) < 2 or row[1] == '':
                continue
            key, value = row
            table[key] = value
    return table


def create_output_df_vehicle(trct, my_data, lookup):
    """Build one question/answer DataFrame per vehicle.

    Args:
        trct: DataFrame with a 'name' column holding the column names of the
            target database table.
        my_data: DataFrame with 'vehicle number', 'question' and 'answer'
            columns.
        lookup: dict mapping a question in ``my_data`` to a database column
            name.

    Returns:
        A list with one DataFrame (columns 'question' and 'answer') for each
        vehicle number from 1 to the largest one present. 'question' holds
        the database column name. Returns an empty list when ``my_data`` has
        no rows.
    """
    result = []
    if len(my_data) == 0:
        # max() of an empty column would raise; there is nothing to export.
        return result

    column_names = set(trct['name'].tolist())
    vehicle_count = max(my_data['vehicle number'])
    print(vehicle_count)
    for i in range(1, 1 + vehicle_count):
        temp_df = my_data[my_data['vehicle number'] == i]
        questions = temp_df['question'].tolist()
        # Answers are taken from the same filtered rows as the questions.
        # Using a position in temp_df as a label into my_data returned the
        # first vehicle's answers for every vehicle.
        answers = temp_df['answer'].tolist()
        result_dict = {'question': [], 'answer': []}

        print('vehicle number', i)
        for j in lookup:
            # Keep a row only if it is in both the sheet and the table.
            if j in questions and lookup[j] in column_names:
                result_dict['question'].append(lookup[j])
                result_dict['answer'].append(answers[questions.index(j)])
        result.append(pd.DataFrame.from_dict(result_dict))
    print('result\n', result)
    return result


def create_output_df_road(trct, my_data, lookup):
    """Build the question/answer DataFrame for the crash (road) record.

    Args:
        trct: DataFrame with a 'name' column holding the column names of the
            target database table.
        my_data: DataFrame with 'question' and 'answer' columns.
        lookup: dict mapping a question in ``my_data`` to a database column
            name.

    Returns:
        A single-element list holding a DataFrame with 'question' (database
        column name) and 'answer' columns. The 'C3_Date' answer is parsed
        into a datetime.

    Raises:
        ValueError: If the 'C3_Date' answer does not match
            "%Y/%m/%d<TAB>%H:%M".
    """
    column_names = set(trct['name'].tolist())
    questions = my_data['question'].tolist()
    # Positional list, so the lookup is correct whatever index my_data has.
    answers = my_data['answer'].tolist()
    result_dict = {'question': [], 'answer': []}

    for j in lookup:
        column = lookup[j]
        # Keep a row only if it is in both the sheet and the table.
        if j not in questions or column not in column_names:
            continue
        answer = answers[questions.index(j)]
        # TODO: need to add C3, C15, C20, C21, C22, C23, C24, C26, C27
        if column == 'C3_Date':
            answer = datetime.strptime(answer, "%Y/%m/%d\t%H:%M")
        result_dict['question'].append(column)
        result_dict['answer'].append(answer)

    return [pd.DataFrame.from_dict(result_dict)]

def create_output_df_passenger(trct, my_data, lookup):
    """Placeholder for the passenger export; does nothing and returns None."""
    return None

def create_output_df_nonmotorist(trct, my_data, lookup):
    """Placeholder for the non-motorist export; does nothing and returns None."""
    return None

def create_output_df_driver(trct, my_data, lookup):
    """Placeholder for the driver export; does nothing and returns None."""
    return None

# creates an insert statement with a df
def insert_statement(df, table_name, crash_id=None):
    """Build an INSERT statement from a question/answer DataFrame.

    Args:
        df: DataFrame whose 'question' column holds database column names
            and whose 'answer' column holds the values to insert.
        table_name: Name of the target table, e.g. "dbo.tblVehicle".
        crash_id: Value written to the tblCrashID column. Defaults to the
            module-level CRASH_ID.

    Returns:
        The SQL text, e.g.
        "INSERT INTO t (a,b,tblCrashID) values('x','y',7)".
    """
    if crash_id is None:
        crash_id = CRASH_ID

    columns = [str(question) for question in df['question'].tolist()]
    # Double embedded single quotes so an answer such as O'Brien cannot break
    # out of its string literal.
    # NOTE(review): table and column names are still interpolated as-is; a
    # parameterized cursor.execute() would be the safer long-term design.
    values = ["'" + str(answer).replace("'", "''") + "'"
              for answer in df['answer'].tolist()]

    columns.append('tblCrashID')
    values.append(str(crash_id))

    return ("INSERT INTO " + table_name + " (" + ",".join(columns)
            + ") values(" + ",".join(values) + ")")


def insert_vehicle(df):
    """Insert one dbo.tblVehicle row per vehicle found in the 'vehicle' sheet.

    Args:
        df: DataFrame with a 'name' column listing the columns of
            dbo.tblVehicle.

    Uses the module-level ``cursor`` and ``excel_data``.
    """
    mapping = lookup_table(FILE_VEHICLE)
    vehicle_frames = create_output_df_vehicle(df, excel_data['vehicle'], mapping)
    for frame in vehicle_frames:
        statement = insert_statement(frame, "dbo.tblVehicle")
        print('****************')
        print('insert_vehicle:\n', statement)
        cursor.execute('SET ANSI_WARNINGS OFF')
        cursor.execute(statement)

def insert_road(df):
    """Insert the crash record from the 'road' sheet into dbo.tblCrash.

    Args:
        df: DataFrame with a 'name' column listing the columns of
            dbo.tblCrash.

    Uses the module-level ``cursor`` and ``excel_data``. IDENTITY_INSERT is
    switched on before each insert.
    """
    mapping = lookup_table(FILE_ROAD)
    road_frames = create_output_df_road(df, excel_data['road'], mapping)
    for frame in road_frames:
        statement = insert_statement(frame, "dbo.tblCrash")
        print('****************')
        print('insert_road:\n', statement)
        cursor.execute('SET ANSI_WARNINGS OFF')
        cursor.execute('SET IDENTITY_INSERT dbo.tblCrash ON')
        cursor.execute(statement)

def insert_passenger(trct, my_data, lookup):
    """Placeholder for the passenger insert; does nothing and returns None."""
    return None

def insert_nonmotorist(trct, my_data, lookup):
    """Placeholder for the non-motorist insert; does nothing and returns None."""
    return None

def insert_driver(trct, my_data, lookup):
    """Placeholder for the driver insert; does nothing and returns None."""
    return None

if __name__ == "__main__":

    # Connect to the SQL Server instance with the module-level settings.
    conn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};SERVER='+server+
                          ';PORT=1433'+
                          ';DATABASE='+database+
                          ';UID='+user+
                          ';PWD='+ password)
    try:
        # insert_vehicle() and insert_road() read this module-level cursor.
        cursor = conn.cursor()
        try:
            # Fetch the column names of each target table, then insert the
            # matching rows from the workbook.
            vehicle = pd.read_sql('SELECT name FROM sys.columns WHERE object_id = OBJECT_ID(\'dbo.tblVehicle\') ', conn)
            insert_vehicle(vehicle)
            road = pd.read_sql('SELECT name FROM sys.columns WHERE object_id = OBJECT_ID(\'dbo.tblCrash\') ', conn)
            insert_road(road)

            # Commit only after every insert succeeded.
            conn.commit()
        finally:
            cursor.close()
    finally:
        # Always release the connection. Closing without a commit discards
        # any uncommitted inserts.
        conn.close()
100 changes: 100 additions & 0 deletions src/database/json_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
import os
import requests # pip install requests

# The authentication key (API Key).
# Get your own by registering at https://app.pdf.co
# Read from the PDF_CO_API_KEY environment variable so the secret is never
# committed to the repository. When the variable is unset the key is empty and
# the API rejects the requests.
API_KEY = os.environ.get("PDF_CO_API_KEY", "")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't have api key stored explicitly

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will send the API keys to you! The empty files are dummy files that I haven't implemented yet but will be necessary.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Will this API key always be the same as the one in pdf_convert.py? If yes, update to match my comment above.


# Base URL for PDF.co Web API requests
BASE_URL = "https://api.pdf.co/v1"

# Source PDF file; main() uploads it before requesting the conversion.
SourceFile = "./test.pdf"
# Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
Pages = ""
# PDF document password. Leave empty for unprotected documents.
Password = ""
# Destination JSON file name; the converted result is downloaded to this path.
DestinationFile = "./result.json"


def main(args=None):
    """Upload SourceFile and, if the upload succeeded, convert it to JSON.

    Args:
        args: Unused.
    """
    uploadedFileUrl = uploadFile(SourceFile)
    if uploadedFileUrl is None:
        # uploadFile() has already reported the failure.
        return
    convertPdfToJson(uploadedFileUrl, DestinationFile)


def convertPdfToJson(uploadedFileUrl, destinationFile):
    """Converts PDF To Json using PDF.co Web API

    Args:
        uploadedFileUrl: URL of the already uploaded source PDF.
        destinationFile: Local path the converted JSON is written to.

    Returns:
        None. Success and failure are both reported with print().
    """

    # Prepare request parameters (sent form-encoded through ``data=``)
    # See documentation: https://apidocs.pdf.co
    parameters = {
        "name": os.path.basename(destinationFile),
        "password": Password,
        "pages": Pages,
        "url": uploadedFileUrl,
    }

    # Prepare URL for 'PDF To Json' API request
    url = "{}/pdf/convert/to/json".format(BASE_URL)

    # Execute request and get response as JSON
    response = requests.post(url, data=parameters, headers={"x-api-key": API_KEY})
    if response.status_code != 200:
        print(f"Request error: {response.status_code} {response.reason}")
        return

    payload = response.json()
    if payload["error"]:
        # Show service reported error
        print(payload["message"])
        return

    # Download result file
    download = requests.get(payload["url"], stream=True)
    if download.status_code != 200:
        # Report the failed download itself, not the earlier conversion
        # response, which was a successful 200.
        print(f"Request error: {download.status_code} {download.reason}")
        return

    with open(destinationFile, 'wb') as out_file:
        for chunk in download:
            out_file.write(chunk)
    print(f"Result file saved as \"{destinationFile}\" file.")


def uploadFile(fileName):
    """Uploads file to the cloud

    Args:
        fileName: Path of the local file to upload.

    Returns:
        The URL under which the uploaded file can be referenced, or None
        when any step failed (the failure is reported with print()).
    """

    # 1. RETRIEVE PRESIGNED URL TO UPLOAD FILE.

    # Prepare URL for 'Get Presigned URL' API request. The query is passed
    # through ``params`` so the file name is URL-encoded correctly.
    url = "{}/file/upload/get-presigned-url".format(BASE_URL)
    query = {
        "contenttype": "application/octet-stream",
        "name": os.path.basename(fileName),
    }

    # Execute request and get response as JSON
    response = requests.get(url, params=query, headers={"x-api-key": API_KEY})
    if response.status_code != 200:
        print(f"Request error: {response.status_code} {response.reason}")
        return None

    payload = response.json()
    if payload["error"]:
        # Show service reported error
        print(payload["message"])
        return None

    # URL to use for file upload
    uploadUrl = payload["presignedUrl"]
    # URL for future reference
    uploadedFileUrl = payload["url"]

    # 2. UPLOAD FILE TO CLOUD.
    with open(fileName, 'rb') as source:
        upload = requests.put(uploadUrl, data=source, headers={"x-api-key": API_KEY, "content-type": "application/octet-stream"})
    if not upload.ok:
        # Do not hand back a URL for a file that never reached the server.
        print(f"Request error: {upload.status_code} {upload.reason}")
        return None

    return uploadedFileUrl


# Run the upload-and-convert flow only when this file is executed as a script.
if __name__ == '__main__':
    main()
3 changes: 3 additions & 0 deletions src/database/my_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import pandas
# Scratch check: load the semicolon-separated vehicle lookup file and print it.
data=pandas.read_csv('./resources/lookup_vehicle_tsv.tsv',sep=';')
print(data)
106 changes: 106 additions & 0 deletions src/database/pdf_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
import os
import requests # pip install requests

# The authentication key (API Key).
# Get your own by registering at https://app.pdf.co
# Read from the PDF_CO_API_KEY environment variable so the secret is never
# committed to the repository. When the variable is unset the key is empty and
# the API rejects the requests.
API_KEY = os.environ.get("PDF_CO_API_KEY", "")
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

API keys should not be stored explicitly in a file on GitHub.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should they get their own API key? If yes, include this information in the README.md. I would change this line to the python version of a system variable reference. This is how it is done in the build.gradle folder when calling the Google Maps API: "$System.env.GOOGLE_MAPS_API_KEY"


# Base URL for PDF.co Web API requests
BASE_URL = "https://api.pdf.co/v1"

# Source PDF file. Forward slashes work on every platform; the previous
# ".\\" prefix only resolved on Windows.
SourceFile = "./test.pdf"
# Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
Pages = ""
# PDF document password. Leave empty for unprotected documents.
Password = ""
# Destination HTML file name. This script requests HTML output, so the result
# is no longer saved under a .json name.
DestinationFile = "./result.html"
# Set to True to get simplified HTML without CSS. Default is the rich HTML keeping the document design.
PlainHtml = False
# Set to True if your document has the column layout like a newspaper.
ColumnLayout = False


def main(args=None):
    """Upload SourceFile and, if the upload succeeded, convert it to HTML.

    Args:
        args: Unused.
    """
    uploadedFileUrl = uploadFile(SourceFile)
    if uploadedFileUrl is None:
        # uploadFile() has already reported the failure.
        return
    convertPdfToHtml(uploadedFileUrl, DestinationFile)


def convertPdfToHtml(uploadedFileUrl, destinationFile):
    """Converts PDF To Html using PDF.co Web API

    Args:
        uploadedFileUrl: URL of the already uploaded source PDF.
        destinationFile: Local path the converted HTML is written to.

    Returns:
        None. Success and failure are both reported with print().
    """

    # Prepare request parameters (sent form-encoded through ``data=``)
    # See documentation: https://apidocs.pdf.co
    parameters = {
        "name": os.path.basename(destinationFile),
        "password": Password,
        "pages": Pages,
        "simple": PlainHtml,
        "columns": ColumnLayout,
        "url": uploadedFileUrl,
    }

    # Prepare URL for 'PDF To Html' API request
    url = "{}/pdf/convert/to/html".format(BASE_URL)

    # Execute request and get response as JSON
    response = requests.post(url, data=parameters, headers={"x-api-key": API_KEY})
    if response.status_code != 200:
        print(f"Request error: {response.status_code} {response.reason}")
        return

    payload = response.json()
    if payload["error"]:
        # Show service reported error
        print(payload["message"])
        return

    # Download result file
    download = requests.get(payload["url"], stream=True)
    if download.status_code != 200:
        # Report the failed download itself, not the earlier conversion
        # response, which was a successful 200.
        print(f"Request error: {download.status_code} {download.reason}")
        return

    with open(destinationFile, 'wb') as out_file:
        for chunk in download:
            out_file.write(chunk)
    print(f"Result file saved as \"{destinationFile}\" file.")


def uploadFile(fileName):
    """Uploads file to the cloud

    Args:
        fileName: Path of the local file to upload.

    Returns:
        The URL under which the uploaded file can be referenced, or None
        when any step failed (the failure is reported with print()).
    """

    # 1. RETRIEVE PRESIGNED URL TO UPLOAD FILE.

    # Prepare URL for 'Get Presigned URL' API request. The query is passed
    # through ``params`` so the file name is URL-encoded correctly.
    url = "{}/file/upload/get-presigned-url".format(BASE_URL)
    query = {
        "contenttype": "application/octet-stream",
        "name": os.path.basename(fileName),
    }

    # Execute request and get response as JSON
    response = requests.get(url, params=query, headers={"x-api-key": API_KEY})
    if response.status_code != 200:
        print(f"Request error: {response.status_code} {response.reason}")
        return None

    payload = response.json()
    if payload["error"]:
        # Show service reported error
        print(payload["message"])
        return None

    # URL to use for file upload
    uploadUrl = payload["presignedUrl"]
    # URL for future reference
    uploadedFileUrl = payload["url"]

    # 2. UPLOAD FILE TO CLOUD.
    with open(fileName, 'rb') as source:
        upload = requests.put(uploadUrl, data=source, headers={"x-api-key": API_KEY, "content-type": "application/octet-stream"})
    if not upload.ok:
        # Do not hand back a URL for a file that never reached the server.
        print(f"Request error: {upload.status_code} {upload.reason}")
        return None

    return uploadedFileUrl


# Run the upload-and-convert flow only when this file is executed as a script.
if __name__ == '__main__':
    main()
1 change: 1 addition & 0 deletions src/database/resources/api_key.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJhdWQiOiIxIiwianRpIjoiNzYyMjcxM2VlYzZkNzFkZWZjYzI1Y2QxNTljMTEwNTY2YzBiYmZiZjZlZjMwMzFiZjQ5OGIwYjUwYzY1ZTVlOTliZTZjMTQ1NzZmYTA4NTUiLCJpYXQiOjE2NDkyNTk0OTEuMzA1NTAyLCJuYmYiOjE2NDkyNTk0OTEuMzA1NTAzLCJleHAiOjQ4MDQ5MzMwOTEuMjkxODk0LCJzdWIiOiI0MTQ1NDg3NSIsInNjb3BlcyI6W119.DHH8AkUVi89PETSWrH92fmvpaZ10F_FtZKMVh7Zeqq_IEy1sEi2-maDAvvnmzJ5mv3b4TEoHirIGRcaPlJssYmnI1lXA4tiT5yheh40rDLvzGWG3nyTagat9fq3eSNYdwRqzNYRbK42khGyrGMVVIwjKccWkbGNHCP2VxDsGTWsHO76N0H5C7GYvWO4Nshm467FzEzXuwOd-Oe5cezcYm0H04XrVh4_WpKFmT1sRDcdS0_cZgTMeisUh-gJ5ZOWO7YH0TCDPiNKmNjKKcCgBw-S1TbRmlvWlmSXiJn1qRZb50q0ZSFTuiW37Y-LFkedRFKG255YorSrivkoVuPVQ9fDd-pE_BUem3dv6-NuA9-9AtuYcnZmCpZvv-c-XJS9pxuRJcGl47n87gL8zqZFRTSuYEfpihiS9nW10u6w6Cup_9cw9ZvICEBV3K5tGssfWE60w48bupfVIRxkeIJhG8sBwClA6A9DIi6YrT7nlUMYoB5SpXvfvdILvPzg_fU-1RKjVW-VqhsjTk1K9Et94dhs_XeQ-NG3yO3VnYGTpBZuCW_-P555RstD23-DAPTrpcazJ0F9xoNVTkZKKD98TAmaF-731gtXEYMcrT8nj2-34Zth9sQAxvBWaeLcJEAuJhmYMLYkiN8GhCPJCNffTfEYEAZ5yc2CKOcCCRwxPmbo
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This API key should not be stored on GitHub. Please remove it and send it to me so I can add it to LastPass. If you want to keep the file locally, add it to the .gitignore.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would change this line to the python version of a system variable reference. This is how it is done in the build.gradle folder when calling the Google Maps API: "$System.env.GOOGLE_MAPS_API_KEY"

Loading