-
Notifications
You must be signed in to change notification settings - Fork 4
[RUIN 296] Write script to export to database #190
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from 2 commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
import cloudconvert

# Read the CloudConvert API key from a local (gitignored) file.
# BUG FIX: readline().strip() drops the trailing newline that
# readlines()[0] kept; a key with '\n' in it fails authentication.
with open('./api_key.txt') as f:
    api_key = f.readline().strip()

cloudconvert.configure(api_key = api_key, sandbox = False)

# Sample job: import a file by URL, convert it to PDF, then export it.
# The 'url' and 'some_other_option' values are placeholders.
cloudconvert.Job.create(payload={
    "tasks": {
        'import-my-file': {
            'operation': 'import/url',
            'url': 'https://my-url'
        },
        'convert-my-file': {
            'operation': 'convert',
            'input': 'import-my-file',
            'output_format': 'pdf',
            'some_other_option': 'value'
        },
        'export-my-file': {
            'operation': 'export/url',
            'input': 'convert-my-file'
        }
    }
})
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,152 @@ | ||
import pandas as pd
import pyodbc
import csv
import sqlalchemy
import random
import os
from datetime import datetime


# Connection settings for the local SQL Server instance.
dialect = 'mssql'
# driver = "{ODBC Driver 17 for SQL Server}" | (kept for reference)
driver = 'pyodbc'
# BUG FIX: raw string -- the backslash in HOST\INSTANCE must not be
# interpreted as an escape sequence ("\S" raises a SyntaxWarning).
server = r"DESKTOP-DCBRNA3\SQLEXPRESS"
database_alc = "TribalTools?driver=ODBC+Driver+17+for+SQL+Server&authentication=ActiveDirectoryIntegrated"
database = "TribalTools"
# NOTE(review): credentials must not live in source control; read them from
# the environment, falling back to the old values for backward compatibility.
user = os.environ.get("TRIBALTOOLS_DB_USER", "test_login")
password = os.environ.get("TRIBALTOOLS_DB_PASSWORD", "Zxcft^7890")
# Semicolon-separated lookup files mapping excel questions to TCRT columns.
FILE_VEHICLE = './resources/lookups/vehicle.csv'
FILE_ROAD = './resources/lookups/road.csv'
excel = "./resources/master.xlsx"
# All sheets of the master workbook, keyed by sheet name.
excel_data = pd.read_excel(excel, sheet_name = None)
port = '1433'

# needs to be changed to the correct format
CRASH_ID = random.randint(0, 99999)
|
||
def lookup_table(file):
    """Return {excel question -> TCRT column name} read from one lookup CSV.

    The file is ';'-delimited; rows whose second field is empty are skipped.
    """
    lookup = {}
    # BUG FIX: context manager so the handle is always closed -- the old
    # csv.reader(open(...)) leaked the file object.
    with open(file, 'r') as fh:
        reader = csv.reader(fh, delimiter = ';')
        for row in reader:
            if row[1] != '':
                k, v = row
                lookup[k] = v
    return lookup
|
||
|
||
def create_output_df_vehicle(trct, my_data, lookup):
    """Map excel answers for each vehicle onto TCRT vehicle-table columns.

    trct: DataFrame with a 'name' column listing dbo.tblVehicle columns.
    my_data: DataFrame with 'vehicle number', 'question' and 'answer' columns.
    lookup: dict mapping excel question text -> TCRT column name.
    Returns a list with one question/answer DataFrame per vehicle number.
    """
    result = []
    name = trct['name'].tolist()
    print(max(my_data['vehicle number']))
    for i in range(1, 1 + max(my_data['vehicle number'])):
        temp_df = my_data[my_data['vehicle number'] == i]
        questions = temp_df['question'].tolist()
        # BUG FIX: answers must come from the same filtered slice as the
        # questions; indexing the full frame with a position taken from the
        # filtered list returned answers belonging to other vehicles.
        answers = temp_df['answer'].tolist()
        result_dict = {'question': [], 'answer': []}

        print('vehicle number', i)
        for j in lookup:
            # check if my data row is in both my excel file and tcrt database
            if j in questions:
                if lookup[j] in name:
                    result_dict['question'].append( lookup[j] )
                    result_dict['answer'].append( answers[questions.index(j)] )
        result.append(pd.DataFrame.from_dict(result_dict))
    print('result\n', result)
    return result
|
||
|
||
def create_output_df_road(trct, my_data, lookup):
    """Build the question/answer DataFrame for the road (dbo.tblCrash) sheet.

    trct: DataFrame with a 'name' column listing dbo.tblCrash columns.
    my_data: DataFrame with 'question' and 'answer' columns.
    lookup: dict mapping excel question text -> TCRT column name.
    Returns a single-element list containing the mapped DataFrame.
    """
    column_names = trct['name'].tolist()
    question_list = my_data['question'].tolist()
    mapped = {'question': [], 'answer': []}

    for excel_question, tcrt_column in lookup.items():
        # keep only rows present in both the excel file and the tcrt database
        if excel_question not in question_list:
            continue
        if tcrt_column not in column_names:
            continue
        answer = my_data['answer'][question_list.index(excel_question)]
        # need to add C3, C15, C20, C21, C22, C23, C24, C26, C27
        if tcrt_column == 'C3_Date':
            answer = datetime.strptime(answer, "%Y/%m/%d\t%H:%M")
        mapped['question'].append(tcrt_column)
        mapped['answer'].append(answer)

    return [pd.DataFrame.from_dict(mapped)]
|
||
def create_output_df_passenger(trct, my_data, lookup):
    # TODO: not implemented yet -- will map passenger-sheet answers onto
    # TCRT columns, analogous to create_output_df_vehicle.
    pass
|
||
def create_output_df_nonmotorist(trct, my_data, lookup):
    # TODO: not implemented yet -- will map nonmotorist-sheet answers onto
    # TCRT columns, analogous to create_output_df_vehicle.
    pass
|
||
def create_output_df_driver(trct, my_data, lookup):
    # TODO: not implemented yet -- will map driver-sheet answers onto
    # TCRT columns, analogous to create_output_df_vehicle.
    pass
|
||
# creates an insert statement with a df
def insert_statement(df, table_name):
    """Build an INSERT statement for `table_name` from a question/answer df.

    The 'question' column supplies the target column names and the 'answer'
    column the values; the module-level CRASH_ID is appended as tblCrashID.
    NOTE(review): string-built SQL is injection-prone -- prefer a
    parameterized cursor.execute with ?-placeholders when this is reworked.
    """
    key, value = '(', '('

    for col in df['question'].tolist():
        key += str(col) + ","
    for ans in df['answer'].tolist():
        # BUG FIX: escape embedded single quotes so an apostrophe in an
        # answer does not break (or inject into) the statement.
        value += "'" + str(ans).replace("'", "''") + "',"

    key += 'tblCrashID)'
    value += str(CRASH_ID) + ')'

    return "INSERT INTO " + table_name + " " + key + " values" + value
|
||
|
||
def insert_vehicle(df):
    """Insert one dbo.tblVehicle row per vehicle found in the excel sheet."""
    vehicle_lookup = lookup_table(FILE_VEHICLE)
    frames = create_output_df_vehicle(df, excel_data['vehicle'], vehicle_lookup)
    for frame in frames:
        statement = insert_statement(frame, "dbo.tblVehicle")
        print('****************')
        print('insert_vehicle:\n', statement)
        cursor.execute('SET ANSI_WARNINGS OFF')
        cursor.execute(statement)
|
||
def insert_road(df):
    """Insert the road/crash data into dbo.tblCrash."""
    road_lookup = lookup_table(FILE_ROAD)
    frames = create_output_df_road(df, excel_data['road'], road_lookup)
    for frame in frames:
        statement = insert_statement(frame, "dbo.tblCrash")
        print('****************')
        print('insert_road:\n', statement)
        cursor.execute('SET ANSI_WARNINGS OFF')
        # tblCrashID is an identity column; allow explicit values for it.
        cursor.execute('SET IDENTITY_INSERT dbo.tblCrash ON')
        cursor.execute(statement)
|
||
def insert_passenger(trct, my_data, lookup):
    # TODO: not implemented yet -- will insert passenger rows,
    # analogous to insert_vehicle.
    pass
|
||
def insert_nonmotorist(trct, my_data, lookup):
    # TODO: not implemented yet -- will insert nonmotorist rows,
    # analogous to insert_vehicle.
    pass
|
||
def insert_driver(trct, my_data, lookup):
    # TODO: not implemented yet -- will insert driver rows,
    # analogous to insert_vehicle.
    pass
|
||
if __name__ == "__main__":

    # Connect to SQL Server using the module-level settings above.
    # NOTE(review): credentials are hard-coded at the top of this file and
    # should be moved to environment variables / a secret store.
    conn = pyodbc.connect('DRIVER={ODBC Driver 17 for SQL Server};SERVER='+server+
    ';PORT=1433'+
    ';DATABASE='+database+
    ';UID='+user+
    ';PWD='+ password)

    # Module-level cursor: the insert_* helpers above reference this name.
    cursor = conn.cursor()

    # Fetch each target table's column names so only columns that actually
    # exist in the database get values mapped onto them.
    vehicle = pd.read_sql('SELECT name FROM sys.columns WHERE object_id = OBJECT_ID(\'dbo.tblVehicle\') ', conn)
    insert_vehicle(vehicle)
    road = pd.read_sql('SELECT name FROM sys.columns WHERE object_id = OBJECT_ID(\'dbo.tblCrash\') ', conn)
    insert_road(road)

    # Commit all inserts at once; note the connection itself is never
    # closed -- presumably relying on process exit. TODO confirm.
    conn.commit()
    cursor.close()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,100 @@ | ||
import os
import requests # pip install requests

# The authentication key (API Key).
# Get your own by registering at https://app.pdf.co
# NOTE(review): never commit real keys to the repository; the key is read
# from the PDF_CO_API_KEY environment variable instead.
API_KEY = os.environ.get("PDF_CO_API_KEY", "")

# Base URL for PDF.co Web API requests
BASE_URL = "https://api.pdf.co/v1"

# Source PDF file
SourceFile = "./test.pdf"
# Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
Pages = ""
# PDF document password. Leave empty for unprotected documents.
Password = ""
# Destination JSON file name
DestinationFile = "./result.json"
|
||
|
||
def main(args = None):
    """Upload the source PDF and, if the upload succeeded, convert it."""
    remote_url = uploadFile(SourceFile)
    if remote_url is not None:
        convertPdfToJson(remote_url, DestinationFile)
|
||
|
||
def convertPdfToJson(uploadedFileUrl, destinationFile):
    """Converts PDF To Json using PDF.co Web API and saves the result locally."""

    # Prepare requests params as JSON
    # See documentation: https://apidocs.pdf.co
    parameters = {}
    parameters["name"] = os.path.basename(destinationFile)
    parameters["password"] = Password
    parameters["pages"] = Pages
    parameters["url"] = uploadedFileUrl

    # Prepare URL for 'PDF To Json' API request
    url = "{}/pdf/convert/to/json".format(BASE_URL)

    # Execute request and get response as JSON
    response = requests.post(url, data=parameters, headers={ "x-api-key": API_KEY })
    if response.status_code == 200:
        # Renamed from `json` to avoid shadowing the stdlib module.
        result = response.json()

        if result["error"] == False:
            # Get URL of result file
            resultFileUrl = result["url"]
            # Download result file
            r = requests.get(resultFileUrl, stream=True)
            if r.status_code == 200:
                with open(destinationFile, 'wb') as out_file:
                    for chunk in r:
                        out_file.write(chunk)
                print(f"Result file saved as \"{destinationFile}\" file.")
            else:
                # BUG FIX: report the failed download response (r); the old
                # code printed `response`, which is always 200 in this branch.
                print(f"Request error: {r.status_code} {r.reason}")
        else:
            # Show service reported error
            print(result["message"])
    else:
        print(f"Request error: {response.status_code} {response.reason}")
|
||
|
||
def uploadFile(fileName):
    """Uploads file to the cloud; returns its URL, or None on any failure."""

    # 1. RETRIEVE PRESIGNED URL TO UPLOAD FILE.

    # Prepare URL for 'Get Presigned URL' API request
    url = "{}/file/upload/get-presigned-url?contenttype=application/octet-stream&name={}".format(
        BASE_URL, os.path.basename(fileName))

    # Execute request and get response as JSON
    response = requests.get(url, headers={ "x-api-key": API_KEY })
    if response.status_code == 200:
        # Renamed from `json` to avoid shadowing the stdlib module.
        result = response.json()

        if result["error"] == False:
            # URL to use for file upload
            uploadUrl = result["presignedUrl"]
            # URL for future reference
            uploadedFileUrl = result["url"]

            # 2. UPLOAD FILE TO CLOUD.
            with open(fileName, 'rb') as f:
                put_response = requests.put(uploadUrl, data=f, headers={ "x-api-key": API_KEY, "content-type": "application/octet-stream" })
            # BUG FIX: the PUT result was previously ignored, so a failed
            # upload silently returned a URL pointing at nothing.
            if put_response.status_code == 200:
                return uploadedFileUrl
            print(f"Upload error: {put_response.status_code} {put_response.reason}")
        else:
            # Show service reported error
            print(result["message"])
    else:
        print(f"Request error: {response.status_code} {response.reason}")

    return None
|
||
|
||
if __name__ == '__main__':
    # Entry point: upload the source PDF and convert it via PDF.co.
    main()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
import pandas

# Quick sanity check: load the semicolon-separated vehicle lookup and print it.
lookup_df = pandas.read_csv('./resources/lookup_vehicle_tsv.tsv', sep=';')
print(lookup_df)
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,106 @@ | ||
import os
import requests # pip install requests

# The authentication key (API Key).
# Get your own by registering at https://app.pdf.co
# NOTE(review): never commit real keys to the repository; the key is read
# from the PDF_CO_API_KEY environment variable instead.
API_KEY = os.environ.get("PDF_CO_API_KEY", "")
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. API keys should not be stored explicitly in a file on GitHub. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Should they get their own API key? If yes, include this information in the |
||
|
||
# Base URL for PDF.co Web API requests
BASE_URL = "https://api.pdf.co/v1"

# Source PDF file. Forward slashes work on Windows too and keep this
# consistent with the sibling pdf-to-json script.
SourceFile = "./test.pdf"
# Comma-separated list of page indices (or ranges) to process. Leave empty for all pages. Example: '0,2-5,7-'.
Pages = ""
# PDF document password. Leave empty for unprotected documents.
Password = ""
# Destination Html file name
# NOTE(review): despite the HTML conversion this writes a .json extension --
# confirm whether it should be ./result.html.
DestinationFile = "./result.json"
# Set to True to get simplified HTML without CSS. Default is the rich HTML keeping the document design.
PlainHtml = False
# Set to True if your document has the column layout like a newspaper.
ColumnLayout = False
|
||
|
||
def main(args = None):
    """Upload the source PDF and, on success, convert it to HTML."""
    remote_url = uploadFile(SourceFile)
    if remote_url is not None:
        convertPdfToHtml(remote_url, DestinationFile)
|
||
|
||
def convertPdfToHtml(uploadedFileUrl, destinationFile):
    """Converts PDF To Html using PDF.co Web API and saves the result locally."""

    # Prepare requests params as JSON
    # See documentation: https://apidocs.pdf.co
    parameters = {}
    parameters["name"] = os.path.basename(destinationFile)
    parameters["password"] = Password
    parameters["pages"] = Pages
    parameters["simple"] = PlainHtml
    parameters["columns"] = ColumnLayout
    parameters["url"] = uploadedFileUrl

    # Prepare URL for 'PDF To Html' API request
    url = "{}/pdf/convert/to/html".format(BASE_URL)

    # Execute request and get response as JSON
    response = requests.post(url, data=parameters, headers={ "x-api-key": API_KEY })
    if response.status_code == 200:
        # Renamed from `json` to avoid shadowing the stdlib module.
        result = response.json()

        if result["error"] == False:
            # Get URL of result file
            resultFileUrl = result["url"]
            # Download result file
            r = requests.get(resultFileUrl, stream=True)
            if r.status_code == 200:
                with open(destinationFile, 'wb') as out_file:
                    for chunk in r:
                        out_file.write(chunk)
                print(f"Result file saved as \"{destinationFile}\" file.")
            else:
                # BUG FIX: report the failed download response (r); the old
                # code printed `response`, which is always 200 in this branch.
                print(f"Request error: {r.status_code} {r.reason}")
        else:
            # Show service reported error
            print(result["message"])
    else:
        print(f"Request error: {response.status_code} {response.reason}")
|
||
|
||
def uploadFile(fileName):
    """Uploads file to the cloud; returns its URL, or None on any failure."""

    # 1. RETRIEVE PRESIGNED URL TO UPLOAD FILE.

    # Prepare URL for 'Get Presigned URL' API request
    url = "{}/file/upload/get-presigned-url?contenttype=application/octet-stream&name={}".format(
        BASE_URL, os.path.basename(fileName))

    # Execute request and get response as JSON
    response = requests.get(url, headers={ "x-api-key": API_KEY })
    if response.status_code == 200:
        # Renamed from `json` to avoid shadowing the stdlib module.
        result = response.json()

        if result["error"] == False:
            # URL to use for file upload
            uploadUrl = result["presignedUrl"]
            # URL for future reference
            uploadedFileUrl = result["url"]

            # 2. UPLOAD FILE TO CLOUD.
            with open(fileName, 'rb') as f:
                put_response = requests.put(uploadUrl, data=f, headers={ "x-api-key": API_KEY, "content-type": "application/octet-stream" })
            # BUG FIX: the PUT result was previously ignored, so a failed
            # upload silently returned a URL pointing at nothing.
            if put_response.status_code == 200:
                return uploadedFileUrl
            print(f"Upload error: {put_response.status_code} {put_response.reason}")
        else:
            # Show service reported error
            print(result["message"])
    else:
        print(f"Request error: {response.status_code} {response.reason}")

    return None
|
||
|
||
if __name__ == '__main__':
    # Entry point: upload the source PDF and convert it via PDF.co.
    main()
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
eyJ0eXAiOiJKV1QiLCJhbGciOiJSUzI1NiJ9.eyJhdWQiOiIxIiwianRpIjoiNzYyMjcxM2VlYzZkNzFkZWZjYzI1Y2QxNTljMTEwNTY2YzBiYmZiZjZlZjMwMzFiZjQ5OGIwYjUwYzY1ZTVlOTliZTZjMTQ1NzZmYTA4NTUiLCJpYXQiOjE2NDkyNTk0OTEuMzA1NTAyLCJuYmYiOjE2NDkyNTk0OTEuMzA1NTAzLCJleHAiOjQ4MDQ5MzMwOTEuMjkxODk0LCJzdWIiOiI0MTQ1NDg3NSIsInNjb3BlcyI6W119.DHH8AkUVi89PETSWrH92fmvpaZ10F_FtZKMVh7Zeqq_IEy1sEi2-maDAvvnmzJ5mv3b4TEoHirIGRcaPlJssYmnI1lXA4tiT5yheh40rDLvzGWG3nyTagat9fq3eSNYdwRqzNYRbK42khGyrGMVVIwjKccWkbGNHCP2VxDsGTWsHO76N0H5C7GYvWO4Nshm467FzEzXuwOd-Oe5cezcYm0H04XrVh4_WpKFmT1sRDcdS0_cZgTMeisUh-gJ5ZOWO7YH0TCDPiNKmNjKKcCgBw-S1TbRmlvWlmSXiJn1qRZb50q0ZSFTuiW37Y-LFkedRFKG255YorSrivkoVuPVQ9fDd-pE_BUem3dv6-NuA9-9AtuYcnZmCpZvv-c-XJS9pxuRJcGl47n87gL8zqZFRTSuYEfpihiS9nW10u6w6Cup_9cw9ZvICEBV3K5tGssfWE60w48bupfVIRxkeIJhG8sBwClA6A9DIi6YrT7nlUMYoB5SpXvfvdILvPzg_fU-1RKjVW-VqhsjTk1K9Et94dhs_XeQ-NG3yO3VnYGTpBZuCW_-P555RstD23-DAPTrpcazJ0F9xoNVTkZKKD98TAmaF-731gtXEYMcrT8nj2-34Zth9sQAxvBWaeLcJEAuJhmYMLYkiN8GhCPJCNffTfEYEAZ5yc2CKOcCCRwxPmbo | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This API key should not be stored in GitHub. Please remove and send it to me so I can add it to LastPass. If you want to keep the file, add the file to the .gitignore. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would change this line to the python version of a system variable reference. This is how it is done in the build.gradle folder when calling the Google Maps API: |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Don't have api key stored explicitly
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I will send the API keys to you! The empty files are dummy files that I haven't implemented yet but will be necessary.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Will this API key always be the same as the one in
pdf_convert.py
? If yes, update to match my comment above.