gspreadsheet.py
from __future__ import print_function
import csv
import json
import os.path
import pickle

import boto3
from google.auth.transport.requests import Request
from google_auth_oauthlib.flow import InstalledAppFlow
from googleapiclient.discovery import build

# If modifying these scopes, delete the file token.pickle.
SCOPES = ['https://www.googleapis.com/auth/spreadsheets.readonly']
# The ID and range of a sample spreadsheet.
SAMPLE_SPREADSHEET_ID = '1q7p4k5Op8fHLyNtai4OeN0AN3oDhM6SC4IoEiztSuvI'
SAMPLE_RANGE_NAME = 'Sheet1!A2:H'
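# The A1-notation range Sheet1!A2:H covers columns A through H starting at
# row 2, so the sheet's header row is skipped and only data rows come back.
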
def main():
    """Reads rows from a sample spreadsheet via the Sheets API and
    writes them to output.csv.
    """
    creds = None
    # The file token.pickle stores the user's access and refresh tokens, and is
    # created automatically when the authorization flow completes for the first
    # time.
    if os.path.exists('token.pickle'):
        with open('token.pickle', 'rb') as token:
            creds = pickle.load(token)
    # If there are no (valid) credentials available, let the user log in.
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'credentials.json', SCOPES)
            creds = flow.run_local_server()
        # Save the credentials for the next run
        with open('token.pickle', 'wb') as token:
            pickle.dump(creds, token)
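    # credentials.json is the OAuth client file downloaded from the Google
    # Cloud console; deleting token.pickle forces the consent flow to run
    # again on the next start.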
    service = build('sheets', 'v4', credentials=creds)
    # Call the Sheets API
    sheet = service.spreadsheets()
    result = sheet.values().get(spreadsheetId=SAMPLE_SPREADSHEET_ID,
                                range=SAMPLE_RANGE_NAME).execute()
    values = result.get('values', [])
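    # values is a list of rows, each itself a list of cell strings. The API
    # omits trailing empty cells, so rows can have fewer than eight entries.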
    if not values:
        print('No data found.')
    else:
        print('Rows:')
        for row in values:
            # Print the full row (all fetched columns).
            print(row)
    # Write every row to output.csv with a header line.
    fieldnames = ['name', 'display_name', 'create_date', 'source_url',
                  'description', 'tags', 'stories', 'source']
    with open('output.csv', mode='w', newline='') as write_file:
        writer = csv.DictWriter(write_file, fieldnames=fieldnames)
        writer.writeheader()
        for row in values:
            # Pad short rows so every field has a value before indexing.
            row = row + [''] * (len(fieldnames) - len(row))
            writer.writerow(dict(zip(fieldnames, row)))

if __name__ == '__main__':
    main()
    # Parse the CSV into JSON. DictReader takes its field names from the
    # header row that main() wrote, so no explicit fieldnames (and no
    # slicing off the header row afterwards) are needed.
    filename = 'output.csv'
    with open(filename, newline='') as f:
        reader = csv.DictReader(f)
        data = [row for row in reader]
    out = json.dumps(data)
    print("JSON parsed!")
    # Save the JSON
    with open('metadata.json', 'w') as f:
        f.write(out)
    print("JSON saved!")
    # Create an S3 client; credentials come from the standard boto3 chain
    # (environment variables, ~/.aws/credentials, or an IAM role).
    s3 = boto3.client('s3')
    s3.upload_file('metadata.json', 'open-data-portal', 'metadata.json',
                   ExtraArgs={'ContentType': 'application/json',
                              'ACL': 'public-read'})
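    # With the public-read ACL the object should be fetchable without
    # credentials, typically at
    # https://open-data-portal.s3.amazonaws.com/metadata.json
    # (the exact URL form depends on the bucket's region).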