forked from williamshammond/reddit-visualizer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRedditGeneralisedDataMiner.py
113 lines (87 loc) · 3.72 KB
/
RedditGeneralisedDataMiner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
import requests
import time
import pandas as pd
import random
#"worldnews",
# "news",
# "worldpolitics",
# "Worldevents",
# "business",
# "Economics",
# "environment",
# "energy",
# "law",
# "education",
# "government",
# "history",
# "WikiLeak",
# "SOPA",
#"NewsPorn",
#"worldnews2",
# Subreddits to mine, in the order they are processed.
# Entries that are commented out are skipped.
yy = [
    # "funny",
    # "askreddit",
    "gaming",
    "aww",
    "pics",
    "music",
    "science",
    "worldnews",
    "videos",
    "todayilearned",
    "movies",
    "news",
    "showerthoughts",
    "earthporn",
    "gifs",
]
# NOTE(review): the OAuth client id/secret used in fetch_token_auth() are
# committed in source; they should be rotated and loaded from the environment
# or an untracked config file instead.
USER_AGENT = 'MacOS:reddit-visualizer:v1.0 (by /u/whammmond)'

# Rows collected for the subreddit currently being mined; rebound to a fresh
# list before each subreddit is processed.
big_list = []
# Value of the Authorization header ('bearer <token>'); set before mining.
token_auth = None


def fetch_token_auth():
    """Request an OAuth token and return the Authorization header value.

    Returns:
        The string 'bearer ' followed by the access token.

    Raises:
        requests.HTTPError: if the token endpoint answers with an error status.
        RuntimeError: if the JSON answer carries no 'access_token' field.
    """
    base_url = 'https://www.reddit.com/'
    data = {'grant_type': 'password', 'username': '********', 'password': '**********'}
    auth = requests.auth.HTTPBasicAuth('h6wy01n0-8H86A', 'gezsDAlid5PVXPt6ug_nyqO2XKg2Lw')
    r = requests.post(base_url + 'api/v1/access_token',
                      data=data,
                      headers={'user-agent': USER_AGENT},
                      auth=auth)
    r.raise_for_status()
    d = r.json()
    if 'access_token' not in d:
        # Fail with the server's answer instead of a bare KeyError.
        raise RuntimeError(f'Token request did not return an access token: {d}')
    return 'bearer ' + d['access_token']


def make_request(subreddit_name):
    """Fetch every page of top posts for a subreddit into ``big_list``.

    Pages are requested until the listing reports no further ``after``
    cursor or a request fails; rows gathered so far are kept in either case.

    Args:
        subreddit_name: Name of the subreddit, without the '/r/' prefix.
    """
    response = request_new_posts(subreddit_name, str(token_auth), None)
    if response is None:
        return
    big_list.extend(post_data_formatted(subreddit_name, response))
    print("Working")
    while response['data']['after'] is not None:
        response = request_new_posts(subreddit_name, str(token_auth), response['data']['after'])
        if response is None:
            # Request failed: stop paginating but keep what was collected.
            break
        print(response['data']['after'])
        big_list.extend(post_data_formatted(subreddit_name, response))


def request_new_posts(subreddit, token, after):
    """Request one page (up to 100 posts) of a subreddit's top posts of the year.

    Args:
        subreddit: Name of the subreddit, without the '/r/' prefix.
        token: Value sent in the Authorization header.
        after: Pagination cursor from the previous page, or None (the string
            'None' is also accepted) to request the first page.

    Returns:
        The decoded JSON body on HTTP 200, otherwise None.
    """
    base_url = 'https://oauth.reddit.com'
    headers = {'Authorization': str(token), 'User-Agent': USER_AGENT}
    payload = {'limit': '100', 'show': 'all', 't': 'year'}
    # Only send a cursor when there is one; never the literal text 'None'.
    if after is not None and after != 'None':
        payload['after'] = f'{after}'
    response = requests.get(base_url + f'/r/{subreddit}/top', headers=headers, params=payload)
    if response.status_code != 200:
        print(f'Request for r/{subreddit} failed with HTTP {response.status_code}')
        return None
    return response.json()


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    if denominator == 0:
        return float('nan')
    return numerator / denominator


def post_data_formatted(subreddit, response):
    """Flatten one listing response into a list of row dictionaries.

    Args:
        subreddit: Name written into each row's 'Subreddit' column.
        response: Decoded listing JSON; posts are read from
            ``response['data']['children']``.

    Returns:
        One dict per post. 'Downvotes' is estimated from the upvote count and
        the upvote ratio, with a small random jitter applied to the ratio, so
        it differs between runs. Ratios whose denominator is zero are NaN.
    """
    post_data = []
    for child in response['data']['children']:
        post = child['data']
        ups = post['ups']
        created = post['created_utc']
        jittered_ratio = post['upvote_ratio'] + random.uniform(-0.005, 0.005)
        post_data.append({
            'Subreddit': f'{subreddit}',
            'Title': post['title'],
            'Epoch Time': created,
            'Date': time.strftime("%m/%d/%Y, %H:%M:%S", time.gmtime(created)),
            'Comments': post['num_comments'],
            'Awards': post['total_awards_received'],
            'Upvotes': ups,
            'Downvotes': _safe_ratio(ups, jittered_ratio) - ups,
            'Upvote Ratio': post['upvote_ratio'],
            'Comment/Upvote Ratio': _safe_ratio(post['num_comments'], ups),
        })
    return post_data


if __name__ == '__main__':
    for items in yy:
        requesting_subreddit = items
        # A fresh token is requested for every subreddit.
        token_auth = fetch_token_auth()
        big_list = []
        make_request(requesting_subreddit)
        output = pd.DataFrame(big_list)
        file_name = requesting_subreddit + '.csv'
        output.to_csv(file_name)