Skip to content

Commit ffff09c

Browse files
committed
Working on retrieving batches of users.
1 parent 2c34a24 commit ffff09c

File tree

3 files changed

+2180
-83
lines changed

3 files changed

+2180
-83
lines changed

modules/data_process.py

+83
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import pandas as pd
2+
import requests
3+
import json
4+
import networkx as nx
5+
6+
import os
7+
8+
from modules import config
9+
from modules import plotting
10+
11+
12+
def github_request_by_url(url):
    '''Send an authenticated GET request to an arbitrary URL and decode the reply.

    Endpoints commonly used in this project:
        "https://api.github.com/users/"
        "https://api.github.com/rate_limit"
        "https://api.github.com/users/{user}/following"

    url: full URL to request.

    Returns the response body parsed with json.loads. The HTTP status code
    is not inspected here.
    '''
    # NOTE(review): `username` and `token` are not defined in the visible part
    # of this module -- presumably they should come from `config`; confirm.
    response = requests.get(url, auth=(username, token))

    # Decode the raw body into Python objects and hand it back.
    payload = json.loads(response.content)
    return payload
25+
26+
27+
# General purpose github_request function, that can handle any endpoint or user combination.
28+
def github_request(user, endpoint = None):
    '''Query the GitHub users API for `user`, optionally for one sub-resource.

    user: login that is interpolated into the request URL.
    endpoint: when None, the user's own record is requested; otherwise the
        value is appended as a path segment (for example "following",
        "followers", "repos", "subscriptions", "events").

    Returns the response body parsed with json.loads. When the status code
    is not 200 a message is printed, but the body is still parsed and
    returned.
    '''
    # Pick the URL: bare user record, or one of the user's sub-resources.
    if endpoint is None:
        url = f'https://api.github.com/users/{user}'
    else:
        url = f'https://api.github.com/users/{user}/{endpoint}'

    # NOTE(review): `username` and `token` are not defined in the visible part
    # of this module -- presumably they should come from `config`; confirm.
    response = requests.get(url, auth=(username, token))

    succeeded = response.status_code == 200
    if not succeeded:
        print("Something went wrong.")

    payload = json.loads(response.content)
    return payload
51+
52+
53+
# Generate a user details dictionary by combining the user's record with the logins they follow and the logins following them.
54+
def generate_user_details_dict(user):
    '''Assemble a single dictionary describing `user` and their connections.

    The dictionary returned by github_request(user) is extended in place
    with two extra keys:
        'following_users': the 'login' of every entry returned for the
            "following" endpoint;
        'followers_users': the 'login' of every entry returned for the
            "followers" endpoint.

    user: login to look up.

    Returns the extended dictionary.
    '''
    # Start from the user's own record.
    details = github_request(user)

    # Logins this user follows.
    following_response = github_request(user, endpoint = 'following')
    details['following_users'] = [entry['login'] for entry in following_response]

    # Logins that follow this user.
    followers_response = github_request(user, endpoint = 'followers')
    details['followers_users'] = [entry['login'] for entry in followers_response]

    return details
67+
68+
69+
# Generates a list of dictionaries with details
70+
# for all users followed by the original user.
71+
def return_following_details_list(user):
    '''Collect detail dictionaries for `user` and for every login `user` follows.

    user: login of the starting account.

    Returns a list whose first element is the dictionary built by
    generate_user_details_dict(user), followed by one dictionary per distinct
    login found in its 'following_users' list, in that order. Each followed
    login is printed before its details are requested.
    '''
    origin_details = generate_user_details_dict(user)
    users_details_list = [origin_details]

    # Logins already collected. `{user}` keeps the whole login as one member;
    # `set(user)` would have split the string into single characters.
    users_login_set = {user}

    # Loop over all followed logins and generate their own details dictionaries.
    for fol_user in origin_details['following_users']:
        if fol_user in users_login_set:
            # Already collected: avoid a duplicate entry and a repeated request.
            continue
        print(fol_user)
        users_login_set.add(fol_user)
        users_details_list.append(generate_user_details_dict(fol_user))

    return users_details_list

modules/plotting.py

+251
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,251 @@
1+
import pandas as pd
2+
import numpy as np
3+
import matplotlib.pyplot as plt
4+
5+
# api_key is stored in config.py
6+
import networkx as nx
7+
8+
import plotly.graph_objects as go
9+
10+
import time
11+
12+
13+
14+
15+
### ORIGINAL
16+
def create_nx_graph(channel_response, directed = True):
    '''Build a networkX graph from a list of channel details response items.

    A directed graph is created from every channel's featured-channel list and
    then restricted to the channels that appear in `channel_response`.

    channel_response: list of dictionaries, each entry represents a channel node.
        Each entry is read for 'id', 'brandingSettings' -> 'channel'
        (optionally containing 'featuredChannelsUrls'), 'snippet' (optionally
        containing 'title'), 'statistics' -> 'subscriberCount' and,
        optionally, 'distance'.
    directed: boolean; when true each node's in-degree is additionally stored
        as the 'in_degree' node attribute. In both cases the result is a
        subgraph of a DiGraph (no undirected conversion is applied).

    Returns the subgraph, with node attributes 'title', 'subscriberCount',
    'id' and 'distance' set (plus 'in_degree' when `directed` is true). The
    elapsed time of each main step is printed.
    '''
    # Adjacency mapping used to instantiate the graph; channels without a
    # featured list get no outgoing edges.
    channel_network = {
        channel['id']: channel['brandingSettings']['channel'].get('featuredChannelsUrls', [])
        for channel in channel_response
    }

    # Node attribute: display name ([] when the snippet has no title).
    channel_names = {
        channel['id']: channel['snippet'].get('title', [])
        for channel in channel_response
    }

    # Node attribute: subscriber count as an integer.
    subscriber_count_dict = {
        channel['id']: int(channel['statistics']['subscriberCount'])
        for channel in channel_response
    }

    # Node attribute: distance ('' when the entry carries none).
    distance_dict = {
        channel['id']: channel.get('distance', '')
        for channel in channel_response
    }

    # time.clock() was removed in Python 3.8; perf_counter() is the
    # replacement for measuring elapsed intervals.
    t0 = time.perf_counter()
    # Create a Directional Graph from the channel network
    g = nx.DiGraph(channel_network)
    t1 = time.perf_counter() - t0
    print("Time elapsed create graph: ",t1)

    t0 = time.perf_counter()
    # Ids of the channels we have details on, used to subset the graph.
    channel_ids = [channel['id'] for channel in channel_response]
    channel_id_dict = {channel_id: channel_id for channel_id in channel_ids}
    t1 = time.perf_counter() - t0
    print("Time elapsed channelId list: ",t1)

    # Subset created graph to only include channels we have details on.
    h = g.subgraph(channel_ids)

    if directed:
        # In-degree only matters for directed graphs; build the mapping from
        # a single degree view instead of one view per node.
        in_degree_dict = dict(h.in_degree())
        nx.set_node_attributes(h,
                               values=in_degree_dict,
                               name='in_degree')

    t0 = time.perf_counter()
    # Assigning positional layout, computed on the subgraph only.
    pos = nx.drawing.layout.kamada_kawai_layout(h)
    # NOTE(review): positions are written through the parent graph `g`; this
    # presumably reaches `h` because the subgraph shares node data with `g` --
    # confirm against the networkX version in use.
    nx.set_node_attributes(g, pos, name='pos')
    t1 = time.perf_counter() - t0
    print("Time elapsed determine position: ",t1)

    t0 = time.perf_counter()
    # Set attribute for names
    nx.set_node_attributes(h, channel_names, name='title')
    # Set attribute for SubscribeCount
    nx.set_node_attributes(h, subscriber_count_dict, name='subscriberCount')
    # Set the Id as an attribute
    nx.set_node_attributes(h, channel_id_dict, name='id')
    # Set the distance of each node
    nx.set_node_attributes(h, distance_dict, name='distance')
    t1 = time.perf_counter() - t0
    print("Time elapsed Setting Attributes: ",t1)

    return h
97+
98+
def mpl_nx_graph(g):
    '''Draw `g` with matplotlib and return the resulting figure.

    g: networkX graph whose nodes each carry a 'pos' attribute; that
        attribute is used as the drawing position and every node is labelled
        with its own identifier.

    Returns the matplotlib figure. plt.close() is called before returning.
    '''
    fig = plt.figure()

    # Gather per-node drawing inputs up front.
    node_positions = {}
    node_labels = {}
    for node in g.nodes():
        node_positions[node] = g.nodes()[node]['pos']
        node_labels[node] = node

    nx.draw_networkx(g,
                     pos=node_positions,
                     labels=node_labels,
                     with_labels=True,
                     font_size=12,
                     font_color='red')

    plt.close()
    return fig
111+
112+
def simple_page_rank(g, n_iter = 20):
    '''Score the nodes of `g` by repeated multiplication with its adjacency matrix.

    Starting from a uniform vector, each iteration multiplies the current
    vector by nx.adjacency_matrix(g) and rescales it to sum to one.

    g: networkX graph.
    n_iter: number of iterations to run (default 20, the previously
        hard-coded value). With 0 the uniform starting vector is returned.

    Returns a numpy array with one score per row of the adjacency matrix.
    If an iteration produces a vector whose sum is zero, the rescaling
    divides by zero.
    '''
    a = nx.adjacency_matrix(g)
    n = a.shape[0]

    # Uniform starting distribution.
    rank = np.ones(n) / n

    for _ in range(n_iter):
        rank = a @ rank
        # Rescale so the scores sum to one.
        rank /= rank.sum(0)

    return rank
122+
123+
124+
def extract_connected_components(g = None):
    '''List the connected components of a graph, ignoring edge direction.

    g: networkX graph to analyse. When omitted, the module-level name `G` is
        used, which is what this function read before the parameter existed.
        NOTE(review): `G` is not defined in the visible part of this module,
        so calling without an argument presumably raises NameError -- confirm.

    Prints the sorted list of component sizes and returns the components as
    a list of node sets, in the order networkX yields them.
    '''
    if g is None:
        # Backward-compatible fallback for callers that pass no argument.
        g = G

    ccs = list(nx.connected_components(g.to_undirected()))
    sizes = [len(cc) for cc in ccs]
    print(sorted(sizes))
    return ccs
132+
133+
134+
def plotly_network_graph(g,
                         color_setting,
                         title='Graph of Featured Channels',
                         display_list = None):
    '''Render `g` as a Plotly figure with one line trace for edges and one marker trace for nodes.

    Based on the Plotly network graph example:
    https://plotly.com/ipython-notebooks/network-graphs/

    g: networkX graph whose nodes each carry a 'pos' attribute holding an
        (x, y) pair.
    color_setting: text used as the colour bar title and inside the figure
        annotation. It does not currently select the node colours.
    title: figure title.
    display_list: currently unused. The default is None rather than a shared
        mutable list.

    Returns the plotly.graph_objects.Figure.
    '''
    # TODO: the following features are disabled in this revision and still
    # need to be restored: node text limited to `display_list`, per-node
    # hover text, marker size from log2(subscriber count + 1), and marker
    # colour chosen by `color_setting` ('Distance' or 'Connections').

    # Edge coordinates: every edge contributes its two end points followed by
    # None, so that all edges can live in a single trace as separate segments.
    edge_x = []
    edge_y = []
    for edge in g.edges():
        x0, y0 = g.nodes[edge[0]]['pos']
        x1, y1 = g.nodes[edge[1]]['pos']
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])

    # Create Scatter for edges
    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.5, color='#888'),
        hoverinfo='none',
        mode='lines')

    # Node coordinates, in graph iteration order.
    node_x = []
    node_y = []
    for node in g.nodes():
        x, y = g.nodes[node]['pos']
        node_x.append(x)
        node_y.append(y)

    # Create Scatter for nodes
    # NOTE(review): `titleside` (colour bar) and `titlefont_size` (layout,
    # below) look like legacy Plotly spellings that newer releases may reject
    # -- confirm against the installed Plotly version before changing them.
    node_trace = go.Scatter(
        x=node_x, y=node_y,
        mode='markers+text',
        hoverinfo='text',
        marker=dict(
            showscale=True,
            # colorscale options
            #'Greys' | 'YlGnBu' | 'Greens' | 'YlOrRd' | 'Bluered' | 'RdBu' |
            #'Reds' | 'Blues' | 'Picnic' | 'Rainbow' | 'Portland' | 'Jet' |
            #'Hot' | 'Blackbody' | 'Earth' | 'Electric' | 'Viridis' |
            colorscale='YlGnBu',
            reversescale=True,
            color=[],
            colorbar=dict(
                thickness=15,
                title=color_setting,
                xanchor='left',
                titleside='right'
            ),
            line_width=2))

    fig = go.Figure(data=[edge_trace, node_trace],
                    layout=go.Layout(
                        scene=dict(aspectmode="data"),
                        autosize=True,
                        title=title,
                        titlefont_size=16,
                        height=550,
                        showlegend=False,
                        hovermode='closest',
                        margin=dict(b=20,l=5,r=5,t=40),
                        annotations=[ dict(
                            text=f"Size is log(Subscriber Count);<br>Color is {color_setting}",
                            showarrow=False,
                            xref="paper", yref="paper",
                            x=0.005, y=-0.002 ) ],
                        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False))
                    )
    return fig

0 commit comments

Comments
 (0)