@@ -3928,18 +3928,18 @@ def handle_opencypher_status(self, line, local_ns):
3928
3928
3929
3929
3930
3930
3931
- # degreeDistribution
3932
- # Shows the degree distribution of vertices in the graph
3931
+ # % degreeDistribution. Takes traversalDirection, vertexLabels, edgeLabels parameters, and visualizes
3932
+ # the degree distribution.
3933
3933
# TODO: Error handling
3934
-
3934
+
3935
3935
@line_magic
3936
3936
@needs_local_scope
3937
3937
@display_exceptions
3938
3938
@neptune_graph_only
3939
3939
def degreeDistribution (self , line , local_ns : dict = None ):
3940
3940
parser = argparse .ArgumentParser ()
3941
3941
3942
- # Get the vertexLabels and edgeLabels from graph summary
3942
+ # Get the vertexLabels and edgeLabels from graph summary, to be shown in the widgets for selection.
3943
3943
try :
3944
3944
summary_res = self .client .statistics ("propertygraph" , True , "detailed" , True )
3945
3945
summary_res .raise_for_status ()
@@ -3967,13 +3967,13 @@ def degreeDistribution(self, line, local_ns: dict = None):
3967
3967
"we will default to using all the edge labels." )
3968
3968
3969
3969
3970
- # # Additional parameters for output control
3970
+ # TODO: Additional parameter for saving the visualization?
3971
3971
# parser.add_argument('--export-to', type=str, default='',
3972
3972
# help='Export the degree distribution results to the provided file path.')
3973
3973
3974
3974
args = parser .parse_args (line .split ())
3975
3975
3976
- # put the command line specified option as the value , if any; o.w. 'both'
3976
+ # Use the traversal direction specified on the command line, if any; otherwise default to 'both'
3977
3977
td_val = args .traversalDirection
3978
3978
td_val = td_val .lower () if td_val else 'both'
3979
3979
@@ -3985,6 +3985,8 @@ def degreeDistribution(self, line, local_ns: dict = None):
3985
3985
value = td_val
3986
3986
)
3987
3987
3988
+ # Use the vertex label(s) specified on the command line, if any; otherwise default to all the vertex labels (denoted by [])
3989
+ available_vertex_labels = sorted (available_vertex_labels )
3988
3990
selected_vlabels = args .vertexLabels if args .vertexLabels else []
3989
3991
vertex_labels_select = widgets .SelectMultiple (
3990
3992
options = available_vertex_labels ,
@@ -3994,6 +3996,8 @@ def degreeDistribution(self, line, local_ns: dict = None):
3994
3996
value = selected_vlabels
3995
3997
)
3996
3998
3999
+ # Use the edge label(s) specified on the command line, if any; otherwise default to all the edge labels (denoted by [])
4000
+ available_edge_labels = sorted (available_edge_labels )
3997
4001
selected_elabels = args .edgeLabels if args .edgeLabels else []
3998
4002
edge_labels_select = widgets .SelectMultiple (
3999
4003
options = available_edge_labels ,
@@ -4010,7 +4014,7 @@ def degreeDistribution(self, line, local_ns: dict = None):
4010
4014
display (td_dropdown , vertex_labels_select , edge_labels_select , submit_button , output )
4011
4015
4012
4016
def on_button_clicked (b ):
4013
- # Get selected traversal direction
4017
+ # Get the selected parameters
4014
4018
td = td_dropdown .value
4015
4019
vlabels = list (vertex_labels_select .value )
4016
4020
elabels = list (edge_labels_select .value )
@@ -4022,10 +4026,12 @@ def on_button_clicked(b):
4022
4026
with output :
4023
4027
res = self .callDD (td , vlabels , elabels , local_ns )
4024
4028
4029
+ # Retrieve the distribution
4025
4030
pairs = np .array (res ['results' ][0 ]['output' ]['distribution' ])
4026
4031
keys = pairs [:,0 ]
4027
4032
values = pairs [:,1 ]
4028
4033
4034
+ # Retrieve some statistics
4029
4035
max_deg = res ['results' ][0 ]['output' ]['statistics' ]['maxDeg' ]
4030
4036
median_deg = res ['results' ][0 ]['output' ]['statistics' ]['p50' ]
4031
4037
mean_deg = res ['results' ][0 ]['output' ]['statistics' ]['mean' ]
@@ -4046,6 +4052,7 @@ def callDD (self, td, vlabels, elabels, local_ns):
4046
4052
edge_str = ", " .join ([f'"{ e } "' for e in elabels ])
4047
4053
query_parts .append (f'edgeLabels: [{ edge_str } ]' )
4048
4054
4055
+ # Construct the query
4049
4056
line = "CALL neptune.algo.degreeDistribution({" + ", " .join (query_parts ) + "}) YIELD output RETURN output"
4050
4057
4051
4058
# oc_rebuild_args = (f"{f'--store-to js --silent'}")
@@ -4064,25 +4071,27 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
4064
4071
marker_size = 50
4065
4072
alpha = 0.6
4066
4073
plt .clf ()
4067
-
4068
- # Use the provided unique_degrees and counts
4074
+
4069
4075
# Get zero degree count
4070
4076
zero_idx = np .where (unique_degrees == 0 )[0 ]
4071
4077
zero_degree_count = counts [zero_idx [0 ]] if len (zero_idx ) > 0 else 0
4072
4078
4079
+ # Get non-zero degrees and counts
4073
4080
mask = unique_degrees > 0
4074
4081
filtered_degrees = unique_degrees [mask ]
4075
4082
filtered_counts = counts [mask ]
4076
4083
4077
- # Handle case when all nodes have zero degree
4084
+ # Obtain the minimum non-zero degree, unless it's all zero degrees
4078
4085
if len (filtered_degrees ) == 0 :
4079
4086
min_deg = 0
4080
4087
else :
4081
4088
min_deg = np .min (filtered_degrees )
4082
4089
4083
4090
n_bins = 1
4084
- if len (filtered_degrees ) > 0 : # Only create histogram if there are non-zero degree nodes
4091
+ # Create histogram only if there are non-zero degree nodes
4092
+ if len (filtered_degrees ) > 0 :
4085
4093
if bin_type != 'Raw' :
4094
+ # Arrange the bins for a given bin_width
4086
4095
if bin_type == 'Linear' :
4087
4096
n_bins = max (1 , int ((max_deg - min_deg ) / bin_width ))
4088
4097
bins = np .linspace (min_deg , max_deg , n_bins + 1 )
@@ -4099,9 +4108,9 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
4099
4108
else :
4100
4109
# For raw data, create bars at each unique degree
4101
4110
plt .bar (filtered_degrees , filtered_counts , alpha = alpha ,
4102
- label = 'Raw' , color = '#000080 ' )
4111
+ label = 'Raw' , color = '#000000 ' )
4103
4112
4104
- # Plot degree 0 separately
4113
+ # Plot zero degree node count separately
4105
4114
if zero_degree_count > 0 :
4106
4115
plt .bar (0 , zero_degree_count , color = 'red' ,
4107
4116
label = 'Isolated' , alpha = alpha , width = 0.2 )
@@ -4121,7 +4130,7 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
4121
4130
4122
4131
plt .gca ().set_ylim (top = y_max )
4123
4132
4124
- # Add vertical dashed lines for min and max degree if enabled
4133
+ # Add vertical dashed lines for min and max degree, if enabled
4125
4134
if show_mindeg and min_deg > 0 :
4126
4135
plt .axvline (x = min_deg , color = 'darkgreen' , linestyle = '--' , linewidth = 2 , label = f'Min non-zero degree: { min_deg } ' )
4127
4136
@@ -4155,19 +4164,22 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
4155
4164
4156
4165
max_count = np .max (counts )
4157
4166
4158
- # Create widgets (same as before)
4167
+ # Scale widget, four options
4159
4168
scale_widget = widgets .Dropdown (
4160
4169
options = ['Linear-Linear' , 'Log-Log' , 'Log(x)-Linear(y)' , 'Linear(x)-Log(y)' ],
4161
4170
value = 'Linear-Linear' ,
4162
4171
description = 'Scale:'
4163
4172
)
4164
4173
4174
+ # Binning widget, three options
4165
4175
bin_widget = widgets .Dropdown (
4166
4176
options = ['Raw' , 'Linear' , 'Logarithmic' ],
4167
4177
value = 'Linear' ,
4168
4178
description = 'Binning:'
4169
4179
)
4170
4180
4181
+ # Bin width widget, integer options in [1, 1+(max_deg/2)] interval
4182
+ # TODO: if logarithmic binning, a much smaller range makes more sense
4171
4183
bin_width_widget = widgets .IntSlider (
4172
4184
value = 1 ,
4173
4185
min = 1 ,
@@ -4178,28 +4190,27 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
4178
4190
'For log binning: multiplicative factor' )
4179
4191
)
4180
4192
4193
+ # Upper limit for y-axis range, enables zooming (lower limit is always zero)
4181
4194
y_max_widget = widgets .IntSlider (
4182
- value = max_count ,
4195
+ value = max_count * 1.1 ,
4183
4196
min = 1 ,
4184
4197
max = max_count * 1.1 ,
4185
4198
step = 1 ,
4186
4199
description = 'y-max:' ,
4187
4200
)
4188
4201
4189
- # Add x-axis range slider
4190
- x_range_widget = widgets .FloatRangeSlider (
4191
- value = [min_deg , (max_deg * 1.1 ) + 5 ],
4202
+ # Range slider for x-axis, enables zooming
4203
+ x_range_widget = widgets .FloatRangeSlider (
4192
4204
min = 0 ,
4193
4205
max = max_deg * 1.1 + 5 ,
4206
+ value = [0 , max_deg * 1.1 + 5 ], # start at the full slider range; bare min/max here are not the kwargs above
4194
4207
step = 1 ,
4195
4208
description = 'x-axis range:' ,
4196
4209
disabled = False ,
4197
4210
continuous_update = True ,
4198
4211
readout = True ,
4199
4212
readout_format = '.0f' ,
4200
4213
)
4201
- # Create output widget for statistics
4202
- stats_output = widgets .Output ()
4203
4214
4204
4215
# Toggle switches for min/max degree lines
4205
4216
show_mindeg_widget = widgets .Checkbox (
@@ -4214,7 +4225,10 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
4214
4225
disabled = False
4215
4226
)
4216
4227
4217
- # Create the interactive plot
4228
+ # Output widget for statistics
4229
+ stats_output = widgets .Output ()
4230
+
4231
+ # Interactive plot
4218
4232
interactive_plot = widgets .interactive (
4219
4233
update_plot ,
4220
4234
scale_type = scale_widget ,
@@ -4226,7 +4240,7 @@ def update_plot(scale_type, bin_type, bin_width, y_max, x_range, show_mindeg, sh
4226
4240
show_maxdeg = show_maxdeg_widget
4227
4241
)
4228
4242
4229
- # Create a vertical box layout
4243
+ # Vertical box layout
4230
4244
vbox = widgets .VBox ([interactive_plot , stats_output ])
4231
4245
4232
4246
# Display the interactive plot and stats
0 commit comments