Skip to content

Commit 1e4d6b9

Browse files
Replaced a workaround with a bug fix.
1 parent 15b7580 commit 1e4d6b9

File tree

1 file changed

+24
-19
lines changed

1 file changed

+24
-19
lines changed

doc/python/line-and-scatter.md

Lines changed: 24 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -309,8 +309,8 @@ def swarm(
309309
point_size=16,
310310
fig_width = 800,
311311
gap_multiplier=1.2,
312-
center_even_groups = False
313-
):
312+
bin_fraction=0.95, #bin fraction slightly undersizes the bins to avoid collisions
313+
):
314314
#sorting will align columns in attractive arcs rather than having columns that vary unpredictably in the x-dimension
315315
X_series=X_series.copy().sort_values()
316316

@@ -328,8 +328,9 @@ def swarm(
328328

329329
for x_val in X_series:
330330
# assign this x_value to bin number
331-
# each bin is a vertical strip wide enough for one marker
332-
bin=(((fig_width*(x_val-min_x))/(max_x-min_x)) // point_size)
331+
# each bin is a vertical strip slightly narrower than one marker
332+
333+
bin=(((fig_width*bin_fraction*(x_val-min_x))/(max_x-min_x)) // point_size)
333334

334335
#update the count of dots in that strip
335336
bin_counter.update([bin])
@@ -341,41 +342,48 @@ def swarm(
341342
# we then scale this by the point_size*gap_multiplier to get a y coordinate in px
342343

343344
collision_free_y_coordinate=(bin_counter[bin]//2)*negative_1_if_count_is_odd(bin_counter[bin])*point_size*gap_multiplier
344-
list_of_rows.append({"x":x_val,"y":collision_free_y_coordinate,"bin":bin, "adj":0})
345+
list_of_rows.append({"x":x_val,"y":collision_free_y_coordinate,"bin":bin})
345346

346-
# if the number of points is even,
347-
# move y-coordinates down to put an equal number of entries above and below the axis
348-
#this can sometimes break the collision avoidance routine, but makes small N outputs look better otherwise
349-
if center_even_groups:
350-
for row in list_of_rows:
351-
if bin_counter[row["bin"]]%2==0:
352-
row["y"]-=point_size*gap_multiplier/2
353-
row["adj"]=-point_size*gap_multiplier/2
354347

355348

356349
for row in list_of_rows:
357350
bin = row["bin"]
358351
#see if we need to "look left" to avoid a possible collision
359352
for other_row in list_of_rows:
360353
if (other_row["bin"]==bin-1 ):
361-
if (((other_row["y"]==row["y"]) or (other_row["y"]==row["y"]+row["adj"]))
354+
#"bubble" the entry up until we find a slot that avoids a collision
355+
while ((other_row["y"]==row["y"])
362356
and (((fig_width*(row["x"]-other_row["x"]))/(max_x-min_x) // point_size) < 1)):
357+
print(row)
358+
print(other_row)
359+
print(((fig_width*(row["x"]-other_row["x"] ))/(max_x-min_x) // point_size))
360+
361+
print("updating to fix collision")
363362
bin_counter.update([bin])
364-
row["y"]=(bin_counter[bin]//2)*negative_1_if_count_is_odd(bin_counter[bin])*point_size*gap_multiplier+row["adj"]
363+
print(bin_counter[bin])
364+
row["y"]=(bin_counter[bin]//2)*negative_1_if_count_is_odd(bin_counter[bin])*point_size*gap_multiplier
365+
print(row["y"])
365366

367+
# if the number of points is even,
368+
# move y-coordinates down to put an equal number of entries above and below the axis
369+
for row in list_of_rows:
370+
if bin_counter[row["bin"]]%2==0:
371+
row["y"]-=point_size*gap_multiplier/2
366372

367373

368374
df = pd.DataFrame(list_of_rows)
375+
# one way to make this code more flexible (e.g. to handle multiple categories) would be to return a list of "swarmified" y coordinates here
376+
# you could then generate "swarmified" y coordinates for each category and add category specific offsets before scatterplotting them
369377

370378
fig = px.scatter(
371379
df,
372380
x="x",
373381
y="y",
374-
hover_data="x",
375382
)
376383
#we want to suppress the y coordinate in the hover value because the y-coordinate is irrelevant/misleading
377384
fig.update_traces(
378385
marker_size=point_size,
386+
#suppress the y coordinate because the y-coordinate is irrelevant
379387
hovertemplate="<b>value</b>: %{x}",
380388
)
381389
# we have to set the width and height because we aim to avoid icon collisions and we specify the icon size
@@ -392,10 +400,7 @@ def swarm(
392400

393401
df_iris = px.data.iris() # iris is a pandas DataFrame
394402
x = df_iris["sepal_length"]
395-
x2 = pd.Series([5.05])
396-
x = pd.concat([x,x2], ignore_index=True)
397403
fig = swarm(x)
398-
#fig = swarm(pd.Series([1,1.5, 1.78, 1.79,2,2,12]))
399404
fig.show()
400405
```
401406

0 commit comments

Comments
 (0)