|
| 1 | +import os |
| 2 | +import matplotlib.pyplot as plt |
| 3 | +import networkx as nx |
| 4 | +from src import box, constants, util |
| 5 | +from src import modularity as mod |
| 6 | + |
# Directory holding the raw bike-box CSV logs.
BIKE_LOGS_DIR = "samples/BikeLogs"

# List the log files, normalize their names so each starts with its date,
# then re-list so `dflist` reflects any renames just performed.
dflist = util.get_box_files(BIKE_LOGS_DIR)
util.ensure_date_in_filenames(dflist)
dflist = util.get_box_files(BIKE_LOGS_DIR)
| 11 | + |
# Process each bike-box log: load it, locate the button presses, and derive
# the date tag used for output filenames.
for csv_file in dflist:
    print(csv_file, flush=True)
    csvr = util.read_csv(csv_file)
    ldata = box.make_ldata(csvr)  # CSV data as a list
    # presses
    press_starts, press_lengths = box.get_press_lengths_and_starts(ldata)
    # First 8 characters of the filename are the date stamp.
    # os.path.basename is used instead of split("/") so the path is
    # handled correctly on every platform (Windows uses backslashes).
    date_string = os.path.basename(csv_file)[:8]

    # NOTE(review): the block below is disabled code kept for reference.
    # If it is ever re-enabled, `ot_events` must be initialized (e.g.
    # `ot_events = []`) before this loop — it is appended to but never
    # created anywhere in the file.
    """
    b_partitions, a_partitions, b_modularities, a_modularities = mod.get_partitions(ldata, press_starts, press_lengths)

    for j, b_parts in enumerate(b_partitions):

        if b_modularities[j] == -1: # it's an oncoming case
            continue

        ps = press_starts[j]
        pmod = b_modularities[j]
        part_and_size_pairs = [(len(x), x) for x in b_parts]
        part_and_size_pairs.sort(reverse=True) # sorts pairs by 1st item, i.e. size
        press_gap = sum([s for s, p in part_and_size_pairs])
        lat_dists = [ldata[ps-x][4] for x in range(press_gap)]
        dispersion_score = mod.dispersion_score(lat_dists)

        plt.scatter(range(len(lat_dists)), lat_dists, c='b')

        # ---- process the partition -----
        for s, p in part_and_size_pairs:

            lp = list(p)
            lp.sort() # should already be sorted

            # skip the maxed-out readings
            if min([lat_dists[x] for x in p]) > 450:
                continue

            # skip the readings stuck too low
            if max([lat_dists[x] for x in p]) < 50:
                continue

            # Split part into subparts >8 lines apart
            # Discard tiny subparts
            subparts = mod.strip_and_split(lp, lat_dists, 8)
            if len(subparts) == 0:
                continue
            lp = subparts[0]
            s = len(lp)


            # --- max clique method ----
            G = nx.Graph()
            G.add_nodes_from(lp)

            G.add_edges_from([(lp[x],lp[y]) for x in range(s-1) for y in range(x+1, s) if abs(lat_dists[lp[x]]-lat_dists[lp[y]])<40])
            mc = nx.approximation.max_clique(G)
            additional_vertices = set()
            for v in mc:
                v_ball = [lat_dists[v]-30, lat_dists[v]+30]
                for u in G.nodes:
                    if u in mc:
                        continue
                    # make sure the vertex we include with the clique doesn't have a very small degree
                    if lat_dists[u] > v_ball[0] and lat_dists[u] < v_ball[1] and G.degree(u) > 0.7*len(mc):
                        additional_vertices.add(u)

            ot = list(mc)+list(additional_vertices)
            ot_event = [date_string, ps, dispersion_score, pmod, len(ot), ot]
            ot_events.append(ot_event)

            plt.scatter(ot, [lat_dists[x] for x in ot], c='r')

            # --- end of max clique ----

            break # after getting to the OT event

        plt.ylim([0,700])
        plt.savefig(os.path.join("out", "mod", date_string+"_ld_"+str(press_starts[j])+"_"+"{:.6f}".format(pmod)+"_disp="+str(dispersion_score)+"_clique.png"))
        plt.clf()

    util.write_to_csv_file(os.path.join("data", "ot_events.csv"), ot_events)
    """
0 commit comments