|
| 1 | +from pathlib import Path |
| 2 | + |
| 3 | +import desbordante |
| 4 | +import matplotlib.pyplot as plt |
| 5 | +import matplotlib.image as mpimg |
| 6 | + |
| 7 | + |
class bcolors:
    """ANSI escape sequences used to colour the text printed by this example."""

    # 24-bit RGB foreground colours, one per vertex label of the example graph.
    STUDENT = '\033[38;2;254;136;99m'
    TASK = '\033[38;2;87;206;235m'

    # Colours for the "Desbordante >" prompt and for the closing hint.
    HEADER = '\033[95m'
    WARNING = '\033[93m'

    # Reset sequence: ends whatever colouring is currently active.
    ENDC = '\033[0m'
| 14 | + |
| 15 | + |
def colored(message, color):
    """Return ``message`` preceded by ``color`` and followed by ``bcolors.ENDC``.

    Args:
        message: Text to colour.
        color: Escape sequence placed in front of the text
            (for example ``bcolors.TASK``).

    Returns:
        The concatenation ``color + message + bcolors.ENDC``.
    """
    return ''.join((color, message, bcolors.ENDC))
| 18 | + |
| 19 | + |
# Base names (without extension) of the dataset and figure files.
GRAPH_NAME = 'study_graph'
EMBEDDINGS_NAME = 'study_embeddings'
GFD_NAME = 'study_gfd'

# Relative directory that holds the graph datasets.
GRAPHS_DATASETS_FOLDER_PATH = 'examples/datasets/mining_gfd'

# Graph file handed to the mining algorithm in execute_algo().
GRAPH = Path(GRAPHS_DATASETS_FOLDER_PATH) / f'{GRAPH_NAME}.dot'

# Pre-rendered pictures displayed by show_example().
_FIGURES_DIR = Path('examples/basic/mining_gfd/figures')
GRAPH_IMAGE = _FIGURES_DIR / 'graphs' / f'{GRAPH_NAME}.png'
EMBEDDINGS_IMAGE = _FIGURES_DIR / 'graphs' / f'{EMBEDDINGS_NAME}.png'
GFD_IMAGE = _FIGURES_DIR / 'gfds' / f'{GFD_NAME}.png'
| 31 | + |
# Opening paragraph: the two GFD tasks and the source of the mining algorithm.
PREAMBLE = (
    "Our profiler supports two tasks related to graph functional dependencies (GFDs): "
    "validation and mining (discovery). "
    "In this example, we will focus on the mining task "
    "(for validation, we refer the reader to another example). "
    "The mining algorithm used in our profiler is described in the article "
    '"Discovering Graph Functional Dependencies" '
    "by Fan Wenfei, Hu Chunming, Liu Xueli, and Lu Pinge, "
    "presented at SIGMOD '18.\n"
)

# One-paragraph description of what a GFD consists of.
GFD_INFO = (
    "GFDs are functional dependencies that consist of a pattern "
    "- a graph that specifies the scope - "
    "and a rule. "
    "The nature of this object will become clearer through the example that follows.\n"
)
| 41 | + |
# Description of the example graph: its two vertex labels and the attributes
# of each label, with label-specific words wrapped in the matching colour.
GRAPH_INFO = "".join((
    "Let's analyze GFD mining through an example. ",
    "Look at the graph presented on the top left in the figure. ",
    "It describes the connections between students and tasks. ",
    "The vertices of this graph have two labels: ",
    colored('Student (S)', bcolors.STUDENT),
    " and ",
    colored('Task (T)', bcolors.TASK),
    ". ",
    "Each vertex has its own set of attributes depending on the label.\n\n",
    # Attributes of a Student vertex.
    colored('Student', bcolors.STUDENT), ":\n",
    "- ", colored('name', bcolors.STUDENT), " denotes the name of the student,\n",
    "- ", colored('degree', bcolors.STUDENT), " is the level of education,\n",
    "- ", colored('year', bcolors.STUDENT), " is the year of study.\n\n",
    # Attributes of a Task vertex.
    colored('Task', bcolors.TASK), ":\n",
    "- ", colored('name', bcolors.TASK), " denotes the name of a task,\n",
    "- ", colored('difficulty', bcolors.TASK),
    " is a categorical parameter that takes one of the following values: ",
    '"easy", "normal" or "hard".\n',
))
| 56 | + |
# Explanation of the two numeric parameters of the discovery algorithm;
# every entry below becomes one newline-terminated line of output.
ALGO_INFO = "".join(
    f"{entry}\n"
    for entry in (
        "The discovery algorithm, in addition to the graph, takes two parameters as input:",
        "- k: the maximum number of vertices in the pattern,",
        "- sigma: the minimum frequency of GFD occurrences in the original graph.",
    )
)

# Announces the run and the parameter values used in execute_algo().
INFO = (
    "Let's run the algorithm and look at the result. "
    "We will set k=2 and sigma=3.\n"
)

# Printed after the raw dependency to introduce the hand-written rendering.
REWRITING = (
    "It may be difficult to interpret, "
    "so let's rewrite it to a more human-readable format. "
    "Notation: the first line contains the literals found in the left-hand side. "
    "The second line contains those in the right-hand side.\n"
)
| 66 | + |
# Hand-written, coloured rendering of the mined dependency.
# NOTE(review): the leading spaces are reproduced exactly as found; confirm
# that they still align the vertex indices with the pattern drawn below them.
GFD_TEXT = "".join((
    # Vertex indices.
    "  ", colored("0", bcolors.TASK), " ", colored("1", bcolors.STUDENT), "\n",
    # Pattern: a Task vertex connected to a Student vertex.
    " ", colored("(T)", bcolors.TASK), "--", colored("(S)", bcolors.STUDENT), "\n",
    # Rule: left-hand side literal, arrow, right-hand side literals.
    "{", colored("0", bcolors.TASK), ".", colored("difficulty", bcolors.TASK), "=hard}",
    " --> ",
    "{", colored("1", bcolors.STUDENT), ".", colored("degree", bcolors.STUDENT), "=master",
    " & ",
    colored("1", bcolors.STUDENT), ".", colored("year", bcolors.STUDENT), "=2}",
    "\n\n",
    "The mined dependency can also be seen on the right in the figure.\n",
))
| 76 | + |
# Plain-language reading of the mined dependency.
RESULTS = (
    "The dependency found indicates that "
    "only second-year master's students are working on the difficult task.\n"
)

# Pointer to the companion example script.
EXAMPLE_INFO = (
    "It is recommended to look at the first example "
    "for a deeper understanding of graph functional dependency mining. "
    'It is located in the file "mining_gfd1.py".\n'
)
| 83 | + |
# Final hint, printed in the warning colour just before the figure is shown.
EXIT = colored(
    message="Close the image window to finish.",
    color=bcolors.WARNING,
)
| 85 | + |
| 86 | + |
def execute_algo(algo):
    """Mine GFDs from the example graph and print the result.

    Loads ``GRAPH`` into ``algo`` with ``gfd_k=2`` and ``gfd_sigma=3``, runs
    the algorithm, prints how many dependencies were mined, prints each of
    them as returned by the algorithm, and then prints the ``REWRITING`` and
    ``GFD_TEXT`` explanations.

    Args:
        algo: Mining algorithm object providing ``load_data``, ``execute``
            and ``get_gfds`` (the script passes a ``GfdMiner`` instance).
    """
    algo.load_data(graph=GRAPH, gfd_k=2, gfd_sigma=3)
    algo.execute()

    print(f'{bcolors.HEADER}Desbordante > {bcolors.ENDC}', end='')
    # Fetch the mined dependencies once and reuse them for both the count
    # and the listing instead of querying the algorithm twice.
    mined_gfds = algo.get_gfds()
    print('Mined GFDs:', len(mined_gfds))
    print()
    print("Let's print found dependency (in DOT language):")
    for gfd in mined_gfds:
        print(gfd)
    print(REWRITING)
    print(GFD_TEXT)
| 98 | + |
| 99 | + |
def show_example():
    """Open a window showing the graph, the embeddings and the mined GFD.

    The figure starts as a 2x2 grid. The two right-hand cells are removed and
    replaced by a single axes spanning both rows, so the left column holds the
    graph and embeddings images and the right column holds the GFD image.
    The call to ``plt.show()`` comes last.
    """
    figure, grid_axes = plt.subplots(
        2,
        2,
        figsize=(16, 7),
        gridspec_kw={'width_ratios': [7, 3], 'wspace': 0.5},
    )

    # Merge the right column into one tall axes.
    grid_spec = grid_axes[0, 1].get_gridspec()
    for right_cell in grid_axes[:, 1]:
        right_cell.remove()
    tall_axes = figure.add_subplot(grid_spec[:, -1])

    # (axes, title, image file) for every panel, in drawing order.
    panels = (
        (grid_axes[0, 0], '$Original$ $graph$', GRAPH_IMAGE),
        (grid_axes[1, 0], '$GFD$ $embeddings$', EMBEDDINGS_IMAGE),
        (tall_axes, '$Mined$ $GFD$', GFD_IMAGE),
    )
    for axes, title, image_path in panels:
        axes.set_axis_off()
        axes.set_title(title)
        axes.imshow(mpimg.imread(image_path))

    plt.show()
| 117 | + |
| 118 | + |
# Script flow: explanatory text, the mining run, the interpretation of the
# result, and finally the figure window.
print(PREAMBLE, GFD_INFO, GRAPH_INFO, ALGO_INFO, INFO, sep='\n')
execute_algo(desbordante.gfd_mining.algorithms.GfdMiner())
print(RESULTS, EXAMPLE_INFO, EXIT, sep='\n')

show_example()
0 commit comments