Skip to content

Commit b5fd5d6

Browse files
committed
Add examples for GFD mining
Added two examples of searching for dependencies in small graphs.
1 parent ca4b4fb commit b5fd5d6

File tree

10 files changed

+283
-0
lines changed

10 files changed

+283
-0
lines changed
Loading
Loading
Loading
Loading
Loading
Loading
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
from pathlib import Path
2+
3+
import desbordante
4+
import matplotlib.pyplot as plt
5+
import matplotlib.image as mpimg
6+
7+
8+
class bcolors:
    """ANSI escape sequences used to colour this example's console output."""

    # 24-bit foreground colours (ESC[38;2;R;G;Bm), one per vertex label.
    ARTICLE = '\033[38;2;173;255;47m'
    PERSON = '\033[38;2;46;139;87m'
    # Classic SGR codes: 95 = bright magenta, 93 = bright yellow.
    HEADER = '\033[95m'
    WARNING = '\033[93m'
    # SGR 0 resets every attribute set by the codes above.
    ENDC = '\033[0m'
14+
15+
16+
def colored(message, color):
    """Return *message* preceded by *color* and followed by the reset code.

    Args:
        message: text to colour.
        color: ANSI escape sequence, e.g. one of the ``bcolors`` attributes.

    Returns:
        The concatenation ``color + message + bcolors.ENDC``.
    """
    return f'{color}{message}{bcolors.ENDC}'
18+
19+
20+
# Base names (without extension) of the dataset and of the pre-rendered figures.
GRAPH_NAME = 'papers_graph'
EMBEDDINGS_NAME = 'papers_embeddings'
GFD_NAME = 'papers_gfd'

# Every location below is a relative path.
# NOTE(review): presumably the script is meant to be launched from the
# repository root so that these resolve -- confirm.
GRAPHS_DATASETS_FOLDER_PATH = 'examples/datasets/mining_gfd'
_FIGURES_FOLDER_PATH = Path('examples/basic/mining_gfd/figures')

# Input graph in DOT format.
GRAPH = Path(GRAPHS_DATASETS_FOLDER_PATH) / f'{GRAPH_NAME}.dot'

# Images displayed by show_example().
GRAPH_IMAGE = _FIGURES_FOLDER_PATH / 'graphs' / f'{GRAPH_NAME}.png'
EMBEDDINGS_IMAGE = _FIGURES_FOLDER_PATH / 'graphs' / f'{EMBEDDINGS_NAME}.png'
GFD_IMAGE = _FIGURES_FOLDER_PATH / 'gfds' / f'{GFD_NAME}.png'
31+
32+
# Console narration, printed in order by the statements at the bottom of the file.

PREAMBLE = ("Our profiler can work with graph functional dependencies. These are functional "
            "dependencies that consist of a pattern - a graph that specifies the scope, and a rule.\n")

ARTICLE_INFO = ("The mining algorithm that our miner uses is described in the article "
                "\"Discovering Graph Functional Dependencies\" by Fan Wenfei, Hu Chunming, "
                "Liu Xueli, and Lu Pinge, SIGMOD'18.\n")

# Describes the vertex labels and attributes of the example graph.
GRAPH_INFO = ("Let's analyze GFD mining through an example. Look at the graph "
              "presented on the top left in the figure. It describes the connections "
              "between scientific articles and their authors. The vertices of this "
              f"graph have two labels: {colored('Article', bcolors.ARTICLE)} and "
              f"{colored('Person', bcolors.PERSON)}. Each vertex has its own set "
              "of attributes depending on the label.\n\n"
              f"{colored('Article', bcolors.ARTICLE)}:\n- {colored('title', bcolors.ARTICLE)}"
              " denotes the title of the article.\n\n"
              f"{colored('Person', bcolors.PERSON)}:\n- {colored('name', bcolors.PERSON)}"
              f" denotes the name of a person\n- {colored('role', bcolors.PERSON)}"
              " can take one of two values: \"teacher\" or \"student\".\n")

ALGO_INFO = ("The algorithm, in addition to the graph, takes two parameters:\n"
             "- k: maximal number of vertices in the pattern of the mined dependency.\n"
             "- sigma: minimal frequency of GFD occurrence.\n")

# Must stay in sync with the k/sigma values passed in execute_algo().
INFO = "Let's run the algorithm and look at the result. We will set k=3 and sigma=2.\n"

# Hand-formatted rendering of the expected dependency: vertex indices, the
# pattern, then the rule. Doubled braces are literal braces inside f-strings.
# NOTE(review): the runs of spaces in the first two lines may have been
# collapsed when this file was extracted -- check that the indices line up
# with the vertices when printed.
GFD_TEXT = (f' {colored("1", bcolors.ARTICLE)} {colored("0", bcolors.PERSON)}'
            f' {colored("2", bcolors.ARTICLE)}\n'
            f' {colored("(A)", bcolors.ARTICLE)}--{colored("(P)", bcolors.PERSON)}'
            f'--{colored("(A)", bcolors.ARTICLE)}\n'
            f'{{}} --> {{{colored("0", bcolors.PERSON)}.{colored("role", bcolors.PERSON)}'
            '=teacher}\n\n'
            'The mined dependency can also be seen on the right in the figure.\n')

RESULTS = ("The found dependency suggests that if a person with two published articles is found, "
           "then he is a teacher. In other words, only students have exactly one published article.\n")

EXAMPLE_INFO = ('It is recommended to look at the second example for a deeper '
                'understanding of graph functional dependency mining. It is '
                'located in the file "mining_gfd2.py".\n')

EXIT = colored("Close the image window to finish.", bcolors.WARNING)
72+
73+
74+
def execute_algo(algo):
    """Run *algo* on the example graph and print the number of mined GFDs.

    Args:
        algo: object exposing ``load_data``, ``execute`` and ``get_gfds``
            (the script passes a ``GfdMiner`` instance).
    """
    # k=3 / sigma=2 are the values announced to the reader in INFO.
    algo.load_data(graph=GRAPH, gfd_k=3, gfd_sigma=2)
    algo.execute()
    mined_gfds = algo.get_gfds()

    print(f'{bcolors.HEADER}Desbordante > {bcolors.ENDC}', end='')
    print('Mined GFDs:', len(mined_gfds))
    print()
    # The dependency is shown from the pre-formatted GFD_TEXT constant,
    # not rendered from the mined objects.
    print(GFD_TEXT)
81+
82+
83+
def show_example():
    """Show the graph, the GFD embeddings and the mined GFD in one figure.

    The left column holds two stacked images (original graph on top,
    embeddings below); the right column is a single tall axes with the GFD.
    Ends with ``plt.show()``.
    """
    fig, axarr = plt.subplots(
        2, 2, figsize=(14, 6),
        gridspec_kw={'width_ratios': [7, 3], 'wspace': 0.5})

    # Replace both right-hand axes with one axes spanning the whole column.
    gs = axarr[0, 1].get_gridspec()
    for ax in axarr[:, 1]:
        ax.remove()
    axsbig = fig.add_subplot(gs[:, -1])

    panels = (
        (axarr[0, 0], '$Original$ $graph$', GRAPH_IMAGE),
        (axarr[1, 0], '$GFD$ $embeddings$', EMBEDDINGS_IMAGE),
        (axsbig, '$Mined$ $GFD$', GFD_IMAGE),
    )
    for ax, title, image_path in panels:
        ax.set_axis_off()
        ax.set_title(title)
        ax.imshow(mpimg.imread(image_path))

    plt.show()
100+
101+
102+
# Script body: introduce the task, run the miner, explain the result and
# finally open the figure window.
print(PREAMBLE, ARTICLE_INFO, GRAPH_INFO, ALGO_INFO, INFO, sep='\n')
execute_algo(desbordante.gfd_mining.algorithms.GfdMiner())
print(RESULTS, EXAMPLE_INFO, EXIT, sep='\n')

show_example()
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
from pathlib import Path
2+
3+
import desbordante
4+
import matplotlib.pyplot as plt
5+
import matplotlib.image as mpimg
6+
7+
8+
class bcolors:
    """ANSI escape sequences used to colour this example's console output."""

    # 24-bit foreground colours (ESC[38;2;R;G;Bm), one per vertex label.
    STUDENT = '\033[38;2;254;136;99m'
    TASK = '\033[38;2;87;206;235m'
    # Classic SGR codes: 95 = bright magenta, 93 = bright yellow.
    HEADER = '\033[95m'
    WARNING = '\033[93m'
    # SGR 0 resets every attribute set by the codes above.
    ENDC = '\033[0m'
14+
15+
16+
def colored(message, color):
    """Return *message* preceded by *color* and followed by the reset code.

    Args:
        message: text to colour.
        color: ANSI escape sequence, e.g. one of the ``bcolors`` attributes.

    Returns:
        The concatenation ``color + message + bcolors.ENDC``.
    """
    return f'{color}{message}{bcolors.ENDC}'
18+
19+
20+
# Base names (without extension) of the dataset and of the pre-rendered figures.
GRAPH_NAME = 'study_graph'
EMBEDDINGS_NAME = 'study_embeddings'
GFD_NAME = 'study_gfd'

# Every location below is a relative path.
# NOTE(review): presumably the script is meant to be launched from the
# repository root so that these resolve -- confirm.
GRAPHS_DATASETS_FOLDER_PATH = 'examples/datasets/mining_gfd'
_FIGURES_FOLDER_PATH = Path('examples/basic/mining_gfd/figures')

# Input graph in DOT format.
GRAPH = Path(GRAPHS_DATASETS_FOLDER_PATH) / f'{GRAPH_NAME}.dot'

# Images displayed by show_example().
GRAPH_IMAGE = _FIGURES_FOLDER_PATH / 'graphs' / f'{GRAPH_NAME}.png'
EMBEDDINGS_IMAGE = _FIGURES_FOLDER_PATH / 'graphs' / f'{EMBEDDINGS_NAME}.png'
GFD_IMAGE = _FIGURES_FOLDER_PATH / 'gfds' / f'{GFD_NAME}.png'
31+
32+
# Console narration, printed in order by the statements at the bottom of the file.

PREAMBLE = ("Our profiler can work with graph functional dependencies. These are functional "
            "dependencies that consist of a pattern - a graph that specifies the scope, and a rule.\n")

ARTICLE_INFO = ("The mining algorithm that our miner uses is described in the article "
                "\"Discovering Graph Functional Dependencies\" by Fan Wenfei, Hu Chunming, "
                "Liu Xueli, and Lu Pinge, SIGMOD'18.\n")

# Describes the vertex labels and attributes of the example graph.
# Fixed: the second section is about Task vertices (it was headed 'Person',
# copied from the first example), and the 'difficulty' bullet lacked its verb.
# NOTE(review): the study_graph dataset gives task vertices only a
# 'difficulty' attribute, yet a task 'name' is described here -- confirm
# which one is intended.
GRAPH_INFO = ("Let's analyze GFD mining through an example. Look at the graph "
              "presented on the top left in the figure. It describes the connections "
              "between students and tasks. The vertices of this "
              f"graph have two labels: {colored('Student', bcolors.STUDENT)} and "
              f"{colored('Task', bcolors.TASK)}. Each vertex has its own set "
              "of attributes depending on the label.\n\n"
              f"{colored('Student', bcolors.STUDENT)}:\n- {colored('name', bcolors.STUDENT)}"
              f" denotes the name of the student.\n- {colored('degree', bcolors.STUDENT)} "
              f"is a level of education.\n- {colored('year', bcolors.STUDENT)} "
              "is a year of study.\n\n"
              f"{colored('Task', bcolors.TASK)}:\n- {colored('name', bcolors.TASK)}"
              f" denotes the name of a task\n- {colored('difficulty', bcolors.TASK)}"
              " is a categorical parameter that takes one of the values: "
              "\"easy\", \"normal\" or \"hard\".\n")

ALGO_INFO = ("The algorithm, in addition to the graph, takes two parameters:\n"
             "- k: maximal number of vertices in the pattern of the mined dependency.\n"
             "- sigma: minimal frequency of GFD occurrence.\n")

# Must stay in sync with the k/sigma values passed in execute_algo().
INFO = "Let's run the algorithm and look at the result. We will set k=2 and sigma=3.\n"

# Hand-formatted rendering of the expected dependency: vertex indices, the
# pattern, then the rule. Doubled braces are literal braces inside f-strings.
# NOTE(review): the runs of spaces in the first two lines may have been
# collapsed when this file was extracted -- check that the indices line up
# with the vertices when printed.
GFD_TEXT = (f'  {colored("0", bcolors.TASK)} {colored("1", bcolors.STUDENT)}\n'
            f' {colored("(T)", bcolors.TASK)}--{colored("(S)", bcolors.STUDENT)}\n'
            f'{{{colored("0", bcolors.TASK)}.{colored("difficulty", bcolors.TASK)}=hard}} --> '
            f'{{{colored("1", bcolors.STUDENT)}.{colored("degree", bcolors.STUDENT)}=master & '
            f'{colored("1", bcolors.STUDENT)}.{colored("year", bcolors.STUDENT)}=2}}\n\n'
            'The mined dependency can also be seen on the right in the figure.\n')

RESULTS = ("The dependency found indicates that only second-year master's students are "
           "working on the hard task.\n")

EXAMPLE_INFO = ('It is recommended to look at the first example for a deeper '
                'understanding of graph functional dependency mining. It is '
                'located in the file "mining_gfd1.py".\n')

EXIT = colored("Close the image window to finish.", bcolors.WARNING)
78+
79+
80+
def execute_algo(algo):
    """Run *algo* on the example graph and print the number of mined GFDs.

    Args:
        algo: object exposing ``load_data``, ``execute`` and ``get_gfds``
            (the script passes a ``GfdMiner`` instance).
    """
    # k=2 / sigma=3 are the values announced to the reader in INFO.
    algo.load_data(graph=GRAPH, gfd_k=2, gfd_sigma=3)
    algo.execute()
    mined_gfds = algo.get_gfds()

    print(f'{bcolors.HEADER}Desbordante > {bcolors.ENDC}', end='')
    print('Mined GFDs:', len(mined_gfds))
    print()
    # The dependency is shown from the pre-formatted GFD_TEXT constant,
    # not rendered from the mined objects.
    print(GFD_TEXT)
87+
88+
89+
def show_example():
    """Show the graph, the GFD embeddings and the mined GFD in one figure.

    The left column holds two stacked images (original graph on top,
    embeddings below); the right column is a single tall axes with the GFD.
    Ends with ``plt.show()``.
    """
    fig, axarr = plt.subplots(
        2, 2, figsize=(16, 7),
        gridspec_kw={'width_ratios': [7, 3], 'wspace': 0.5})

    # Replace both right-hand axes with one axes spanning the whole column.
    gs = axarr[0, 1].get_gridspec()
    for ax in axarr[:, 1]:
        ax.remove()
    axsbig = fig.add_subplot(gs[:, -1])

    panels = (
        (axarr[0, 0], '$Original$ $graph$', GRAPH_IMAGE),
        (axarr[1, 0], '$GFD$ $embeddings$', EMBEDDINGS_IMAGE),
        (axsbig, '$Mined$ $GFD$', GFD_IMAGE),
    )
    for ax, title, image_path in panels:
        ax.set_axis_off()
        ax.set_title(title)
        ax.imshow(mpimg.imread(image_path))

    plt.show()
106+
107+
108+
# Script body: introduce the task, run the miner, explain the result and
# finally open the figure window.
print(PREAMBLE, ARTICLE_INFO, GRAPH_INFO, ALGO_INFO, INFO, sep='\n')
execute_algo(desbordante.gfd_mining.algorithms.GfdMiner())
print(RESULTS, EXAMPLE_INFO, EXIT, sep='\n')

show_example()
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
graph G {
2+
0[label=article title="Exploring the Impact of Climate Change on Marine Biodiversity"];
3+
1[label=article title="Advancements in Quantum Computing: A New Era of Information Processing"];
4+
2[label=article title="The Role of Artificial Intelligence in Modern Healthcare Systems"];
5+
3[label=article title="Understanding the Genetic Basis of Resilience in Plant Species"];
6+
4[label=person name="Emily Carter" role=teacher];
7+
5[label=person name="James Thompson" role=student];
8+
6[label=person name="Sophia Martinez" role=teacher];
9+
7[label=person name="Liam Johnson" role=student];
10+
8[label=person name="Ava Patel" role=student];
11+
0--4 [label="*"];
12+
1--4 [label="*"];
13+
1--5 [label="*"];
14+
1--6 [label="*"];
15+
2--6 [label="*"];
16+
3--6 [label="*"];
17+
3--7 [label="*"];
18+
3--8 [label="*"];
19+
}
Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
graph G {
2+
0[label=task difficulty=easy];
3+
1[label=task difficulty=normal];
4+
2[label=task difficulty=normal];
5+
3[label=task difficulty=hard];
6+
4[label=task difficulty=hard];
7+
5[label=task difficulty=hard];
8+
6[label=student name=James degree=bachelor year=2];
9+
7[label=student name=Michael degree=master year=1];
10+
8[label=student name=Robert degree=bachelor year=3];
11+
9[label=student name=John degree=master year=2];
12+
10[label=student name=David degree=bachelor year=4];
13+
11[label=student name=William degree=master year=2];
14+
12[label=student name=Richard degree=master year=2];
15+
13[label=student name=Joseph degree=master year=2];
16+
14[label=student name=Thomas degree=master year=2];
17+
15[label=student name=Christopher degree=master year=2];
18+
0--6 [label=performs];
19+
1--6 [label=performs];
20+
1--7 [label=performs];
21+
1--10 [label=performs];
22+
2--7 [label=performs];
23+
2--8 [label=performs];
24+
2--9 [label=performs];
25+
3--9 [label=performs];
26+
3--11 [label=performs];
27+
3--12 [label=performs];
28+
4--12 [label=performs];
29+
4--13 [label=performs];
30+
4--14 [label=performs];
31+
5--11 [label=performs];
32+
5--14 [label=performs];
33+
5--15 [label=performs];
34+
}

0 commit comments

Comments
 (0)