-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathquery.sql
executable file
·80 lines (68 loc) · 2.12 KB
/
query.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
use ${hiveconf:database};
drop table ${hiveconf:table}_track_linkages;
create table ${hiveconf:table}_track_linkages as
select intersectx, intersecty, dt, collect_set(track_id) as links, collect_set(velocity) as vels, collect_set(direction) as dirs
from micro_path_tripline_bins_${hiveconf:table}
group by intersectx, intersecty, dt;
drop table ${hiveconf:table}_track_linkages_final;
create table ${hiveconf:table}_track_linkages_final as
select * from ${hiveconf:table}_track_linkages where size(links) > 1;
drop table ${hiveconf:table}_network;
create table ${hiveconf:table}_network
(
intersectX string,
intersextY string,
dt string,
source string,
destination string
);
add file transform.py;
from ${hiveconf:table}_track_linkages_final
insert overwrite table ${hiveconf:table}_network
select transform(intersectX, intersectY, dt, links)
using 'python transform.py'
as intersectX, intersectY, dt, source, destination;
drop table ${hiveconf:table}_dynamic_graph;
create table ${hiveconf:table}_dynamic_graph
(
source string,
destination string,
firstdate string,
lastdate string,
value string
);
add file tag_relationships.py;
from(
select dt, source, destination
from ${hiveconf:table}_network
distribute by source, destination
sort by source, destination, dt asc
) m
insert overwrite table ${hiveconf:table}_dynamic_graph
select transform(m.dt, m.source, m.destination)
using 'python tag_relationships.py ${hiveconf:ts}'
as source, destination, firstdate, lastdate, value;
drop table ${hiveconf:table}_network_edges;
create table ${hiveconf:table}_network_edges as
select source as Source, destination as Target, count(*) as Weight
from ${hiveconf:table}_network
group by source, destination;
add file edgelist.py;
drop table ${hiveconf:table}_edgelist;
create table ${hiveconf:table}_edgelist
(
node string,
val string,
edgelist string
)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY '\t';
from(
select source, target, weight
from ${hiveconf:table}_network_edges
distribute by source sort by source
) m
insert overwrite table ${hiveconf:table}_edgelist
select transform(m.source, m.target, m.weight)
using 'python edgelist.py'
as node, val, edgelsit;