Skip to content

Commit 53b1f4f

Browse files
authored
Implemented PageRank based on BatchWorker, verified on LDBC. (#2)
1 parent f12713a commit 53b1f4f

13 files changed

+450
-130
lines changed

.github/workflows/build-doc.yml

+7-4
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,22 @@ jobs:
1818
sudo apt install -y libmpich-dev libgoogle-glog-dev libgflags-dev libboost-dev librdkafka-dev doxygen
1919
- name: Build Doc
2020
run: |
21-
cmake .
21+
mkdir build
22+
cd build
23+
cmake ..
2224
make doc
25+
cp ../misc/logo.png docs
2326
- name: Commit Doc
2427
run: |
2528
git config user.email [email protected]
2629
git config user.name github-actions
2730
git branch -D gh-pages || true
2831
git checkout --orphan gh-pages
2932
shopt -s extglob
30-
rm -rf !(docs)
33+
rm -rf !(build)
3134
rm -rf .github .clang-format .gitattributes .gitmodules .gitignore
32-
mv docs/* ./
33-
rmdir docs
35+
mv build/docs/* ./
36+
rm -rf build
3437
git add .
3538
git commit -m "auto build from ${{ github.sha }}."
3639
git push -f origin gh-pages

.github/workflows/c-cpp.yml

+1-8
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,6 @@
11
name: C++ CI
22

3-
on:
4-
push:
5-
branches:
6-
- master
7-
- 'ci/**' # for personal testing
8-
pull_request:
9-
branches:
10-
- master
3+
on: [push, pull_request]
114

125
jobs:
136
build:

CONTRIBUTING.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,6 @@ to us and thanks for your enthusiasm!
9494
[2]: https://github.com/alibaba/libgrape-lite/blob/master/LICENSE
9595
[3]: https://github.com/alibaba/libgrape-lite/blob/master/README.md
9696
[4]: https://github.com/alibaba/libgrape-lite/issues/new/choose
97-
[5]: CLA
97+
[5]: https://cla-assistant.io/alibaba/libgrape-lite
9898
[6]: https://alibaba.github.io/libgrape-lite
9999
[7]: https://github.com/alibaba/libgrape-lite/pulls

Performance.md

+5-5
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,11 @@ The best results are marked in **bold**.
3333
| | datagen-9_2-zf | 41.36 | 2.59 | 14.59 | **1.47** |
3434
| | graph500-26 | 5.42 | 0.73 | 2.19 | **0.60** |
3535
| | com-friendster | 13.14 | **1.28** | 3.55 | 1.40 |
36-
| PageRank | datagen-9_0-fb | 26.90 | X | X | **2.72** |
37-
| | datagen-9_1-fb | 34.01 | X | X | **3.25** |
38-
| | datagen-9_2-zf | 152.11 | X | X | **4.16** |
39-
| | graph500-26 | 34.89 | X | X | **5.84** |
40-
| | com-friendster | 66.61 | X | X | **11.96** |
36+
| PageRank | datagen-9_0-fb | 26.90 | X | X | **1.61** |
37+
| | datagen-9_1-fb | 34.01 | X | X | **1.99** |
38+
| | datagen-9_2-zf | 152.11 | X | X | **4.00** |
39+
| | graph500-26 | 34.89 | X | X | **2.95** |
40+
| | com-friendster | 66.61 | X | X | **5.91** |
4141
| CDLP | datagen-9_0-fb | 1535.09 | N/A | 16.67 | **8.49** |
4242
| | datagen-9_1-fb | 2725.54 | N/A | 21.60 | **10.78** |
4343
| | datagen-9_2-zf | > 3600 | N/A | 46.05 | **17.17** |

README.md

+8-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,11 @@
1-
# libgrape-lite
1+
<h1 align="center">
2+
<img src="https://alibaba.github.io/libgrape-lite/logo.png" width="100" alt="libgrape-lite">
3+
<br>
4+
libgrape-lite
5+
</h1>
6+
<p align="center">
7+
A C++ library for parallel graph processing
8+
</p>
29

310
[![C/C++ CI](https://github.com/alibaba/libgrape-lite/workflows/C++%20CI/badge.svg)](https://github.com/alibaba/libgrape-lite/actions?workflow=C++%20CI)
411
[![codecov](https://codecov.io/gh/alibaba/libgrape-lite/branch/master/graph/badge.svg)](https://codecov.io/gh/alibaba/libgrape-lite)

examples/analytical_apps/pagerank/pagerank.h

+114-103
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1212
See the License for the specific language governing permissions and
1313
limitations under the License.
1414
*/
15-
1615
#ifndef EXAMPLES_ANALYTICAL_APPS_PAGERANK_PAGERANK_H_
1716
#define EXAMPLES_ANALYTICAL_APPS_PAGERANK_PAGERANK_H_
1817

@@ -23,177 +22,189 @@ limitations under the License.
2322
namespace grape {
2423

2524
/**
26-
* @brief An implementation of PageRank, the version in LDBC, which can work
27-
* on both directed and undirected graphs.
25+
* @brief An implementation of PageRank, which can work
26+
* on undirected graphs.
2827
*
29-
* This version of PageRank inherits ParallelAppBase. Messages can be sent in
30-
* parallel with the evaluation process. This strategy improves performance by
31-
* overlapping the communication time and the evaluation time.
28+
* This version of PageRank inherits BatchShuffleAppBase.
29+
* Messages are generated in batches and received in-place.
3230
*
3331
* @tparam FRAG_T
3432
*/
35-
3633
template <typename FRAG_T>
37-
class PageRank : public ParallelAppBase<FRAG_T, PageRankContext<FRAG_T>>,
38-
public Communicator,
39-
public ParallelEngine {
34+
class PageRank
35+
: public BatchShuffleAppBase<FRAG_T, PageRankContext<FRAG_T>>,
36+
public ParallelEngine,
37+
public Communicator {
4038
public:
39+
INSTALL_BATCH_SHUFFLE_WORKER(PageRank<FRAG_T>,
40+
PageRankContext<FRAG_T>, FRAG_T)
41+
4142
using vertex_t = typename FRAG_T::vertex_t;
43+
using vid_t = typename FRAG_T::vid_t;
44+
45+
static constexpr bool need_split_edges = true;
4246
static constexpr MessageStrategy message_strategy =
4347
MessageStrategy::kAlongOutgoingEdgeToOuterVertex;
44-
static constexpr bool need_split_edges = true;
45-
static constexpr LoadStrategy load_strategy = LoadStrategy::kBothOutIn;
48+
static constexpr LoadStrategy load_strategy = LoadStrategy::kOnlyOut;
4649

47-
INSTALL_PARALLEL_WORKER(PageRank<FRAG_T>, PageRankContext<FRAG_T>, FRAG_T)
50+
PageRank() = default;
4851

49-
PageRank() {}
5052
void PEval(const fragment_t& frag, context_t& ctx,
5153
message_manager_t& messages) {
5254
auto inner_vertices = frag.InnerVertices();
5355

54-
size_t graph_vnum = frag.GetTotalVerticesNum();
55-
messages.InitChannels(thread_num());
56-
5756
#ifdef PROFILING
5857
ctx.exec_time -= GetCurrentTime();
5958
#endif
6059

6160
ctx.step = 0;
62-
double p = 1.0 / graph_vnum;
63-
64-
// assign initial ranks
65-
ForEach(inner_vertices, [&ctx, &frag, p, &messages](int tid, vertex_t u) {
66-
int EdgeNum = frag.GetOutgoingAdjList(u).Size();
67-
ctx.degree[u] = EdgeNum;
68-
if (EdgeNum > 0) {
69-
ctx.result[u] = p / EdgeNum;
70-
messages.SendMsgThroughOEdges<fragment_t, double>(frag, u,
71-
ctx.result[u], tid);
72-
} else {
73-
ctx.result[u] = p;
74-
}
75-
});
61+
ctx.graph_vnum = frag.GetTotalVerticesNum();
62+
vid_t dangling_vnum = 0;
63+
double p = 1.0 / ctx.graph_vnum;
64+
65+
std::vector<vid_t> dangling_vnum_tid(thread_num(), 0);
66+
ForEach(inner_vertices,
67+
[&ctx, &frag, p, &dangling_vnum_tid](int tid, vertex_t u) {
68+
int EdgeNum = frag.GetLocalOutDegree(u);
69+
ctx.degree[u] = EdgeNum;
70+
if (EdgeNum > 0) {
71+
ctx.result[u] = p / EdgeNum;
72+
} else {
73+
++dangling_vnum_tid[tid];
74+
ctx.result[u] = p;
75+
}
76+
ctx.result[u] = EdgeNum > 0 ? p / EdgeNum : p;
77+
});
78+
79+
for (auto vn : dangling_vnum_tid) {
80+
dangling_vnum += vn;
81+
}
82+
83+
Sum(dangling_vnum, ctx.total_dangling_vnum);
84+
ctx.dangling_sum = p * ctx.total_dangling_vnum;
7685

7786
#ifdef PROFILING
7887
ctx.exec_time += GetCurrentTime();
7988
ctx.postprocess_time -= GetCurrentTime();
8089
#endif
8190

82-
for (auto u : inner_vertices) {
83-
if (ctx.degree[u] == 0) {
84-
++ctx.dangling_vnum;
85-
}
86-
}
87-
88-
double dangling_sum = p * static_cast<double>(ctx.dangling_vnum);
89-
90-
Sum(dangling_sum, ctx.dangling_sum);
91-
91+
messages.SyncInnerVertices<fragment_t, double>(frag, ctx.result,
92+
thread_num());
9293
#ifdef PROFILING
9394
ctx.postprocess_time += GetCurrentTime();
9495
#endif
95-
messages.ForceContinue();
9696
}
9797

9898
void IncEval(const fragment_t& frag, context_t& ctx,
9999
message_manager_t& messages) {
100100
auto inner_vertices = frag.InnerVertices();
101-
102-
double dangling_sum = ctx.dangling_sum;
103-
104-
size_t graph_vnum = frag.GetTotalVerticesNum();
105-
106101
++ctx.step;
107-
if (ctx.step > ctx.max_round) {
108-
return;
109-
}
102+
103+
double base = (1.0 - ctx.delta) / ctx.graph_vnum +
104+
ctx.delta * ctx.dangling_sum / ctx.graph_vnum;
105+
ctx.dangling_sum = base * ctx.total_dangling_vnum;
110106

111107
#ifdef PROFILING
112108
ctx.exec_time -= GetCurrentTime();
113109
#endif
114110

115-
double base =
116-
(1.0 - ctx.delta) / graph_vnum + ctx.delta * dangling_sum / graph_vnum;
117-
118-
// pull ranks from neighbors
119-
ForEach(inner_vertices, [&ctx, base, &frag](int tid, vertex_t u) {
120-
if (ctx.degree[u] == 0) {
121-
ctx.next_result[u] = base;
122-
} else {
111+
if (ctx.avg_degree > 10 && frag.fnum() > 1) {
112+
// If fragment is dense and there are multiple fragments, receiving
113+
// messages is overlapped with computation. Receiving and computing
114+
// procedures are be splitted into multiple rounds. In each round,
115+
// messages from a fragment are received and then processed.
116+
ForEach(inner_vertices, [&ctx, &frag](int tid, vertex_t u) {
123117
double cur = 0;
124-
auto es = frag.GetIncomingInnerVertexAdjList(u);
118+
auto es = frag.GetOutgoingInnerVertexAdjList(u);
125119
for (auto& e : es) {
126120
cur += ctx.result[e.neighbor];
127121
}
128122
ctx.next_result[u] = cur;
129-
}
130-
});
123+
});
131124

125+
for (fid_t i = 2; i < frag.fnum(); ++i) {
132126
#ifdef PROFILING
133-
ctx.exec_time += GetCurrentTime();
134-
ctx.preprocess_time -= GetCurrentTime();
127+
ctx.preprocess_time -= GetCurrentTime();
135128
#endif
136-
137-
// process received ranks sent by other workers
138-
{
139-
messages.ParallelProcess<fragment_t, double>(
140-
thread_num(), frag, [&ctx](int tid, vertex_t u, const double& msg) {
141-
ctx.result[u] = msg;
142-
});
143-
}
144-
129+
fid_t src_fid = messages.UpdatePartialOuterVertices();
145130
#ifdef PROFILING
146-
ctx.preprocess_time += GetCurrentTime();
147-
ctx.exec_time -= GetCurrentTime();
131+
ctx.preprocess_time += GetCurrentTime();
132+
ctx.exec_time -= GetCurrentTime();
148133
#endif
149-
150-
// compute new ranks and send messages
151-
if (ctx.step != ctx.max_round) {
152-
ForEach(inner_vertices,
153-
[&ctx, base, &frag, &messages](int tid, vertex_t u) {
154-
if (ctx.degree[u] != 0) {
155-
double cur = ctx.next_result[u];
156-
auto es = frag.GetIncomingOuterVertexAdjList(u);
157-
for (auto& e : es) {
158-
cur += ctx.result[e.neighbor];
159-
}
160-
cur = (ctx.delta * cur + base) / ctx.degree[u];
161-
ctx.next_result[u] = cur;
162-
messages.SendMsgThroughOEdges<fragment_t, double>(
163-
frag, u, ctx.next_result[u], tid);
164-
}
165-
});
166-
} else {
167-
ForEach(inner_vertices, [&ctx, base, &frag](int tid, vertex_t u) {
168-
if (ctx.degree[u] != 0) {
134+
ForEach(inner_vertices, [src_fid, &frag, &ctx](int tid, vertex_t u) {
169135
double cur = ctx.next_result[u];
170-
auto es = frag.GetIncomingOuterVertexAdjList(u);
136+
auto es = frag.GetOutgoingAdjList(u, src_fid);
171137
for (auto& e : es) {
172138
cur += ctx.result[e.neighbor];
173139
}
174-
cur = (ctx.delta * cur + base) / ctx.degree[u];
175140
ctx.next_result[u] = cur;
141+
});
142+
#ifdef PROFILING
143+
ctx.exec_time += GetCurrentTime();
144+
#endif
145+
}
146+
147+
#ifdef PROFILING
148+
ctx.preprocess_time -= GetCurrentTime();
149+
#endif
150+
fid_t src_fid = messages.UpdatePartialOuterVertices();
151+
#ifdef PROFILING
152+
ctx.preprocess_time += GetCurrentTime();
153+
ctx.exec_time -= GetCurrentTime();
154+
#endif
155+
ForEach(
156+
inner_vertices, [src_fid, &frag, &ctx, base](int tid, vertex_t u) {
157+
double cur = ctx.next_result[u];
158+
auto es = frag.GetOutgoingAdjList(u, src_fid);
159+
for (auto& e : es) {
160+
cur += ctx.result[e.neighbor];
161+
}
162+
int en = frag.GetLocalOutDegree(u);
163+
ctx.next_result[u] = en > 0 ? (ctx.delta * cur + base) / en : base;
164+
});
165+
#ifdef PROFILING
166+
ctx.exec_time += GetCurrentTime();
167+
#endif
168+
} else {
169+
// If the fragment is sparse or there is only one fragment, one round of
170+
// iterating inner vertices is prefered.
171+
#ifdef PROFILING
172+
ctx.preprocess_time -= GetCurrentTime();
173+
#endif
174+
messages.UpdateOuterVertices();
175+
#ifdef PROFILING
176+
ctx.preprocess_time += GetCurrentTime();
177+
ctx.exec_time -= GetCurrentTime();
178+
#endif
179+
ForEach(inner_vertices, [&ctx, &frag, base](int tid, vertex_t u) {
180+
double cur = 0;
181+
auto es = frag.GetOutgoingAdjList(u);
182+
for (auto& e : es) {
183+
cur += ctx.result[e.neighbor];
176184
}
185+
int en = frag.GetLocalOutDegree(u);
186+
ctx.next_result[u] = en > 0 ? (ctx.delta * cur + base) / en : base;
177187
});
188+
#ifdef PROFILING
189+
ctx.exec_time += GetCurrentTime();
190+
#endif
178191
}
179192

180193
#ifdef PROFILING
181-
ctx.exec_time += GetCurrentTime();
182194
ctx.postprocess_time -= GetCurrentTime();
183195
#endif
196+
if (ctx.step != ctx.max_round) {
197+
messages.SyncInnerVertices<fragment_t, double>(frag, ctx.next_result,
198+
thread_num());
199+
}
184200

185201
ctx.result.Swap(ctx.next_result);
186-
187-
double new_dangling = base * static_cast<double>(ctx.dangling_vnum);
188-
189-
Sum(new_dangling, ctx.dangling_sum);
190-
191202
#ifdef PROFILING
192203
ctx.postprocess_time += GetCurrentTime();
193204
#endif
194-
messages.ForceContinue();
195205
}
196206
};
197207

198208
} // namespace grape
209+
199210
#endif // EXAMPLES_ANALYTICAL_APPS_PAGERANK_PAGERANK_H_

0 commit comments

Comments
 (0)