From d8f95a20917959fdbd8a7b3f3dd6dddc0ea08606 Mon Sep 17 00:00:00 2001 From: AndreaB330 Date: Sat, 26 Jan 2019 13:04:07 +0200 Subject: [PATCH] Code --- .gitignore | 3 + README.md | 10 +++ dsu.h | 62 +++++++++++++++ emst.h | 179 ++++++++++++++++++++++++++++++++++++++++++++ main.cpp | 45 +++++++++++ model.h | 154 +++++++++++++++++++++++++++++++++++++ test_data/dim10.txt | 12 +++ test_data/dim2.txt | 12 +++ tree.h | 127 +++++++++++++++++++++++++++++++ 9 files changed, 604 insertions(+) create mode 100644 dsu.h create mode 100644 emst.h create mode 100644 main.cpp create mode 100644 model.h create mode 100644 test_data/dim10.txt create mode 100644 test_data/dim2.txt create mode 100644 tree.h diff --git a/.gitignore b/.gitignore index 3e759b7..0d9fc45 100644 --- a/.gitignore +++ b/.gitignore @@ -328,3 +328,6 @@ ASALocalRun/ # MFractors (Xamarin productivity tool) working folder .mfractor/ + +*.vcxproj +*.vcxproj.filters diff --git a/README.md b/README.md index 3adefe7..a527126 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,12 @@ # EuclideanMST Implementations of different algorithms for building Euclidean minimum spanning tree in k-dimensional space. +### Algorithms: + - EMST using Kd-tree O(NlogN) + Implementation of algorithm described in "Fast Euclidean Minimum Spanning Tree: Algorithm, Analysis, and Applications. William B. March, Parikshit Ram, Alexander G. Gray" + - Prim's algorithm O(N^2) + Straightforward MST on fully connected Eclidean graph + +### TODO: +- Implement EMST using Cover-tree +- Benchmarks +- \dots \ No newline at end of file diff --git a/dsu.h b/dsu.h new file mode 100644 index 0000000..b9d5613 --- /dev/null +++ b/dsu.h @@ -0,0 +1,62 @@ +#pragma once +#include + +/** + * Disjoin-set-union data structure + */ +class DSU { + +public: + + DSU(size_t n = 0); + + size_t get_set(size_t x) const; + bool is_in_same_set(size_t x, size_t y) const; + + void reset(size_t n); + bool unite(size_t x, size_t y); + +private: + + mutable std::vector p; + std::vector rank; +}; + +// Implementation + +DSU::DSU(size_t n) { + reset(n); +} + + +size_t DSU::get_set(size_t x) const { + return p[x] == x ? x : p[x] = get_set(p[x]); +} + +bool DSU::is_in_same_set(size_t x, size_t y) const { + return get_set(x) == get_set(y); +} + +void DSU::reset(size_t n) { + p.resize(n); + rank.assign(n, 0); + for (size_t i = 0; i < n; i++) { + p[i] = i; + } +} + +bool DSU::unite(size_t x, size_t y) { + size_t set_a = get_set(x); + size_t set_b = get_set(y); + if (set_a == set_b) { + return false; + } + if (rank[set_a] > rank[set_b]) { + std::swap(set_a, set_b); + } + if (rank[set_a] == rank[set_b]) { + rank[set_b]++; + } + p[set_a] = set_b; + return true; +} \ No newline at end of file diff --git a/emst.h b/emst.h new file mode 100644 index 0000000..62fc6df --- /dev/null +++ b/emst.h @@ -0,0 +1,179 @@ +#pragma once +#include "model.h" +#include "tree.h" +#include "dsu.h" + +typedef std::pair Edge; + +template +class EmstSolver { +public: + EmstSolver() {} + + const std::vector & get_solution() const { return solution; } + const double & get_total_length() const { return total_length; } + +protected: + std::vector solution; + double total_length = 0.0; +}; + +/** + * Implementation of EMST algorithm using K-d tree + * "Fast Euclidean Minimum Spanning Tree: Algorithm, Analysis, and Applications. William B. March, Parikshit Ram, Alexander G. Gray" + * Time complexity: O(cNlogN), extra constant c depends on the distribution of points + */ +template +class KdTreeSolver : public EmstSolver { +public: + KdTreeSolver(std::vector> & points) :num_points(points.size()) { + dsu.reset(num_points); + tree = KdTree(points, floor(log2(num_points)) - 2); + is_fully_connected.assign(tree.get_maximal_id() + 1, false); + solve(); + // todo: clear containers + } + +private: + void solve() { + auto & solution = EmstSolver::solution; + auto & total_length = EmstSolver::total_length; + + while (solution.size() + 1 < num_points) { + node_approximation.assign(tree.get_maximal_id() + 1, std::numeric_limits::max()); + nearest_set.assign(num_points, { std::numeric_limits::max(), Edge(0,0) }); + + check_fully_connected(tree.get_root_id()); + + find_component_neighbors(tree.get_root_id(), tree.get_root_id()); + + for (size_t i = 0; i < num_points; i++) { + if (i == dsu.get_set(i)) { + Edge e = nearest_set[i].second; + if (dsu.unite(e.first, e.second)) { + solution.push_back(e); + total_length += nearest_set[i].first; + } + } + } + } + } + + void find_component_neighbors(size_t q, size_t r, size_t depth = 0) { + if (is_fully_connected[q] && is_fully_connected[r] && + dsu.is_in_same_set(tree.points_begin(q)->get_id(), tree.points_begin(r)->get_id())) { + return; + } + if (distance(tree.get_bounding_box(q), tree.get_bounding_box(r)) > node_approximation[q]) { + return; + } + if (tree.is_leaf(q) && tree.is_leaf(r)) { + node_approximation[q] = 0.0; + for (auto i = tree.points_begin(q); i != tree.points_end(q); i++) { + for (auto j = tree.points_begin(r); j != tree.points_end(r); j++) { + if (!dsu.is_in_same_set(i->get_id(), j->get_id())) { + double dist = distance(*i, *j); + if (dist < nearest_set[dsu.get_set(i->get_id())].first) { + nearest_set[dsu.get_set(i->get_id())] = { dist, { i->get_id(), j->get_id() } }; + } + } + } + node_approximation[q] = std::max(node_approximation[q], nearest_set[dsu.get_set(i->get_id())].first); + } + } else { + size_t qleft = tree.get_left_child_id(q); + size_t qright = tree.get_right_child_id(q); + size_t rleft = tree.get_left_child_id(r); + size_t rright = tree.get_right_child_id(r); + if (tree.is_leaf(q)) { + find_component_neighbors(q, rleft, depth); + find_component_neighbors(q, rright, depth); + return; + } + if (tree.is_leaf(r)) { + find_component_neighbors(qleft, r, depth); + find_component_neighbors(qright, r, depth); + node_approximation[q] = std::max(node_approximation[qleft], node_approximation[qright]); + return; + } + find_component_neighbors(qleft, rleft, depth + 1); + find_component_neighbors(qleft, rright, depth + 1); + find_component_neighbors(qright, rright, depth + 1); + find_component_neighbors(qright, rleft, depth + 1); + node_approximation[q] = std::max(node_approximation[qleft], node_approximation[qright]); + } + } + + void check_fully_connected(size_t node_id) { + if (is_fully_connected[node_id]) { + return; + } + if (tree.is_leaf(node_id)) { + bool fully_connected = true; + for (auto iter = tree.points_begin(node_id); iter + 1 != tree.points_end(node_id); ++iter) { + fully_connected &= dsu.is_in_same_set(iter->get_id(), (iter + 1)->get_id()); + } + is_fully_connected[node_id] = fully_connected; + return; + } + size_t left = tree.get_left_child_id(node_id); + size_t right = tree.get_right_child_id(node_id); + check_fully_connected(left); + check_fully_connected(right); + if (is_fully_connected[left] && is_fully_connected[right] && + dsu.is_in_same_set(tree.points_begin(left)->get_id(), tree.points_begin(right)->get_id())) { + is_fully_connected[node_id] = true; + } + } + + size_t num_points; + DSU dsu; + KdTree tree; + std::vector is_fully_connected; + std::vector node_approximation; + std::vector> nearest_set; +}; + +/** + * Prim's algorithm + * Time complexity: O(N^2) + */ +template +class PrimSolver : public EmstSolver { +public: + PrimSolver(std::vector> & points) { + solve(points); + } + +private: + void solve(const std::vector> & points) { + auto & solution = EmstSolver::solution; + auto & total_length = EmstSolver::total_length; + size_t num_points = points.size(); + + std::vector> distance_to_tree(num_points, { std::numeric_limits::max(), 0 }); + std::vector used(num_points, false); + used[0] = true; + for (size_t i = 1; i < num_points; i++) { + distance_to_tree[i] = { distance(points[0], points[i]), 0 }; + } + + for (size_t iteration = 1; iteration < num_points; iteration++) { + size_t nearest_id = 0; + for (size_t i = 1; i < num_points; i++) { + if (!used[i] && distance_to_tree[i] < distance_to_tree[nearest_id]) { + nearest_id = i; + } + } + solution.push_back({ nearest_id, distance_to_tree[nearest_id].second }); + total_length += distance_to_tree[nearest_id].first; + used[nearest_id] = true; + for (size_t i = 1; i < num_points; i++) { + if (!used[i] && distance(points[i], points[nearest_id]) < distance_to_tree[i].first) { + distance_to_tree[i] = { distance(points[i], points[nearest_id]) , nearest_id }; + } + } + } + } + +}; \ No newline at end of file diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..4dfac46 --- /dev/null +++ b/main.cpp @@ -0,0 +1,45 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include "emst.h" + +using namespace std; + +template +void example() { + + fstream fin("test_data/dim" + to_string(DIM) + ".txt"); + size_t n; fin >> n; + vector> points(n); + for (size_t i = 0; i < n; i++) { + for (size_t k = 0; k < DIM; k++) { + fin >> points[i][k]; + } + } + string s; fin >> s; + double answer; fin >> answer; + + KdTreeSolver solver_fast(points); + PrimSolver solver_slow(points); + + cout << DIM << "-dimensional space:" << endl; + cout << "Answer: " << answer << endl; + cout << "Fast solver answer: " << solver_fast.get_total_length() << endl; + cout << "Slow solver answer: " << solver_slow.get_total_length() << endl; + cout << endl; +} + +int main() { + cout.setf(ios::fixed); + cout.precision(6); + + example<2>(); + example<10>(); + + return 0; +} \ No newline at end of file diff --git a/model.h b/model.h new file mode 100644 index 0000000..a3a0622 --- /dev/null +++ b/model.h @@ -0,0 +1,154 @@ +#pragma once +#include +#include +#include + +/** + * Point in DIM-dimensional space + */ +template +class Point { + +public: + Point(); + + const double & operator[](size_t i) const; + double & operator[](size_t i); + + /** + * Distance between two points + */ + template + friend double distance(const Point & a, const Point & b); + +private: + double data[DIM]; +}; + +/** + * Point with id + */ +template +class UPoint : public Point { +public: + + UPoint(const Point & ref, size_t id = 0); + UPoint(size_t id = 0); + + size_t get_id() const; + +private: + size_t id; +}; + +/** + * Bounding box in DIM-dimensional space + */ +template +class AABB { +public: + AABB(); + /** + * Construct bounding box from vector iterator + */ + AABB(typename std::vector>::const_iterator iter, typename std::vector>::const_iterator end); + + /** + * Size of the k-th dimension + */ + double size(size_t k) const; + + /** + * Get the index of a dimension with the largest size + */ + size_t get_largest_dim() const; + + /** + * Distance between two bounding boxes + */ + template + friend double distance(const AABB & a, const AABB & b); + +private: + Point minimal; + Point maximal; +}; + +// Implementation + +template +Point::Point() { + std::fill(data, data + DIM, 0.0); +} + +template +const double & Point::operator[](size_t i) const { + return data[i]; +} + +template +double & Point::operator[](size_t i) { + return data[i]; +} + +template +UPoint::UPoint(const Point & ref, size_t id) : Point(ref), id(id) {} + +template +UPoint::UPoint(size_t id) :id(id) {} + +template +size_t UPoint::get_id() const { + return id; +} + +template +AABB::AABB() {} + +template +AABB::AABB(typename std::vector>::const_iterator iter, typename std::vector>::const_iterator end) { + minimal = maximal = *iter; + for (; iter != end; ++iter) { + for (size_t k = 0; k < DIM; k++) { + minimal[k] = std::min(minimal[k], (*iter)[k]); + maximal[k] = std::max(maximal[k], (*iter)[k]); + } + } +} + +template +double AABB::size(size_t k) const { + return maximal[k] - minimal[k]; +} + +template +size_t AABB::get_largest_dim() const { + size_t biggest_dim = 0; + for (size_t k = 1; k < DIM; k++) { + if (size(k) > size(biggest_dim)) + biggest_dim = k; + } + return biggest_dim; +} + +template +double distance(const Point & a, const Point & b) { + double distance_sqr = 0; + for (size_t k = 0; k < DIM; k++) { + double diff = a[k] - b[k]; + distance_sqr += diff * diff; + } + return sqrt(distance_sqr); +} + +template +double distance(const AABB & a, const AABB & b) { + double distance_sqr = 0; + for (size_t k = 0; k < DIM; k++) { + double diff = std::max(a.minimal[k], b.minimal[k]) - std::min(a.maximal[k], b.maximal[k]); + if (diff > 0) { + distance_sqr += diff * diff; + } + } + return sqrt(distance_sqr); +} \ No newline at end of file diff --git a/test_data/dim10.txt b/test_data/dim10.txt new file mode 100644 index 0000000..a6a9441 --- /dev/null +++ b/test_data/dim10.txt @@ -0,0 +1,12 @@ +10 +-1.843 -2.017 0.344 -1.788 0.250 -2.244 0.645 0.947 -0.895 -0.061 +1.779 0.375 -1.209 -0.130 -0.018 -1.628 0.323 -1.804 -0.063 0.714 +0.489 0.099 1.301 0.743 2.139 -0.210 -0.009 0.718 -0.102 2.675 +0.928 2.035 -2.982 0.010 -0.338 0.364 0.916 0.902 -1.609 2.084 +0.701 0.413 0.692 -0.241 -1.238 -1.550 1.330 -1.600 1.404 0.639 +-2.628 -2.502 1.277 1.167 0.929 -2.372 0.785 0.044 1.120 -1.141 +1.070 -0.948 -0.241 0.808 -0.802 2.620 -0.478 1.054 0.092 0.667 +0.317 1.737 0.604 2.245 0.563 -1.852 1.448 -1.276 -3.169 1.711 +1.105 0.709 1.506 2.355 -2.075 -2.054 -0.165 -1.942 0.415 -0.482 +0.355 -0.195 -0.152 0.311 1.766 -1.117 0.216 1.539 0.769 1.330 +Answer: 37.123037 \ No newline at end of file diff --git a/test_data/dim2.txt b/test_data/dim2.txt new file mode 100644 index 0000000..3566ca1 --- /dev/null +++ b/test_data/dim2.txt @@ -0,0 +1,12 @@ +10 +-1.843 -2.017 +0.344 -1.788 +0.250 -2.244 +0.645 0.947 +-0.895 -0.061 +1.779 0.375 +-1.209 -0.130 +-0.018 -1.628 +0.323 -1.804 +-0.063 0.714 +Answer: 7.991851 diff --git a/tree.h b/tree.h new file mode 100644 index 0000000..2823dc7 --- /dev/null +++ b/tree.h @@ -0,0 +1,127 @@ +#pragma once +#include +#include +#include +#include +#include "model.h" + +/** + * K-d tree data structure + */ +template +struct KdTree { +public: + KdTree(); + /** + * Construct K-d tree with height at most max_height from a given set of points + */ + KdTree(const std::vector> & points, size_t max_height); + + size_t get_root_id() const; + size_t get_maximal_id() const; + size_t get_left_child_id(size_t id) const; + size_t get_right_child_id(size_t id) const; + const AABB & get_bounding_box(size_t id) const; + bool is_leaf(size_t id) const; + + typename std::vector>::const_iterator points_begin(size_t node_id) const; + typename std::vector>::const_iterator points_end(size_t node_id) const; + +private: + /** + * K-d tree node. Represents part of space with points from (start) to (start+size-1) position in points array + */ + struct KdNode { + KdNode() : start(0), is_leaf(false), size(0) {} + AABB aabb; + bool is_leaf; //is this node a leaf node and has no childs + size_t start; + size_t size; + }; + /** + * Recursively build tree for current node node_id, for points from position (start) to position (start+size-1) + */ + void build(size_t node_id, size_t start, size_t size, size_t remaining_height); + + std::vector> points; + std::vector nodes; +}; + +// Implementation + +template +KdTree::KdTree() {} + +template +KdTree::KdTree(const std::vector> & points, size_t max_height) { + this->points.reserve(points.size()); + for (auto & p : points) { + this->points.push_back(UPoint(p, this->points.size())); + } + nodes.resize(points.size() * 4); + build(1, 0, points.size(), max_height); +} + +template +size_t KdTree::get_root_id() const { + return 1; +} + +template +size_t KdTree::get_maximal_id() const { + return (nodes.empty() ? 0 : nodes.size() - 1); +} + +template +size_t KdTree::get_left_child_id(size_t id) const { + return id * 2; +} + +template +size_t KdTree::get_right_child_id(size_t id) const { + return id * 2 + 1; +} + + +template +bool KdTree::is_leaf(size_t id) const { + return nodes[id].is_leaf; +} + +template +const AABB & KdTree::get_bounding_box(size_t id) const { + return nodes[id].aabb; +} + +template +typename std::vector>::const_iterator KdTree::points_begin(size_t node_id) const { + return points.begin() + nodes[node_id].start; +} + +template +typename std::vector>::const_iterator KdTree::points_end(size_t node_id) const { + return points.begin() + nodes[node_id].start + nodes[node_id].size; +} + +template +void KdTree::build(size_t node_id, size_t start, size_t size, size_t remaining_height) { + nodes[node_id].aabb = AABB(points.cbegin() + start, points.cbegin() + start + size); + nodes[node_id].start = start; + nodes[node_id].size = size; + + if (size == 1 || remaining_height == 0) { + nodes[node_id].is_leaf = true; + return; + } + + size_t biggest = nodes[node_id].aabb.get_largest_dim(); + + std::nth_element(points.begin() + start, points.begin() + start + size / 2, points.begin() + start + size, + [&biggest](const Point & a, const Point & b) { + return a[biggest] < b[biggest]; + }); + + // recursively call build for two part + build(node_id * 2, start, size / 2, remaining_height - 1); + build(node_id * 2 + 1, start + size / 2, (size + 1) / 2, remaining_height - 1); +} \ No newline at end of file