Skip to content

Commit 76a910b

Browse files
committed
[RFC] dash::copy: Implement global-to-global
Active team selection is now done by tag struct argument.
1 parent 91c36c9 commit 76a910b

File tree

3 files changed

+221
-11
lines changed

3 files changed

+221
-11
lines changed

dash/include/dash/algorithm/Copy.h

+149-8
Original file line numberDiff line numberDiff line change
@@ -646,25 +646,166 @@ copy_async(
646646
}
647647
#endif
648648

/**
 * Tag type passed as last argument to the global-to-global \c dash::copy
 * overload in which the units that hold the destination range locally
 * carry out the transfer.
 */
struct ActiveDestination {};

/**
 * Tag type passed as last argument to the global-to-global \c dash::copy
 * overload in which the units that hold the source range locally carry
 * out the transfer.
 */
struct ActiveSource {};

652+
/**
653+
* Specialization of \c dash::copy as global-to-global blocking copy
654+
* operation.
655+
*
656+
* \ingroup DashAlgorithms
657+
*/
658+
template <
659+
class GlobInputIt,
660+
class GlobOutputIt,
661+
bool UseHandles = false>
662+
GlobOutputIt copy(
663+
GlobInputIt in_first,
664+
GlobInputIt in_last,
665+
GlobOutputIt out_first,
666+
ActiveDestination /*unused*/)
667+
{
668+
DASH_LOG_TRACE("dash::copy()", "blocking, global to global, active destination");
669+
670+
using size_type = typename GlobInputIt::size_type;
671+
using input_value_type = typename GlobInputIt::value_type;
672+
using output_value_type = typename GlobOutputIt::value_type;
673+
674+
size_type num_elem_total = dash::distance(in_first, in_last);
675+
if (num_elem_total <= 0) {
676+
DASH_LOG_TRACE("dash::copy", "input range empty");
677+
return out_first;
678+
}
679+
680+
auto g_out_first = out_first;
681+
auto g_out_last = g_out_first + num_elem_total;
682+
683+
internal::ContiguousRangeSet<GlobOutputIt> range_set{g_out_first, g_out_last};
684+
685+
const auto & out_team = out_first.team();
686+
out_team.barrier();
687+
688+
std::vector<dart_handle_t> handles;
689+
std::vector<dart_handle_t>* handles_arg = UseHandles ? &handles : nullptr;
690+
691+
dash::internal::local_copy_chunks<input_value_type, output_value_type> local_chunks;
692+
693+
size_type num_elem_processed = 0;
694+
695+
for (auto range : range_set) {
696+
697+
auto cur_out_first = range.first;
698+
auto num_copy_elem = range.second;
699+
700+
DASH_ASSERT_GT(num_copy_elem, 0,
701+
"Number of elements to copy is 0");
702+
703+
// handle local data only
704+
if (cur_out_first.is_local()) {
705+
auto dest_ptr = cur_out_first.local();
706+
auto src_ptr = in_first + num_elem_processed;
707+
internal::copy_impl(src_ptr,
708+
src_ptr + num_copy_elem,
709+
dest_ptr,
710+
handles_arg,
711+
local_chunks);
712+
}
713+
num_elem_processed += num_copy_elem;
714+
}
715+
716+
dash::internal::do_local_copies(local_chunks);
717+
718+
if (!handles.empty()) {
719+
DASH_LOG_TRACE("dash::copy", "Waiting for remote transfers to complete,",
720+
"num_handles: ", handles.size());
721+
dart_waitall_local(handles.data(), handles.size());
722+
} else if (!UseHandles) {
723+
dart_flush_local_all(in_first.dart_gptr());
724+
}
725+
out_team.barrier();
726+
727+
DASH_ASSERT_EQ(num_elem_processed, num_elem_total,
728+
"Failed to find all contiguous subranges in range");
729+
730+
return g_out_last;
731+
}
732+
649733
/**
650734
* Specialization of \c dash::copy as global-to-global blocking copy
651735
* operation.
652736
*
653737
* \ingroup DashAlgorithms
654738
*/
655-
template <typename ValueType, class GlobInputIt, class GlobOutputIt>
739+
template <
740+
class GlobInputIt,
741+
class GlobOutputIt,
742+
bool UseHandles = false>
656743
GlobOutputIt copy(
657-
GlobInputIt /*in_first*/,
658-
GlobInputIt /*in_last*/,
659-
GlobOutputIt /*out_first*/)
744+
GlobInputIt in_first,
745+
GlobInputIt in_last,
746+
GlobOutputIt out_first,
747+
ActiveSource /*unused*/)
660748
{
661749
DASH_LOG_TRACE("dash::copy()", "blocking, global to global");
662750

663-
// TODO:
664-
// - Implement adapter for local-to-global dash::copy here
665-
// - Return if global input range has no local sub-range
751+
using size_type = typename GlobInputIt::size_type;
752+
using input_value_type = typename GlobInputIt::value_type;
753+
using output_value_type = typename GlobOutputIt::value_type;
754+
755+
size_type num_elem_total = dash::distance(in_first, in_last);
756+
if (num_elem_total <= 0) {
757+
DASH_LOG_TRACE("dash::copy", "input range empty");
758+
return out_first;
759+
}
760+
761+
internal::ContiguousRangeSet<GlobOutputIt> range_set{in_first, in_last};
762+
763+
const auto & in_team = in_first.team();
764+
in_team.barrier();
765+
766+
std::vector<dart_handle_t> handles;
767+
std::vector<dart_handle_t>* handles_arg = UseHandles ? &handles : nullptr;
768+
769+
dash::internal::local_copy_chunks<input_value_type, output_value_type> local_chunks;
770+
771+
size_type num_elem_processed = 0;
772+
773+
for (auto range : range_set) {
774+
775+
auto cur_in_first = range.first;
776+
auto num_copy_elem = range.second;
777+
778+
DASH_ASSERT_GT(num_copy_elem, 0,
779+
"Number of elements to copy is 0");
780+
781+
// handle local data only
782+
if (cur_in_first.is_local()) {
783+
auto src_ptr = cur_in_first.local();
784+
auto dest_ptr = out_first + num_elem_processed;
785+
internal::copy_impl(src_ptr,
786+
src_ptr + num_copy_elem,
787+
dest_ptr,
788+
handles_arg,
789+
local_chunks);
790+
}
791+
num_elem_processed += num_copy_elem;
792+
}
793+
794+
internal::do_local_copies(local_chunks);
795+
796+
if (!handles.empty()) {
797+
DASH_LOG_TRACE("dash::copy", "Waiting for remote transfers to complete,",
798+
"num_handles: ", handles.size());
799+
dart_waitall(handles.data(), handles.size());
800+
} else if (!UseHandles) {
801+
dart_flush_all(out_first.dart_gptr());
802+
}
803+
in_team.barrier();
804+
805+
DASH_ASSERT_EQ(num_elem_processed, num_elem_total,
806+
"Failed to find all contiguous subranges in range");
666807

667-
return GlobOutputIt();
808+
return out_first + num_elem_total;
668809
}
669810

670811
#endif // DOXYGEN

dash/test/algorithm/CopyTest.cc

+68
Original file line numberDiff line numberDiff line change
@@ -1040,3 +1040,71 @@ TEST_F(CopyTest, InputOutputTypeTest)
10401040
ASSERT_TRUE_U((dash::internal::is_dash_copyable<const point_t, point_t>::value));
10411041

10421042
}
1043+
1044+
TEST_F(CopyTest, MatrixTransfersGlobalToGlobal)
1045+
{
1046+
if (_dash_size < 2) {
1047+
SKIP_TEST_MSG("At least 2 units required for this test.");
1048+
}
1049+
1050+
using TeamSpecT = dash::TeamSpec<2>;
1051+
using MatrixT = dash::NArray<double, 2>;
1052+
using PatternT = typename MatrixT::pattern_type;
1053+
using SizeSpecT = dash::SizeSpec<2>;
1054+
using DistSpecT = dash::DistributionSpec<2>;
1055+
1056+
auto& team_all = dash::Team::All();
1057+
TeamSpecT team_all_spec(team_all.size(), 1);
1058+
team_all_spec.balance_extents();
1059+
1060+
auto size_spec = SizeSpecT(4*team_all_spec.extent(1),
1061+
4*team_all_spec.extent(1));
1062+
auto dist_spec = DistSpecT(dash::BLOCKED, dash::BLOCKED);
1063+
1064+
MatrixT grid_more(size_spec, dist_spec, team_all, team_all_spec);
1065+
dash::fill(grid_more.begin(), grid_more.end(), (double)team_all.myid());
1066+
team_all.barrier();
1067+
1068+
// create a smaller team
1069+
dash::Team& team_fewer= team_all.split(2);
1070+
team_all.barrier();
1071+
if (!team_fewer.is_null() && 0 == team_fewer.position()) {
1072+
TeamSpecT team_fewer_spec(team_fewer.size(), 1);
1073+
team_fewer_spec.balance_extents();
1074+
1075+
MatrixT grid_fewer(size_spec, dist_spec, team_fewer, team_fewer_spec);
1076+
dash::fill(grid_fewer.begin(), grid_fewer.end(), -1.0);
1077+
1078+
auto lextents= grid_fewer.pattern().local_extents();
1079+
1080+
dash::copy(grid_more.begin(), grid_more.end(),
1081+
grid_fewer.begin(), dash::ActiveDestination());
1082+
1083+
if (team_fewer.myid() == 0) {
1084+
auto gextents = grid_fewer.extents();
1085+
for (uint32_t y = 0; y < gextents[0]; ++y) {
1086+
for (uint32_t x = 0; x < gextents[1]; ++x) {
1087+
ASSERT_EQ_U(grid_more(y, x), grid_fewer(y, x));
1088+
}
1089+
}
1090+
}
1091+
1092+
team_fewer.barrier();
1093+
1094+
dash::fill(grid_fewer.begin(), grid_fewer.end(), (double)team_fewer.myid());
1095+
1096+
dash::copy(grid_fewer.begin(), grid_fewer.end(),
1097+
grid_more.begin(), dash::ActiveSource());
1098+
1099+
if (team_fewer.myid() == 0) {
1100+
auto gextents = grid_fewer.extents();
1101+
for (uint32_t y = 0; y < gextents[0]; ++y) {
1102+
for (uint32_t x = 0; x < gextents[1]; ++x) {
1103+
ASSERT_EQ_U(grid_more(y, x), grid_fewer(y, x));
1104+
}
1105+
}
1106+
}
1107+
1108+
team_fewer.barrier();
1109+
}
1110+
}

dash/test/container/MatrixTest.cc

+4-3
Original file line numberDiff line numberDiff line change
@@ -745,9 +745,10 @@ TEST_F(MatrixTest, BlockCopy)
745745
LOG_MESSAGE("Team barrier passed");
746746

747747
// Copy block 1 of matrix_a to block 0 of matrix_b:
748-
dash::copy<element_t>(matrix_a.block(1).begin(),
749-
matrix_a.block(1).end(),
750-
matrix_b.block(0).begin());
748+
dash::copy(matrix_a.block(1).begin(),
749+
matrix_a.block(1).end(),
750+
matrix_b.block(0).begin(),
751+
dash::ActiveSource());
751752

752753
LOG_MESSAGE("Wait for team barrier ...");
753754
dash::barrier();

0 commit comments

Comments
 (0)