Skip to content

Commit 07f640a

Browse files
committed
[RFC] dash::copy: Implement global-to-global
Active team selection is now done by tag struct argument.
1 parent ecb9c0d commit 07f640a

File tree

2 files changed

+205
-8
lines changed

2 files changed

+205
-8
lines changed

dash/include/dash/algorithm/Copy.h

+137-8
Original file line numberDiff line numberDiff line change
@@ -624,25 +624,154 @@ copy_async(
624624
}
625625
#endif
626626

627+
/**
 * Tag type selecting the global-to-global \c dash::copy overload in which
 * each calling unit handles only the sub-ranges of the \b destination range
 * that are local to it. Carries no state; pass a default-constructed
 * instance as the last argument of \c dash::copy.
 */
struct ActiveDestination{};
628+
/**
 * Tag type selecting the global-to-global \c dash::copy overload in which
 * each calling unit handles only the sub-ranges of the \b source range
 * that are local to it. Carries no state; pass a default-constructed
 * instance as the last argument of \c dash::copy.
 */
struct ActiveSource{};
629+
630+
/**
631+
* Specialization of \c dash::copy as global-to-global blocking copy
632+
* operation.
633+
*
634+
* \ingroup DashAlgorithms
635+
*/
636+
template <
637+
class GlobInputIt,
638+
class GlobOutputIt,
639+
typename ValueType = typename GlobInputIt::value_type>
640+
GlobOutputIt copy(
641+
GlobInputIt in_first,
642+
GlobInputIt in_last,
643+
GlobOutputIt out_first,
644+
ActiveDestination /*unused*/)
645+
{
646+
DASH_LOG_TRACE("dash::copy()", "blocking, global to global");
647+
648+
typedef typename GlobInputIt::size_type size_type;
649+
650+
size_type num_elem_total = dash::distance(in_first, in_last);
651+
if (num_elem_total <= 0) {
652+
DASH_LOG_TRACE("dash::copy", "input range empty");
653+
return out_first;
654+
}
655+
656+
auto g_out_first = out_first;
657+
auto g_out_last = g_out_first + num_elem_total;
658+
659+
internal::ContiguousRangeSet<GlobOutputIt> range_set{g_out_first, g_out_last};
660+
661+
const auto & out_team = out_first.team();
662+
out_team.barrier();
663+
664+
std::vector<dart_handle_t> handles;
665+
internal::local_copy_chunks<ValueType> local_chunks;
666+
667+
size_type num_elem_processed = 0;
668+
669+
for (auto range : range_set) {
670+
671+
auto cur_out_first = range.first;
672+
auto num_copy_elem = range.second;
673+
674+
DASH_ASSERT_GT(num_copy_elem, 0,
675+
"Number of elements to copy is 0");
676+
677+
// handle local data only
678+
if (cur_out_first.is_local()) {
679+
auto dest_ptr = cur_out_first.local();
680+
auto src_ptr = in_first + num_elem_processed;
681+
internal::copy_impl(src_ptr,
682+
src_ptr + num_copy_elem,
683+
dest_ptr,
684+
&handles,
685+
local_chunks);
686+
}
687+
num_elem_processed += num_copy_elem;
688+
}
689+
690+
internal::do_local_copies(local_chunks);
691+
692+
if (!handles.empty()) {
693+
DASH_LOG_TRACE("dash::copy", "Waiting for remote transfers to complete,",
694+
"num_handles: ", handles.size());
695+
dart_waitall_local(handles.data(), handles.size());
696+
}
697+
out_team.barrier();
698+
699+
DASH_ASSERT_EQ(num_elem_processed, num_elem_total,
700+
"Failed to find all contiguous subranges in range");
701+
702+
return g_out_last;
703+
}
704+
627705
/**
628706
* Specialization of \c dash::copy as global-to-global blocking copy
629707
* operation.
630708
*
631709
* \ingroup DashAlgorithms
632710
*/
633-
template <typename ValueType, class GlobInputIt, class GlobOutputIt>
711+
template <
712+
class GlobInputIt,
713+
class GlobOutputIt,
714+
typename ValueType = typename GlobInputIt::value_type>
634715
GlobOutputIt copy(
635-
GlobInputIt /*in_first*/,
636-
GlobInputIt /*in_last*/,
637-
GlobOutputIt /*out_first*/)
716+
GlobInputIt in_first,
717+
GlobInputIt in_last,
718+
GlobOutputIt out_first,
719+
ActiveSource /*unused*/)
638720
{
639721
DASH_LOG_TRACE("dash::copy()", "blocking, global to global");
640722

641-
// TODO:
642-
// - Implement adapter for local-to-global dash::copy here
643-
// - Return if global input range has no local sub-range
723+
typedef typename GlobInputIt::size_type size_type;
724+
725+
size_type num_elem_total = dash::distance(in_first, in_last);
726+
if (num_elem_total <= 0) {
727+
DASH_LOG_TRACE("dash::copy", "input range empty");
728+
return out_first;
729+
}
730+
731+
internal::ContiguousRangeSet<GlobOutputIt> range_set{in_first, in_last};
732+
733+
const auto & in_team = in_first.team();
734+
in_team.barrier();
735+
736+
std::vector<dart_handle_t> handles;
737+
internal::local_copy_chunks<ValueType> local_chunks;
738+
739+
size_type num_elem_processed = 0;
740+
741+
for (auto range : range_set) {
742+
743+
auto cur_in_first = range.first;
744+
auto num_copy_elem = range.second;
745+
746+
DASH_ASSERT_GT(num_copy_elem, 0,
747+
"Number of elements to copy is 0");
748+
749+
// handle local data only
750+
if (cur_in_first.is_local()) {
751+
auto src_ptr = cur_in_first.local();
752+
auto dest_ptr = out_first + num_elem_processed;
753+
internal::copy_impl(src_ptr,
754+
src_ptr + num_copy_elem,
755+
dest_ptr,
756+
&handles,
757+
local_chunks);
758+
}
759+
num_elem_processed += num_copy_elem;
760+
}
761+
762+
internal::do_local_copies(local_chunks);
763+
764+
if (!handles.empty()) {
765+
DASH_LOG_TRACE("dash::copy", "Waiting for remote transfers to complete,",
766+
"num_handles: ", handles.size());
767+
dart_waitall(handles.data(), handles.size());
768+
}
769+
in_team.barrier();
770+
771+
DASH_ASSERT_EQ(num_elem_processed, num_elem_total,
772+
"Failed to find all contiguous subranges in range");
644773

645-
return GlobOutputIt();
774+
return out_first + num_elem_total;
646775
}
647776

648777
#endif // DOXYGEN

dash/test/algorithm/CopyTest.cc

+68
Original file line numberDiff line numberDiff line change
@@ -1019,3 +1019,71 @@ TEST_F(CopyTest, MatrixToSmallerTeam)
10191019

10201020
}
10211021
}
1022+
1023+
/**
 * Copies a 2-dimensional matrix distributed over all units into a matrix of
 * the same size distributed over a sub-team (destination-active copy), then
 * copies the sub-team matrix back (source-active copy). After each copy,
 * unit 0 of the sub-team compares both matrices element by element.
 */
TEST_F(CopyTest, MatrixTransfersGlobalToGlobal)
{
  if (_dash_size < 2) {
    SKIP_TEST_MSG("At least 2 units required for this test.");
  }

  using TeamSpecT = dash::TeamSpec<2>;
  using MatrixT   = dash::NArray<double, 2>;
  using PatternT  = typename MatrixT::pattern_type;
  using SizeSpecT = dash::SizeSpec<2>;
  using DistSpecT = dash::DistributionSpec<2>;

  auto& team_all = dash::Team::All();
  TeamSpecT team_all_spec(team_all.size(), 1);
  team_all_spec.balance_extents();

  // NOTE(review): both matrix extents are derived from extent(1) of the
  // team spec; confirm that extent(0) was not intended for the first one.
  auto size_spec = SizeSpecT(4 * team_all_spec.extent(1),
                             4 * team_all_spec.extent(1));
  auto dist_spec = DistSpecT(dash::BLOCKED, dash::BLOCKED);

  // Matrix over all units; every unit's id is the fill value.
  MatrixT grid_more(size_spec, dist_spec, team_all, team_all_spec);
  dash::fill(grid_more.begin(), grid_more.end(),
             static_cast<double>(team_all.myid()));
  team_all.barrier();

  // create a smaller team
  dash::Team& team_fewer = team_all.split(2);
  team_all.barrier();

  // Only units in the sub-team at position 0 take part in the transfers.
  if (!team_fewer.is_null() && 0 == team_fewer.position()) {
    TeamSpecT team_fewer_spec(team_fewer.size(), 1);
    team_fewer_spec.balance_extents();

    MatrixT grid_fewer(size_spec, dist_spec, team_fewer, team_fewer_spec);
    dash::fill(grid_fewer.begin(), grid_fewer.end(), -1.0);

    // Destination-active: units of team_fewer copy into their local parts.
    dash::copy(grid_more.begin(), grid_more.end(),
               grid_fewer.begin(), dash::ActiveDestination());

    if (team_fewer.myid() == 0) {
      auto gextents = grid_fewer.extents();
      for (uint32_t y = 0; y < gextents[0]; ++y) {
        for (uint32_t x = 0; x < gextents[1]; ++x) {
          ASSERT_EQ_U(grid_more(y, x), grid_fewer(y, x));
        }
      }
    }

    // Ensure verification is finished before grid_fewer is overwritten.
    team_fewer.barrier();

    dash::fill(grid_fewer.begin(), grid_fewer.end(),
               static_cast<double>(team_fewer.myid()));

    // Source-active: units of team_fewer copy out of their local parts.
    dash::copy(grid_fewer.begin(), grid_fewer.end(),
               grid_more.begin(), dash::ActiveSource());

    if (team_fewer.myid() == 0) {
      auto gextents = grid_fewer.extents();
      for (uint32_t y = 0; y < gextents[0]; ++y) {
        for (uint32_t x = 0; x < gextents[1]; ++x) {
          ASSERT_EQ_U(grid_more(y, x), grid_fewer(y, x));
        }
      }
    }

    team_fewer.barrier();
  }
}

0 commit comments

Comments
 (0)