Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🎨 Slightly faster ClusterComplete #664

Open
wants to merge 17 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -5021,6 +5021,33 @@ population stability check. In the latter case, the configuration
stability check is performed before the associated charge distribution
is added to the simulation results.

Parameter ``clustering_state``:
A clustering state that holds a specific combination of multiset
charge configurations as projector states of which the
respectively associated clusters form a clustering in the cluster
hierarchy.)doc";

static const char *__doc_fiction_detail_clustercomplete_impl_add_physically_valid_charge_configurations_2 =
R"doc(This recursive function is the heart of the *ClusterComplete*
destruction. The given clustering state is dissected at the largest
cluster to each possible specialization of it, which then enters the
recursive call with the clustering state modified to have a set of
sibling children replacing their direct parent. For each
specialization, appropriate updates are made to the potential bounds
store that is part of the clustering state. After a specialization has
been handled completely, i.e., when the recursive call for this
specialization returns, the specialization to the potential bounds
store is undone so that a new specialization may be applied.

The two base cases to the recursion are as follows: (1) the charge
distributions implied by the given clustering state do not meet the
population stability, meaning that this branch of the search space may
be pruned through terminating the recursion at this level, and, (2)
the clustering state holds only singleton clusters and passes the
population stability check. In the latter case, the configuration
stability check is performed before the associated charge distribution
is added to the simulation results.

Parameter ``w``:
The worker running on the current thread. It has a clustering
state that holds a specific combination of multiset charge
Expand Down Expand Up @@ -5053,6 +5080,18 @@ Parameter ``params``:
Parameter required for both the invocation of *Ground State
Space*, and the simulation following.)doc";

static const char *__doc_fiction_detail_clustercomplete_impl_collect_physically_valid_charge_distributions_single_threaded =
R"doc(After the *Ground State Space* construction was completed and the top
cluster was returned, this function iterates over all compositions in
the charge space of the top cluster on the calling thread. Each is
decomposed recursively to generate physically valid charge
distributions that emerge from increasingly specializing multiset
charge configurations.

Parameter ``top_cluster``:
    The top cluster that is returned by the *Ground State Space*
    construction; it contains the entire cluster hierarchy construct.)doc";

static const char *__doc_fiction_detail_clustercomplete_impl_extract_work_from_top_cluster =
R"doc(Work in the form of compositions of charge space elements of the top
cluster are extracted into a vector and shuffled at random before
Expand Down
227 changes: 164 additions & 63 deletions include/fiction/algorithms/simulation/sidb/clustercomplete.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -162,40 +162,51 @@ class clustercomplete_impl

if (!gss_stats.top_cluster->charge_space.empty())
{
// initialization
initialize_worker_queues(extract_work_from_top_cluster(gss_stats.top_cluster));

// set up threads
std::vector<std::thread> supporting_threads{};
supporting_threads.reserve(available_threads);
if (available_threads == 1)
{
// single-threaded execution

for (uint64_t i = 1; i < available_threads; ++i)
collect_physically_valid_charge_distributions_single_threaded(gss_stats.top_cluster);
}
else
{
supporting_threads.emplace_back(
[&, ix = i]
{
worker& w = *workers.at(ix);
// multi-threaded execution

// keep unfolding on this thread until no more work exists
while (const std::optional<work_t>& work = w.obtain_work())
// initialization
initialize_worker_queues(extract_work_from_top_cluster(gss_stats.top_cluster));

// set up threads
std::vector<std::thread> supporting_threads{};
supporting_threads.reserve(available_threads);

for (uint64_t i = 1; i < available_threads; ++i)
{
supporting_threads.emplace_back(
[&, ix = i]
{
unfold_composition(w, work->get());
}
});
}
worker& w = *workers.at(ix);

// keep unfolding on this thread until no more work exists
while (const std::optional<work_t>& work = w.obtain_work())
{
unfold_composition(w, work->get());
}
});
}

// keep unfolding on the main thread until no more work exists
while (const std::optional<work_t>& work = workers.front()->obtain_work())
{
unfold_composition(*workers.front(), work->get());
}
// keep unfolding on the main thread until no more work exists
while (const std::optional<work_t>& work = workers.front()->obtain_work())
{
unfold_composition(*workers.front(), work->get());
}

// wait for all threads to complete
for (auto& thread : supporting_threads)
{
if (thread.joinable())
// wait for all threads to complete
for (auto& thread : supporting_threads)
{
thread.join();
if (thread.joinable())
{
thread.join();
}
}
}
}
Expand Down Expand Up @@ -241,6 +252,11 @@ class clustercomplete_impl
* Globally available array of bounds that section the band gap, used for pruning.
*/
const std::array<double, 4> mu_bounds_with_error;

///
/// COMMON FUNCTIONS
///

/**
* Helper function for obtaining the stored lower or upper bound on the electrostatic potential that SiDBs in the
* given projector state--i.e., a cluster together with an associated multiset charge configuration--collectively
Expand Down Expand Up @@ -297,6 +313,37 @@ class clustercomplete_impl
{
return pot_bound > mu_bounds_with_error.at(2);
}
/**
 * Builds the charge distribution surface used by the simulation from the given layout.
 *
 * The surface receives the physical simulation parameters first. If the layout type offers defect iteration, every
 * defect whose type is not `NONE` is then added to the potential landscape of the surface. Finally, the local and
 * global external potentials from the parameters are assigned.
 *
 * @param lyt Layout to simulate.
 * @param params Parameters for ClusterComplete.
 * @return The charge layout initialized with the physical parameters, the defects of the layout (if supported), and
 * the external potentials given in the parameters.
 */
[[nodiscard]] static charge_distribution_surface<Lyt>
initialize_charge_layout(const Lyt& lyt, const clustercomplete_params<cell<Lyt>>& params) noexcept
{
    charge_distribution_surface<Lyt> surface{lyt};
    surface.assign_physical_parameters(params.simulation_parameters);

    // layout types without defect iteration skip this step at compile time
    if constexpr (has_foreach_sidb_defect_v<Lyt>)
    {
        lyt.foreach_sidb_defect(
            [&](const auto& entry)
            {
                const auto& [c, defect] = entry;

                // entries of type NONE are not added to the potential landscape
                if (defect.type == sidb_defect_type::NONE)
                {
                    return;
                }

                surface.add_sidb_defect_to_potential_landscape(c, lyt.get_sidb_defect(c));
            });
    }

    surface.assign_local_external_potential(params.local_external_potential);
    surface.assign_global_external_potential(params.global_potential);

    return surface;
}
/**
* This function performs an analysis that is crucial to the *ClusterComplete*'s efficiency: as the *Ground State
* Space* construct is broken down, combinations of multiset charge configurations are tried together in more detail
Expand Down Expand Up @@ -484,7 +531,6 @@ class clustercomplete_impl
static void add_composition(sidb_clustering_state& clustering_state,
const sidb_charge_space_composition& composition) noexcept
{

clustering_state.pot_bounds += composition.pot_bounds;

for (const sidb_cluster_projector_state& child_pst : composition.proj_states)
Expand All @@ -511,6 +557,94 @@ class clustercomplete_impl

clustering_state.pot_bounds -= composition.pot_bounds;
}

///
/// SINGLE-THREADED FUNCTIONS
///

/**
 * Recursive core of the single-threaded *ClusterComplete* unfolding. The largest cluster of the given clustering
 * state is taken out and replaced, one composition at a time, by a set of its children. For every such
 * specialization, the composition is added to the clustering state (which also updates the stored potential
 * bounds), the function recurses, and the composition is removed again so that the next one can be tried. Once all
 * compositions were tried, the parent is put back, leaving the clustering state as it was received.
 *
 * The two base cases to the recursion are as follows: (1) the clustering state does not meet the population
 * stability criterion, in which case this branch of the search space is pruned by returning immediately, and, (2)
 * the clustering state holds only singleton clusters and passes the population stability check. In the latter
 * case, the configuration stability check decides whether the associated charge distribution is added to the
 * simulation results.
 *
 * @param clustering_state A clustering state that holds a specific combination of multiset charge configurations as
 * projector states of which the respectively associated clusters form a clustering in the cluster hierarchy.
 */
void add_physically_valid_charge_configurations(sidb_clustering_state& clustering_state) noexcept
{
    // base case (1): prune this branch
    if (!meets_population_stability_criterion(clustering_state))
    {
        return;
    }

    // base case (2): one projector state per cell means that all clusters are singletons
    const bool only_singletons = clustering_state.proj_states.size() == charge_layout.num_cells();

    if (only_singletons)
    {
        add_if_configuration_stability_is_met(clustering_state);
        return;
    }

    // the biggest cluster is the one that gets unfolded
    const uint64_t largest_ix = find_cluster_of_maximum_size(clustering_state.proj_states);

    // take the parent out of the clustering state, thereby making space for its specializations
    sidb_cluster_projector_state_ptr parent_pst = take_parent_out(clustering_state, largest_ix);

    for (const sidb_charge_space_composition& children_composition : get_projector_state_compositions(*parent_pst))
    {
        // replace the parent by this specific composition of its children
        add_composition(clustering_state, children_composition);

        // unfold further with the specialized clustering state
        add_physically_valid_charge_configurations(clustering_state);

        // revert the specialization before the next composition is considered
        remove_composition(clustering_state, children_composition);
    }

    // put the parent back
    add_parent(clustering_state, largest_ix, std::move(parent_pst));
}
/**
 * After the *Ground State Space* construction was completed and the top cluster was returned, this function
 * iterates over every composition of every element in the charge space of the top cluster on the calling thread.
 * Each composition is converted to a fresh clustering state that is then decomposed recursively to generate
 * physically valid charge distributions that emerge from increasingly specializing multiset charge configurations.
 *
 * @param top_cluster The top cluster that is returned by the *Ground State Space* construction; it contains the
 * entire cluster hierarchy construct.
 */
void collect_physically_valid_charge_distributions_single_threaded(const sidb_cluster_ptr& top_cluster) noexcept
{
    for (const sidb_cluster_charge_state& charge_state : top_cluster->charge_space)
    {
        for (const sidb_charge_space_composition& top_composition : charge_state.compositions)
        {
            // start from an empty clustering state and apply the composition to it
            sidb_clustering_state state{charge_layout.num_cells()};
            add_composition(state, top_composition);

            // recursively unfold the clustering state
            add_physically_valid_charge_configurations(state);
        }
    }
}

///
/// MULTI-THREADED FUNCTIONS
///

/**
* A work item is a constant reference to SiDB charge space composition.
*/
Expand Down Expand Up @@ -583,7 +717,7 @@ class clustercomplete_impl
{
const std::lock_guard lock(mutex_to_protect_this_queue);

clustering_state_for_thieves = sidb_clustering_state{clustering_state};
clustering_state_for_thieves = clustering_state;

thief_informants.clear();

Expand Down Expand Up @@ -818,37 +952,6 @@ class clustercomplete_impl
return std::nullopt;
}
};
/**
 * Builds the charge distribution surface used by the simulation from the given layout.
 *
 * The surface receives the physical simulation parameters first. If the layout type offers defect iteration, every
 * defect whose type is not `NONE` is then added to the potential landscape of the surface. Finally, the local and
 * global external potentials from the parameters are assigned.
 *
 * @param lyt Layout to simulate.
 * @param params Parameters for ClusterComplete.
 * @return The charge layout initialized with the physical parameters, the defects of the layout (if supported), and
 * the external potentials given in the parameters.
 */
[[nodiscard]] static charge_distribution_surface<Lyt>
initialize_charge_layout(const Lyt& lyt, const clustercomplete_params<cell<Lyt>>& params) noexcept
{
    charge_distribution_surface<Lyt> surface{lyt};
    surface.assign_physical_parameters(params.simulation_parameters);

    // layout types without defect iteration skip this step at compile time
    if constexpr (has_foreach_sidb_defect_v<Lyt>)
    {
        lyt.foreach_sidb_defect(
            [&](const auto& entry)
            {
                const auto& [c, defect] = entry;

                // entries of type NONE are not added to the potential landscape
                if (defect.type == sidb_defect_type::NONE)
                {
                    return;
                }

                surface.add_sidb_defect_to_potential_landscape(c, lyt.get_sidb_defect(c));
            });
    }

    surface.assign_local_external_potential(params.local_external_potential);
    surface.assign_global_external_potential(params.global_potential);

    return surface;
}
/**
* Work in the form of compositions of charge space elements of the top cluster are extracted into a vector and
* shuffled at random before being returned. The shuffling may balance the initial workload division.
Expand Down Expand Up @@ -1085,9 +1188,7 @@ template <typename Lyt>
static_assert(is_cell_level_layout_v<Lyt>, "Lyt is not a cell-level layout");
static_assert(has_sidb_technology_v<Lyt>, "Lyt is not an SiDB layout");

detail::clustercomplete_impl<Lyt> p{lyt, params};

return p.run(params);
return detail::clustercomplete_impl<Lyt>{lyt, params}.run(params);
}

} // namespace fiction
Expand Down
2 changes: 1 addition & 1 deletion include/fiction/technology/sidb_cluster_hierarchy.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ sidb_cluster_hierarchy(Lyt& lyt, sidb_cluster_hierarchy_linkage_method linkage_m
const uint64_t new_n = charge_lyt.num_cells() + static_cast<uint64_t>(i);

#ifdef DEBUG_SIDB_CLUSTER_HIERARCHY
std::set<uint64_t> set_union{};
std::set<uint64_t> unioned_set{};
#else
phmap::flat_hash_set<uint64_t> unioned_set{};
#endif
Expand Down
Loading
Loading