Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve memory usage in track finding postamble #908

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions device/common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,12 @@ traccc_add_library( traccc_device_common device_common TYPE INTERFACE
"include/traccc/finding/device/fill_sort_keys.hpp"
"include/traccc/finding/device/make_barcode_sequence.hpp"
"include/traccc/finding/device/propagate_to_next_surface.hpp"
"include/traccc/finding/device/prune_tracks.hpp"
"include/traccc/finding/device/impl/apply_interaction.ipp"
"include/traccc/finding/device/impl/build_tracks.ipp"
"include/traccc/finding/device/impl/find_tracks.ipp"
"include/traccc/finding/device/impl/fill_sort_keys.ipp"
"include/traccc/finding/device/impl/make_barcode_sequence.ipp"
"include/traccc/finding/device/impl/propagate_to_next_surface.ipp"
"include/traccc/finding/device/impl/prune_tracks.ipp"
# Track fitting function(s).
"include/traccc/fitting/device/fit.hpp"
"include/traccc/fitting/device/impl/fit.ipp"
Expand Down
15 changes: 2 additions & 13 deletions device/common/include/traccc/finding/device/build_tracks.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,20 +50,9 @@ struct build_tracks_payload {
tips_view;

/**
* @brief View object to the vector of track candidates
* @brief View object to the vector of pruned track candidates
*/
track_candidate_container_types::view track_candidates_view;

/**
* @brief View object to the vector of indices meeting the selection
* criteria
*/
vecmem::data::vector_view<unsigned int> valid_indices_view;

/**
* @brief The number of valid tracks meeting criteria
*/
unsigned int* n_valid_tracks;
track_candidate_container_types::view final_candidates_view;
};

/// Function for building full tracks from the link container:
Expand Down
101 changes: 28 additions & 73 deletions device/common/include/traccc/finding/device/impl/build_tracks.ipp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

#pragma once

#include <vecmem/containers/device_vector.hpp>
namespace traccc::device {

TRACCC_DEVICE inline void build_tracks(const global_index_t globalIndex,
Expand All @@ -25,59 +26,32 @@ TRACCC_DEVICE inline void build_tracks(const global_index_t globalIndex,
const vecmem::device_vector<const typename candidate_link::link_index_type>
tips(payload.tips_view);

track_candidate_container_types::device track_candidates(
payload.track_candidates_view);

vecmem::device_vector<unsigned int> valid_indices(
payload.valid_indices_view);
track_candidate_container_types::device final_candidates(
payload.final_candidates_view);

if (globalIndex >= tips.size()) {
return;
}

const auto tip = tips.at(globalIndex);
auto& seed = track_candidates[globalIndex].header.seed_params;
auto& trk_quality = track_candidates[globalIndex].header.trk_quality;
auto cands_per_track = track_candidates[globalIndex].items;

// Get the link corresponding to tip
auto L = links[tip.first][tip.second];
const unsigned int n_meas = measurements.size();

// Count the number of skipped steps
unsigned int n_skipped{0u};
while (true) {
if (L.meas_idx > n_meas) {
n_skipped++;
}

if (L.previous.first == 0u) {
break;
}

L = links[L.previous.first][L.previous.second];
}

// Retrieve tip
L = links[tip.first][tip.second];

const unsigned int n_cands = tip.first + 1 - n_skipped;

// Resize the candidates with the exact size
cands_per_track.resize(n_cands);
const unsigned int num_meas = final_candidates.at(globalIndex).items.size();

bool success = true;
assert(num_meas >= cfg.min_track_candidates_per_track &&
num_meas <= cfg.max_track_candidates_per_track);

// Track summary variables
scalar ndf_sum = 0.f;
scalar chi2_sum = 0.f;

[[maybe_unused]] std::size_t num_inserted = 0;
unsigned int final_n_skipped = 0;
unsigned int seed_idx = 0;

// Iterate in reverse to fill the track candidates
for (auto it = cands_per_track.rbegin(); it != cands_per_track.rend();
it++) {

for (unsigned int i = num_meas - 1; i < num_meas; --i) {
while (L.meas_idx >= n_meas &&
L.previous.first !=
std::numeric_limits<
Expand All @@ -86,64 +60,45 @@ TRACCC_DEVICE inline void build_tracks(const global_index_t globalIndex,
L = links[L.previous.first][L.previous.second];
}

// Break if the measurement is still invalid
if (L.meas_idx >= measurements.size()) {
success = false;
break;
}
assert(L.meas_idx < measurements.size());

*it = {measurements.at(L.meas_idx)};
num_inserted++;
final_candidates.at(globalIndex).items.at(i) =
measurements.at(L.meas_idx);

// Sanity check on chi2
assert(L.chi2 < std::numeric_limits<traccc::scalar>::max());
assert(L.chi2 >= 0.f);

ndf_sum += static_cast<scalar>(it->meas_dim);
ndf_sum += static_cast<scalar>(measurements.at(L.meas_idx).meas_dim);
chi2_sum += L.chi2;

// Break the loop if the iterator is at the first candidate and fill the
// seed and track quality
if (it == cands_per_track.rend() - 1) {
seed = seeds.at(L.previous.second);
trk_quality.ndf = ndf_sum - 5.f;
trk_quality.chi2 = chi2_sum;
trk_quality.n_holes = L.n_skipped;
if (i == 0) {
seed_idx = L.previous.second;
final_n_skipped = L.n_skipped;
} else {
L = links[L.previous.first][L.previous.second];
}
}

final_candidates.at(globalIndex).header.seed_params = seeds.at(seed_idx);
final_candidates.at(globalIndex).header.trk_quality.chi2 = chi2_sum;
final_candidates.at(globalIndex).header.trk_quality.ndf = ndf_sum - 5.f;
final_candidates.at(globalIndex).header.trk_quality.n_holes =
final_n_skipped;
Comment on lines +85 to +89
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I honestly don't know this, since the compiler might already do this as efficiently as possible.

But would it not be more efficient to write this as:

final_candidates.at(globalIndex).header = {...};

? Note that I'm very aware that I'm also talking against my SoA efforts with this. 😛 Since if this formalism is indeed more efficient, the SoA migration will make this part worse. 😦


#ifndef NDEBUG
if (success) {
// Assert that we inserted exactly as many elements as we reserved
// space for.
assert(num_inserted == cands_per_track.size());

// Assert that we did not make any duplicate track states.
for (unsigned int i = 0; i < cands_per_track.size(); ++i) {
for (unsigned int j = 0; j < cands_per_track.size(); ++j) {
if (i != j) {
assert(cands_per_track.at(i).measurement_id !=
cands_per_track.at(j).measurement_id);
}
// Assert that we did not make any duplicate track states.
for (unsigned int i = 0; i < num_meas; ++i) {
for (unsigned int j = 0; j < num_meas; ++j) {
if (i != j) {
// TODO: Reenable
// assert(meas_indxs[i] != meas_indxs[j]);
}
}
}
#endif

// NOTE: We may at some point want to assert that `success` is true

// Criteria for valid tracks
if (n_cands >= cfg.min_track_candidates_per_track &&
n_cands <= cfg.max_track_candidates_per_track && success) {

vecmem::device_atomic_ref<unsigned int> num_valid_tracks(
*payload.n_valid_tracks);

const unsigned int pos = num_valid_tracks.fetch_add(1);
valid_indices[pos] = globalIndex;
}
}

} // namespace traccc::device
Original file line number Diff line number Diff line change
Expand Up @@ -47,85 +47,97 @@ TRACCC_DEVICE inline void propagate_to_next_surface(
const unsigned int s_pos = num_tracks_per_seed.fetch_add(1);
vecmem::device_vector<unsigned int> params_liveness(
payload.params_liveness_view);

if (s_pos >= cfg.max_num_branches_per_seed) {
params_liveness[param_id] = 0u;
return;
}

// tips
vecmem::device_vector<typename candidate_link::link_index_type> tips(
payload.tips_view);
vecmem::device_vector<unsigned int> tip_lengths(payload.tip_lengths_view);

if (links.at(param_id).n_skipped > cfg.max_num_skipping_per_cand) {
params_liveness[param_id] = 0u;
tips.push_back({payload.step, param_id});
return;
}

// Detector
typename propagator_t::detector_type det(payload.det_data);

// Parameters
bound_track_parameters_collection_types::device params(payload.params_view);
bool create_tip = false;

if (params_liveness.at(param_id) == 0u) {
return;
if (s_pos >= cfg.max_num_branches_per_seed) {
params_liveness.at(param_id) = 0u;
} else if (links.at(param_id).n_skipped > cfg.max_num_skipping_per_cand) {
params_liveness.at(param_id) = 0u;
create_tip = true;
}

// Input bound track parameter
const bound_track_parameters<> in_par = params.at(param_id);

// Create propagator
propagator_t propagator(cfg.propagation);

// Create propagator state
typename propagator_t::state propagation(in_par, payload.field_data, det);
propagation.set_particle(
detail::correct_particle_hypothesis(cfg.ptc_hypothesis, in_par));
propagation._stepping
.template set_constraint<detray::step::constraint::e_accuracy>(
cfg.propagation.stepping.step_constraint);

// Actor state
// @TODO: simplify the syntax here
// @NOTE: Post material interaction might be required here
using actor_list_type =
typename propagator_t::actor_chain_type::actor_list_type;
typename detray::detail::tuple_element<0, actor_list_type>::type::state
s0{};
typename detray::detail::tuple_element<1, actor_list_type>::type::state
s1{};
typename detray::detail::tuple_element<3, actor_list_type>::type::state
s3{};
typename detray::detail::tuple_element<2, actor_list_type>::type::state s2{
s3};
typename detray::detail::tuple_element<4, actor_list_type>::type::state s4;
s4.min_step_length = cfg.min_step_length_for_next_surface;
s4.max_count = cfg.max_step_counts_for_next_surface;

// @TODO: Should be removed once detray is fixed to set the volume in the
// constructor
propagation._navigation.set_volume(in_par.surface_link().volume());

// Propagate to the next surface
propagator.propagate_sync(propagation, detray::tie(s0, s1, s2, s3, s4));

// If a surface is found, add the parameter for the next step
if (s4.success) {
params[param_id] = propagation._stepping.bound_params();

if (payload.step == cfg.max_track_candidates_per_track - 1) {
tips.push_back({payload.step, param_id});
params_liveness[param_id] = 0u;
if (params_liveness.at(param_id) != 0u) {
// Detector
typename propagator_t::detector_type det(payload.det_data);

// Parameters
bound_track_parameters_collection_types::device params(
payload.params_view);

// Input bound track parameter
const bound_track_parameters<> in_par = params.at(param_id);

// Create propagator
propagator_t propagator(cfg.propagation);

// Create propagator state
typename propagator_t::state propagation(in_par, payload.field_data,
det);
propagation.set_particle(
detail::correct_particle_hypothesis(cfg.ptc_hypothesis, in_par));
propagation._stepping
.template set_constraint<detray::step::constraint::e_accuracy>(
cfg.propagation.stepping.step_constraint);

// Actor state
// @TODO: simplify the syntax here
// @NOTE: Post material interaction might be required here
using actor_list_type =
typename propagator_t::actor_chain_type::actor_list_type;
typename detray::detail::tuple_element<0, actor_list_type>::type::state
s0{};
typename detray::detail::tuple_element<1, actor_list_type>::type::state
s1{};
typename detray::detail::tuple_element<3, actor_list_type>::type::state
s3{};
typename detray::detail::tuple_element<2, actor_list_type>::type::state
s2{s3};
typename detray::detail::tuple_element<4, actor_list_type>::type::state
s4;
s4.min_step_length = cfg.min_step_length_for_next_surface;
s4.max_count = cfg.max_step_counts_for_next_surface;

// @TODO: Should be removed once detray is fixed to set the volume in
// the constructor
propagation._navigation.set_volume(in_par.surface_link().volume());

// Propagate to the next surface
propagator.propagate_sync(propagation, detray::tie(s0, s1, s2, s3, s4));

// If a surface is found, add the parameter for the next step
if (s4.success) {
params[param_id] = propagation._stepping.bound_params();

if (payload.step == cfg.max_track_candidates_per_track - 1) {
create_tip = true;
params_liveness[param_id] = 0u;
} else {
params_liveness[param_id] = 1u;
}
} else {
params_liveness[param_id] = 1u;
params_liveness[param_id] = 0u;

if (payload.step >= cfg.min_track_candidates_per_track - 1) {
create_tip = true;
}
}
} else {
params_liveness[param_id] = 0u;
}

if (create_tip) {
const auto& L = links.at(param_id);

const unsigned int num_meas = payload.step + 1 - L.n_skipped;

if (payload.step >= cfg.min_track_candidates_per_track - 1) {
tips.push_back({payload.step, param_id});
// Criteria for valid tracks
if (num_meas >= cfg.min_track_candidates_per_track &&
num_meas <= cfg.max_track_candidates_per_track) {
const unsigned int tip_pos =
tips.push_back({payload.step, param_id});
tip_lengths.at(tip_pos) = num_meas;
}
}
}
Expand Down
Loading