Skip to content

Commit 2de0911

Browse files
authored
Cell Reading Rewrite, main branch (2024.04.19.) (#547)
* Re-wrote the CSV cell reading code. Made sure that proper comparison operators exist for traccc::cell and traccc::cell_module, and then simply relied on STL containers for ordering the cells and modules correctly in memory.
* Re-enabled reading duplicate cells from CSV files. The truth mapping code (currently) relies on possibly having duplicate cells in the event record, for different particles depositing energy in the same cell. The truth mapping code, and some of the I/O unit tests, are now set up to disable the cell de-duplication during CSV reading. Reverted to the previous ordering scheme for traccc::cell-s, and implemented a (different) custom ordering for the cell collection, since, as it turns out, the project implicitly relies on these two behaving a little differently.
* Using an std::map for cell deduplication instead of an std::set. This way it becomes possible to sum up the activations of the duplicate cells, using the value stored in the map.
1 parent 002bd91 commit 2de0911

File tree

7 files changed

+144
-112
lines changed

7 files changed

+144
-112
lines changed

core/include/traccc/edm/cell.hpp

+8-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/** TRACCC library, part of the ACTS project (R&D line)
22
*
3-
* (c) 2022 CERN for the benefit of the ACTS project
3+
* (c) 2022-2024 CERN for the benefit of the ACTS project
44
*
55
* Mozilla Public License Version 2.0
66
*/
@@ -37,6 +37,12 @@ struct cell_module {
3737
/// Declare all cell module collection types
3838
using cell_module_collection_types = collection_types<cell_module>;
3939

40+
/// Comparison operator for cell modules
41+
TRACCC_HOST_DEVICE
42+
inline bool operator<(const cell_module& lhs, const cell_module& rhs) {
43+
return lhs.surface_link < rhs.surface_link;
44+
}
45+
4046
/// Equality operator for cell module
4147
TRACCC_HOST_DEVICE
4248
inline bool operator==(const cell_module& lhs, const cell_module& rhs) {
@@ -61,6 +67,7 @@ struct cell {
6167
/// Declare all cell collection types
6268
using cell_collection_types = collection_types<cell>;
6369

70+
/// Comparison operator for cells
6471
TRACCC_HOST_DEVICE
6572
inline bool operator<(const cell& lhs, const cell& rhs) {
6673

io/include/traccc/io/read_cells.hpp

+7-3
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/** TRACCC library, part of the ACTS project (R&D line)
22
*
3-
* (c) 2022-2023 CERN for the benefit of the ACTS project
3+
* (c) 2022-2024 CERN for the benefit of the ACTS project
44
*
55
* Mozilla Public License Version 2.0
66
*/
@@ -37,13 +37,15 @@ namespace traccc::io {
3737
/// @param dconfig The detector's digitization configuration
3838
/// @param barcode_map An object to perform barcode re-mapping with
3939
/// (For Acts->Detray identifier re-mapping, if necessary)
40+
/// @param deduplicate Whether to deduplicate the cells
4041
///
4142
void read_cells(
4243
cell_reader_output &out, std::size_t event, std::string_view directory,
4344
data_format format = data_format::csv, const geometry *geom = nullptr,
4445
const digitization_config *dconfig = nullptr,
4546
const std::map<std::uint64_t, detray::geometry::barcode> *barcode_map =
46-
nullptr);
47+
nullptr,
48+
bool deduplicate = true);
4749

4850
/// Read cell data into memory
4951
///
@@ -56,12 +58,14 @@ void read_cells(
5658
/// @param dconfig The detector's digitization configuration
5759
/// @param barcode_map An object to perform barcode re-mapping with
5860
/// (For Acts->Detray identifier re-mapping, if necessary)
61+
/// @param deduplicate Whether to deduplicate the cells
5962
///
6063
void read_cells(cell_reader_output &out, std::string_view filename,
6164
data_format format = data_format::csv,
6265
const geometry *geom = nullptr,
6366
const digitization_config *dconfig = nullptr,
6467
const std::map<std::uint64_t, detray::geometry::barcode>
65-
*barcode_map = nullptr);
68+
*barcode_map = nullptr,
69+
bool deduplicate = true);
6670

6771
} // namespace traccc::io

io/src/csv/read_cells.cpp

+110-94
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/** TRACCC library, part of the ACTS project (R&D line)
22
*
3-
* (c) 2022-2023 CERN for the benefit of the ACTS project
3+
* (c) 2022-2024 CERN for the benefit of the ACTS project
44
*
55
* Mozilla Public License Version 2.0
66
*/
@@ -13,6 +13,9 @@
1313
// System include(s).
1414
#include <algorithm>
1515
#include <cassert>
16+
#include <iostream>
17+
#include <map>
18+
#include <set>
1619
#include <stdexcept>
1720
#include <utility>
1821
#include <vector>
@@ -21,20 +24,28 @@ namespace {
2124

2225
/// Comparator used for sorting cells. This sorting is one of the assumptions
2326
/// made in the clusterization algorithm
24-
const auto comp = [](const traccc::cell& c1, const traccc::cell& c2) {
25-
return c1.channel1 < c2.channel1;
26-
};
27+
struct cell_order {
28+
bool operator()(const traccc::cell& lhs, const traccc::cell& rhs) const {
29+
if (lhs.module_link != rhs.module_link) {
30+
return lhs.module_link < rhs.module_link;
31+
} else if (lhs.channel1 != rhs.channel1) {
32+
return (lhs.channel1 < rhs.channel1);
33+
} else {
34+
return (lhs.channel0 < rhs.channel0);
35+
}
36+
}
37+
}; // struct cell_order
2738

2839
/// Helper function which finds module from csv::cell in the geometry and
2940
/// digitization config, and initializes the modules limits with the cell's
3041
/// properties
31-
traccc::cell_module get_module(traccc::io::csv::cell c,
42+
traccc::cell_module get_module(const std::uint64_t geometry_id,
3243
const traccc::geometry* geom,
3344
const traccc::digitization_config* dconfig,
3445
const std::uint64_t original_geometry_id) {
3546

3647
traccc::cell_module result;
37-
result.surface_link = detray::geometry::barcode{c.geometry_id};
48+
result.surface_link = detray::geometry::barcode{geometry_id};
3849

3950
// Find/set the 3D position of the detector module.
4051
if (geom != nullptr) {
@@ -72,114 +83,119 @@ traccc::cell_module get_module(traccc::io::csv::cell c,
7283
return result;
7384
}
7485

75-
} // namespace
76-
77-
namespace traccc::io::csv {
86+
std::map<std::uint64_t, std::vector<traccc::cell> > read_deduplicated_cells(
87+
std::string_view filename) {
7888

79-
void read_cells(
80-
cell_reader_output& out, std::string_view filename, const geometry* geom,
81-
const digitization_config* dconfig,
82-
const std::map<std::uint64_t, detray::geometry::barcode>* barcode_map) {
89+
// Temporary storage for all the cells and modules.
90+
std::map<std::uint64_t, std::map<traccc::cell, float, ::cell_order> >
91+
cellMap;
8392

8493
// Construct the cell reader object.
85-
auto reader = make_cell_reader(filename);
86-
87-
// Create cell counter vector.
88-
std::vector<unsigned int> cellCounts;
89-
cellCounts.reserve(5000);
90-
91-
cell_module_collection_types::host& result_modules = out.modules;
92-
result_modules.reserve(5000);
93-
94-
// Create a cell collection, which holds on to a flat list of all the cells
95-
// and the position of their respective cell counter & module.
96-
std::vector<std::pair<csv::cell, unsigned int>> allCells;
97-
allCells.reserve(50000);
94+
auto reader = traccc::io::csv::make_cell_reader(filename);
9895

9996
// Read all cells from input file.
100-
csv::cell iocell;
97+
traccc::io::csv::cell iocell;
98+
unsigned int nduplicates = 0;
10199
while (reader.read(iocell)) {
102100

103-
// Modify the geometry ID of the cell if a barcode map is provided.
104-
const std::uint64_t original_geometry_id = iocell.geometry_id;
105-
if (barcode_map != nullptr) {
106-
const auto it = barcode_map->find(iocell.geometry_id);
107-
if (it != barcode_map->end()) {
108-
iocell.geometry_id = it->second.value();
109-
} else {
110-
throw std::runtime_error(
111-
"Could not find barcode for geometry ID " +
112-
std::to_string(iocell.geometry_id));
113-
}
101+
// Construct a cell object.
102+
const traccc::cell cell{iocell.channel0, iocell.channel1, iocell.value,
103+
iocell.timestamp, 0};
104+
105+
// Add the cell to the module. At this point the module link of the
106+
// cells is not set up correctly yet.
107+
auto ret = cellMap[iocell.geometry_id].insert({cell, iocell.value});
108+
if (ret.second == false) {
109+
cellMap[iocell.geometry_id].at(cell) += iocell.value;
110+
++nduplicates;
114111
}
112+
}
113+
if (nduplicates > 0) {
114+
std::cout << "WARNING: @traccc::io::csv::read_cells: " << nduplicates
115+
<< " duplicate cells found in " << filename << std::endl;
116+
}
115117

116-
// Look for current module in cell counter vector.
117-
auto rit = std::find_if(result_modules.rbegin(), result_modules.rend(),
118-
[&iocell](const cell_module& mod) {
119-
return mod.surface_link.value() ==
120-
iocell.geometry_id;
121-
});
122-
if (rit == result_modules.rend()) {
123-
// Add new cell and new cell counter if a new module is found
124-
const cell_module mod =
125-
get_module(iocell, geom, dconfig, original_geometry_id);
126-
allCells.push_back({iocell, result_modules.size()});
127-
result_modules.push_back(mod);
128-
cellCounts.push_back(1);
129-
} else {
130-
// Add a new cell and update cell counter if repeat module is found
131-
const unsigned int pos =
132-
std::distance(result_modules.begin(), rit.base()) - 1;
133-
allCells.push_back({iocell, pos});
134-
++(cellCounts[pos]);
118+
// Create and fill the result container, with summed activation values.
119+
std::map<std::uint64_t, std::vector<traccc::cell> > result;
120+
for (const auto& [geometry_id, cells] : cellMap) {
121+
for (const auto& [cell, value] : cells) {
122+
traccc::cell summed_cell{cell};
123+
summed_cell.activation = value;
124+
result[geometry_id].push_back(summed_cell);
135125
}
136126
}
137127

138-
// Transform the cellCounts vector into a prefix sum for accessing
139-
// positions in the result vector.
140-
std::partial_sum(cellCounts.begin(), cellCounts.end(), cellCounts.begin());
128+
// Return the container.
129+
return result;
130+
}
141131

142-
// The total number of cells.
143-
const unsigned int totalCells = allCells.size();
132+
std::map<std::uint64_t, std::vector<traccc::cell> > read_all_cells(
133+
std::string_view filename) {
144134

145-
// Construct the result collection.
146-
cell_collection_types::host& result_cells = out.cells;
147-
result_cells.resize(totalCells);
135+
// The result container.
136+
std::map<std::uint64_t, std::vector<traccc::cell> > result;
148137

149-
// Member "-1" of the prefix sum vector
150-
unsigned int nCellsZero = 0;
151-
// Fill the result object with the read csv cells
152-
for (unsigned int i = 0; i < totalCells; ++i) {
153-
const csv::cell& c = allCells[i].first;
138+
// Construct the cell reader object.
139+
auto reader = traccc::io::csv::make_cell_reader(filename);
154140

155-
// The position of the cell counter this cell belongs to
156-
const unsigned int& counterPos = allCells[i].second;
141+
// Read all cells from input file.
142+
traccc::io::csv::cell iocell;
143+
while (reader.read(iocell)) {
157144

158-
unsigned int& prefix_sum_previous =
159-
counterPos == 0 ? nCellsZero : cellCounts[counterPos - 1];
160-
result_cells[prefix_sum_previous++] = traccc::cell{
161-
c.channel0, c.channel1, c.value, c.timestamp, counterPos};
145+
// Add the cell to the module. At this point the module link of the
146+
// cells is not set up correctly yet.
147+
result[iocell.geometry_id].push_back({iocell.channel0, iocell.channel1,
148+
iocell.value, iocell.timestamp,
149+
0});
162150
}
163151

164-
if (cellCounts.size() == 0) {
165-
return;
152+
// Sort the cells. Deduplication or not, they do need to be sorted.
153+
for (auto& [_, cells] : result) {
154+
std::sort(cells.begin(), cells.end(), ::cell_order());
166155
}
167-
/* This is might look a bit overcomplicated, and could be made simpler by
168-
* having a copy of the prefix sum vector before incrementing its value when
169-
* filling the vector. however this seems more efficient, but requires
170-
* manually setting the 1st & 2nd modules instead of just the 1st.
171-
*/
172-
173-
// Sort the cells belonging to the first module.
174-
std::sort(result_cells.begin(), result_cells.begin() + nCellsZero, comp);
175-
// Sort the cells belonging to the second module.
176-
std::sort(result_cells.begin() + nCellsZero,
177-
result_cells.begin() + cellCounts[0], comp);
178-
179-
// Sort cells belonging to all other modules.
180-
for (unsigned int i = 1; i < cellCounts.size() - 1; ++i) {
181-
std::sort(result_cells.begin() + cellCounts[i - 1],
182-
result_cells.begin() + cellCounts[i], comp);
156+
157+
// Return the container.
158+
return result;
159+
}
160+
161+
} // namespace
162+
163+
namespace traccc::io::csv {
164+
165+
void read_cells(
166+
cell_reader_output& out, std::string_view filename, const geometry* geom,
167+
const digitization_config* dconfig,
168+
const std::map<std::uint64_t, detray::geometry::barcode>* barcode_map,
169+
const bool deduplicate) {
170+
171+
// Get the cells and modules into an intermediate format.
172+
auto cellsMap = (deduplicate ? read_deduplicated_cells(filename)
173+
: read_all_cells(filename));
174+
175+
// Fill the output containers with the ordered cells and modules.
176+
for (const auto& [original_geometry_id, cells] : cellsMap) {
177+
// Modify the geometry ID of the module if a barcode map is
178+
// provided.
179+
std::uint64_t geometry_id = original_geometry_id;
180+
if (barcode_map != nullptr) {
181+
const auto it = barcode_map->find(geometry_id);
182+
if (it != barcode_map->end()) {
183+
geometry_id = it->second.value();
184+
} else {
185+
throw std::runtime_error(
186+
"Could not find barcode for geometry ID " +
187+
std::to_string(geometry_id));
188+
}
189+
}
190+
191+
// Add the module and its cells to the output.
192+
out.modules.push_back(
193+
get_module(geometry_id, geom, dconfig, original_geometry_id));
194+
for (auto& cell : cells) {
195+
out.cells.push_back(cell);
196+
// Set the module link.
197+
out.cells.back().module_link = out.modules.size() - 1;
198+
}
183199
}
184200
}
185201

io/src/csv/read_cells.hpp

+4-2
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
/** TRACCC library, part of the ACTS project (R&D line)
22
*
3-
* (c) 2022-2023 CERN for the benefit of the ACTS project
3+
* (c) 2022-2024 CERN for the benefit of the ACTS project
44
*
55
* Mozilla Public License Version 2.0
66
*/
@@ -27,11 +27,13 @@ namespace traccc::io::csv {
2727
/// @param geom The description of the detector geometry
2828
/// @param dconfig The detector's digitization configuration
2929
/// @param barcode_map An object to perform barcode re-mapping with
30+
/// @param deduplicate Whether to deduplicate the cells
3031
///
3132
void read_cells(cell_reader_output& out, std::string_view filename,
3233
const geometry* geom = nullptr,
3334
const digitization_config* dconfig = nullptr,
3435
const std::map<std::uint64_t, detray::geometry::barcode>*
35-
barcode_map = nullptr);
36+
barcode_map = nullptr,
37+
bool deduplicate = true);
3638

3739
} // namespace traccc::io::csv

io/src/mapper.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -207,7 +207,7 @@ generate_measurement_cell_map(std::size_t event,
207207
// Read the cells from the relevant event file
208208
traccc::io::cell_reader_output readOut(&resource);
209209
io::read_cells(readOut, event, cells_dir, traccc::data_format::csv,
210-
&surface_transforms, &digi_cfg);
210+
&surface_transforms, &digi_cfg, nullptr, false);
211211
cell_collection_types::host& cells_per_event = readOut.cells;
212212
cell_module_collection_types::host& modules_per_event = readOut.modules;
213213

0 commit comments

Comments
 (0)