Skip to content

Commit 28c4331

Browse files
committed
Refactor edge storage.
This changes edge storage from an array of structs allocated per node to a struct of arrays holding the edge data for all nodes. Several algorithms over edges that were previously written per node, per edge — but were actually just iterations over all edges — are now part of rr_node_storage. Signed-off-by: Keith Rothman <[email protected]>
1 parent 3c19d3d commit 28c4331

File tree

10 files changed

+273
-342
lines changed

10 files changed

+273
-342
lines changed

vpr/src/base/vpr_types.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1316,4 +1316,6 @@ class RouteStatus {
13161316

13171317
typedef vtr::vector<ClusterBlockId, std::vector<std::vector<int>>> t_clb_opins_used; //[0..num_blocks-1][0..class-1][0..used_pins-1]
13181318

1319+
typedef std::vector<std::map<int, int>> t_arch_switch_fanin;
1320+
13191321
#endif

vpr/src/route/rr_graph.cpp

Lines changed: 8 additions & 131 deletions
Original file line numberDiff line numberDiff line change
@@ -67,8 +67,6 @@ struct t_pin_loc {
6767
e_side side;
6868
};
6969

70-
typedef std::vector<std::map<int, int>> t_arch_switch_fanin;
71-
7270
/******************* Variables local to this module. ***********************/
7371

7472
/********************* Subroutines local to this module. *******************/
@@ -822,44 +820,11 @@ static void alloc_and_load_rr_switch_inf(const int num_arch_switches, const floa
822820
static void alloc_rr_switch_inf(t_arch_switch_fanin& arch_switch_fanins) {
823821
auto& device_ctx = g_vpr_ctx.mutable_device();
824822

825-
int num_rr_switches = 0;
826-
{
827-
//Collect the fan-in per switch type for each node in the graph
828-
//
829-
//Note that since we don't store backward edge info in the RR graph we need to walk
830-
//the whole graph to get the per-switch-type fanin info
831-
std::vector<vtr::flat_map<int, int>> inward_switch_inf(device_ctx.rr_nodes.size()); //[to_node][arch_switch] -> fanin
832-
for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); ++inode) {
833-
for (auto iedge : device_ctx.rr_nodes[inode].edges()) {
834-
int iswitch = device_ctx.rr_nodes[inode].edge_switch(iedge);
835-
int to_node = device_ctx.rr_nodes[inode].edge_sink_node(iedge);
836-
837-
if (inward_switch_inf[to_node].count(iswitch) == 0) {
838-
inward_switch_inf[to_node][iswitch] = 0;
839-
}
840-
inward_switch_inf[to_node][iswitch]++;
841-
}
842-
}
843-
844-
//Record the unique switch type/fanin combinations
845-
for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); ++inode) {
846-
for (auto& switch_fanin : inward_switch_inf[inode]) {
847-
int iswitch, fanin;
848-
std::tie(iswitch, fanin) = switch_fanin;
849-
850-
if (device_ctx.arch_switch_inf[iswitch].fixed_Tdel()) {
851-
//If delay is independent of fanin drop the unique fanin info
852-
fanin = UNDEFINED;
853-
}
854-
855-
if (arch_switch_fanins[iswitch].count(fanin) == 0) { //New fanin for this switch
856-
arch_switch_fanins[iswitch][fanin] = num_rr_switches++; //Assign it a unique index
857-
}
858-
}
859-
}
860-
}
861-
862823
/* allocate space for the rr_switch_inf array */
824+
size_t num_rr_switches = device_ctx.rr_nodes.count_rr_switches(
825+
device_ctx.num_arch_switches,
826+
device_ctx.arch_switch_inf,
827+
arch_switch_fanins);
863828
device_ctx.rr_switch_inf.resize(num_rr_switches);
864829
}
865830

@@ -930,27 +895,7 @@ void load_rr_switch_from_arch_switch(int arch_switch_idx,
930895
static void remap_rr_node_switch_indices(const t_arch_switch_fanin& switch_fanin) {
931896
auto& device_ctx = g_vpr_ctx.mutable_device();
932897

933-
for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) {
934-
auto from_node = device_ctx.rr_nodes[inode];
935-
int num_edges = from_node.num_edges();
936-
for (int iedge = 0; iedge < num_edges; iedge++) {
937-
const t_rr_node& to_node = device_ctx.rr_nodes[from_node.edge_sink_node(iedge)];
938-
/* get the switch which this edge uses and its fanin */
939-
int switch_index = from_node.edge_switch(iedge);
940-
int fanin = to_node.fan_in();
941-
942-
if (switch_fanin[switch_index].count(UNDEFINED) == 1) {
943-
fanin = UNDEFINED;
944-
}
945-
946-
auto itr = switch_fanin[switch_index].find(fanin);
947-
VTR_ASSERT(itr != switch_fanin[switch_index].end());
948-
949-
int rr_switch_index = itr->second;
950-
951-
from_node.set_edge_switch(iedge, rr_switch_index);
952-
}
953-
}
898+
device_ctx.rr_nodes.remap_rr_node_switch_indices(switch_fanin);
954899
}
955900

956901
static void rr_graph_externals(const std::vector<t_segment_inf>& segment_inf,
@@ -1324,7 +1269,7 @@ static std::function<void(t_chan_width*)> alloc_and_load_rr_graph(t_rr_node_stor
13241269
};
13251270
}
13261271

1327-
init_fan_in(L_rr_node, L_rr_node.size());
1272+
L_rr_node.init_fan_in();
13281273

13291274
return update_chan_width;
13301275
}
@@ -1481,9 +1426,6 @@ static void build_rr_sinks_sources(const int i,
14811426
* leads to. If route throughs are allowed, you may want to increase the *
14821427
* base cost of OPINs and/or SOURCES so they aren't used excessively. */
14831428

1484-
/* Initialize to unconnected */
1485-
L_rr_node[inode].set_num_edges(0);
1486-
14871429
L_rr_node[inode].set_cost_index(SINK_COST_INDEX);
14881430
L_rr_node[inode].set_type(SINK);
14891431
}
@@ -1555,24 +1497,6 @@ static void build_rr_sinks_sources(const int i,
15551497
//Create the actual edges
15561498
}
15571499

1558-
void init_fan_in(t_rr_node_storage& L_rr_node, const int num_rr_nodes) {
1559-
//Loads fan-ins for all nodes
1560-
1561-
//Reset all fan-ins to zero
1562-
for (int i = 0; i < num_rr_nodes; i++) {
1563-
L_rr_node[i].set_fan_in(0);
1564-
}
1565-
1566-
//Walk the graph and increment fanin on all downstream nodes
1567-
for (int i = 0; i < num_rr_nodes; i++) {
1568-
for (t_edge_size iedge = 0; iedge < L_rr_node[i].num_edges(); iedge++) {
1569-
int to_node = L_rr_node[i].edge_sink_node(iedge);
1570-
1571-
L_rr_node[to_node].set_fan_in(L_rr_node[to_node].fan_in() + 1);
1572-
}
1573-
}
1574-
}
1575-
15761500
/* Allocates/loads edges for nodes belonging to specified channel segment and initializes
15771501
* node properties such as cost, occupancy and capacity */
15781502
static void build_rr_chan(const int x_coord,
@@ -1756,54 +1680,7 @@ void uniquify_edges(t_rr_edge_info_set& rr_edges_to_create) {
17561680

17571681
void alloc_and_load_edges(t_rr_node_storage& L_rr_node,
17581682
const t_rr_edge_info_set& rr_edges_to_create) {
1759-
/* Sets up all the edge related information for rr_node */
1760-
1761-
struct compare_from_node {
1762-
auto operator()(const t_rr_edge_info& lhs, const int from_node) {
1763-
return lhs.from_node < from_node;
1764-
}
1765-
auto operator()(const int from_node, const t_rr_edge_info& rhs) {
1766-
return from_node < rhs.from_node;
1767-
}
1768-
};
1769-
1770-
std::set<int> from_nodes;
1771-
for (auto& edge : rr_edges_to_create) {
1772-
from_nodes.insert(edge.from_node);
1773-
}
1774-
1775-
VTR_ASSERT_SAFE(std::is_sorted(rr_edges_to_create.begin(), rr_edges_to_create.end()));
1776-
1777-
for (int inode : from_nodes) {
1778-
auto edge_range = std::equal_range(rr_edges_to_create.begin(), rr_edges_to_create.end(), inode, compare_from_node());
1779-
1780-
size_t edge_count = std::distance(edge_range.first, edge_range.second);
1781-
1782-
if (L_rr_node[inode].num_edges() == 0) {
1783-
//Create initial edges
1784-
//
1785-
//Note that we do this in bulk instead of via add_edge() to reduce
1786-
//memory fragmentation
1787-
1788-
L_rr_node[inode].set_num_edges(edge_count);
1789-
1790-
int iedge = 0;
1791-
for (auto itr = edge_range.first; itr != edge_range.second; ++itr) {
1792-
VTR_ASSERT(itr->from_node == inode);
1793-
1794-
L_rr_node[inode].set_edge_sink_node(iedge, itr->to_node);
1795-
L_rr_node[inode].set_edge_switch(iedge, itr->switch_type);
1796-
++iedge;
1797-
}
1798-
} else {
1799-
//Add new edge incrementally
1800-
//
1801-
//This should occur relatively rarely (e.g. a backward bidir edge) so memory fragmentation shouldn't be a big problem
1802-
for (auto itr = edge_range.first; itr != edge_range.second; ++itr) {
1803-
L_rr_node[inode].add_edge(itr->to_node, itr->switch_type);
1804-
}
1805-
}
1806-
}
1683+
L_rr_node.alloc_and_load_edges(&rr_edges_to_create);
18071684
}
18081685

18091686
/* allocate pin to track map for each segment type individually and then combine into a single
@@ -2545,7 +2422,7 @@ std::string describe_rr_node(int inode) {
25452422

25462423
std::string msg = vtr::string_fmt("RR node: %d", inode);
25472424

2548-
const auto& rr_node = device_ctx.rr_nodes[inode];
2425+
auto rr_node = device_ctx.rr_nodes[inode];
25492426

25502427
msg += vtr::string_fmt(" type: %s", rr_node.type_string());
25512428

vpr/src/route/rr_graph.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,8 +47,6 @@ std::string describe_rr_node(int inode);
4747

4848
class t_rr_node_storage;
4949

50-
void init_fan_in(t_rr_node_storage& L_rr_node, const int num_rr_nodes);
51-
5250
// Sets the spec for the rr_switch based on the arch switch
5351
void load_rr_switch_from_arch_switch(int arch_switch_idx,
5452
int rr_switch_idx,

vpr/src/route/rr_graph2.cpp

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2582,9 +2582,5 @@ static bool should_apply_switch_override(int switch_override) {
25822582
}
25832583

25842584
void partition_rr_graph_edges(DeviceContext& device_ctx) {
2585-
for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); ++inode) {
2586-
device_ctx.rr_nodes[inode].partition_edges();
2587-
2588-
VTR_ASSERT_SAFE(device_ctx.rr_nodes[inode].validate());
2589-
}
2585+
device_ctx.rr_nodes.partition_edges();
25902586
}

vpr/src/route/rr_graph_reader.cpp

Lines changed: 16 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -156,13 +156,17 @@ void load_rr_file(const t_graph_type graph_type,
156156
next_component = get_single_child(rr_graph, "rr_edges", loc_data);
157157
process_edges(next_component, loc_data, wire_to_rr_ipin_switch, numSwitches);
158158

159+
// Edge switch indices were converted to rr_switch_inf indices prior
160+
// to being written.
161+
device_ctx.rr_nodes.mark_edges_as_rr_switch_ids();
162+
159163
//Partition the rr graph edges for efficient access to configurable/non-configurable
160164
//edge subsets. Must be done after RR switches have been allocated
161165
partition_rr_graph_edges(device_ctx);
162166

163167
process_rr_node_indices(grid);
164168

165-
init_fan_in(device_ctx.rr_nodes, device_ctx.rr_nodes.size());
169+
device_ctx.rr_nodes.init_fan_in();
166170

167171
//sets the cost index and seg id information
168172
next_component = get_single_child(rr_graph, "rr_nodes", loc_data);
@@ -364,9 +368,6 @@ void process_nodes(pugi::xml_node parent, const pugiutil::loc_data& loc_data) {
364368
}
365369
node.set_rc_index(find_create_rr_rc_data(R, C));
366370

367-
//clear each node edge
368-
node.set_num_edges(0);
369-
370371
// <metadata>
371372
// <meta name='grid_prefix' >CLBLL_L_</meta>
372373
// </metadata>
@@ -392,35 +393,15 @@ void process_edges(pugi::xml_node parent, const pugiutil::loc_data& loc_data, in
392393
auto& device_ctx = g_vpr_ctx.mutable_device();
393394
pugi::xml_node edges;
394395

396+
size_t num_edges = 0;
395397
edges = get_first_child(parent, "edge", loc_data);
396-
//count the number of edges and store it in a vector
397-
std::vector<size_t> num_edges_for_node;
398-
num_edges_for_node.resize(device_ctx.rr_nodes.size(), 0);
399-
400398
while (edges) {
401-
size_t source_node = get_attribute(edges, "src_node", loc_data).as_uint();
402-
if (source_node >= device_ctx.rr_nodes.size()) {
403-
VPR_FATAL_ERROR(VPR_ERROR_OTHER,
404-
"source_node %d is larger than rr_nodes.size() %d",
405-
source_node, device_ctx.rr_nodes.size());
406-
}
407-
408-
num_edges_for_node[source_node]++;
399+
num_edges += 1;
409400
edges = edges.next_sibling(edges.name());
410401
}
411402

412-
//reset this vector in order to start count for num edges again
413-
for (size_t inode = 0; inode < device_ctx.rr_nodes.size(); inode++) {
414-
if (num_edges_for_node[inode] > std::numeric_limits<t_edge_size>::max()) {
415-
VPR_FATAL_ERROR(VPR_ERROR_OTHER,
416-
"source node %d edge count %d is too high",
417-
inode, num_edges_for_node[inode]);
418-
}
419-
device_ctx.rr_nodes[inode].set_num_edges(num_edges_for_node[inode]);
420-
num_edges_for_node[inode] = 0;
421-
}
422-
423403
edges = get_first_child(parent, "edge", loc_data);
404+
424405
/*initialize a vector that keeps track of the number of wire to ipin switches
425406
* There should be only one wire to ipin switch. In case there are more, make sure to
426407
* store the most frequent switch */
@@ -434,6 +415,12 @@ void process_edges(pugi::xml_node parent, const pugiutil::loc_data& loc_data, in
434415
size_t sink_node = get_attribute(edges, "sink_node", loc_data).as_uint();
435416
int switch_id = get_attribute(edges, "switch_id", loc_data).as_int();
436417

418+
if (source_node >= device_ctx.rr_nodes.size()) {
419+
VPR_FATAL_ERROR(VPR_ERROR_OTHER,
420+
"source_node %d is larger than rr_nodes.size() %d",
421+
source_node, device_ctx.rr_nodes.size());
422+
}
423+
437424
if (sink_node >= device_ctx.rr_nodes.size()) {
438425
VPR_FATAL_ERROR(VPR_ERROR_OTHER,
439426
"sink_node %d is larger than rr_nodes.size() %d",
@@ -458,8 +445,7 @@ void process_edges(pugi::xml_node parent, const pugiutil::loc_data& loc_data, in
458445
}
459446
}
460447
//set edge in correct rr_node data structure
461-
device_ctx.rr_nodes[source_node].set_edge_sink_node(num_edges_for_node[source_node], sink_node);
462-
device_ctx.rr_nodes[source_node].set_edge_switch(num_edges_for_node[source_node], switch_id);
448+
device_ctx.rr_nodes.emplace_back_edge(RRNodeId(source_node), RRNodeId(sink_node), switch_id);
463449

464450
// Read the metadata for the edge
465451
auto metadata = get_single_child(edges, "metadata", loc_data, pugiutil::OPTIONAL);
@@ -474,12 +460,10 @@ void process_edges(pugi::xml_node parent, const pugiutil::loc_data& loc_data, in
474460
edges_meta = edges_meta.next_sibling(edges_meta.name());
475461
}
476462
}
477-
num_edges_for_node[source_node]++;
478-
479463
edges = edges.next_sibling(edges.name()); //Next edge
480464
}
465+
481466
*wire_to_rr_ipin_switch = most_frequent_switch.first;
482-
num_edges_for_node.clear();
483467
count_for_wire_to_ipin_switches.clear();
484468
}
485469

0 commit comments

Comments
 (0)