-
Notifications
You must be signed in to change notification settings - Fork 418
Generalize Parmys Mult_Split to Allow for Multipliers Whose Input Widths are not Equal #3143
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
WhiteNinjaZ
wants to merge
8
commits into
verilog-to-routing:master
Choose a base branch
from
byuccl:fix_mult
base: master
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Changes from all commits
Commits
Show all changes
8 commits
Select commit
Hold shift + click to select a range
1788fe3
added check that would have made debugging easier
WhiteNinjaZ 19653ee
fix multiplier
WhiteNinjaZ c713f85
new method, passing parmys
WhiteNinjaZ b900ffd
cleanup and clarified comments
WhiteNinjaZ da60450
Merge branch 'master' of github.com:verilog-to-routing/vtr-verilog-to…
WhiteNinjaZ d87e7e1
add tests and golden for reg_strong
WhiteNinjaZ 1391852
Added mcml golden results to nightly
WhiteNinjaZ 44782b5
Merge branch 'master' into fix_mult
WhiteNinjaZ File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -937,7 +937,7 @@ void init_multiplier_adder(nnode_t *node, nnode_t *parent, int a, int b) | |
*-----------------------------------------------------------------------*/ | ||
void split_multiplier(nnode_t *node, int a0, int b0, int a1, int b1, netlist_t *netlist) | ||
{ | ||
nnode_t *a0b0, *a0b1, *a1b0, *a1b1, *addsmall, *addbig; | ||
nnode_t *a0b0, *a0b1, *a1b0, *a1b1, *addsmall, *addsmall2, *addbig; | ||
int size; | ||
|
||
/* Check for a legitimate split */ | ||
|
@@ -976,50 +976,153 @@ void split_multiplier(nnode_t *node, int a0, int b0, int a1, int b1, netlist_t * | |
init_split_multiplier(node, a1b0, a0, a1, 0, b0, a1b1, a0b0); | ||
mult_list = insert_in_vptr_list(mult_list, a1b0); | ||
|
||
/* New node for the initial add */ | ||
addsmall = allocate_nnode(node->loc); | ||
addsmall->name = (char *)vtr::malloc(strlen(node->name) + 6); | ||
strcpy(addsmall->name, node->name); | ||
strcat(addsmall->name, "-add0"); | ||
// this addition will have a carry out in the worst case, add to input pins and connect them to gnd | ||
init_multiplier_adder(addsmall, a1b0, a1b0->num_output_pins + 1, a0b1->num_output_pins + 1); | ||
|
||
/* New node for the BIG add */ | ||
addbig = allocate_nnode(node->loc); | ||
addbig->name = (char *)vtr::malloc(strlen(node->name) + 6); | ||
strcpy(addbig->name, node->name); | ||
strcat(addbig->name, "-add1"); | ||
init_multiplier_adder(addbig, addsmall, addsmall->num_output_pins, a0b0->num_output_pins - b0 + a1b1->num_output_pins); | ||
|
||
// connect inputs to port a of addsmall | ||
for (int i = 0; i < a1b0->num_output_pins; i++) | ||
connect_nodes(a1b0, i, addsmall, i); | ||
add_input_pin_to_node(addsmall, get_zero_pin(netlist), a1b0->num_output_pins); | ||
// connect inputs to port b of addsmall | ||
for (int i = 0; i < a0b1->num_output_pins; i++) | ||
connect_nodes(a0b1, i, addsmall, i + addsmall->input_port_sizes[0]); | ||
add_input_pin_to_node(addsmall, get_zero_pin(netlist), a0b1->num_output_pins + addsmall->input_port_sizes[0]); | ||
|
||
// connect inputs to port a of addbig | ||
size = addsmall->num_output_pins; | ||
for (int i = 0; i < size; i++) | ||
connect_nodes(addsmall, i, addbig, i); | ||
|
||
// connect inputs to port b of addbig | ||
for (int i = b0; i < a0b0->output_port_sizes[0]; i++) | ||
connect_nodes(a0b0, i, addbig, i - b0 + size); | ||
size = size + a0b0->output_port_sizes[0] - b0; | ||
for (int i = 0; i < a1b1->output_port_sizes[0]; i++) | ||
connect_nodes(a1b1, i, addbig, i + size); | ||
|
||
// remap the multiplier outputs coming directly from a0b0 | ||
for (int i = 0; i < b0; i++) { | ||
remap_pin_to_new_node(node->output_pins[i], a0b0, i); | ||
} | ||
// using the balanced addition method only works if a0 and b0 are the same size | ||
// (i.e. if the input ports on the hardware multiplier are equal) | ||
if (b0 == a0) { | ||
/* New node for the initial add */ | ||
addsmall = allocate_nnode(node->loc); | ||
addsmall->name = (char *)vtr::malloc(strlen(node->name) + 6); | ||
strcpy(addsmall->name, node->name); | ||
strcat(addsmall->name, "-add0"); | ||
// this addition will have a carry out in the worst case, add to input pins and connect them to gnd | ||
init_multiplier_adder(addsmall, a1b0, a1b0->num_output_pins + 1, a0b1->num_output_pins + 1); | ||
|
||
// connect inputs to port a of addsmall | ||
for (int i = 0; i < a1b0->num_output_pins; i++) | ||
connect_nodes(a1b0, i, addsmall, i); | ||
|
||
add_input_pin_to_node(addsmall, get_zero_pin(netlist), a1b0->num_output_pins); | ||
// connect inputs to port b of addsmall | ||
for (int i = 0; i < a0b1->num_output_pins; i++) | ||
connect_nodes(a0b1, i, addsmall, i + addsmall->input_port_sizes[0]); | ||
add_input_pin_to_node(addsmall, get_zero_pin(netlist), a0b1->num_output_pins + addsmall->input_port_sizes[0]); | ||
|
||
/* New node for the BIG add */ | ||
addbig = allocate_nnode(node->loc); | ||
addbig->name = (char *)vtr::malloc(strlen(node->name) + 6); | ||
strcpy(addbig->name, node->name); | ||
strcat(addbig->name, "-add1"); | ||
init_multiplier_adder(addbig, addsmall, addsmall->num_output_pins, a0b0->num_output_pins - b0 + a1b1->num_output_pins); | ||
|
||
// connect inputs to port a of addbig | ||
size = addsmall->num_output_pins; | ||
for (int i = 0; i < size; i++) | ||
connect_nodes(addsmall, i, addbig, i); | ||
|
||
// connect inputs to port b of addbig | ||
for (int i = b0; i < a0b0->output_port_sizes[0]; i++) | ||
connect_nodes(a0b0, i, addbig, i - b0 + size); | ||
size = size + a0b0->output_port_sizes[0] - b0; | ||
for (int i = 0; i < a1b1->output_port_sizes[0]; i++) | ||
connect_nodes(a1b1, i, addbig, i + size); | ||
|
||
// remap the multiplier outputs coming directly from a0b0 | ||
for (int i = 0; i < b0; i++) { | ||
remap_pin_to_new_node(node->output_pins[i], a0b0, i); | ||
} | ||
|
||
// remap the multiplier outputs coming from addbig | ||
for (int i = 0; i < addbig->num_output_pins; i++) { | ||
remap_pin_to_new_node(node->output_pins[i + b0], addbig, i); | ||
} | ||
} else { | ||
/* Expounding upon the description for the method in this function. | ||
if we have two numbers A and B and we have a hardware multiplier of size a0xb0, | ||
we can split them into two parts: | ||
A = A1 << a0 + A0 | ||
B = B1 << b0 + B0 | ||
where A1 and B1 are the high bits of A and B, and A0 and B0 are the low bits. | ||
Note that len(A0) = a0 and len(B0) = b0 by definition. | ||
The multiplication of A and B can be expressed as: | ||
A * B = (A1 << a0 + A0) * (B1 << b0 + B0) | ||
= {A1 * B1 << (a0 + b0)} + {(A1 * B0) << a0 + (A0 * B1) << b0} + {A0 * B0} | ||
we split the additions up like so: | ||
addsmall = (A1 * B0) << a0 + (A0 * B1) << b0 // can have carry | ||
addsmall2 = (A1 * B1 << (a0 + b0)) + (A0 * B0) // Will not have carry | ||
addbig = addsmall + addsmall2 | ||
This is a slightly modified version of the Karatsuba algorithm. | ||
*/ | ||
/////////////// Addsmall ///////////////////// | ||
addsmall = allocate_nnode(node->loc); | ||
addsmall->name = (char *)vtr::malloc(strlen(node->name) + 6); | ||
strcpy(addsmall->name, node->name); | ||
strcat(addsmall->name, "-add0"); | ||
init_multiplier_adder(addsmall, a1b0, a1b0->num_output_pins + a0 + 1, a0b1->num_output_pins + b0 + 1); | ||
|
||
// The first a0 pins of addsmall input connecting to a1b0 are connected to zero | ||
for (int i = 0; i < a0; i++) { | ||
add_input_pin_to_node(addsmall, get_zero_pin(netlist), i); | ||
} | ||
|
||
// connect inputs to port a of addsmall | ||
for (int i = 0; i < a1b0->num_output_pins; i++) { | ||
connect_nodes(a1b0, i, addsmall, i + a0); | ||
} | ||
|
||
// add zero pin for carry | ||
add_input_pin_to_node(addsmall, get_zero_pin(netlist), a1b0->num_output_pins + a0); | ||
|
||
// The first b0 pins of addsmall input connecting to a0b1 are connected to zero | ||
for (int i = 0; i < b0; i++) { | ||
add_input_pin_to_node(addsmall, get_zero_pin(netlist), i + addsmall->input_port_sizes[0]); | ||
} | ||
|
||
// connect inputs to port b of addsmall | ||
for (int i = 0; i < a0b1->num_output_pins; i++) { | ||
connect_nodes(a0b1, i, addsmall, i + addsmall->input_port_sizes[0] + b0); | ||
} | ||
|
||
// add zero pin for carry | ||
add_input_pin_to_node(addsmall, get_zero_pin(netlist), a0b1->num_output_pins + addsmall->input_port_sizes[0] + b0); | ||
|
||
/////////////// Addsmall2 ///////////////////// | ||
addsmall2 = allocate_nnode(node->loc); | ||
addsmall2->name = (char *)vtr::malloc(strlen(node->name) + 6); | ||
strcpy(addsmall2->name, node->name); | ||
strcat(addsmall2->name, "-add1"); | ||
init_multiplier_adder(addsmall2, a1b1, a1b1->num_output_pins + a0 + b0, a0b0->num_output_pins); | ||
|
||
// remap the multiplier outputs coming from addbig | ||
for (int i = 0; i < addbig->num_output_pins; i++) { | ||
remap_pin_to_new_node(node->output_pins[i + b0], addbig, i); | ||
// connect first a0+ b0 pins of addsmall2 to zero | ||
for (int i = 0; i < a0 + b0; i++) { | ||
add_input_pin_to_node(addsmall2, get_zero_pin(netlist), i); | ||
} | ||
|
||
// connect inputs to port a of addsmall2 | ||
for (int i = 0; i < a1b1->num_output_pins; i++) { | ||
connect_nodes(a1b1, i, addsmall2, i + a0 + b0); | ||
} | ||
|
||
// connect inputs to port b of addsmall2 | ||
for (int i = 0; i < a0b0->output_port_sizes[0]; i++) { | ||
connect_nodes(a0b0, i, addsmall2, i + addsmall2->input_port_sizes[0]); | ||
} | ||
|
||
/////////////// Addbig ///////////////////// | ||
addbig = allocate_nnode(node->loc); | ||
addbig->name = (char *)vtr::malloc(strlen(node->name) + 6); | ||
strcpy(addbig->name, node->name); | ||
strcat(addbig->name, "-add2"); | ||
init_multiplier_adder(addbig, addsmall, addsmall->num_output_pins, addsmall2->num_output_pins); | ||
// Here the final addition can have a carry out in the worst case; however, | ||
// our final product will always only be the length of the longest input port, so regardless of the carry, the | ||
// final add's carry will always drop out. | ||
|
||
// connect inputs to port a of addbig | ||
for (int i = 0; i < addsmall->num_output_pins; i++) { | ||
connect_nodes(addsmall, i, addbig, i); | ||
} | ||
// add_input_pin_to_node(addbig, get_zero_pin(netlist), addsmall->num_output_pins); | ||
|
||
// connect inputs to port b of addbig | ||
for (int i = 0; i < addsmall2->num_output_pins; i++) { | ||
connect_nodes(addsmall2, i, addbig, i + addbig->input_port_sizes[0]); | ||
} | ||
// add_input_pin_to_node(addbig, get_zero_pin(netlist), addbig->input_port_sizes[0] + addsmall->num_output_pins); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Same comment. Just want to make sure that some zeroed inputs were not being forgotten. |
||
|
||
// remap the multiplier outputs coming from addbig | ||
for (int i = 0; i < addbig->num_output_pins; i++) { | ||
remap_pin_to_new_node(node->output_pins[i], addbig, i); | ||
} | ||
} | ||
|
||
// CLEAN UP | ||
|
@@ -1060,7 +1163,6 @@ void split_multiplier_a(nnode_t *node, int a0, int a1, int b) | |
strcat(a0b->name, "-0"); | ||
init_split_multiplier(node, a0b, 0, a0, 0, b, nullptr, nullptr); | ||
mult_list = insert_in_vptr_list(mult_list, a0b); | ||
|
||
/* New node for a1b multiply */ | ||
a1b = allocate_nnode(node->loc); | ||
a1b->name = (char *)vtr::malloc(strlen(node->name) + 3); | ||
|
@@ -1184,7 +1286,6 @@ void pad_multiplier(nnode_t *node, netlist_t *netlist) | |
|
||
oassert(node->type == MULTIPLY); | ||
oassert(hard_multipliers != NULL); | ||
|
||
sizea = node->input_port_sizes[0]; | ||
sizeb = node->input_port_sizes[1]; | ||
sizeout = node->output_port_sizes[0]; | ||
|
@@ -1199,6 +1300,13 @@ void pad_multiplier(nnode_t *node, netlist_t *netlist) | |
} | ||
diffa = ina - sizea; | ||
diffb = inb - sizeb; | ||
// input multiplier size falls in the middle range of an unequal hard block size (e.g. mul_size > 18 && mul_size < 25) | ||
if (diffb < 0) { | ||
std::swap(ina, inb); | ||
diffa = ina - sizea; | ||
diffb = inb - sizeb; | ||
} | ||
|
||
diffout = hard_multipliers->outputs->size - sizeout; | ||
|
||
if (configuration.split_hard_multiplier == 1) { | ||
|
@@ -1281,11 +1389,10 @@ void iterate_multipliers(netlist_t *netlist) | |
int mula, mulb; | ||
int a0, a1, b0, b1; | ||
nnode_t *node; | ||
|
||
/* Can only perform the optimisation if hard multipliers exist! */ | ||
if (hard_multipliers == NULL) | ||
return; | ||
|
||
// std::cin.get(); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Commented out code. |
||
sizea = hard_multipliers->inputs->size; | ||
sizeb = hard_multipliers->inputs->next->size; | ||
if (sizea < sizeb) { | ||
|
@@ -1313,7 +1420,6 @@ void iterate_multipliers(netlist_t *netlist) | |
sizea = sizeb; | ||
sizeb = swap; | ||
} | ||
|
||
/* Do I need to split the multiplier on both inputs? */ | ||
if ((mula > sizea) && (mulb > sizeb)) { | ||
a0 = sizea; | ||
|
@@ -1890,4 +1996,4 @@ void free_multipliers() | |
|
||
hard_multipliers->instances = NULL; | ||
} | ||
} | ||
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Commented out code. Was this intended to be commented out?