diff --git a/.gitignore b/.gitignore
index 24787044..e2e9fdb9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,6 +8,21 @@
\ No newline at end of file
+### SBT ###
diff --git a/bin/pdl b/bin/pdl
index e315a9c5..d368076e 100755
--- a/bin/pdl
+++ b/bin/pdl
@@ -1,8 +1,10 @@
+set -e
#execute the pdl compiler
-SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
+SCRIPTPATH=$(cd "$(dirname "$0")" && pwd -P)
#pass through all cmds except fist
java -jar "$SCRIPTPATH"/../"$JARPATH" "$@"
diff --git a/bin/runbsc b/bin/runbsc
index ca0d4d6c..ba4d74e9 100755
--- a/bin/runbsc
+++ b/bin/runbsc
@@ -1,5 +1,8 @@
+set -e
+SCRIPTPATH=$(cd "$(dirname "$0")" && pwd -P)
usage () {
echo "Usage: ./runbsc [v|s|c]
@@ -31,11 +34,14 @@ else
echo "$WDIR"" does not exist"
exit 1
+RUNTIMEPATH=$(cd "$SCRIPTPATH/../bscRuntime" && pwd -P)
ARGS="-no-show-timestamps -no-show-version --aggressive-conditions"
-BSC_LIB_DIR=$(realpath "$SCRIPTPATH"/../bscRuntime/locks)":"$(realpath "$SCRIPTPATH"/../bscRuntime/memories)
-BPATH="-p .:"$BSC_LIB_DIR":"$BLUESPECDIR/inst/lib/Libraries/":%/Libraries"
+BPATH="-p .:$BSC_LIB_DIR:$BLUESPECDIR/inst/lib/Libraries/"
mkdir -p $VDIR
@@ -44,12 +50,12 @@ mkdir -p $SDIR
case "$CMD" in
#Compile to Verilog
- bsc $ARGS $BPATH -show-schedule -verilog -vdir $VDIR -u "$TOP".bsv
+ "$BSC" $ARGS $BPATH -show-schedule -verilog -vdir $VDIR -u "$TOP".bsv
#Run simulation
- bsc $ARGS $BPATH -sim -simdir $SDIR -u "$TOP".bsv
- bsc $ARGS -sim -simdir $SDIR -o "$TB".bexe -e "$TB" "$TB".ba
+ "$BSC" $ARGS $BPATH -sim -simdir $SDIR -u "$TOP".bsv
+ "$BSC" $ARGS -sim -simdir $SDIR -o "$TB".bexe -e "$TB" "$TB".ba
timeout 10s ./"$TB".bexe > top.sim.out
diff --git a/docs/interfaces.md b/docs/interfaces.md
index c728c423..6c2269b5 100644
--- a/docs/interfaces.md
+++ b/docs/interfaces.md
@@ -366,6 +366,11 @@ One option is to say that bypass(x) has the same latency as (x),
but compilation is a little tricky (i.e., we send requests only when the bypass fails,
and then need to send data b/w stages that says whether or not the bypass value was valid).
+### Idea
+Read from regfile anyway (regardless of lock state) and then fix w/ bypass later?
+Its own type? (e.g., speculative type on the data)
### Runtime Semantics
The main question becomes how to efficiently provide a bypass interface to locks that allows storing
diff --git a/docs/renaming.md b/docs/renaming.md
new file mode 100644
index 00000000..855f2a49
--- /dev/null
+++ b/docs/renaming.md
@@ -0,0 +1,177 @@
+# Renaming Abstraction
+Renaming may be a more general form of the lock abstraction from the early version of PDL,
+which also may be able to provide an interface over many different data dependency breaking violations.
+## Operations
+The notion of "explicit renaming" in hardware architecture involves the following operations:
+1. Reading the current physical name for an architectural location
+2. Allocating a new physical name for an architectural location
+3. Checking data validity, given a name
+4. Reading data, given a name
+5. Writing data, given a name
+6. Freeing an old physical name, once it is no longer in use
+"Explicit renaming" involves maintaining a map from architectural names to physical names;
+this abstraction requires a mapping function but is otherwise flexible in terms of implementation.
+Names may refer to unified register files, reservation stations or other locations.
+This makes it somewhat attractive as an abstraction.
+## Restrictions
+Like locking in our original language, there are restrictions on the ordering
+of these operations necessary for correct execution:
+1. Reading names and allocating new names must occur in thread order.
+2. A thread should always read names before allocating new ones (i.e., a thread should read only old names, not ones it allocates)
+3. Only "new names" can be used as write targets
+4. Only "old names" can be used as read targets
+5. Data can only be read if the name refers to valid data. (i.e., can only execute `read(name)` iff `isValid(name)`)
+6. Once a name is freed, it cannot be used again (until it is allocated again). One way to enforce this is to ensure that (1) `free`-ing a name prevents all subsequent reads and writes by this thread and (2) `free`s happen in thread order.
+## Relationship to Locking
+Our locking idea supports the following operations:
+1. Reserve a lock for an architectural location
+2. Check if a lock reservation is "owned"
+3. Release a lock reservation
+The restrictions on locks are:
+1. Locks must be reserved in thread order.
+2. Locks must be "owned" before a thread can read from or write to that location.
+3. Locks must be released, but only once owned.
+There are some parallels between these abstractions.
+### Reservation
+Lock reservation returns a unique lock identifier, which names the reservation.
+This is analogous to allocating a new name for a location.
+The main differences are that name allocation must also return the _old_ name so
+that the old name can be `freed` later (it is possible to hide this from the programmer
+and just have the internals of the abstraction keep track of this association);
+and that locks are required for both reads and writes - whereas name allocation
+is only necessary for writes.
+### Blocking
+Locks require blocking all operations until they have been acquired (i.e., all
+other threads have released their locks that alias the same location).
+With renaming, writes are never blocked once a name for them has been allocated,
+and reads are blocked until data has been written.
+### Release
+Lock release, once owned, indicates that a location will
+never be used by that thread again.
+We need both read and write locks to ensure that
+writes don't run ahead of reads.
+With renaming, only _old names_ need to be released,
+indicating they will never be used again (read or written).
+This is basically analogous to freeing locks that are used for writes,
+since typically no reads happen afterwards within a single thread.
+Andrew: I'm confused about what ensures that there is just one
+canonical value for a given renamed resource ultimately. Weren't the
+locks providing some functionality there?
+### Translation
+We can implement the Rename API using locks _and_ vice versa.
+#### Rename API implemented (=>) via Lock API
+1. read name => reserve
+2. allocate name => acquire (i.e., reserve;block)
+3. check data validity => block
+4. read data => normal memory read
+5. write data => normal memory write
+6. free name => release
+With a slightly different implementation we could more naturally
+support similar performance by allowing the locks themselves to support
+reads and writes; this would allow writes to proceed without blocking, and they
+would only be committed to the real memory on release.
+#### Lock API implemented (=>) via Rename API
+1. reserve => allocate name
+2. block => check data validity
+3. release => free
+Since locks don't have any notion of "reads" vs. "writes"
+we would have to allocate a fresh name for every location
+that gets accessed. This would obviously not be ideal, but
+that's the limitation of translating from the less expressive
+to the more expressive API. Similarly, we have to "check data validity"
+on both reads and writes, using the "old name" from each allocation.
+### Edge Cases
+What happens if a thread wants to write two locations in the same memory?
+If they end up being the same location, whose responsibility is it to de-alias them?
+With reads this is less important in the Renaming API since they'll just get the same
+name. Ideally, I'd say that the rename semantics provide some clear ordering such that,
+in the event of an alias, one of the writes is ordered before the other. This lets the user
+de-alias the writes themselves, or just ignore the aliasing and end up with some behavior that doesn't really do anything.
+Andrew: are there any realistic cases where this happens?
+## A NEW Lock API
+Let's imagine a new lock API that has the _same_ expressivity as the Rename API;
+in other words, it differentiates _reads_ and _writes_.
+1. reserve(w|r) | same old reserve, but you get to specify the type of operations this lock allows you to do
+2. block | establishes that the operations this lock lets you do are do-able
+3. release | indicates you're done w/ the lock
+This new API has a different set of restrictions than the old one:
+1. reserves must happen in thread order, regardless of operation type
+2. block is only required for reads
+3. write locks must be released in thread order
+4. all read locks must be released _before_ write locks the thread holds
+We can translate this API into the rename API as follows:
+1. reserve(r) => read physical name
+2. reserve(w) => allocate new physical name
+3. block(r) => check data valid
+4. block(w) => nop
+5. release(r) => nop -> implies static checks but no runtime behavior
+6. release(w) => free physical name
+Reads and writes must also obviously go through this API since
+the "names" returned need to be interpreted in _some_ way.
+Therefore reads and writes don't use the original address, but the lock identifier
+returned from the API (a.k.a. the underlying name).
+## Interacting With Speculation
+An interesting question is how both our high level lock API and the low level rename API interact w/ speculation
+based on how we know speculative architectures need to be built.
+The lock API now needs to come with a dynamic identifier, which the requester uses to indicate
+whether or not it is speculative.
+In this way, reading / allocating physical names can happen speculatively and relies on the
+naming layer to track speculative state.
+Therefore, in order to _resolve_ speculation, we need to make further use of the "free" API call.
+We can imagine "free" being split into "commit" and "abort" which the compiler must ensure are
+only used when the thread is either _nonspeculative_ or _misspeculated_, respectively.
+"commit" has the behavior of finalizing writes or reads; in the latter case this is necessary
+for modules that might update internal state to track ordering of reads w.r.t. writes.
+"abort" takes a name and "rolls back" the state affected by
+that operation (including internal state, not just the memory). Likely, rollback will need to roll back
+everything "newer" than that point - or we may need multiple versions (one which rolls it all back, one which doesn't).
diff --git a/project/build.properties b/project/build.properties
index a919a9b5..e1d13cca 100644
--- a/project/build.properties
+++ b/project/build.properties
@@ -1 +1,2 @@
diff --git a/src/test/tests/branchesCheck/nested-branches-1.pdl b/src/test/tests/branchesCheck/nested-branches-1.pdl
new file mode 100644
index 00000000..8550991f
--- /dev/null
+++ b/src/test/tests/branchesCheck/nested-branches-1.pdl
@@ -0,0 +1,149 @@
+pipe multi_stg_mul(a1: int<32>, a2: int<32>)[]: int<32> { // builds a 32-bit product of a1 and a2 from 16-bit partial products
+ int<32> rr = a1{15:0} * a2{15:0}; // low half of a1 times low half of a2
+ int<32> rl = a1{15:0} * a2{31:16}; // low half of a1 times high half of a2
+ int<32> lr = a1{31:16} * a2{15:0}; // high half of a1 times low half of a2
+// disabled high-by-high term: int<32> ll = a1{31:16} * a2{31:16};
+ ---
+ int<32> t1 = rl + lr; // sum of the two cross terms
+ int<32> res = (t1 << 16<5>) + rr; // cross terms moved up 16 bits, then the low-by-low product added
+ output(res); // res is the value returned to the caller
+pipe multi_stg_div(num: int<32>, denom: int<32>, quot: int<32>, acc: int<32>, cnt: int<5>)[]: int<32> { // one shift-and-subtract division step per invocation; calls itself until cnt reaches 31
+ int<32> tmp = acc{30:0} ++ num{31:31}; // low 31 bits of acc followed by the top bit of num
+ int<32> na = (tmp >= denom) ? (tmp - denom) : (tmp); // next acc: denom is subtracted only when tmp >= denom
+ int<32> nq = (tmp >= denom) ? ((quot << 1){31:1} ++ 1<1>) : (quot << 1); // next quot: shifted left, with the low bit forced to 1 when denom was subtracted
+ int<32> nnum = num << 1; // next num: the bit consumed above is shifted out
+ if (cnt == 31<5>) {
+ output(nq); // last step: nq is the pipe's result
+ } else {
+ call multi_stg_div(nnum, denom, nq, na, cnt + 1<5>); // otherwise run another step on the updated state
+ }
+pipe dummy(a1: int<32>, a2: int<32>)[]: int<32> { // outputs (a1 + a2 + a1) >> 1
+ int<32> a3 = a1 + a2 + a1; // a1 is added twice, a2 once
+ ---
+ int<32> res = a3 >> 1; // shift the sum right by one bit
+ output(res);
+pipe dummy2(a1: int<32>, a2: int<32>)[]: int<32> { // defines res on every path of an if/else nested inside a split, then outputs it
+ split {
+ case: (a1 % 2<32> == 0<32>) { // a1 is even
+ if (a2 > a1) {
+ int<32> res = a1 + a1; // even a1, a2 > a1: res is a1 + a1
+ } else {
+ int<32> res = a1 / 2<32>; // even a1, a2 <= a1: res is a1 / 2
+ }
+ }
+ default: { // taken when the case above does not match
+ if (a2 > a1) {
+ int<32> temp = a1 + a1;
+ int<32> res = temp + 1<32>; // res is a1 + a1 + 1
+ }
+ else {
+ int<32> res = (a1 - 1<32>)/2<32>; // res is (a1 - 1) / 2
+ }
+ }
+ }
+ output(res); // res was assigned on each of the four paths above
+pipe cpu(pc: int<16>)[rf: int<32>[5], imem: int<32>[16], m: multi_stg_mul, f: multi_stg_div, d: dummy, d2: dummy2]: bool { // test CPU: reads imem[pc], decodes it, computes res on one of several nested branches, then writes res to rf[rd]
+ if (pc < 14<16>) {
+ call cpu(pc + 1<16>); // start the invocation for the next pc while pc < 14
+ }
+ start(imem);
+ acquire(imem);
+ int<32> insn <- imem[pc]; // instruction word for this pc
+ release(imem);
+ end(imem);
+ ---
+ int<2> op = insn{1:0}; // bits 1:0 select the outer branch below
+ int<5> rs1 = insn{6:2}; // first source register index
+ int<5> rs2 = insn{11:7}; // second source register index
+ int<5> rd = insn{16:12}; // destination register index
+ start(rf);
+ acquire(rf[rs1]);
+ acquire(rf[rs2]);
+ int<32> rf1 = rf[rs1];
+ int<32> rf2 = rf[rs2];
+ release(rf[rs1]);
+ release(rf[rs2]);
+ reserve(rf[rd]); // reserved here; blocked on and released after the split, where rf[rd] is written
+ end(rf);
+ ---
+ split {
+ case: (op == 0<2>) { // op 0: the operation is chosen by rs1 % 3
+ split {
+ case: (rs1 % 3<5> == 0<5>) {
+ int<32> res <- rf1 + rf2;
+ }
+ case: (rs1 % 3<5> == 1<5>) {
+ int<32> res <- rf1 - rf2;
+ }
+ default: {
+ int<32> res <- rf1 << rf2;
+ }
+ }
+ }
+ case: (op == 1<2>) { // op 1: the operation is chosen by rs2 % 3
+ split {
+ case: (rs2 % 3<5> == 0<5>) {
+ start(f);
+ acquire(f);
+ int<32> res <- call f(rf1, rf2, 0<32>, 0<32>, 0<5>); // f is a multi_stg_div; quot, acc and cnt all start at 0
+ release(f);
+ end(f);
+ }
+ case: (rs2 % 3<5> == 1<5>){
+ int<32> res <- rf1 >> rf2;
+ }
+ default: {
+ start(m);
+ acquire(m);
+ int<32> res <- call m(rf1, rf2); // m is a multi_stg_mul
+ release(m);
+ end(m);
+ }
+ }
+ }
+ default: { // remaining op values: an if/else nested inside the split
+ if(op % 2<2> == 0<2>) {
+ start(d);
+ acquire(d);
+ int<32> res <- call d(rf1, rf2); // d is a dummy pipe
+ release(d);
+ end(d);
+ } else {
+ start(d2);
+ acquire(d2);
+ int<32> res <- call d2(rf1, rf2); // d2 is a dummy2 pipe
+ release(d2);
+ end(d2);
+ }
+ }
+ }
+ ---
+ block(rf[rd]); // block on the rf[rd] reservation made before the split
+ print(res);
+ rf[rd] <- res; // write the result to the destination register
+ release(rf[rd]);
+ ---
+ if (pc == 14<16>) {
+ output(true); // only the pc == 14 invocation produces the pipe's output
+ }
+circuit { // instantiates the memories and pipes, then starts the cpu at pc 0
+ i = memory(int<32>, 16); // passed to cpu as imem
+ r = regfile(int<32>, 5); // passed to cpu as rf
+ m = new multi_stg_mul[];
+ fp = new multi_stg_div[]; // passed to cpu as f
+ d = new dummy[];
+ d2 = new dummy2[];
+ c = new cpu[r, i, m, fp, d, d2]; // same order as cpu's module list: rf, imem, m, f, d, d2
+ call c(0<16>);
\ No newline at end of file
diff --git a/src/test/tests/branchesCheck/solutions/nested-branches-1.parsesol b/src/test/tests/branchesCheck/solutions/nested-branches-1.parsesol
new file mode 100644
index 00000000..8dc2ada6
--- /dev/null
+++ b/src/test/tests/branchesCheck/solutions/nested-branches-1.parsesol
@@ -0,0 +1,184 @@
+pipe multi_stg_mul(a1:int<32>,a2:int<32>)[] {
+ int<32> rr = a1{15:0} * a2{15:0};
+ int<32> rl = a1{15:0} * a2{31:16};
+ int<32> lr = a1{31:16} * a2{15:0};
+ ---
+ int<32> t1 = rl + lr;
+ int<32> res = t1 << 16<5> + rr;
+ output res;
+pipe multi_stg_div(num:int<32>,denom:int<32>,quot:int<32>,acc:int<32>,cnt:int<5>)[] {
+ int<32> tmp = acc{30:0} ++ num{31:31};
+ int<32> na = tmp >= denom ? tmp - denom : tmp;
+ int<32> nq = tmp >= denom ? quot << 1<1>{31:1} ++ 1<1> : quot << 1<1>;
+ int<32> nnum = num << 1<1>;
+ if ( cnt == 31<5> ) {
+ output nq;
+ } else {
+ call multi_stg_div(nnum,denom,nq,na,cnt + 1<5>);
+ }
+pipe dummy(a1:int<32>,a2:int<32>)[] {
+ int<32> a3 = a1 + a2 + a1;
+ ---
+ int<32> res = a3 >> 1<1>;
+ output res;
+pipe dummy2(a1:int<32>,a2:int<32>)[] {
+ split {
+ case: a1 % 2<32> == 0<32> {
+ if ( a2 > a1 ) {
+ int<32> res = a1 + a1;
+ } else {
+ int<32> res = a1 / 2<32>;
+ }
+ }
+ default: {
+ if ( a2 > a1 ) {
+ int<32> temp = a1 + a1;
+ int<32> res = temp + 1<32>;
+ } else {
+ int<32> res = a1 - 1<32> / 2<32>;
+ }
+ }
+ }
+ output res;
+pipe cpu(pc:int<16>)[rf:int<32>[5],imem:int<32>[16],m:multi_stg_mul,f:multi_stg_div,d:dummy,d2:dummy2] {
+ if ( pc < 14<16> ) {
+ call cpu(pc + 1<16>);
+ } else {
+ }
+ start(imem);
+ reserved(imem);
+ acquired(imem);
+ int<32> insn <- imem[pc];
+ released(imem);
+ end(imem);
+ ---
+ int<2> op = insn{1:0};
+ int<5> rs1 = insn{6:2};
+ int<5> rs2 = insn{11:7};
+ int<5> rd = insn{16:12};
+ start(rf);
+ reserved(rf[rs1]);
+ acquired(rf[rs1]);
+ reserved(rf[rs2]);
+ acquired(rf[rs2]);
+ int<32> rf1 = rf[rs1];
+ int<32> rf2 = rf[rs2];
+ released(rf[rs1]);
+ released(rf[rs2]);
+ reserved(rf[rd]);
+ end(rf);
+ ---
+ split {
+ case: op == 0<2> {
+ split {
+ case: rs1 % 3<5> == 0<5> {
+ int<32> res <- rf1 + rf2;
+ }
+ case: rs1 % 3<5> == 1<5> {
+ int<32> res <- rf1 - rf2;
+ }
+ default: {
+ int<32> res <- rf1 << rf2;
+ }
+ }
+ }
+ case: op == 1<2> {
+ split {
+ case: rs2 % 3<5> == 0<5> {
+ start(f);
+ reserved(f);
+ acquired(f);
+ int<32> res <- call f(rf1,rf2,0<32>,0<32>,0<5>);
+ released(f);
+ end(f);
+ }
+ case: rs2 % 3<5> == 1<5> {
+ int<32> res <- rf1 >> rf2;
+ }
+ default: {
+ start(m);
+ reserved(m);
+ acquired(m);
+ int<32> res <- call m(rf1,rf2);
+ released(m);
+ end(m);
+ }
+ }
+ }
+ default: {
+ if ( op % 2<2> == 0<2> ) {
+ start(d);
+ reserved(d);
+ acquired(d);
+ int<32> res <- call d(rf1,rf2);
+ released(d);
+ end(d);
+ } else {
+ start(d2);
+ reserved(d2);
+ acquired(d2);
+ int<32> res <- call d2(rf1,rf2);
+ released(d2);
+ end(d2);
+ }
+ }
+ }
+ ---
+ acquired(rf[rd]);
+ print(res);
+ rf[rd] <- res;
+ released(rf[rd]);
+ ---
+ if ( pc == 14<16> ) {
+ output true;
+ } else {
+ }
+circuit {
+ i = memory(int<32>,16);
+ r = regfile(int<32>,5);
+ m = new multi_stg_mul[];
+ fp = new multi_stg_div[];
+ d = new dummy[];
+ d2 = new dummy2[];
+ c = new cpu[r,i,m,fp,d,d2];
+call c(0<16>);
diff --git a/src/test/tests/branchesCheck/solutions/nested-branches-1.typechecksol b/src/test/tests/branchesCheck/solutions/nested-branches-1.typechecksol
new file mode 100644
index 00000000..9fb4ec93
--- /dev/null
+++ b/src/test/tests/branchesCheck/solutions/nested-branches-1.typechecksol
@@ -0,0 +1 @@
\ No newline at end of file