diff --git a/.gitignore b/.gitignore index 24787044..e2e9fdb9 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,21 @@ *.parse *.typecheck *.interpret +out testOutputs/* miscnotes -\#* \ No newline at end of file +\#* + +### SBT ### + +dist/* +target/ +lib_managed/ +src_managed/ +project/boot/ +project/plugins/project/ +project/**/target/ +.history +.cache +.lib/ +.bsp diff --git a/bin/pdl b/bin/pdl index e315a9c5..d368076e 100755 --- a/bin/pdl +++ b/bin/pdl @@ -1,8 +1,10 @@ #!/bin/bash +set -e + #execute the pdl compiler JARNAME=pdsl.jar JARPATH=target/scala-2.13/"$JARNAME" -SCRIPTPATH="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )" +SCRIPTPATH=$(cd "$(dirname "$0")" && pwd -P) #pass through all cmds except fist java -jar "$SCRIPTPATH"/../"$JARPATH" "$@" diff --git a/bin/runbsc b/bin/runbsc index ca0d4d6c..ba4d74e9 100755 --- a/bin/runbsc +++ b/bin/runbsc @@ -1,5 +1,8 @@ #!/bin/bash +set -e + +SCRIPTPATH=$(cd "$(dirname "$0")" && pwd -P) usage () { echo "Usage: ./runbsc [v|s|c] " @@ -31,11 +34,14 @@ else echo "$WDIR"" does not exist" exit 1 fi + +RUNTIMEPATH=$(cd "$SCRIPTPATH/../bscRuntime" && pwd -P) TOP=Circuit TB="mkTB" ARGS="-no-show-timestamps -no-show-version --aggressive-conditions" -BSC_LIB_DIR=$(realpath "$SCRIPTPATH"/../bscRuntime/locks)":"$(realpath "$SCRIPTPATH"/../bscRuntime/memories) -BPATH="-p .:"$BSC_LIB_DIR":"$BLUESPECDIR/inst/lib/Libraries/":%/Libraries" +BSC=$BLUESPECDIR/inst/bin/bsc +BSC_LIB_DIR="$RUNTIMEPATH/locks:$RUNTIMEPATH/memories" +BPATH="-p .:$BSC_LIB_DIR:$BLUESPECDIR/inst/lib/Libraries/" VDIR="$TOP"_verilog SDIR="$TOP"_sim mkdir -p $VDIR @@ -44,12 +50,12 @@ mkdir -p $SDIR case "$CMD" in "v") #Compile to Verilog - bsc $ARGS $BPATH -show-schedule -verilog -vdir $VDIR -u "$TOP".bsv + "$BSC" $ARGS $BPATH -show-schedule -verilog -vdir $VDIR -u "$TOP".bsv ;; "s") #Run simulation - bsc $ARGS $BPATH -sim -simdir $SDIR -u "$TOP".bsv - bsc $ARGS -sim -simdir $SDIR -o "$TB".bexe -e "$TB" "$TB".ba + "$BSC" $ARGS $BPATH -sim 
-simdir $SDIR -u "$TOP".bsv + "$BSC" $ARGS -sim -simdir $SDIR -o "$TB".bexe -e "$TB" "$TB".ba timeout 10s ./"$TB".bexe > top.sim.out ;; "c") diff --git a/docs/interfaces.md b/docs/interfaces.md index c728c423..6c2269b5 100644 --- a/docs/interfaces.md +++ b/docs/interfaces.md @@ -366,6 +366,11 @@ One option is to say that bypass(x) has the same latency as (x), but compilation is a little tricky (i.e., we send requests only when the bypass fails, and then need to send data b/w stages that says whether or not the bypass value was valid). +### Idea + +Read from regfile anyway (regardless of lock state) and then fix w/ bypass later? +Its own type? (e.g., speculative type on the data) + ### Runtime Semantics The main question becomes how to efficiently provide a bypass interface to locks that allows storing diff --git a/docs/renaming.md b/docs/renaming.md new file mode 100644 index 00000000..855f2a49 --- /dev/null +++ b/docs/renaming.md @@ -0,0 +1,177 @@ +# Renaming Abstraction + +Renaming may be a more general form of the lock abstraction from the early version of PDL, +which also may be able to provide an interface over many different data dependency breaking violations. + +## Operations + +The notion of "explicit renaming" in hardware architecture involves the following operations: + +1. Reading the current physical name for an architectural location +2. Allocating a new physical name for an architectural location +3. Checking data validity, given a name +4. Reading data, given a name +5. Writing data, given a name +6. Freeing an old physical name, once it is no longer in use + + +"Explicit renaming" involves maintaining a map from architectural names to physical names; +this abstraction requires a mapping function but is otherwise flexible in terms of implementation. +Names may refer to unified register files, reservation stations or other locations. +This makes it somewhat attractive as an abstraction. 
+ + + +## Restrictions + +Like locking in our original language, there are restrictions on the ordering +of these operations necessary for correct execution: + +1. Reading names and allocating new names must occur in thread order. +2. A thread should always read names before allocating new ones (i.e., a thread should read only old names, not ones it allocates) +3. Only "new names" can be used as write targets +4. Only "old names" can be used as read targets +5. Data can only be read if the name refers to valid data. (i.e., can only execute `read(name)` iff `isValid(name)`) +6. Once a name is freed, it cannot be used again (until allocated). One way to enforce this is to ensure that (1) `free`-ing a name prevents all reads and writes by this thread and (2) `frees` happen in thread order. + +## Relationship to Locking + +Our locking idea supports the following operations: + +1. Reserve a lock for an architectural location +2. Check if a lock reservation is "owned" +3. Release a lock reservation + +The restrictions on locks are: + +1. Locks must be reserved in thread order. +2. Locks must be "owned" before a thread can read from or write to that location. +3. Locks must be released, but only once owned. + + +There are some parallels between these abstractions. + +### Reservation + +Lock reservation returns a unique lock identifier, which names the reservation. +This is analogous to allocating a new name for a location. +The main differences are that name allocation must also return the _old_ name so +that the old name can be `freed` later (it is possible to hide this from the programmer +and just have the internals of the abstraction keep track of this association); +and that locks are required for both reads and writes - whereas name allocation +is only necessary for writes. + +### Blocking + +Locks require blocking all operations until they have been acquired (i.e., all +other threads have released their locks that alias the same location).
+With renaming, writes are never blocked once a name for them has been allocated, +and reads are blocked until data has been written. + +### Release + +Lock release, once owned, indicates that a location will +never be used by that thread again. +We need both read and write locks to ensure that +writes don't run ahead of reads. + +With renaming, only _old names_ need to be released, +indicating they will never be used again (read or written). +This is basically analogous to freeing locks that are used for writes, +since typically no reads happen after w/in a single thread. + +Andrew: I'm confused about what ensures that there is just one +canonical value for a given renamed resource ultimately. Weren't the +locks providing some functionality there? + + +### Translation + +We can implement the Rename API using locks _and_ vice versa. + +#### Rename API implemented (=>) via Lock API + +1. read name => reserve +2. allocate name => acquire (i.e., reserve;block) +3. check data validity => block +4. read data => normal memory read +5. write data => normal memory write +6. free name => release + +With a slightly different implementation we could more naturally +support similar performance by allowing the locks themselves to support +reads and writes; this would allow writes to not have to block, and they +would only be committed to the real memory on release. + +#### Lock API implemented (=>) via Rename API + +1. reserve => allocate name +2. block => check data validity +3. release => free + +Since locks don't have any notion of "reads" vs. "writes" +we would have to allocate a fresh name for every location +that gets accessed. This would obviously not be ideal, but +that's the limitation of translating from the less expressive +to the more expressive API. Similarly, we have to "check data validity" +on both reads and writes, using the "old name" from each allocation. + +### Edge Cases + +What happens if a thread wants to write two locations in the same memory?
+If they end up being the same location, whose responsibility is it to de-alias them? +With reads this is less important in the Renaming API since they'll just get the same +name. Ideally, I'd say that the rename semantics provide some clear ordering s.t., +in the event of an alias, one of the writes is ordered before the other. This lets the user +de-alias themselves, or just ignore that and have some behavior that doesn't really do anything. + +Andrew: are there any realistic cases where this happens? + +## A NEW Lock API + +Let's imagine a new lock API that has the _same_ expressivity as the Rename API; +in other words, it differentiates _reads_ and _writes_. + +1. reserve(w|r) | same old reserve, but you get to specify the type of operations this lock allows you to do +2. block | establishes that the operations this lock lets you do are do-able +3. release | indicates you're done w/ the lock + +This new API has a different set of restrictions than the old one: + +1. reserves must happen in thread order, regardless of operation type +2. block is only required for reads +3. write locks must be released in thread order +4. all read locks must be released _before_ write locks the thread holds + +We can translate this API into the rename API as follows: + +1. reserve(r) => read physical name +2. reserve(w) => allocate new physical name +3. block(r) => check data valid +4. block(w) => nop +5. release(r) => nop -> implies static checks but no runtime behavior +6. release(w) => free physical name + +Reads and writes must also obviously go through this API since +the "names" returned need to be interpreted in _some_ way. +Therefore reads and writes don't use the original address, but the lock identifier +returned from the API (a.k.a. the underlying name). + +## Interacting With Speculation + +An interesting question is how both our high level lock API and the low level rename API interact w/ speculation +based on how we know speculative architectures need to be built. 
+ +The lock API now needs to come with a dynamic identifier, which the requester uses to indicate +whether or not it is speculative. +In this way, reading / allocating physical names can happen speculatively and relies on the +naming layer to track speculative state. +Therefore, in order to _resolve_ speculation, we need to make further use of the "free" API call. +We can imagine "free" being split into "commit" and "abort" which the compiler must ensure are +only used when the thread is either _nonspeculative_ or _misspeculated_, respectively. + +"commit" has the behavior of finalizing writes or reads; in the latter case this is necessary +for modules that might update internal state to track ordering of reads w.r.t. writes. +"abort" takes a name and 'rolls back' the state affected by +that operation (including internal state, not just the memory). Likely, rollback will need to roll back +everything "newer" than that point - or we may need multiple versions (one which rolls it all back, one which doesn't). diff --git a/project/build.properties b/project/build.properties index a919a9b5..e1d13cca 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1,2 @@ -sbt.version=1.3.8 +sbt.version=1.4.4 + diff --git a/src/test/tests/branchesCheck/nested-branches-1.pdl b/src/test/tests/branchesCheck/nested-branches-1.pdl new file mode 100644 index 00000000..8550991f --- /dev/null +++ b/src/test/tests/branchesCheck/nested-branches-1.pdl @@ -0,0 +1,149 @@ +pipe multi_stg_mul(a1: int<32>, a2: int<32>)[]: int<32> { + int<32> rr = a1{15:0} * a2{15:0}; + int<32> rl = a1{15:0} * a2{31:16}; + int<32> lr = a1{31:16} * a2{15:0}; +// int<32> ll = a1{31:16} * a2{31:16}; + --- + int<32> t1 = rl + lr; + int<32> res = (t1 << 16<5>) + rr; + output(res); +} + +pipe multi_stg_div(num: int<32>, denom: int<32>, quot: int<32>, acc: int<32>, cnt: int<5>)[]: int<32> { + int<32> tmp = acc{30:0} ++ num{31:31}; + int<32> na = (tmp >= denom) ?
(tmp - denom) : (tmp); + int<32> nq = (tmp >= denom) ? ((quot << 1){31:1} ++ 1<1>) : (quot << 1); + int<32> nnum = num << 1; + if (cnt == 31<5>) { + output(nq); + } else { + call multi_stg_div(nnum, denom, nq, na, cnt + 1<5>); + } +} + +pipe dummy(a1: int<32>, a2: int<32>)[]: int<32> { + int<32> a3 = a1 + a2 + a1; + --- + int<32> res = a3 >> 1; + output(res); +} + +pipe dummy2(a1: int<32>, a2: int<32>)[]: int<32> { + split { + case: (a1 % 2<32> == 0<32>) { + if (a2 > a1) { + int<32> res = a1 + a1; + } else { + int<32> res = a1 / 2<32>; + } + } + default: { + if (a2 > a1) { + int<32> temp = a1 + a1; + int<32> res = temp + 1<32>; + } + else { + int<32> res = (a1 - 1<32>)/2<32>; + } + } + } + output(res); +} + +pipe cpu(pc: int<16>)[rf: int<32>[5], imem: int<32>[16], m: multi_stg_mul, f: multi_stg_div, d: dummy, d2: dummy2]: bool { + if (pc < 14<16>) { + call cpu(pc + 1<16>); + } + start(imem); + acquire(imem); + int<32> insn <- imem[pc]; + release(imem); + end(imem); + --- + int<2> op = insn{1:0}; + int<5> rs1 = insn{6:2}; + int<5> rs2 = insn{11:7}; + int<5> rd = insn{16:12}; + start(rf); + acquire(rf[rs1]); + acquire(rf[rs2]); + int<32> rf1 = rf[rs1]; + int<32> rf2 = rf[rs2]; + release(rf[rs1]); + release(rf[rs2]); + reserve(rf[rd]); + end(rf); + --- + split { + case: (op == 0<2>) { + split { + case: (rs1 % 3<5> == 0<5>) { + int<32> res <- rf1 + rf2; + } + case: (rs1 % 3<5> == 1<5>) { + int<32> res <- rf1 - rf2; + } + default: { + int<32> res <- rf1 << rf2; + } + } + } + case: (op == 1<2>) { + split { + case: (rs2 % 3<5> == 0<5>) { + start(f); + acquire(f); + int<32> res <- call f(rf1, rf2, 0<32>, 0<32>, 0<5>); + release(f); + end(f); + } + case: (rs2 % 3<5> == 1<5>){ + int<32> res <- rf1 >> rf2; + } + default: { + start(m); + acquire(m); + int<32> res <- call m(rf1, rf2); + release(m); + end(m); + } + + } + } + default: { + if(op % 2<2> == 0<2>) { + start(d); + acquire(d); + int<32> res <- call d(rf1, rf2); + release(d); + end(d); + } else { + start(d2); + 
acquire(d2); + int<32> res <- call d2(rf1, rf2); + release(d2); + end(d2); + } + } + } + --- + block(rf[rd]); + print(res); + rf[rd] <- res; + release(rf[rd]); + --- + if (pc == 14<16>) { + output(true); + } +} + +circuit { + i = memory(int<32>, 16); + r = regfile(int<32>, 5); + m = new multi_stg_mul[]; + fp = new multi_stg_div[]; + d = new dummy[]; + d2 = new dummy2[]; + c = new cpu[r, i, m, fp, d, d2]; + call c(0<16>); +} \ No newline at end of file diff --git a/src/test/tests/branchesCheck/solutions/nested-branches-1.parsesol b/src/test/tests/branchesCheck/solutions/nested-branches-1.parsesol new file mode 100644 index 00000000..8dc2ada6 --- /dev/null +++ b/src/test/tests/branchesCheck/solutions/nested-branches-1.parsesol @@ -0,0 +1,184 @@ +pipe multi_stg_mul(a1:int<32>,a2:int<32>)[] { + int<32> rr = a1{15:0} * a2{15:0}; + int<32> rl = a1{15:0} * a2{31:16}; + int<32> lr = a1{31:16} * a2{15:0}; + + --- + int<32> t1 = rl + lr; + int<32> res = t1 << 16<5> + rr; + output res; + +} +pipe multi_stg_div(num:int<32>,denom:int<32>,quot:int<32>,acc:int<32>,cnt:int<5>)[] { + int<32> tmp = acc{30:0} ++ num{31:31}; + int<32> na = tmp >= denom ? tmp - denom : tmp; + int<32> nq = tmp >= denom ? 
quot << 1<1>{31:1} ++ 1<1> : quot << 1<1>; + int<32> nnum = num << 1<1>; + if ( cnt == 31<5> ) { + output nq; + + } else { + call multi_stg_div(nnum,denom,nq,na,cnt + 1<5>); + + } + +} +pipe dummy(a1:int<32>,a2:int<32>)[] { + int<32> a3 = a1 + a2 + a1; + + --- + int<32> res = a3 >> 1<1>; + output res; + +} +pipe dummy2(a1:int<32>,a2:int<32>)[] { + split { + case: a1 % 2<32> == 0<32> { + if ( a2 > a1 ) { + int<32> res = a1 + a1; + + } else { + int<32> res = a1 / 2<32>; + + } + + } + default: { + if ( a2 > a1 ) { + int<32> temp = a1 + a1; + int<32> res = temp + 1<32>; + + } else { + int<32> res = a1 - 1<32> / 2<32>; + + } + + } + } + output res; + +} +pipe cpu(pc:int<16>)[rf:int<32>[5],imem:int<32>[16],m:multi_stg_mul,f:multi_stg_div,d:dummy,d2:dummy2] { + if ( pc < 14<16> ) { + call cpu(pc + 1<16>); + + } else { + + } + start(imem); + reserved(imem); + acquired(imem); + int<32> insn <- imem[pc]; + released(imem); + end(imem); + + --- + int<2> op = insn{1:0}; + int<5> rs1 = insn{6:2}; + int<5> rs2 = insn{11:7}; + int<5> rd = insn{16:12}; + start(rf); + reserved(rf[rs1]); + acquired(rf[rs1]); + reserved(rf[rs2]); + acquired(rf[rs2]); + int<32> rf1 = rf[rs1]; + int<32> rf2 = rf[rs2]; + released(rf[rs1]); + released(rf[rs2]); + reserved(rf[rd]); + end(rf); + + --- + split { + case: op == 0<2> { + split { + case: rs1 % 3<5> == 0<5> { + int<32> res <- rf1 + rf2; + + } + case: rs1 % 3<5> == 1<5> { + int<32> res <- rf1 - rf2; + + } + default: { + int<32> res <- rf1 << rf2; + + } + } + + } + case: op == 1<2> { + split { + case: rs2 % 3<5> == 0<5> { + start(f); + reserved(f); + acquired(f); + int<32> res <- call f(rf1,rf2,0<32>,0<32>,0<5>); + released(f); + end(f); + + } + case: rs2 % 3<5> == 1<5> { + int<32> res <- rf1 >> rf2; + + } + default: { + start(m); + reserved(m); + acquired(m); + int<32> res <- call m(rf1,rf2); + released(m); + end(m); + + } + } + + } + default: { + if ( op % 2<2> == 0<2> ) { + start(d); + reserved(d); + acquired(d); + int<32> res <- call 
d(rf1,rf2); + released(d); + end(d); + + } else { + start(d2); + reserved(d2); + acquired(d2); + int<32> res <- call d2(rf1,rf2); + released(d2); + end(d2); + + } + + } + } + + --- + acquired(rf[rd]); + print(res); + rf[rd] <- res; + released(rf[rd]); + + --- + if ( pc == 14<16> ) { + output true; + + } else { + + } + +} +circuit { + i = memory(int<32>,16); + r = regfile(int<32>,5); + m = new multi_stg_mul[]; + fp = new multi_stg_div[]; + d = new dummy[]; + d2 = new dummy2[]; + c = new cpu[r,i,m,fp,d,d2]; +call c(0<16>); +} diff --git a/src/test/tests/branchesCheck/solutions/nested-branches-1.typechecksol b/src/test/tests/branchesCheck/solutions/nested-branches-1.typechecksol new file mode 100644 index 00000000..9fb4ec93 --- /dev/null +++ b/src/test/tests/branchesCheck/solutions/nested-branches-1.typechecksol @@ -0,0 +1 @@ +Passed \ No newline at end of file