Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add clock-cycle benchmark #578

Merged
merged 3 commits into from
Aug 30, 2021
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,8 @@ RUST_MODE = debug
endif

ifeq ($(RUST_MODE),release)
CARGOFLAGS += --release
OPTFLAGS = -O2
CARGOFLAGS = --release
endif

# OBJS = \
Expand Down Expand Up @@ -122,10 +123,22 @@ CFLAGS += -ffreestanding -fno-common -nostdlib
CFLAGS += -I.
CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector)

ifeq ($(ISBENCH), yes)
CFLAGS += -DISBENCH
endif

ifeq ($(USERTEST),yes)
CFLAGS += -DUSERTEST
endif

ifdef CASE
CFLAGS += -D CASE=$(CASE)
endif

ifdef ITER
CFLAGS += -D ITER=$(ITER)
endif

# Disable PIE when possible (for Ubuntu 16.10 toolchain)
ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]no-pie'),)
CFLAGS += -fno-pie -no-pie
Expand Down Expand Up @@ -173,6 +186,9 @@ $U/usys.S : $U/usys.pl
$U/usys.o : $U/usys.S
$(CC) $(CFLAGS) -c -o $U/usys.o $U/usys.S

# $U/usertests.o: $U/usertests.c
# $(CC) $(CFLAGS) -c -o $U/usertests.o $U/usertests.c

$U/_forktest: $U/forktest.o $(ULIB)
# forktest has less library code linked in - needs to be small
# in order to be able to max out the proc table.
Expand All @@ -185,8 +201,8 @@ $(LM)/%.o: $(LM)/%.c

$U/_%: $(LM)/%.o $(ULIB) $(LM)/lmbench.a $U/rand.o
$(LD) $(LDFLAGS) -N -e main -Ttext 0 -o $@ $^ $(LM)/lmbench.a
$(OBJDUMP) -S $@ > $*.asm
$(OBJDUMP) -t $@ | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > $*.sym
$(OBJDUMP) -S $@ > $U/$*.asm
$(OBJDUMP) -t $@ | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > $U/$*.sym

AR=ar
ARCREATE=cr
Expand Down
98 changes: 83 additions & 15 deletions README-rv6.md
Original file line number Diff line number Diff line change
Expand Up @@ -173,29 +173,97 @@
## Benchmark
Run `bench.py`. This script runs `usertests` 10 times by default.
Run `bench.py` with options.
```sh
./bench.py
# Or, run 30 times
./bench.py -n 30
usage: bench.py [-h] [-i ITER] [-o OUTPUT] [-e EXECCOUNT] [-c CASE] [-t TIMEMODE] [-v VERBOSE] [--option OPTION]
usertests benchmark
optional arguments:
-h, --help show this help message and exit
-i ITER, --iter ITER number of iterations. Default = 10
-o OUTPUT, --output OUTPUT
benchmark result path. Default = bench.result
-e EXECCOUNT, --execcount EXECCOUNT
number of executions per iteration for each testcase. Default=10
-c CASE, --case CASE index of testcase to be executed
-t TIMEMODE, --timemode TIMEMODE
time measurment scale: cpu-clock | wall-clock. Default = cpu-clock
-v VERBOSE, --verbose VERBOSE
write detailed information to the result. Default =False
--option OPTION make option
```

For the experiment, we used the following options:

```sh
./bench.py -i 1 -e 10 -t cpu-clock
```

You can see the result in `bench.result`. An example output is:

```
Start benchmark 2021-01-21 16:01:02.001441
73.16322882205714
73.82074988691602
74.60743912809994
73.06701795198023
74.04991712688934
74.74715550499968
73.93327224906534
74.19972170796245
73.0705537419999
72.86001828499138
Mean=73.75190744049614, Standard Deviation=0.6422428124461621, N=10
Test=manywrites, Iter=0, ExecCount=10, Mean=10250831503.7, Standard Deviation=310099929.7603766
Test=execout, Iter=0, ExecCount=10, Mean=234127979491.5, Standard Deviation=2063096239.6600628
Test=copyin, Iter=0, ExecCount=10, Mean=72843513.2, Standard Deviation=2593483.998048691
Test=copyout, Iter=0, ExecCount=10, Mean=22197443.3, Standard Deviation=508065.61895619787
Test=copyinstr1, Iter=0, ExecCount=10, Mean=17590118.2, Standard Deviation=724335.8052858264
Test=copyinstr2, Iter=0, ExecCount=10, Mean=31026224.2, Standard Deviation=1277484.7001188442
Test=copyinstr3, Iter=0, ExecCount=10, Mean=22767261.8, Standard Deviation=850377.8462254686
Test=rwsbrk, Iter=0, ExecCount=10, Mean=52610103.9, Standard Deviation=3796969.8039983716
Test=truncate1, Iter=0, ExecCount=10, Mean=72626663.6, Standard Deviation=3588155.194589591
Test=truncate2, Iter=0, ExecCount=10, Mean=57515131, Standard Deviation=2888724.774054423
Test=truncate3, Iter=0, ExecCount=10, Mean=699796084.9, Standard Deviation=8685616.881588288
Test=reparent2, Iter=0, ExecCount=10, Mean=35125494097.1, Standard Deviation=499343605.2572245
Test=pgbug, Iter=0, ExecCount=10, Mean=16853126.3, Standard Deviation=576202.2373050493
Test=sbrkbugs, Iter=0, ExecCount=10, Mean=57242485.4, Standard Deviation=1375587.7073186815
Test=badarg, Iter=0, ExecCount=10, Mean=16732117744.5, Standard Deviation=314298214.1929783
Test=reparent, Iter=0, ExecCount=10, Mean=4694825008.8, Standard Deviation=53248537.52366887
Test=twochildren, Iter=0, ExecCount=10, Mean=23553479262.2, Standard Deviation=312654479.1470448
Test=forkfork, Iter=0, ExecCount=10, Mean=4140333092.8, Standard Deviation=42441996.98412191
Test=forkforkfork, Iter=0, ExecCount=10, Mean=11070152007.5, Standard Deviation=4214247.017523988
Test=argptest, Iter=0, ExecCount=10, Mean=19669825.3, Standard Deviation=1076447.8157422373
Test=createdelete, Iter=0, ExecCount=10, Mean=1692106081.6, Standard Deviation=43254490.91422707
Test=linkunlink, Iter=0, ExecCount=10, Mean=738579118.6, Standard Deviation=17181223.968526576
Test=linktest, Iter=0, ExecCount=10, Mean=122194298.2, Standard Deviation=8043328.834906379
Test=unlinkread, Iter=0, ExecCount=10, Mean=112391973.3, Standard Deviation=5462054.9097064
Test=concreate, Iter=0, ExecCount=10, Mean=5555714294.8, Standard Deviation=86235817.9635743
Test=subdir, Iter=0, ExecCount=10, Mean=317356618.3, Standard Deviation=6779472.5084936945
Test=fourfiles, Iter=0, ExecCount=10, Mean=364562909.2, Standard Deviation=18523538.444742586
Test=sharedfd, Iter=0, ExecCount=10, Mean=1258305490.5, Standard Deviation=99211093.82524228
Test=dirtest, Iter=0, ExecCount=10, Mean=62557539.1, Standard Deviation=5084440.4897059435
Test=exectest, Iter=0, ExecCount=10, Mean=101016489.8, Standard Deviation=3849253.3196121096
Test=bigargtest, Iter=0, ExecCount=10, Mean=86731337.4, Standard Deviation=1234924.8689326271
Test=bigwrite, Iter=0, ExecCount=10, Mean=2179088452.5, Standard Deviation=47737862.418833666
Test=bsstest, Iter=0, ExecCount=10, Mean=16703757.3, Standard Deviation=1191501.0847995295
Test=sbrkbasic, Iter=0, ExecCount=10, Mean=14768144087.8, Standard Deviation=122624404.54519753
Test=sbrkmuch, Iter=0, ExecCount=10, Mean=11604135700.4, Standard Deviation=160586718.12640956
Test=kernmem, Iter=0, ExecCount=10, Mean=590334800.2, Standard Deviation=23889526.48877363
Test=sbrkfail, Iter=0, ExecCount=10, Mean=53275587805.5, Standard Deviation=501197941.33818287
Test=sbrkarg, Iter=0, ExecCount=10, Mean=75074295, Standard Deviation=6985498.194494156
Test=validatetest, Iter=0, ExecCount=10, Mean=151271691.7, Standard Deviation=15584963.951729957
Test=stacktest, Iter=0, ExecCount=10, Mean=33503426, Standard Deviation=1647149.0807118826
Test=opentest, Iter=0, ExecCount=10, Mean=25377926.3, Standard Deviation=1436404.7789807292
Test=writetest, Iter=0, ExecCount=10, Mean=1000989926.6, Standard Deviation=47432019.17070772
Test=writebig, Iter=0, ExecCount=10, Mean=2151370834.9, Standard Deviation=126541151.0858887
Test=createtest, Iter=0, ExecCount=10, Mean=1952230863.1, Standard Deviation=62090935.928086035
Test=openiput, Iter=0, ExecCount=10, Mean=339365750.1, Standard Deviation=71608476.42326514
Test=exitiput, Iter=0, ExecCount=10, Mean=74096425.7, Standard Deviation=1950034.465084602
Test=iput, Iter=0, ExecCount=10, Mean=62158786.4, Standard Deviation=2645237.072454994
Test=mem, Iter=0, ExecCount=10, Mean=16648525535.2, Standard Deviation=142852748.4777774
Test=pipe1, Iter=0, ExecCount=10, Mean=36972031.7, Standard Deviation=1142602.1014287276
Test=killstatus, Iter=0, ExecCount=10, Mean=36911937347.1, Standard Deviation=28681127.996621083
Test=preempt, Iter=0, ExecCount=10, Mean=585400609.7, Standard Deviation=181312782.38400963
Test=exitwait, Iter=0, ExecCount=10, Mean=1175053781.1, Standard Deviation=9824044.958759846
Test=rmdot, Iter=0, ExecCount=10, Mean=72308323, Standard Deviation=2467978.045960656
Test=fourteen, Iter=0, ExecCount=10, Mean=140964120.7, Standard Deviation=3282846.848378064
Test=bigfile, Iter=0, ExecCount=10, Mean=216833686.3, Standard Deviation=16241193.844760058
Test=dirfile, Iter=0, ExecCount=10, Mean=93530890.3, Standard Deviation=10745218.64240552
Test=iref, Iter=0, ExecCount=10, Mean=1965505009, Standard Deviation=61145641.97305806
Test=forktest, Iter=0, ExecCount=10, Mean=788582250.6, Standard Deviation=10910378.262444843
Test=bigdir, Iter=0, ExecCount=10, Mean=40395690096, Standard Deviation=2016582586.665442
```

## How we ported xv6 to Rust
Expand Down
72 changes: 55 additions & 17 deletions bench.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,36 +3,74 @@
import os, argparse, datetime, time, statistics, subprocess

parser = argparse.ArgumentParser(description='usertests benchmark')
parser.add_argument('-n', '--number', type=int, default=10, help='number of usertests')
parser.add_argument('-o', '--output', type=str, default='bench.result', help='benchmark result path')
parser.add_argument('-i', '--iter', type=int, default=10, help='number of iterations. Default = 10')
parser.add_argument('-o', '--output', type=str, default='bench.result', help='benchmark result path. Default = bench.result')
parser.add_argument('-e', '--execcount', type=int, default=10, help='number of executions per iteration for each testcase. Default=10')
parser.add_argument('-c', '--case', type=str, default="", help='index of testcase to be executed')
parser.add_argument('-t', '--timemode', type=str, default="cpu-clock", help='time measurment scale: cpu-clock | wall-clock. Default = cpu-clock')
# `type=bool` would call bool() on the raw command-line text, and any non-empty
# string (including "False" or "0") is truthy, so `-v False` used to enable
# verbose output. Parse the text explicitly instead; the option still takes a
# value, so existing invocations such as `-v True` keep working.
parser.add_argument('-v', '--verbose', type=lambda text: str(text).strip().lower() in ('1', 'true', 'yes', 'y', 'on'), default=False, help='write detailed information to the result. Default =False')
parser.add_argument('--option', type=str, default='RUST_MODE=release OPTFLAGS=-O3', help='make option')


tmpfile = '_bench.tmp'

def main(args):
stat = []
compile_args = f'ITER={args.execcount} USERTEST=yes ISBENCH=yes CASE={args.case} {args.option}'

with open(args.output, 'a', buffering=1) as f:
stat = []
f.write('Start benchmark {}\n'.format(datetime.datetime.now()))
try:
subprocess.check_call('make clean', shell=True)
except Exception:
print("")
subprocess.check_call(f'make kernel/kernel {compile_args}', shell=True)
subprocess.check_call(f'make fs.img {compile_args}', shell=True)

subprocess.check_call('make clean', shell=True)
subprocess.check_call(f'make kernel/kernel USERTEST=yes {args.option}', shell=True)
subprocess.check_call(f'make fs.img USERTEST=yes {args.option}', shell=True)

for _ in range(args.number):
for n in range(args.iter):
begin = time.perf_counter()
subprocess.check_call(f'make qemu USERTEST=yes {args.option} 2>/dev/null', shell=True)
subprocess.check_call(f'make qemu {compile_args} 2>/dev/null > {tmpfile}', shell=True)
elapsed = time.perf_counter() - begin
f.write(f'{elapsed}\n')
stat.append(elapsed)
if args.timemode == 'wall-clock':
f.write(f'{elapsed}\n')
stat.append(elapsed)

os.remove('fs.img')
subprocess.check_call(f'make fs.img USERTEST=yes {args.option}', shell=True)
results = {}
if args.timemode == 'cpu-clock':
with open(f'{tmpfile}', 'r') as f2:
for line in f2:
if line[0:5] == 'Test=':
if args.verbose:
f.write(line)
tokens = line.split(',')
test_name = tokens[0][5:]
elapsed = int(tokens[1].split('=')[-1].strip())
if not test_name in results:
results[test_name] = []
results[test_name].append(elapsed)
# Write one summary line per testcase for the current iteration.
for test_name in results:
    # Samples collected for this testcase from the 'Test=' lines parsed above.
    samples = results[test_name]
    if len(samples) == 1:
        # statistics.stdev() requires at least two data points. With a single
        # sample, report the sample itself and a deviation of 0 (the same
        # convention the wall-clock summary uses) instead of leaving `std`
        # unassigned or stale from the previous testcase.
        mean = samples[0]
        std = 0
    else:
        mean = statistics.mean(samples)
        std = statistics.stdev(samples)
    f.write(f"Test={test_name}, Iter={n}, ExecCount={args.execcount}, Mean={mean}, Standard Deviation={std}\n")

avg = statistics.mean(stat)
std = statistics.stdev(stat)
os.remove('fs.img')
os.remove(f'{tmpfile}')
subprocess.check_call(f'make fs.img {compile_args}', shell=True)

f.write(f'Mean={avg}, Standard Deviation={std}, N={args.number}\n')
f.write('Finish benchmark\n')
if args.timemode == 'wall-clock':
if len(stat) > 1:
avg = statistics.mean(stat)
std = statistics.stdev(stat)
else:
avg = stat[0]
std = 0

f.write(f'duration = {stat[0]}\n')
f.write(f'Mean={avg}, Standard Deviation={std}, N={args.iter}, Iter={args.execcount}\n')
f.write('Finish benchmark\n')

if __name__ == "__main__":
main(parser.parse_args())
4 changes: 4 additions & 0 deletions kernel-rs/src/arch/arm/timer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,10 @@ impl TimeManager for Armv8 {
fn uptime_as_micro() -> Result<usize, ()> {
// Scales the raw counter value from `read_cntpct()` by `US_PER_S` and divides
// by the frequency from `read_freq()`; this implementation always returns `Ok`.
// NOTE(review): the multiplication happens before the division, so large
// counter values could overflow the intermediate product — confirm the
// operand width is sufficient for the expected uptime.
Ok((read_cntpct() * US_PER_S / read_freq()) as usize)
}

/// Returns the raw counter value obtained from `read_cntpct()`, cast to `usize`.
///
/// NOTE(review): `uptime_as_micro` divides this same counter by `read_freq()`,
/// so the unit appears to be counter ticks rather than CPU clock cycles — confirm.
fn r_cycle() -> usize {
    let ticks = read_cntpct();
    ticks as usize
}
}

pub fn read_cntpct() -> u64 {
Expand Down
2 changes: 2 additions & 0 deletions kernel-rs/src/arch/interface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ pub trait TimeManager {
/// The uptime since power-on of the device, in microseconds.
/// This includes time consumed by firmware and bootloaders.
fn uptime_as_micro() -> Result<usize, ()>;

/// Returns the current raw value of an architecture-specific hardware counter.
/// The unit is defined by each implementation and is not converted to
/// wall-clock time.
fn r_cycle() -> usize;
}

pub trait TrapManager {
Expand Down
8 changes: 8 additions & 0 deletions kernel-rs/src/arch/riscv/timer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,4 +11,12 @@ impl TimeManager for RiscV {
fn uptime_as_micro() -> Result<usize, ()> {
// Not implemented for this architecture yet: `todo!()` panics if this is called.
todo!()
}

/// Executes the `rdcycle` instruction and returns the value it produces.
fn r_cycle() -> usize {
    let cycles: usize;
    // SAFETY: the assembly has a single output operand and no inputs; it only
    // writes the register that the compiler assigns to `cycles`.
    unsafe {
        asm!("rdcycle {}", out(reg) cycles);
    }
    cycles
}
}
11 changes: 11 additions & 0 deletions kernel-rs/src/syscall.rs
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ impl KernelCtx<'_, '_> {
26 => self.sys_getppid(),
27 => self.sys_lseek(),
28 => self.sys_uptime_as_micro(),
29 => self.sys_clock(),
_ => {
self.kernel().as_ref().write_fmt(format_args!(
"{} {}: unknown sys call {}",
Expand Down Expand Up @@ -542,4 +543,14 @@ impl KernelCtx<'_, '_> {
// SAFETY: `lseek` will not access proc's open_files.
unsafe { (*(f as *const RcFile)).lseek(offset, whence, self) }
}

/// Writes the current value of `TargetArch::r_cycle()` to the user address
/// given as syscall argument 0.
///
/// Returns `Ok(0)` on success. Propagates the error if the argument cannot be
/// fetched or if copying the value out to user memory fails.
pub fn sys_clock(&mut self) -> Result<usize, ()> {
    // Resolve the destination first so a bad argument fails before the
    // counter is sampled.
    let dst = UVAddr::from(self.proc().argaddr(0)?);

    let cycles = TargetArch::r_cycle();
    self.proc_mut().memory_mut().copy_out(dst, &cycles)?;

    Ok(0)
}
}
1 change: 1 addition & 0 deletions kernel/syscall.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,4 @@
#define SYS_getppid 26
#define SYS_lseek 27
#define SYS_uptime_as_micro 28
#define SYS_clock 29
2 changes: 1 addition & 1 deletion lmbench/lib_mem.c
Original file line number Diff line number Diff line change
Expand Up @@ -672,7 +672,7 @@ par_mem(size_t len, int warmup, int repetitions, struct mem_state* state)
{
int i, j;
iter_t __n = 1;
double baseline, max_par, par;
double baseline = 0.0f, max_par, par = 0.0f;

state->width = 1;
max_par = 1.;
Expand Down
2 changes: 2 additions & 0 deletions lmbench/script.c
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#include "bench.h"

#ifndef ITER
#define ITER 10
#endif

int main(int ac, char **av)
{
Expand Down
1 change: 1 addition & 0 deletions user/user.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ off_t lseek(int fildes, off_t offset, int whence);
int uptime_as_micro();
int gettimeofday(struct timeval *__restrict__ tp,
struct timezone *__restrict__ tzp);
int clock(unsigned long*);

// ulib.c
int stat(const char*, struct stat*);
Expand Down
Loading