add clock-cycle benchmark

kaist-cp · Aug 24, 2021 · e177f70 · e177f70
1 parent b2c6c8e
commit e177f70
Show file tree

Hide file tree

Showing 13 changed files with 228 additions and 43 deletions.
diff --git a/Makefile b/Makefile
@@ -42,7 +42,8 @@ RUST_MODE = debug
 endif
 
 ifeq ($(RUST_MODE),release)
-CARGOFLAGS += --release
+# Release mode: OPTFLAGS becomes -O2 and CARGOFLAGS becomes --release.
+# A command-line OPTFLAGS=... (bench.py passes OPTFLAGS=-O3 by default) takes
+# precedence over the assignment below.
+# NOTE(review): `=` replaces any value assigned to these variables earlier in
+# this file (the previous version appended with `+=`) - confirm that is intended.
+OPTFLAGS = -O2
+CARGOFLAGS = --release
 endif
 
 # OBJS = \
@@ -122,10 +123,22 @@ CFLAGS += -ffreestanding -fno-common -nostdlib
 CFLAGS += -I.
 CFLAGS += $(shell $(CC) -fno-stack-protector -E -x c /dev/null >/dev/null 2>&1 && echo -fno-stack-protector)
 
+ifeq ($(ISBENCH), yes)
+CFLAGS += -DISBENCH
+endif
+
 ifeq ($(USERTEST),yes)
 CFLAGS += -DUSERTEST
 endif
 
+ifdef CASE
+CFLAGS += -D CASE=$(CASE)
+endif
+
+ifdef ITER
+CFLAGS += -D ITER=$(ITER)
+endif
+
 # Disable PIE when possible (for Ubuntu 16.10 toolchain)
 ifneq ($(shell $(CC) -dumpspecs 2>/dev/null | grep -e '[^f]no-pie'),)
 CFLAGS += -fno-pie -no-pie
@@ -173,6 +186,9 @@ $U/usys.S : $U/usys.pl
 $U/usys.o : $U/usys.S
 	$(CC) $(CFLAGS) -c -o $U/usys.o $U/usys.S
 
+# $U/usertests.o: $U/usertests.c
+# 	$(CC) $(CFLAGS) -c -o $U/usertests.o $U/usertests.c
+
 $U/_forktest: $U/forktest.o $(ULIB)
 	# forktest has less library code linked in - needs to be small
 	# in order to be able to max out the proc table.
@@ -185,8 +201,8 @@ $(LM)/%.o: $(LM)/%.c
 
 $U/_%: $(LM)/%.o $(ULIB) $(LM)/lmbench.a $U/rand.o
 	$(LD) $(LDFLAGS) -N -e main -Ttext 0 -o $@ $^ $(LM)/lmbench.a
-	$(OBJDUMP) -S $@ > $*.asm
-	$(OBJDUMP) -t $@ | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > $*.sym
+	$(OBJDUMP) -S $@ > $U/$*.asm
+	$(OBJDUMP) -t $@ | sed '1,/SYMBOL TABLE/d; s/ .* / /; /^$$/d' > $U/$*.sym
 
 AR=ar
 ARCREATE=cr

diff --git a/README-rv6.md b/README-rv6.md
@@ -173,29 +173,97 @@
 
 ## Benchmark
 
-Run `bench.py`. This script runs `usertests` 10 times by default.
+Run `bench.py` with options.
 
 ```sh
-./bench.py
-# Or, run 30 times
-./bench.py -n 30
+usage: bench.py [-h] [-i ITER] [-o OUTPUT] [-e EXECCOUNT] [-c CASE] [-t TIMEMODE] [-v VERBOSE] [--option OPTION]
+
+usertests benchmark
+
+optional arguments:
+  -h, --help            show this help message and exit
+  -i ITER, --iter ITER  number of iterations. Default = 10
+  -o OUTPUT, --output OUTPUT
+                        benchmark result path. Default = bench.result
+  -e EXECCOUNT, --execcount EXECCOUNT
+                        number of executions per iteration for each testcase. Default=10
+  -c CASE, --case CASE  index of testcase to be executed
+  -t TIMEMODE, --timemode TIMEMODE
+                        time measurment scale: cpu-clock | wall-clock. Default = cpu-clock
+  -v VERBOSE, --verbose VERBOSE
+                        write detailed information to the result. Default =False
+  --option OPTION       make option
+```
+
+For the experiment, we used the following options:
+
+```sh
+./bench.py -i 1 -e 10 -t cpu-clock
 ```
 
 You can see the result in `bench.result`. An example output is:
 
 ```
 Start benchmark 2021-01-21 16:01:02.001441
-73.16322882205714
-73.82074988691602
-74.60743912809994
-73.06701795198023
-74.04991712688934
-74.74715550499968
-73.93327224906534
-74.19972170796245
-73.0705537419999
-72.86001828499138
-Mean=73.75190744049614, Standard Deviation=0.6422428124461621, N=10
+Test=manywrites, Iter=0, ExecCount=10, Mean=10250831503.7, Standard Deviation=310099929.7603766
+Test=execout, Iter=0, ExecCount=10, Mean=234127979491.5, Standard Deviation=2063096239.6600628
+Test=copyin, Iter=0, ExecCount=10, Mean=72843513.2, Standard Deviation=2593483.998048691
+Test=copyout, Iter=0, ExecCount=10, Mean=22197443.3, Standard Deviation=508065.61895619787
+Test=copyinstr1, Iter=0, ExecCount=10, Mean=17590118.2, Standard Deviation=724335.8052858264
+Test=copyinstr2, Iter=0, ExecCount=10, Mean=31026224.2, Standard Deviation=1277484.7001188442
+Test=copyinstr3, Iter=0, ExecCount=10, Mean=22767261.8, Standard Deviation=850377.8462254686
+Test=rwsbrk, Iter=0, ExecCount=10, Mean=52610103.9, Standard Deviation=3796969.8039983716
+Test=truncate1, Iter=0, ExecCount=10, Mean=72626663.6, Standard Deviation=3588155.194589591
+Test=truncate2, Iter=0, ExecCount=10, Mean=57515131, Standard Deviation=2888724.774054423
+Test=truncate3, Iter=0, ExecCount=10, Mean=699796084.9, Standard Deviation=8685616.881588288
+Test=reparent2, Iter=0, ExecCount=10, Mean=35125494097.1, Standard Deviation=499343605.2572245
+Test=pgbug, Iter=0, ExecCount=10, Mean=16853126.3, Standard Deviation=576202.2373050493
+Test=sbrkbugs, Iter=0, ExecCount=10, Mean=57242485.4, Standard Deviation=1375587.7073186815
+Test=badarg, Iter=0, ExecCount=10, Mean=16732117744.5, Standard Deviation=314298214.1929783
+Test=reparent, Iter=0, ExecCount=10, Mean=4694825008.8, Standard Deviation=53248537.52366887
+Test=twochildren, Iter=0, ExecCount=10, Mean=23553479262.2, Standard Deviation=312654479.1470448
+Test=forkfork, Iter=0, ExecCount=10, Mean=4140333092.8, Standard Deviation=42441996.98412191
+Test=forkforkfork, Iter=0, ExecCount=10, Mean=11070152007.5, Standard Deviation=4214247.017523988
+Test=argptest, Iter=0, ExecCount=10, Mean=19669825.3, Standard Deviation=1076447.8157422373
+Test=createdelete, Iter=0, ExecCount=10, Mean=1692106081.6, Standard Deviation=43254490.91422707
+Test=linkunlink, Iter=0, ExecCount=10, Mean=738579118.6, Standard Deviation=17181223.968526576
+Test=linktest, Iter=0, ExecCount=10, Mean=122194298.2, Standard Deviation=8043328.834906379
+Test=unlinkread, Iter=0, ExecCount=10, Mean=112391973.3, Standard Deviation=5462054.9097064
+Test=concreate, Iter=0, ExecCount=10, Mean=5555714294.8, Standard Deviation=86235817.9635743
+Test=subdir, Iter=0, ExecCount=10, Mean=317356618.3, Standard Deviation=6779472.5084936945
+Test=fourfiles, Iter=0, ExecCount=10, Mean=364562909.2, Standard Deviation=18523538.444742586
+Test=sharedfd, Iter=0, ExecCount=10, Mean=1258305490.5, Standard Deviation=99211093.82524228
+Test=dirtest, Iter=0, ExecCount=10, Mean=62557539.1, Standard Deviation=5084440.4897059435
+Test=exectest, Iter=0, ExecCount=10, Mean=101016489.8, Standard Deviation=3849253.3196121096
+Test=bigargtest, Iter=0, ExecCount=10, Mean=86731337.4, Standard Deviation=1234924.8689326271
+Test=bigwrite, Iter=0, ExecCount=10, Mean=2179088452.5, Standard Deviation=47737862.418833666
+Test=bsstest, Iter=0, ExecCount=10, Mean=16703757.3, Standard Deviation=1191501.0847995295
+Test=sbrkbasic, Iter=0, ExecCount=10, Mean=14768144087.8, Standard Deviation=122624404.54519753
+Test=sbrkmuch, Iter=0, ExecCount=10, Mean=11604135700.4, Standard Deviation=160586718.12640956
+Test=kernmem, Iter=0, ExecCount=10, Mean=590334800.2, Standard Deviation=23889526.48877363
+Test=sbrkfail, Iter=0, ExecCount=10, Mean=53275587805.5, Standard Deviation=501197941.33818287
+Test=sbrkarg, Iter=0, ExecCount=10, Mean=75074295, Standard Deviation=6985498.194494156
+Test=validatetest, Iter=0, ExecCount=10, Mean=151271691.7, Standard Deviation=15584963.951729957
+Test=stacktest, Iter=0, ExecCount=10, Mean=33503426, Standard Deviation=1647149.0807118826
+Test=opentest, Iter=0, ExecCount=10, Mean=25377926.3, Standard Deviation=1436404.7789807292
+Test=writetest, Iter=0, ExecCount=10, Mean=1000989926.6, Standard Deviation=47432019.17070772
+Test=writebig, Iter=0, ExecCount=10, Mean=2151370834.9, Standard Deviation=126541151.0858887
+Test=createtest, Iter=0, ExecCount=10, Mean=1952230863.1, Standard Deviation=62090935.928086035
+Test=openiput, Iter=0, ExecCount=10, Mean=339365750.1, Standard Deviation=71608476.42326514
+Test=exitiput, Iter=0, ExecCount=10, Mean=74096425.7, Standard Deviation=1950034.465084602
+Test=iput, Iter=0, ExecCount=10, Mean=62158786.4, Standard Deviation=2645237.072454994
+Test=mem, Iter=0, ExecCount=10, Mean=16648525535.2, Standard Deviation=142852748.4777774
+Test=pipe1, Iter=0, ExecCount=10, Mean=36972031.7, Standard Deviation=1142602.1014287276
+Test=killstatus, Iter=0, ExecCount=10, Mean=36911937347.1, Standard Deviation=28681127.996621083
+Test=preempt, Iter=0, ExecCount=10, Mean=585400609.7, Standard Deviation=181312782.38400963
+Test=exitwait, Iter=0, ExecCount=10, Mean=1175053781.1, Standard Deviation=9824044.958759846
+Test=rmdot, Iter=0, ExecCount=10, Mean=72308323, Standard Deviation=2467978.045960656
+Test=fourteen, Iter=0, ExecCount=10, Mean=140964120.7, Standard Deviation=3282846.848378064
+Test=bigfile, Iter=0, ExecCount=10, Mean=216833686.3, Standard Deviation=16241193.844760058
+Test=dirfile, Iter=0, ExecCount=10, Mean=93530890.3, Standard Deviation=10745218.64240552
+Test=iref, Iter=0, ExecCount=10, Mean=1965505009, Standard Deviation=61145641.97305806
+Test=forktest, Iter=0, ExecCount=10, Mean=788582250.6, Standard Deviation=10910378.262444843
+Test=bigdir, Iter=0, ExecCount=10, Mean=40395690096, Standard Deviation=2016582586.665442
 ```
 
 ## How we ported xv6 to Rust

diff --git a/bench.py b/bench.py
@@ -3,36 +3,74 @@
 import os, argparse, datetime, time, statistics, subprocess
 
 parser = argparse.ArgumentParser(description='usertests benchmark')
-parser.add_argument('-n', '--number', type=int, default=10, help='number of usertests')
-parser.add_argument('-o', '--output', type=str, default='bench.result', help='benchmark result path')
+parser.add_argument('-i', '--iter', type=int, default=10, help='number of iterations. Default = 10')
+parser.add_argument('-o', '--output', type=str, default='bench.result', help='benchmark result path. Default = bench.result')
+parser.add_argument('-e', '--execcount', type=int, default=10, help='number of executions per iteration for each testcase. Default=10')
+parser.add_argument('-c', '--case', type=str, default="", help='index of testcase to be executed')
+parser.add_argument('-t', '--timemode', type=str, default="cpu-clock", help='time measurment scale: cpu-clock | wall-clock. Default = cpu-clock')
+# type=bool would turn every non-empty string into True (bool('False') is True),
+# so the value is parsed explicitly: only 1/true/yes/y/on (case-insensitive)
+# enable verbose output, anything else disables it.
+parser.add_argument('-v', '--verbose', type=lambda s: s.strip().lower() in ('1', 'true', 'yes', 'y', 'on'), default=False, help='write detailed information to the result. Default =False')
 parser.add_argument('--option', type=str, default='RUST_MODE=release OPTFLAGS=-O3', help='make option')
 
+
+tmpfile = '_bench.tmp'
+
 def main(args):
+    """Build the kernel and file system image, run the benchmark, and append results to args.output.
+
+    The kernel and fs.img are built once with the benchmark make variables, then
+    `make qemu` is run args.iter times with its stdout captured in the temporary
+    file. In 'wall-clock' mode the elapsed host time of each run is recorded. In
+    'cpu-clock' mode every captured line starting with 'Test=' is parsed: the
+    test name follows 'Test=' and the integer after '=' in the second
+    comma-separated field is the measurement. Per-test mean and standard
+    deviation are written for each iteration. After every iteration fs.img and
+    the temporary file are removed and fs.img is rebuilt.
+    """
-    stat = []
+    compile_args = f'ITER={args.execcount} USERTEST=yes ISBENCH=yes CASE={args.case} {args.option}'
 
     with open(args.output, 'a', buffering=1) as f:
+        stat = []
         f.write('Start benchmark {}\n'.format(datetime.datetime.now()))
+        # Best effort: a failing `make clean` must not abort the benchmark.
+        try:
+            subprocess.check_call('make clean', shell=True)
+        except Exception:
+            print("")
+        subprocess.check_call(f'make kernel/kernel {compile_args}', shell=True)
+        subprocess.check_call(f'make fs.img {compile_args}', shell=True)
 
-        subprocess.check_call('make clean', shell=True)
-        subprocess.check_call(f'make kernel/kernel USERTEST=yes {args.option}', shell=True)
-        subprocess.check_call(f'make fs.img USERTEST=yes {args.option}', shell=True)
-
-        for _ in range(args.number):
+        for n in range(args.iter):
             begin = time.perf_counter()
-            subprocess.check_call(f'make qemu USERTEST=yes {args.option} 2>/dev/null', shell=True)
+            subprocess.check_call(f'make qemu {compile_args} 2>/dev/null > {tmpfile}', shell=True)
             elapsed = time.perf_counter() - begin
-            f.write(f'{elapsed}\n')
-            stat.append(elapsed)
+            if args.timemode == 'wall-clock':
+                f.write(f'{elapsed}\n')
+                stat.append(elapsed)
 
-            os.remove('fs.img')
-            subprocess.check_call(f'make fs.img USERTEST=yes {args.option}', shell=True)
+            results = {}
+            if args.timemode == 'cpu-clock':
+                with open(f'{tmpfile}', 'r') as f2:
+                    for line in f2:
+                        if line[0:5] == 'Test=':
+                            if args.verbose:
+                                f.write(line)
+                            tokens = line.split(',')
+                            test_name = tokens[0][5:]
+                            elapsed = int(tokens[1].split('=')[-1].strip())
+                            results.setdefault(test_name, []).append(elapsed)
+                for test_name, samples in results.items():
+                    mean = statistics.mean(samples)
+                    # statistics.stdev() raises StatisticsError for fewer than two
+                    # samples, so report 0 when a test was executed only once.
+                    std = statistics.stdev(samples) if len(samples) > 1 else 0
+                    f.write(f"Test={test_name}, Iter={n}, ExecCount={args.execcount}, Mean={mean}, Standard Deviation={std}\n")
 
-        avg = statistics.mean(stat)
-        std = statistics.stdev(stat)
+            os.remove('fs.img')
+            os.remove(f'{tmpfile}')
+            subprocess.check_call(f'make fs.img {compile_args}', shell=True)
 
-        f.write(f'Mean={avg}, Standard Deviation={std}, N={args.number}\n')
-        f.write('Finish benchmark\n')
+            # NOTE(review): this summary is inside the iteration loop, so it is
+            # written again after every iteration - confirm that is intended.
+            if args.timemode == 'wall-clock':
+                if len(stat) > 1:
+                    avg = statistics.mean(stat)
+                    std = statistics.stdev(stat)
+                else:
+                    avg = stat[0]
+                    std = 0
 
+                f.write(f'duration = {stat[0]}\n')
+                f.write(f'Mean={avg}, Standard Deviation={std}, N={args.iter}, Iter={args.execcount}\n')
+                f.write('Finish benchmark\n')
 
 if __name__ == "__main__":
     main(parser.parse_args())
diff --git a/kernel-rs/src/arch/arm/timer.rs b/kernel-rs/src/arch/arm/timer.rs
@@ -17,6 +17,10 @@ impl TimeManager for Armv8 {
     fn uptime_as_micro() -> Result<usize, ()> {
         Ok((read_cntpct() * US_PER_S / read_freq()) as usize)
     }
+
+    /// Returns the raw `read_cntpct()` counter value, without the conversion to
+    /// microseconds that `uptime_as_micro` applies.
+    fn r_cycle() -> usize {
+        read_cntpct() as usize
+    }
 }
 
 pub fn read_cntpct() -> u64 {

diff --git a/kernel-rs/src/arch/interface.rs b/kernel-rs/src/arch/interface.rs
@@ -43,6 +43,8 @@ pub trait TimeManager {
     /// The uptime since power-on of the device, in microseconds.
     /// This includes time consumed by firmware and bootloaders.
     fn uptime_as_micro() -> Result<usize, ()>;
+
+    /// Returns the architecture's raw counter value; `sys_clock` copies it out
+    /// to user space. The RISC-V implementation executes `rdcycle`, the Armv8
+    /// one returns `read_cntpct()`.
+    /// NOTE(review): on Armv8 this is the same counter that `uptime_as_micro`
+    /// scales by `read_freq()`, so it presumably ticks at the timer frequency
+    /// rather than once per CPU cycle - confirm.
+    fn r_cycle() -> usize;
 }
 
 pub trait TrapManager {

diff --git a/kernel-rs/src/arch/riscv/timer.rs b/kernel-rs/src/arch/riscv/timer.rs
@@ -11,4 +11,12 @@ impl TimeManager for RiscV {
     fn uptime_as_micro() -> Result<usize, ()> {
         todo!()
     }
+
+    /// Reads the counter with the `rdcycle` instruction and returns its value.
+    fn r_cycle() -> usize {
+        let mut x;
+        // NOTE(review): assumes the cycle counter is readable in the mode this
+        // code runs in - confirm.
+        unsafe {
+            // The instruction's result is placed in the register bound to `x`.
+            asm!("rdcycle {}", out(reg) x);
+        }
+        x
+    }
 }
diff --git a/kernel-rs/src/syscall.rs b/kernel-rs/src/syscall.rs
@@ -118,6 +118,7 @@ impl KernelCtx<'_, '_> {
             26 => self.sys_getppid(),
             27 => self.sys_lseek(),
             28 => self.sys_uptime_as_micro(),
+            29 => self.sys_clock(),
             _ => {
                 self.kernel().as_ref().write_fmt(format_args!(
                     "{} {}: unknown sys call {}",
@@ -542,4 +543,14 @@ impl KernelCtx<'_, '_> {
         // SAFETY: `lseek` will not access proc's open_files.
         unsafe { (*(f as *const RcFile)).lseek(offset, whence, self) }
     }
+
+    /// Handler for system call 29 (`SYS_clock`): copies the current
+    /// `TargetArch::r_cycle()` value to the user address passed as argument 0.
+    ///
+    /// Returns `Ok(0)` on success. Fails with `Err(())` if the argument cannot
+    /// be fetched or the copy-out to user memory fails.
+    pub fn sys_clock(&mut self) -> Result<usize, ()> {
+        // Argument 0 is the user-space destination address.
+        let p = self.proc().argaddr(0)?;
+        let addr = UVAddr::from(p);
+
+        let clk = TargetArch::r_cycle();
+        self.proc_mut().memory_mut().copy_out(addr, &clk)?;
+
+        Ok(0)
+    }
 }
diff --git a/kernel/syscall.h b/kernel/syscall.h
@@ -27,3 +27,4 @@
 #define SYS_getppid 26
 #define SYS_lseek 27
 #define SYS_uptime_as_micro 28
+#define SYS_clock  29
diff --git a/lmbench/lib_mem.c b/lmbench/lib_mem.c
@@ -672,7 +672,7 @@ par_mem(size_t len, int warmup, int repetitions, struct mem_state* state)
 {
 	int	i, j;
 	iter_t	__n = 1;
-	double	baseline, max_par, par;
+	double	baseline = 0.0f, max_par, par = 0.0f;
 
 	state->width = 1;
 	max_par = 1.;

diff --git a/lmbench/script.c b/lmbench/script.c
@@ -1,6 +1,8 @@
 #include "bench.h"
 
+#ifndef ITER
+/* Default value, used only when ITER is not already defined; the Makefile
+ * adds -D ITER=$(ITER) to CFLAGS when ITER is set. */
 #define ITER 10
+#endif
 
 int main(int ac, char **av)
 {

diff --git a/user/user.h b/user/user.h
@@ -38,6 +38,7 @@ off_t lseek(int fildes, off_t offset, int whence);
 int uptime_as_micro();
 int gettimeofday(struct timeval *__restrict__ tp,
                 struct timezone *__restrict__ tzp);
+int clock(unsigned long*);
 
 // ulib.c
 int stat(const char*, struct stat*);