Commit 725a1fe: "Small fix"
1 parent 3c7a57d

3 files changed: +25 -47 lines

tuning/libtuner.py (+10 -30)
@@ -43,7 +43,7 @@
 """Do not need to change"""
 
 # Declare special symbols for libtuner to search and locate
-DEVICE_ID_RE = "DEVICE_ID_RE"
+DEVICE_ID_PLACEHOLDER = "!IREE!"
 
 
 @dataclass
@@ -199,31 +199,15 @@ class IREEBenchmarkResult:
     candidate_id: int
     result_str: str
 
-    def extract_key(self, pattern: str) -> Optional[str]:
+    def get_mean_time(self) -> Optional[float]:
         if not self.result_str:
             return None
+        pattern = r"process_time/real_time_mean\s+([\d.]+)\s\w{2}"
         match = re.search(pattern, self.result_str)
         if not match:
             return None
-        return match.group(1)
-
-    def get_mean_time(self) -> Optional[float]:
-        pattern = r"process_time/real_time_mean\s+([\d.]+)\s\w{2}"
-        time_str = self.extract_key(pattern)
-        if not time_str:
-            return None
         try:
-            return float(time_str)
-        except ValueError:
-            return None
-
-    def get_median_time(self) -> Optional[float]:
-        pattern = r"process_time/real_time_median\s+([\d.]+)\s\w{2}"
-        time_str = self.extract_key(pattern)
-        if not time_str:
-            return None
-        try:
-            return float(time_str)
+            return float(match.group(1))
         except ValueError:
             return None
 
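As a quick sanity check on the consolidated get_mean_time(), here is a minimal standalone sketch of the regex it now applies inline, run against a sample iree-benchmark-module line borrowed (and truncated) from the test fixture in tuning/test_libtuner.py below:

    import re

    # Sample iree-benchmark-module output line (dispatch name truncated),
    # taken from the normal_str fixture in tuning/test_libtuner.py.
    line = ".../process_time/real_time_mean 274 us 275 us 3 items_per_second=3.65587k/s"
    pattern = r"process_time/real_time_mean\s+([\d.]+)\s\w{2}"

    match = re.search(pattern, line)
    assert match is not None
    assert float(match.group(1)) == 274.0  # captured group is the mean time; the "us" unit is dropped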
@@ -232,7 +216,6 @@ def generate_display_DBR(
     candidate_id: int = 0, mean_time: float = random.uniform(100.0, 500.0)
 ) -> str:
     """Generate dispatch_benchmark_result string for displaying"""
-    # time unit is implicit and dependent on the output of iree-benchmark-module
     return f"{candidate_id}\tMean Time: {mean_time:.1f}\n"
 
 
@@ -243,13 +226,12 @@ def generate_display_MBR(
     calibrated_diff: Optional[float] = None,
 ) -> str:
     """Generate model_benchmark_result string for displaying"""
-    # time unit is implicit and dependent on the output of iree-benchmark-module
     head_str = f"Benchmarking: {candidate_vmfb_path_str} on device {device_id}\n"
-    res_str = f"process_time/real_time_median\t {t1:.3g} ms\n\n"
+    res_str = f"process_time/real_time_mean\t {t1:.3g} ms\n\n"
     if calibrated_diff:
         percentage_change = calibrated_diff * 100
         change_str = f"({percentage_change:+.3f}%)"
-        res_str = f"process_time/real_time_median\t {t1:.3g} ms {change_str}\n\n"
+        res_str = f"process_time/real_time_mean\t {t1:.3g} ms {change_str}\n\n"
     return head_str + res_str
 
 
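With this change the display string reports the mean rather than the median. An illustrative call (values chosen to match the expectations asserted in tuning/test_libtuner.py below):

    # Assuming libtuner is imported as in tuning/test_libtuner.py.
    s = libtuner.generate_display_MBR("baseline.vmfb", str(1), 567.89, 0.0314)
    # s == "Benchmarking: baseline.vmfb on device 1\nprocess_time/real_time_mean\t 568 ms (+3.140%)\n\n"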
@@ -534,7 +516,7 @@ def run_command_wrapper(task_tuple: TaskPack) -> TaskResult:
     """pool.imap_unordered can't iterate an iterable of iterables input, this function helps dividing arguments"""
     if task_tuple.command_need_device_id:
         # worker searches for special symbol and substitute to correct device_id
-        pattern = re.compile(re.escape(DEVICE_ID_RE))
+        pattern = re.compile(re.escape(DEVICE_ID_PLACEHOLDER))
         task_tuple.command = [
             pattern.sub(str(device_id), s) for s in task_tuple.command
         ]
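The rename also matches how the symbol is used here: it is a literal sentinel token, not a regular expression, and re.escape() ensures it is matched verbatim. A minimal standalone sketch of the substitution, with a hypothetical device id of 0:

    import re

    DEVICE_ID_PLACEHOLDER = "!IREE!"
    command = ["./tools/iree-benchmark-module", f"--device={DEVICE_ID_PLACEHOLDER}"]

    # re.escape() makes the token match literally regardless of which characters it contains.
    pattern = re.compile(re.escape(DEVICE_ID_PLACEHOLDER))
    resolved = [pattern.sub(str(0), s) for s in command]  # "0" stands in for a real device id
    assert resolved == ["./tools/iree-benchmark-module", "--device=0"]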
@@ -907,14 +889,12 @@ def generate_dryrun_model_benchmark_results(
 ) -> tuple[list[TaskResult], list[TaskResult]]:
     candidate_results = []
     for i, j in enumerate(model_candidates):
-        stdout = (
-            f"process_time/real_time_median {random.uniform(100.0, 500.0):.3g} ms"
-        )
+        stdout = f"process_time/real_time_mean {random.uniform(100.0, 500.0):.3g} ms"
         candidate_results.append(generate_sample_task_result(stdout, j, str(i % 3)))
 
     baseline_results = [
         generate_sample_task_result(
-            f"process_time/real_time_median {random.uniform(100.0, 500.0):.3g} ms",
+            f"process_time/real_time_mean {random.uniform(100.0, 500.0):.3g} ms",
             0,
             str(i),
         )
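The dry-run strings stay in the format the parser expects; a small sketch (on the assumption that this output is meant to round-trip through get_mean_time()) confirming the generated line matches the same regex:

    import random
    import re

    stdout = f"process_time/real_time_mean {random.uniform(100.0, 500.0):.3g} ms"
    match = re.search(r"process_time/real_time_mean\s+([\d.]+)\s\w{2}", stdout)
    assert match is not None and 100.0 <= float(match.group(1)) <= 500.0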
@@ -1142,7 +1122,7 @@ def parse_model_benchmark_results(
             continue
 
         res = IREEBenchmarkResult(candidate_id, result_str)
-        benchmark_time = res.get_median_time()
+        benchmark_time = res.get_mean_time()
 
         # Check completion
         if benchmark_time == None:

tuning/punet_autotune.py (+2 -2)
@@ -51,7 +51,7 @@ def get_dispatch_benchmark_command(
         "timeout",
         "16s",
         "./tools/iree-benchmark-module",
-        f"--device={libtuner.DEVICE_ID_RE}",
+        f"--device={libtuner.DEVICE_ID_PLACEHOLDER}",
         f"--module={compiled_vmfb_path.resolve()}",
         "--hip_use_streams=true",
         "--hip_allow_inline_execution=true",
@@ -83,7 +83,7 @@ def get_model_benchmark_command(
         "timeout",
         "180s",
         "tools/iree-benchmark-module",
-        f"--device={libtuner.DEVICE_ID_RE}",
+        f"--device={libtuner.DEVICE_ID_PLACEHOLDER}",
         "--hip_use_streams=true",
        "--hip_allow_inline_execution=true",
         "--device_allocator=caching",

tuning/test_libtuner.py (+13 -15)
@@ -83,29 +83,25 @@ def test_IREEBenchmarkResult_get():
     BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time 274 us 275 us 3000 items_per_second=3.65481k/s
     BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time 273 us 275 us 3000 items_per_second=3.65671k/s
     BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time_mean 274 us 275 us 3 items_per_second=3.65587k/s
-    BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time_median 275 us 275 us 3 items_per_second=3.65611k/s
+    BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time_mean 275 us 275 us 3 items_per_second=3.65611k/s
     BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time_stddev 0.073 us 0.179 us 3 items_per_second=0.971769/s
     BM_main$async_dispatch_311_rocm_hsaco_fb_main$async_dispatch_311_matmul_like_2x1024x1280x5120_i8xi8xi32/process_time/real_time_cv 0.03 % 0.07 % 3 items_per_second=0.03%
     """
     res = libtuner.IREEBenchmarkResult(candidate_id=1, result_str=normal_str)
     assert res.get_mean_time() == float(274)
-    assert res.get_median_time() == float(275)
 
     # Time is float
     res = libtuner.IREEBenchmarkResult(
         candidate_id=2,
-        result_str="process_time/real_time_mean 123.45 us, process_time/real_time_median 246.78 us",
+        result_str="process_time/real_time_mean 123.45 us, process_time/real_time_mean 246.78 us",
     )
     assert res.get_mean_time() == 123.45
-    assert res.get_median_time() == 246.78
 
     # Invalid str
     res = libtuner.IREEBenchmarkResult(candidate_id=3, result_str="hello world")
     assert res.get_mean_time() == None
-    assert res.get_median_time() == None
     res = libtuner.IREEBenchmarkResult(candidate_id=4, result_str="")
     assert res.get_mean_time() == None
-    assert res.get_median_time() == None
 
 
 def test_generate_display_BR():
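One subtlety the updated fixture relies on: re.search() returns only the first occurrence, so with two real_time_mean entries in the string, get_mean_time() reports the first one (123.45, not 246.78). A minimal sketch:

    import re

    result_str = "process_time/real_time_mean 123.45 us, process_time/real_time_mean 246.78 us"
    match = re.search(r"process_time/real_time_mean\s+([\d.]+)\s\w{2}", result_str)
    assert float(match.group(1)) == 123.45  # first occurrence wins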
@@ -114,13 +110,13 @@ def test_generate_display_BR():
     assert output == expected, "DispatchBenchmarkResult generates invalid sample string"
 
     output = libtuner.generate_display_MBR("baseline.vmfb", str(1), 567.89)
-    expected = "Benchmarking: baseline.vmfb on device 1\nprocess_time/real_time_median\t 568 ms\n\n"
+    expected = "Benchmarking: baseline.vmfb on device 1\nprocess_time/real_time_mean\t 568 ms\n\n"
     assert output == expected, "ModelBenchmarkResult generates invalid sample string"
     output = libtuner.generate_display_MBR("baseline.vmfb", str(1), 567.89, 0.0314)
-    expected = "Benchmarking: baseline.vmfb on device 1\nprocess_time/real_time_median\t 568 ms (+3.140%)\n\n"
+    expected = "Benchmarking: baseline.vmfb on device 1\nprocess_time/real_time_mean\t 568 ms (+3.140%)\n\n"
     assert output == expected, "ModelBenchmarkResult generates invalid sample string"
     output = libtuner.generate_display_MBR("baseline.vmfb", str(1), 567.89, -3.14)
-    expected = "Benchmarking: baseline.vmfb on device 1\nprocess_time/real_time_median\t 568 ms (-314.000%)\n\n"
+    expected = "Benchmarking: baseline.vmfb on device 1\nprocess_time/real_time_mean\t 568 ms (-314.000%)\n\n"
     assert output == expected, "ModelBenchmarkResult generates invalid sample string"
 
 
@@ -225,13 +221,11 @@ def test_parse_model_benchmark_results():
     baseline_results = [result3, result4]
 
     # Mock IREEBenchmarkResult to return float value from stdout
-    def mock_get_median_time(self):
+    def mock_get_mean_time(self):
         return float(self.result_str)
 
     # Mock IREEBenchmarkResult to return specific benchmark times
-    with patch(
-        "libtuner.IREEBenchmarkResult.get_median_time", new=mock_get_median_time
-    ):
+    with patch("libtuner.IREEBenchmarkResult.get_mean_time", new=mock_get_mean_time):
         # Mock generate_display_MBR to return a fixed display string
         with patch(
             "libtuner.generate_display_MBR",
@@ -248,12 +242,16 @@ def mock_get_median_time(self):
             assert tracker1.model_benchmark_time == 1.23
             assert tracker1.model_benchmark_device_id == "device1"
             assert tracker1.baseline_benchmark_time == 0.98
-            assert tracker1.calibrated_benchmark_diff == (1.23 - 0.98) / 0.98
+            assert tracker1.calibrated_benchmark_diff == pytest.approx(
+                (1.23 - 0.98) / 0.98, rel=1e-6
+            )
 
             assert tracker2.model_benchmark_time == 4.56
             assert tracker2.model_benchmark_device_id == "device2"
             assert tracker2.baseline_benchmark_time == 4.13
-            assert tracker2.calibrated_benchmark_diff == (4.56 - 4.13) / 4.13
+            assert tracker2.calibrated_benchmark_diff == pytest.approx(
+                (4.56 - 4.13) / 4.13, rel=1e-6
+            )
 
             assert result == [
                 "display_str",
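The switch to pytest.approx avoids brittle exact equality on derived floats such as (1.23 - 0.98) / 0.98, whose intermediate results are not exactly representable in binary floating point. A standalone sketch of the idea (assumes pytest is installed):

    import pytest

    # Exact equality on computed floats can fail due to rounding; approx()
    # compares within a relative tolerance instead.
    assert (1.23 - 0.98) / 0.98 == pytest.approx(0.25510204, rel=1e-6)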