pre-commit fix

louie-tsai · louie-tsai · commit 1ea29243aa3f · 2025-10-29T15:32:32.000-07:00
Signed-off-by: louie-tsai <louie.tsai@intel.com>
diff --git a/.cd/README.md b/.cd/README.md
@@ -69,10 +69,13 @@ cd vllm-gaudi/.cd/
    To improve memory access coherence and release CPUs to other CPU-only workloads, such as vLLM serving with Llama3 8B,
    pin the CPU cores based on different CPU NUMA nodes by using an auto-generated docker-compose.override.yml file.
    A couple of Python libraries are needed for the Python scripts, so install the required packages using the following command.
+
    ```bash
    pip install -r vllm-fork/.cd/server/requirements_cpu_binding.txt
    ```
+
    Run the command below to pin CPU cores via the auto-generated docker-compose.override.yml file.
+
    ```bash
    cd vllm-fork/.cd/
    MODEL="Qwen/Qwen2.5-14B-Instruct" \
diff --git a/.cd/server/cpu_binding.py b/.cd/server/cpu_binding.py
@@ -2,22 +2,20 @@
 import os
 import csv
 from importlib import util
-from typing import Optional
 from enum import Enum
 from gaudi_topology import GaudiTopology
-from typing import List, Tuple
-REQUIRED_COLUMNS = ["model_id", "input_length", "output_length", "world_size", "data_type","num_allocated_cpu"]
+
+REQUIRED_COLUMNS = ["model_id", "input_length", "output_length", "world_size", "data_type", "num_allocated_cpu"]
+
 
 class BindingPolicy(Enum):
     Evenly_on_NUMAs = "evenly"
     NUMAs_with_cards = "close2cards"
 
 
-class CPU_Binding():
+class CPU_Binding:
 
-    def __init__(self,
-                 csv_path: str = "cpu_binding_gnr.csv",
-                 use_hyperthread: bool = False):
+    def __init__(self, csv_path: str = "cpu_binding_gnr.csv", use_hyperthread: bool = False):
         self.libnuma_found = util.find_spec("numa") is not None
         self.psutil_found = util.find_spec("psutil") is not None
         if self.libnuma_found and self.psutil_found:
@@ -28,7 +26,7 @@ def __init__(self,
             self.cpus_allow_list = psutil.Process().cpu_affinity()
             #print("cpu allow list:",self.cpus_allow_list)
             self.numa_size = info.get_num_configured_nodes()
-            self.cpu_count_per_numa =self.cpu_count // self.numa_size
+            self.cpu_count_per_numa = self.cpu_count // self.numa_size
 
             # Get CSV info
             with open(csv_path, newline="") as f:
@@ -38,7 +36,7 @@ def __init__(self,
                 raise ValueError(f"CSV missing required headers {REQUIRED_COLUMNS}. Found: {found}")
             model = os.environ.get("MODEL")
             if not model:
-                raise RuntimeError("Set environment variable MODEL to a model_id in the CSV (e.g., export MODEL='meta-llama/Llama-3.1-8B-Instruct').")
+                raise RuntimeError("Set environment variable MODEL to a model_id in the CSV.")
             input_tok = os.environ.get("INPUT_TOK")
             output_tok = os.environ.get("OUTPUT_TOK")
             con_req = os.environ.get("CONCURRENT_REQ")
@@ -57,7 +55,7 @@ def __init__(self,
             elif row["num_allocated_cpu"] == 'NA':
                 raise RuntimeError("Invalid NUM_CPU value. Set environment variable NUM_CPUS instead .")
             else:
-                self.num_allocated_cpu  = self.parse_int(row["num_allocated_cpu"], "num_allocated_cpu")
+                self.num_allocated_cpu = self.parse_int(row["num_allocated_cpu"], "num_allocated_cpu")
 
             # CPU
             # check allow node_to_cpus list
@@ -77,8 +75,8 @@ def __init__(self,
             # Gaudi
             topo = GaudiTopology()
             self.cards = topo.get_cards()
-            if self.cards != None:
-                self.gaudi_numa_list=[]
+            if self.cards is not None:
+                self.gaudi_numa_list = []
                 # Assume to use cards from 0 to 7
                 for card in self.cards[:self.world_size]:
                     if card['numa_node'] not in self.gaudi_numa_list:
@@ -91,36 +89,34 @@ def __init__(self,
     def parse_int(self, v: str, name: str) -> int:
         try:
             return int(v)
-        except Exception:
-            raise ValueError(f"Invalid integer for {name!r}: {v!r}")
+        except Exception as err:
+            raise ValueError(f"Invalid integer for {name!r}: {v!r}") from err
 
-    def pick_row_by_parameters(self, rows: List[dict], model: str, input_tok: str, output_tok: str, con_req: str) -> dict:
+    def pick_row_by_parameters(self, rows: list[dict], model: str, input_tok: str, output_tok: str,
+                               con_req: str) -> dict:
         matches = [
-                r for r in rows
-                if r.get("model_id", "").strip() == model
-                if r.get("input_length", "").strip() == input_tok
-                if r.get("output_length", "").strip() == output_tok
-                ]
+            r for r in rows if r.get("model_id", "").strip() == model if r.get("input_length", "").strip() == input_tok
+            if r.get("output_length", "").strip() == output_tok
+        ]
         if not matches:
-            available = ", ".join(sorted({r.get('model_id','') for r in rows}))
-            raise ValueError(f"MODEL '{model}', input_lenght '{input_tok}', output_length '{output_tok}' not found in CSV. Available: {available}")
+            available = ", ".join(sorted({r.get('model_id', '') for r in rows}))
+            raise ValueError(f"MODEL '{model}', input_length '{input_tok}', output_length '{output_tok}' "
+                             f"not found in CSV. Available: {available}")
         return matches[0]
 
-    def get_cpus_id_binding_based_on_numa_nodes(self,
-                                                rank: int) -> str:
+    def get_cpus_id_binding_based_on_numa_nodes(self, rank: int) -> str:
         """Return CPUs id binding based on NUMA nodes.
         """
         rank_to_cpus = ''
         if not self.libnuma_found or not self.psutil_found:
-            print(
-                "Auto thread-binding is not supported due to "
-                "the lack of package numa and psutil,"
-                "fallback to no thread-binding. To get better performance,"
-                "please try to manually bind threads.")
+            print("Auto thread-binding is not supported due to "
+                  "the lack of package numa and psutil,"
+                  "fallback to no thread-binding. To get better performance,"
+                  "please try to manually bind threads.")
             return rank_to_cpus
 
         if self.binding_policy is BindingPolicy.Evenly_on_NUMAs or self.cards is None:
-            divider = min (self.world_size, len(self.node_to_cpus))
+            divider = min(self.world_size, len(self.node_to_cpus))
             self.allocated_cpu_per_numa = self.num_allocated_cpu // divider
             node_id = rank
         elif self.binding_policy is BindingPolicy.NUMAs_with_cards:
@@ -136,10 +132,13 @@ def get_cpus_id_binding_based_on_numa_nodes(self,
 
         rank_to_cpus = ','.join(str(x) for x in rank_to_cpus_list)
         print("rank %d auto thread-binding list: %s", rank, rank_to_cpus)
-        self.node_to_idle_cpus[node_id] = [cpu for cpu in self.node_to_idle_cpus[node_id] if cpu not in rank_to_cpus_list]
+        self.node_to_idle_cpus[node_id] = [
+            cpu for cpu in self.node_to_idle_cpus[node_id] if cpu not in rank_to_cpus_list
+        ]
         return rank_to_cpus
 
-if __name__=="__main__":
+
+if __name__ == "__main__":
     libnuma_found = util.find_spec("numa") is not None
     if libnuma_found:
         from numa import info
@@ -153,7 +152,6 @@ def get_cpus_id_binding_based_on_numa_nodes(self,
         rank_to_cpus = cpu_binder.get_cpus_id_binding_based_on_numa_nodes(i)
         print(rank_to_cpus)
 
-
     rank_to_idle_cpus = ','.join(str(x) for row in cpu_binder.node_to_idle_cpus for x in row)
     print(rank_to_idle_cpus)
     for r in cpu_binder.node_to_idle_cpus:
diff --git a/.cd/server/gaudi_topology.py b/.cd/server/gaudi_topology.py
@@ -10,9 +10,10 @@
 import subprocess
 import re
 import os
-from typing import List, Dict, Optional
+from typing import Optional
 import shutil
 
+
 class GaudiTopology:
     """Utility class to discover Gaudi cards and their NUMA / CPU locality."""
 
@@ -23,40 +24,32 @@ def __init__(self):
     def _run_cmd(self, cmd: str) -> str:
         """Run a shell command and return stdout."""
         try:
-            result = subprocess.run(cmd, shell=True, check=True,
-                                    stdout=subprocess.PIPE, stderr=subprocess.PIPE,
-                                    text=True)
+            result = subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
             return result.stdout
         except subprocess.CalledProcessError as e:
-            raise RuntimeError(f"Command failed: {cmd}\n{e.stderr}")
+            raise RuntimeError(f"Command failed: {cmd}\n{e.stderr}") from e
 
     # ------------------------------------------------------------------
-    def _parse_hl_smi_table(self, text: str) -> List[Dict]:
+    def _parse_hl_smi_table(self, text: str) -> list[dict]:
         """
         Parse hl-smi v1.22+ table format.
         Example line:
         |   0  HL-325L             N/A  | 0000:97:00.0     N/A | ...
         """
         cards = []
-        pattern = re.compile(
-            r'^\|\s*(\d+)\s+([A-Z0-9-]+)\s+N/A\s+\|\s*([0-9a-fA-F:.]+)\s+N/A\s*\|'
-        )
+        pattern = re.compile(r'^\|\s*(\d+)\s+([A-Z0-9-]+)\s+N/A\s+\|\s*([0-9a-fA-F:.]+)\s+N/A\s*\|')
         for line in text.splitlines():
             match = pattern.match(line)
             if not match:
                 continue
             card_id, model, bus_id = match.groups()
             if not bus_id.startswith("0000:"):
                 bus_id = "0000:" + bus_id
-            cards.append({
-                "card_id": int(card_id),
-                "model": model,
-                "bus_id": bus_id
-            })
+            cards.append({"card_id": int(card_id), "model": model, "bus_id": bus_id})
         return cards
 
     # ------------------------------------------------------------------
-    def _get_sysfs_info(self, bus_id: str) -> Dict[str, Optional[str]]:
+    def _get_sysfs_info(self, bus_id: str) -> dict[str, Optional[str]]:
         """Fetch NUMA node and local CPU list from sysfs."""
         sys_path = f"/sys/bus/pci/devices/{bus_id}"
         info = {"numa_node": None, "local_cpulist": None}
@@ -73,7 +66,7 @@ def _get_sysfs_info(self, bus_id: str) -> Dict[str, Optional[str]]:
         return info
 
     # ------------------------------------------------------------------
-    def _discover_cards(self) -> List[Dict]:
+    def _discover_cards(self) -> list[dict]:
         """Run hl-smi and discover Gaudi cards."""
         if shutil.which("hl-smi") is None:
             print("No hl-smi found")
@@ -87,16 +80,17 @@ def _discover_cards(self) -> List[Dict]:
         return cards
 
     # ------------------------------------------------------------------
-    def get_cards(self) -> List[Dict]:
+    def get_cards(self) -> list[dict]:
         """Return list of all discovered cards sorted by NUMA node (then card_id)."""
+
         def sort_key(c):
             # Convert numa_node to int when possible, else put N/A at the end
             try:
                 return (int(c["numa_node"]), c["card_id"])
             except (TypeError, ValueError):
                 return (999, c["card_id"])
-        return sorted(self.cards, key=sort_key)
 
+        return sorted(self.cards, key=sort_key)
 
     # ------------------------------------------------------------------
     def get_numa_for_card(self, card_id: int) -> Optional[str]:
@@ -114,6 +108,7 @@ def get_cpus_for_card(self, card_id: int) -> Optional[str]:
                 return c["local_cpulist"]
         return None
 
+
 # ------------------------------------------------------------------------------
 
 if __name__ == "__main__":
diff --git a/.cd/server/generate_cpu_binding_from_csv.py b/.cd/server/generate_cpu_binding_from_csv.py
@@ -1,24 +1,19 @@
 #!/usr/bin/env python3
 import os
-import csv
 import argparse
-from typing import List, Tuple
 
 # Requires: pip install ruamel.yaml
 from ruamel.yaml import YAML
 from ruamel.yaml.comments import CommentedMap
 from ruamel.yaml.scalarstring import DoubleQuotedScalarString
 # Import CPU_Binding directly from sibling cpu_binding.py
-from gaudi_topology import GaudiTopology
-from cpu_binding import CPU_Binding, BindingPolicy
-from importlib import util
+from cpu_binding import CPU_Binding
 
-SERVICE_NAME = "vllm-server"     # single service
-XSET_NAME    = "vllm_server_cpu" # x-sets key/anchor
+SERVICE_NAME = "vllm-server"  # single service
+XSET_NAME = "vllm_server_cpu"  # x-sets key/anchor
 
-REQUIRED_COLUMNS = ["model_id", "input_length", "output_length", "world_size", "data_type","num_allocated_cpu"]
+REQUIRED_COLUMNS = ["model_id", "input_length", "output_length", "world_size", "data_type", "num_allocated_cpu"]
 
-from ruamel.yaml.comments import CommentedMap
 
 def build_cpuset_and_limit(csv_path: str):
     cpus_list = ''
@@ -33,10 +28,11 @@ def build_cpuset_and_limit(csv_path: str):
             cpus_list += rank_to_cpus
 
     idle_cpus_list = ','.join(str(x) for row in cpu_binder.node_to_idle_cpus for x in row)
-    print("bind cpus: " ,cpus_list)
+    print("bind cpus: ", cpus_list)
     print("idle cpus: ", idle_cpus_list)
     return cpus_list, idle_cpus_list, cpu_binder
 
+
 def generate_yaml_file(cpuset_csv, num_alloc, idle_cpuset_csv, num_idle_cpus, args_cpuservice, args_output):
     yaml = YAML()
     yaml.preserve_quotes = True
@@ -47,37 +43,40 @@ def generate_yaml_file(cpuset_csv, num_alloc, idle_cpuset_csv, num_idle_cpus, ar
     root["services"] = services
     vllm_server = CommentedMap()
     vllm_server["cpuset"] = DoubleQuotedScalarString(cpuset_csv)
-    vllm_server["cpus"]   = DoubleQuotedScalarString(str(num_alloc))
+    vllm_server["cpus"] = DoubleQuotedScalarString(str(num_alloc))
 
     services[SERVICE_NAME] = vllm_server
 
     # optional cpuservice: allocate remaining idle CPUs
     if args_cpuservice and args_cpuservice.strip():
         cpuservice = CommentedMap()
         cpuservice["cpuset"] = DoubleQuotedScalarString(idle_cpuset_csv)
-        cpuservice["cpus"]   = DoubleQuotedScalarString(str(num_idle_cpus))
+        cpuservice["cpus"] = DoubleQuotedScalarString(str(num_idle_cpus))
         services[args_cpuservice.strip()] = cpuservice
 
-
     with open(args_output, "w") as f:
         yaml.dump(root, f)
 
+
 def main():
     ap = argparse.ArgumentParser(description="Generate override docker-compose YAML (x-sets) for single 'vllm-server'.")
-    ap.add_argument("--settings", default="server/cpu_binding_gnr.csv",
+    ap.add_argument("--settings",
+                    default="server/cpu_binding_gnr.csv",
                     help="CSV with columns: model_id,input length,output length,world_size,num_allocated_cpu")
     ap.add_argument("--output", default="docker-compose.override.yml", help="Output compose YAML path")
     ap.add_argument("--cpuservice", help="name of the docker service binding on idle CPUs")
     args = ap.parse_args()
     model = os.environ.get("MODEL")
     if not model:
-        raise RuntimeError("Set environment variable MODEL to a model_id in the CSV (e.g., export MODEL='meta-llama/Llama-3.1-8B-Instruct').")
+        raise RuntimeError("Set environment variable MODEL to a model_id in the CSV.")
 
     cpuset_csv, idle_cpuset_csv, cpu_binder = build_cpuset_and_limit(args.settings)
     num_idle_cpus = len(idle_cpuset_csv.split(","))
-    generate_yaml_file(cpuset_csv, cpu_binder.num_allocated_cpu, idle_cpuset_csv, num_idle_cpus, args.cpuservice, args.output)
+    generate_yaml_file(cpuset_csv, cpu_binder.num_allocated_cpu, idle_cpuset_csv, num_idle_cpus, args.cpuservice,
+                       args.output)
+
+    print(f"Wrote {args.output} for MODEL={model} ( num_allocated_cpu={cpu_binder.num_allocated_cpu})")
 
-    print(f"Wrote {args.output} for MODEL={model} (world_size={cpu_binder.world_size}, num_allocated_cpu={cpu_binder.num_allocated_cpu})")
 
 if __name__ == "__main__":
     main()