Skip to content

Commit 1ea2924

Browse files
committed
pre-commit fix
Signed-off-by: louie-tsai <[email protected]>
1 parent c070b05 commit 1ea2924

File tree

4 files changed

+63
-68
lines changed

4 files changed

+63
-68
lines changed

.cd/README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,10 +69,13 @@ cd vllm-gaudi/.cd/
6969
To improve memory access coherence and release CPUs to other CPU-only workloads, such as vLLM serving Llama3 8B,
7070
pin the CPU cores based on different CPU NUMA nodes by using an auto-generated docker-compose.override.yml file.
7171
A couple of Python libraries are needed for the Python scripts, so install the required packages using the following command.
72+
7273
```bash
7374
pip install -r vllm-fork/.cd/server/requirements_cpu_binding.txt
7475
```
76+
7577
Run the command below to pin CPU cores via the auto-generated docker-compose.override.yml file.
78+
7679
```bash
7780
cd vllm-fork/.cd/
7881
MODEL="Qwen/Qwen2.5-14B-Instruct" \

.cd/server/cpu_binding.py

Lines changed: 31 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2,22 +2,20 @@
22
import os
33
import csv
44
from importlib import util
5-
from typing import Optional
65
from enum import Enum
76
from gaudi_topology import GaudiTopology
8-
from typing import List, Tuple
9-
REQUIRED_COLUMNS = ["model_id", "input_length", "output_length", "world_size", "data_type","num_allocated_cpu"]
7+
8+
REQUIRED_COLUMNS = ["model_id", "input_length", "output_length", "world_size", "data_type", "num_allocated_cpu"]
9+
1010

1111
class BindingPolicy(Enum):
1212
Evenly_on_NUMAs = "evenly"
1313
NUMAs_with_cards = "close2cards"
1414

1515

16-
class CPU_Binding():
16+
class CPU_Binding:
1717

18-
def __init__(self,
19-
csv_path: str = "cpu_binding_gnr.csv",
20-
use_hyperthread: bool = False):
18+
def __init__(self, csv_path: str = "cpu_binding_gnr.csv", use_hyperthread: bool = False):
2119
self.libnuma_found = util.find_spec("numa") is not None
2220
self.psutil_found = util.find_spec("psutil") is not None
2321
if self.libnuma_found and self.psutil_found:
@@ -28,7 +26,7 @@ def __init__(self,
2826
self.cpus_allow_list = psutil.Process().cpu_affinity()
2927
#print("cpu allow list:",self.cpus_allow_list)
3028
self.numa_size = info.get_num_configured_nodes()
31-
self.cpu_count_per_numa =self.cpu_count // self.numa_size
29+
self.cpu_count_per_numa = self.cpu_count // self.numa_size
3230

3331
# Get CSV info
3432
with open(csv_path, newline="") as f:
@@ -38,7 +36,7 @@ def __init__(self,
3836
raise ValueError(f"CSV missing required headers {REQUIRED_COLUMNS}. Found: {found}")
3937
model = os.environ.get("MODEL")
4038
if not model:
41-
raise RuntimeError("Set environment variable MODEL to a model_id in the CSV (e.g., export MODEL='meta-llama/Llama-3.1-8B-Instruct').")
39+
raise RuntimeError("Set environment variable MODEL to a model_id in the CSV.")
4240
input_tok = os.environ.get("INPUT_TOK")
4341
output_tok = os.environ.get("OUTPUT_TOK")
4442
con_req = os.environ.get("CONCURRENT_REQ")
@@ -57,7 +55,7 @@ def __init__(self,
5755
elif row["num_allocated_cpu"] == 'NA':
5856
raise RuntimeError("Invalid NUM_CPU value. Set environment variable NUM_CPUS instead .")
5957
else:
60-
self.num_allocated_cpu = self.parse_int(row["num_allocated_cpu"], "num_allocated_cpu")
58+
self.num_allocated_cpu = self.parse_int(row["num_allocated_cpu"], "num_allocated_cpu")
6159

6260
# CPU
6361
# check allow node_to_cpus list
@@ -77,8 +75,8 @@ def __init__(self,
7775
# Gaudi
7876
topo = GaudiTopology()
7977
self.cards = topo.get_cards()
80-
if self.cards != None:
81-
self.gaudi_numa_list=[]
78+
if self.cards is not None:
79+
self.gaudi_numa_list = []
8280
# Assume to use cards from 0 to 7
8381
for card in self.cards[:self.world_size]:
8482
if card['numa_node'] not in self.gaudi_numa_list:
@@ -91,36 +89,34 @@ def __init__(self,
9189
def parse_int(self, v: str, name: str) -> int:
9290
try:
9391
return int(v)
94-
except Exception:
95-
raise ValueError(f"Invalid integer for {name!r}: {v!r}")
92+
except Exception as err:
93+
raise ValueError(f"Invalid integer for {name!r}: {v!r}") from err
9694

97-
def pick_row_by_parameters(self, rows: List[dict], model: str, input_tok: str, output_tok: str, con_req: str) -> dict:
95+
def pick_row_by_parameters(self, rows: list[dict], model: str, input_tok: str, output_tok: str,
96+
con_req: str) -> dict:
9897
matches = [
99-
r for r in rows
100-
if r.get("model_id", "").strip() == model
101-
if r.get("input_length", "").strip() == input_tok
102-
if r.get("output_length", "").strip() == output_tok
103-
]
98+
r for r in rows if r.get("model_id", "").strip() == model if r.get("input_length", "").strip() == input_tok
99+
if r.get("output_length", "").strip() == output_tok
100+
]
104101
if not matches:
105-
available = ", ".join(sorted({r.get('model_id','') for r in rows}))
106-
raise ValueError(f"MODEL '{model}', input_lenght '{input_tok}', output_length '{output_tok}' not found in CSV. Available: {available}")
102+
available = ", ".join(sorted({r.get('model_id', '') for r in rows}))
103+
raise ValueError(f"MODEL '{model}', input_length '{input_tok}', output_length '{output_tok}' "
104+
f"not found in CSV. Available: {available}")
107105
return matches[0]
108106

109-
def get_cpus_id_binding_based_on_numa_nodes(self,
110-
rank: int) -> str:
107+
def get_cpus_id_binding_based_on_numa_nodes(self, rank: int) -> str:
111108
"""Return CPUs id binding based on NUMA nodes.
112109
"""
113110
rank_to_cpus = ''
114111
if not self.libnuma_found or not self.psutil_found:
115-
print(
116-
"Auto thread-binding is not supported due to "
117-
"the lack of package numa and psutil,"
118-
"fallback to no thread-binding. To get better performance,"
119-
"please try to manually bind threads.")
112+
print("Auto thread-binding is not supported due to "
113+
"the lack of package numa and psutil,"
114+
"fallback to no thread-binding. To get better performance,"
115+
"please try to manually bind threads.")
120116
return rank_to_cpus
121117

122118
if self.binding_policy is BindingPolicy.Evenly_on_NUMAs or self.cards is None:
123-
divider = min (self.world_size, len(self.node_to_cpus))
119+
divider = min(self.world_size, len(self.node_to_cpus))
124120
self.allocated_cpu_per_numa = self.num_allocated_cpu // divider
125121
node_id = rank
126122
elif self.binding_policy is BindingPolicy.NUMAs_with_cards:
@@ -136,10 +132,13 @@ def get_cpus_id_binding_based_on_numa_nodes(self,
136132

137133
rank_to_cpus = ','.join(str(x) for x in rank_to_cpus_list)
138134
print("rank %d auto thread-binding list: %s", rank, rank_to_cpus)
139-
self.node_to_idle_cpus[node_id] = [cpu for cpu in self.node_to_idle_cpus[node_id] if cpu not in rank_to_cpus_list]
135+
self.node_to_idle_cpus[node_id] = [
136+
cpu for cpu in self.node_to_idle_cpus[node_id] if cpu not in rank_to_cpus_list
137+
]
140138
return rank_to_cpus
141139

142-
if __name__=="__main__":
140+
141+
if __name__ == "__main__":
143142
libnuma_found = util.find_spec("numa") is not None
144143
if libnuma_found:
145144
from numa import info
@@ -153,7 +152,6 @@ def get_cpus_id_binding_based_on_numa_nodes(self,
153152
rank_to_cpus = cpu_binder.get_cpus_id_binding_based_on_numa_nodes(i)
154153
print(rank_to_cpus)
155154

156-
157155
rank_to_idle_cpus = ','.join(str(x) for row in cpu_binder.node_to_idle_cpus for x in row)
158156
print(rank_to_idle_cpus)
159157
for r in cpu_binder.node_to_idle_cpus:

.cd/server/gaudi_topology.py

Lines changed: 13 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,10 @@
1010
import subprocess
1111
import re
1212
import os
13-
from typing import List, Dict, Optional
13+
from typing import Optional
1414
import shutil
1515

16+
1617
class GaudiTopology:
1718
"""Utility class to discover Gaudi cards and their NUMA / CPU locality."""
1819

@@ -23,40 +24,32 @@ def __init__(self):
2324
def _run_cmd(self, cmd: str) -> str:
2425
"""Run a shell command and return stdout."""
2526
try:
26-
result = subprocess.run(cmd, shell=True, check=True,
27-
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
28-
text=True)
27+
result = subprocess.run(cmd, shell=True, check=True, capture_output=True, text=True)
2928
return result.stdout
3029
except subprocess.CalledProcessError as e:
31-
raise RuntimeError(f"Command failed: {cmd}\n{e.stderr}")
30+
raise RuntimeError(f"Command failed: {cmd}\n{e.stderr}") from e
3231

3332
# ------------------------------------------------------------------
34-
def _parse_hl_smi_table(self, text: str) -> List[Dict]:
33+
def _parse_hl_smi_table(self, text: str) -> list[dict]:
3534
"""
3635
Parse hl-smi v1.22+ table format.
3736
Example line:
3837
| 0 HL-325L N/A | 0000:97:00.0 N/A | ...
3938
"""
4039
cards = []
41-
pattern = re.compile(
42-
r'^\|\s*(\d+)\s+([A-Z0-9-]+)\s+N/A\s+\|\s*([0-9a-fA-F:.]+)\s+N/A\s*\|'
43-
)
40+
pattern = re.compile(r'^\|\s*(\d+)\s+([A-Z0-9-]+)\s+N/A\s+\|\s*([0-9a-fA-F:.]+)\s+N/A\s*\|')
4441
for line in text.splitlines():
4542
match = pattern.match(line)
4643
if not match:
4744
continue
4845
card_id, model, bus_id = match.groups()
4946
if not bus_id.startswith("0000:"):
5047
bus_id = "0000:" + bus_id
51-
cards.append({
52-
"card_id": int(card_id),
53-
"model": model,
54-
"bus_id": bus_id
55-
})
48+
cards.append({"card_id": int(card_id), "model": model, "bus_id": bus_id})
5649
return cards
5750

5851
# ------------------------------------------------------------------
59-
def _get_sysfs_info(self, bus_id: str) -> Dict[str, Optional[str]]:
52+
def _get_sysfs_info(self, bus_id: str) -> dict[str, Optional[str]]:
6053
"""Fetch NUMA node and local CPU list from sysfs."""
6154
sys_path = f"/sys/bus/pci/devices/{bus_id}"
6255
info = {"numa_node": None, "local_cpulist": None}
@@ -73,7 +66,7 @@ def _get_sysfs_info(self, bus_id: str) -> Dict[str, Optional[str]]:
7366
return info
7467

7568
# ------------------------------------------------------------------
76-
def _discover_cards(self) -> List[Dict]:
69+
def _discover_cards(self) -> list[dict]:
7770
"""Run hl-smi and discover Gaudi cards."""
7871
if shutil.which("hl-smi") is None:
7972
print("No hl-smi found")
@@ -87,16 +80,17 @@ def _discover_cards(self) -> List[Dict]:
8780
return cards
8881

8982
# ------------------------------------------------------------------
90-
def get_cards(self) -> List[Dict]:
83+
def get_cards(self) -> list[dict]:
9184
"""Return list of all discovered cards sorted by NUMA node (then card_id)."""
85+
9286
def sort_key(c):
9387
# Convert numa_node to int when possible, else put N/A at the end
9488
try:
9589
return (int(c["numa_node"]), c["card_id"])
9690
except (TypeError, ValueError):
9791
return (999, c["card_id"])
98-
return sorted(self.cards, key=sort_key)
9992

93+
return sorted(self.cards, key=sort_key)
10094

10195
# ------------------------------------------------------------------
10296
def get_numa_for_card(self, card_id: int) -> Optional[str]:
@@ -114,6 +108,7 @@ def get_cpus_for_card(self, card_id: int) -> Optional[str]:
114108
return c["local_cpulist"]
115109
return None
116110

111+
117112
# ------------------------------------------------------------------------------
118113

119114
if __name__ == "__main__":
Lines changed: 16 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1,24 +1,19 @@
11
#!/usr/bin/env python3
22
import os
3-
import csv
43
import argparse
5-
from typing import List, Tuple
64

75
# Requires: pip install ruamel.yaml
86
from ruamel.yaml import YAML
97
from ruamel.yaml.comments import CommentedMap
108
from ruamel.yaml.scalarstring import DoubleQuotedScalarString
119
# Import CPU_Binding directly from sibling cpu_binding.py
12-
from gaudi_topology import GaudiTopology
13-
from cpu_binding import CPU_Binding, BindingPolicy
14-
from importlib import util
10+
from cpu_binding import CPU_Binding
1511

16-
SERVICE_NAME = "vllm-server" # single service
17-
XSET_NAME = "vllm_server_cpu" # x-sets key/anchor
12+
SERVICE_NAME = "vllm-server" # single service
13+
XSET_NAME = "vllm_server_cpu" # x-sets key/anchor
1814

19-
REQUIRED_COLUMNS = ["model_id", "input_length", "output_length", "world_size", "data_type","num_allocated_cpu"]
15+
REQUIRED_COLUMNS = ["model_id", "input_length", "output_length", "world_size", "data_type", "num_allocated_cpu"]
2016

21-
from ruamel.yaml.comments import CommentedMap
2217

2318
def build_cpuset_and_limit(csv_path: str):
2419
cpus_list = ''
@@ -33,10 +28,11 @@ def build_cpuset_and_limit(csv_path: str):
3328
cpus_list += rank_to_cpus
3429

3530
idle_cpus_list = ','.join(str(x) for row in cpu_binder.node_to_idle_cpus for x in row)
36-
print("bind cpus: " ,cpus_list)
31+
print("bind cpus: ", cpus_list)
3732
print("idle cpus: ", idle_cpus_list)
3833
return cpus_list, idle_cpus_list, cpu_binder
3934

35+
4036
def generate_yaml_file(cpuset_csv, num_alloc, idle_cpuset_csv, num_idle_cpus, args_cpuservice, args_output):
4137
yaml = YAML()
4238
yaml.preserve_quotes = True
@@ -47,37 +43,40 @@ def generate_yaml_file(cpuset_csv, num_alloc, idle_cpuset_csv, num_idle_cpus, ar
4743
root["services"] = services
4844
vllm_server = CommentedMap()
4945
vllm_server["cpuset"] = DoubleQuotedScalarString(cpuset_csv)
50-
vllm_server["cpus"] = DoubleQuotedScalarString(str(num_alloc))
46+
vllm_server["cpus"] = DoubleQuotedScalarString(str(num_alloc))
5147

5248
services[SERVICE_NAME] = vllm_server
5349

5450
# optional cpuservice: allocate remaining idle CPUs
5551
if args_cpuservice and args_cpuservice.strip():
5652
cpuservice = CommentedMap()
5753
cpuservice["cpuset"] = DoubleQuotedScalarString(idle_cpuset_csv)
58-
cpuservice["cpus"] = DoubleQuotedScalarString(str(num_idle_cpus))
54+
cpuservice["cpus"] = DoubleQuotedScalarString(str(num_idle_cpus))
5955
services[args_cpuservice.strip()] = cpuservice
6056

61-
6257
with open(args_output, "w") as f:
6358
yaml.dump(root, f)
6459

60+
6561
def main():
6662
ap = argparse.ArgumentParser(description="Generate override docker-compose YAML (x-sets) for single 'vllm-server'.")
67-
ap.add_argument("--settings", default="server/cpu_binding_gnr.csv",
63+
ap.add_argument("--settings",
64+
default="server/cpu_binding_gnr.csv",
6865
help="CSV with columns: model_id,input length,output length,world_size,num_allocated_cpu")
6966
ap.add_argument("--output", default="docker-compose.override.yml", help="Output compose YAML path")
7067
ap.add_argument("--cpuservice", help="name of the docker service binding on idle CPUs")
7168
args = ap.parse_args()
7269
model = os.environ.get("MODEL")
7370
if not model:
74-
raise RuntimeError("Set environment variable MODEL to a model_id in the CSV (e.g., export MODEL='meta-llama/Llama-3.1-8B-Instruct').")
71+
raise RuntimeError("Set environment variable MODEL to a model_id in the CSV.")
7572

7673
cpuset_csv, idle_cpuset_csv, cpu_binder = build_cpuset_and_limit(args.settings)
7774
num_idle_cpus = len(idle_cpuset_csv.split(","))
78-
generate_yaml_file(cpuset_csv, cpu_binder.num_allocated_cpu, idle_cpuset_csv, num_idle_cpus, args.cpuservice, args.output)
75+
generate_yaml_file(cpuset_csv, cpu_binder.num_allocated_cpu, idle_cpuset_csv, num_idle_cpus, args.cpuservice,
76+
args.output)
77+
78+
print(f"Wrote {args.output} for MODEL={model} ( num_allocated_cpu={cpu_binder.num_allocated_cpu})")
7979

80-
print(f"Wrote {args.output} for MODEL={model} (world_size={cpu_binder.world_size}, num_allocated_cpu={cpu_binder.num_allocated_cpu})")
8180

8281
if __name__ == "__main__":
8382
main()

0 commit comments

Comments
 (0)