Skip to content

Commit df4933d

Browse files
committed
fix even policy issue for 70b and 8b
use one CPU id per core, and fallback model_id match if no input/output match Signed-off-by: louie-tsai <[email protected]>
1 parent 8b3160f commit df4933d

File tree

3 files changed

+38
-6
lines changed

3 files changed

+38
-6
lines changed

.cd/server/cpu_binding.py

Lines changed: 30 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,8 @@ def __init__(self, csv_path: str = "cpu_binding_gnr.csv", use_hyperthread: bool
6262
self.node_to_cpus = []
6363
for i in range(self.numa_size):
6464
from numa import info
65-
node_intersect = [cpu for cpu in info.node_to_cpus(i) if cpu in self.cpus_allow_list]
65+
filtered_node_to_cpus = self.filter_one_cpu_per_core(info.node_to_cpus(i))
66+
node_intersect = [cpu for cpu in filtered_node_to_cpus if cpu in self.cpus_allow_list]
6667
if bool(node_intersect):
6768
self.node_to_cpus.append(list(node_intersect))
6869
self.node_to_idle_cpus = self.node_to_cpus.copy()
@@ -98,11 +99,33 @@ def pick_row_by_parameters(self, rows: list[dict], model: str, input_tok: str, o
9899
r for r in rows if r.get("model_id", "").strip() == model if r.get("input_length", "").strip() == input_tok
99100
if r.get("output_length", "").strip() == output_tok
100101
]
102+
if not matches:
103+
# fallback: match only by model_id
104+
matches = [r for r in rows if r.get('model_id', '') == model]
105+
print(f"Warning: using fallback entry for model '{model}' without exact input/output token match")
101106
if not matches:
102107
available = ", ".join(sorted({r.get('model_id', '') for r in rows}))
103108
raise ValueError(f"MODEL '{model}', input_length '{input_tok}', output_length '{output_tok}' "
104109
f"not found in CSV. Available: {available}")
105110
return matches[0]
111+
def filter_one_cpu_per_core(self, cpus):
112+
"""
113+
Given a list of CPU IDs (possibly with HT pairs),
114+
return a filtered list with only one logical CPU per physical core.
115+
"""
116+
seen_cores = set()
117+
filtered = []
118+
for cpu in sorted(cpus):
119+
core_path = f"/sys/devices/system/cpu/cpu{cpu}/topology/core_id"
120+
try:
121+
with open(core_path) as f:
122+
core_id = int(f.read().strip())
123+
except FileNotFoundError:
124+
continue
125+
if core_id not in seen_cores:
126+
seen_cores.add(core_id)
127+
filtered.append(cpu)
128+
return filtered
106129

107130
def get_cpus_id_binding_based_on_numa_nodes(self, rank: int) -> str:
108131
"""Return CPUs id binding based on NUMA nodes.
@@ -116,8 +139,8 @@ def get_cpus_id_binding_based_on_numa_nodes(self, rank: int) -> str:
116139
return rank_to_cpus
117140

118141
if self.binding_policy is BindingPolicy.Evenly_on_NUMAs or self.cards is None:
119-
divider = min(self.world_size, len(self.node_to_cpus))
120-
self.allocated_cpu_per_numa = self.num_allocated_cpu // divider
142+
#divider = min(self.world_size, len(self.node_to_cpus))
143+
self.allocated_cpu_per_numa = self.num_allocated_cpu // len(self.node_to_cpus)
121144
node_id = rank
122145
elif self.binding_policy is BindingPolicy.NUMAs_with_cards:
123146
self.allocated_cpu_per_numa = self.num_allocated_cpu // len(self.gaudi_numa_list)
@@ -147,7 +170,10 @@ def get_cpus_id_binding_based_on_numa_nodes(self, rank: int) -> str:
147170
numa_size = 1
148171
world_size = numa_size
149172
cpu_binder = CPU_Binding(use_hyperthread=False)
150-
max_needed_numa_size = min(cpu_binder.world_size, cpu_binder.numa_size)
173+
if cpu_binder.binding_policy is BindingPolicy.Evenly_on_NUMAs or cpu_binder.cards is None:
174+
max_needed_numa_size = len(cpu_binder.node_to_cpus)
175+
elif cpu_binder.binding_policy is BindingPolicy.NUMAs_with_cards:
176+
max_needed_numa_size = min(cpu_binder.world_size, len(cpu_binder.node_to_cpus))
151177
for i in range(max_needed_numa_size):
152178
rank_to_cpus = cpu_binder.get_cpus_id_binding_based_on_numa_nodes(i)
153179
print(rank_to_cpus)

.cd/server/cpu_binding_gnr.csv

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,3 +5,6 @@ meta-llama/Llama-3.1-405B-Instruct,4096,128,8,bf16,18,0
55
meta-llama/Llama-3.1-70B-Instruct,128,4096,4,bf16,12,0
66
meta-llama/Llama-3.1-70B-Instruct,2048,2048,4,bf16,12,0
77
meta-llama/Llama-3.1-70B-Instruct,4096,128,4,bf16,12,0
8+
meta-llama/Llama-3.1-8B-Instruct,128,4096,1,bf16,6,0
9+
meta-llama/Llama-3.1-8B-Instruct,2048,2048,1,bf16,6,0
10+
meta-llama/Llama-3.1-8B-Instruct,4096,128,1,bf16,6,0

.cd/server/generate_cpu_binding_from_csv.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
from ruamel.yaml.comments import CommentedMap
88
from ruamel.yaml.scalarstring import DoubleQuotedScalarString
99
# Import CPU_Binding directly from sibling cpu_binding.py
10-
from cpu_binding import CPU_Binding
10+
from cpu_binding import CPU_Binding, BindingPolicy
1111

1212
SERVICE_NAME = "vllm-server" # single service
1313
XSET_NAME = "vllm_server_cpu" # x-sets key/anchor
@@ -19,7 +19,10 @@ def build_cpuset_and_limit(csv_path: str):
1919
cpus_list = ''
2020
idle_cpus_list = ''
2121
cpu_binder = CPU_Binding(csv_path=csv_path, use_hyperthread=False)
22-
max_needed_numa_size = min(cpu_binder.world_size, cpu_binder.numa_size)
22+
if cpu_binder.binding_policy is BindingPolicy.Evenly_on_NUMAs or cpu_binder.cards is None:
23+
max_needed_numa_size = len(cpu_binder.node_to_cpus)
24+
elif cpu_binder.binding_policy is BindingPolicy.NUMAs_with_cards:
25+
max_needed_numa_size = min(cpu_binder.world_size, len(cpu_binder.node_to_cpus))
2326
for rank in range(max_needed_numa_size):
2427
rank_to_cpus = cpu_binder.get_cpus_id_binding_based_on_numa_nodes(rank)
2528
if rank_to_cpus not in cpus_list:

0 commit comments

Comments
 (0)