Skip to content

Commit 2c81c63

Browse files
author
Paweł Szulik
committed
Add mon groups for resctrl.
"mon_groups" can be created to monitor subsets of tasks in the CTRL_MON group that is their ancestor. More info: https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt Signed-off-by: Paweł Szulik <[email protected]>
1 parent 9d4c02c commit 2c81c63

File tree

14 files changed

+592
-143
lines changed

14 files changed

+592
-143
lines changed

events.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -154,12 +154,12 @@ func convertLibcontainerStats(ls *libcontainer.Stats) *types.Stats {
154154
}
155155

156156
if is := ls.IntelRdtStats; is != nil {
157-
if intelrdt.IsCatEnabled() {
157+
if intelrdt.IsCatEnabled() && is.L3CacheInfo != nil {
158158
s.IntelRdt.L3CacheInfo = convertL3CacheInfo(is.L3CacheInfo)
159159
s.IntelRdt.L3CacheSchemaRoot = is.L3CacheSchemaRoot
160160
s.IntelRdt.L3CacheSchema = is.L3CacheSchema
161161
}
162-
if intelrdt.IsMbaEnabled() {
162+
if intelrdt.IsMbaEnabled() && is.MemBwInfo != nil{
163163
s.IntelRdt.MemBwInfo = convertMemBwInfo(is.MemBwInfo)
164164
s.IntelRdt.MemBwSchemaRoot = is.MemBwSchemaRoot
165165
s.IntelRdt.MemBwSchema = is.MemBwSchema

libcontainer/SPEC.md

Lines changed: 53 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -158,22 +158,31 @@ init process will block waiting for the parent to finish setup.
158158
### IntelRdt
159159

160160
Intel platforms with new Xeon CPU support Resource Director Technology (RDT).
161-
Cache Allocation Technology (CAT) and Memory Bandwidth Allocation (MBA) are
162-
two sub-features of RDT.
161+
Cache Allocation Technology (CAT), Cache Monitoring Technology (CMT),
162+
Memory Bandwidth Allocation (MBA) and Memory Bandwidth Monitoring (MBM) are
163+
four sub-features of RDT.
163164

164165
Cache Allocation Technology (CAT) provides a way for the software to restrict
165166
cache allocation to a defined 'subset' of L3 cache which may be overlapping
166167
with other 'subsets'. The different subsets are identified by class of
167168
service (CLOS) and each CLOS has a capacity bitmask (CBM).
168169

170+
Cache Monitoring Technology (CMT) supports monitoring of the last-level cache (LLC) occupancy
171+
for each running thread simultaneously.
172+
169173
Memory Bandwidth Allocation (MBA) provides indirect and approximate throttle
170174
over memory bandwidth for the software. A user controls the resource by
171-
indicating the percentage of maximum memory bandwidth or memory bandwidth limit
172-
in MBps unit if MBA Software Controller is enabled.
175+
indicating the percentage of maximum memory bandwidth or memory bandwidth
176+
limit in MBps unit if MBA Software Controller is enabled.
177+
178+
Memory Bandwidth Monitoring (MBM) supports monitoring of total and local memory bandwidth
179+
for each running thread simultaneously.
173180

174-
It can be used to handle L3 cache and memory bandwidth resources allocation
175-
for containers if hardware and kernel support Intel RDT CAT and MBA features.
181+
More details about Intel RDT CAT and MBA can be found in sections 17.18 and 17.19, Volume 3
182+
of the Intel Software Developer Manual:
183+
https://software.intel.com/en-us/articles/intel-sdm
176184

185+
About Intel RDT kernel interface:
177186
In Linux 4.10 kernel or newer, the interface is defined and exposed via
178187
"resource control" filesystem, which is a "cgroup-like" interface.
179188

@@ -194,22 +203,43 @@ tree /sys/fs/resctrl
194203
| | |-- cbm_mask
195204
| | |-- min_cbm_bits
196205
| | |-- num_closids
206+
| |-- L3_MON
207+
| | |-- max_threshold_occupancy
208+
| | |-- mon_features
209+
| | |-- num_rmids
197210
| |-- MB
198211
| |-- bandwidth_gran
199212
| |-- delay_linear
200213
| |-- min_bandwidth
201214
| |-- num_closids
202-
|-- ...
215+
|-- mon_groups
216+
|-- <container_id>
217+
|-- ...
218+
|-- mon_data
219+
|-- mon_L3_00
220+
|-- llc_occupancy
221+
|-- mbm_local_bytes
222+
|-- mbm_total_bytes
223+
|-- ...
224+
|-- tasks
203225
|-- schemata
204226
|-- tasks
205227
|-- <container_id>
206228
|-- ...
207-
|-- schemata
229+
|-- mon_data
230+
|-- mon_L3_00
231+
|-- llc_occupancy
232+
|-- mbm_local_bytes
233+
|-- mbm_total_bytes
234+
|-- ...
208235
|-- tasks
236+
|-- schemata
237+
|-- ...
209238
```
210239

211240
For runc, we can make use of `tasks` and `schemata` configuration for L3
212-
cache and memory bandwidth resources constraints.
241+
cache and memory bandwidth resource constraints, and of the `mon_data` directory for
242+
CMT and MBM statistics.
213243

214244
The file `tasks` has a list of tasks that belong to this group (e.g.,
215245
"<container_id>" group). Tasks can be added to a group by writing the task ID
@@ -251,7 +281,7 @@ that is allocated is also dependent on the CPU model and can be looked up at
251281
min_bw + N * bw_gran. Intermediate values are rounded to the next control
252282
step available on the hardware.
253283

254-
If MBA Software Controller is enabled through mount option "-o mba_MBps"
284+
If MBA Software Controller is enabled through mount option "-o mba_MBps":
255285
mount -t resctrl resctrl -o mba_MBps /sys/fs/resctrl
256286
We could specify memory bandwidth in "MBps" (Mega Bytes per second) unit
257287
instead of "percentages". The kernel underneath would use a software feedback
@@ -263,11 +293,12 @@ For example, on a two-socket machine, the schema line could be
263293
"MB:0=5000;1=7000" which means 5000 MBps memory bandwidth limit on socket 0
264294
and 7000 MBps memory bandwidth limit on socket 1.
265295

266-
For more information about Intel RDT kernel interface:
296+
For more information about Intel RDT kernel interface:
267297
https://www.kernel.org/doc/Documentation/x86/intel_rdt_ui.txt
268298

269-
```
299+
270300
An example for runc:
301+
```
271302
Consider a two-socket machine with two L3 caches where the default CBM is
272303
0x7ff and the max CBM length is 11 bits, and minimum memory bandwidth of 10%
273304
with a memory bandwidth granularity of 10%.
@@ -278,10 +309,18 @@ maximum memory bandwidth of 20% on socket 0 and 70% on socket 1.
278309
279310
"linux": {
280311
"intelRdt": {
281-
"closID": "guaranteed_group",
282312
"l3CacheSchema": "L3:0=7f0;1=1f",
283313
"memBwSchema": "MB:0=20;1=70"
284-
}
314+
}
315+
}
316+
```
317+
Another example:
318+
```
319+
We only want to monitor memory bandwidth and LLC occupancy.
320+
"linux": {
321+
"intelRdt": {
322+
"monitoring": true
323+
}
285324
}
286325
```
287326

libcontainer/configs/config.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ type Config struct {
192192
NoNewKeyring bool `json:"no_new_keyring"`
193193

194194
// IntelRdt specifies settings for Intel RDT group that the container is placed into
195-
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available
195+
// to limit the resources (e.g., L3 cache, memory bandwidth) the container has available.
196196
IntelRdt *IntelRdt `json:"intel_rdt,omitempty"`
197197

198198
// RootlessEUID is set when the runc was launched with non-zero EUID.

libcontainer/configs/intelrdt.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,4 +10,7 @@ type IntelRdt struct {
1010
// The unit of memory bandwidth is specified in "percentages" by
1111
// default, and in "MBps" if MBA Software Controller is enabled.
1212
MemBwSchema string `json:"memBwSchema,omitempty"`
13+
14+
// Monitoring is a flag that requests monitoring of LLC occupancy or total and local memory bandwidth.
15+
Monitoring bool `json:"monitoring,omitempty"`
1316
}

libcontainer/configs/validate/validator.go

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -182,22 +182,17 @@ func (v *ConfigValidator) sysctl(config *configs.Config) error {
182182

183183
func (v *ConfigValidator) intelrdt(config *configs.Config) error {
184184
if config.IntelRdt != nil {
185-
if !intelrdt.IsCatEnabled() && !intelrdt.IsMbaEnabled() {
186-
return errors.New("intelRdt is specified in config, but Intel RDT is not supported or enabled")
185+
if config.IntelRdt.L3CacheSchema == "" && config.IntelRdt.MemBwSchema == "" && !config.IntelRdt.Monitoring {
186+
return errors.New("intelRdt config is specified but values are empty")
187187
}
188-
189188
if !intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema != "" {
190189
return errors.New("intelRdt.l3CacheSchema is specified in config, but Intel RDT/CAT is not enabled")
191190
}
192191
if !intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema != "" {
193192
return errors.New("intelRdt.memBwSchema is specified in config, but Intel RDT/MBA is not enabled")
194193
}
195-
196-
if intelrdt.IsCatEnabled() && config.IntelRdt.L3CacheSchema == "" {
197-
return errors.New("Intel RDT/CAT is enabled and intelRdt is specified in config, but intelRdt.l3CacheSchema is empty")
198-
}
199-
if intelrdt.IsMbaEnabled() && config.IntelRdt.MemBwSchema == "" {
200-
return errors.New("Intel RDT/MBA is enabled and intelRdt is specified in config, but intelRdt.memBwSchema is empty")
194+
if !intelrdt.IsMBMEnabled() && !intelrdt.IsCMTEnabled() && config.IntelRdt.Monitoring {
195+
return errors.New("intelRdt.monitoring is specified in config, but MBM and CMT Intel RDT are not enabled")
201196
}
202197
}
203198

libcontainer/container_linux.go

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1846,16 +1846,17 @@ func (c *linuxContainer) currentState() (*State, error) {
18461846
startTime uint64
18471847
externalDescriptors []string
18481848
pid = -1
1849+
intelRdtPath string
18491850
)
18501851
if c.initProcess != nil {
18511852
pid = c.initProcess.pid()
18521853
startTime, _ = c.initProcess.startTime()
18531854
externalDescriptors = c.initProcess.externalDescriptors()
18541855
}
1855-
intelRdtPath, err := intelrdt.GetIntelRdtPath(c.ID())
1856-
if err != nil {
1857-
intelRdtPath = ""
1856+
if c.intelRdtManager != nil {
1857+
intelRdtPath = c.intelRdtManager.GetPath()
18581858
}
1859+
18591860
state := &State{
18601861
BaseState: BaseState{
18611862
ID: c.ID(),

libcontainer/factory_linux.go

Lines changed: 12 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -262,18 +262,16 @@ func (l *LinuxFactory) Create(id string, config *configs.Config) (Container, err
262262
return nil, newGenericError(err, SystemError)
263263
}
264264
c := &linuxContainer{
265-
id: id,
266-
root: containerRoot,
267-
config: config,
268-
initPath: l.InitPath,
269-
initArgs: l.InitArgs,
270-
criuPath: l.CriuPath,
271-
newuidmapPath: l.NewuidmapPath,
272-
newgidmapPath: l.NewgidmapPath,
273-
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
274-
}
275-
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
276-
c.intelRdtManager = l.NewIntelRdtManager(config, id, "")
265+
id: id,
266+
root: containerRoot,
267+
config: config,
268+
initPath: l.InitPath,
269+
initArgs: l.InitArgs,
270+
criuPath: l.CriuPath,
271+
newuidmapPath: l.NewuidmapPath,
272+
newgidmapPath: l.NewgidmapPath,
273+
cgroupManager: l.NewCgroupsManager(config.Cgroups, nil),
274+
intelRdtManager: l.NewIntelRdtManager(config, id, ""),
277275
}
278276
c.state = &stoppedState{c: c}
279277
return c, nil
@@ -313,14 +311,13 @@ func (l *LinuxFactory) Load(id string) (Container, error) {
313311
cgroupManager: l.NewCgroupsManager(state.Config.Cgroups, state.CgroupPaths),
314312
root: containerRoot,
315313
created: state.Created,
314+
intelRdtManager: l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath),
316315
}
317316
c.state = &loadedState{c: c}
318317
if err := c.refreshState(); err != nil {
319318
return nil, err
320319
}
321-
if intelrdt.IsCatEnabled() || intelrdt.IsMbaEnabled() {
322-
c.intelRdtManager = l.NewIntelRdtManager(&state.Config, id, state.IntelRdtPath)
323-
}
320+
324321
return c, nil
325322
}
326323

0 commit comments

Comments
 (0)