Skip to content

Commit d61832d

Browse files
kerumeto authored and
gvisor-bot committed
Implement Nftables DELTABLE, DESTROYTABLE message functionality
These messages now allow us to delete tables by name or by handle, and to flush tables under certain scenarios. Also updated the nftables package to translate explicitly from nftables protocol families (NFPROTO_INET, etc.) to stack address families. In Linux this translation is implicit, because the enums are assigned overlapping values; here it is done explicitly because stack address families have the custom type stack.AddressFamily. This gives us an extra safety check that only supported address families are allowed. Finally, updated tests to build requests dynamically with buffers, so that the correct amount of padding is inserted between headers and payloads. PiperOrigin-RevId: 783444579
1 parent 8b94975 commit d61832d

File tree

12 files changed

+1000
-333
lines changed

12 files changed

+1000
-333
lines changed

pkg/abi/linux/netlink.go

Lines changed: 23 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -66,21 +66,35 @@ type NetlinkMessageHeader struct {
6666
// NetlinkMessageHeaderSize is the size of NetlinkMessageHeader.
6767
const NetlinkMessageHeaderSize = 16
6868

69-
// Netlink message header flags, from uapi/linux/netlink.h.
69+
// Netlink message header flag values, from uapi/linux/netlink.h.
7070
const (
7171
NLM_F_REQUEST = 0x1
7272
NLM_F_MULTI = 0x2
7373
NLM_F_ACK = 0x4
7474
NLM_F_ECHO = 0x8
7575
NLM_F_DUMP_INTR = 0x10
76-
NLM_F_ROOT = 0x100
77-
NLM_F_MATCH = 0x200
78-
NLM_F_ATOMIC = 0x400
79-
NLM_F_DUMP = NLM_F_ROOT | NLM_F_MATCH
80-
NLM_F_REPLACE = 0x100
81-
NLM_F_EXCL = 0x200
82-
NLM_F_CREATE = 0x400
83-
NLM_F_APPEND = 0x800
76+
)
77+
78+
// Netlink message header flags for GET requests, from uapi/linux/netlink.h.
79+
const (
80+
NLM_F_ROOT = 0x100
81+
NLM_F_MATCH = 0x200
82+
NLM_F_ATOMIC = 0x400
83+
NLM_F_DUMP = NLM_F_ROOT | NLM_F_MATCH
84+
)
85+
86+
// Netlink message header flags for NEW requests, from uapi/linux/netlink.h.
87+
const (
88+
NLM_F_REPLACE = 0x100
89+
NLM_F_EXCL = 0x200
90+
NLM_F_CREATE = 0x400
91+
NLM_F_APPEND = 0x800
92+
)
93+
94+
// Netlink message header flags for DELETE requests, from uapi/linux/netlink.h.
95+
const (
96+
NLM_F_NONREC = 0x100
97+
NLM_F_BULK = 0x200
8498
)
8599

86100
// Standard netlink message types, from uapi/linux/netlink.h.

pkg/sentry/socket/netlink/netfilter/protocol.go

Lines changed: 63 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,10 +90,18 @@ func (p *Protocol) ProcessMessage(ctx context.Context, s *netlink.Socket, msg *n
9090
}
9191

9292
// Nftables functions error check the address family value.
93-
family := stack.AddressFamily(nfGenMsg.Family)
93+
family, err := nftables.AFtoNetlinkAF(nfGenMsg.Family)
9494
// TODO: b/421437663 - Match the message type and call the appropriate Nftables function.
9595
switch msgType {
9696
case linux.NFT_MSG_NEWTABLE:
97+
// Only NFT_MSG_NEWTABLE checks the family translation error, because Linux returns
98+
// EOPNOTSUPP for an unsupported family only in that case. The other operations
99+
// return errors specific to their own handling.
100+
if err != nil {
101+
log.Debugf("Nftables: Unsupported address family: %d", int(nfGenMsg.Family))
102+
return err
103+
}
104+
97105
if err := p.newTable(nft, attrs, family, hdr.Flags, ms); err != nil {
98106
log.Debugf("Nftables new table error: %s", err)
99107
return err.GetError()
@@ -105,6 +113,12 @@ func (p *Protocol) ProcessMessage(ctx context.Context, s *netlink.Socket, msg *n
105113
return err.GetError()
106114
}
107115
return nil
116+
case linux.NFT_MSG_DELTABLE, linux.NFT_MSG_DESTROYTABLE:
117+
if err := p.deleteTable(nft, attrs, family, hdr, msgType, ms); err != nil {
118+
log.Debugf("Nftables delete table error: %s", err)
119+
return err.GetError()
120+
}
121+
return nil
108122
default:
109123
log.Debugf("Unsupported message type: %d", msgType)
110124
return syserr.ErrNotSupported
@@ -247,10 +261,58 @@ func (p *Protocol) getTable(nft *nftables.NFTables, attrs map[uint16]nlmsg.Bytes
247261
return nil
248262
}
249263

264+
// deleteTable deletes a table for the given family.
265+
func (p *Protocol) deleteTable(nft *nftables.NFTables, attrs map[uint16]nlmsg.BytesView, family stack.AddressFamily, hdr linux.NetlinkMessageHeader, msgType linux.NfTableMsgType, ms *nlmsg.MessageSet) *syserr.AnnotatedError {
266+
if family == stack.Unspec || (!hasAttr(linux.NFTA_TABLE_NAME, attrs) && !hasAttr(linux.NFTA_TABLE_HANDLE, attrs)) {
267+
nft.Flush(attrs, uint32(ms.PortID))
268+
return nil
269+
}
270+
271+
var tab *nftables.Table
272+
var err *syserr.AnnotatedError
273+
if tabHandleBytes, ok := attrs[linux.NFTA_TABLE_HANDLE]; ok {
274+
tabHandle, ok := tabHandleBytes.Uint64()
275+
if !ok {
276+
return syserr.NewAnnotatedError(syserr.ErrInvalidArgument, fmt.Sprintf("Nftables: Table handle attribute is malformed or not found"))
277+
}
278+
279+
tab, err = nft.GetTableByHandle(family, uint64(tabHandle), uint32(ms.PortID))
280+
} else {
281+
tabNameBytes, ok := attrs[linux.NFTA_TABLE_NAME]
282+
if !ok {
283+
return syserr.NewAnnotatedError(syserr.ErrInvalidArgument, fmt.Sprintf("Nftables: Table name attribute is malformed or not found"))
284+
}
285+
tab, err = nft.GetTable(family, tabNameBytes.String(), uint32(ms.PortID))
286+
}
287+
288+
if err != nil {
289+
// Ignore ENOENT if DESTROY_TABLE is set
290+
if err.GetError() == syserr.ErrNoFileOrDir && msgType == linux.NFT_MSG_DESTROYTABLE {
291+
return nil
292+
}
293+
return err
294+
}
295+
296+
// Don't delete the table if it is not empty and NLM_F_NONREC is set.
297+
if hdr.Flags&linux.NLM_F_NONREC == linux.NLM_F_NONREC && tab.ChainCount() > 0 {
298+
return syserr.NewAnnotatedError(syserr.ErrBusy, fmt.Sprintf("Nftables: Table with family: %d and name: %s already exists", int(family), tab.GetName()))
299+
}
300+
301+
_, err = nft.DeleteTable(family, tab.GetName())
302+
return err
303+
}
304+
305+
// netLinkMessagePayloadSize returns the size of the netlink message payload.
250306
func netLinkMessagePayloadSize(h *linux.NetlinkMessageHeader) int {
251307
return int(h.Length) - linux.NetlinkMessageHeaderSize
252308
}
253309

310+
// hasAttr returns whether the given attribute key is present in the attribute map.
311+
func hasAttr(attrName uint16, attrs map[uint16]nlmsg.BytesView) bool {
312+
_, ok := attrs[attrName]
313+
return ok
314+
}
315+
254316
// init registers the NETLINK_NETFILTER provider.
255317
func init() {
256318
netlink.RegisterProvider(linux.NETLINK_NETFILTER, NewProtocol)

pkg/sentry/socket/netlink/nlmsg/BUILD

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,15 @@ go_library(
1010
srcs = [
1111
"message.go",
1212
],
13-
visibility = ["//pkg/sentry:internal"],
13+
visibility = [
14+
"//pkg/sentry:internal",
15+
"//pkg/tcpip/nftables:__subpackages__",
16+
],
1417
deps = [
1518
"//pkg/abi/linux",
1619
"//pkg/bits",
1720
"//pkg/hostarch",
21+
"//pkg/log",
1822
"//pkg/marshal",
1923
"//pkg/marshal/primitive",
2024
],

pkg/sentry/socket/netlink/nlmsg/message.go

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ import (
2222
"gvisor.dev/gvisor/pkg/abi/linux"
2323
"gvisor.dev/gvisor/pkg/bits"
2424
"gvisor.dev/gvisor/pkg/hostarch"
25+
"gvisor.dev/gvisor/pkg/log"
2526
"gvisor.dev/gvisor/pkg/marshal"
2627
"gvisor.dev/gvisor/pkg/marshal/primitive"
2728
)
@@ -252,17 +253,20 @@ func (v AttrsView) ParseFirst() (hdr linux.NetlinkAttrHeader, value []byte, rest
252253

253254
hdrBytes, ok := b.Extract(linux.NetlinkAttrHeaderSize)
254255
if !ok {
256+
log.Debugf("Failed to parse netlink attributes at header stage")
255257
return
256258
}
257259
hdr.UnmarshalUnsafe(hdrBytes)
258260

259261
value, ok = b.Extract(int(hdr.Length) - linux.NetlinkAttrHeaderSize)
260262
if !ok {
263+
log.Debugf("Failed to parse %d bytes after %d header bytes", int(hdr.Length)-linux.NetlinkAttrHeaderSize, linux.NetlinkAttrHeaderSize)
261264
return
262265
}
263266

264267
_, ok = b.Extract(alignPad(int(hdr.Length), linux.NLA_ALIGNTO))
265268
if !ok {
269+
log.Debugf("Failed to parse netlink attributes at aligning stage")
266270
return
267271
}
268272

@@ -323,6 +327,17 @@ func (v *BytesView) Uint32() (uint32, bool) {
323327
return uint32(val), true
324328
}
325329

330+
// Uint64 converts the raw attribute value to uint64.
331+
func (v *BytesView) Uint64() (uint64, bool) {
332+
attr := []byte(*v)
333+
val := primitive.Uint64(0)
334+
if len(attr) != val.SizeBytes() {
335+
return 0, false
336+
}
337+
val.UnmarshalBytes(attr)
338+
return uint64(val), true
339+
}
340+
326341
// Int32 converts the raw attribute value to int32.
327342
func (v *BytesView) Int32() (int32, bool) {
328343
attr := []byte(*v)

pkg/tcpip/nftables/BUILD

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ go_library(
3131
"//pkg/abi/linux",
3232
"//pkg/atomicbitops",
3333
"//pkg/rand",
34+
"//pkg/sentry/socket/netlink/nlmsg",
3435
"//pkg/syserr",
3536
"//pkg/tcpip",
3637
"//pkg/tcpip/checksum",

pkg/tcpip/nftables/nftables.go

Lines changed: 73 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
"gvisor.dev/gvisor/pkg/abi/linux"
2222
"gvisor.dev/gvisor/pkg/atomicbitops"
2323
"gvisor.dev/gvisor/pkg/rand"
24+
"gvisor.dev/gvisor/pkg/sentry/socket/netlink/nlmsg"
2425
"gvisor.dev/gvisor/pkg/syserr"
2526
"gvisor.dev/gvisor/pkg/tcpip"
2627
"gvisor.dev/gvisor/pkg/tcpip/stack"
@@ -245,10 +246,38 @@ func NewNFTables(clock tcpip.Clock, rng rand.RNG) *NFTables {
245246
return &NFTables{clock: clock, startTime: clock.Now(), rng: rng, tableHandleCounter: atomicbitops.Uint64{}}
246247
}
247248

248-
// Flush clears entire ruleset and all data for all address families.
249-
func (nf *NFTables) Flush() {
249+
// Flush clears entire ruleset and all data for all address families
250+
// except for the tables that are not owned by the given owner.
251+
func (nf *NFTables) Flush(attrs map[uint16]nlmsg.BytesView, owner uint32) {
250252
for family := range stack.NumAFs {
251-
nf.filters[family] = nil
253+
afFilter := nf.filters[family]
254+
if afFilter == nil {
255+
continue
256+
}
257+
258+
var attrName *string = nil
259+
if nameBytes, ok := attrs[linux.NFTA_TABLE_NAME]; ok {
260+
name := nameBytes.String()
261+
attrName = &name
262+
}
263+
var tablesToDelete []TableInfo
264+
for name, table := range afFilter.tables {
265+
// Caller cannot delete a table they do not own.
266+
if table.HasOwner() && table.GetOwner() != owner {
267+
continue
268+
}
269+
270+
if attrName != nil && *attrName != table.GetName() {
271+
continue
272+
}
273+
274+
tablesToDelete = append(tablesToDelete, TableInfo{Name: name, Handle: table.GetHandle()})
275+
}
276+
277+
for _, tableData := range tablesToDelete {
278+
delete(afFilter.tables, tableData.Name)
279+
delete(afFilter.tableHandles, tableData.Handle)
280+
}
252281
}
253282
}
254283

@@ -295,6 +324,38 @@ func (nf *NFTables) GetTable(family stack.AddressFamily, tableName string, portI
295324
return t, nil
296325
}
297326

327+
// GetTableByHandle validates the inputs and gets a table by its handle and family if it exists,
328+
// error otherwise.
329+
func (nf *NFTables) GetTableByHandle(family stack.AddressFamily, handle uint64, portID uint32) (*Table, *syserr.AnnotatedError) {
330+
// Ensures address family is valid.
331+
if err := validateAddressFamily(family); err != nil {
332+
return nil, err
333+
}
334+
335+
// Checks if the table handle map for the address family has been initialized.
336+
if nf.filters[family] == nil || nf.filters[family].tableHandles == nil {
337+
return nil, syserr.NewAnnotatedError(syserr.ErrNoFileOrDir, fmt.Sprintf("table handle map for address family %v has no tables", family))
338+
}
339+
340+
// Gets the corresponding table map for the address family.
341+
tableHandleMap := nf.filters[family].tableHandles
342+
343+
// Checks if a table with the name exists.
344+
t, exists := tableHandleMap[handle]
345+
if !exists {
346+
return nil, syserr.NewAnnotatedError(syserr.ErrNoFileOrDir, fmt.Sprintf("table with handle %d not found for address family %v", handle, family))
347+
}
348+
349+
// If the table has an owner, it must match the Netlink portID of the calling process.
350+
// User space processes only have non-zero port ids.
351+
// Only the kernel can have a zero port id.
352+
if t.HasOwner() && portID != 0 && portID != t.GetOwner() {
353+
return nil, syserr.NewAnnotatedError(syserr.ErrNotPermitted, fmt.Sprintf("table with handle %d has owner %d, which does not match the Netlink portID of the calling process %d", handle, t.GetOwner(), portID))
354+
}
355+
356+
return t, nil
357+
}
358+
298359
// AddTable makes a new table for the specified address family, returning an
299360
// error if the address family is invalid. Can return an error if a table by the
300361
// same name already exists if errorOnDuplicate is true. Can be used to get an
@@ -312,15 +373,17 @@ func (nf *NFTables) AddTable(family stack.AddressFamily, name string,
312373
// Initializes filter if first table for the address family.
313374
if nf.filters[family] == nil {
314375
nf.filters[family] = &addressFamilyFilter{
315-
family: family,
316-
nftState: nf,
317-
tables: make(map[string]*Table),
318-
hfStacks: make(map[stack.NFHook]*hookFunctionStack),
376+
family: family,
377+
nftState: nf,
378+
tables: make(map[string]*Table),
379+
tableHandles: make(map[uint64]*Table),
380+
hfStacks: make(map[stack.NFHook]*hookFunctionStack),
319381
}
320382
}
321383

322384
// Gets the corresponding table map for the address family.
323385
tableMap := nf.filters[family].tables
386+
tableHandleMap := nf.filters[family].tableHandles
324387

325388
// Checks if a table with the same name already exists. If so, returns the
326389
// existing table (unless errorOnDuplicate is true).
@@ -340,6 +403,7 @@ func (nf *NFTables) AddTable(family stack.AddressFamily, name string,
340403
handle: nf.getNewTableHandle(),
341404
}
342405
tableMap[name] = t
406+
tableHandleMap[t.handle] = t
343407

344408
return t, nil
345409
}
@@ -377,8 +441,9 @@ func (nf *NFTables) DeleteTable(family stack.AddressFamily, tableName string) (b
377441
t.DeleteChain(chainName)
378442
}
379443

380-
// Deletes the table from the table map.
444+
// Deletes the table from the table map and from the table handle map.
381445
delete(nf.filters[family].tables, tableName)
446+
delete(nf.filters[family].tableHandles, t.handle)
382447
return true, nil
383448
}
384449

pkg/tcpip/nftables/nftables_types.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -161,6 +161,9 @@ type addressFamilyFilter struct {
161161
// tables is a map of tables for each address family.
162162
tables map[string]*Table
163163

164+
// tableHandles is a map of table handles (ids) to tables for a given address family.
165+
tableHandles map[uint64]*Table
166+
164167
// hfStacks is a map of hook function stacks (slice of base chains for a
165168
// given hook ordered by priority).
166169
hfStacks map[stack.NFHook]*hookFunctionStack
@@ -195,6 +198,12 @@ type Table struct {
195198
userData []byte
196199
}
197200

201+
// TableInfo pairs a table's name with its handle, the two keys under which an addressFamilyFilter indexes a Table.
202+
type TableInfo struct {
203+
Name string
204+
Handle uint64
205+
}
206+
198207
// hookFunctionStack represents the list of base chains for a specific hook.
199208
// The stack is ordered by priority and built as chains are added to tables.
200209
type hookFunctionStack struct {
@@ -759,3 +768,27 @@ func VerdictCodeToString(v uint32) string {
759768
}
760769
return fmt.Sprintf("invalid verdict: %d", v)
761770
}
771+
772+
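// netlinkAFToStackAF maps the NFPROTO_* family values carried in netfilter netlink
773+
// messages to stack address families. Keys are written with the AF_* constants that
774+
// share those numeric values (e.g. NFPROTO_INET == AF_UNIX == 1); see linux/include/uapi/linux/netfilter.h.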
775+
var netlinkAFToStackAF = map[uint8]stack.AddressFamily{
776+
linux.AF_UNSPEC: stack.Unspec,
777+
linux.AF_UNIX: stack.Inet,
778+
linux.AF_INET: stack.IP,
779+
linux.AF_AX25: stack.Arp,
780+
linux.AF_APPLETALK: stack.Netdev,
781+
linux.AF_BRIDGE: stack.Bridge,
782+
linux.AF_INET6: stack.IP6,
783+
}
784+
785+
// AFtoNetlinkAF converts a generic address family to a netfilter address family.
786+
// On error, we simply cast it to be a stack.AddressFamily and return an error to allow netfilter
787+
// sockets to handle it accordingly if needed.
788+
func AFtoNetlinkAF(af uint8) (stack.AddressFamily, *syserr.Error) {
789+
naf, ok := netlinkAFToStackAF[af]
790+
if !ok {
791+
return stack.NumAFs, syserr.ErrNotSupported
792+
}
793+
return naf, nil
794+
}

0 commit comments

Comments
 (0)