Skip to content

Commit 6194338

Browse files
manninglucas authored and gvisor-bot committed
Add support for setting PACKET_VERSION for PACKET_MMAP.
tcpdump requires TPACKET_V2 to work properly. PiperOrigin-RevId: 723670574
1 parent f9b03d3 commit 6194338

File tree

7 files changed

+184
-19
lines changed

7 files changed

+184
-19
lines changed

pkg/abi/linux/socket.go

Lines changed: 25 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,7 @@ const (
147147
// Packet socket options from <linux/if_packet.h>
148148
const (
149149
// PACKET_RX_RING is the socket option used to configure the receive ring of
// a packet_mmap socket.
PACKET_RX_RING = 5
150+
// PACKET_VERSION is the socket option used to select the packet_mmap header
// version (one of the TPACKET_V* values).
PACKET_VERSION = 10
150151
)
151152

152153
// Statuses for a frame in a packet_mmap ring buffer from <linux/if_packet.h>.
@@ -189,6 +190,23 @@ type TpacketHdr struct {
189190
_ [4]byte
190191
}
191192

193+
// Tpacket2Hdr is the header for a frame in a packet_mmap ring buffer from
194+
// <linux/if_packet.h>. It is the frame header layout used when the ring
// version is TPACKET_V2.
195+
//
196+
// +marshal
197+
type Tpacket2Hdr struct {
198+
// TpStatus is the frame status (see the TP_STATUS_* constants).
TpStatus uint32
199+
// TpLen is the size of the whole packet.
TpLen uint32
200+
// TpSnaplen is the length of the packet data stored in the frame.
TpSnaplen uint32
201+
// TpMac is the MAC offset of the packet within the frame.
TpMac uint16
202+
// TpNet is the network offset of the packet within the frame.
TpNet uint16
203+
// TpSec is the seconds part of the frame timestamp.
TpSec uint32
204+
// TpNSec is the sub-second part of the frame timestamp, in nanoseconds.
TpNSec uint32
205+
// TpVlanTci and TpVlanTpid presumably mirror tp_vlan_tci and tp_vlan_tpid
// of struct tpacket2_hdr — TODO confirm against <linux/if_packet.h>.
TpVlanTci uint16
206+
TpVlanTpid uint16
207+
// Trailing padding bytes.
_ [4]uint8
208+
}
209+
192210
// TpacketAlignment is the alignment of a frame in a packet_mmap ring buffer
193211
// from <linux/if_packet.h>.
194212
const (
@@ -198,12 +216,17 @@ const (
198216
// TPACKET_V1 and TPACKET_V2 are the versions of a packet_mmap ring buffer from
199217
// <linux/if_packet.h> that are implemented in gVisor.
200218
const (
219+
// TPACKET_V1 is the default version of PACKET_MMAP.
201220
TPACKET_V1 = iota
221+
// TPACKET_V2 is the version of PACKET_MMAP for tpacket2_hdr.
222+
TPACKET_V2
202223
)
203224

204-
// TPACKET_HDRLEN is the length of a TpacketHdr from <linux/if_packet.h>.
205225
var (
206-
TPACKET_HDRLEN = TPacketAlign(uint32((*TpacketHdr)(nil).SizeBytes())) + uint32((*SockAddrLink)(nil).SizeBytes())
226+
// TPACKET_HDRLEN is the length of a TpacketHdr from <linux/if_packet.h>.
// It is computed as the combined size of a TpacketHdr and a SockAddrLink,
// aligned with TPacketAlign.
//
// NOTE(review): <linux/if_packet.h> defines TPACKET_HDRLEN as
// TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + sizeof(struct sockaddr_ll),
// i.e. only the header size is aligned. Here the sum is aligned instead —
// confirm that this difference is intended.
227+
TPACKET_HDRLEN = TPacketAlign(uint32((*TpacketHdr)(nil).SizeBytes()) + uint32((*SockAddrLink)(nil).SizeBytes()))
228+
// TPACKET2_HDRLEN is the length of a Tpacket2Hdr from <linux/if_packet.h>.
// It is computed as the combined size of a Tpacket2Hdr and a SockAddrLink,
// aligned with TPacketAlign.
229+
TPACKET2_HDRLEN = TPacketAlign(uint32((*Tpacket2Hdr)(nil).SizeBytes()) + uint32((*SockAddrLink)(nil).SizeBytes()))
207230
)
208231

209232
// TPacketAlign aligns a value to the alignment of a TPacket.

pkg/sentry/socket/netstack/netstack.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2749,8 +2749,13 @@ func setSockOptPacket(t *kernel.Task, s socket.Socket, ep commonEndpoint, name i
27492749
} else {
27502750
return syserr.ErrNotSupported
27512751
}
2752+
return nil
2753+
case linux.PACKET_VERSION:
2754+
v := hostarch.ByteOrder.Uint32(optVal)
2755+
return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.PacketMMapVersionOption, int(v)))
2756+
default:
2757+
return syserr.ErrNotSupported
27522758
}
2753-
return nil
27542759
}
27552760

27562761
// GetSockName implements the linux syscall getsockname(2) for sockets backed by

pkg/sentry/socket/netstack/packetmmap/endpoint.go

Lines changed: 43 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -66,6 +66,7 @@ type Endpoint struct {
6666
mode ringBufferMode
6767
nicID tcpip.NICID
6868
netProto tcpip.NetworkProtocolNumber
69+
version int
6970
headerLen uint32
7071

7172
stack *stack.Stack
@@ -88,7 +89,15 @@ func (m *Endpoint) Init(ctx context.Context, opts stack.PacketMMapOpts) error {
8889
m.stats = opts.Stats
8990
m.nicID = opts.NICID
9091
m.netProto = opts.NetProto
91-
m.headerLen = linux.TPACKET_HDRLEN
92+
m.version = opts.Version
93+
switch m.version {
94+
case linux.TPACKET_V1:
95+
m.headerLen = linux.TPACKET_HDRLEN
96+
case linux.TPACKET_V2:
97+
m.headerLen = linux.TPACKET2_HDRLEN
98+
default:
99+
panic(fmt.Sprintf("invalid version %d supplied to InitPacketMMap", m.version))
100+
}
92101
if opts.Req.TpBlockNr != 0 {
93102
if opts.Req.TpBlockSize <= 0 {
94103
return linuxerr.EINVAL
@@ -378,22 +387,43 @@ func (m *Endpoint) marshalSockAddr(pkt *stack.PacketBuffer, view *buffer.View) {
378387
hdr := header.Ethernet(pkt.LinkHeader().Slice())
379388
copy(sll.HardwareAddr[:], hdr.SourceAddress())
380389
}
381-
hdrSize := uint32((*linux.TpacketHdr)(nil).SizeBytes())
390+
var hdrSize uint32
391+
if m.version == linux.TPACKET_V2 {
392+
hdrSize = uint32((*linux.Tpacket2Hdr)(nil).SizeBytes())
393+
} else {
394+
hdrSize = uint32((*linux.TpacketHdr)(nil).SizeBytes())
395+
}
382396
sll.MarshalBytes(view.AsSlice()[linux.TPacketAlign(hdrSize):])
383397
}
384398

385399
// marshalFrameHeader writes the frame header for the endpoint's configured
// packet_mmap version to the beginning of view: a linux.TpacketHdr for
// TPACKET_V1 or a linux.Tpacket2Hdr for TPACKET_V2.
//
// The header records the total packet size (pktBuf.Size()), dataLength as the
// snap length, the MAC and network offsets, and a timestamp taken from the
// stack clock. The V1 header stores the sub-second part in microseconds and
// the V2 header stores it in nanoseconds.
//
// It panics if m.version is neither TPACKET_V1 nor TPACKET_V2.
func (m *Endpoint) marshalFrameHeader(pktBuf buffer.Buffer, macOffset, netOffset, dataLength uint32, view *buffer.View) {
386400
t := m.stack.Clock().Now()
387-
hdr := linux.TpacketHdr{
388-
// The status is set separately to ensure the frame is written before the
389-
// status is set.
390-
TpStatus: linux.TP_STATUS_KERNEL,
391-
TpLen: uint32(pktBuf.Size()),
392-
TpSnaplen: dataLength,
393-
TpMac: uint16(macOffset),
394-
TpNet: uint16(netOffset),
395-
TpSec: uint32(t.Unix()),
396-
TpUsec: uint32(t.UnixMicro() % 1e6),
401+
switch m.version {
402+
case linux.TPACKET_V1:
403+
hdr := linux.TpacketHdr{
404+
// The status is set separately to ensure the frame is written before the
405+
// status is set.
406+
TpStatus: linux.TP_STATUS_KERNEL,
407+
TpLen: uint32(pktBuf.Size()),
408+
TpSnaplen: dataLength,
409+
TpMac: uint16(macOffset),
410+
TpNet: uint16(netOffset),
411+
TpSec: uint32(t.Unix()),
412+
TpUsec: uint32(t.UnixMicro() % 1e6),
413+
}
414+
hdr.MarshalBytes(view.AsSlice())
415+
case linux.TPACKET_V2:
416+
hdr := linux.Tpacket2Hdr{
417+
// Same initial status as the V1 header above.
TpStatus: linux.TP_STATUS_KERNEL,
418+
TpLen: uint32(pktBuf.Size()),
419+
TpSnaplen: dataLength,
420+
TpMac: uint16(macOffset),
421+
TpNet: uint16(netOffset),
422+
TpSec: uint32(t.Unix()),
423+
// V2 carries nanosecond resolution where V1 carries microseconds.
TpNSec: uint32(t.UnixNano() % 1e9),
424+
}
425+
hdr.MarshalBytes(view.AsSlice())
426+
default:
427+
// NOTE(review): the message names HandlePacket rather than this function —
// presumably the caller; confirm.
panic(fmt.Sprintf("invalid version %d supplied to HandlePacket", m.version))
397428
}
398-
hdr.MarshalBytes(view.AsSlice())
399429
}

pkg/tcpip/stack/registration.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,7 @@ type PacketMMapOpts struct {
208208
NICID tcpip.NICID
209209
NetProto tcpip.NetworkProtocolNumber
210210
PacketEndpoint MappablePacketEndpoint
211+
Version int
211212
}
212213

213214
// PacketMMapEndpoint is the interface implemented by endpoints to handle memory

pkg/tcpip/tcpip.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -996,6 +996,9 @@ const (
996996
// IPv6Checksum is used to request the stack to populate and validate the IPv6
997997
// checksum for transport level headers.
998998
IPv6Checksum
999+
1000+
// PacketMMapVersionOption is used to set the packet mmap version.
1001+
PacketMMapVersionOption
9991002
)
10001003

10011004
const (

pkg/tcpip/transport/packet/endpoint.go

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,13 @@ import (
3636
"gvisor.dev/gvisor/pkg/waiter"
3737
)
3838

39+
// tpacketVersion is the packet mmap header version configured on an endpoint.
// Its values are compared directly against the integer passed to
// SetSockOptInt for tcpip.PacketMMapVersionOption.
type tpacketVersion int
40+
41+
const (
42+
// tpacketVersion1 is the zero value, so it is the version of an endpoint
// that never had a version set.
tpacketVersion1 tpacketVersion = iota
43+
// tpacketVersion2 is the second supported version.
tpacketVersion2
44+
)
45+
3946
var _ stack.MappablePacketEndpoint = (*endpoint)(nil)
4047

4148
// +stateify savable
@@ -96,6 +103,7 @@ type endpoint struct {
96103

97104
packetMmapRxConfig *tcpip.TpacketReq
98105
packetMmapTxConfig *tcpip.TpacketReq
106+
packetMMapVersion tpacketVersion
99107
packetMMapEp stack.PacketMMapEndpoint
100108
}
101109

@@ -391,8 +399,24 @@ func (ep *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error {
391399
}
392400

393401
// SetSockOptInt implements tcpip.Endpoint.SetSockOptInt.
//
// The only option handled is tcpip.PacketMMapVersionOption, which stores v as
// the endpoint's packet mmap version. It returns:
//   - *tcpip.ErrInvalidOptionValue if v is not a supported version,
//   - *tcpip.ErrEndpointBusy if ep.packetMMapEp is already set,
//   - *tcpip.ErrUnknownProtocolOption for any other option.
394-
func (*endpoint) SetSockOptInt(tcpip.SockOptInt, int) tcpip.Error {
395-
return &tcpip.ErrUnknownProtocolOption{}
402+
func (ep *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error {
403+
switch opt {
404+
case tcpip.PacketMMapVersionOption:
405+
// We support up to TPACKET_V2.
406+
version := tpacketVersion(v)
407+
switch version {
408+
case tpacketVersion1, tpacketVersion2:
409+
// A non-nil packetMMapEp presumably means a ring has already been set up
// with the current version, so the version may no longer change — TODO
// confirm where packetMMapEp is assigned.
if ep.packetMMapEp != nil {
410+
return &tcpip.ErrEndpointBusy{}
411+
}
412+
ep.packetMMapVersion = version
413+
return nil
414+
default:
415+
return &tcpip.ErrInvalidOptionValue{}
416+
}
417+
default:
418+
return &tcpip.ErrUnknownProtocolOption{}
419+
}
396420
}
397421

398422
func (ep *endpoint) LastError() tcpip.Error {
@@ -544,6 +568,7 @@ func (ep *endpoint) GetPacketMMapOpts(req *tcpip.TpacketReq, isRx bool) stack.Pa
544568
NICID: ep.boundNIC,
545569
NetProto: ep.boundNetProto,
546570
PacketEndpoint: ep,
571+
Version: int(ep.packetMMapVersion),
547572
}
548573
}
549574

test/syscalls/linux/packet_mmap.cc

Lines changed: 79 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,10 @@ namespace testing {
4646
namespace {
4747

4848
PosixErrorOr<void*> MakePacketMmapRing(int fd, const sockaddr* bind_addr,
49-
int bind_addr_size, tpacket_req* req) {
49+
int bind_addr_size, tpacket_req* req,
50+
int version = TPACKET_V1) {
51+
RETURN_ERROR_IF_SYSCALL_FAIL(
52+
setsockopt(fd, SOL_PACKET, PACKET_VERSION, &version, sizeof(version)));
5053
RETURN_ERROR_IF_SYSCALL_FAIL(
5154
setsockopt(fd, SOL_PACKET, PACKET_RX_RING, req, sizeof(*req)));
5255
RETURN_ERROR_IF_SYSCALL_FAIL(bind(fd, bind_addr, bind_addr_size));
@@ -463,6 +466,81 @@ TEST(PacketMmapTest, MmapCopy) {
463466
EXPECT_EQ(hdr->tp_snaplen, tp_frame_size - hdr->tp_mac);
464467
}
465468

469+
// Verifies which PACKET_VERSION values setsockopt accepts on a packet socket:
// TPACKET_V1 and TPACKET_V2 succeed, anything else fails with EINVAL.
TEST(PacketMmapTest, SetVersion) {
470+
// Without CAP_NET_RAW a packet socket cannot be created, so check that
// socket() fails with EPERM and skip the rest of the test.
if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
471+
ASSERT_THAT(socket(AF_PACKET, SOCK_RAW, 0), SyscallFailsWithErrno(EPERM));
472+
GTEST_SKIP() << "Missing packet socket capability";
473+
}
474+
FileDescriptor mmap_sock =
475+
ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_PACKET, SOCK_DGRAM, 0));
476+
477+
// Supported versions can be set, and set again, while no ring exists.
int version = TPACKET_V2;
478+
EXPECT_THAT(setsockopt(mmap_sock.get(), SOL_PACKET, PACKET_VERSION, &version,
479+
sizeof(version)),
480+
SyscallSucceeds());
481+
version = TPACKET_V1;
482+
EXPECT_THAT(setsockopt(mmap_sock.get(), SOL_PACKET, PACKET_VERSION, &version,
483+
sizeof(version)),
484+
SyscallSucceeds());
485+
// TPACKET_V3 and out-of-range values are rejected.
version = TPACKET_V3;
486+
EXPECT_THAT(setsockopt(mmap_sock.get(), SOL_PACKET, PACKET_VERSION, &version,
487+
sizeof(version)),
488+
SyscallFailsWithErrno(EINVAL));
489+
version = TPACKET_V1 + 100;
490+
EXPECT_THAT(setsockopt(mmap_sock.get(), SOL_PACKET, PACKET_VERSION, &version,
491+
sizeof(version)),
492+
SyscallFailsWithErrno(EINVAL));
493+
}
494+
495+
// Creates a TPACKET_V2 receive ring on a packet socket bound to the loopback
// interface, sends one message through the same socket, and checks the first
// frame of the ring through the tpacket2_hdr layout.
TEST(PacketMmapTest, BasicV2) {
496+
// Without CAP_NET_RAW a packet socket cannot be created, so check that
// socket() fails with EPERM and skip the rest of the test.
if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
497+
ASSERT_THAT(socket(AF_PACKET, SOCK_RAW, 0), SyscallFailsWithErrno(EPERM));
498+
GTEST_SKIP() << "Missing packet socket capability";
499+
}
500+
sockaddr_ll bind_addr = {
501+
.sll_family = AF_PACKET,
502+
.sll_protocol = htons(ETH_P_IP),
503+
.sll_ifindex = ASSERT_NO_ERRNO_AND_VALUE(GetLoopbackIndex()),
504+
.sll_halen = ETH_ALEN,
505+
};
506+
FileDescriptor mmap_sock =
507+
ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_PACKET, SOCK_DGRAM, 0));
508+
509+
// Ring geometry: 2 blocks of 32 frames each.
uint32_t tp_frame_size = 65536 + 128;
510+
uint32_t tp_block_size = tp_frame_size * 32;
511+
uint32_t tp_block_nr = 2;
512+
uint32_t tp_frame_nr = (tp_block_size * tp_block_nr) / tp_frame_size;
513+
tpacket_req req = {
514+
.tp_block_size = tp_block_size,
515+
.tp_block_nr = tp_block_nr,
516+
.tp_frame_size = tp_frame_size,
517+
.tp_frame_nr = tp_frame_nr,
518+
};
519+
void* ring = ASSERT_NO_ERRNO_AND_VALUE(MakePacketMmapRing(
520+
mmap_sock.get(), reinterpret_cast<const sockaddr*>(&bind_addr),
521+
sizeof(bind_addr), &req, TPACKET_V2));
522+
// Unmap the whole ring when the test exits.
auto ring_cleanup = Cleanup([ring, tp_block_size, tp_block_nr] {
523+
ASSERT_THAT(munmap(ring, tp_block_size * tp_block_nr), SyscallSucceeds());
524+
});
525+
526+
std::string kMessage = "123abc";
527+
ASSERT_THAT(
528+
sendto(mmap_sock.get(), kMessage.c_str(), kMessage.size(), 0 /* flags */,
529+
reinterpret_cast<const sockaddr*>(&bind_addr), sizeof(bind_addr)),
530+
SyscallSucceeds());
531+
532+
// The first frame header sits at the start of the mapped ring.
tpacket2_hdr* hdr = reinterpret_cast<tpacket2_hdr*>(ring);
533+
struct pollfd pollset;
534+
pollset.fd = mmap_sock.get();
535+
pollset.revents = 0;
536+
pollset.events = POLLIN | POLLRDNORM | POLLERR;
537+
// Block until the socket reports an event before inspecting the frame.
ASSERT_THAT(poll(&pollset, 1, -1), SyscallSucceeds());
538+
EXPECT_EQ(hdr->tp_status & TP_STATUS_USER, 1);
539+
EXPECT_EQ(hdr->tp_len, kMessage.size());
540+
EXPECT_EQ(hdr->tp_snaplen, kMessage.size());
541+
// tp_net is used as a byte offset from the start of the frame header to the
// payload.
EXPECT_STREQ((char*)(hdr) + hdr->tp_net, kMessage.c_str());
542+
}
543+
466544
} // namespace
467545
} // namespace testing
468546
} // namespace gvisor

0 commit comments

Comments
 (0)