Skip to content

Commit

Permalink
Add support for setting PACKET_VERSION for PACKET_MMAP.
Browse files Browse the repository at this point in the history
tcpdump requires TPACKET_V2 to work properly.

PiperOrigin-RevId: 723670574
  • Loading branch information
manninglucas authored and gvisor-bot committed Feb 5, 2025
1 parent f9b03d3 commit 6194338
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 19 deletions.
27 changes: 25 additions & 2 deletions pkg/abi/linux/socket.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ const (
// Packet socket options from <linux/if_packet.h>. These are the option names
// passed to setsockopt(2) at level SOL_PACKET.
const (
// PACKET_RX_RING configures the receive ring buffer used by PACKET_MMAP.
PACKET_RX_RING = 5
// PACKET_VERSION selects the TPACKET header version (TPACKET_V1,
// TPACKET_V2, ...) used for frames in the PACKET_MMAP ring.
PACKET_VERSION = 10
)

// Statuses for a frame in a packet_mmap ring buffer from <linux/if_packet.h>.
Expand Down Expand Up @@ -189,6 +190,23 @@ type TpacketHdr struct {
_ [4]byte
}

// Tpacket2Hdr is the header for a frame in a TPACKET_V2 packet_mmap ring
// buffer (struct tpacket2_hdr in <linux/if_packet.h>).
//
// The field order and widths define the layout shared with userspace, so they
// must not be reordered or resized.
//
// +marshal
type Tpacket2Hdr struct {
// TpStatus is the frame status word (TP_STATUS_KERNEL, TP_STATUS_USER, ...).
TpStatus uint32
// TpLen is the full size of the packet.
TpLen uint32
// TpSnaplen is the length of the packet data stored in the frame.
TpSnaplen uint32
// TpMac is the offset of the link-layer data from the start of the frame.
TpMac uint16
// TpNet is the offset of the network-layer data from the start of the frame.
TpNet uint16
// TpSec is the seconds part of the packet timestamp.
TpSec uint32
// TpNSec is the sub-second part of the packet timestamp, in nanoseconds.
TpNSec uint32
// TpVlanTci is the VLAN tag control information.
TpVlanTci uint16
// TpVlanTpid is the VLAN tag protocol identifier.
TpVlanTpid uint16
// Padding; the preceding fields total 28 bytes, so this makes the struct 32
// bytes long.
_ [4]uint8
}

// TpacketAlignment is the alignment of a frame in a packet_mmap ring buffer
// from <linux/if_packet.h>.
const (
Expand All @@ -198,12 +216,17 @@ const (
// TPACKET versions of a packet_mmap ring buffer from <linux/if_packet.h> that
// are implemented in gVisor. The values are what userspace passes with the
// PACKET_VERSION socket option.
const (
// TPACKET_V1 is the default version of PACKET_MMAP; frames start with a
// TpacketHdr.
TPACKET_V1 = iota
// TPACKET_V2 is the version of PACKET_MMAP for tpacket2_hdr; frames start
// with a Tpacket2Hdr.
TPACKET_V2
)

// TPACKET_HDRLEN is the length of a TpacketHdr from <linux/if_packet.h>.
var (
TPACKET_HDRLEN = TPacketAlign(uint32((*TpacketHdr)(nil).SizeBytes())) + uint32((*SockAddrLink)(nil).SizeBytes())
// TPACKET_HDRLEN is the combined length of a TpacketHdr and a SockAddrLink,
// rounded up with TPacketAlign.
//
// NOTE(review): <linux/if_packet.h> defines TPACKET_HDRLEN as
// TPACKET_ALIGN(sizeof(struct tpacket_hdr)) + sizeof(struct sockaddr_ll),
// i.e. only the header is aligned before the sockaddr size is added. The
// expression below aligns the sum instead, which can yield a larger value;
// confirm the difference is intentional.
TPACKET_HDRLEN = TPacketAlign(uint32((*TpacketHdr)(nil).SizeBytes()) + uint32((*SockAddrLink)(nil).SizeBytes()))
// TPACKET2_HDRLEN is the combined length of a Tpacket2Hdr and a
// SockAddrLink, rounded up with TPacketAlign. The same review note as for
// TPACKET_HDRLEN applies (Linux: TPACKET_ALIGN(sizeof(struct tpacket2_hdr)) +
// sizeof(struct sockaddr_ll)).
TPACKET2_HDRLEN = TPacketAlign(uint32((*Tpacket2Hdr)(nil).SizeBytes()) + uint32((*SockAddrLink)(nil).SizeBytes()))
)

// TPacketAlign aligns a value to the alignment of a TPacket.
Expand Down
7 changes: 6 additions & 1 deletion pkg/sentry/socket/netstack/netstack.go
Original file line number Diff line number Diff line change
Expand Up @@ -2749,8 +2749,13 @@ func setSockOptPacket(t *kernel.Task, s socket.Socket, ep commonEndpoint, name i
} else {
return syserr.ErrNotSupported
}
return nil
case linux.PACKET_VERSION:
v := hostarch.ByteOrder.Uint32(optVal)
return syserr.TranslateNetstackError(ep.SetSockOptInt(tcpip.PacketMMapVersionOption, int(v)))
default:
return syserr.ErrNotSupported
}
return nil
}

// GetSockName implements the linux syscall getsockname(2) for sockets backed by
Expand Down
56 changes: 43 additions & 13 deletions pkg/sentry/socket/netstack/packetmmap/endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ type Endpoint struct {
mode ringBufferMode
nicID tcpip.NICID
netProto tcpip.NetworkProtocolNumber
version int
headerLen uint32

stack *stack.Stack
Expand All @@ -88,7 +89,15 @@ func (m *Endpoint) Init(ctx context.Context, opts stack.PacketMMapOpts) error {
m.stats = opts.Stats
m.nicID = opts.NICID
m.netProto = opts.NetProto
m.headerLen = linux.TPACKET_HDRLEN
m.version = opts.Version
switch m.version {
case linux.TPACKET_V1:
m.headerLen = linux.TPACKET_HDRLEN
case linux.TPACKET_V2:
m.headerLen = linux.TPACKET2_HDRLEN
default:
panic(fmt.Sprintf("invalid version %d supplied to InitPacketMMap", m.version))
}
if opts.Req.TpBlockNr != 0 {
if opts.Req.TpBlockSize <= 0 {
return linuxerr.EINVAL
Expand Down Expand Up @@ -378,22 +387,43 @@ func (m *Endpoint) marshalSockAddr(pkt *stack.PacketBuffer, view *buffer.View) {
hdr := header.Ethernet(pkt.LinkHeader().Slice())
copy(sll.HardwareAddr[:], hdr.SourceAddress())
}
hdrSize := uint32((*linux.TpacketHdr)(nil).SizeBytes())
var hdrSize uint32
if m.version == linux.TPACKET_V2 {
hdrSize = uint32((*linux.Tpacket2Hdr)(nil).SizeBytes())
} else {
hdrSize = uint32((*linux.TpacketHdr)(nil).SizeBytes())
}
sll.MarshalBytes(view.AsSlice()[linux.TPacketAlign(hdrSize):])
}

// marshalFrameHeader writes the TPACKET frame header into the start of view,
// using the header layout that matches m.version.
//
// The header records the total size of pktBuf (TpLen), dataLength (TpSnaplen),
// macOffset (TpMac), netOffset (TpNet) and a timestamp taken from the stack
// clock. TpStatus is written as TP_STATUS_KERNEL in every case. It panics if
// m.version is neither linux.TPACKET_V1 nor linux.TPACKET_V2.
func (m *Endpoint) marshalFrameHeader(pktBuf buffer.Buffer, macOffset, netOffset, dataLength uint32, view *buffer.View) {
t := m.stack.Clock().Now()
hdr := linux.TpacketHdr{
// The status is set separately to ensure the frame is written before the
// status is set.
TpStatus: linux.TP_STATUS_KERNEL,
TpLen: uint32(pktBuf.Size()),
TpSnaplen: dataLength,
TpMac: uint16(macOffset),
TpNet: uint16(netOffset),
TpSec: uint32(t.Unix()),
TpUsec: uint32(t.UnixMicro() % 1e6),
switch m.version {
case linux.TPACKET_V1:
hdr := linux.TpacketHdr{
// The status is set separately to ensure the frame is written before the
// status is set.
TpStatus: linux.TP_STATUS_KERNEL,
TpLen: uint32(pktBuf.Size()),
TpSnaplen: dataLength,
TpMac: uint16(macOffset),
TpNet: uint16(netOffset),
TpSec: uint32(t.Unix()),
// Sub-second part of the timestamp, in microseconds.
TpUsec: uint32(t.UnixMicro() % 1e6),
}
hdr.MarshalBytes(view.AsSlice())
case linux.TPACKET_V2:
// Same fields as V1 except that the sub-second timestamp is in
// nanoseconds. TpVlanTci and TpVlanTpid are left at their zero values.
hdr := linux.Tpacket2Hdr{
TpStatus: linux.TP_STATUS_KERNEL,
TpLen: uint32(pktBuf.Size()),
TpSnaplen: dataLength,
TpMac: uint16(macOffset),
TpNet: uint16(netOffset),
TpSec: uint32(t.Unix()),
TpNSec: uint32(t.UnixNano() % 1e9),
}
hdr.MarshalBytes(view.AsSlice())
default:
// Only TPACKET_V1 and TPACKET_V2 have a header layout here; any other
// value is treated as a programming error.
panic(fmt.Sprintf("invalid version %d supplied to HandlePacket", m.version))
}
hdr.MarshalBytes(view.AsSlice())
}
1 change: 1 addition & 0 deletions pkg/tcpip/stack/registration.go
Original file line number Diff line number Diff line change
Expand Up @@ -208,6 +208,7 @@ type PacketMMapOpts struct {
NICID tcpip.NICID
NetProto tcpip.NetworkProtocolNumber
PacketEndpoint MappablePacketEndpoint
Version int
}

// PacketMMapEndpoint is the interface implemented by endpoints to handle memory
Expand Down
3 changes: 3 additions & 0 deletions pkg/tcpip/tcpip.go
Original file line number Diff line number Diff line change
Expand Up @@ -996,6 +996,9 @@ const (
// IPv6Checksum is used to request the stack to populate and validate the IPv6
// checksum for transport level headers.
IPv6Checksum

// PacketMMapVersionOption is used to set the packet mmap version.
PacketMMapVersionOption
)

const (
Expand Down
29 changes: 27 additions & 2 deletions pkg/tcpip/transport/packet/endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,13 @@ import (
"gvisor.dev/gvisor/pkg/waiter"
)

// tpacketVersion is the PACKET_MMAP (TPACKET) header version configured on an
// endpoint through tcpip.PacketMMapVersionOption.
type tpacketVersion int

// Supported tpacketVersion values.
//
// The numeric values are significant: SetSockOptInt converts the raw option
// value straight to a tpacketVersion, and GetPacketMMapOpts hands the value on
// as a plain int in stack.PacketMMapOpts.Version. They therefore have to stay
// equal to the TPACKET_V1/TPACKET_V2 values used on the other side of those
// conversions.
const (
// tpacketVersion1 is the zero value and so the version of a new endpoint.
tpacketVersion1 tpacketVersion = iota
tpacketVersion2
)

var _ stack.MappablePacketEndpoint = (*endpoint)(nil)

// +stateify savable
Expand Down Expand Up @@ -96,6 +103,7 @@ type endpoint struct {

packetMmapRxConfig *tcpip.TpacketReq
packetMmapTxConfig *tcpip.TpacketReq
packetMMapVersion tpacketVersion
packetMMapEp stack.PacketMMapEndpoint
}

Expand Down Expand Up @@ -391,8 +399,24 @@ func (ep *endpoint) SetSockOpt(opt tcpip.SettableSocketOption) tcpip.Error {
}

// SetSockOptInt implements tcpip.Endpoint.SetSockOptInt.
func (*endpoint) SetSockOptInt(tcpip.SockOptInt, int) tcpip.Error {
return &tcpip.ErrUnknownProtocolOption{}
// The only option handled is tcpip.PacketMMapVersionOption, which records the
// TPACKET version to use for the endpoint's packet mmap ring.
//
// It returns:
//   - nil once the version has been stored;
//   - *tcpip.ErrEndpointBusy if v is a supported version but packetMMapEp is
//     already set;
//   - *tcpip.ErrInvalidOptionValue if v is not a supported version;
//   - *tcpip.ErrUnknownProtocolOption for any other option.
//
// NOTE(review): no lock is taken here around packetMMapEp and
// packetMMapVersion; confirm that callers serialize access.
func (ep *endpoint) SetSockOptInt(opt tcpip.SockOptInt, v int) tcpip.Error {
switch opt {
case tcpip.PacketMMapVersionOption:
// We support up to TPACKET_V2.
version := tpacketVersion(v)
switch version {
case tpacketVersion1, tpacketVersion2:
// The version is validated before the busy check, so an unsupported
// version always reports ErrInvalidOptionValue.
if ep.packetMMapEp != nil {
return &tcpip.ErrEndpointBusy{}
}
ep.packetMMapVersion = version
return nil
default:
return &tcpip.ErrInvalidOptionValue{}
}
default:
return &tcpip.ErrUnknownProtocolOption{}
}
}

func (ep *endpoint) LastError() tcpip.Error {
Expand Down Expand Up @@ -544,6 +568,7 @@ func (ep *endpoint) GetPacketMMapOpts(req *tcpip.TpacketReq, isRx bool) stack.Pa
NICID: ep.boundNIC,
NetProto: ep.boundNetProto,
PacketEndpoint: ep,
Version: int(ep.packetMMapVersion),
}
}

Expand Down
80 changes: 79 additions & 1 deletion test/syscalls/linux/packet_mmap.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,10 @@ namespace testing {
namespace {

PosixErrorOr<void*> MakePacketMmapRing(int fd, const sockaddr* bind_addr,
int bind_addr_size, tpacket_req* req) {
int bind_addr_size, tpacket_req* req,
int version = TPACKET_V1) {
RETURN_ERROR_IF_SYSCALL_FAIL(
setsockopt(fd, SOL_PACKET, PACKET_VERSION, &version, sizeof(version)));
RETURN_ERROR_IF_SYSCALL_FAIL(
setsockopt(fd, SOL_PACKET, PACKET_RX_RING, req, sizeof(*req)));
RETURN_ERROR_IF_SYSCALL_FAIL(bind(fd, bind_addr, bind_addr_size));
Expand Down Expand Up @@ -463,6 +466,81 @@ TEST(PacketMmapTest, MmapCopy) {
EXPECT_EQ(hdr->tp_snaplen, tp_frame_size - hdr->tp_mac);
}

// Verifies that the PACKET_VERSION socket option accepts TPACKET_V1 and
// TPACKET_V2 and rejects TPACKET_V3 and an out-of-range value with EINVAL.
TEST(PacketMmapTest, SetVersion) {
// Without CAP_NET_RAW, creating a packet socket is expected to fail with
// EPERM; check that and skip the rest of the test.
if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
ASSERT_THAT(socket(AF_PACKET, SOCK_RAW, 0), SyscallFailsWithErrno(EPERM));
GTEST_SKIP() << "Missing packet socket capability";
}
FileDescriptor mmap_sock =
ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_PACKET, SOCK_DGRAM, 0));

// Supported versions. No ring has been set up on this socket, so the version
// can be changed more than once.
int version = TPACKET_V2;
EXPECT_THAT(setsockopt(mmap_sock.get(), SOL_PACKET, PACKET_VERSION, &version,
sizeof(version)),
SyscallSucceeds());
version = TPACKET_V1;
EXPECT_THAT(setsockopt(mmap_sock.get(), SOL_PACKET, PACKET_VERSION, &version,
sizeof(version)),
SyscallSucceeds());
// TPACKET_V3 is expected to be rejected.
version = TPACKET_V3;
EXPECT_THAT(setsockopt(mmap_sock.get(), SOL_PACKET, PACKET_VERSION, &version,
sizeof(version)),
SyscallFailsWithErrno(EINVAL));
// A value that is not a TPACKET version at all.
version = TPACKET_V1 + 100;
EXPECT_THAT(setsockopt(mmap_sock.get(), SOL_PACKET, PACKET_VERSION, &version,
sizeof(version)),
SyscallFailsWithErrno(EINVAL));
}

// Sends one message over the loopback interface through a TPACKET_V2 ring and
// checks the tpacket2_hdr of the first frame in the mapping.
TEST(PacketMmapTest, BasicV2) {
// Without CAP_NET_RAW, creating a packet socket is expected to fail with
// EPERM; check that and skip the rest of the test.
if (!ASSERT_NO_ERRNO_AND_VALUE(HaveCapability(CAP_NET_RAW))) {
ASSERT_THAT(socket(AF_PACKET, SOCK_RAW, 0), SyscallFailsWithErrno(EPERM));
GTEST_SKIP() << "Missing packet socket capability";
}
// Used both to bind the socket and as the destination of the sendto() below.
sockaddr_ll bind_addr = {
.sll_family = AF_PACKET,
.sll_protocol = htons(ETH_P_IP),
.sll_ifindex = ASSERT_NO_ERRNO_AND_VALUE(GetLoopbackIndex()),
.sll_halen = ETH_ALEN,
};
FileDescriptor mmap_sock =
ASSERT_NO_ERRNO_AND_VALUE(Socket(AF_PACKET, SOCK_DGRAM, 0));

// Two blocks of 32 frames each; tp_frame_nr is derived so that the frames
// exactly fill the blocks.
uint32_t tp_frame_size = 65536 + 128;
uint32_t tp_block_size = tp_frame_size * 32;
uint32_t tp_block_nr = 2;
uint32_t tp_frame_nr = (tp_block_size * tp_block_nr) / tp_frame_size;
tpacket_req req = {
.tp_block_size = tp_block_size,
.tp_block_nr = tp_block_nr,
.tp_frame_size = tp_frame_size,
.tp_frame_nr = tp_frame_nr,
};
// Passing TPACKET_V2 makes the helper set PACKET_VERSION before it sets up
// the ring.
void* ring = ASSERT_NO_ERRNO_AND_VALUE(MakePacketMmapRing(
mmap_sock.get(), reinterpret_cast<const sockaddr*>(&bind_addr),
sizeof(bind_addr), &req, TPACKET_V2));
// Unmap the whole ring when the test scope exits.
auto ring_cleanup = Cleanup([ring, tp_block_size, tp_block_nr] {
ASSERT_THAT(munmap(ring, tp_block_size * tp_block_nr), SyscallSucceeds());
});

std::string kMessage = "123abc";
ASSERT_THAT(
sendto(mmap_sock.get(), kMessage.c_str(), kMessage.size(), 0 /* flags */,
reinterpret_cast<const sockaddr*>(&bind_addr), sizeof(bind_addr)),
SyscallSucceeds());

// The first frame header sits at the very start of the mapping.
tpacket2_hdr* hdr = reinterpret_cast<tpacket2_hdr*>(ring);
struct pollfd pollset;
pollset.fd = mmap_sock.get();
pollset.revents = 0;
pollset.events = POLLIN | POLLRDNORM | POLLERR;
// Block with no timeout until the socket reports an event.
ASSERT_THAT(poll(&pollset, 1, -1), SyscallSucceeds());
EXPECT_EQ(hdr->tp_status & TP_STATUS_USER, 1);
EXPECT_EQ(hdr->tp_len, kMessage.size());
EXPECT_EQ(hdr->tp_snaplen, kMessage.size());
// The payload is located via the tp_net offset from the start of the frame.
// NOTE(review): EXPECT_STREQ needs a NUL byte right after the payload;
// presumably the untouched ring memory is zero-filled -- confirm.
EXPECT_STREQ((char*)(hdr) + hdr->tp_net, kMessage.c_str());
}

} // namespace
} // namespace testing
} // namespace gvisor
Expand Down

0 comments on commit 6194338

Please sign in to comment.