Skip to content

Commit

Permalink
Add support for custom socket options and setting the experiment IP o…
Browse files Browse the repository at this point in the history
…ption.

PiperOrigin-RevId: 700011458
  • Loading branch information
manninglucas authored and gvisor-bot committed Nov 25, 2024
1 parent e816d99 commit 2267c24
Show file tree
Hide file tree
Showing 13 changed files with 193 additions and 57 deletions.
1 change: 1 addition & 0 deletions pkg/sentry/socket/netstack/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ go_library(
"netstack_state.go",
"provider.go",
"save_restore.go",
"socketopt_custom.go",
"stack.go",
"tun.go",
],
Expand Down
8 changes: 8 additions & 0 deletions pkg/sentry/socket/netstack/netstack.go
Original file line number Diff line number Diff line change
Expand Up @@ -1107,6 +1107,10 @@ func getSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, family

v := primitive.Int32(ep.SocketOptions().GetRcvlowat())
return &v, nil
default:
if v, err, handled := getSockOptSocketCustom(t, s, ep, name, outLen); handled {
return v, err
}
}
return nil, syserr.ErrProtocolNotAvailable
}
Expand Down Expand Up @@ -2028,6 +2032,10 @@ func setSockOptSocket(t *kernel.Task, s socket.Socket, ep commonEndpoint, name i
v := hostarch.ByteOrder.Uint32(optVal)
ep.SocketOptions().SetRcvlowat(int32(v))
return nil
default:
if err, handled := setSockOptSocketCustom(t, s, ep, name, optVal); handled {
return err
}
}

return nil
Expand Down
39 changes: 39 additions & 0 deletions pkg/sentry/socket/netstack/socketopt_custom.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
// Copyright 2024 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

//go:build !false
// +build !false

package netstack

import (
"gvisor.dev/gvisor/pkg/marshal"
"gvisor.dev/gvisor/pkg/sentry/kernel"
"gvisor.dev/gvisor/pkg/sentry/socket"
"gvisor.dev/gvisor/pkg/syserr"
)

// setSockOptSocketCustom is the extension hook for SetSockOpt options that
// setSockOptSocket does not handle itself.
//
// It returns the error that setSockOptSocket should return, plus a bool that
// reports whether the option was handled. This implementation recognizes no
// options, so it always reports the option as unhandled.
func setSockOptSocketCustom(t *kernel.Task, s socket.Socket, ep commonEndpoint, name int, optVal []byte) (*syserr.Error, bool) {
	// No custom options are recognized here.
	const handled = false
	return nil, handled
}

// getSockOptSocketCustom is the extension hook for GetSockOpt options that
// getSockOptSocket does not handle itself.
//
// It returns the value and error that getSockOptSocket should return, plus a
// bool that reports whether the option was handled. This implementation
// recognizes no options, so it always reports the option as unhandled.
func getSockOptSocketCustom(t *kernel.Task, s socket.Socket, ep commonEndpoint, name int, outLen int) (marshal.Marshallable, *syserr.Error, bool) {
	// No custom options are recognized here.
	const handled = false
	return nil, nil, handled
}
15 changes: 15 additions & 0 deletions pkg/tcpip/socketops.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,10 @@ type SocketOptions struct {
// rcvlowat specifies the minimum number of bytes which should be
// received to indicate the socket as readable.
rcvlowat atomicbitops.Int32

// experimentOptionValue is the value set for the experiment IP option header
// if it is not zero.
experimentOptionValue atomicbitops.Uint32
}

// InitHandler initializes the handler. This must be called before using the
Expand Down Expand Up @@ -539,6 +543,17 @@ func (so *SocketOptions) SetLinger(linger LingerOption) {
so.mu.Unlock()
}

// GetExperimentOptionValue returns the value for the experiment IP option
// header. The value is loaded atomically from its 32-bit storage and narrowed
// to uint16.
func (so *SocketOptions) GetExperimentOptionValue() uint16 {
	return uint16(so.experimentOptionValue.Load())
}

// SetExperimentOptionValue stores v as the value for the experiment IP option
// header. The value is widened to 32 bits and stored atomically.
func (so *SocketOptions) SetExperimentOptionValue(v uint16) {
	widened := uint32(v)
	so.experimentOptionValue.Store(widened)
}

// SockErrOrigin represents the constants for error origin.
type SockErrOrigin uint8

Expand Down
4 changes: 4 additions & 0 deletions pkg/tcpip/stack/registration.go
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,10 @@ type NetworkHeaderParams struct {

// DF indicates whether the DF bit should be set.
DF bool

// ExperimentOptionValue is a 16-bit value that is set for the IP experiment
// option headers if it is not zero.
ExperimentOptionValue uint16
}

// GroupAddressableEndpoint is an endpoint that supports group addressing.
Expand Down
7 changes: 4 additions & 3 deletions pkg/tcpip/transport/internal/network/endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -345,9 +345,10 @@ func (c *WriteContext) WritePacket(pkt *stack.PacketBuffer, headerIncluded bool)
}

err := c.route.WritePacket(stack.NetworkHeaderParams{
Protocol: c.e.transProto,
TTL: c.ttl,
TOS: c.tos,
Protocol: c.e.transProto,
TTL: c.ttl,
TOS: c.tos,
ExperimentOptionValue: c.e.ops.GetExperimentOptionValue(),
}, pkt)

if _, ok := err.(*tcpip.ErrNoBufferSpace); ok {
Expand Down
15 changes: 8 additions & 7 deletions pkg/tcpip/transport/tcp/accept.go
Original file line number Diff line number Diff line change
Expand Up @@ -526,13 +526,14 @@ func (e *Endpoint) handleListenSegment(ctx *listenContext, s *segment) tcpip.Err
}
cookie := ctx.createCookie(s.id, s.sequenceNumber, encodeMSS(opts.MSS))
fields := tcpFields{
id: s.id,
ttl: calculateTTL(route, e.ipv4TTL, e.ipv6HopLimit),
tos: e.sendTOS,
flags: header.TCPFlagSyn | header.TCPFlagAck,
seq: cookie,
ack: s.sequenceNumber + 1,
rcvWnd: ctx.rcvWnd,
id: s.id,
ttl: calculateTTL(route, e.ipv4TTL, e.ipv6HopLimit),
tos: e.sendTOS,
flags: header.TCPFlagSyn | header.TCPFlagAck,
seq: cookie,
ack: s.sequenceNumber + 1,
rcvWnd: ctx.rcvWnd,
expOptVal: e.SocketOptions().GetExperimentOptionValue(),
}
if err := e.sendSynTCP(route, fields, synOpts); err != nil {
return err
Expand Down
97 changes: 57 additions & 40 deletions pkg/tcpip/transport/tcp/connect.go
Original file line number Diff line number Diff line change
Expand Up @@ -364,13 +364,14 @@ func (h *handshake) synSentState(s *segment) tcpip.Error {
ttl = h.ep.route.DefaultTTL()
}
h.ep.sendSynTCP(h.ep.route, tcpFields{
id: h.ep.TransportEndpointInfo.ID,
ttl: ttl,
tos: h.ep.sendTOS,
flags: h.flags,
seq: h.iss,
ack: h.ackNum,
rcvWnd: h.rcvWnd,
id: h.ep.TransportEndpointInfo.ID,
ttl: ttl,
tos: h.ep.sendTOS,
flags: h.flags,
seq: h.iss,
ack: h.ackNum,
rcvWnd: h.rcvWnd,
expOptVal: h.ep.SocketOptions().GetExperimentOptionValue(),
}, synOpts)
return nil
}
Expand Down Expand Up @@ -450,13 +451,14 @@ func (h *handshake) synRcvdState(s *segment) tcpip.Error {
MSS: h.ep.amss,
}
h.ep.sendSynTCP(h.ep.route, tcpFields{
id: h.ep.TransportEndpointInfo.ID,
ttl: calculateTTL(h.ep.route, h.ep.ipv4TTL, h.ep.ipv6HopLimit),
tos: h.ep.sendTOS,
flags: h.flags,
seq: h.iss,
ack: h.ackNum,
rcvWnd: h.rcvWnd,
id: h.ep.TransportEndpointInfo.ID,
ttl: calculateTTL(h.ep.route, h.ep.ipv4TTL, h.ep.ipv6HopLimit),
tos: h.ep.sendTOS,
flags: h.flags,
seq: h.iss,
ack: h.ackNum,
rcvWnd: h.rcvWnd,
expOptVal: h.ep.SocketOptions().GetExperimentOptionValue(),
}, synOpts)
return nil
}
Expand Down Expand Up @@ -587,13 +589,14 @@ func (h *handshake) start() {

h.sendSYNOpts = synOpts
h.ep.sendSynTCP(h.ep.route, tcpFields{
id: h.ep.TransportEndpointInfo.ID,
ttl: calculateTTL(h.ep.route, h.ep.ipv4TTL, h.ep.ipv6HopLimit),
tos: h.ep.sendTOS,
flags: h.flags,
seq: h.iss,
ack: h.ackNum,
rcvWnd: h.rcvWnd,
id: h.ep.TransportEndpointInfo.ID,
ttl: calculateTTL(h.ep.route, h.ep.ipv4TTL, h.ep.ipv6HopLimit),
tos: h.ep.sendTOS,
flags: h.flags,
seq: h.iss,
ack: h.ackNum,
rcvWnd: h.rcvWnd,
expOptVal: h.ep.SocketOptions().GetExperimentOptionValue(),
}, synOpts)
}

Expand Down Expand Up @@ -623,13 +626,14 @@ func (h *handshake) retransmitHandlerLocked() tcpip.Error {
// retransmitted on their own).
if h.active || !h.acked || h.deferAccept != 0 && e.stack.Clock().NowMonotonic().Sub(h.startTime) > h.deferAccept {
e.sendSynTCP(e.route, tcpFields{
id: e.TransportEndpointInfo.ID,
ttl: calculateTTL(e.route, e.ipv4TTL, e.ipv6HopLimit),
tos: e.sendTOS,
flags: h.flags,
seq: h.iss,
ack: h.ackNum,
rcvWnd: h.rcvWnd,
id: e.TransportEndpointInfo.ID,
ttl: calculateTTL(e.route, e.ipv4TTL, e.ipv6HopLimit),
tos: e.sendTOS,
flags: h.flags,
seq: h.iss,
ack: h.ackNum,
rcvWnd: h.rcvWnd,
expOptVal: e.SocketOptions().GetExperimentOptionValue(),
}, h.sendSYNOpts)
// If we have ever retransmitted the SYN-ACK or
// SYN segment, we should only measure RTT if
Expand Down Expand Up @@ -800,16 +804,17 @@ func makeSynOptions(opts header.TCPSynOptions) []byte {
// tcpFields is a struct to carry different parameters required by the
// send*TCP variant functions below.
type tcpFields struct {
id stack.TransportEndpointID
ttl uint8
tos uint8
flags header.TCPFlags
seq seqnum.Value
ack seqnum.Value
rcvWnd seqnum.Size
opts []byte
txHash uint32
df bool
id stack.TransportEndpointID
ttl uint8
tos uint8
flags header.TCPFlags
seq seqnum.Value
ack seqnum.Value
rcvWnd seqnum.Size
opts []byte
txHash uint32
df bool
expOptVal uint16
}

func (e *Endpoint) sendSynTCP(r *stack.Route, tf tcpFields, opts header.TCPSynOptions) tcpip.Error {
Expand Down Expand Up @@ -897,7 +902,13 @@ func sendTCPBatch(r *stack.Route, tf tcpFields, pkt *stack.PacketBuffer, gso sta
buildTCPHdr(r, tf, pkt, gso)
tf.seq = tf.seq.Add(seqnum.Size(packetSize))
pkt.GSOOptions = gso
if err := r.WritePacket(stack.NetworkHeaderParams{Protocol: ProtocolNumber, TTL: tf.ttl, TOS: tf.tos, DF: tf.df}, pkt); err != nil {
if err := r.WritePacket(stack.NetworkHeaderParams{
Protocol: ProtocolNumber,
TTL: tf.ttl,
TOS: tf.tos,
DF: tf.df,
ExperimentOptionValue: tf.expOptVal,
}, pkt); err != nil {
r.Stats().TCP.SegmentSendErrors.Increment()
if shouldSplitPacket {
pkt.DecRef()
Expand Down Expand Up @@ -929,7 +940,13 @@ func sendTCP(r *stack.Route, tf tcpFields, pkt *stack.PacketBuffer, gso stack.GS
pkt.Owner = owner
buildTCPHdr(r, tf, pkt, gso)

if err := r.WritePacket(stack.NetworkHeaderParams{Protocol: ProtocolNumber, TTL: tf.ttl, TOS: tf.tos, DF: tf.df}, pkt); err != nil {
if err := r.WritePacket(stack.NetworkHeaderParams{
Protocol: ProtocolNumber,
TTL: tf.ttl,
TOS: tf.tos,
DF: tf.df,
ExperimentOptionValue: tf.expOptVal,
}, pkt); err != nil {
r.Stats().TCP.SegmentSendErrors.Increment()
return err
}
Expand Down
19 changes: 12 additions & 7 deletions pkg/tcpip/transport/tcp/protocol.go
Original file line number Diff line number Diff line change
Expand Up @@ -235,14 +235,19 @@ func replyWithReset(st *stack.Stack, s *segment, tos, ipv4TTL uint8, ipv6HopLimi

p := stack.NewPacketBuffer(stack.PacketBufferOptions{ReserveHeaderBytes: header.TCPMinimumSize + int(route.MaxHeaderLength())})
defer p.DecRef()
var expOptVal uint16
if s.ep != nil {
expOptVal = s.ep.SocketOptions().GetExperimentOptionValue()
}
return sendTCP(route, tcpFields{
id: s.id,
ttl: ttl,
tos: tos,
flags: flags,
seq: seq,
ack: ack,
rcvWnd: 0,
id: s.id,
ttl: ttl,
tos: tos,
flags: flags,
seq: seq,
ack: ack,
rcvWnd: 0,
expOptVal: expOptVal,
}, p, stack.GSO{}, nil /* PacketOwner */)
}

Expand Down
5 changes: 5 additions & 0 deletions test/syscalls/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -1263,3 +1263,8 @@ syscall_test(
save = False,
test = "//test/syscalls/linux:close_range_test",
)

syscall_test(
save = False,
test = "//test/syscalls/linux:socketopt_test",
)
16 changes: 16 additions & 0 deletions test/syscalls/linux/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -2557,6 +2557,22 @@ cc_library(
alwayslink = 1,
)

cc_binary(
name = "socketopt_test",
testonly = 1,
srcs = ["socketopt_test.cc"],
linkstatic = 1,
deps = select_gtest() + [
"//test/util:capability_util",
"//test/util:file_descriptor",
"//test/util:posix_error",
"//test/util:socket_util",
"//test/util:test_main",
"//test/util:test_util",
"@com_google_absl//absl/strings:str_format",
],
)

cc_binary(
name = "socket_stress_test",
testonly = 1,
Expand Down
23 changes: 23 additions & 0 deletions test/syscalls/linux/socketopt_test.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
// Copyright 2024 The gVisor Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <sys/socket.h>
#include <sys/types.h>
#include <unistd.h>

namespace gvisor {
namespace testing {
namespace {} // namespace
} // namespace testing
} // namespace gvisor
1 change: 1 addition & 0 deletions tools/bazeldefs/tags.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ generic = [
"_unsafe",
"_opts",
"_testonly",
"_custom",
]

# State explosion? Sure. This is approximately:
Expand Down

0 comments on commit 2267c24

Please sign in to comment.