linux-loongson/include/linux/udp.h
Jordan Rife 66d454e99d bpf: udp: Make sure iter->batch always contains a full bucket snapshot
Require that iter->batch always contains a full bucket snapshot. This
invariant is important to avoid skipping or repeating sockets during
iteration when combined with the next few patches. Previously, there were
two cases where a call to bpf_iter_udp_batch() could capture only part of
a bucket:

1. When bpf_iter_udp_realloc_batch() returns -ENOMEM [1].
2. When more sockets are added to the bucket while calling
   bpf_iter_udp_realloc_batch(), making the updated batch size
   insufficient [2].

In cases where the batch size only covers part of a bucket, it is
possible to forget which sockets were already visited, especially if we
have to process a bucket in more than two batches. This forces us to
choose between repeating or skipping sockets, so don't allow this:

1. Stop iteration and propagate -ENOMEM up to userspace if reallocation
   fails instead of continuing with a partial batch.
2. Try bpf_iter_udp_realloc_batch() with GFP_USER just as before, but if
   we still aren't able to capture the full bucket, call
   bpf_iter_udp_realloc_batch() again while holding the bucket lock to
   guarantee the bucket does not change. On the second attempt use
   GFP_NOWAIT since we hold onto the spin lock.

Introduce the udp_portaddr_for_each_entry_from macro and use it instead
of udp_portaddr_for_each_entry to make it possible to continue iteration
from an arbitrary socket. This is required for this patch in the
GFP_NOWAIT case to allow us to fill the rest of a batch starting from
the middle of a bucket and the later patch which skips sockets that were
already seen.

Testing all scenarios directly is a bit difficult, but I did some manual
testing to exercise the code paths where GFP_NOWAIT is used and where
ERR_PTR(err) is returned. I used the realloc test case included later
in this series to trigger a scenario where a realloc happens inside
bpf_iter_udp_batch and made a small code tweak to force the first
realloc attempt to allocate a too-small batch, thus requiring
another attempt with GFP_NOWAIT. Some printks showed both reallocs with
the tests passing:

Apr 25 23:16:24 crow kernel: go again GFP_USER
Apr 25 23:16:24 crow kernel: go again GFP_NOWAIT

With this setup, I also forced each of the bpf_iter_udp_realloc_batch
calls to return -ENOMEM to ensure that iteration ends and that the
read() in userspace fails.

[1]: https://lore.kernel.org/bpf/CABi4-ogUtMrH8-NVB6W8Xg_F_KDLq=yy-yu-tKr2udXE2Mu1Lg@mail.gmail.com/
[2]: https://lore.kernel.org/bpf/7ed28273-a716-4638-912d-f86f965e54bb@linux.dev/

Signed-off-by: Jordan Rife <jordan@jrife.io>
Signed-off-by: Martin KaFai Lau <martin.lau@kernel.org>
2025-05-02 10:54:37 -07:00

242 lines
6.6 KiB
C

/* SPDX-License-Identifier: GPL-2.0-or-later */
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* Definitions for the UDP protocol.
*
* Version: @(#)udp.h 1.0.2 04/28/93
*
* Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
*/
#ifndef _LINUX_UDP_H
#define _LINUX_UDP_H
#include <net/inet_sock.h>
#include <linux/skbuff.h>
#include <net/netns/hash.h>
#include <uapi/linux/udp.h>
/*
 * udp_hdr - view the transport header of @skb as a UDP header.
 *
 * This is only a pointer cast of skb_transport_header(@skb); nothing in
 * this helper checks that the packet actually carries UDP.
 */
static inline struct udphdr *udp_hdr(const struct sk_buff *skb)
{
return (struct udphdr *)skb_transport_header(skb);
}
/*
 * Size limits for the UDP hash table.
 * UDP_HTABLE_SIZE_MIN evaluates to 128 when CONFIG_BASE_SMALL is enabled
 * and to 256 otherwise.
 * NOTE(review): the unit (presumably number of hash slots) and where the
 * per-netns minimum applies are not visible in this header - confirm at
 * the users.
 */
#define UDP_HTABLE_SIZE_MIN_PERNET 128
#define UDP_HTABLE_SIZE_MIN (IS_ENABLED(CONFIG_BASE_SMALL) ? 128 : 256)
#define UDP_HTABLE_SIZE_MAX 65536
/*
 * udp_hashfn - fold a value into a hash slot index for a network namespace.
 * @net:  namespace whose net_hash_mix() value is added to @num
 * @num:  value to hash (NOTE(review): presumably a port number - confirm
 *        at the callers)
 * @mask: bit mask applied to the sum
 *
 * Returns (@num + net_hash_mix(@net)) & @mask.
 */
static inline u32 udp_hashfn(const struct net *net, u32 num, u32 mask)
{
	const u32 mixed = num + net_hash_mix(net);

	return mixed & mask;
}
/*
 * Bit numbers inside udp_sock::udp_flags.  They are normally used through
 * the udp_test_bit()/udp_set_bit()/... helpers, which prepend the
 * UDP_FLAGS_ prefix to the name they are given.
 */
enum {
UDP_FLAGS_CORK, /* Cork is required */
UDP_FLAGS_NO_CHECK6_TX, /* Send zero UDP6 checksums on TX? */
UDP_FLAGS_NO_CHECK6_RX, /* Allow zero UDP6 checksums on RX? */
UDP_FLAGS_GRO_ENABLED, /* Request GRO aggregation */
UDP_FLAGS_ACCEPT_FRAGLIST, /* SKB_GSO_FRAGLIST skbs are expected, see udp_unexpected_gso() */
UDP_FLAGS_ACCEPT_L4, /* SKB_GSO_UDP_L4 skbs are expected, see udp_unexpected_gso() */
UDP_FLAGS_ENCAP_ENABLED, /* This socket enabled encap */
UDP_FLAGS_UDPLITE_SEND_CC, /* set via udplite setsockopt */
UDP_FLAGS_UDPLITE_RECV_CC, /* set via udplite setsockopt */
};
/*
 * struct udp_sock - UDP-specific socket state.
 *
 * The structure embeds struct inet_sock as its first member; udp_sk()
 * converts a socket pointer to this type through the inet.sk member.
 */
struct udp_sock {
/* inet_sock has to be the first member */
struct inet_sock inet;
/* Shorthands for fields that live in the embedded sock_common. */
#define udp_port_hash inet.sk.__sk_common.skc_u16hashes[0]
#define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1]
#define udp_portaddr_node inet.sk.__sk_common.skc_portaddr_node
/* Bit field indexed by the UDP_FLAGS_* enum, see the udp_*_bit() helpers */
unsigned long udp_flags;
int pending; /* Any pending frames ? */
__u8 encap_type; /* Is this an Encapsulation socket? */
#if !IS_ENABLED(CONFIG_BASE_SMALL)
/* For UDP 4-tuple hash */
__u16 udp_lrpa_hash;
struct hlist_nulls_node udp_lrpa_node;
#endif
/*
* The following members retain the information needed to create a UDP
* header when the socket is uncorked.
*/
__u16 len; /* total length of pending frames */
__u16 gso_size;
/*
* Fields specific to UDP-Lite.
*/
__u16 pcslen;
__u16 pcrlen;
/*
* For encapsulation sockets.
*/
int (*encap_rcv)(struct sock *sk, struct sk_buff *skb);
void (*encap_err_rcv)(struct sock *sk, struct sk_buff *skb, int err,
__be16 port, u32 info, u8 *payload);
int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb);
void (*encap_destroy)(struct sock *sk);
/* GRO functions for UDP socket */
struct sk_buff * (*gro_receive)(struct sock *sk,
struct list_head *head,
struct sk_buff *skb);
int (*gro_complete)(struct sock *sk,
struct sk_buff *skb,
int nhoff);
/* udp_recvmsg() tries to use this before splicing sk_receive_queue */
struct sk_buff_head reader_queue ____cacheline_aligned_in_smp;
/* This field is dirtied by udp_recvmsg() */
int forward_deficit;
/* This field follows the rcvbuf value, and is touched by udp_recvmsg() */
int forward_threshold;
/* Cache friendly copy of sk->sk_peek_off >= 0, written by udp_set_peek_off() */
bool peeking_with_offset;
/*
* Accounting for the tunnel GRO fastpath.
* Unprotected by compilers guard, as it uses space available in
* the last UDP socket cacheline.
*/
struct hlist_node tunnel_list;
};
/*
 * Helpers operating on udp_sk(sk)->udp_flags.
 *
 * @nr is a flag name WITHOUT the UDP_FLAGS_ prefix (it is token-pasted
 * onto that prefix), e.g. udp_test_bit(ACCEPT_L4, sk).  Each helper simply
 * forwards to the bit operation of the same name (test_bit(), set_bit(),
 * test_and_set_bit(), clear_bit(), assign_bit()).
 */
#define udp_test_bit(nr, sk) \
test_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
#define udp_set_bit(nr, sk) \
set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
#define udp_test_and_set_bit(nr, sk) \
test_and_set_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
#define udp_clear_bit(nr, sk) \
clear_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags)
#define udp_assign_bit(nr, sk, val) \
assign_bit(UDP_FLAGS_##nr, &udp_sk(sk)->udp_flags, val)
/*
 * Evaluates to 128.  The UL suffix is on the shift count, not on the
 * shifted value, so the expression still has type int.
 * NOTE(review): what exactly is being limited (presumably segments per GSO
 * send) is not visible in this header - confirm at the users before
 * changing the type.
 */
#define UDP_MAX_SEGMENTS (1 << 7UL)
/*
 * Convert a socket pointer to the struct udp_sock that contains it, going
 * through the embedded inet.sk member.
 */
#define udp_sk(ptr) container_of_const(ptr, struct udp_sock, inet.sk)
static inline int udp_set_peek_off(struct sock *sk, int val)
{
sk_set_peek_off(sk, val);
WRITE_ONCE(udp_sk(sk)->peeking_with_offset, val >= 0);
return 0;
}
/*
 * Set or clear the NO_CHECK6_TX flag ("Send zero UDP6 checksums on TX?")
 * of @sk according to @val.
 */
static inline void udp_set_no_check6_tx(struct sock *sk, bool val)
{
udp_assign_bit(NO_CHECK6_TX, sk, val);
}
/*
 * Set or clear the NO_CHECK6_RX flag ("Allow zero UDP6 checksums on RX?")
 * of @sk according to @val.
 */
static inline void udp_set_no_check6_rx(struct sock *sk, bool val)
{
udp_assign_bit(NO_CHECK6_RX, sk, val);
}
/* Return true when the NO_CHECK6_TX flag is set on @sk. */
static inline bool udp_get_no_check6_tx(const struct sock *sk)
{
return udp_test_bit(NO_CHECK6_TX, sk);
}
/* Return true when the NO_CHECK6_RX flag is set on @sk. */
static inline bool udp_get_no_check6_rx(const struct sock *sk)
{
return udp_test_bit(NO_CHECK6_RX, sk);
}
/*
 * udp_cmsg_recv - report the GSO segment size of @skb as a control message.
 * @msg: message header the control message is appended to
 * @sk:  receiving socket (not used by this helper)
 * @skb: received buffer
 *
 * When @skb has SKB_GSO_UDP_L4 set in its gso_type, a SOL_UDP/UDP_GRO
 * control message carrying the skb's gso_size (as an int) is added to
 * @msg.  Otherwise nothing is done.
 */
static inline void udp_cmsg_recv(struct msghdr *msg, struct sock *sk,
				 struct sk_buff *skb)
{
	int gso_size;

	if (!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4))
		return;

	gso_size = skb_shinfo(skb)->gso_size;
	put_cmsg(msg, SOL_UDP, UDP_GRO, sizeof(gso_size), &gso_size);
}
/*
 * Static keys consulted by udp_encap_needed().  The IPv6 key is only
 * declared when CONFIG_IPV6 is enabled.
 */
DECLARE_STATIC_KEY_FALSE(udp_encap_needed_key);
#if IS_ENABLED(CONFIG_IPV6)
DECLARE_STATIC_KEY_FALSE(udpv6_encap_needed_key);
#endif
/*
 * udp_encap_needed - is either UDP encapsulation static key enabled?
 *
 * Checks udp_encap_needed_key first and, when CONFIG_IPV6 is enabled,
 * udpv6_encap_needed_key second.  Returns true if either branch is on.
 */
static inline bool udp_encap_needed(void)
{
	if (static_branch_unlikely(&udp_encap_needed_key))
		return true;

#if IS_ENABLED(CONFIG_IPV6)
	return static_branch_unlikely(&udpv6_encap_needed_key);
#else
	return false;
#endif
}
/*
 * udp_unexpected_gso - does @skb carry a GSO type that @sk did not opt into?
 * @sk:  receiving socket
 * @skb: received buffer
 *
 * Returns false for non-GSO skbs.  For GSO skbs, returns true when:
 *  - gso_type has SKB_GSO_UDP_L4 but @sk lacks the ACCEPT_L4 flag, or
 *  - gso_type has SKB_GSO_FRAGLIST but @sk lacks the ACCEPT_FRAGLIST flag, or
 *  - encapsulation is in use (udp_encap_needed()), @sk has an encap_rcv
 *    handler, and gso_type has neither SKB_GSO_UDP_TUNNEL nor
 *    SKB_GSO_UDP_TUNNEL_CSUM.
 */
static inline bool udp_unexpected_gso(struct sock *sk, struct sk_buff *skb)
{
	if (!skb_is_gso(skb))
		return false;

	if ((skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4) &&
	    !udp_test_bit(ACCEPT_L4, sk))
		return true;

	if ((skb_shinfo(skb)->gso_type & SKB_GSO_FRAGLIST) &&
	    !udp_test_bit(ACCEPT_FRAGLIST, sk))
		return true;

	/*
	 * GSO packets lacking the SKB_GSO_UDP_TUNNEL/_CSUM bits might still
	 * land in a tunnel, because the socket check in udp_gro_receive
	 * cannot be foolproof; report those as unexpected as well.
	 */
	return udp_encap_needed() &&
	       READ_ONCE(udp_sk(sk)->encap_rcv) &&
	       !(skb_shinfo(skb)->gso_type &
		 (SKB_GSO_UDP_TUNNEL | SKB_GSO_UDP_TUNNEL_CSUM));
}
/*
 * Set both the ACCEPT_L4 and ACCEPT_FRAGLIST flags on @sk, so that
 * udp_unexpected_gso() no longer reports SKB_GSO_UDP_L4 or
 * SKB_GSO_FRAGLIST skbs as unexpected on account of those flags.
 */
static inline void udp_allow_gso(struct sock *sk)
{
udp_set_bit(ACCEPT_L4, sk);
udp_set_bit(ACCEPT_FRAGLIST, sk);
}
/*
 * Iterators over sockets linked through __sk_common.skc_portaddr_node.
 *
 * udp_portaddr_for_each_entry()      - walk @list from its head.
 * udp_portaddr_for_each_entry_from() - continue from the socket @__sk
 *                                      already points at; takes no list
 *                                      argument.
 * udp_portaddr_for_each_entry_rcu()  - variant built on
 *                                      hlist_for_each_entry_rcu().
 */
#define udp_portaddr_for_each_entry(__sk, list) \
hlist_for_each_entry(__sk, list, __sk_common.skc_portaddr_node)
#define udp_portaddr_for_each_entry_from(__sk) \
hlist_for_each_entry_from(__sk, __sk_common.skc_portaddr_node)
#define udp_portaddr_for_each_entry_rcu(__sk, list) \
hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node)
#if !IS_ENABLED(CONFIG_BASE_SMALL)
/*
 * Iterate over struct udp_sock entries linked through udp_lrpa_node.
 * Only defined when CONFIG_BASE_SMALL is disabled, matching the
 * conditional udp_lrpa_node member of struct udp_sock.
 */
#define udp_lrpa_for_each_entry_rcu(__up, node, list) \
hlist_nulls_for_each_entry_rcu(__up, node, list, udp_lrpa_node)
#endif
/*
 * IS_UDPLITE - true when the socket @__sk has sk_protocol IPPROTO_UDPLITE.
 *
 * The argument is parenthesized so that any pointer-valued expression
 * (pointer arithmetic, a cast, a conditional expression) can be passed
 * without "->" binding to only part of it.
 */
#define IS_UDPLITE(__sk) ((__sk)->sk_protocol == IPPROTO_UDPLITE)
/*
 * udp_tunnel_sk - fetch the socket stored in a namespace's udp_tunnel_gro slot.
 * @net:     network namespace to look in
 * @is_ipv6: used directly as the index into net->ipv4.udp_tunnel_gro[]
 *           (false selects entry 0, true selects entry 1)
 *
 * Returns the rcu_dereference()'d socket pointer when
 * CONFIG_NET_UDP_TUNNEL is enabled, and NULL when it is not.
 * NOTE(review): the use of rcu_dereference() suggests callers must be in an
 * RCU read-side critical section - confirm at the callers.
 */
static inline struct sock *udp_tunnel_sk(const struct net *net, bool is_ipv6)
{
#if IS_ENABLED(CONFIG_NET_UDP_TUNNEL)
return rcu_dereference(net->ipv4.udp_tunnel_gro[is_ipv6].sk);
#else
return NULL;
#endif
}
#endif /* _LINUX_UDP_H */