#include <linux/atomic.h>
#include <net/dst.h>
#include <net/checksum.h>
+#include <linux/net_tstamp.h>
struct cgroup;
struct cgroup_subsys;
unsigned short skc_family;
volatile unsigned char skc_state;
unsigned char skc_reuse:4;
- unsigned char skc_reuseport:4;
+ unsigned char skc_reuseport:1;
+ unsigned char skc_ipv6only:1;
int skc_bound_dev_if;
union {
struct hlist_node skc_bind_node;
* @sk_sndbuf: size of send buffer in bytes
* @sk_flags: %SO_LINGER (l_onoff), %SO_BROADCAST, %SO_KEEPALIVE,
* %SO_OOBINLINE settings, %SO_TIMESTAMPING settings
- * @sk_no_check: %SO_NO_CHECK setting, whether or not checkup packets
+ * @sk_no_check_tx: %SO_NO_CHECK setting, set checksum in TX packets
+ * @sk_no_check_rx: allow zero checksum in RX packets
* @sk_route_caps: route capabilities (e.g. %NETIF_F_TSO)
* @sk_route_nocaps: forbidden route capabilities (e.g NETIF_F_GSO_MASK)
* @sk_gso_type: GSO type (e.g. %SKB_GSO_TCPV4)
* @sk_rcvtimeo: %SO_RCVTIMEO setting
* @sk_sndtimeo: %SO_SNDTIMEO setting
* @sk_rxhash: flow hash received from netif layer
+ * @sk_txhash: computed flow hash for use on transmit
* @sk_filter: socket filtering instructions
* @sk_protinfo: private area, net family specific, when not using slab
* @sk_timer: sock cleanup timer
* @sk_stamp: time stamp of last packet received
+ * @sk_tsflags: SO_TIMESTAMPING socket options
+ * @sk_tskey: counter to disambiguate concurrent tstamp requests
* @sk_socket: Identd and reporting IO signals
* @sk_user_data: RPC layer private data
* @sk_frag: cached page frag
#define sk_state __sk_common.skc_state
#define sk_reuse __sk_common.skc_reuse
#define sk_reuseport __sk_common.skc_reuseport
+#define sk_ipv6only __sk_common.skc_ipv6only
#define sk_bound_dev_if __sk_common.skc_bound_dev_if
#define sk_bind_node __sk_common.skc_bind_node
#define sk_prot __sk_common.skc_prot
#ifdef CONFIG_RPS
__u32 sk_rxhash;
#endif
+ __u32 sk_txhash;
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sk_napi_id;
unsigned int sk_ll_usec;
struct sk_buff_head sk_write_queue;
kmemcheck_bitfield_begin(flags);
unsigned int sk_shutdown : 2,
- sk_no_check : 2,
+ sk_no_check_tx : 1,
+ sk_no_check_rx : 1,
sk_userlocks : 4,
sk_protocol : 8,
sk_type : 16;
void *sk_protinfo;
struct timer_list sk_timer;
ktime_t sk_stamp;
+ u16 sk_tsflags;
+ u32 sk_tskey;
struct socket *sk_socket;
void *sk_user_data;
struct page_frag sk_frag;
u32 sk_classid;
struct cg_proto *sk_cgrp;
void (*sk_state_change)(struct sock *sk);
- void (*sk_data_ready)(struct sock *sk, int bytes);
+ void (*sk_data_ready)(struct sock *sk);
void (*sk_write_space)(struct sock *sk);
void (*sk_error_report)(struct sock *sk);
int (*sk_backlog_rcv)(struct sock *sk,
#define sk_for_each_bound(__sk, list) \
hlist_for_each_entry(__sk, list, sk_bind_node)
+/**
+ * sk_nulls_for_each_entry_offset - iterate over a list at a given struct offset
+ * @tpos: the type * to use as a loop cursor.
+ * @pos: the &struct hlist_node to use as a loop cursor.
+ * @head: the head for your list.
+ * @offset: offset of hlist_node within the struct.
+ *
+ */
+#define sk_nulls_for_each_entry_offset(tpos, pos, head, offset) \
+ for (pos = (head)->first; \
+ (!is_a_nulls(pos)) && \
+ ({ tpos = (typeof(*tpos) *)((void *)pos - offset); 1;}); \
+ pos = pos->next)
+
static inline struct user_namespace *sk_user_ns(struct sock *sk)
{
/* Careful only use this in a context where these parameters
SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */
SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */
SOCK_MEMALLOC, /* VM depends on this socket for swapping */
- SOCK_TIMESTAMPING_TX_HARDWARE, /* %SOF_TIMESTAMPING_TX_HARDWARE */
- SOCK_TIMESTAMPING_TX_SOFTWARE, /* %SOF_TIMESTAMPING_TX_SOFTWARE */
- SOCK_TIMESTAMPING_RX_HARDWARE, /* %SOF_TIMESTAMPING_RX_HARDWARE */
SOCK_TIMESTAMPING_RX_SOFTWARE, /* %SOF_TIMESTAMPING_RX_SOFTWARE */
- SOCK_TIMESTAMPING_SOFTWARE, /* %SOF_TIMESTAMPING_SOFTWARE */
- SOCK_TIMESTAMPING_RAW_HARDWARE, /* %SOF_TIMESTAMPING_RAW_HARDWARE */
- SOCK_TIMESTAMPING_SYS_HARDWARE, /* %SOF_TIMESTAMPING_SYS_HARDWARE */
SOCK_FASYNC, /* fasync() active */
SOCK_RXQ_OVFL,
SOCK_ZEROCOPY, /* buffers from userspace */
* Do not take into account this skb truesize,
* to allow even a single big packet to come.
*/
-static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb,
- unsigned int limit)
+static inline bool sk_rcvqueues_full(const struct sock *sk, unsigned int limit)
{
unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc);
static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb,
unsigned int limit)
{
- if (sk_rcvqueues_full(sk, skb, limit))
+ if (sk_rcvqueues_full(sk, limit))
return -ENOBUFS;
__sk_add_backlog(sk, skb);
const struct sk_buff *skb)
{
#ifdef CONFIG_RPS
- if (unlikely(sk->sk_rxhash != skb->rxhash)) {
+ if (unlikely(sk->sk_rxhash != skb->hash)) {
sock_rps_reset_flow(sk);
- sk->sk_rxhash = skb->rxhash;
+ sk->sk_rxhash = skb->hash;
}
#endif
}
struct sk_buff *skb);
void (*release_cb)(struct sock *sk);
- void (*mtu_reduced)(struct sock *sk);
/* Keeping track of sk's, looking them up, and port selection methods. */
void (*hash)(struct sock *sk);
/* Initialise core socket variables */
void sock_init_data(struct socket *sock, struct sock *sk);
-void sk_filter_release_rcu(struct rcu_head *rcu);
-
-/**
- * sk_filter_release - release a socket filter
- * @fp: filter to remove
- *
- * Remove a filter from a socket and release its resources.
- */
-
-static inline void sk_filter_release(struct sk_filter *fp)
-{
- if (atomic_dec_and_test(&fp->refcnt))
- call_rcu(&fp->rcu, sk_filter_release_rcu);
-}
-
-static inline void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp)
-{
- atomic_sub(sk_filter_size(fp->len), &sk->sk_omem_alloc);
- sk_filter_release(fp);
-}
-
-static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp)
-{
- atomic_inc(&fp->refcnt);
- atomic_add(sk_filter_size(fp->len), &sk->sk_omem_alloc);
-}
-
/*
* Socket reference counting postulates.
*
rcu_read_lock();
dst = rcu_dereference(sk->sk_dst_cache);
- if (dst)
- dst_hold(dst);
+ if (dst && !atomic_inc_not_zero(&dst->__refcnt))
+ dst = NULL;
rcu_read_unlock();
return dst;
}
static inline void
sk_dst_set(struct sock *sk, struct dst_entry *dst)
{
- spin_lock(&sk->sk_dst_lock);
- __sk_dst_set(sk, dst);
- spin_unlock(&sk->sk_dst_lock);
+ struct dst_entry *old_dst;
+
+ sk_tx_queue_clear(sk);
+ old_dst = xchg((__force struct dst_entry **)&sk->sk_dst_cache, dst);
+ dst_release(old_dst);
}
static inline void
static inline void
sk_dst_reset(struct sock *sk)
{
- spin_lock(&sk->sk_dst_lock);
- __sk_dst_reset(sk);
- spin_unlock(&sk->sk_dst_lock);
+ sk_dst_set(sk, NULL);
}
struct dst_entry *__sk_dst_check(struct sock *sk, u32 cookie);
}
}
+static inline void skb_set_hash_from_sk(struct sk_buff *skb, struct sock *sk)
+{
+ if (sk->sk_txhash) {
+ skb->l4_hash = 1;
+ skb->hash = sk->sk_txhash;
+ }
+}
+
/*
* Queue a received datagram if it will fit. Stream and sequenced
* protocols can't normally use this as they need to fit buffers in
skb_orphan(skb);
skb->sk = sk;
skb->destructor = sock_wfree;
+ skb_set_hash_from_sk(skb, sk);
/*
* We used to take a refcount on sk, but following operation
* is enough to guarantee sk_free() wont free this sock until
/*
* generate control messages if
- * - receive time stamping in software requested (SOCK_RCVTSTAMP
- * or SOCK_TIMESTAMPING_RX_SOFTWARE)
+ * - receive time stamping in software requested
* - software time stamp available and wanted
- * (SOCK_TIMESTAMPING_SOFTWARE)
* - hardware time stamps available and wanted
- * (SOCK_TIMESTAMPING_SYS_HARDWARE or
- * SOCK_TIMESTAMPING_RAW_HARDWARE)
*/
if (sock_flag(sk, SOCK_RCVTSTAMP) ||
- sock_flag(sk, SOCK_TIMESTAMPING_RX_SOFTWARE) ||
- (kt.tv64 && sock_flag(sk, SOCK_TIMESTAMPING_SOFTWARE)) ||
+ (sk->sk_tsflags & SOF_TIMESTAMPING_RX_SOFTWARE) ||
+ (kt.tv64 && sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) ||
(hwtstamps->hwtstamp.tv64 &&
- sock_flag(sk, SOCK_TIMESTAMPING_RAW_HARDWARE)) ||
- (hwtstamps->syststamp.tv64 &&
- sock_flag(sk, SOCK_TIMESTAMPING_SYS_HARDWARE)))
+ (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE)))
__sock_recv_timestamp(msg, sk, skb);
else
sk->sk_stamp = kt;
struct sk_buff *skb)
{
#define FLAGS_TS_OR_DROPS ((1UL << SOCK_RXQ_OVFL) | \
- (1UL << SOCK_RCVTSTAMP) | \
- (1UL << SOCK_TIMESTAMPING_SOFTWARE) | \
- (1UL << SOCK_TIMESTAMPING_RAW_HARDWARE) | \
- (1UL << SOCK_TIMESTAMPING_SYS_HARDWARE))
+ (1UL << SOCK_RCVTSTAMP))
+#define TSFLAGS_ANY (SOF_TIMESTAMPING_SOFTWARE | \
+ SOF_TIMESTAMPING_RAW_HARDWARE)
- if (sk->sk_flags & FLAGS_TS_OR_DROPS)
+ if (sk->sk_flags & FLAGS_TS_OR_DROPS || sk->sk_tsflags & TSFLAGS_ANY)
__sock_recv_ts_and_drops(msg, sk, skb);
else
sk->sk_stamp = skb->tstamp;
/**
* sock_tx_timestamp - checks whether the outgoing packet is to be time stamped
* @sk: socket sending this packet
- * @tx_flags: filled with instructions for time stamping
+ * @tx_flags: completed with instructions for time stamping
*
- * Currently only depends on SOCK_TIMESTAMPING* flags.
+ * Note : callers should take care of initial *tx_flags value (usually 0)
*/
-void sock_tx_timestamp(struct sock *sk, __u8 *tx_flags);
+void sock_tx_timestamp(const struct sock *sk, __u8 *tx_flags);
/**
* sk_eat_skb - Release a skb if it is no longer needed
*/
static inline void sk_change_net(struct sock *sk, struct net *net)
{
- put_net(sock_net(sk));
- sock_net_set(sk, hold_net(net));
+ struct net *current_net = sock_net(sk);
+
+ if (!net_eq(current_net, net)) {
+ put_net(current_net);
+ sock_net_set(sk, hold_net(net));
+ }
}
static inline struct sock *skb_steal_sock(struct sk_buff *skb)
int sock_recv_errqueue(struct sock *sk, struct msghdr *msg, int len, int level,
int type);
+bool sk_ns_capable(const struct sock *sk,
+ struct user_namespace *user_ns, int cap);
+bool sk_capable(const struct sock *sk, int cap);
+bool sk_net_capable(const struct sock *sk, int cap);
+
/*
* Enable debug/info messages
*/