#include <linux/hrtimer.h>
#include <linux/dma-mapping.h>
#include <linux/netdev_features.h>
+#include <linux/sched.h>
#include <net/flow_keys.h>
/* A. Checksumming of received packets by device.
#define CHECKSUM_COMPLETE 2
#define CHECKSUM_PARTIAL 3
-#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \
- ~(SMP_CACHE_BYTES - 1))
+#define SKB_DATA_ALIGN(X) ALIGN(X, SMP_CACHE_BYTES)
#define SKB_WITH_OVERHEAD(X) \
((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
#define SKB_MAX_ORDER(X, ORDER) \
* struct skb_shared_hwtstamps - hardware time stamps
* @hwtstamp: hardware time stamp transformed into duration
* since arbitrary point in time
- * @syststamp: hwtstamp transformed to system time base
*
* Software time stamps generated by ktime_get_real() are stored in
- * skb->tstamp. The relation between the different kinds of time
- * stamps is as follows:
- *
- * syststamp and tstamp can be compared against each other in
- * arbitrary combinations. The accuracy of a
- * syststamp/tstamp/"syststamp from other device" comparison is
- * limited by the accuracy of the transformation into system time
- * base. This depends on the device driver and its underlying
- * hardware.
+ * skb->tstamp.
*
* hwtstamps can only be compared against other hwtstamps from
* the same device.
*/
struct skb_shared_hwtstamps {
ktime_t hwtstamp;
- ktime_t syststamp;
};
/* Definitions for tx_flags in struct skb_shared_info */
/* generate hardware time stamp */
SKBTX_HW_TSTAMP = 1 << 0,
- /* generate software time stamp */
+ /* generate software time stamp when queueing packet to NIC */
SKBTX_SW_TSTAMP = 1 << 1,
/* device driver is going to provide hardware time stamp */
* all frags to avoid possible bad checksum
*/
SKBTX_SHARED_FRAG = 1 << 5,
+
+ /* generate software time stamp when entering packet scheduling */
+ SKBTX_SCHED_TSTAMP = 1 << 6,
+
+ /* generate software timestamp on peer data acknowledgment */
+ SKBTX_ACK_TSTAMP = 1 << 7,
};
+#define SKBTX_ANY_SW_TSTAMP (SKBTX_SW_TSTAMP | \
+ SKBTX_SCHED_TSTAMP | \
+ SKBTX_ACK_TSTAMP)
+#define SKBTX_ANY_TSTAMP (SKBTX_HW_TSTAMP | SKBTX_ANY_SW_TSTAMP)
+
/*
* The callback notifies userspace to release buffers when skb DMA is done in
* lower device, the skb last reference should be 0 when calling this.
unsigned short gso_type;
struct sk_buff *frag_list;
struct skb_shared_hwtstamps hwtstamps;
+ u32 tskey;
__be32 ip6_frag_id;
/*
SKB_GSO_GRE = 1 << 6,
- SKB_GSO_IPIP = 1 << 7,
+ SKB_GSO_GRE_CSUM = 1 << 7,
+
+ SKB_GSO_IPIP = 1 << 8,
+
+ SKB_GSO_SIT = 1 << 9,
- SKB_GSO_SIT = 1 << 8,
+ SKB_GSO_UDP_TUNNEL = 1 << 10,
- SKB_GSO_UDP_TUNNEL = 1 << 9,
+ SKB_GSO_UDP_TUNNEL_CSUM = 1 << 11,
+
+ SKB_GSO_MPLS = 1 << 12,
- SKB_GSO_MPLS = 1 << 10,
};
#if BITS_PER_LONG > 32
typedef unsigned char *sk_buff_data_t;
#endif
+/**
+ * struct skb_mstamp - multi resolution time stamps
+ * @stamp_us: timestamp in us resolution
+ * @stamp_jiffies: timestamp in jiffies
+ */
+struct skb_mstamp {
+ union {
+ u64 v64;
+ struct {
+ u32 stamp_us;
+ u32 stamp_jiffies;
+ };
+ };
+};
+
+/**
+ * skb_mstamp_get - get current timestamp
+ * @cl: place to store timestamps
+ */
+static inline void skb_mstamp_get(struct skb_mstamp *cl)
+{
+ u64 val = local_clock();
+
+ do_div(val, NSEC_PER_USEC);
+ cl->stamp_us = (u32)val;
+ cl->stamp_jiffies = (u32)jiffies;
+}
+
+/**
+ * skb_mstamp_delta - compute the difference in usec between two skb_mstamp
+ * @t1: pointer to newest sample
+ * @t0: pointer to oldest sample
+ */
+static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1,
+ const struct skb_mstamp *t0)
+{
+ s32 delta_us = t1->stamp_us - t0->stamp_us;
+ u32 delta_jiffies = t1->stamp_jiffies - t0->stamp_jiffies;
+
+ /* If delta_us is negative, this might be because interval is too big,
+ * or local_clock() drift is too big : fallback using jiffies.
+ */
+ if (delta_us <= 0 ||
+ delta_jiffies >= (INT_MAX / (USEC_PER_SEC / HZ)))
+
+ delta_us = jiffies_to_usecs(delta_jiffies);
+
+ return delta_us;
+}
+
+
/**
* struct sk_buff - socket buffer
* @next: Next buffer in list
* @prev: Previous buffer in list
- * @tstamp: Time we arrived
+ * @tstamp: Time we arrived/left
* @sk: Socket we are owned by
* @dev: Device we arrived on/are leaving by
* @cb: Control buffer. Free for use by every layer. Put private vars here
* @csum_start: Offset from skb->head where checksumming should start
* @csum_offset: Offset from csum_start where checksum should be stored
* @priority: Packet queueing priority
- * @local_df: allow local fragmentation
+ * @ignore_df: allow local fragmentation
* @cloned: Head may be cloned (check refcnt to be sure)
* @ip_summed: Driver fed us an IP checksum
* @nohdr: Payload reference only, must not modify header
* @skb_iif: ifindex of device we arrived on
* @tc_index: Traffic control index
* @tc_verd: traffic control verdict
- * @rxhash: the packet hash computed on receive
+ * @hash: the packet hash
* @queue_mapping: Queue mapping for multiqueue devices
* @ndisc_nodetype: router type (from link layer)
* @ooo_okay: allow the mapping of a socket to a queue to be changed
- * @l4_rxhash: indicate rxhash is a canonical 4-tuple hash over transport
+ * @l4_hash: indicate hash is a canonical 4-tuple hash over transport
* ports.
+ * @sw_hash: indicates hash was computed in software stack
* @wifi_acked_valid: wifi_acked was set
* @wifi_acked: whether frame was acked on wifi or not
* @no_fcs: Request NIC to treat last 4 bytes as Ethernet FCS
struct sk_buff *next;
struct sk_buff *prev;
- ktime_t tstamp;
+ union {
+ ktime_t tstamp;
+ struct skb_mstamp skb_mstamp;
+ };
struct sock *sk;
struct net_device *dev;
};
__u32 priority;
kmemcheck_bitfield_begin(flags1);
- __u8 local_df:1,
+ __u8 ignore_df:1,
cloned:1,
ip_summed:2,
nohdr:1,
int skb_iif;
- __u32 rxhash;
+ __u32 hash;
__be16 vlan_proto;
__u16 vlan_tci;
#endif
__u8 pfmemalloc:1;
__u8 ooo_okay:1;
- __u8 l4_rxhash:1;
+ __u8 l4_hash:1;
+ __u8 sw_hash:1;
__u8 wifi_acked_valid:1;
__u8 wifi_acked:1;
__u8 no_fcs:1;
* headers if needed
*/
__u8 encapsulation:1;
- /* 6/8 bit hole (depending on ndisc_nodetype presence) */
+ __u8 encap_hdr_csum:1;
+ __u8 csum_valid:1;
+ __u8 csum_complete_sw:1;
+ /* 2/4 bit hole (depending on ndisc_nodetype presence) */
kmemcheck_bitfield_end(flags2);
#ifdef CONFIG_NET_RX_BUSY_POLL
int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask);
struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority);
struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t priority);
-struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom, gfp_t gfp_mask);
+struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom,
+ gfp_t gfp_mask, bool fclone);
+static inline struct sk_buff *__pskb_copy(struct sk_buff *skb, int headroom,
+ gfp_t gfp_mask)
+{
+ return __pskb_copy_fclone(skb, headroom, gfp_mask, false);
+}
int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, gfp_t gfp_mask);
struct sk_buff *skb_realloc_headroom(struct sk_buff *skb,
unsigned int headroom);
struct sk_buff *skb_copy_expand(const struct sk_buff *skb, int newheadroom,
int newtailroom, gfp_t priority);
+int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
+ int offset, int len);
int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset,
int len);
int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
static inline void
skb_set_hash(struct sk_buff *skb, __u32 hash, enum pkt_hash_types type)
{
- skb->l4_rxhash = (type == PKT_HASH_TYPE_L4);
- skb->rxhash = hash;
+ skb->l4_hash = (type == PKT_HASH_TYPE_L4);
+ skb->sw_hash = 0;
+ skb->hash = hash;
}
void __skb_get_hash(struct sk_buff *skb);
static inline __u32 skb_get_hash(struct sk_buff *skb)
{
- if (!skb->l4_rxhash)
+ if (!skb->l4_hash && !skb->sw_hash)
__skb_get_hash(skb);
- return skb->rxhash;
+ return skb->hash;
}
static inline __u32 skb_get_hash_raw(const struct sk_buff *skb)
{
- return skb->rxhash;
+ return skb->hash;
}
static inline void skb_clear_hash(struct sk_buff *skb)
{
- skb->rxhash = 0;
- skb->l4_rxhash = 0;
+ skb->hash = 0;
+ skb->sw_hash = 0;
+ skb->l4_hash = 0;
}
static inline void skb_clear_hash_if_not_l4(struct sk_buff *skb)
{
- if (!skb->l4_rxhash)
+ if (!skb->l4_hash)
skb_clear_hash(skb);
}
static inline void skb_copy_hash(struct sk_buff *to, const struct sk_buff *from)
{
- to->rxhash = from->rxhash;
- to->l4_rxhash = from->l4_rxhash;
+ to->hash = from->hash;
+ to->sw_hash = from->sw_hash;
+ to->l4_hash = from->l4_hash;
};
#ifdef NET_SKBUFF_DATA_USES_OFFSET
return pskb_may_pull(skb, skb_network_offset(skb) + len);
}
+static inline void skb_pop_rcv_encapsulation(struct sk_buff *skb)
+{
+ /* Only continue with checksum unnecessary if device indicated
+ * it is valid across encapsulation (skb->encapsulation was set).
+ */
+ if (skb->ip_summed == CHECKSUM_UNNECESSARY && !skb->encapsulation)
+ skb->ip_summed = CHECKSUM_NONE;
+
+ skb->encapsulation = 0;
+ skb->csum_valid = 0;
+}
+
/*
* CPUs often take a performance hit when accessing unaligned memory
* locations. The actual performance hit varies, it can be small if the
}
/**
- * skb_frag_page - retrieve the page refered to by a paged fragment
+ * skb_frag_page - retrieve the page referred to by a paged fragment
* @frag: the paged fragment
*
* Returns the &struct page associated with @frag.
return __pskb_copy(skb, skb_headroom(skb), gfp_mask);
}
+
+static inline struct sk_buff *pskb_copy_for_clone(struct sk_buff *skb,
+ gfp_t gfp_mask)
+{
+ return __pskb_copy_fclone(skb, skb_headroom(skb), gfp_mask, true);
+}
+
+
/**
* skb_clone_writable - is the header of a clone writable
* @skb: buffer to check
unsigned int flags);
void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to);
unsigned int skb_zerocopy_headlen(const struct sk_buff *from);
-void skb_zerocopy(struct sk_buff *to, const struct sk_buff *from,
- int len, int hlen);
+int skb_zerocopy(struct sk_buff *to, struct sk_buff *from,
+ int len, int hlen);
void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len);
int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen);
void skb_scrub_packet(struct sk_buff *skb, bool xnet);
unsigned int skb_gso_transport_seglen(const struct sk_buff *skb);
struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features);
+struct sk_buff *skb_vlan_untag(struct sk_buff *skb);
struct skb_checksum_ops {
__wsum (*update)(const void *mem, int len, __wsum wsum);
return ktime_set(0, 0);
}
-void skb_timestamping_init(void);
-
#ifdef CONFIG_NETWORK_PHY_TIMESTAMPING
void skb_clone_tx_timestamp(struct sk_buff *skb);
void skb_complete_tx_timestamp(struct sk_buff *skb,
struct skb_shared_hwtstamps *hwtstamps);
+void __skb_tstamp_tx(struct sk_buff *orig_skb,
+ struct skb_shared_hwtstamps *hwtstamps,
+ struct sock *sk, int tstype);
+
/**
* skb_tstamp_tx - queue clone of skb with send time stamps
* @orig_skb: the original outgoing packet
static inline int skb_csum_unnecessary(const struct sk_buff *skb)
{
- return skb->ip_summed & CHECKSUM_UNNECESSARY;
+ return ((skb->ip_summed & CHECKSUM_UNNECESSARY) || skb->csum_valid);
}
/**
0 : __skb_checksum_complete(skb);
}
+/* Check if we need to perform checksum complete validation.
+ *
+ * Returns true if checksum complete is needed, false otherwise
+ * (either checksum is unnecessary or zero checksum is allowed).
+ */
+static inline bool __skb_checksum_validate_needed(struct sk_buff *skb,
+ bool zero_okay,
+ __sum16 check)
+{
+ if (skb_csum_unnecessary(skb) || (zero_okay && !check)) {
+ skb->csum_valid = 1;
+ return false;
+ }
+
+ return true;
+}
+
+/* For small packets <= CHECKSUM_BREAK peform checksum complete directly
+ * in checksum_init.
+ */
+#define CHECKSUM_BREAK 76
+
+/* Validate (init) checksum based on checksum complete.
+ *
+ * Return values:
+ * 0: checksum is validated or try to in skb_checksum_complete. In the latter
+ * case the ip_summed will not be CHECKSUM_UNNECESSARY and the pseudo
+ * checksum is stored in skb->csum for use in __skb_checksum_complete
+ * non-zero: value of invalid checksum
+ *
+ */
+static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb,
+ bool complete,
+ __wsum psum)
+{
+ if (skb->ip_summed == CHECKSUM_COMPLETE) {
+ if (!csum_fold(csum_add(psum, skb->csum))) {
+ skb->csum_valid = 1;
+ return 0;
+ }
+ }
+
+ skb->csum = psum;
+
+ if (complete || skb->len <= CHECKSUM_BREAK) {
+ __sum16 csum;
+
+ csum = __skb_checksum_complete(skb);
+ skb->csum_valid = !csum;
+ return csum;
+ }
+
+ return 0;
+}
+
+static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto)
+{
+ return 0;
+}
+
+/* Perform checksum validate (init). Note that this is a macro since we only
+ * want to calculate the pseudo header which is an input function if necessary.
+ * First we try to validate without any computation (checksum unnecessary) and
+ * then calculate based on checksum complete calling the function to compute
+ * pseudo header.
+ *
+ * Return values:
+ * 0: checksum is validated or try to in skb_checksum_complete
+ * non-zero: value of invalid checksum
+ */
+#define __skb_checksum_validate(skb, proto, complete, \
+ zero_okay, check, compute_pseudo) \
+({ \
+ __sum16 __ret = 0; \
+ skb->csum_valid = 0; \
+ if (__skb_checksum_validate_needed(skb, zero_okay, check)) \
+ __ret = __skb_checksum_validate_complete(skb, \
+ complete, compute_pseudo(skb, proto)); \
+ __ret; \
+})
+
+#define skb_checksum_init(skb, proto, compute_pseudo) \
+ __skb_checksum_validate(skb, proto, false, false, 0, compute_pseudo)
+
+#define skb_checksum_init_zero_check(skb, proto, check, compute_pseudo) \
+ __skb_checksum_validate(skb, proto, false, true, check, compute_pseudo)
+
+#define skb_checksum_validate(skb, proto, compute_pseudo) \
+ __skb_checksum_validate(skb, proto, true, false, 0, compute_pseudo)
+
+#define skb_checksum_validate_zero_check(skb, proto, check, \
+ compute_pseudo) \
+ __skb_checksum_validate_(skb, proto, true, true, check, compute_pseudo)
+
+#define skb_checksum_simple_validate(skb) \
+ __skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo)
+
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
void nf_conntrack_destroy(struct nf_conntrack *nfct);
static inline void nf_conntrack_put(struct nf_conntrack *nfct)
{ }
#endif
+static inline bool skb_irq_freeable(const struct sk_buff *skb)
+{
+ return !skb->destructor &&
+#if IS_ENABLED(CONFIG_XFRM)
+ !skb->sp &&
+#endif
+#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+ !skb->nfct &&
+#endif
+ !skb->_skb_refdst &&
+ !skb_has_frag_list(skb);
+}
+
static inline void skb_set_queue_mapping(struct sk_buff *skb, u16 queue_mapping)
{
skb->queue_mapping = queue_mapping;
return skb->queue_mapping != 0;
}
-u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
+u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb,
unsigned int num_tx_queues);
static inline struct sec_path *skb_sec_path(struct sk_buff *skb)
struct skb_gso_cb {
int mac_offset;
int encap_level;
+ __u16 csum_start;
};
#define SKB_GSO_CB(skb) ((struct skb_gso_cb *)(skb)->cb)
return 0;
}
+/* Compute the checksum for a gso segment. First compute the checksum value
+ * from the start of transport header to SKB_GSO_CB(skb)->csum_start, and
+ * then add in skb->csum (checksum from csum_start to end of packet).
+ * skb->csum and csum_start are then updated to reflect the checksum of the
+ * resultant packet starting from the transport header-- the resultant checksum
+ * is in the res argument (i.e. normally zero or ~ of checksum of a pseudo
+ * header.
+ */
+static inline __sum16 gso_make_checksum(struct sk_buff *skb, __wsum res)
+{
+ int plen = SKB_GSO_CB(skb)->csum_start - skb_headroom(skb) -
+ skb_transport_offset(skb);
+ __u16 csum;
+
+ csum = csum_fold(csum_partial(skb_transport_header(skb),
+ plen, skb->csum));
+ skb->csum = res;
+ SKB_GSO_CB(skb)->csum_start -= plen;
+
+ return csum;
+}
+
static inline bool skb_is_gso(const struct sk_buff *skb)
{
return skb_shinfo(skb)->gso_size;