static void packet_flush_mclist(struct sock *sk);
struct packet_skb_cb {
- unsigned int origlen;
union {
struct sockaddr_pkt pkt;
- struct sockaddr_ll ll;
+ union {
+ /* Trick: alias skb original length with
+ * ll.sll_family and ll.protocol in order
+ * to save room.
+ */
+ unsigned int origlen;
+ struct sockaddr_ll ll;
+ };
} sa;
};
free_percpu(po->tx_ring.pending_refcnt);
}
-static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
+#define ROOM_POW_OFF 2
+#define ROOM_NONE 0x0
+#define ROOM_LOW 0x1
+#define ROOM_NORMAL 0x2
+
+static bool __tpacket_has_room(struct packet_sock *po, int pow_off)
{
- struct sock *sk = &po->sk;
- bool has_room;
+ int idx, len;
+
+ len = po->rx_ring.frame_max + 1;
+ idx = po->rx_ring.head;
+ if (pow_off)
+ idx += len >> pow_off;
+ if (idx >= len)
+ idx -= len;
+ return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
+}
- if (po->prot_hook.func != tpacket_rcv)
- return (atomic_read(&sk->sk_rmem_alloc) + skb->truesize)
- <= sk->sk_rcvbuf;
+static bool __tpacket_v3_has_room(struct packet_sock *po, int pow_off)
+{
+ int idx, len;
+
+ len = po->rx_ring.prb_bdqc.knum_blocks;
+ idx = po->rx_ring.prb_bdqc.kactive_blk_num;
+ if (pow_off)
+ idx += len >> pow_off;
+ if (idx >= len)
+ idx -= len;
+ return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL);
+}
+
+static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb)
+{
+ struct sock *sk = &po->sk;
+ int ret = ROOM_NONE;
+
+ if (po->prot_hook.func != tpacket_rcv) {
+ int avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc)
+ - skb->truesize;
+ if (avail > (sk->sk_rcvbuf >> ROOM_POW_OFF))
+ return ROOM_NORMAL;
+ else if (avail > 0)
+ return ROOM_LOW;
+ else
+ return ROOM_NONE;
+ }
spin_lock(&sk->sk_receive_queue.lock);
- if (po->tp_version == TPACKET_V3)
- has_room = prb_lookup_block(po, &po->rx_ring,
- po->rx_ring.prb_bdqc.kactive_blk_num,
- TP_STATUS_KERNEL);
- else
- has_room = packet_lookup_frame(po, &po->rx_ring,
- po->rx_ring.head,
- TP_STATUS_KERNEL);
+ if (po->tp_version == TPACKET_V3) {
+ if (__tpacket_v3_has_room(po, ROOM_POW_OFF))
+ ret = ROOM_NORMAL;
+ else if (__tpacket_v3_has_room(po, 0))
+ ret = ROOM_LOW;
+ } else {
+ if (__tpacket_has_room(po, ROOM_POW_OFF))
+ ret = ROOM_NORMAL;
+ else if (__tpacket_has_room(po, 0))
+ ret = ROOM_LOW;
+ }
spin_unlock(&sk->sk_receive_queue.lock);
- return has_room;
+ return ret;
}
static void packet_sock_destruct(struct sock *sk)
static unsigned int fanout_demux_rollover(struct packet_fanout *f,
struct sk_buff *skb,
- unsigned int idx, unsigned int skip,
+ unsigned int idx, bool try_self,
unsigned int num)
{
+ struct packet_sock *po;
unsigned int i, j;
- i = j = min_t(int, f->next[idx], num - 1);
+ po = pkt_sk(f->arr[idx]);
+ if (try_self && packet_rcv_has_room(po, skb) != ROOM_NONE)
+ return idx;
+
+ i = j = min_t(int, po->rollover->sock, num - 1);
do {
- if (i != skip && packet_rcv_has_room(pkt_sk(f->arr[i]), skb)) {
+ if (i != idx &&
+ packet_rcv_has_room(pkt_sk(f->arr[i]), skb) == ROOM_NORMAL) {
if (i != j)
- f->next[idx] = i;
+ po->rollover->sock = i;
return i;
}
+
if (++i == num)
i = 0;
} while (i != j);
idx = fanout_demux_qm(f, skb, num);
break;
case PACKET_FANOUT_ROLLOVER:
- idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num);
+ idx = fanout_demux_rollover(f, skb, 0, false, num);
break;
}
- po = pkt_sk(f->arr[idx]);
- if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER) &&
- unlikely(!packet_rcv_has_room(po, skb))) {
- idx = fanout_demux_rollover(f, skb, idx, idx, num);
- po = pkt_sk(f->arr[idx]);
- }
+ if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER))
+ idx = fanout_demux_rollover(f, skb, idx, true, num);
+ po = pkt_sk(f->arr[idx]);
return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev);
}
if (po->fanout)
return -EALREADY;
+ if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER) {
+ po->rollover = kzalloc(sizeof(*po->rollover), GFP_KERNEL);
+ if (!po->rollover)
+ return -ENOMEM;
+ }
+
mutex_lock(&fanout_mutex);
match = NULL;
list_for_each_entry(f, &fanout_list, list) {
}
out:
mutex_unlock(&fanout_mutex);
+ if (err) {
+ kfree(po->rollover);
+ po->rollover = NULL;
+ }
return err;
}
kfree(f);
}
mutex_unlock(&fanout_mutex);
+
+ kfree(po->rollover);
}
static const struct proto_ops packet_ops;
* protocol layers and you must therefore supply it with a complete frame
*/
-static int packet_sendmsg_spkt(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int packet_sendmsg_spkt(struct socket *sock, struct msghdr *msg,
+ size_t len)
{
struct sock *sk = sock->sk;
DECLARE_SOCKADDR(struct sockaddr_pkt *, saddr, msg->msg_name);
skb = nskb;
}
- BUILD_BUG_ON(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8 >
- sizeof(skb->cb));
+ sock_skb_cb_check_size(sizeof(*PACKET_SKB_CB(skb)) + MAX_ADDR_LEN - 8);
sll = &PACKET_SKB_CB(skb)->sa.ll;
- sll->sll_family = AF_PACKET;
sll->sll_hatype = dev->type;
- sll->sll_protocol = skb->protocol;
sll->sll_pkttype = skb->pkt_type;
if (unlikely(po->origdev))
sll->sll_ifindex = orig_dev->ifindex;
sll->sll_halen = dev_parse_header(skb, sll->sll_addr);
- PACKET_SKB_CB(skb)->origlen = skb->len;
+ /* sll->sll_family and sll->sll_protocol are set in packet_recvmsg().
+ * Use their space for storing the original skb length.
+ */
+ PACKET_SKB_CB(skb)->sa.origlen = skb->len;
if (pskb_trim(skb, snaplen))
goto drop_n_acct;
spin_lock(&sk->sk_receive_queue.lock);
po->stats.stats1.tp_packets++;
- skb->dropcount = atomic_read(&sk->sk_drops);
+ sock_skb_set_dropcount(sk, skb);
__skb_queue_tail(&sk->sk_receive_queue, skb);
spin_unlock(&sk->sk_receive_queue.lock);
sk->sk_data_ready(sk);
}
}
- if (skb->ip_summed == CHECKSUM_PARTIAL)
- status |= TP_STATUS_CSUMNOTREADY;
-
snaplen = skb->len;
res = run_filter(skb, sk, snaplen);
if (!res)
goto drop_n_restore;
+
+ if (skb->ip_summed == CHECKSUM_PARTIAL)
+ status |= TP_STATUS_CSUMNOTREADY;
+ else if (skb->pkt_type != PACKET_OUTGOING &&
+ (skb->ip_summed == CHECKSUM_COMPLETE ||
+ skb_csum_unnecessary(skb)))
+ status |= TP_STATUS_CSUM_VALID;
+
if (snaplen > res)
snaplen = res;
tlen = dev->needed_tailroom;
skb = sock_alloc_send_skb(&po->sk,
hlen + tlen + sizeof(struct sockaddr_ll),
- 0, &err);
+ !need_wait, &err);
- if (unlikely(skb == NULL))
+ if (unlikely(skb == NULL)) {
+ /* we assume the socket was initially writeable ... */
+ if (likely(len_sum > 0))
+ err = len_sum;
goto out_status;
-
+ }
tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto,
addr, hlen);
if (tp_len > dev->mtu + dev->hard_header_len) {
return err;
}
-static int packet_sendmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static int packet_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
struct packet_sock *po = pkt_sk(sk);
sock->state = SS_UNCONNECTED;
err = -ENOBUFS;
- sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto);
+ sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern);
if (sk == NULL)
goto out;
spin_lock_init(&po->bind_lock);
mutex_init(&po->pg_vec_lock);
+ po->rollover = NULL;
po->prot_hook.func = packet_rcv;
if (sock->type == SOCK_PACKET)
* If necessary we block.
*/
-static int packet_recvmsg(struct kiocb *iocb, struct socket *sock,
- struct msghdr *msg, size_t len, int flags)
+static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
+ int flags)
{
struct sock *sk = sock->sk;
struct sk_buff *skb;
int copied, err;
int vnet_hdr_len = 0;
+ unsigned int origlen = 0;
err = -EINVAL;
if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT|MSG_ERRQUEUE))
if (err)
goto out_free;
+ if (sock->type != SOCK_PACKET) {
+ struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
+
+ /* Original length was stored in sockaddr_ll fields */
+ origlen = PACKET_SKB_CB(skb)->sa.origlen;
+ sll->sll_family = AF_PACKET;
+ sll->sll_protocol = skb->protocol;
+ }
+
sock_recv_ts_and_drops(msg, sk, skb);
if (msg->msg_name) {
msg->msg_namelen = sizeof(struct sockaddr_pkt);
} else {
struct sockaddr_ll *sll = &PACKET_SKB_CB(skb)->sa.ll;
+
msg->msg_namelen = sll->sll_halen +
offsetof(struct sockaddr_ll, sll_addr);
}
aux.tp_status = TP_STATUS_USER;
if (skb->ip_summed == CHECKSUM_PARTIAL)
aux.tp_status |= TP_STATUS_CSUMNOTREADY;
- aux.tp_len = PACKET_SKB_CB(skb)->origlen;
+ else if (skb->pkt_type != PACKET_OUTGOING &&
+ (skb->ip_summed == CHECKSUM_COMPLETE ||
+ skb_csum_unnecessary(skb)))
+ aux.tp_status |= TP_STATUS_CSUM_VALID;
+
+ aux.tp_len = origlen;
aux.tp_snaplen = skb->len;
aux.tp_mac = 0;
aux.tp_net = skb_network_offset(skb);