X-Git-Url: http://git.cascardo.info/?a=blobdiff_plain;f=net%2Fpacket%2Faf_packet.c;h=ffa67205f698a00f7189e562a3f32a883fd2e918;hb=9954729bc3896998d53040c46a28830a3a3d5063;hp=5102c3cc4eec4ecec6698859935d7769d37a174c;hpb=e5ac320de1fe3ef5a5afa5f8a0cd19b0c5373a37;p=cascardo%2Flinux.git diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 5102c3cc4eec..ffa67205f698 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1234,27 +1234,68 @@ static void packet_free_pending(struct packet_sock *po) free_percpu(po->tx_ring.pending_refcnt); } -static bool packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) +#define ROOM_POW_OFF 2 +#define ROOM_NONE 0x0 +#define ROOM_LOW 0x1 +#define ROOM_NORMAL 0x2 + +static bool __tpacket_has_room(struct packet_sock *po, int pow_off) { - struct sock *sk = &po->sk; - bool has_room; + int idx, len; + + len = po->rx_ring.frame_max + 1; + idx = po->rx_ring.head; + if (pow_off) + idx += len >> pow_off; + if (idx >= len) + idx -= len; + return packet_lookup_frame(po, &po->rx_ring, idx, TP_STATUS_KERNEL); +} - if (po->prot_hook.func != tpacket_rcv) - return (atomic_read(&sk->sk_rmem_alloc) + skb->truesize) - <= sk->sk_rcvbuf; +static bool __tpacket_v3_has_room(struct packet_sock *po, int pow_off) +{ + int idx, len; + + len = po->rx_ring.prb_bdqc.knum_blocks; + idx = po->rx_ring.prb_bdqc.kactive_blk_num; + if (pow_off) + idx += len >> pow_off; + if (idx >= len) + idx -= len; + return prb_lookup_block(po, &po->rx_ring, idx, TP_STATUS_KERNEL); +} + +static int packet_rcv_has_room(struct packet_sock *po, struct sk_buff *skb) +{ + struct sock *sk = &po->sk; + int ret = ROOM_NONE; + + if (po->prot_hook.func != tpacket_rcv) { + int avail = sk->sk_rcvbuf - atomic_read(&sk->sk_rmem_alloc) + - skb->truesize; + if (avail > (sk->sk_rcvbuf >> ROOM_POW_OFF)) + return ROOM_NORMAL; + else if (avail > 0) + return ROOM_LOW; + else + return ROOM_NONE; + } spin_lock(&sk->sk_receive_queue.lock); - if (po->tp_version == TPACKET_V3) - has_room = prb_lookup_block(po, &po->rx_ring, - po->rx_ring.prb_bdqc.kactive_blk_num, - TP_STATUS_KERNEL); - else - has_room = packet_lookup_frame(po, &po->rx_ring, - po->rx_ring.head, - TP_STATUS_KERNEL); + if (po->tp_version == TPACKET_V3) { + if (__tpacket_v3_has_room(po, ROOM_POW_OFF)) + ret = ROOM_NORMAL; + else if (__tpacket_v3_has_room(po, 0)) + ret = ROOM_LOW; + } else { + if (__tpacket_has_room(po, ROOM_POW_OFF)) + ret = ROOM_NORMAL; + else if (__tpacket_has_room(po, 0)) + ret = ROOM_LOW; + } spin_unlock(&sk->sk_receive_queue.lock); - return has_room; + return ret; } static void packet_sock_destruct(struct sock *sk) @@ -1318,18 +1359,25 @@ static unsigned int fanout_demux_rnd(struct packet_fanout *f, static unsigned int fanout_demux_rollover(struct packet_fanout *f, struct sk_buff *skb, - unsigned int idx, unsigned int skip, + unsigned int idx, bool try_self, unsigned int num) { + struct packet_sock *po; unsigned int i, j; - i = j = min_t(int, f->next[idx], num - 1); + po = pkt_sk(f->arr[idx]); + if (try_self && packet_rcv_has_room(po, skb) != ROOM_NONE) + return idx; + + i = j = min_t(int, po->rollover->sock, num - 1); do { - if (i != skip && packet_rcv_has_room(pkt_sk(f->arr[i]), skb)) { + if (i != idx && + packet_rcv_has_room(pkt_sk(f->arr[i]), skb) == ROOM_NORMAL) { if (i != j) - f->next[idx] = i; + po->rollover->sock = i; return i; } + if (++i == num) i = 0; } while (i != j); @@ -1386,17 +1434,14 @@ static int packet_rcv_fanout(struct sk_buff *skb, struct net_device *dev, idx = fanout_demux_qm(f, skb, num); break; case PACKET_FANOUT_ROLLOVER: - idx = fanout_demux_rollover(f, skb, 0, (unsigned int) -1, num); + idx = fanout_demux_rollover(f, skb, 0, false, num); break; } - po = pkt_sk(f->arr[idx]); - if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER) && - unlikely(!packet_rcv_has_room(po, skb))) { - idx = fanout_demux_rollover(f, skb, idx, idx, num); - po = pkt_sk(f->arr[idx]); - } + if (fanout_has_flag(f, PACKET_FANOUT_FLAG_ROLLOVER)) + idx = fanout_demux_rollover(f, skb, idx, true, num); + po = pkt_sk(f->arr[idx]); return po->prot_hook.func(skb, dev, &po->prot_hook, orig_dev); } @@ -1467,6 +1512,12 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) if (po->fanout) return -EALREADY; + if (type_flags & PACKET_FANOUT_FLAG_ROLLOVER) { + po->rollover = kzalloc(sizeof(*po->rollover), GFP_KERNEL); + if (!po->rollover) + return -ENOMEM; + } + mutex_lock(&fanout_mutex); match = NULL; list_for_each_entry(f, &fanout_list, list) { @@ -1515,6 +1566,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) } out: mutex_unlock(&fanout_mutex); + if (err) { + kfree(po->rollover); + po->rollover = NULL; + } return err; } @@ -1536,6 +1591,8 @@ static void fanout_release(struct sock *sk) kfree(f); } mutex_unlock(&fanout_mutex); + + kfree(po->rollover); } static const struct proto_ops packet_ops; @@ -2311,11 +2368,14 @@ static int tpacket_snd(struct packet_sock *po, struct msghdr *msg) tlen = dev->needed_tailroom; skb = sock_alloc_send_skb(&po->sk, hlen + tlen + sizeof(struct sockaddr_ll), - 0, &err); + !need_wait, &err); - if (unlikely(skb == NULL)) + if (unlikely(skb == NULL)) { + /* we assume the socket was initially writeable ... */ + if (likely(len_sum > 0)) + err = len_sum; goto out_status; - + } tp_len = tpacket_fill_skb(po, skb, ph, dev, size_max, proto, addr, hlen); if (tp_len > dev->mtu + dev->hard_header_len) { @@ -2832,7 +2892,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, sock->state = SS_UNCONNECTED; err = -ENOBUFS; - sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto); + sk = sk_alloc(net, PF_PACKET, GFP_KERNEL, &packet_proto, kern); if (sk == NULL) goto out; @@ -2862,6 +2922,7 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, spin_lock_init(&po->bind_lock); mutex_init(&po->pg_vec_lock); + po->rollover = NULL; po->prot_hook.func = packet_rcv; if (sock->type == SOCK_PACKET)