/*
 *	IP multicast routing support for mrouted 3.6/3.8
 *
 *	(c) 1995 Alan Cox, <alan@lxorguk.ukuu.org.uk>
 *	Linux Consultancy and Custom Driver Development
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Michael Chastain        : Incorrect size of copying.
 *	Alan Cox                : Added the cache manager code.
 *	Alan Cox                : Fixed the clone/copy bug and device race.
 *	Mike McLagan            : Routing by source.
 *	Malcolm Beattie         : Buffer handling fixes.
 *	Alexey Kuznetsov        : Double buffer free and other fixes.
 *	SVR Anand               : Fixed several multicast bugs and problems.
 *	Alexey Kuznetsov        : Status, optimisations and more.
 *	Brad Parker             : Better behaviour on mrouted upcall
 *	Carlos Picoto           : PIMv1 support.
 *	Pavlin Ivanov Radoslavov: PIMv2 Registers must checksum only the PIM header.
 *	                          Relax this requirement to work with older peers.
 */
#include <asm/system.h>
#include <asm/uaccess.h>
#include <linux/types.h>
#include <linux/capability.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/kernel.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/socket.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/inetdevice.h>
#include <linux/igmp.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/mroute.h>
#include <linux/init.h>
#include <linux/if_ether.h>
#include <linux/slab.h>
#include <net/net_namespace.h>
#include <net/protocol.h>
#include <linux/skbuff.h>
#include <net/route.h>
#include <linux/notifier.h>
#include <linux/if_arp.h>
#include <linux/netfilter_ipv4.h>
#include <net/checksum.h>
#include <net/netlink.h>
#include <net/fib_rules.h>
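/* Convenience flag: defined whenever either PIM-SM version is built in,
 * so the shared PIM register-VIF code below can test a single symbol.
 */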
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
#define CONFIG_IP_PIMSM	1
#endif

struct mr_table {
        struct list_head        list;
#ifdef CONFIG_NET_NS
        struct net              *net;
#endif
        u32                     id;
        struct sock __rcu       *mroute_sk;
        struct timer_list       ipmr_expire_timer;
        struct list_head        mfc_unres_queue;
        struct list_head        mfc_cache_array[MFC_LINES];
        struct vif_device       vif_table[MAXVIFS];
        int                     maxvif;
        atomic_t                cache_resolve_queue_len;
        int                     mroute_do_assert;
        int                     mroute_do_pim;
#if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
        int                     mroute_reg_vif_num;
#endif
};

struct ipmr_rule {
        struct fib_rule         common;
};
/* Big lock, protecting vif table, mrt cache and mroute socket state.
 * Note that the changes are semaphored via rtnl_lock.
 */
static DEFINE_RWLOCK(mrt_lock);

/* Multicast router control variables */

#define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL)

/* Special spinlock for queue of unresolved entries */
static DEFINE_SPINLOCK(mfc_unres_lock);

/* We return to Alan's original scheme. The hash table of resolved
 * entries is changed only in process context and protected with the
 * weak lock mrt_lock. The queue of unresolved entries is protected
 * with the strong spinlock mfc_unres_lock.
 *
 * Thus the data path is entirely free of exclusive locks.
 */
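/* Note: lookups in the resolved cache run under rcu_read_lock() (see
 * ipmr_cache_find() below); mrt_lock is taken for writing only by
 * updaters, and the unresolved queue is always handled under
 * mfc_unres_lock.
 */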
static struct kmem_cache *mrt_cachep __read_mostly;

static struct mr_table *ipmr_new_table(struct net *net, u32 id);
static int ip_mr_forward(struct net *net, struct mr_table *mrt,
                         struct sk_buff *skb, struct mfc_cache *cache,
                         int local);
static int ipmr_cache_report(struct mr_table *mrt,
                             struct sk_buff *pkt, vifi_t vifi, int assert);
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
                              struct mfc_cache *c, struct rtmsg *rtm);
static void ipmr_expire_process(unsigned long arg);
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
#define ipmr_for_each_table(mrt, net) \
        list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list)

static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
        struct mr_table *mrt;

        ipmr_for_each_table(mrt, net) {
                if (mrt->id == id)
                        return mrt;
        }
        return NULL;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
                           struct mr_table **mrt)
{
        struct ipmr_result res;
        struct fib_lookup_arg arg = { .result = &res, };
        int err;

        err = fib_rules_lookup(net->ipv4.mr_rules_ops, flp, 0, &arg);
        if (err < 0)
                return err;
        *mrt = res.mrt;
        return 0;
}

static int ipmr_rule_action(struct fib_rule *rule, struct flowi *flp,
                            int flags, struct fib_lookup_arg *arg)
{
        struct ipmr_result *res = arg->result;
        struct mr_table *mrt;

        switch (rule->action) {
        case FR_ACT_TO_TBL:
                break;
        case FR_ACT_UNREACHABLE:
                return -ENETUNREACH;
        case FR_ACT_PROHIBIT:
                return -EACCES;
        case FR_ACT_BLACKHOLE:
        default:
                return -EINVAL;
        }

        mrt = ipmr_get_table(rule->fr_net, rule->table);
        if (mrt == NULL)
                return -EAGAIN;
        res->mrt = mrt;
        return 0;
}

static int ipmr_rule_match(struct fib_rule *rule, struct flowi *fl, int flags)
{
        return 1;
}

static const struct nla_policy ipmr_rule_policy[FRA_MAX + 1] = {
        FRA_GENERIC_POLICY,
};

static int ipmr_rule_configure(struct fib_rule *rule, struct sk_buff *skb,
                               struct fib_rule_hdr *frh, struct nlattr **tb)
{
        return 0;
}

static int ipmr_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh,
                             struct nlattr **tb)
{
        return 1;
}

static int ipmr_rule_fill(struct fib_rule *rule, struct sk_buff *skb,
                          struct fib_rule_hdr *frh)
{
        frh->dst_len = 0;
        frh->src_len = 0;
        frh->tos = 0;
        return 0;
}

static const struct fib_rules_ops __net_initdata ipmr_rules_ops_template = {
        .family         = RTNL_FAMILY_IPMR,
        .rule_size      = sizeof(struct ipmr_rule),
        .addr_size      = sizeof(u32),
        .action         = ipmr_rule_action,
        .match          = ipmr_rule_match,
        .configure      = ipmr_rule_configure,
        .compare        = ipmr_rule_compare,
        .default_pref   = fib_default_rule_pref,
        .fill           = ipmr_rule_fill,
        .nlgroup        = RTNLGRP_IPV4_RULE,
        .policy         = ipmr_rule_policy,
        .owner          = THIS_MODULE,
};

static int __net_init ipmr_rules_init(struct net *net)
{
        struct fib_rules_ops *ops;
        struct mr_table *mrt;
        int err;

        ops = fib_rules_register(&ipmr_rules_ops_template, net);
        if (IS_ERR(ops))
                return PTR_ERR(ops);

        INIT_LIST_HEAD(&net->ipv4.mr_tables);

        mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
        if (mrt == NULL) {
                err = -ENOMEM;
                goto err1;
        }

        err = fib_default_rule_add(ops, 0x7fff, RT_TABLE_DEFAULT, 0);
        if (err < 0)
                goto err2;

        net->ipv4.mr_rules_ops = ops;
        return 0;

err2:
        kfree(mrt);
err1:
        fib_rules_unregister(ops);
        return err;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
        struct mr_table *mrt, *next;

        list_for_each_entry_safe(mrt, next, &net->ipv4.mr_tables, list) {
                list_del(&mrt->list);
                kfree(mrt);
        }
        fib_rules_unregister(net->ipv4.mr_rules_ops);
}
#else
#define ipmr_for_each_table(mrt, net) \
        for (mrt = net->ipv4.mrt; mrt; mrt = NULL)
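/* Without CONFIG_IP_MROUTE_MULTIPLE_TABLES the iterator degenerates to a
 * single pass over the one table at net->ipv4.mrt.
 */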
static struct mr_table *ipmr_get_table(struct net *net, u32 id)
{
        return net->ipv4.mrt;
}

static int ipmr_fib_lookup(struct net *net, struct flowi *flp,
                           struct mr_table **mrt)
{
        *mrt = net->ipv4.mrt;
        return 0;
}

static int __net_init ipmr_rules_init(struct net *net)
{
        net->ipv4.mrt = ipmr_new_table(net, RT_TABLE_DEFAULT);
        return net->ipv4.mrt ? 0 : -ENOMEM;
}

static void __net_exit ipmr_rules_exit(struct net *net)
{
        kfree(net->ipv4.mrt);
}
#endif
static struct mr_table *ipmr_new_table(struct net *net, u32 id)
{
        struct mr_table *mrt;
        unsigned int i;

        mrt = ipmr_get_table(net, id);

        mrt = kzalloc(sizeof(*mrt), GFP_KERNEL);

        write_pnet(&mrt->net, net);

        /* Forwarding cache */
        for (i = 0; i < MFC_LINES; i++)
                INIT_LIST_HEAD(&mrt->mfc_cache_array[i]);

        INIT_LIST_HEAD(&mrt->mfc_unres_queue);

        setup_timer(&mrt->ipmr_expire_timer, ipmr_expire_process,
                    (unsigned long)mrt);

#ifdef CONFIG_IP_PIMSM
        mrt->mroute_reg_vif_num = -1;
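        /* -1 means that no register VIF is configured yet; vif_add() fills
         * it in when a VIFF_REGISTER interface is created.
         */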
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
        list_add_tail_rcu(&mrt->list, &net->ipv4.mr_tables);
#endif
        return mrt;
}
/* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */

static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
{
        struct net *net = dev_net(dev);

        dev = __dev_get_by_name(net, "tunl0");
        if (dev) {
                const struct net_device_ops *ops = dev->netdev_ops;
                struct ifreq ifr;
                struct ip_tunnel_parm p;

                memset(&p, 0, sizeof(p));
                p.iph.daddr = v->vifc_rmt_addr.s_addr;
                p.iph.saddr = v->vifc_lcl_addr.s_addr;
                p.iph.protocol = IPPROTO_IPIP;
                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
                ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

                if (ops->ndo_do_ioctl) {
                        mm_segment_t oldfs = get_fs();

                        set_fs(KERNEL_DS);
                        ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
                        set_fs(oldfs);
                }
        }
}
struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
{
        struct net_device *dev;

        dev = __dev_get_by_name(net, "tunl0");
        if (dev) {
                const struct net_device_ops *ops = dev->netdev_ops;
                int err;
                struct ifreq ifr;
                struct ip_tunnel_parm p;
                struct in_device *in_dev;

                memset(&p, 0, sizeof(p));
                p.iph.daddr = v->vifc_rmt_addr.s_addr;
                p.iph.saddr = v->vifc_lcl_addr.s_addr;
                p.iph.protocol = IPPROTO_IPIP;
                sprintf(p.name, "dvmrp%d", v->vifc_vifi);
                ifr.ifr_ifru.ifru_data = (__force void __user *)&p;

                if (ops->ndo_do_ioctl) {
                        mm_segment_t oldfs = get_fs();

                        set_fs(KERNEL_DS);
                        err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
                        set_fs(oldfs);
                }

                if (err == 0 &&
                    (dev = __dev_get_by_name(net, p.name)) != NULL) {
                        dev->flags |= IFF_MULTICAST;

                        in_dev = __in_dev_get_rtnl(dev);

                        ipv4_devconf_setall(in_dev);
                        IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

failure:
        /* allow the register to be completed before unregistering. */
        rtnl_unlock();
        rtnl_lock();

        unregister_netdevice(dev);
#ifdef CONFIG_IP_PIMSM

static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct net *net = dev_net(dev);
        struct mr_table *mrt;
        int err;

        err = ipmr_fib_lookup(net, &fl, &mrt);

        read_lock(&mrt_lock);
        dev->stats.tx_bytes += skb->len;
        dev->stats.tx_packets++;
        ipmr_cache_report(mrt, skb, mrt->mroute_reg_vif_num, IGMPMSG_WHOLEPKT);
        read_unlock(&mrt_lock);
        kfree_skb(skb);
        return NETDEV_TX_OK;
}
static const struct net_device_ops reg_vif_netdev_ops = {
        .ndo_start_xmit = reg_vif_xmit,
};

static void reg_vif_setup(struct net_device *dev)
{
        dev->type               = ARPHRD_PIMREG;
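        /* The register encapsulation prepends an outer IP header plus the
         * 8-byte PIM register header, so shrink the MTU accordingly.
         */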
        dev->mtu                = ETH_DATA_LEN - sizeof(struct iphdr) - 8;
        dev->flags              = IFF_NOARP;
        dev->netdev_ops         = &reg_vif_netdev_ops;
        dev->destructor         = free_netdev;
        dev->features           |= NETIF_F_NETNS_LOCAL;
}
static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt)
{
        struct net_device *dev;
        struct in_device *in_dev;
        char name[IFNAMSIZ];

        if (mrt->id == RT_TABLE_DEFAULT)
                sprintf(name, "pimreg");
        else
                sprintf(name, "pimreg%u", mrt->id);

        dev = alloc_netdev(0, name, reg_vif_setup);

        dev_net_set(dev, net);

        if (register_netdevice(dev)) {

        if ((in_dev = __in_dev_get_rcu(dev)) == NULL) {

        ipv4_devconf_setall(in_dev);
        IPV4_DEVCONF(in_dev->cnf, RP_FILTER) = 0;

failure:
        /* allow the register to be completed before unregistering. */
        rtnl_unlock();
        rtnl_lock();

        unregister_netdevice(dev);
/* Delete a VIF entry.
 * @notify: Set to 1 if the caller is a notifier_call.
 */
static int vif_delete(struct mr_table *mrt, int vifi, int notify,
                      struct list_head *head)
{
        struct vif_device *v;
        struct net_device *dev;
        struct in_device *in_dev;

        if (vifi < 0 || vifi >= mrt->maxvif)
                return -EADDRNOTAVAIL;

        v = &mrt->vif_table[vifi];

        write_lock_bh(&mrt_lock);
        dev = v->dev;
        v->dev = NULL;

        if (!dev) {
                write_unlock_bh(&mrt_lock);
                return -EADDRNOTAVAIL;
        }

#ifdef CONFIG_IP_PIMSM
        if (vifi == mrt->mroute_reg_vif_num)
                mrt->mroute_reg_vif_num = -1;
#endif

        if (vifi + 1 == mrt->maxvif) {
                int tmp;
                for (tmp = vifi - 1; tmp >= 0; tmp--) {
                        if (VIF_EXISTS(mrt, tmp))
                                break;
                }
                mrt->maxvif = tmp + 1;
        }

        write_unlock_bh(&mrt_lock);

        dev_set_allmulti(dev, -1);

        if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
                IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)--;
                ip_rt_multicast_event(in_dev);
        }

        if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER) && !notify)
                unregister_netdevice_queue(dev, head);

        dev_put(dev);
        return 0;
}
static void ipmr_cache_free_rcu(struct rcu_head *head)
{
        struct mfc_cache *c = container_of(head, struct mfc_cache, rcu);

        kmem_cache_free(mrt_cachep, c);
}

static inline void ipmr_cache_free(struct mfc_cache *c)
{
        call_rcu(&c->rcu, ipmr_cache_free_rcu);
}
/* Destroy an unresolved cache entry, killing queued skbs
 * and reporting the error to netlink readers.
 */
static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c)
{
        struct net *net = read_pnet(&mrt->net);
        struct sk_buff *skb;
        struct nlmsgerr *e;

        atomic_dec(&mrt->cache_resolve_queue_len);

        while ((skb = skb_dequeue(&c->mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));
                        nlh->nlmsg_type = NLMSG_ERROR;
                        nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
                        skb_trim(skb, nlh->nlmsg_len);
                        e = NLMSG_DATA(nlh);
                        e->error = -ETIMEDOUT;
                        memset(&e->msg, 0, sizeof(e->msg));

                        rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
                } else
                        kfree_skb(skb);
        }

        ipmr_cache_free(c);
}
/* Timer process for the unresolved queue. */

static void ipmr_expire_process(unsigned long arg)
{
        struct mr_table *mrt = (struct mr_table *)arg;
        unsigned long now = jiffies;
        unsigned long expires;
        struct mfc_cache *c, *next;

        if (!spin_trylock(&mfc_unres_lock)) {
                mod_timer(&mrt->ipmr_expire_timer, jiffies + HZ/10);
                return;
        }

        if (list_empty(&mrt->mfc_unres_queue))
                goto out;

        list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
                if (time_after(c->mfc_un.unres.expires, now)) {
                        unsigned long interval = c->mfc_un.unres.expires - now;
                        if (interval < expires)
                                expires = interval;
                        continue;
                }

                ipmr_destroy_unres(mrt, c);
        }

        if (!list_empty(&mrt->mfc_unres_queue))
                mod_timer(&mrt->ipmr_expire_timer, jiffies + expires);

out:
        spin_unlock(&mfc_unres_lock);
}
/* Fill oifs list. It is called under write locked mrt_lock. */

static void ipmr_update_thresholds(struct mr_table *mrt, struct mfc_cache *cache,
                                   unsigned char *ttls)
{
        int vifi;

        cache->mfc_un.res.minvif = MAXVIFS;
        cache->mfc_un.res.maxvif = 0;
        memset(cache->mfc_un.res.ttls, 255, MAXVIFS);
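        /* A TTL of 255 is the sentinel for "never forward on this VIF";
         * only VIFs with a threshold below 255 end up inside the
         * [minvif, maxvif) window scanned by the forwarding loop.
         */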
        for (vifi = 0; vifi < mrt->maxvif; vifi++) {
                if (VIF_EXISTS(mrt, vifi) &&
                    ttls[vifi] && ttls[vifi] < 255) {
                        cache->mfc_un.res.ttls[vifi] = ttls[vifi];
                        if (cache->mfc_un.res.minvif > vifi)
                                cache->mfc_un.res.minvif = vifi;
                        if (cache->mfc_un.res.maxvif <= vifi)
                                cache->mfc_un.res.maxvif = vifi + 1;
                }
        }
}
static int vif_add(struct net *net, struct mr_table *mrt,
                   struct vifctl *vifc, int mrtsock)
{
        int vifi = vifc->vifc_vifi;
        struct vif_device *v = &mrt->vif_table[vifi];
        struct net_device *dev;
        struct in_device *in_dev;
        int err;

        /* Is vif busy? */
        if (VIF_EXISTS(mrt, vifi))
                return -EADDRINUSE;

        switch (vifc->vifc_flags) {
#ifdef CONFIG_IP_PIMSM
        case VIFF_REGISTER:
                /* Special purpose VIF in PIM:
                 * all the packets will be sent to the daemon.
                 */
                if (mrt->mroute_reg_vif_num >= 0)
                        return -EADDRINUSE;
                dev = ipmr_reg_vif(net, mrt);
                if (!dev)
                        return -ENOBUFS;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        unregister_netdevice(dev);
                        dev_put(dev);
                        return err;
                }
                break;
#endif
        case VIFF_TUNNEL:
                dev = ipmr_new_tunnel(net, vifc);
                if (!dev)
                        return -ENOBUFS;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        ipmr_del_tunnel(dev, vifc);
                        dev_put(dev);
                        return err;
                }
                break;

        case VIFF_USE_IFINDEX:
        case 0:
                if (vifc->vifc_flags == VIFF_USE_IFINDEX) {
                        dev = dev_get_by_index(net, vifc->vifc_lcl_ifindex);
                        if (dev && __in_dev_get_rtnl(dev) == NULL) {
                                dev_put(dev);
                                return -EADDRNOTAVAIL;
                        }
                } else
                        dev = ip_dev_find(net, vifc->vifc_lcl_addr.s_addr);
                if (!dev)
                        return -EADDRNOTAVAIL;
                err = dev_set_allmulti(dev, 1);
                if (err) {
                        dev_put(dev);
                        return err;
                }
                break;
        default:
                return -EINVAL;
        }

        if ((in_dev = __in_dev_get_rtnl(dev)) == NULL) {
                dev_put(dev);
                return -EADDRNOTAVAIL;
        }
        IPV4_DEVCONF(in_dev->cnf, MC_FORWARDING)++;
        ip_rt_multicast_event(in_dev);

        /* Fill in the VIF structures */

        v->rate_limit = vifc->vifc_rate_limit;
        v->local = vifc->vifc_lcl_addr.s_addr;
        v->remote = vifc->vifc_rmt_addr.s_addr;
        v->flags = vifc->vifc_flags;
        if (!mrtsock)
                v->flags |= VIFF_STATIC;
        v->threshold = vifc->vifc_threshold;
        v->link = dev->ifindex;
        if (v->flags & (VIFF_TUNNEL | VIFF_REGISTER))
                v->link = dev->iflink;

        /* And finish update writing critical data */
        write_lock_bh(&mrt_lock);
        v->dev = dev;
#ifdef CONFIG_IP_PIMSM
        if (v->flags & VIFF_REGISTER)
                mrt->mroute_reg_vif_num = vifi;
#endif
        if (vifi + 1 > mrt->maxvif)
                mrt->maxvif = vifi + 1;
        write_unlock_bh(&mrt_lock);
        return 0;
}
/* called with rcu_read_lock() */
static struct mfc_cache *ipmr_cache_find(struct mr_table *mrt,
                                         __be32 origin, __be32 mcastgrp)
{
        struct mfc_cache *c;
        int line = MFC_HASH(mcastgrp, origin);
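        /* Entries are hashed on the (mcastgrp, origin) pair, so both
         * addresses must match on lookup.
         */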
        list_for_each_entry_rcu(c, &mrt->mfc_cache_array[line], list) {
                if (c->mfc_origin == origin && c->mfc_mcastgrp == mcastgrp)
                        return c;
        }
        return NULL;
}
/* Allocate a multicast cache entry */
static struct mfc_cache *ipmr_cache_alloc(void)
{
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL);

        if (c)
                c->mfc_un.res.minvif = MAXVIFS;
        return c;
}
static struct mfc_cache *ipmr_cache_alloc_unres(void)
{
        struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC);

        if (c) {
                skb_queue_head_init(&c->mfc_un.unres.unresolved);
                c->mfc_un.unres.expires = jiffies + 10*HZ;
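                /* An unresolved entry waits at most 10 seconds for the
                 * daemon to install a route; after that ipmr_expire_process()
                 * destroys it and reports -ETIMEDOUT to netlink readers.
                 */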
        }
        return c;
}

/* A cache entry has gone into a resolved state from queued */
static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt,
                               struct mfc_cache *uc, struct mfc_cache *c)
{
        struct sk_buff *skb;
        struct nlmsgerr *e;

        /* Play the pending entries through our router */

        while ((skb = __skb_dequeue(&uc->mfc_un.unres.unresolved))) {
                if (ip_hdr(skb)->version == 0) {
                        struct nlmsghdr *nlh = (struct nlmsghdr *)skb_pull(skb, sizeof(struct iphdr));

                        if (__ipmr_fill_mroute(mrt, skb, c, NLMSG_DATA(nlh)) > 0) {
                                nlh->nlmsg_len = (skb_tail_pointer(skb) -
                                                  (u8 *)nlh);
                        } else {
                                nlh->nlmsg_type = NLMSG_ERROR;
                                nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr));
                                skb_trim(skb, nlh->nlmsg_len);
                                e = NLMSG_DATA(nlh);
                                e->error = -EMSGSIZE;
                                memset(&e->msg, 0, sizeof(e->msg));
                        }

                        rtnl_unicast(skb, net, NETLINK_CB(skb).pid);
                } else
                        ip_mr_forward(net, mrt, skb, c, 0);
        }
}
/* Bounce a cache query up to mrouted. We could use netlink for this but mrouted
 * expects the following bizarre scheme.
 *
 * Called under mrt_lock.
 */
static int ipmr_cache_report(struct mr_table *mrt,
                             struct sk_buff *pkt, vifi_t vifi, int assert)
{
        struct sk_buff *skb;
        const int ihl = ip_hdrlen(pkt);
        struct igmphdr *igmp;
        struct igmpmsg *msg;
        struct sock *mroute_sk;
        int ret;

#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT)
                skb = skb_realloc_headroom(pkt, sizeof(struct iphdr));
        else
#endif
                skb = alloc_skb(128, GFP_ATOMIC);

        if (!skb)
                return -ENOBUFS;

#ifdef CONFIG_IP_PIMSM
        if (assert == IGMPMSG_WHOLEPKT) {
                /* Ugly, but we have no choice with this interface.
                 * Duplicate old header, fix ihl, length, etc.
                 * And all this only to mangle msg->im_msgtype and
                 * to set msg->im_mbz to "mbz" :-)
                 */
                skb_push(skb, sizeof(struct iphdr));
                skb_reset_network_header(skb);
                skb_reset_transport_header(skb);
                msg = (struct igmpmsg *)skb_network_header(skb);
                memcpy(msg, skb_network_header(pkt), sizeof(struct iphdr));
                msg->im_msgtype = IGMPMSG_WHOLEPKT;
                msg->im_mbz = 0;
                msg->im_vif = mrt->mroute_reg_vif_num;
                ip_hdr(skb)->ihl = sizeof(struct iphdr) >> 2;
                ip_hdr(skb)->tot_len = htons(ntohs(ip_hdr(pkt)->tot_len) +
                                             sizeof(struct iphdr));
        } else
#endif
        {
                /* Copy the IP header */

                skb->network_header = skb->tail;
                skb_put(skb, ihl);
                skb_copy_to_linear_data(skb, pkt->data, ihl);
                ip_hdr(skb)->protocol = 0;      /* Flag to the kernel this is a route add */
                msg = (struct igmpmsg *)skb_network_header(skb);
                msg->im_vif = vifi;
                skb_dst_set(skb, dst_clone(skb_dst(pkt)));

                /* Add our header */

                igmp = (struct igmphdr *)skb_put(skb, sizeof(struct igmphdr));
                igmp->type = msg->im_msgtype = assert;
                igmp->code = 0;
                ip_hdr(skb)->tot_len = htons(skb->len); /* Fix the length */
                skb->transport_header = skb->network_header;
        }

        rcu_read_lock();
        mroute_sk = rcu_dereference(mrt->mroute_sk);
        if (mroute_sk == NULL) {
                rcu_read_unlock();
                kfree_skb(skb);
                return -EINVAL;
        }

        /* Deliver to mrouted */

        ret = sock_queue_rcv_skb(mroute_sk, skb);
        rcu_read_unlock();
        if (ret < 0) {
                if (net_ratelimit())
                        printk(KERN_WARNING "mroute: pending queue full, dropping entries.\n");
                kfree_skb(skb);
        }

        return ret;
}
/* Queue a packet for resolution. It gets locked cache entry! */

static int
ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, struct sk_buff *skb)
{
        bool found = false;
        int err;
        struct mfc_cache *c;
        const struct iphdr *iph = ip_hdr(skb);

        spin_lock_bh(&mfc_unres_lock);
        list_for_each_entry(c, &mrt->mfc_unres_queue, list) {
                if (c->mfc_mcastgrp == iph->daddr &&
                    c->mfc_origin == iph->saddr) {
                        found = true;
                        break;
                }
        }

        if (!found) {
                /* Create a new entry if allowable */

                if (atomic_read(&mrt->cache_resolve_queue_len) >= 10 ||
                    (c = ipmr_cache_alloc_unres()) == NULL) {
                        spin_unlock_bh(&mfc_unres_lock);
                        kfree_skb(skb);
                        return -ENOBUFS;
                }

                /* Fill in the new cache entry */

                c->mfc_origin = iph->saddr;
                c->mfc_mcastgrp = iph->daddr;

                /* Reflect first query at mrouted. */

                err = ipmr_cache_report(mrt, skb, vifi, IGMPMSG_NOCACHE);
                if (err < 0) {
                        /* If the report failed, throw the cache entry
                         * out - Brad Parker
                         */
                        spin_unlock_bh(&mfc_unres_lock);
                        ipmr_cache_free(c);
                        kfree_skb(skb);
                        return err;
                }

                atomic_inc(&mrt->cache_resolve_queue_len);
                list_add(&c->list, &mrt->mfc_unres_queue);

                if (atomic_read(&mrt->cache_resolve_queue_len) == 1)
                        mod_timer(&mrt->ipmr_expire_timer, c->mfc_un.unres.expires);
        }
        /* See if we can append the packet */
        if (c->mfc_un.unres.unresolved.qlen > 3) {
                kfree_skb(skb);
                err = -ENOBUFS;
        } else {
                skb_queue_tail(&c->mfc_un.unres.unresolved, skb);
                err = 0;
        }
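        /* The qlen check above caps the per-entry backlog, so an unresponsive
         * daemon cannot pin an unbounded number of skbs in the kernel.
         */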
        spin_unlock_bh(&mfc_unres_lock);
        return err;
}
/* MFC cache manipulation by user space mroute daemon */

static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc)
{
        int line;
        struct mfc_cache *c, *next;

        line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

        list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[line], list) {
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
                        list_del_rcu(&c->list);
                        ipmr_cache_free(c);
                        return 0;
                }
        }
        return -ENOENT;
}
static int ipmr_mfc_add(struct net *net, struct mr_table *mrt,
                        struct mfcctl *mfc, int mrtsock)
{
        bool found = false;
        int line;
        struct mfc_cache *uc, *c;

        if (mfc->mfcc_parent >= MAXVIFS)
                return -ENFILE;

        line = MFC_HASH(mfc->mfcc_mcastgrp.s_addr, mfc->mfcc_origin.s_addr);

        list_for_each_entry(c, &mrt->mfc_cache_array[line], list) {
                if (c->mfc_origin == mfc->mfcc_origin.s_addr &&
                    c->mfc_mcastgrp == mfc->mfcc_mcastgrp.s_addr) {
                        found = true;
                        break;
                }
        }

        if (found) {
                write_lock_bh(&mrt_lock);
                c->mfc_parent = mfc->mfcc_parent;
                ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
                if (!mrtsock)
                        c->mfc_flags |= MFC_STATIC;
                write_unlock_bh(&mrt_lock);
                return 0;
        }

        if (!ipv4_is_multicast(mfc->mfcc_mcastgrp.s_addr))
                return -EINVAL;

        c = ipmr_cache_alloc();
        if (c == NULL)
                return -ENOMEM;

        c->mfc_origin = mfc->mfcc_origin.s_addr;
        c->mfc_mcastgrp = mfc->mfcc_mcastgrp.s_addr;
        c->mfc_parent = mfc->mfcc_parent;
        ipmr_update_thresholds(mrt, c, mfc->mfcc_ttls);
        if (!mrtsock)
                c->mfc_flags |= MFC_STATIC;

        list_add_rcu(&c->list, &mrt->mfc_cache_array[line]);
        /* Check to see if we resolved a queued list. If so we
         * need to send on the frames and tidy up.
         */
        found = false;
        spin_lock_bh(&mfc_unres_lock);
        list_for_each_entry(uc, &mrt->mfc_unres_queue, list) {
                if (uc->mfc_origin == c->mfc_origin &&
                    uc->mfc_mcastgrp == c->mfc_mcastgrp) {
                        list_del(&uc->list);
                        atomic_dec(&mrt->cache_resolve_queue_len);
                        found = true;
                        break;
                }
        }
        if (list_empty(&mrt->mfc_unres_queue))
                del_timer(&mrt->ipmr_expire_timer);
        spin_unlock_bh(&mfc_unres_lock);

        if (found) {
                ipmr_cache_resolve(net, mrt, uc, c);
                ipmr_cache_free(uc);
        }
        return 0;
}
/* Close the multicast socket, and clear the vif tables etc. */

static void mroute_clean_tables(struct mr_table *mrt)
{
        int i;
        LIST_HEAD(list);
        struct mfc_cache *c, *next;

        /* Shut down all active vif entries */

        for (i = 0; i < mrt->maxvif; i++) {
                if (!(mrt->vif_table[i].flags & VIFF_STATIC))
                        vif_delete(mrt, i, 0, &list);
        }
        unregister_netdevice_many(&list);

        /* Wipe the cache */

        for (i = 0; i < MFC_LINES; i++) {
                list_for_each_entry_safe(c, next, &mrt->mfc_cache_array[i], list) {
                        if (c->mfc_flags & MFC_STATIC)
                                continue;
                        list_del_rcu(&c->list);
                        ipmr_cache_free(c);
                }
        }

        if (atomic_read(&mrt->cache_resolve_queue_len) != 0) {
                spin_lock_bh(&mfc_unres_lock);
                list_for_each_entry_safe(c, next, &mrt->mfc_unres_queue, list) {
                        list_del(&c->list);
                        ipmr_destroy_unres(mrt, c);
                }
                spin_unlock_bh(&mfc_unres_lock);
        }
}
/* called from ip_ra_control(), before an RCU grace period,
 * we don't need to call synchronize_rcu() here
 */
static void mrtsock_destruct(struct sock *sk)
{
        struct net *net = sock_net(sk);
        struct mr_table *mrt;

        rtnl_lock();
        ipmr_for_each_table(mrt, net) {
                if (sk == rtnl_dereference(mrt->mroute_sk)) {
                        IPV4_DEVCONF_ALL(net, MC_FORWARDING)--;
                        rcu_assign_pointer(mrt->mroute_sk, NULL);
                        mroute_clean_tables(mrt);
                }
        }
        rtnl_unlock();
}
/* Socket options and virtual interface manipulation. The whole
 * virtual interface system is a complete heap, but unfortunately
 * that's how BSD mrouted happens to think. Maybe one day with a proper
 * MOSPF/PIM router set up we can clean this up.
 */

int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, unsigned int optlen)
{
        int ret;
        struct vifctl vif;
        struct mfcctl mfc;
        struct net *net = sock_net(sk);
        struct mr_table *mrt;

        mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
        if (mrt == NULL)
                return -ENOENT;
        if (optname != MRT_INIT) {
                if (sk != rcu_dereference_raw(mrt->mroute_sk) &&
                    !capable(CAP_NET_ADMIN))
                        return -EACCES;
        }

        switch (optname) {
        case MRT_INIT:
                if (sk->sk_type != SOCK_RAW ||
                    inet_sk(sk)->inet_num != IPPROTO_IGMP)
                        return -EOPNOTSUPP;
                if (optlen != sizeof(int))
                        return -ENOPROTOOPT;

                rtnl_lock();
                if (rtnl_dereference(mrt->mroute_sk)) {
                        rtnl_unlock();
                        return -EADDRINUSE;
                }

                ret = ip_ra_control(sk, 1, mrtsock_destruct);
                if (ret == 0) {
                        rcu_assign_pointer(mrt->mroute_sk, sk);
                        IPV4_DEVCONF_ALL(net, MC_FORWARDING)++;
                }
                rtnl_unlock();
                return ret;
        case MRT_DONE:
                if (sk != rcu_dereference_raw(mrt->mroute_sk))
                        return -EACCES;
                return ip_ra_control(sk, 0, NULL);
        case MRT_ADD_VIF:
        case MRT_DEL_VIF:
                if (optlen != sizeof(vif))
                        return -EINVAL;
                if (copy_from_user(&vif, optval, sizeof(vif)))
                        return -EFAULT;
                if (vif.vifc_vifi >= MAXVIFS)
                        return -ENFILE;
                rtnl_lock();
                if (optname == MRT_ADD_VIF) {
                        ret = vif_add(net, mrt, &vif,
                                      sk == rtnl_dereference(mrt->mroute_sk));
                } else
                        ret = vif_delete(mrt, vif.vifc_vifi, 0, NULL);
                rtnl_unlock();
                return ret;

        /* Manipulate the forwarding caches. These live
         * in a sort of kernel/user symbiosis.
         */
        case MRT_ADD_MFC:
        case MRT_DEL_MFC:
                if (optlen != sizeof(mfc))
                        return -EINVAL;
                if (copy_from_user(&mfc, optval, sizeof(mfc)))
                        return -EFAULT;
                rtnl_lock();
                if (optname == MRT_DEL_MFC)
                        ret = ipmr_mfc_delete(mrt, &mfc);
                else
                        ret = ipmr_mfc_add(net, mrt, &mfc,
                                           sk == rtnl_dereference(mrt->mroute_sk));
                rtnl_unlock();
                return ret;

        /* Control PIM assert. */
        case MRT_ASSERT:
        {
                int v;

                if (get_user(v, (int __user *)optval))
                        return -EFAULT;
                mrt->mroute_do_assert = v ? 1 : 0;
                return 0;
        }
#ifdef CONFIG_IP_PIMSM
        case MRT_PIM:
        {
                int v;

                if (get_user(v, (int __user *)optval))
                        return -EFAULT;
                v = v ? 1 : 0;

                rtnl_lock();
                if (v != mrt->mroute_do_pim) {
                        mrt->mroute_do_pim = v;
                        mrt->mroute_do_assert = v;
                }
                rtnl_unlock();
                return 0;
        }
#endif
#ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES
        case MRT_TABLE:
        {
                u32 v;

                if (optlen != sizeof(u32))
                        return -EINVAL;
                if (get_user(v, (u32 __user *)optval))
                        return -EFAULT;

                rtnl_lock();
                ret = 0;
                if (sk == rtnl_dereference(mrt->mroute_sk)) {
                        ret = -EBUSY;
                } else {
                        if (!ipmr_new_table(net, v))
                                ret = -ENOMEM;
                        raw_sk(sk)->ipmr_table = v;
                }
                rtnl_unlock();
                return ret;
        }
#endif
        /* Spurious command, or MRT_VERSION which you cannot set. */
        default:
                return -ENOPROTOOPT;
        }
}
/* Getsockopt support for the multicast routing system. */

int ip_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, int __user *optlen)
{
        int olr;
        int val;
        struct net *net = sock_net(sk);
        struct mr_table *mrt;

        mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
        if (mrt == NULL)
                return -ENOENT;

        if (optname != MRT_VERSION &&
#ifdef CONFIG_IP_PIMSM
            optname != MRT_PIM &&
#endif
            optname != MRT_ASSERT)
                return -ENOPROTOOPT;

        if (get_user(olr, optlen))
                return -EFAULT;

        olr = min_t(unsigned int, olr, sizeof(int));
        if (olr < 0)
                return -EINVAL;

        if (put_user(olr, optlen))
                return -EFAULT;
        if (optname == MRT_VERSION)
                val = 0x0305;
#ifdef CONFIG_IP_PIMSM
        else if (optname == MRT_PIM)
                val = mrt->mroute_do_pim;
#endif
        else
                val = mrt->mroute_do_assert;
        if (copy_to_user(optval, &val, olr))
                return -EFAULT;
        return 0;
}
/* The IP multicast ioctl support routines. */

int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg)
{
        struct sioc_sg_req sr;
        struct sioc_vif_req vr;
        struct vif_device *vif;
        struct mfc_cache *c;
        struct net *net = sock_net(sk);
        struct mr_table *mrt;

        mrt = ipmr_get_table(net, raw_sk(sk)->ipmr_table ? : RT_TABLE_DEFAULT);
        if (mrt == NULL)
                return -ENOENT;

        switch (cmd) {
        case SIOCGETVIFCNT:
                if (copy_from_user(&vr, arg, sizeof(vr)))
                        return -EFAULT;
                if (vr.vifi >= mrt->maxvif)
                        return -EINVAL;
                read_lock(&mrt_lock);
                vif = &mrt->vif_table[vr.vifi];
                if (VIF_EXISTS(mrt, vr.vifi)) {
                        vr.icount = vif->pkt_in;
                        vr.ocount = vif->pkt_out;
                        vr.ibytes = vif->bytes_in;
                        vr.obytes = vif->bytes_out;
                        read_unlock(&mrt_lock);

                        if (copy_to_user(arg, &vr, sizeof(vr)))
                                return -EFAULT;
                        return 0;
                }
                read_unlock(&mrt_lock);
                return -EADDRNOTAVAIL;
        case SIOCGETSGCNT:
                if (copy_from_user(&sr, arg, sizeof(sr)))
                        return -EFAULT;

                rcu_read_lock();
                c = ipmr_cache_find(mrt, sr.src.s_addr, sr.grp.s_addr);
                if (c) {
                        sr.pktcnt = c->mfc_un.res.pkt;
                        sr.bytecnt = c->mfc_un.res.bytes;
                        sr.wrong_if = c->mfc_un.res.wrong_if;
                        rcu_read_unlock();

                        if (copy_to_user(arg, &sr, sizeof(sr)))
                                return -EFAULT;
                        return 0;
                }
                rcu_read_unlock();
                return -EADDRNOTAVAIL;
        default:
                return -ENOIOCTLCMD;
        }
}
static int ipmr_device_event(struct notifier_block *this, unsigned long event, void *ptr)
{
        struct net_device *dev = ptr;
        struct net *net = dev_net(dev);
        struct mr_table *mrt;
        struct vif_device *v;
        int ct;
        LIST_HEAD(list);

        if (event != NETDEV_UNREGISTER)
                return NOTIFY_DONE;

        ipmr_for_each_table(mrt, net) {
                v = &mrt->vif_table[0];
                for (ct = 0; ct < mrt->maxvif; ct++, v++) {
                        if (v->dev == dev)
                                vif_delete(mrt, ct, 1, &list);
                }
        }
        unregister_netdevice_many(&list);
        return NOTIFY_DONE;
}

static struct notifier_block ip_mr_notifier = {
        .notifier_call = ipmr_device_event,
};
/* Encapsulate a packet by attaching a valid IPIP header to it.
 * This avoids tunnel drivers and other mess and gives us the speed so
 * important for multicast video.
 */
static void ip_encap(struct sk_buff *skb, __be32 saddr, __be32 daddr)
{
        struct iphdr *iph;
        struct iphdr *old_iph = ip_hdr(skb);

        skb_push(skb, sizeof(struct iphdr));
        skb->transport_header = skb->network_header;
        skb_reset_network_header(skb);
        iph = ip_hdr(skb);

        iph->version = 4;
        iph->tos = old_iph->tos;
        iph->ttl = old_iph->ttl;
        iph->frag_off = 0;
        iph->daddr = daddr;
        iph->saddr = saddr;
        iph->protocol = IPPROTO_IPIP;
        iph->ihl = 5;
        iph->tot_len = htons(skb->len);
        ip_select_ident(iph, skb_dst(skb), NULL);
        ip_send_check(iph);

        memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
}
static inline int ipmr_forward_finish(struct sk_buff *skb)
{
        struct ip_options *opt = &(IPCB(skb)->opt);

        IP_INC_STATS_BH(dev_net(skb_dst(skb)->dev), IPSTATS_MIB_OUTFORWDATAGRAMS);

        if (unlikely(opt->optlen))
                ip_forward_options(skb);

        return dst_output(skb);
}
/* Processing handlers for ipmr_forward */

static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt,
                            struct sk_buff *skb, struct mfc_cache *c, int vifi)
{
        const struct iphdr *iph = ip_hdr(skb);
        struct vif_device *vif = &mrt->vif_table[vifi];
        struct net_device *dev;
        struct rtable *rt;
        int encap = 0;

        if (vif->dev == NULL)
                goto out_free;

#ifdef CONFIG_IP_PIMSM
        if (vif->flags & VIFF_REGISTER) {
                vif->pkt_out++;
                vif->bytes_out += skb->len;
                vif->dev->stats.tx_bytes += skb->len;
                vif->dev->stats.tx_packets++;
                ipmr_cache_report(mrt, skb, vifi, IGMPMSG_WHOLEPKT);
                goto out_free;
        }
#endif

        if (vif->flags & VIFF_TUNNEL) {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = vif->remote,
                                                .saddr = vif->local,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };

                if (ip_route_output_key(net, &rt, &fl))
                        goto out_free;
                encap = sizeof(struct iphdr);
        } else {
                struct flowi fl = { .oif = vif->link,
                                    .nl_u = { .ip4_u =
                                              { .daddr = iph->daddr,
                                                .tos = RT_TOS(iph->tos) } },
                                    .proto = IPPROTO_IPIP };

                if (ip_route_output_key(net, &rt, &fl))
                        goto out_free;
        }

        dev = rt->dst.dev;

        if (skb->len + encap > dst_mtu(&rt->dst) && (ntohs(iph->frag_off) & IP_DF)) {
                /* Do not fragment multicasts. Alas, IPv4 does not
                 * allow us to send an ICMP error here, so such packets
                 * will simply disappear.
                 */
                IP_INC_STATS_BH(dev_net(dev), IPSTATS_MIB_FRAGFAILS);
                ip_rt_put(rt);
                goto out_free;
        }

        encap += LL_RESERVED_SPACE(dev) + rt->dst.header_len;

        if (skb_cow(skb, encap)) {
                ip_rt_put(rt);
                goto out_free;
        }

        vif->pkt_out++;
        vif->bytes_out += skb->len;

        skb_dst_drop(skb);
        skb_dst_set(skb, &rt->dst);
        ip_decrease_ttl(ip_hdr(skb));

        /* FIXME: forward and output firewalls used to be called here.
         * What do we do with netfilter? -- RR
         */
        if (vif->flags & VIFF_TUNNEL) {
                ip_encap(skb, vif->local, vif->remote);
                /* FIXME: extra output firewall step used to be here. --RR */
                vif->dev->stats.tx_packets++;
                vif->dev->stats.tx_bytes += skb->len;
        }

        IPCB(skb)->flags |= IPSKB_FORWARDED;

        /* RFC 1584 teaches that a DVMRP/PIM router must deliver packets
         * locally not only before forwarding, but also after forwarding on
         * all output interfaces. Clearly, if the mrouter runs a multicast
         * program, it should receive packets regardless of the interface
         * the program has joined on.
         * If we do not arrange this, the program would have to join on all
         * interfaces. On the other hand, a multihoming host (or a router,
         * but not an mrouter) cannot join on more than one interface --
         * that would result in receiving multiple packets.
         */
        NF_HOOK(NFPROTO_IPV4, NF_INET_FORWARD, skb, skb->dev, dev,
                ipmr_forward_finish);
        return;

out_free:
        kfree_skb(skb);
}
static int ipmr_find_vif(struct mr_table *mrt, struct net_device *dev)
{
        int ct;

        for (ct = mrt->maxvif - 1; ct >= 0; ct--) {
                if (mrt->vif_table[ct].dev == dev)
                        return ct;
        }
        return -1;
}
1634 /* "local" means that we should preserve one skb (for local delivery) */
1636 static int ip_mr_forward(struct net *net, struct mr_table *mrt,
1637 struct sk_buff *skb, struct mfc_cache *cache,
1643 vif = cache->mfc_parent;
1644 cache->mfc_un.res.pkt++;
1645 cache->mfc_un.res.bytes += skb->len;
1648 * Wrong interface: drop packet and (maybe) send PIM assert.
1650 if (mrt->vif_table[vif].dev != skb->dev) {
1653 if (skb_rtable(skb)->fl.iif == 0) {
1654 /* It is our own packet, looped back.
1655 Very complicated situation...
1657 The best workaround until routing daemons will be
1658 fixed is not to redistribute packet, if it was
1659 send through wrong interface. It means, that
1660 multicast applications WILL NOT work for
1661 (S,G), which have default multicast route pointing
1662 to wrong oif. In any case, it is not a good
1663 idea to use multicasting applications on router.
1668 cache->mfc_un.res.wrong_if++;
1669 true_vifi = ipmr_find_vif(mrt, skb->dev);
1671 if (true_vifi >= 0 && mrt->mroute_do_assert &&
1672 /* pimsm uses asserts, when switching from RPT to SPT,
1673 so that we cannot check that packet arrived on an oif.
1674 It is bad, but otherwise we would need to move pretty
1675 large chunk of pimd to kernel. Ough... --ANK
1677 (mrt->mroute_do_pim ||
1678 cache->mfc_un.res.ttls[true_vifi] < 255) &&
1680 cache->mfc_un.res.last_assert + MFC_ASSERT_THRESH)) {
1681 cache->mfc_un.res.last_assert = jiffies;
1682 ipmr_cache_report(mrt, skb, true_vifi, IGMPMSG_WRONGVIF);
1687 mrt->vif_table[vif].pkt_in++;
1688 mrt->vif_table[vif].bytes_in += skb->len;
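        /* Forward the frame: every eligible oif except the last gets its own
         * clone below, and the final transmit consumes the original skb
         * (unless a local copy must be preserved for delivery), so no
         * unnecessary copies are made.
         */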
        for (ct = cache->mfc_un.res.maxvif - 1; ct >= cache->mfc_un.res.minvif; ct--) {
                if (ip_hdr(skb)->ttl > cache->mfc_un.res.ttls[ct]) {
                        if (psend != -1) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

                                if (skb2)
                                        ipmr_queue_xmit(net, mrt, skb2, cache,
                                                        psend);
                        }
                        psend = ct;
                }
        }
        if (psend != -1) {
                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

                        if (skb2)
                                ipmr_queue_xmit(net, mrt, skb2, cache, psend);
                } else {
                        ipmr_queue_xmit(net, mrt, skb, cache, psend);
                        return 0;
                }
        }

dont_forward:
        if (!local)
                kfree_skb(skb);
        return 0;
}
/* Multicast packets for forwarding arrive here.
 * Called with rcu_read_lock().
 */
int ip_mr_input(struct sk_buff *skb)
{
        struct mfc_cache *cache;
        struct net *net = dev_net(skb->dev);
        int local = skb_rtable(skb)->rt_flags & RTCF_LOCAL;
        struct mr_table *mrt;
        int err;

        /* A packet that is looped back after forwarding should not be
         * forwarded a second time, but it can still be delivered locally.
         */
        if (IPCB(skb)->flags & IPSKB_FORWARDED)
                goto dont_forward;

        err = ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt);
        if (err < 0) {
                kfree_skb(skb);
                return err;
        }

        if (!local) {
                if (IPCB(skb)->opt.router_alert) {
                        if (ip_call_ra_chain(skb))
                                return 0;
                } else if (ip_hdr(skb)->protocol == IPPROTO_IGMP) {
                        /* IGMPv1 (and broken IGMPv2 implementations, such as
                         * Cisco IOS <= 11.2(8)) do not put the router alert
                         * option in IGMP packets destined to routable
                         * groups. It is very bad, because it means
                         * that we can forward NO IGMP messages.
                         */
                        struct sock *mroute_sk;

                        mroute_sk = rcu_dereference(mrt->mroute_sk);
                        if (mroute_sk) {
                                nf_reset(skb);
                                raw_rcv(mroute_sk, skb);
                                return 0;
                        }
                }
        }

        /* already under rcu_read_lock() */
        cache = ipmr_cache_find(mrt, ip_hdr(skb)->saddr, ip_hdr(skb)->daddr);

        /* No usable cache entry */

        if (cache == NULL) {
                int vif;

                if (local) {
                        struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                        ip_local_deliver(skb);
                        if (skb2 == NULL)
                                return -ENOBUFS;
                        skb = skb2;
                }

                read_lock(&mrt_lock);
                vif = ipmr_find_vif(mrt, skb->dev);
                if (vif >= 0) {
                        int err2 = ipmr_cache_unresolved(mrt, vif, skb);
                        read_unlock(&mrt_lock);

                        return err2;
                }
                read_unlock(&mrt_lock);
                kfree_skb(skb);
                return -ENODEV;
        }

        read_lock(&mrt_lock);
        ip_mr_forward(net, mrt, skb, cache, local);
        read_unlock(&mrt_lock);

        if (local)
                return ip_local_deliver(skb);

        return 0;

dont_forward:
        if (local)
                return ip_local_deliver(skb);
        kfree_skb(skb);
        return 0;
}
#ifdef CONFIG_IP_PIMSM
/* called with rcu_read_lock() */
static int __pim_rcv(struct mr_table *mrt, struct sk_buff *skb,
                     unsigned int pimlen)
{
        struct net_device *reg_dev = NULL;
        struct iphdr *encap;

        encap = (struct iphdr *)(skb_transport_header(skb) + pimlen);
        /* Check that:
         * a. packet is really destined to a multicast group
         * b. packet is not a NULL-REGISTER
         * c. packet is not truncated
         */
        if (!ipv4_is_multicast(encap->daddr) ||
            encap->tot_len == 0 ||
            ntohs(encap->tot_len) + pimlen > skb->len)
                return 1;

        read_lock(&mrt_lock);
        if (mrt->mroute_reg_vif_num >= 0)
                reg_dev = mrt->vif_table[mrt->mroute_reg_vif_num].dev;
        read_unlock(&mrt_lock);

        if (reg_dev == NULL)
                return 1;

        skb->mac_header = skb->network_header;
        skb_pull(skb, (u8 *)encap - skb->data);
        skb_reset_network_header(skb);
        skb->protocol = htons(ETH_P_IP);
        skb->ip_summed = CHECKSUM_NONE;
        skb->pkt_type = PACKET_HOST;

        skb_tunnel_rx(skb, reg_dev);

        netif_rx(skb);

        return NET_RX_SUCCESS;
}
#endif
#ifdef CONFIG_IP_PIMSM_V1
/* Handle IGMP messages of PIMv1 */

int pim_rcv_v1(struct sk_buff *skb)
{
        struct igmphdr *pim;
        struct net *net = dev_net(skb->dev);
        struct mr_table *mrt;

        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
                goto drop;

        pim = igmp_hdr(skb);

        if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
                goto drop;

        if (!mrt->mroute_do_pim ||
            pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER)
                goto drop;

        if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
                kfree_skb(skb);
        }
        return 0;
}
#endif
#ifdef CONFIG_IP_PIMSM_V2
static int pim_rcv(struct sk_buff *skb)
{
        struct pimreghdr *pim;
        struct net *net = dev_net(skb->dev);
        struct mr_table *mrt;

        if (!pskb_may_pull(skb, sizeof(*pim) + sizeof(struct iphdr)))
                goto drop;

        pim = (struct pimreghdr *)skb_transport_header(skb);
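        /* Accept the register if the checksum covers only the PIM header, as
         * PIMv2 requires, or -- for older peers -- if it covers the whole
         * packet (see the changelog note at the top of this file).
         */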
        if (pim->type != ((PIM_VERSION << 4) | (PIM_REGISTER)) ||
            (pim->flags & PIM_NULL_REGISTER) ||
            (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
             csum_fold(skb_checksum(skb, 0, skb->len, 0))))
                goto drop;

        if (ipmr_fib_lookup(net, &skb_rtable(skb)->fl, &mrt) < 0)
                goto drop;

        if (__pim_rcv(mrt, skb, sizeof(*pim))) {
drop:
                kfree_skb(skb);
        }
        return 0;
}
#endif
static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
                              struct mfc_cache *c, struct rtmsg *rtm)
{
        int ct;
        struct rtnexthop *nhp;
        u8 *b = skb_tail_pointer(skb);
        struct rtattr *mp_head;

        /* If cache is unresolved, don't try to parse IIF and OIF */
        if (c->mfc_parent >= MAXVIFS)
                return -ENOENT;

        if (VIF_EXISTS(mrt, c->mfc_parent))
                RTA_PUT(skb, RTA_IIF, 4, &mrt->vif_table[c->mfc_parent].dev->ifindex);

        mp_head = (struct rtattr *)skb_put(skb, RTA_LENGTH(0));

        for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) {
                if (VIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) {
                        if (skb_tailroom(skb) < RTA_ALIGN(RTA_ALIGN(sizeof(*nhp)) + 4))
                                goto rtattr_failure;
                        nhp = (struct rtnexthop *)skb_put(skb, RTA_ALIGN(sizeof(*nhp)));
                        nhp->rtnh_flags = 0;
                        nhp->rtnh_hops = c->mfc_un.res.ttls[ct];
                        nhp->rtnh_ifindex = mrt->vif_table[ct].dev->ifindex;
                        nhp->rtnh_len = sizeof(*nhp);
                }
        }
        mp_head->rta_type = RTA_MULTIPATH;
        mp_head->rta_len = skb_tail_pointer(skb) - (u8 *)mp_head;
        rtm->rtm_type = RTN_MULTICAST;
        return 1;

rtattr_failure:
        nlmsg_trim(skb, b);
        return -EMSGSIZE;
}
int ipmr_get_route(struct net *net,
                   struct sk_buff *skb, struct rtmsg *rtm, int nowait)
{
        int err;
        struct mr_table *mrt;
        struct mfc_cache *cache;
        struct rtable *rt = skb_rtable(skb);

        mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
        if (mrt == NULL)
                return -ENOENT;

        rcu_read_lock();
        cache = ipmr_cache_find(mrt, rt->rt_src, rt->rt_dst);

        if (cache == NULL) {
                struct sk_buff *skb2;
                struct iphdr *iph;
                struct net_device *dev;
                int vif = -1;

                if (nowait) {
                        rcu_read_unlock();
                        return -EAGAIN;
                }

                dev = skb->dev;
                read_lock(&mrt_lock);
                if (dev == NULL || (vif = ipmr_find_vif(mrt, dev)) < 0) {
                        read_unlock(&mrt_lock);
                        rcu_read_unlock();
                        return -ENODEV;
                }
                skb2 = skb_clone(skb, GFP_ATOMIC);
                if (!skb2) {
                        read_unlock(&mrt_lock);
                        rcu_read_unlock();
                        return -ENOMEM;
                }

                skb_push(skb2, sizeof(struct iphdr));
                skb_reset_network_header(skb2);
                iph = ip_hdr(skb2);
                iph->ihl = sizeof(struct iphdr) >> 2;
                iph->saddr = rt->rt_src;
                iph->daddr = rt->rt_dst;
                iph->version = 0;
                err = ipmr_cache_unresolved(mrt, vif, skb2);
                read_unlock(&mrt_lock);
                rcu_read_unlock();
                return err;
        }

        read_lock(&mrt_lock);
        if (!nowait && (rtm->rtm_flags & RTM_F_NOTIFY))
                cache->mfc_flags |= MFC_NOTIFY;
        err = __ipmr_fill_mroute(mrt, skb, cache, rtm);
        read_unlock(&mrt_lock);
        rcu_read_unlock();
        return err;
}
static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb,
                            u32 pid, u32 seq, struct mfc_cache *c)
{
        struct nlmsghdr *nlh;
        struct rtmsg *rtm;

        nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI);
        if (nlh == NULL)
                return -EMSGSIZE;

        rtm = nlmsg_data(nlh);
        rtm->rtm_family   = RTNL_FAMILY_IPMR;
        rtm->rtm_dst_len  = 32;
        rtm->rtm_src_len  = 32;
        rtm->rtm_tos      = 0;
        rtm->rtm_table    = mrt->id;
        NLA_PUT_U32(skb, RTA_TABLE, mrt->id);
        rtm->rtm_type     = RTN_MULTICAST;
        rtm->rtm_scope    = RT_SCOPE_UNIVERSE;
        rtm->rtm_protocol = RTPROT_UNSPEC;
        rtm->rtm_flags    = 0;

        NLA_PUT_BE32(skb, RTA_SRC, c->mfc_origin);
        NLA_PUT_BE32(skb, RTA_DST, c->mfc_mcastgrp);

        if (__ipmr_fill_mroute(mrt, skb, c, rtm) < 0)
                goto nla_put_failure;

        return nlmsg_end(skb, nlh);

nla_put_failure:
        nlmsg_cancel(skb, nlh);
        return -EMSGSIZE;
}
static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb)
{
        struct net *net = sock_net(skb->sk);
        struct mr_table *mrt;
        struct mfc_cache *mfc;
        unsigned int t = 0, s_t;
        unsigned int h = 0, s_h;
        unsigned int e = 0, s_e;
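        /* s_t, s_h and s_e snapshot the table, hash bucket and entry reached
         * on the previous pass (from cb->args), so an interrupted multi-part
         * dump can resume exactly where it left off.
         */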
        s_t = cb->args[0];
        s_h = cb->args[1];
        s_e = cb->args[2];

        rcu_read_lock();
        ipmr_for_each_table(mrt, net) {
                for (h = s_h; h < MFC_LINES; h++) {
                        list_for_each_entry_rcu(mfc, &mrt->mfc_cache_array[h], list) {
                                if (ipmr_fill_mroute(mrt, skb,
                                                     NETLINK_CB(cb->skb).pid,
                                                     cb->nlh->nlmsg_seq,
                                                     mfc) < 0)
                                        goto done;
#ifdef CONFIG_PROC_FS
/* The /proc interfaces to multicast routing:
 * /proc/ip_mr_cache and /proc/ip_mr_vif
 */
struct ipmr_vif_iter {
        struct seq_net_private p;
        struct mr_table *mrt;
        int ct;
};
static struct vif_device *ipmr_vif_seq_idx(struct net *net,
                                           struct ipmr_vif_iter *iter,
                                           loff_t pos)
{
        struct mr_table *mrt = iter->mrt;

        for (iter->ct = 0; iter->ct < mrt->maxvif; ++iter->ct) {
                if (!VIF_EXISTS(mrt, iter->ct))
                        continue;
                if (pos-- == 0)
                        return &mrt->vif_table[iter->ct];
        }
        return NULL;
}
static void *ipmr_vif_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(mrt_lock)
{
        struct ipmr_vif_iter *iter = seq->private;
        struct net *net = seq_file_net(seq);
        struct mr_table *mrt;

        mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
        if (mrt == NULL)
                return ERR_PTR(-ENOENT);

        iter->mrt = mrt;

        read_lock(&mrt_lock);
        return *pos ? ipmr_vif_seq_idx(net, seq->private, *pos - 1)
                : SEQ_START_TOKEN;
}
static void *ipmr_vif_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct ipmr_vif_iter *iter = seq->private;
        struct net *net = seq_file_net(seq);
        struct mr_table *mrt = iter->mrt;

        ++*pos;
        if (v == SEQ_START_TOKEN)
                return ipmr_vif_seq_idx(net, iter, 0);

        while (++iter->ct < mrt->maxvif) {
                if (!VIF_EXISTS(mrt, iter->ct))
                        continue;
                return &mrt->vif_table[iter->ct];
        }
        return NULL;
}

static void ipmr_vif_seq_stop(struct seq_file *seq, void *v)
        __releases(mrt_lock)
{
        read_unlock(&mrt_lock);
}
static int ipmr_vif_seq_show(struct seq_file *seq, void *v)
{
        struct ipmr_vif_iter *iter = seq->private;
        struct mr_table *mrt = iter->mrt;

        if (v == SEQ_START_TOKEN) {
                seq_puts(seq,
                         "Interface BytesIn PktsIn BytesOut PktsOut Flags Local Remote\n");
        } else {
                const struct vif_device *vif = v;
                const char *name = vif->dev ? vif->dev->name : "none";

                seq_printf(seq,
                           "%2Zd %-10s %8ld %7ld %8ld %7ld %05X %08X %08X\n",
                           vif - mrt->vif_table,
                           name, vif->bytes_in, vif->pkt_in,
                           vif->bytes_out, vif->pkt_out,
                           vif->flags, vif->local, vif->remote);
        }
        return 0;
}
static const struct seq_operations ipmr_vif_seq_ops = {
        .start = ipmr_vif_seq_start,
        .next  = ipmr_vif_seq_next,
        .stop  = ipmr_vif_seq_stop,
        .show  = ipmr_vif_seq_show,
};

static int ipmr_vif_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &ipmr_vif_seq_ops,
                            sizeof(struct ipmr_vif_iter));
}

static const struct file_operations ipmr_vif_fops = {
        .owner   = THIS_MODULE,
        .open    = ipmr_vif_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_net,
};
struct ipmr_mfc_iter {
        struct seq_net_private p;
        struct mr_table *mrt;
        struct list_head *cache;
        int ct;
};

static struct mfc_cache *ipmr_mfc_seq_idx(struct net *net,
                                          struct ipmr_mfc_iter *it, loff_t pos)
{
        struct mr_table *mrt = it->mrt;
        struct mfc_cache *mfc;

        rcu_read_lock();
        for (it->ct = 0; it->ct < MFC_LINES; it->ct++) {
                it->cache = &mrt->mfc_cache_array[it->ct];
                list_for_each_entry_rcu(mfc, it->cache, list)
                        if (pos-- == 0)
                                return mfc;
        }
        rcu_read_unlock();

        spin_lock_bh(&mfc_unres_lock);
        it->cache = &mrt->mfc_unres_queue;
        list_for_each_entry(mfc, it->cache, list)
                if (pos-- == 0)
                        return mfc;
        spin_unlock_bh(&mfc_unres_lock);

        it->cache = NULL;
        return NULL;
}
static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos)
{
        struct ipmr_mfc_iter *it = seq->private;
        struct net *net = seq_file_net(seq);
        struct mr_table *mrt;

        mrt = ipmr_get_table(net, RT_TABLE_DEFAULT);
        if (mrt == NULL)
                return ERR_PTR(-ENOENT);

        it->mrt = mrt;
        it->cache = NULL;
        return *pos ? ipmr_mfc_seq_idx(net, seq->private, *pos - 1)
                : SEQ_START_TOKEN;
}
static void *ipmr_mfc_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        struct mfc_cache *mfc = v;
        struct ipmr_mfc_iter *it = seq->private;
        struct net *net = seq_file_net(seq);
        struct mr_table *mrt = it->mrt;

        ++*pos;

        if (v == SEQ_START_TOKEN)
                return ipmr_mfc_seq_idx(net, seq->private, 0);

        if (mfc->list.next != it->cache)
                return list_entry(mfc->list.next, struct mfc_cache, list);

        if (it->cache == &mrt->mfc_unres_queue)
                goto end_of_list;

        BUG_ON(it->cache != &mrt->mfc_cache_array[it->ct]);

        while (++it->ct < MFC_LINES) {
                it->cache = &mrt->mfc_cache_array[it->ct];
                if (list_empty(it->cache))
                        continue;
                return list_first_entry(it->cache, struct mfc_cache, list);
        }

        /* exhausted cache_array, show unresolved */
        rcu_read_unlock();
        it->cache = &mrt->mfc_unres_queue;
        it->ct = 0;

        spin_lock_bh(&mfc_unres_lock);
        if (!list_empty(it->cache))
                return list_first_entry(it->cache, struct mfc_cache, list);

end_of_list:
        spin_unlock_bh(&mfc_unres_lock);
        it->cache = NULL;
        return NULL;
}
static void ipmr_mfc_seq_stop(struct seq_file *seq, void *v)
{
        struct ipmr_mfc_iter *it = seq->private;
        struct mr_table *mrt = it->mrt;

        if (it->cache == &mrt->mfc_unres_queue)
                spin_unlock_bh(&mfc_unres_lock);
        else if (it->cache == &mrt->mfc_cache_array[it->ct])
                rcu_read_unlock();
}
static int ipmr_mfc_seq_show(struct seq_file *seq, void *v)
{
        int n;

        if (v == SEQ_START_TOKEN) {
                seq_puts(seq,
                         "Group Origin Iif Pkts Bytes Wrong Oifs\n");
        } else {
                const struct mfc_cache *mfc = v;
                const struct ipmr_mfc_iter *it = seq->private;
                const struct mr_table *mrt = it->mrt;

                seq_printf(seq, "%08X %08X %-3hd",
                           (__force u32) mfc->mfc_mcastgrp,
                           (__force u32) mfc->mfc_origin,
                           mfc->mfc_parent);

                if (it->cache != &mrt->mfc_unres_queue) {
                        seq_printf(seq, " %8lu %8lu %8lu",
                                   mfc->mfc_un.res.pkt,
                                   mfc->mfc_un.res.bytes,
                                   mfc->mfc_un.res.wrong_if);
                        for (n = mfc->mfc_un.res.minvif;
                             n < mfc->mfc_un.res.maxvif; n++) {
                                if (VIF_EXISTS(mrt, n) &&
                                    mfc->mfc_un.res.ttls[n] < 255)
                                        seq_printf(seq, " %2d:%-3d",
                                                   n, mfc->mfc_un.res.ttls[n]);
                        }
                } else {
                        /* unresolved mfc_caches don't contain
                         * pkt, bytes and wrong_if values
                         */
                        seq_printf(seq, " %8lu %8lu %8lu", 0ul, 0ul, 0ul);
                }
                seq_putc(seq, '\n');
        }
        return 0;
}
static const struct seq_operations ipmr_mfc_seq_ops = {
        .start = ipmr_mfc_seq_start,
        .next  = ipmr_mfc_seq_next,
        .stop  = ipmr_mfc_seq_stop,
        .show  = ipmr_mfc_seq_show,
};

static int ipmr_mfc_open(struct inode *inode, struct file *file)
{
        return seq_open_net(inode, file, &ipmr_mfc_seq_ops,
                            sizeof(struct ipmr_mfc_iter));
}

static const struct file_operations ipmr_mfc_fops = {
        .owner   = THIS_MODULE,
        .open    = ipmr_mfc_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_net,
};
#endif
#ifdef CONFIG_IP_PIMSM_V2
static const struct net_protocol pim_protocol = {
        .handler  = pim_rcv,
        .netns_ok = 1,
};
#endif

/* Setup for IP multicast routing */

static int __net_init ipmr_net_init(struct net *net)
{
        int err;

        err = ipmr_rules_init(net);
        if (err < 0)
                goto fail;

#ifdef CONFIG_PROC_FS
        err = -ENOMEM;
        if (!proc_net_fops_create(net, "ip_mr_vif", 0, &ipmr_vif_fops))
                goto proc_vif_fail;
        if (!proc_net_fops_create(net, "ip_mr_cache", 0, &ipmr_mfc_fops))
                goto proc_cache_fail;
#endif
        return 0;

#ifdef CONFIG_PROC_FS
proc_cache_fail:
        proc_net_remove(net, "ip_mr_vif");
proc_vif_fail:
        ipmr_rules_exit(net);
#endif
fail:
        return err;
}
static void __net_exit ipmr_net_exit(struct net *net)
{
#ifdef CONFIG_PROC_FS
        proc_net_remove(net, "ip_mr_cache");
        proc_net_remove(net, "ip_mr_vif");
#endif
        ipmr_rules_exit(net);
}

static struct pernet_operations ipmr_net_ops = {
        .init = ipmr_net_init,
        .exit = ipmr_net_exit,
};
int __init ip_mr_init(void)
{
        int err;

        mrt_cachep = kmem_cache_create("ip_mrt_cache",
                                       sizeof(struct mfc_cache),
                                       0, SLAB_HWCACHE_ALIGN | SLAB_PANIC,
                                       NULL);
        if (!mrt_cachep)
                return -ENOMEM;

        err = register_pernet_subsys(&ipmr_net_ops);
        if (err)
                goto reg_pernet_fail;

        err = register_netdevice_notifier(&ip_mr_notifier);
        if (err)
                goto reg_notif_fail;
#ifdef CONFIG_IP_PIMSM_V2
        if (inet_add_protocol(&pim_protocol, IPPROTO_PIM) < 0) {
                printk(KERN_ERR "ip_mr_init: can't add PIM protocol\n");
                err = -EAGAIN;
                goto add_proto_fail;
        }
#endif
        rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, NULL, ipmr_rtm_dumproute);
        return 0;

#ifdef CONFIG_IP_PIMSM_V2
add_proto_fail:
        unregister_netdevice_notifier(&ip_mr_notifier);
#endif
reg_notif_fail:
        unregister_pernet_subsys(&ipmr_net_ops);
reg_pernet_fail:
        kmem_cache_destroy(mrt_cachep);
        return err;
}