Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/vapier...
[cascardo/linux.git] / net / ipv6 / route.c
index 1534508..843406f 100644 (file)
@@ -97,6 +97,36 @@ static struct rt6_info *rt6_get_route_info(struct net *net,
                                           struct in6_addr *gwaddr, int ifindex);
 #endif
 
+static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old)
+{
+       struct rt6_info *rt = (struct rt6_info *) dst;
+       struct inet_peer *peer;
+       u32 *p = NULL;
+
+       if (!rt->rt6i_peer)
+               rt6_bind_peer(rt, 1);
+
+       peer = rt->rt6i_peer;
+       if (peer) {
+               u32 *old_p = __DST_METRICS_PTR(old);
+               unsigned long prev, new;
+
+               p = peer->metrics;
+               if (inet_metrics_new(peer))
+                       memcpy(p, old_p, sizeof(u32) * RTAX_MAX);
+
+               new = (unsigned long) p;
+               prev = cmpxchg(&dst->_metrics, old, new);
+
+               if (prev != old) {
+                       p = __DST_METRICS_PTR(prev);
+                       if (prev & DST_METRICS_READ_ONLY)
+                               p = NULL;
+               }
+       }
+       return p;
+}
+
 static struct dst_ops ip6_dst_ops_template = {
        .family                 =       AF_INET6,
        .protocol               =       cpu_to_be16(ETH_P_IPV6),
@@ -105,6 +135,7 @@ static struct dst_ops ip6_dst_ops_template = {
        .check                  =       ip6_dst_check,
        .default_advmss         =       ip6_default_advmss,
        .default_mtu            =       ip6_default_mtu,
+       .cow_metrics            =       ipv6_cow_metrics,
        .destroy                =       ip6_dst_destroy,
        .ifdown                 =       ip6_dst_ifdown,
        .negative_advice        =       ip6_negative_advice,
@@ -113,6 +144,11 @@ static struct dst_ops ip6_dst_ops_template = {
        .local_out              =       __ip6_local_out,
 };
 
+static unsigned int ip6_blackhole_default_mtu(const struct dst_entry *dst)
+{
+       return 0;
+}
+
 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
 {
 }
@@ -122,9 +158,15 @@ static struct dst_ops ip6_dst_blackhole_ops = {
        .protocol               =       cpu_to_be16(ETH_P_IPV6),
        .destroy                =       ip6_dst_destroy,
        .check                  =       ip6_dst_check,
+       .default_mtu            =       ip6_blackhole_default_mtu,
+       .default_advmss         =       ip6_default_advmss,
        .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
 };
 
+static const u32 ip6_template_metrics[RTAX_MAX] = {
+       [RTAX_HOPLIMIT - 1] = 255,
+};
+
 static struct rt6_info ip6_null_entry_template = {
        .dst = {
                .__refcnt       = ATOMIC_INIT(1),
@@ -180,7 +222,7 @@ static struct rt6_info ip6_blk_hole_entry_template = {
 /* allocate dst with ip6_dst_ops */
 static inline struct rt6_info *ip6_dst_alloc(struct dst_ops *ops)
 {
-       return (struct rt6_info *)dst_alloc(ops);
+       return (struct rt6_info *)dst_alloc(ops, 0);
 }
 
 static void ip6_dst_destroy(struct dst_entry *dst)
@@ -194,22 +236,27 @@ static void ip6_dst_destroy(struct dst_entry *dst)
                in6_dev_put(idev);
        }
        if (peer) {
-               BUG_ON(!(rt->rt6i_flags & RTF_CACHE));
                rt->rt6i_peer = NULL;
                inet_putpeer(peer);
        }
 }
 
+static atomic_t __rt6_peer_genid = ATOMIC_INIT(0);
+
+static u32 rt6_peer_genid(void)
+{
+       return atomic_read(&__rt6_peer_genid);
+}
+
 void rt6_bind_peer(struct rt6_info *rt, int create)
 {
        struct inet_peer *peer;
 
-       if (WARN_ON(!(rt->rt6i_flags & RTF_CACHE)))
-               return;
-
        peer = inet_getpeer_v6(&rt->rt6i_dst.addr, create);
        if (peer && cmpxchg(&rt->rt6i_peer, NULL, peer) != NULL)
                inet_putpeer(peer);
+       else
+               rt->rt6i_peer_genid = rt6_peer_genid();
 }
 
 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
@@ -552,17 +599,17 @@ do { \
 
 static struct rt6_info *ip6_pol_route_lookup(struct net *net,
                                             struct fib6_table *table,
-                                            struct flowi *fl, int flags)
+                                            struct flowi6 *fl6, int flags)
 {
        struct fib6_node *fn;
        struct rt6_info *rt;
 
        read_lock_bh(&table->tb6_lock);
-       fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
+       fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
        rt = fn->leaf;
-       rt = rt6_device_match(net, rt, &fl->fl6_src, fl->oif, flags);
-       BACKTRACK(net, &fl->fl6_src);
+       rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags);
+       BACKTRACK(net, &fl6->saddr);
 out:
        dst_use(&rt->dst, jiffies);
        read_unlock_bh(&table->tb6_lock);
@@ -573,19 +620,19 @@ out:
 struct rt6_info *rt6_lookup(struct net *net, const struct in6_addr *daddr,
                            const struct in6_addr *saddr, int oif, int strict)
 {
-       struct flowi fl = {
-               .oif = oif,
-               .fl6_dst = *daddr,
+       struct flowi6 fl6 = {
+               .flowi6_oif = oif,
+               .daddr = *daddr,
        };
        struct dst_entry *dst;
        int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 
        if (saddr) {
-               memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
+               memcpy(&fl6.saddr, saddr, sizeof(*saddr));
                flags |= RT6_LOOKUP_F_HAS_SADDR;
        }
 
-       dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
+       dst = fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_lookup);
        if (dst->error == 0)
                return (struct rt6_info *) dst;
 
@@ -706,7 +753,7 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d
 }
 
 static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif,
-                                     struct flowi *fl, int flags)
+                                     struct flowi6 *fl6, int flags)
 {
        struct fib6_node *fn;
        struct rt6_info *rt, *nrt;
@@ -721,12 +768,12 @@ relookup:
        read_lock_bh(&table->tb6_lock);
 
 restart_2:
-       fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
+       fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 
 restart:
        rt = rt6_select(fn, oif, strict | reachable);
 
-       BACKTRACK(net, &fl->fl6_src);
+       BACKTRACK(net, &fl6->saddr);
        if (rt == net->ipv6.ip6_null_entry ||
            rt->rt6i_flags & RTF_CACHE)
                goto out;
@@ -735,9 +782,11 @@ restart:
        read_unlock_bh(&table->tb6_lock);
 
        if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
-               nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
+               nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr);
+       else if (!(rt->dst.flags & DST_HOST))
+               nrt = rt6_alloc_clone(rt, &fl6->daddr);
        else
-               nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
+               goto out2;
 
        dst_release(&rt->dst);
        rt = nrt ? : net->ipv6.ip6_null_entry;
@@ -774,9 +823,9 @@ out2:
 }
 
 static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table,
-                                           struct flowi *fl, int flags)
+                                           struct flowi6 *fl6, int flags)
 {
-       return ip6_pol_route(net, table, fl->iif, fl, flags);
+       return ip6_pol_route(net, table, fl6->flowi6_iif, fl6, flags);
 }
 
 void ip6_route_input(struct sk_buff *skb)
@@ -784,56 +833,54 @@ void ip6_route_input(struct sk_buff *skb)
        struct ipv6hdr *iph = ipv6_hdr(skb);
        struct net *net = dev_net(skb->dev);
        int flags = RT6_LOOKUP_F_HAS_SADDR;
-       struct flowi fl = {
-               .iif = skb->dev->ifindex,
-               .fl6_dst = iph->daddr,
-               .fl6_src = iph->saddr,
-               .fl6_flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
-               .mark = skb->mark,
-               .proto = iph->nexthdr,
+       struct flowi6 fl6 = {
+               .flowi6_iif = skb->dev->ifindex,
+               .daddr = iph->daddr,
+               .saddr = iph->saddr,
+               .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
+               .flowi6_mark = skb->mark,
+               .flowi6_proto = iph->nexthdr,
        };
 
        if (rt6_need_strict(&iph->daddr) && skb->dev->type != ARPHRD_PIMREG)
                flags |= RT6_LOOKUP_F_IFACE;
 
-       skb_dst_set(skb, fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input));
+       skb_dst_set(skb, fib6_rule_lookup(net, &fl6, flags, ip6_pol_route_input));
 }
 
 static struct rt6_info *ip6_pol_route_output(struct net *net, struct fib6_table *table,
-                                            struct flowi *fl, int flags)
+                                            struct flowi6 *fl6, int flags)
 {
-       return ip6_pol_route(net, table, fl->oif, fl, flags);
+       return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, flags);
 }
 
-struct dst_entry * ip6_route_output(struct net *net, struct sock *sk,
-                                   struct flowi *fl)
+struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk,
+                                   struct flowi6 *fl6)
 {
        int flags = 0;
 
-       if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl->fl6_dst))
+       if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr))
                flags |= RT6_LOOKUP_F_IFACE;
 
-       if (!ipv6_addr_any(&fl->fl6_src))
+       if (!ipv6_addr_any(&fl6->saddr))
                flags |= RT6_LOOKUP_F_HAS_SADDR;
        else if (sk)
                flags |= rt6_srcprefs2flags(inet6_sk(sk)->srcprefs);
 
-       return fib6_rule_lookup(net, fl, flags, ip6_pol_route_output);
+       return fib6_rule_lookup(net, fl6, flags, ip6_pol_route_output);
 }
 
 EXPORT_SYMBOL(ip6_route_output);
 
-int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
+struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_orig)
 {
-       struct rt6_info *ort = (struct rt6_info *) *dstp;
-       struct rt6_info *rt = (struct rt6_info *)
-               dst_alloc(&ip6_dst_blackhole_ops);
+       struct rt6_info *rt = dst_alloc(&ip6_dst_blackhole_ops, 1);
+       struct rt6_info *ort = (struct rt6_info *) dst_orig;
        struct dst_entry *new = NULL;
 
        if (rt) {
                new = &rt->dst;
 
-               atomic_set(&new->__refcnt, 1);
                new->__use = 1;
                new->input = dst_discard;
                new->output = dst_discard;
@@ -859,11 +906,9 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl
                dst_free(new);
        }
 
-       dst_release(*dstp);
-       *dstp = new;
-       return new ? 0 : -ENOMEM;
+       dst_release(dst_orig);
+       return new ? new : ERR_PTR(-ENOMEM);
 }
-EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
 
 /*
  *     Destination cache support functions
@@ -875,9 +920,14 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 
        rt = (struct rt6_info *) dst;
 
-       if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
+       if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) {
+               if (rt->rt6i_peer_genid != rt6_peer_genid()) {
+                       if (!rt->rt6i_peer)
+                               rt6_bind_peer(rt, 0);
+                       rt->rt6i_peer_genid = rt6_peer_genid();
+               }
                return dst;
-
+       }
        return NULL;
 }
 
@@ -928,7 +978,6 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
                        dst_metric_set(dst, RTAX_FEATURES, features);
                }
                dst_metric_set(dst, RTAX_MTU, mtu);
-               call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
        }
 }
 
@@ -1025,11 +1074,9 @@ out:
 
 int icmp6_dst_gc(void)
 {
-       struct dst_entry *dst, *next, **pprev;
+       struct dst_entry *dst, **pprev;
        int more = 0;
 
-       next = NULL;
-
        spin_lock_bh(&icmp6_dst_lock);
        pprev = &icmp6_dst_gc_list;
 
@@ -1397,16 +1444,16 @@ static int ip6_route_del(struct fib6_config *cfg)
  *     Handle redirects
  */
 struct ip6rd_flowi {
-       struct flowi fl;
+       struct flowi6 fl6;
        struct in6_addr gateway;
 };
 
 static struct rt6_info *__ip6_route_redirect(struct net *net,
                                             struct fib6_table *table,
-                                            struct flowi *fl,
+                                            struct flowi6 *fl6,
                                             int flags)
 {
-       struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
+       struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6;
        struct rt6_info *rt;
        struct fib6_node *fn;
 
@@ -1422,7 +1469,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net,
         */
 
        read_lock_bh(&table->tb6_lock);
-       fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
+       fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr);
 restart:
        for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) {
                /*
@@ -1437,7 +1484,7 @@ restart:
                        continue;
                if (!(rt->rt6i_flags & RTF_GATEWAY))
                        continue;
-               if (fl->oif != rt->rt6i_dev->ifindex)
+               if (fl6->flowi6_oif != rt->rt6i_dev->ifindex)
                        continue;
                if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
                        continue;
@@ -1446,7 +1493,7 @@ restart:
 
        if (!rt)
                rt = net->ipv6.ip6_null_entry;
-       BACKTRACK(net, &fl->fl6_src);
+       BACKTRACK(net, &fl6->saddr);
 out:
        dst_hold(&rt->dst);
 
@@ -1463,10 +1510,10 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
        int flags = RT6_LOOKUP_F_HAS_SADDR;
        struct net *net = dev_net(dev);
        struct ip6rd_flowi rdfl = {
-               .fl = {
-                       .oif = dev->ifindex,
-                       .fl6_dst = *dest,
-                       .fl6_src = *src,
+               .fl6 = {
+                       .flowi6_oif = dev->ifindex,
+                       .daddr = *dest,
+                       .saddr = *src,
                },
        };
 
@@ -1475,7 +1522,7 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
        if (rt6_need_strict(dest))
                flags |= RT6_LOOKUP_F_IFACE;
 
-       return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
+       return (struct rt6_info *)fib6_rule_lookup(net, &rdfl.fl6,
                                                   flags, __ip6_route_redirect);
 }
 
@@ -1977,12 +2024,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
        if (IS_ERR(neigh)) {
                dst_free(&rt->dst);
 
-               /* We are casting this because that is the return
-                * value type.  But an errno encoded pointer is the
-                * same regardless of the underlying pointer type,
-                * and that's what we are returning.  So this is OK.
-                */
-               return (struct rt6_info *) neigh;
+               return ERR_CAST(neigh);
        }
        rt->rt6i_nexthop = neigh;
 
@@ -2343,7 +2385,7 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
        struct rt6_info *rt;
        struct sk_buff *skb;
        struct rtmsg *rtm;
-       struct flowi fl;
+       struct flowi6 fl6;
        int err, iif = 0;
 
        err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
@@ -2351,27 +2393,27 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
                goto errout;
 
        err = -EINVAL;
-       memset(&fl, 0, sizeof(fl));
+       memset(&fl6, 0, sizeof(fl6));
 
        if (tb[RTA_SRC]) {
                if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
                        goto errout;
 
-               ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
+               ipv6_addr_copy(&fl6.saddr, nla_data(tb[RTA_SRC]));
        }
 
        if (tb[RTA_DST]) {
                if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
                        goto errout;
 
-               ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
+               ipv6_addr_copy(&fl6.daddr, nla_data(tb[RTA_DST]));
        }
 
        if (tb[RTA_IIF])
                iif = nla_get_u32(tb[RTA_IIF]);
 
        if (tb[RTA_OIF])
-               fl.oif = nla_get_u32(tb[RTA_OIF]);
+               fl6.flowi6_oif = nla_get_u32(tb[RTA_OIF]);
 
        if (iif) {
                struct net_device *dev;
@@ -2394,10 +2436,10 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void
        skb_reset_mac_header(skb);
        skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
 
-       rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl);
+       rt = (struct rt6_info*) ip6_route_output(net, NULL, &fl6);
        skb_dst_set(skb, &rt->dst);
 
-       err = rt6_fill_node(net, skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
+       err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif,
                            RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
                            nlh->nlmsg_seq, 0, 0, 0);
        if (err < 0) {
@@ -2554,14 +2596,16 @@ static
 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write,
                              void __user *buffer, size_t *lenp, loff_t *ppos)
 {
-       struct net *net = current->nsproxy->net_ns;
-       int delay = net->ipv6.sysctl.flush_delay;
-       if (write) {
-               proc_dointvec(ctl, write, buffer, lenp, ppos);
-               fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
-               return 0;
-       } else
+       struct net *net;
+       int delay;
+       if (!write)
                return -EINVAL;
+
+       net = (struct net *)ctl->extra1;
+       delay = net->ipv6.sysctl.flush_delay;
+       proc_dointvec(ctl, write, buffer, lenp, ppos);
+       fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
+       return 0;
 }
 
 ctl_table ipv6_route_table_template[] = {
@@ -2648,6 +2692,7 @@ struct ctl_table * __net_init ipv6_route_sysctl_init(struct net *net)
 
        if (table) {
                table[0].data = &net->ipv6.sysctl.flush_delay;
+               table[0].extra1 = net;
                table[1].data = &net->ipv6.ip6_dst_ops.gc_thresh;
                table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
                table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
@@ -2681,7 +2726,8 @@ static int __net_init ip6_route_net_init(struct net *net)
        net->ipv6.ip6_null_entry->dst.path =
                (struct dst_entry *)net->ipv6.ip6_null_entry;
        net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops;
-       dst_metric_set(&net->ipv6.ip6_null_entry->dst, RTAX_HOPLIMIT, 255);
+       dst_init_metrics(&net->ipv6.ip6_null_entry->dst,
+                        ip6_template_metrics, true);
 
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
        net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
@@ -2692,7 +2738,8 @@ static int __net_init ip6_route_net_init(struct net *net)
        net->ipv6.ip6_prohibit_entry->dst.path =
                (struct dst_entry *)net->ipv6.ip6_prohibit_entry;
        net->ipv6.ip6_prohibit_entry->dst.ops = &net->ipv6.ip6_dst_ops;
-       dst_metric_set(&net->ipv6.ip6_prohibit_entry->dst, RTAX_HOPLIMIT, 255);
+       dst_init_metrics(&net->ipv6.ip6_prohibit_entry->dst,
+                        ip6_template_metrics, true);
 
        net->ipv6.ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
                                               sizeof(*net->ipv6.ip6_blk_hole_entry),
@@ -2702,7 +2749,8 @@ static int __net_init ip6_route_net_init(struct net *net)
        net->ipv6.ip6_blk_hole_entry->dst.path =
                (struct dst_entry *)net->ipv6.ip6_blk_hole_entry;
        net->ipv6.ip6_blk_hole_entry->dst.ops = &net->ipv6.ip6_dst_ops;
-       dst_metric_set(&net->ipv6.ip6_blk_hole_entry->dst, RTAX_HOPLIMIT, 255);
+       dst_init_metrics(&net->ipv6.ip6_blk_hole_entry->dst,
+                        ip6_template_metrics, true);
 #endif
 
        net->ipv6.sysctl.flush_delay = 0;