ipv4: Adjust in_dev handling in fib_validate_source()
[cascardo/linux.git] / net / ipv4 / route.c
index 41df529..919d69e 100644 (file)
@@ -440,7 +440,7 @@ static int rt_cache_seq_show(struct seq_file *seq, void *v)
                        r->rt_key_tos,
                        -1,
                        HHUptod,
-                       r->rt_spec_dst, &len);
+                       0, &len);
 
                seq_printf(seq, "%*s\n", 127 - len, "");
        }
@@ -870,34 +870,22 @@ static void rt_check_expire(void)
                while ((rth = rcu_dereference_protected(*rthp,
                                        lockdep_is_held(rt_hash_lock_addr(i)))) != NULL) {
                        prefetch(rth->dst.rt_next);
-                       if (rt_is_expired(rth)) {
+                       if (rt_is_expired(rth) ||
+                           rt_may_expire(rth, tmo, ip_rt_gc_timeout)) {
                                *rthp = rth->dst.rt_next;
                                rt_free(rth);
                                continue;
                        }
-                       if (rth->dst.expires) {
-                               /* Entry is expired even if it is in use */
-                               if (time_before_eq(jiffies, rth->dst.expires)) {
-nofree:
-                                       tmo >>= 1;
-                                       rthp = &rth->dst.rt_next;
-                                       /*
-                                        * We only count entries on
-                                        * a chain with equal hash inputs once
-                                        * so that entries for different QOS
-                                        * levels, and other non-hash input
-                                        * attributes don't unfairly skew
-                                        * the length computation
-                                        */
-                                       length += has_noalias(rt_hash_table[i].chain, rth);
-                                       continue;
-                               }
-                       } else if (!rt_may_expire(rth, tmo, ip_rt_gc_timeout))
-                               goto nofree;
 
-                       /* Cleanup aged off entries. */
-                       *rthp = rth->dst.rt_next;
-                       rt_free(rth);
+                       /* We only count entries on a chain with equal
+                        * hash inputs once so that entries for
+                        * different QOS levels, and other non-hash
+                        * input attributes don't unfairly skew the
+                        * length computation
+                        */
+                       tmo >>= 1;
+                       rthp = &rth->dst.rt_next;
+                       length += has_noalias(rt_hash_table[i].chain, rth);
                }
                spin_unlock_bh(rt_hash_lock_addr(i));
                sum += length;
@@ -1156,7 +1144,7 @@ restart:
        candp = NULL;
        now = jiffies;
 
-       if (!rt_caching(dev_net(rt->dst.dev))) {
+       if (!rt_caching(dev_net(rt->dst.dev)) || (rt->dst.flags & DST_NOCACHE)) {
                /*
                 * If we're not caching, just tell the caller we
                 * were successful and don't touch the route.  The
@@ -1621,12 +1609,28 @@ void ip_rt_send_redirect(struct sk_buff *skb)
 
 static int ip_error(struct sk_buff *skb)
 {
+       struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
        struct rtable *rt = skb_rtable(skb);
        struct inet_peer *peer;
        unsigned long now;
+       struct net *net;
        bool send;
        int code;
 
+       net = dev_net(rt->dst.dev);
+       if (!IN_DEV_FORWARD(in_dev)) {
+               switch (rt->dst.error) {
+               case EHOSTUNREACH:
+                       IP_INC_STATS_BH(net, IPSTATS_MIB_INADDRERRORS);
+                       break;
+
+               case ENETUNREACH:
+                       IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
+                       break;
+               }
+               goto out;
+       }
+
        switch (rt->dst.error) {
        case EINVAL:
        default:
@@ -1636,8 +1640,7 @@ static int ip_error(struct sk_buff *skb)
                break;
        case ENETUNREACH:
                code = ICMP_NET_UNREACH;
-               IP_INC_STATS_BH(dev_net(rt->dst.dev),
-                               IPSTATS_MIB_INNOROUTES);
+               IP_INC_STATS_BH(net, IPSTATS_MIB_INNOROUTES);
                break;
        case EACCES:
                code = ICMP_PKT_FILTERED;
@@ -1951,8 +1954,6 @@ static void rt_set_nexthop(struct rtable *rt, const struct flowi4 *fl4,
 
        if (dst_mtu(dst) > IP_MAX_MTU)
                dst_metric_set(dst, RTAX_MTU, IP_MAX_MTU);
-       if (dst_metric_raw(dst, RTAX_ADVMSS) > 65535 - 40)
-               dst_metric_set(dst, RTAX_ADVMSS, 65535 - 40);
 
 #ifdef CONFIG_IP_ROUTE_CLASSID
 #ifdef CONFIG_IP_MULTIPLE_TABLES
@@ -1977,7 +1978,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
 {
        unsigned int hash;
        struct rtable *rth;
-       __be32 spec_dst;
        struct in_device *in_dev = __in_dev_get_rcu(dev);
        u32 itag = 0;
        int err;
@@ -1998,10 +1998,9 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        if (ipv4_is_zeronet(saddr)) {
                if (!ipv4_is_local_multicast(daddr))
                        goto e_inval;
-               spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
        } else {
-               err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
-                                         &itag);
+               err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
+                                         in_dev, &itag);
                if (err < 0)
                        goto e_err;
        }
@@ -2028,7 +2027,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        rth->rt_oif     = 0;
        rth->rt_mark    = skb->mark;
        rth->rt_gateway = daddr;
-       rth->rt_spec_dst= spec_dst;
        rth->rt_peer_genid = 0;
        rt_init_peer(rth, dev_net(dev)->ipv4.peers);
        rth->fi = NULL;
@@ -2092,7 +2090,6 @@ static int __mkroute_input(struct sk_buff *skb,
        int err;
        struct in_device *out_dev;
        unsigned int flags = 0;
-       __be32 spec_dst;
        u32 itag;
 
        /* get a working reference to the output device */
@@ -2104,7 +2101,7 @@ static int __mkroute_input(struct sk_buff *skb,
 
 
        err = fib_validate_source(skb, saddr, daddr, tos, FIB_RES_OIF(*res),
-                                 in_dev->dev, &spec_dst, &itag);
+                                 in_dev->dev, in_dev, &itag);
        if (err < 0) {
                ip_handle_martian_source(in_dev->dev, in_dev, skb, daddr,
                                         saddr);
@@ -2156,7 +2153,6 @@ static int __mkroute_input(struct sk_buff *skb,
        rth->rt_oif     = 0;
        rth->rt_mark    = skb->mark;
        rth->rt_gateway = daddr;
-       rth->rt_spec_dst= spec_dst;
        rth->rt_peer_genid = 0;
        rt_init_peer(rth, &res->table->tb_peers);
        rth->fi = NULL;
@@ -2222,7 +2218,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        u32             itag = 0;
        struct rtable   *rth;
        unsigned int    hash;
-       __be32          spec_dst;
        int             err = -EINVAL;
        struct net    *net = dev_net(dev);
 
@@ -2269,11 +2264,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        fl4.daddr = daddr;
        fl4.saddr = saddr;
        err = fib_lookup(net, &fl4, &res);
-       if (err != 0) {
-               if (!IN_DEV_FORWARD(in_dev))
-                       goto e_hostunreach;
+       if (err != 0)
                goto no_route;
-       }
 
        RT_CACHE_STAT_INC(in_slow_tot);
 
@@ -2283,17 +2275,16 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr,
        if (res.type == RTN_LOCAL) {
                err = fib_validate_source(skb, saddr, daddr, tos,
                                          net->loopback_dev->ifindex,
-                                         dev, &spec_dst, &itag);
+                                         dev, in_dev, &itag);
                if (err < 0)
                        goto martian_source_keep_err;
                if (err)
                        flags |= RTCF_DIRECTSRC;
-               spec_dst = daddr;
                goto local_input;
        }
 
        if (!IN_DEV_FORWARD(in_dev))
-               goto e_hostunreach;
+               goto no_route;
        if (res.type != RTN_UNICAST)
                goto martian_destination;
 
@@ -2304,11 +2295,9 @@ brd_input:
        if (skb->protocol != htons(ETH_P_IP))
                goto e_inval;
 
-       if (ipv4_is_zeronet(saddr))
-               spec_dst = inet_select_addr(dev, 0, RT_SCOPE_LINK);
-       else {
-               err = fib_validate_source(skb, saddr, 0, tos, 0, dev, &spec_dst,
-                                         &itag);
+       if (!ipv4_is_zeronet(saddr)) {
+               err = fib_validate_source(skb, saddr, 0, tos, 0, dev,
+                                         in_dev, &itag);
                if (err < 0)
                        goto martian_source_keep_err;
                if (err)
@@ -2346,7 +2335,6 @@ local_input:
        rth->rt_oif     = 0;
        rth->rt_mark    = skb->mark;
        rth->rt_gateway = daddr;
-       rth->rt_spec_dst= spec_dst;
        rth->rt_peer_genid = 0;
        rt_init_peer(rth, net->ipv4.peers);
        rth->fi = NULL;
@@ -2364,7 +2352,6 @@ local_input:
 
 no_route:
        RT_CACHE_STAT_INC(in_no_route);
-       spec_dst = inet_select_addr(dev, 0, RT_SCOPE_UNIVERSE);
        res.type = RTN_UNREACHABLE;
        if (err == -ESRCH)
                err = -ENETUNREACH;
@@ -2381,10 +2368,6 @@ martian_destination:
                                     &daddr, &saddr, dev->name);
 #endif
 
-e_hostunreach:
-       err = -EHOSTUNREACH;
-       goto out;
-
 e_inval:
        err = -EINVAL;
        goto out;
@@ -2551,7 +2534,6 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
        rth->rt_oif     = orig_oif;
        rth->rt_mark    = fl4->flowi4_mark;
        rth->rt_gateway = fl4->daddr;
-       rth->rt_spec_dst= fl4->saddr;
        rth->rt_peer_genid = 0;
        rt_init_peer(rth, (res->table ?
                           &res->table->tb_peers :
@@ -2560,12 +2542,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 
        RT_CACHE_STAT_INC(out_slow_tot);
 
-       if (flags & RTCF_LOCAL) {
+       if (flags & RTCF_LOCAL)
                rth->dst.input = ip_local_deliver;
-               rth->rt_spec_dst = fl4->daddr;
-       }
        if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
-               rth->rt_spec_dst = fl4->saddr;
                if (flags & RTCF_LOCAL &&
                    !(dev_out->flags & IFF_LOOPBACK)) {
                        rth->dst.output = ip_mc_output;
@@ -2584,6 +2563,9 @@ static struct rtable *__mkroute_output(const struct fib_result *res,
 
        rt_set_nexthop(rth, fl4, res, fi, type, 0);
 
+       if (fl4->flowi4_flags & FLOWI_FLAG_RT_NOCACHE)
+               rth->dst.flags |= DST_NOCACHE;
+
        return rth;
 }
 
@@ -2893,7 +2875,6 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or
                rt->rt_dst = ort->rt_dst;
                rt->rt_src = ort->rt_src;
                rt->rt_gateway = ort->rt_gateway;
-               rt->rt_spec_dst = ort->rt_spec_dst;
                rt_transfer_peer(rt, ort);
                rt->fi = ort->fi;
                if (rt->fi)
@@ -2968,10 +2949,8 @@ static int rt_fill_info(struct net *net,
            nla_put_u32(skb, RTA_FLOW, rt->dst.tclassid))
                goto nla_put_failure;
 #endif
-       if (rt_is_input_route(rt)) {
-               if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_spec_dst))
-                       goto nla_put_failure;
-       } else if (rt->rt_src != rt->rt_key_src) {
+       if (!rt_is_input_route(rt) &&
+           rt->rt_src != rt->rt_key_src) {
                if (nla_put_be32(skb, RTA_PREFSRC, rt->rt_src))
                        goto nla_put_failure;
        }