c4a7879bfb150c9a005c4928cdad3e6833d72623
[cascardo/linux.git] / net / core / neighbour.c
1 /*
2  *      Generic address resolution entity
3  *
4  *      Authors:
5  *      Pedro Roque             <roque@di.fc.ul.pt>
6  *      Alexey Kuznetsov        <kuznet@ms2.inr.ac.ru>
7  *
8  *      This program is free software; you can redistribute it and/or
9  *      modify it under the terms of the GNU General Public License
10  *      as published by the Free Software Foundation; either version
11  *      2 of the License, or (at your option) any later version.
12  *
13  *      Fixes:
14  *      Vitaly E. Lavrov        releasing NULL neighbor in neigh_add.
15  *      Harald Welte            Add neighbour cache statistics like rtstat
16  */
17
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
19
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
27 #ifdef CONFIG_SYSCTL
28 #include <linux/sysctl.h>
29 #endif
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
33 #include <net/dst.h>
34 #include <net/sock.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41 #include <linux/inetdevice.h>
42
#define DEBUG
#define NEIGH_DEBUG 1
/* Debug printout helper: routes to pr_debug() only when the requested
 * verbosity does not exceed the compile-time NEIGH_DEBUG level.
 */
#define neigh_dbg(level, fmt, ...)              \
do {                                            \
        if (level <= NEIGH_DEBUG)               \
                pr_debug(fmt, ##__VA_ARGS__);   \
} while (0)

/* Proxy-neighbour hash has PNEIGH_HASHMASK + 1 (= 16) buckets. */
#define PNEIGH_HASHMASK         0xF
52
53 static void neigh_timer_handler(unsigned long arg);
54 static void __neigh_notify(struct neighbour *n, int type, int flags);
55 static void neigh_update_notify(struct neighbour *neigh);
56 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
57
58 static struct neigh_table *neigh_tables;
59 #ifdef CONFIG_PROC_FS
60 static const struct file_operations neigh_stat_seq_fops;
61 #endif
62
63 /*
64    Neighbour hash table buckets are protected with rwlock tbl->lock.
65
66    - All the scans/updates to hash buckets MUST be made under this lock.
67    - NOTHING clever should be made under this lock: no callbacks
68      to protocol backends, no attempts to send something to network.
69      It will result in deadlocks, if backend/driver wants to use neighbour
70      cache.
71    - If the entry requires some non-trivial actions, increase
72      its reference count and release table lock.
73
74    Neighbour entries are protected:
75    - with reference count.
76    - with rwlock neigh->lock
77
78    Reference count prevents destruction.
79
80    neigh->lock mainly serializes ll address data and its validity state.
81    However, the same lock is used to protect another entry fields:
82     - timer
83     - resolution queue
84
85    Again, nothing clever shall be made under neigh->lock,
86    the most complicated procedure, which we allow is dev->hard_header.
87    It is supposed, that dev->hard_header is simplistic and does
88    not make callbacks to neighbour tables.
89
   The last lock is neigh_tbl_lock. It is a pure SMP lock, protecting
   the list of neighbour tables. This list is used only in process context.
 */
93
94 static DEFINE_RWLOCK(neigh_tbl_lock);
95
/* Output stub installed on dead/unusable entries: drop the packet and
 * report the path as down.
 */
static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
{
	kfree_skb(skb);
	return -ENETDOWN;
}
101
/* Final teardown for an entry already unlinked from the hash table:
 * give the protocol its cleanup callback, notify userspace via
 * rtnetlink (RTM_DELNEIGH), then drop the table's reference so the
 * entry is destroyed once the last user releases it.
 */
static void neigh_cleanup_and_release(struct neighbour *neigh)
{
	if (neigh->parms->neigh_cleanup)
		neigh->parms->neigh_cleanup(neigh);

	__neigh_notify(neigh, RTM_DELNEIGH, 0);
	neigh_release(neigh);
}
110
/* Randomize the base reachable time into the interval
 * [base/2, 3*base/2).  This matches the default IPv6 behaviour and is
 * deliberately not tunable.  A zero base yields zero.
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	if (!base)
		return 0;

	return (base >> 1) + (net_random() % base);
}
EXPORT_SYMBOL(neigh_rand_reach_time);
122
123
/* Synchronously shrink the table: walk every bucket under tbl->lock and
 * unlink entries that are unreferenced (refcnt == 1, i.e. only the table
 * holds them) and not NUD_PERMANENT.  Called from neigh_alloc() under
 * gc-threshold pressure.  Returns 1 if anything was freed, else 0.
 */
static int neigh_forced_gc(struct neigh_table *tbl)
{
	int shrunk = 0;
	int i;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		np = &nht->hash_buckets[i];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			/* Neighbour record may be discarded if:
			 * - nobody refers to it.
			 * - it is not permanent
			 */
			write_lock(&n->lock);
			if (atomic_read(&n->refcnt) == 1 &&
			    !(n->nud_state & NUD_PERMANENT)) {
				/* Unlink under RCU publication rules; the
				 * entry may still be seen by lockless
				 * readers until a grace period elapses.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						  lockdep_is_held(&tbl->lock)));
				n->dead = 1;
				shrunk  = 1;
				write_unlock(&n->lock);
				neigh_cleanup_and_release(n);
				continue;
			}
			write_unlock(&n->lock);
			np = &n->next;
		}
	}

	/* Record flush time so neigh_alloc() can rate-limit forced GC at
	 * the gc_thresh2 level.
	 */
	tbl->last_flush = jiffies;

	write_unlock_bh(&tbl->lock);

	return shrunk;
}
169
170 static void neigh_add_timer(struct neighbour *n, unsigned long when)
171 {
172         neigh_hold(n);
173         if (unlikely(mod_timer(&n->timer, when))) {
174                 printk("NEIGH: BUG, double timer add, state is %x\n",
175                        n->nud_state);
176                 dump_stack();
177         }
178 }
179
180 static int neigh_del_timer(struct neighbour *n)
181 {
182         if ((n->nud_state & NUD_IN_TIMER) &&
183             del_timer(&n->timer)) {
184                 neigh_release(n);
185                 return 1;
186         }
187         return 0;
188 }
189
190 static void pneigh_queue_purge(struct sk_buff_head *list)
191 {
192         struct sk_buff *skb;
193
194         while ((skb = skb_dequeue(list)) != NULL) {
195                 dev_put(skb->dev);
196                 kfree_skb(skb);
197         }
198 }
199
/* Unlink every entry whose device is @dev (all entries if @dev is NULL).
 * Timers are stopped and each entry is marked dead.  If someone still
 * holds a reference, the entry is neutered in place (queue purged,
 * output redirected to neigh_blackhole) and destruction is deferred to
 * the final neigh_release().
 *
 * Caller must hold tbl->lock for writing.
 */
static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
{
	int i;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	for (i = 0; i < (1 << nht->hash_shift); i++) {
		struct neighbour *n;
		struct neighbour __rcu **np = &nht->hash_buckets[i];

		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			if (dev && n->dev != dev) {
				np = &n->next;
				continue;
			}
			rcu_assign_pointer(*np,
				   rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
			write_lock(&n->lock);
			neigh_del_timer(n);
			n->dead = 1;

			if (atomic_read(&n->refcnt) != 1) {
				/* The most unpleasant situation.
				   We must destroy neighbour entry,
				   but someone still uses it.

				   The destroy will be delayed until
				   the last user releases us, but
				   we must kill timers etc. and move
				   it to safe state.
				 */
				__skb_queue_purge(&n->arp_queue);
				n->arp_queue_len_bytes = 0;
				n->output = neigh_blackhole;
				if (n->nud_state & NUD_VALID)
					n->nud_state = NUD_NOARP;
				else
					n->nud_state = NUD_NONE;
				neigh_dbg(2, "neigh %p is stray\n", n);
			}
			write_unlock(&n->lock);
			neigh_cleanup_and_release(n);
		}
	}
}
249
/* Flush all cached entries for @dev after its link-layer address has
 * changed; stale entries would keep resolving to the old address.
 * Takes tbl->lock itself.
 */
void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	write_unlock_bh(&tbl->lock);
}
EXPORT_SYMBOL(neigh_changeaddr);
257
/* Device is going down: flush its neighbour and proxy-neighbour entries,
 * then stop the proxy timer and discard any queued proxy packets.
 * Always returns 0.
 */
int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	write_lock_bh(&tbl->lock);
	neigh_flush_dev(tbl, dev);
	pneigh_ifdown(tbl, dev);
	write_unlock_bh(&tbl->lock);

	/* Synchronize with a possibly running proxy_timer before purging
	 * the queue it feeds from.
	 */
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	return 0;
}
EXPORT_SYMBOL(neigh_ifdown);
270
/* Allocate and minimally initialise a neighbour entry for @tbl/@dev.
 *
 * Enforces the table's GC thresholds first: at or above gc_thresh3 (or
 * above gc_thresh2 with the last flush more than 5 s ago) a synchronous
 * neigh_forced_gc() must free something, otherwise allocation is
 * refused.  The entry comes back dead (n->dead = 1), unhashed, with a
 * single reference; __neigh_create() links it in and clears ->dead.
 * Returns NULL on allocation failure or GC pressure.
 */
static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
{
	struct neighbour *n = NULL;
	unsigned long now = jiffies;
	int entries;

	entries = atomic_inc_return(&tbl->entries) - 1;
	if (entries >= tbl->gc_thresh3 ||
	    (entries >= tbl->gc_thresh2 &&
	     time_after(now, tbl->last_flush + 5 * HZ))) {
		if (!neigh_forced_gc(tbl) &&
		    entries >= tbl->gc_thresh3)
			goto out_entries;
	}

	/* entry_size covers the protocol part; neigh_priv_len is extra
	 * per-device private space appended to the entry.
	 */
	n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
	if (!n)
		goto out_entries;

	__skb_queue_head_init(&n->arp_queue);
	rwlock_init(&n->lock);
	seqlock_init(&n->ha_lock);
	n->updated        = n->used = now;
	n->nud_state      = NUD_NONE;
	n->output         = neigh_blackhole;
	seqlock_init(&n->hh.hh_lock);
	n->parms          = neigh_parms_clone(&tbl->parms);
	setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);

	NEIGH_CACHE_STAT_INC(tbl, allocs);
	n->tbl            = tbl;
	atomic_set(&n->refcnt, 1);
	n->dead           = 1;		/* not yet hashed in */
out:
	return n;

out_entries:
	/* Undo the optimistic atomic_inc_return() above. */
	atomic_dec(&tbl->entries);
	goto out;
}
311
/* Pick a random hash seed; setting the low bit guarantees the seed is
 * never zero.
 */
static void neigh_get_hash_rnd(u32 *x)
{
	get_random_bytes(x, sizeof(*x));
	*x |= 1;
}
317
/* Allocate a hash table with 2^shift zeroed buckets plus fresh hash
 * seeds.  Bucket arrays larger than a page come from the page allocator
 * instead of kmalloc.  GFP_ATOMIC because this can run under tbl->lock
 * on the insert path.  Returns NULL on failure.
 */
static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
{
	size_t size = (1 << shift) * sizeof(struct neighbour *);
	struct neigh_hash_table *ret;
	struct neighbour __rcu **buckets;
	int i;

	ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
	if (!ret)
		return NULL;
	if (size <= PAGE_SIZE)
		buckets = kzalloc(size, GFP_ATOMIC);
	else
		buckets = (struct neighbour __rcu **)
			  __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
					   get_order(size));
	if (!buckets) {
		kfree(ret);
		return NULL;
	}
	ret->hash_buckets = buckets;
	ret->hash_shift = shift;
	for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
		neigh_get_hash_rnd(&ret->hash_rnd[i]);
	return ret;
}
344
/* RCU callback: free a retired hash table after a grace period, once no
 * lockless reader can still see it.  Must mirror neigh_hash_alloc()'s
 * size-dependent choice of kzalloc vs. page allocator.
 */
static void neigh_hash_free_rcu(struct rcu_head *head)
{
	struct neigh_hash_table *nht = container_of(head,
						    struct neigh_hash_table,
						    rcu);
	size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
	struct neighbour __rcu **buckets = nht->hash_buckets;

	if (size <= PAGE_SIZE)
		kfree(buckets);
	else
		free_pages((unsigned long)buckets, get_order(size));
	kfree(nht);
}
359
/* Replace tbl->nht with a table of 2^new_shift buckets, rehashing every
 * entry with the new table's seeds.  Readers under RCU keep using the
 * old table until the rcu_assign_pointer() switch; the old table is
 * freed only after a grace period.  On allocation failure the old table
 * is kept and returned unchanged.
 *
 * Caller must hold tbl->lock for writing.
 */
static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
						unsigned long new_shift)
{
	unsigned int i, hash;
	struct neigh_hash_table *new_nht, *old_nht;

	NEIGH_CACHE_STAT_INC(tbl, hash_grows);

	old_nht = rcu_dereference_protected(tbl->nht,
					    lockdep_is_held(&tbl->lock));
	new_nht = neigh_hash_alloc(new_shift);
	if (!new_nht)
		return old_nht;

	for (i = 0; i < (1 << old_nht->hash_shift); i++) {
		struct neighbour *n, *next;

		for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
						   lockdep_is_held(&tbl->lock));
		     n != NULL;
		     n = next) {
			/* Rehash with the new seeds; the bucket index is
			 * taken from the top new_shift bits of the hash.
			 */
			hash = tbl->hash(n->primary_key, n->dev,
					 new_nht->hash_rnd);

			hash >>= (32 - new_nht->hash_shift);
			next = rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock));

			rcu_assign_pointer(n->next,
					   rcu_dereference_protected(
						new_nht->hash_buckets[hash],
						lockdep_is_held(&tbl->lock)));
			rcu_assign_pointer(new_nht->hash_buckets[hash], n);
		}
	}

	rcu_assign_pointer(tbl->nht, new_nht);
	call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
	return new_nht;
}
400
/* Find the entry for @pkey/@dev.  Runs locklessly under RCU; on a hit a
 * reference is taken for the caller.  A hit whose refcount already
 * dropped to zero (entry mid-teardown) is reported as NULL.
 * Returns the referenced entry or NULL.
 */
struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
			       struct net_device *dev)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			/* NOTE(review): "hits" is bumped even when the
			 * refcount race above turns the hit into NULL.
			 */
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup);
430
/* Like neigh_lookup() but keyed by (pkey, netns) only, ignoring the
 * device (hashes with dev == NULL).  Runs under RCU; on a hit the
 * refcount is bumped for the caller.  Returns the entry or NULL.
 */
struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
				     const void *pkey)
{
	struct neighbour *n;
	int key_len = tbl->key_len;
	u32 hash_val;
	struct neigh_hash_table *nht;

	NEIGH_CACHE_STAT_INC(tbl, lookups);

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);
	hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);

	for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
	     n != NULL;
	     n = rcu_dereference_bh(n->next)) {
		if (!memcmp(n->primary_key, pkey, key_len) &&
		    net_eq(dev_net(n->dev), net)) {
			if (!atomic_inc_not_zero(&n->refcnt))
				n = NULL;
			NEIGH_CACHE_STAT_INC(tbl, hits);
			break;
		}
	}

	rcu_read_unlock_bh();
	return n;
}
EXPORT_SYMBOL(neigh_lookup_nodev);
461
/* Create the entry for @pkey/@dev, insert it into @tbl and return it.
 *
 * Allocation and the protocol/device constructors run unlocked; the
 * insert runs under tbl->lock, growing the hash table first when there
 * is more than one entry per bucket on average.  If a concurrent
 * creator already inserted a matching entry, that one is returned and
 * the new allocation is released.  With @want_ref a reference is taken
 * for the caller.  Returns the entry or an ERR_PTR() on failure.
 */
struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
				 struct net_device *dev, bool want_ref)
{
	u32 hash_val;
	int key_len = tbl->key_len;
	int error;
	struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
	struct neigh_hash_table *nht;

	if (!n) {
		rc = ERR_PTR(-ENOBUFS);
		goto out;
	}

	memcpy(n->primary_key, pkey, key_len);
	n->dev = dev;
	dev_hold(dev);

	/* Protocol specific setup. */
	if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	if (dev->netdev_ops->ndo_neigh_construct) {
		error = dev->netdev_ops->ndo_neigh_construct(n);
		if (error < 0) {
			rc = ERR_PTR(error);
			goto out_neigh_release;
		}
	}

	/* Device specific setup. */
	if (n->parms->neigh_setup &&
	    (error = n->parms->neigh_setup(n)) < 0) {
		rc = ERR_PTR(error);
		goto out_neigh_release;
	}

	/* Backdate ->confirmed so the fresh entry does not count as
	 * recently confirmed.
	 */
	n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);

	write_lock_bh(&tbl->lock);
	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));

	if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
		nht = neigh_hash_grow(tbl, nht->hash_shift + 1);

	hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);

	/* parms died while we were unlocked (device being torn down). */
	if (n->parms->dead) {
		rc = ERR_PTR(-EINVAL);
		goto out_tbl_unlock;
	}

	/* Lost a race against a concurrent creator?  Use theirs. */
	for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
					    lockdep_is_held(&tbl->lock));
	     n1 != NULL;
	     n1 = rcu_dereference_protected(n1->next,
			lockdep_is_held(&tbl->lock))) {
		if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
			if (want_ref)
				neigh_hold(n1);
			rc = n1;
			goto out_tbl_unlock;
		}
	}

	n->dead = 0;
	if (want_ref)
		neigh_hold(n);
	/* Publish at the bucket head; rcu_assign_pointer() orders the
	 * initialisation above against concurrent RCU lookups.
	 */
	rcu_assign_pointer(n->next,
			   rcu_dereference_protected(nht->hash_buckets[hash_val],
						     lockdep_is_held(&tbl->lock)));
	rcu_assign_pointer(nht->hash_buckets[hash_val], n);
	write_unlock_bh(&tbl->lock);
	neigh_dbg(2, "neigh %p is created\n", n);
	rc = n;
out:
	return rc;
out_tbl_unlock:
	write_unlock_bh(&tbl->lock);
out_neigh_release:
	neigh_release(n);
	goto out;
}
EXPORT_SYMBOL(__neigh_create);
549
550 static u32 pneigh_hash(const void *pkey, int key_len)
551 {
552         u32 hash_val = *(u32 *)(pkey + key_len - 4);
553         hash_val ^= (hash_val >> 16);
554         hash_val ^= hash_val >> 8;
555         hash_val ^= hash_val >> 4;
556         hash_val &= PNEIGH_HASHMASK;
557         return hash_val;
558 }
559
560 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
561                                               struct net *net,
562                                               const void *pkey,
563                                               int key_len,
564                                               struct net_device *dev)
565 {
566         while (n) {
567                 if (!memcmp(n->key, pkey, key_len) &&
568                     net_eq(pneigh_net(n), net) &&
569                     (n->dev == dev || !n->dev))
570                         return n;
571                 n = n->next;
572         }
573         return NULL;
574 }
575
/* Lockless-convention proxy lookup: hashes the key and scans the chain.
 * Takes no lock itself; the caller is responsible for serializing
 * against table updates.  Returns the entry or NULL.
 */
struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
		struct net *net, const void *pkey, struct net_device *dev)
{
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
				 net, pkey, key_len, dev);
}
EXPORT_SYMBOL_GPL(__pneigh_lookup);
586
/* Look up a proxy entry; with @creat set, create it when missing.
 *
 * The lookup takes tbl->lock for reading.  Creation requires RTNL
 * (ASSERT_RTNL), which serializes concurrent creators across the
 * unlocked window between lookup and insert; the insert re-takes
 * tbl->lock for writing.  Returns the entry, or NULL on a miss
 * (!creat) or on allocation/constructor failure.
 */
struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
				    struct net *net, const void *pkey,
				    struct net_device *dev, int creat)
{
	struct pneigh_entry *n;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	read_lock_bh(&tbl->lock);
	n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
			      net, pkey, key_len, dev);
	read_unlock_bh(&tbl->lock);

	if (n || !creat)
		goto out;

	ASSERT_RTNL();

	n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
	if (!n)
		goto out;

	write_pnet(&n->net, hold_net(net));
	memcpy(n->key, pkey, key_len);
	n->dev = dev;
	if (dev)
		dev_hold(dev);

	if (tbl->pconstructor && tbl->pconstructor(n)) {
		/* Constructor failed: undo the device and netns holds. */
		if (dev)
			dev_put(dev);
		release_net(net);
		kfree(n);
		n = NULL;
		goto out;
	}

	write_lock_bh(&tbl->lock);
	n->next = tbl->phash_buckets[hash_val];
	tbl->phash_buckets[hash_val] = n;
	write_unlock_bh(&tbl->lock);
out:
	return n;
}
EXPORT_SYMBOL(pneigh_lookup);
632
633
/* Unlink and free the proxy entry matching @pkey, @dev and @net exactly
 * (no NULL-device wildcard here, unlike the lookup helpers).
 * Returns 0 on success, -ENOENT if nothing matched.
 */
int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
		  struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	int key_len = tbl->key_len;
	u32 hash_val = pneigh_hash(pkey, key_len);

	write_lock_bh(&tbl->lock);
	for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
	     np = &n->next) {
		if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
		    net_eq(pneigh_net(n), net)) {
			*np = n->next;
			/* The entry is unreachable now; run the
			 * destructor and frees outside the lock.
			 */
			write_unlock_bh(&tbl->lock);
			if (tbl->pdestructor)
				tbl->pdestructor(n);
			if (n->dev)
				dev_put(n->dev);
			release_net(pneigh_net(n));
			kfree(n);
			return 0;
		}
	}
	write_unlock_bh(&tbl->lock);
	return -ENOENT;
}
660
/* Remove every proxy entry bound to @dev (all entries if @dev is NULL).
 * Caller must hold tbl->lock for writing (see neigh_ifdown()).
 * Always returns -ENOENT; the only caller in this file ignores it.
 */
static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
{
	struct pneigh_entry *n, **np;
	u32 h;

	for (h = 0; h <= PNEIGH_HASHMASK; h++) {
		np = &tbl->phash_buckets[h];
		while ((n = *np) != NULL) {
			if (!dev || n->dev == dev) {
				*np = n->next;
				if (tbl->pdestructor)
					tbl->pdestructor(n);
				if (n->dev)
					dev_put(n->dev);
				release_net(pneigh_net(n));
				kfree(n);
				continue;
			}
			np = &n->next;
		}
	}
	return -ENOENT;
}
684
static void neigh_parms_destroy(struct neigh_parms *parms);

/* Drop one reference on a parms block; destroy it on the final put. */
static inline void neigh_parms_put(struct neigh_parms *parms)
{
	if (atomic_dec_and_test(&parms->refcnt))
		neigh_parms_destroy(parms);
}
692
/*
 *	neighbour must already be out of the table;
 *
 *	Tear down an entry whose last reference was dropped.  A still-live
 *	(!dead) entry indicates a refcounting bug: warn and bail out rather
 *	than corrupt the table.  The struct is freed via kfree_rcu() since
 *	lockless readers may still be traversing ->next.
 */
void neigh_destroy(struct neighbour *neigh)
{
	struct net_device *dev = neigh->dev;

	NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);

	if (!neigh->dead) {
		pr_warn("Destroying alive neighbour %p\n", neigh);
		dump_stack();
		return;
	}

	/* Unlinking should already have cancelled any timer. */
	if (neigh_del_timer(neigh))
		pr_warn("Impossible event\n");

	write_lock_bh(&neigh->lock);
	__skb_queue_purge(&neigh->arp_queue);
	write_unlock_bh(&neigh->lock);
	neigh->arp_queue_len_bytes = 0;

	if (dev->netdev_ops->ndo_neigh_destroy)
		dev->netdev_ops->ndo_neigh_destroy(neigh);

	dev_put(dev);
	neigh_parms_put(neigh->parms);

	neigh_dbg(2, "neigh %p is destroyed\n", neigh);

	atomic_dec(&neigh->tbl->entries);
	kfree_rcu(neigh, rcu);
}
EXPORT_SYMBOL(neigh_destroy);
729
/* Neighbour state is suspicious;
   disable fast path.

   Called with write_locked neigh.
 */
static void neigh_suspect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is suspected\n", neigh);

	/* ops->output is the slow path that revalidates on each packet. */
	neigh->output = neigh->ops->output;
}
741
/* Neighbour state is OK;
   enable fast path.

   Called with write_locked neigh.
 */
static void neigh_connect(struct neighbour *neigh)
{
	neigh_dbg(2, "neigh %p is connected\n", neigh);

	/* connected_output skips revalidation while the entry is valid. */
	neigh->output = neigh->ops->connected_output;
}
753
754 static void neigh_periodic_work(struct work_struct *work)
755 {
756         struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
757         struct neighbour *n;
758         struct neighbour __rcu **np;
759         unsigned int i;
760         struct neigh_hash_table *nht;
761
762         NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
763
764         write_lock_bh(&tbl->lock);
765         nht = rcu_dereference_protected(tbl->nht,
766                                         lockdep_is_held(&tbl->lock));
767
768         if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
769                 goto out;
770
771         /*
772          *      periodically recompute ReachableTime from random function
773          */
774
775         if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
776                 struct neigh_parms *p;
777                 tbl->last_rand = jiffies;
778                 for (p = &tbl->parms; p; p = p->next)
779                         p->reachable_time =
780                                 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
781         }
782
783         for (i = 0 ; i < (1 << nht->hash_shift); i++) {
784                 np = &nht->hash_buckets[i];
785
786                 while ((n = rcu_dereference_protected(*np,
787                                 lockdep_is_held(&tbl->lock))) != NULL) {
788                         unsigned int state;
789
790                         write_lock(&n->lock);
791
792                         state = n->nud_state;
793                         if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
794                                 write_unlock(&n->lock);
795                                 goto next_elt;
796                         }
797
798                         if (time_before(n->used, n->confirmed))
799                                 n->used = n->confirmed;
800
801                         if (atomic_read(&n->refcnt) == 1 &&
802                             (state == NUD_FAILED ||
803                              time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
804                                 *np = n->next;
805                                 n->dead = 1;
806                                 write_unlock(&n->lock);
807                                 neigh_cleanup_and_release(n);
808                                 continue;
809                         }
810                         write_unlock(&n->lock);
811
812 next_elt:
813                         np = &n->next;
814                 }
815                 /*
816                  * It's fine to release lock here, even if hash table
817                  * grows while we are preempted.
818                  */
819                 write_unlock_bh(&tbl->lock);
820                 cond_resched();
821                 write_lock_bh(&tbl->lock);
822                 nht = rcu_dereference_protected(tbl->nht,
823                                                 lockdep_is_held(&tbl->lock));
824         }
825 out:
826         /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
827          * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
828          * BASE_REACHABLE_TIME.
829          */
830         schedule_delayed_work(&tbl->gc_work,
831                               NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
832         write_unlock_bh(&tbl->lock);
833 }
834
835 static __inline__ int neigh_max_probes(struct neighbour *n)
836 {
837         struct neigh_parms *p = n->parms;
838         return (n->nud_state & NUD_PROBE) ?
839                 NEIGH_VAR(p, UCAST_PROBES) :
840                 NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
841                 NEIGH_VAR(p, MCAST_PROBES);
842 }
843
/* Bookkeeping for an entry entering NUD_FAILED: bump the failure stat,
 * report unreachability for each queued skb, then purge the queue.
 * Called with neigh->lock write-held; the lock is dropped around each
 * error_report() because that callback may re-enter neighbour code, so
 * the NUD_FAILED state is re-checked after every reacquisition.
 */
static void neigh_invalidate(struct neighbour *neigh)
	__releases(neigh->lock)
	__acquires(neigh->lock)
{
	struct sk_buff *skb;

	NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
	neigh_dbg(2, "neigh %p is failed\n", neigh);
	neigh->updated = jiffies;

	/* It is very thin place. report_unreachable is very complicated
	   routine. Particularly, it can hit the same neighbour entry!

	   So that, we try to be accurate and avoid dead loop. --ANK
	 */
	while (neigh->nud_state == NUD_FAILED &&
	       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
		write_unlock(&neigh->lock);
		neigh->ops->error_report(neigh, skb);
		write_lock(&neigh->lock);
	}
	__skb_queue_purge(&neigh->arp_queue);
	neigh->arp_queue_len_bytes = 0;
}
868
/* Send one solicitation for @neigh via ops->solicit() and bump the
 * probe counter.
 *
 * Entered with neigh->lock write-held; the lock is released before
 * calling solicit(), which must run unlocked.  The tail skb of
 * arp_queue is copied first so solicit() keeps a valid skb even if the
 * queue is purged or overflows concurrently after the lock is dropped.
 */
static void neigh_probe(struct neighbour *neigh)
	__releases(neigh->lock)
{
	struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);

	/* keep skb alive even if arp_queue overflows */
	if (skb)
		skb = skb_copy(skb, GFP_ATOMIC);
	write_unlock(&neigh->lock);
	neigh->ops->solicit(neigh, skb);
	atomic_inc(&neigh->probes);
	kfree_skb(skb);
}
881
/* Called when a timer expires for a neighbour entry. */

/* Per-entry NUD state machine tick.  Depending on how long the entry
 * has gone unconfirmed/unused, it is demoted (REACHABLE -> DELAY ->
 * STALE), promoted (DELAY -> REACHABLE on recent confirmation), or
 * advanced into active probing (DELAY -> PROBE); once the probe budget
 * (neigh_max_probes()) is exhausted the entry becomes NUD_FAILED.
 * The timer is re-armed while the entry stays in a NUD_IN_TIMER state.
 *
 * Runs in timer context.  The pending timer holds a reference on the
 * entry which is dropped at the end.
 */
static void neigh_timer_handler(unsigned long arg)
{
	unsigned long now, next;
	struct neighbour *neigh = (struct neighbour *)arg;
	unsigned int state;
	int notify = 0;

	write_lock(&neigh->lock);

	state = neigh->nud_state;
	now = jiffies;
	next = now + HZ;	/* default re-arm interval */

	/* Timer may race with a state change that left a timed state. */
	if (!(state & NUD_IN_TIMER))
		goto out;

	if (state & NUD_REACHABLE) {
		if (time_before_eq(now,
				   neigh->confirmed + neigh->parms->reachable_time)) {
			/* Still confirmed recently enough; just re-arm. */
			neigh_dbg(2, "neigh %p is still alive\n", neigh);
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else if (time_before_eq(now,
					  neigh->used +
					  NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* Recently used but unconfirmed: give the peer a
			 * grace period before probing.
			 */
			neigh_dbg(2, "neigh %p is delayed\n", neigh);
			neigh->nud_state = NUD_DELAY;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
		} else {
			/* Neither confirmed nor used: mark stale. */
			neigh_dbg(2, "neigh %p is suspected\n", neigh);
			neigh->nud_state = NUD_STALE;
			neigh->updated = jiffies;
			neigh_suspect(neigh);
			notify = 1;
		}
	} else if (state & NUD_DELAY) {
		if (time_before_eq(now,
				   neigh->confirmed +
				   NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
			/* Confirmation arrived during the delay window. */
			neigh_dbg(2, "neigh %p is now reachable\n", neigh);
			neigh->nud_state = NUD_REACHABLE;
			neigh->updated = jiffies;
			neigh_connect(neigh);
			notify = 1;
			next = neigh->confirmed + neigh->parms->reachable_time;
		} else {
			/* Grace period expired: start unicast probing. */
			neigh_dbg(2, "neigh %p is probed\n", neigh);
			neigh->nud_state = NUD_PROBE;
			neigh->updated = jiffies;
			atomic_set(&neigh->probes, 0);
			next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
		}
	} else {
		/* NUD_PROBE|NUD_INCOMPLETE */
		next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
	}

	/* Probe budget exhausted -> give up on this entry. */
	if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
	    atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
		neigh->nud_state = NUD_FAILED;
		notify = 1;
		neigh_invalidate(neigh);
	}

	if (neigh->nud_state & NUD_IN_TIMER) {
		/* Clamp to at least HZ/2 from now to avoid tight re-fires. */
		if (time_before(next, jiffies + HZ/2))
			next = jiffies + HZ/2;
		if (!mod_timer(&neigh->timer, next))
			neigh_hold(neigh);
	}
	if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
		/* neigh_probe() drops neigh->lock for us. */
		neigh_probe(neigh);
	} else {
out:
		write_unlock(&neigh->lock);
	}

	if (notify)
		neigh_update_notify(neigh);

	neigh_release(neigh);
}
967
/* Slow path of neigh_event_send(): decide whether @skb must wait for
 * address resolution.
 *
 * Returns 0 when the entry is usable and the caller should transmit
 * immediately, 1 when the skb was consumed (queued on arp_queue for
 * later, or dropped because resolution cannot even be attempted).
 * Kicks the entry out of NUD_NONE/NUD_STALE into the resolution state
 * machine as a side effect.
 */
int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
{
	int rc;
	bool immediate_probe = false;

	write_lock_bh(&neigh->lock);

	rc = 0;
	/* Usable (or soon-to-be-verified) entry: let the caller send. */
	if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
		goto out_unlock_bh;

	if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
		if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
		    NEIGH_VAR(neigh->parms, APP_PROBES)) {
			unsigned long next, now = jiffies;

			/* Start resolution: pre-charge the probe counter
			 * with the unicast budget so only mcast/app probes
			 * remain for the INCOMPLETE phase.
			 */
			atomic_set(&neigh->probes,
				   NEIGH_VAR(neigh->parms, UCAST_PROBES));
			neigh->nud_state     = NUD_INCOMPLETE;
			neigh->updated = now;
			next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
					 HZ/2);
			neigh_add_timer(neigh, next);
			immediate_probe = true;
		} else {
			/* No way to probe at all: fail immediately. */
			neigh->nud_state = NUD_FAILED;
			neigh->updated = jiffies;
			write_unlock_bh(&neigh->lock);

			kfree_skb(skb);
			return 1;
		}
	} else if (neigh->nud_state & NUD_STALE) {
		/* Stale entry being used again: re-verify after a delay. */
		neigh_dbg(2, "neigh %p is delayed\n", neigh);
		neigh->nud_state = NUD_DELAY;
		neigh->updated = jiffies;
		neigh_add_timer(neigh, jiffies +
				NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
	}

	if (neigh->nud_state == NUD_INCOMPLETE) {
		if (skb) {
			/* Make room within the byte quota, dropping the
			 * oldest queued packets first.
			 */
			while (neigh->arp_queue_len_bytes + skb->truesize >
			       NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
				struct sk_buff *buff;

				buff = __skb_dequeue(&neigh->arp_queue);
				if (!buff)
					break;
				neigh->arp_queue_len_bytes -= buff->truesize;
				kfree_skb(buff);
				NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
			}
			skb_dst_force(skb);
			__skb_queue_tail(&neigh->arp_queue, skb);
			neigh->arp_queue_len_bytes += skb->truesize;
		}
		rc = 1;
	}
out_unlock_bh:
	/* neigh_probe() drops neigh->lock itself; BH stays disabled from
	 * the write_lock_bh() above until local_bh_enable() below.
	 */
	if (immediate_probe)
		neigh_probe(neigh);
	else
		write_unlock(&neigh->lock);
	local_bh_enable();
	return rc;
}
EXPORT_SYMBOL(__neigh_event_send);
1036
1037 static void neigh_update_hhs(struct neighbour *neigh)
1038 {
1039         struct hh_cache *hh;
1040         void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1041                 = NULL;
1042
1043         if (neigh->dev->header_ops)
1044                 update = neigh->dev->header_ops->cache_update;
1045
1046         if (update) {
1047                 hh = &neigh->hh;
1048                 if (hh->hh_len) {
1049                         write_seqlock_bh(&hh->hh_lock);
1050                         update(hh, neigh->dev, neigh->ha);
1051                         write_sequnlock_bh(&hh->hh_lock);
1052                 }
1053         }
1054 }
1055
1056
1057
/* Generic update routine.
   -- lladdr is new lladdr or NULL, if it is not supplied.
   -- new    is new state.
   -- flags
	NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
				if it is different.
	NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
				lladdr instead of overriding it
				if it is different.
				It also allows to retain current state
				if lladdr is unchanged.
	NEIGH_UPDATE_F_ADMIN	means that the change is administrative.

	NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
				NTF_ROUTER flag.
	NEIGH_UPDATE_F_ISROUTER	indicates if the neighbour is known as
				a router.

   Caller MUST hold reference count on the entry.

   Returns 0 on success, -EPERM when a non-admin update targets a
   NOARP/PERMANENT entry, -EINVAL when no lladdr is supplied and none
   is cached.  Sends a netlink notification when the entry visibly
   changed.
 */

int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
		 u32 flags)
{
	u8 old;
	int err;
	int notify = 0;
	struct net_device *dev;
	int update_isrouter = 0;

	write_lock_bh(&neigh->lock);

	dev    = neigh->dev;
	old    = neigh->nud_state;
	err    = -EPERM;

	/* Only administrative updates may touch static entries. */
	if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
	    (old & (NUD_NOARP | NUD_PERMANENT)))
		goto out;

	/* Transition to an invalid state (NONE/FAILED/...): tear down
	 * timers and, when resolution was in flight, flush the queue.
	 */
	if (!(new & NUD_VALID)) {
		neigh_del_timer(neigh);
		if (old & NUD_CONNECTED)
			neigh_suspect(neigh);
		neigh->nud_state = new;
		err = 0;
		notify = old & NUD_VALID;
		if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
		    (new & NUD_FAILED)) {
			neigh_invalidate(neigh);
			notify = 1;
		}
		goto out;
	}

	/* Compare new lladdr with cached one */
	if (!dev->addr_len) {
		/* First case: device needs no address. */
		lladdr = neigh->ha;
	} else if (lladdr) {
		/* The second case: if something is already cached
		   and a new address is proposed:
		   - compare new & old
		   - if they are different, check override flag
		 */
		if ((old & NUD_VALID) &&
		    !memcmp(lladdr, neigh->ha, dev->addr_len))
			lladdr = neigh->ha;
	} else {
		/* No address is supplied; if we know something,
		   use it, otherwise discard the request.
		 */
		err = -EINVAL;
		if (!(old & NUD_VALID))
			goto out;
		lladdr = neigh->ha;
	}

	if (new & NUD_CONNECTED)
		neigh->confirmed = jiffies;
	neigh->updated = jiffies;

	/* If entry was valid and address is not changed,
	   do not change entry state, if new one is STALE.
	 */
	err = 0;
	update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
	if (old & NUD_VALID) {
		if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
			update_isrouter = 0;
			if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
			    (old & NUD_CONNECTED)) {
				/* Conflicting address without override:
				 * keep ours but force re-validation.
				 */
				lladdr = neigh->ha;
				new = NUD_STALE;
			} else
				goto out;
		} else {
			if (lladdr == neigh->ha && new == NUD_STALE &&
			    ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
			     (old & NUD_CONNECTED))
			    )
				new = old;
		}
	}

	if (new != old) {
		neigh_del_timer(neigh);
		if (new & NUD_IN_TIMER)
			neigh_add_timer(neigh, (jiffies +
						((new & NUD_REACHABLE) ?
						 neigh->parms->reachable_time :
						 0)));
		neigh->nud_state = new;
	}

	if (lladdr != neigh->ha) {
		/* ha is read locklessly under ha_lock's seqlock by the
		 * output paths; update it under the write side.
		 */
		write_seqlock(&neigh->ha_lock);
		memcpy(&neigh->ha, lladdr, dev->addr_len);
		write_sequnlock(&neigh->ha_lock);
		neigh_update_hhs(neigh);
		/* Address changed without confirmation: backdate
		 * 'confirmed' so the entry goes stale promptly.
		 */
		if (!(new & NUD_CONNECTED))
			neigh->confirmed = jiffies -
				      (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
		notify = 1;
	}
	if (new == old)
		goto out;
	if (new & NUD_CONNECTED)
		neigh_connect(neigh);
	else
		neigh_suspect(neigh);
	if (!(old & NUD_VALID)) {
		struct sk_buff *skb;

		/* Again: avoid dead loop if something went wrong */

		while (neigh->nud_state & NUD_VALID &&
		       (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
			struct dst_entry *dst = skb_dst(skb);
			struct neighbour *n2, *n1 = neigh;
			write_unlock_bh(&neigh->lock);

			rcu_read_lock();

			/* Why not just use 'neigh' as-is?  The problem is that
			 * things such as shaper, eql, and sch_teql can end up
			 * using alternative, different, neigh objects to output
			 * the packet in the output path.  So what we need to do
			 * here is re-lookup the top-level neigh in the path so
			 * we can reinject the packet there.
			 */
			n2 = NULL;
			if (dst) {
				n2 = dst_neigh_lookup_skb(dst, skb);
				if (n2)
					n1 = n2;
			}
			n1->output(n1, skb);
			if (n2)
				neigh_release(n2);
			rcu_read_unlock();

			write_lock_bh(&neigh->lock);
		}
		__skb_queue_purge(&neigh->arp_queue);
		neigh->arp_queue_len_bytes = 0;
	}
out:
	if (update_isrouter) {
		neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
			(neigh->flags | NTF_ROUTER) :
			(neigh->flags & ~NTF_ROUTER);
	}
	write_unlock_bh(&neigh->lock);

	if (notify)
		neigh_update_notify(neigh);

	return err;
}
EXPORT_SYMBOL(neigh_update);
1239
1240 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1241                                  u8 *lladdr, void *saddr,
1242                                  struct net_device *dev)
1243 {
1244         struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1245                                                  lladdr || !dev->addr_len);
1246         if (neigh)
1247                 neigh_update(neigh, lladdr, NUD_STALE,
1248                              NEIGH_UPDATE_F_OVERRIDE);
1249         return neigh;
1250 }
1251 EXPORT_SYMBOL(neigh_event_ns);
1252
/* called with read_lock_bh(&n->lock); */
/* NOTE(review): the comment above looks stale — the only caller visible
 * here (neigh_resolve_output) invokes this without holding n->lock;
 * verify against the full tree before relying on it.
 *
 * Populate the cached hardware header for @n once, via the device's
 * header_ops->cache hook.  hh_len != 0 marks the cache as initialized,
 * and the write lock ensures only one thread performs the init.
 */
static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
{
	struct net_device *dev = dst->dev;
	__be16 prot = dst->ops->protocol;
	struct hh_cache *hh = &n->hh;

	write_lock_bh(&n->lock);

	/* Only one thread can come in here and initialize the
	 * hh_cache entry.
	 */
	if (!hh->hh_len)
		dev->header_ops->cache(n, hh, prot);

	write_unlock_bh(&n->lock);
}
1270
1271 /* This function can be used in contexts, where only old dev_queue_xmit
1272  * worked, f.e. if you want to override normal output path (eql, shaper),
1273  * but resolution is not made yet.
1274  */
1275
1276 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1277 {
1278         struct net_device *dev = skb->dev;
1279
1280         __skb_pull(skb, skb_network_offset(skb));
1281
1282         if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1283                             skb->len) < 0 &&
1284             dev->header_ops->rebuild(skb))
1285                 return 0;
1286
1287         return dev_queue_xmit(skb);
1288 }
1289 EXPORT_SYMBOL(neigh_compat_output);
1290
/* Slow and careful. */

/* Output path used while a neighbour may still need resolving.  If
 * neigh_event_send() says the entry is usable, build the link-layer
 * header (initializing the hh cache on first use) and transmit;
 * otherwise the skb was queued/consumed by the resolution machinery
 * and we return 0.  Returns a negative errno when the skb is dropped.
 */
int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	int rc = 0;

	if (!dst)
		goto discard;

	/* 0 => resolved/usable now; nonzero => skb was taken over. */
	if (!neigh_event_send(neigh, skb)) {
		int err;
		struct net_device *dev = neigh->dev;
		unsigned int seq;

		if (dev->header_ops->cache && !neigh->hh.hh_len)
			neigh_hh_init(neigh, dst);

		/* Read neigh->ha consistently against concurrent
		 * address updates (seqlock retry loop).
		 */
		do {
			__skb_pull(skb, skb_network_offset(skb));
			seq = read_seqbegin(&neigh->ha_lock);
			err = dev_hard_header(skb, dev, ntohs(skb->protocol),
					      neigh->ha, NULL, skb->len);
		} while (read_seqretry(&neigh->ha_lock, seq));

		if (err >= 0)
			rc = dev_queue_xmit(skb);
		else
			goto out_kfree_skb;
	}
out:
	return rc;
discard:
	/* Falls through to out_kfree_skb on purpose. */
	neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
out_kfree_skb:
	rc = -EINVAL;
	kfree_skb(skb);
	goto out;
}
EXPORT_SYMBOL(neigh_resolve_output);
1331
1332 /* As fast as possible without hh cache */
1333
1334 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1335 {
1336         struct net_device *dev = neigh->dev;
1337         unsigned int seq;
1338         int err;
1339
1340         do {
1341                 __skb_pull(skb, skb_network_offset(skb));
1342                 seq = read_seqbegin(&neigh->ha_lock);
1343                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1344                                       neigh->ha, NULL, skb->len);
1345         } while (read_seqretry(&neigh->ha_lock, seq));
1346
1347         if (err >= 0)
1348                 err = dev_queue_xmit(skb);
1349         else {
1350                 err = -EINVAL;
1351                 kfree_skb(skb);
1352         }
1353         return err;
1354 }
1355 EXPORT_SYMBOL(neigh_connected_output);
1356
/* Output path for devices that need no link-layer header (addr_len 0):
 * hand the skb straight to the qdisc layer.
 */
int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
{
	return dev_queue_xmit(skb);
}
EXPORT_SYMBOL(neigh_direct_output);
1362
/* Timer handler for a table's delayed proxy-ARP/ND queue: replay every
 * queued request whose scheduled time has arrived via tbl->proxy_redo,
 * and re-arm the timer for the earliest request still pending.
 *
 * Runs in timer context under the proxy_queue's own spinlock.
 */
static void neigh_proxy_process(unsigned long arg)
{
	struct neigh_table *tbl = (struct neigh_table *)arg;
	long sched_next = 0;	/* 0 => nothing left to schedule */
	unsigned long now = jiffies;
	struct sk_buff *skb, *n;

	spin_lock(&tbl->proxy_queue.lock);

	skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
		/* Signed delta: <= 0 means the skb's time has come. */
		long tdif = NEIGH_CB(skb)->sched_next - now;

		if (tdif <= 0) {
			struct net_device *dev = skb->dev;

			__skb_unlink(skb, &tbl->proxy_queue);
			if (tbl->proxy_redo && netif_running(dev)) {
				rcu_read_lock();
				tbl->proxy_redo(skb);
				rcu_read_unlock();
			} else {
				kfree_skb(skb);
			}

			/* Drop the reference taken in pneigh_enqueue(). */
			dev_put(dev);
		} else if (!sched_next || tdif < sched_next)
			sched_next = tdif;
	}
	del_timer(&tbl->proxy_timer);
	if (sched_next)
		mod_timer(&tbl->proxy_timer, jiffies + sched_next);
	spin_unlock(&tbl->proxy_queue.lock);
}
1396
1397 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1398                     struct sk_buff *skb)
1399 {
1400         unsigned long now = jiffies;
1401         unsigned long sched_next = now + (net_random() %
1402                                           NEIGH_VAR(p, PROXY_DELAY));
1403
1404         if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1405                 kfree_skb(skb);
1406                 return;
1407         }
1408
1409         NEIGH_CB(skb)->sched_next = sched_next;
1410         NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1411
1412         spin_lock(&tbl->proxy_queue.lock);
1413         if (del_timer(&tbl->proxy_timer)) {
1414                 if (time_before(tbl->proxy_timer.expires, sched_next))
1415                         sched_next = tbl->proxy_timer.expires;
1416         }
1417         skb_dst_drop(skb);
1418         dev_hold(skb->dev);
1419         __skb_queue_tail(&tbl->proxy_queue, skb);
1420         mod_timer(&tbl->proxy_timer, sched_next);
1421         spin_unlock(&tbl->proxy_queue.lock);
1422 }
1423 EXPORT_SYMBOL(pneigh_enqueue);
1424
1425 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1426                                                       struct net *net, int ifindex)
1427 {
1428         struct neigh_parms *p;
1429
1430         for (p = &tbl->parms; p; p = p->next) {
1431                 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1432                     (!p->dev && !ifindex && net_eq(net, &init_net)))
1433                         return p;
1434         }
1435
1436         return NULL;
1437 }
1438
/* Allocate per-device neighbour parameters for @dev by cloning the
 * table's defaults, give the driver a chance to adjust them via
 * ndo_neigh_setup, and link them onto tbl->parms.
 *
 * Returns the new parms block (refcount 1, holding references on @dev
 * and its netns) or NULL on allocation/setup failure.
 */
struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
				      struct neigh_table *tbl)
{
	struct neigh_parms *p;
	struct net *net = dev_net(dev);
	const struct net_device_ops *ops = dev->netdev_ops;

	/* Start from a byte copy of the table defaults. */
	p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
	if (p) {
		p->tbl		  = tbl;
		atomic_set(&p->refcnt, 1);
		p->reachable_time =
				neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
		dev_hold(dev);
		p->dev = dev;
		write_pnet(&p->net, hold_net(net));
		p->sysctl_table = NULL;

		/* Driver veto: undo everything acquired so far. */
		if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
			release_net(net);
			dev_put(dev);
			kfree(p);
			return NULL;
		}

		write_lock_bh(&tbl->lock);
		p->next		= tbl->parms.next;
		tbl->parms.next = p;
		write_unlock_bh(&tbl->lock);

		neigh_parms_data_state_cleanall(p);
	}
	return p;
}
EXPORT_SYMBOL(neigh_parms_alloc);
1474
/* RCU callback: drop the list's reference on a neigh_parms block once a
 * grace period guarantees no reader can still see it on tbl->parms.
 */
static void neigh_rcu_free_parms(struct rcu_head *head)
{
	struct neigh_parms *parms =
		container_of(head, struct neigh_parms, rcu_head);

	neigh_parms_put(parms);
}
1482
/* Unlink @parms from @tbl's list, mark it dead, drop its device
 * reference, and defer the final put to RCU (neigh_rcu_free_parms).
 * The table's built-in default parms are never released this way.
 */
void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
{
	struct neigh_parms **p;

	if (!parms || parms == &tbl->parms)
		return;
	write_lock_bh(&tbl->lock);
	for (p = &tbl->parms.next; *p; p = &(*p)->next) {
		if (*p == parms) {
			*p = parms->next;
			parms->dead = 1;
			write_unlock_bh(&tbl->lock);
			if (parms->dev)
				dev_put(parms->dev);
			/* Final neigh_parms_put happens after a grace
			 * period, so lockless readers stay safe.
			 */
			call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
			return;
		}
	}
	write_unlock_bh(&tbl->lock);
	neigh_dbg(1, "%s: not found\n", __func__);
}
EXPORT_SYMBOL(neigh_parms_release);
1505
/* Final destructor, reached when the parms refcount hits zero: release
 * the netns reference taken in neigh_parms_alloc() and free the block.
 */
static void neigh_parms_destroy(struct neigh_parms *parms)
{
	release_net(neigh_parms_net(parms));
	kfree(parms);
}
1511
1512 static struct lock_class_key neigh_table_proxy_queue_class;
1513
/* One-time initialization of a protocol's neighbour table: default
 * parms, per-cpu statistics, /proc entry, the main and proxy hash
 * tables, locks, the periodic GC work and the proxy timer.
 *
 * Allocation failures here panic — this runs at protocol init time
 * where there is no sane way to continue.
 */
static void neigh_table_init_no_netlink(struct neigh_table *tbl)
{
	unsigned long now = jiffies;
	unsigned long phsize;

	write_pnet(&tbl->parms.net, &init_net);
	atomic_set(&tbl->parms.refcnt, 1);
	/* Randomize so all entries don't expire in lockstep. */
	tbl->parms.reachable_time =
			  neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));

	tbl->stats = alloc_percpu(struct neigh_statistics);
	if (!tbl->stats)
		panic("cannot create neighbour cache statistics");

#ifdef CONFIG_PROC_FS
	if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
			      &neigh_stat_seq_fops, tbl))
		panic("cannot create neighbour proc dir entry");
#endif

	/* Initial hash table of 2^3 = 8 buckets; grown on demand. */
	RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));

	phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
	tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);

	if (!tbl->nht || !tbl->phash_buckets)
		panic("cannot allocate neighbour cache hashes");

	if (!tbl->entry_size)
		tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
					tbl->key_len, NEIGH_PRIV_ALIGN);
	else
		WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);

	rwlock_init(&tbl->lock);
	INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
	schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
	setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
	skb_queue_head_init_class(&tbl->proxy_queue,
			&neigh_table_proxy_queue_class);

	tbl->last_flush = now;
	tbl->last_rand	= now + tbl->parms.reachable_time * 20;
}
1558
/* Initialize @tbl and register it on the global neigh_tables list.
 * Registering a second table for the same address family is a caller
 * bug; it is logged loudly but the table is linked in regardless.
 */
void neigh_table_init(struct neigh_table *tbl)
{
	struct neigh_table *tmp;

	neigh_table_init_no_netlink(tbl);
	write_lock(&neigh_tbl_lock);
	/* tmp is non-NULL afterwards iff the family is already present. */
	for (tmp = neigh_tables; tmp; tmp = tmp->next) {
		if (tmp->family == tbl->family)
			break;
	}
	tbl->next	= neigh_tables;
	neigh_tables	= tbl;
	write_unlock(&neigh_tbl_lock);

	if (unlikely(tmp)) {
		pr_err("Registering multiple tables for family %d\n",
		       tbl->family);
		dump_stack();
	}
}
EXPORT_SYMBOL(neigh_table_init);
1580
/* Tear down @tbl at protocol-module unload: stop GC work and the proxy
 * timer, drop all entries, unlink the table from the global list, and
 * free its hashes, /proc entry and statistics.  Always returns 0.
 */
int neigh_table_clear(struct neigh_table *tbl)
{
	struct neigh_table **tp;

	/* It is not clean... Fix it to unload IPv6 module safely */
	cancel_delayed_work_sync(&tbl->gc_work);
	del_timer_sync(&tbl->proxy_timer);
	pneigh_queue_purge(&tbl->proxy_queue);
	neigh_ifdown(tbl, NULL);
	/* Entries still alive here indicate a refcount leak. */
	if (atomic_read(&tbl->entries))
		pr_crit("neighbour leakage\n");
	write_lock(&neigh_tbl_lock);
	for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
		if (*tp == tbl) {
			*tp = tbl->next;
			break;
		}
	}
	write_unlock(&neigh_tbl_lock);

	/* Hash table is RCU-protected; free it after a grace period. */
	call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
		 neigh_hash_free_rcu);
	tbl->nht = NULL;

	kfree(tbl->phash_buckets);
	tbl->phash_buckets = NULL;

	remove_proc_entry(tbl->id, init_net.proc_net_stat);

	free_percpu(tbl->stats);
	tbl->stats = NULL;

	return 0;
}
EXPORT_SYMBOL(neigh_table_clear);
1616
/* RTM_DELNEIGH handler: delete a neighbour (or proxy) entry described
 * by the netlink message.  A regular entry is "deleted" by forcing it
 * to NUD_FAILED via an administrative neigh_update(); NTF_PROXY
 * requests go through pneigh_delete() instead.
 *
 * Runs under RTNL.  Returns 0 or a negative errno (-EINVAL on a
 * malformed message, -ENODEV, -ENOENT, -EAFNOSUPPORT).
 */
static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *dst_attr;
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err = -EINVAL;

	ASSERT_RTNL();
	if (nlmsg_len(nlh) < sizeof(*ndm))
		goto out;

	dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
	if (dst_attr == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		struct neighbour *neigh;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* Matching table found; drop the list lock before the
		 * lookups below (loop is exited on every path past here).
		 */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(dst_attr) < tbl->key_len)
			goto out;

		if (ndm->ndm_flags & NTF_PROXY) {
			err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
			goto out;
		}

		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
		if (neigh == NULL) {
			err = -ENOENT;
			goto out;
		}

		/* Administrative override to FAILED == deletion. */
		err = neigh_update(neigh, NULL, NUD_FAILED,
				   NEIGH_UPDATE_F_OVERRIDE |
				   NEIGH_UPDATE_F_ADMIN);
		neigh_release(neigh);
		goto out;
	}
	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;

out:
	return err;
}
1680
/* RTM_NEWNEIGH handler: create or update a neighbour (or, with
 * NTF_PROXY, a proxy) entry.  Honours NLM_F_CREATE / NLM_F_EXCL /
 * NLM_F_REPLACE semantics.  Returns 0 or a negative errno.
 */
static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct ndmsg *ndm;
	struct nlattr *tb[NDA_MAX+1];
	struct neigh_table *tbl;
	struct net_device *dev = NULL;
	int err;

	ASSERT_RTNL();
	err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
	if (err < 0)
		goto out;

	err = -EINVAL;
	if (tb[NDA_DST] == NULL)
		goto out;

	ndm = nlmsg_data(nlh);
	if (ndm->ndm_ifindex) {
		dev = __dev_get_by_index(net, ndm->ndm_ifindex);
		if (dev == NULL) {
			err = -ENODEV;
			goto out;
		}

		/* A supplied link-layer address must be long enough for
		 * this device type.
		 */
		if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
			goto out;
	}

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
		struct neighbour *neigh;
		void *dst, *lladdr;

		if (tbl->family != ndm->ndm_family)
			continue;
		/* NOTE(review): list lock dropped while tbl is still used
		 * below — relies on tables not being unregistered.
		 */
		read_unlock(&neigh_tbl_lock);

		if (nla_len(tb[NDA_DST]) < tbl->key_len)
			goto out;
		dst = nla_data(tb[NDA_DST]);
		lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;

		if (ndm->ndm_flags & NTF_PROXY) {
			struct pneigh_entry *pn;

			err = -ENOBUFS;
			/* Final argument 1 => create the entry if missing. */
			pn = pneigh_lookup(tbl, net, dst, dev, 1);
			if (pn) {
				pn->flags = ndm->ndm_flags;
				err = 0;
			}
			goto out;
		}

		/* Non-proxy entries always need a device. */
		if (dev == NULL)
			goto out;

		neigh = neigh_lookup(tbl, dst, dev);
		if (neigh == NULL) {
			if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
				err = -ENOENT;
				goto out;
			}

			neigh = __neigh_lookup_errno(tbl, dst, dev);
			if (IS_ERR(neigh)) {
				err = PTR_ERR(neigh);
				goto out;
			}
		} else {
			if (nlh->nlmsg_flags & NLM_F_EXCL) {
				err = -EEXIST;
				neigh_release(neigh);
				goto out;
			}

			/* Without NLM_F_REPLACE an existing entry may not
			 * be overridden by this request.
			 */
			if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
				flags &= ~NEIGH_UPDATE_F_OVERRIDE;
		}

		if (ndm->ndm_flags & NTF_USE) {
			/* NTF_USE: just kick resolution for the entry,
			 * don't change its state/address directly.
			 */
			neigh_event_send(neigh, NULL);
			err = 0;
		} else
			err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
		neigh_release(neigh);
		goto out;
	}

	read_unlock(&neigh_tbl_lock);
	err = -EAFNOSUPPORT;
out:
	return err;
}
1778
1779 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1780 {
1781         struct nlattr *nest;
1782
1783         nest = nla_nest_start(skb, NDTA_PARMS);
1784         if (nest == NULL)
1785                 return -ENOBUFS;
1786
1787         if ((parms->dev &&
1788              nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1789             nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1790             nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
1791                         NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1792             /* approximative value for deprecated QUEUE_LEN (in packets) */
1793             nla_put_u32(skb, NDTPA_QUEUE_LEN,
1794                         NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1795             nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
1796             nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
1797             nla_put_u32(skb, NDTPA_UCAST_PROBES,
1798                         NEIGH_VAR(parms, UCAST_PROBES)) ||
1799             nla_put_u32(skb, NDTPA_MCAST_PROBES,
1800                         NEIGH_VAR(parms, MCAST_PROBES)) ||
1801             nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1802             nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1803                           NEIGH_VAR(parms, BASE_REACHABLE_TIME)) ||
1804             nla_put_msecs(skb, NDTPA_GC_STALETIME,
1805                           NEIGH_VAR(parms, GC_STALETIME)) ||
1806             nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1807                           NEIGH_VAR(parms, DELAY_PROBE_TIME)) ||
1808             nla_put_msecs(skb, NDTPA_RETRANS_TIME,
1809                           NEIGH_VAR(parms, RETRANS_TIME)) ||
1810             nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
1811                           NEIGH_VAR(parms, ANYCAST_DELAY)) ||
1812             nla_put_msecs(skb, NDTPA_PROXY_DELAY,
1813                           NEIGH_VAR(parms, PROXY_DELAY)) ||
1814             nla_put_msecs(skb, NDTPA_LOCKTIME,
1815                           NEIGH_VAR(parms, LOCKTIME)))
1816                 goto nla_put_failure;
1817         return nla_nest_end(skb, nest);
1818
1819 nla_put_failure:
1820         nla_nest_cancel(skb, nest);
1821         return -EMSGSIZE;
1822 }
1823
/* Build a full RTM_NEWNEIGHTBL message for @tbl: table-wide settings,
 * runtime configuration (NDTA_CONFIG), per-CPU statistics summed into
 * NDTA_STATS, and the table's default parms.
 * Returns the message length or -EMSGSIZE if @skb is too small.
 */
static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
			      u32 pid, u32 seq, int type, int flags)
{
	struct nlmsghdr *nlh;
	struct ndtmsg *ndtmsg;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndtmsg = nlmsg_data(nlh);

	/* Hold tbl->lock so the values dumped below are self-consistent. */
	read_lock_bh(&tbl->lock);
	ndtmsg->ndtm_family = tbl->family;
	ndtmsg->ndtm_pad1   = 0;
	ndtmsg->ndtm_pad2   = 0;

	if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
	    nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
	    nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
	    nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
	    nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
		goto nla_put_failure;
	{
		unsigned long now = jiffies;
		unsigned int flush_delta = now - tbl->last_flush;
		unsigned int rand_delta = now - tbl->last_rand;
		struct neigh_hash_table *nht;
		struct ndt_config ndc = {
			.ndtc_key_len		= tbl->key_len,
			.ndtc_entry_size	= tbl->entry_size,
			.ndtc_entries		= atomic_read(&tbl->entries),
			.ndtc_last_flush	= jiffies_to_msecs(flush_delta),
			.ndtc_last_rand		= jiffies_to_msecs(rand_delta),
			.ndtc_proxy_qlen	= tbl->proxy_queue.qlen,
		};

		/* The hash table itself is RCU-managed (resizes swap the
		 * pointer), so sample it under rcu_read_lock_bh.
		 */
		rcu_read_lock_bh();
		nht = rcu_dereference_bh(tbl->nht);
		ndc.ndtc_hash_rnd = nht->hash_rnd[0];
		ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
		rcu_read_unlock_bh();

		if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
			goto nla_put_failure;
	}

	{
		int cpu;
		struct ndt_stats ndst;

		memset(&ndst, 0, sizeof(ndst));

		/* Sum the per-CPU counters into a single ndt_stats blob. */
		for_each_possible_cpu(cpu) {
			struct neigh_statistics	*st;

			st = per_cpu_ptr(tbl->stats, cpu);
			ndst.ndts_allocs		+= st->allocs;
			ndst.ndts_destroys		+= st->destroys;
			ndst.ndts_hash_grows		+= st->hash_grows;
			ndst.ndts_res_failed		+= st->res_failed;
			ndst.ndts_lookups		+= st->lookups;
			ndst.ndts_hits			+= st->hits;
			ndst.ndts_rcv_probes_mcast	+= st->rcv_probes_mcast;
			ndst.ndts_rcv_probes_ucast	+= st->rcv_probes_ucast;
			ndst.ndts_periodic_gc_runs	+= st->periodic_gc_runs;
			ndst.ndts_forced_gc_runs	+= st->forced_gc_runs;
		}

		if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
			goto nla_put_failure;
	}

	/* The table's default parms must never be bound to a device. */
	BUG_ON(tbl->parms.dev);
	if (neightbl_fill_parms(skb, &tbl->parms) < 0)
		goto nla_put_failure;

	read_unlock_bh(&tbl->lock);
	return nlmsg_end(skb, nlh);

nla_put_failure:
	read_unlock_bh(&tbl->lock);
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
1909
1910 static int neightbl_fill_param_info(struct sk_buff *skb,
1911                                     struct neigh_table *tbl,
1912                                     struct neigh_parms *parms,
1913                                     u32 pid, u32 seq, int type,
1914                                     unsigned int flags)
1915 {
1916         struct ndtmsg *ndtmsg;
1917         struct nlmsghdr *nlh;
1918
1919         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1920         if (nlh == NULL)
1921                 return -EMSGSIZE;
1922
1923         ndtmsg = nlmsg_data(nlh);
1924
1925         read_lock_bh(&tbl->lock);
1926         ndtmsg->ndtm_family = tbl->family;
1927         ndtmsg->ndtm_pad1   = 0;
1928         ndtmsg->ndtm_pad2   = 0;
1929
1930         if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1931             neightbl_fill_parms(skb, parms) < 0)
1932                 goto errout;
1933
1934         read_unlock_bh(&tbl->lock);
1935         return nlmsg_end(skb, nlh);
1936 errout:
1937         read_unlock_bh(&tbl->lock);
1938         nlmsg_cancel(skb, nlh);
1939         return -EMSGSIZE;
1940 }
1941
/* Validation policy for the top-level RTM_*NEIGHTBL attributes. */
static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
	[NDTA_NAME]		= { .type = NLA_STRING },
	[NDTA_THRESH1]		= { .type = NLA_U32 },
	[NDTA_THRESH2]		= { .type = NLA_U32 },
	[NDTA_THRESH3]		= { .type = NLA_U32 },
	[NDTA_GC_INTERVAL]	= { .type = NLA_U64 },
	[NDTA_PARMS]		= { .type = NLA_NESTED },
};
1950
/* Validation policy for attributes nested inside NDTA_PARMS. */
static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
	[NDTPA_IFINDEX]			= { .type = NLA_U32 },
	[NDTPA_QUEUE_LEN]		= { .type = NLA_U32 },
	[NDTPA_PROXY_QLEN]		= { .type = NLA_U32 },
	[NDTPA_APP_PROBES]		= { .type = NLA_U32 },
	[NDTPA_UCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_MCAST_PROBES]		= { .type = NLA_U32 },
	[NDTPA_BASE_REACHABLE_TIME]	= { .type = NLA_U64 },
	[NDTPA_GC_STALETIME]		= { .type = NLA_U64 },
	[NDTPA_DELAY_PROBE_TIME]	= { .type = NLA_U64 },
	[NDTPA_RETRANS_TIME]		= { .type = NLA_U64 },
	[NDTPA_ANYCAST_DELAY]		= { .type = NLA_U64 },
	[NDTPA_PROXY_DELAY]		= { .type = NLA_U64 },
	[NDTPA_LOCKTIME]		= { .type = NLA_U64 },
};
1966
/* RTM_SETNEIGHTBL handler: change table-wide thresholds / GC interval
 * and/or one neigh_parms set (selected by NDTPA_IFINDEX inside
 * NDTA_PARMS).  The table is identified by the mandatory NDTA_NAME
 * attribute plus, optionally, the family.  Returns 0 or a negative
 * errno.
 */
static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
{
	struct net *net = sock_net(skb->sk);
	struct neigh_table *tbl;
	struct ndtmsg *ndtmsg;
	struct nlattr *tb[NDTA_MAX+1];
	int err;

	err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
			  nl_neightbl_policy);
	if (err < 0)
		goto errout;

	if (tb[NDTA_NAME] == NULL) {
		err = -EINVAL;
		goto errout;
	}

	ndtmsg = nlmsg_data(nlh);
	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables; tbl; tbl = tbl->next) {
		/* Family 0 acts as a wildcard; match by name. */
		if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
			continue;

		if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
			break;
	}

	if (tbl == NULL) {
		err = -ENOENT;
		goto errout_locked;
	}

	/*
	 * We acquire tbl->lock to be nice to the periodic timers and
	 * make sure they always see a consistent set of values.
	 */
	write_lock_bh(&tbl->lock);

	if (tb[NDTA_PARMS]) {
		struct nlattr *tbp[NDTPA_MAX+1];
		struct neigh_parms *p;
		int i, ifindex = 0;

		err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
				       nl_ntbl_parm_policy);
		if (err < 0)
			goto errout_tbl_lock;

		/* ifindex 0 selects the table's default parms. */
		if (tbp[NDTPA_IFINDEX])
			ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);

		p = lookup_neigh_parms(tbl, net, ifindex);
		if (p == NULL) {
			err = -ENOENT;
			goto errout_tbl_lock;
		}

		/* Apply every supplied NDTPA_* attribute.  Unknown or
		 * read-only attributes (e.g. NDTPA_REFCNT) are skipped.
		 */
		for (i = 1; i <= NDTPA_MAX; i++) {
			if (tbp[i] == NULL)
				continue;

			switch (i) {
			case NDTPA_QUEUE_LEN:
				/* Deprecated packet-count knob: converted
				 * to bytes via a typical Ethernet frame
				 * truesize.
				 */
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]) *
					      SKB_TRUESIZE(ETH_FRAME_LEN));
				break;
			case NDTPA_QUEUE_LENBYTES:
				NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_PROXY_QLEN:
				NEIGH_VAR_SET(p, PROXY_QLEN,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_APP_PROBES:
				NEIGH_VAR_SET(p, APP_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_UCAST_PROBES:
				NEIGH_VAR_SET(p, UCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_MCAST_PROBES:
				NEIGH_VAR_SET(p, MCAST_PROBES,
					      nla_get_u32(tbp[i]));
				break;
			case NDTPA_BASE_REACHABLE_TIME:
				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_GC_STALETIME:
				NEIGH_VAR_SET(p, GC_STALETIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_DELAY_PROBE_TIME:
				NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_RETRANS_TIME:
				NEIGH_VAR_SET(p, RETRANS_TIME,
					      nla_get_msecs(tbp[i]));
				break;
			case NDTPA_ANYCAST_DELAY:
				NEIGH_VAR_SET(p, ANYCAST_DELAY, nla_get_msecs(tbp[i]));
				break;
			case NDTPA_PROXY_DELAY:
				NEIGH_VAR_SET(p, PROXY_DELAY, nla_get_msecs(tbp[i]));
				break;
			case NDTPA_LOCKTIME:
				NEIGH_VAR_SET(p, LOCKTIME, nla_get_msecs(tbp[i]));
				break;
			}
		}
	}

	/* Table-wide settings may only be changed from the initial netns. */
	err = -ENOENT;
	if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
	     tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
	    !net_eq(net, &init_net))
		goto errout_tbl_lock;

	if (tb[NDTA_THRESH1])
		tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);

	if (tb[NDTA_THRESH2])
		tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);

	if (tb[NDTA_THRESH3])
		tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);

	if (tb[NDTA_GC_INTERVAL])
		tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);

	err = 0;

errout_tbl_lock:
	write_unlock_bh(&tbl->lock);
errout_locked:
	read_unlock(&neigh_tbl_lock);
errout:
	return err;
}
2111
/* RTM_GETNEIGHTBL dump handler: for every matching table emit the
 * table info followed by each per-device parms set belonging to the
 * caller's netns.  Resume state across dump calls:
 * cb->args[0] = table index, cb->args[1] = parms index.
 */
static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int family, tidx, nidx = 0;
	int tbl_skip = cb->args[0];
	int neigh_skip = cb->args[1];
	struct neigh_table *tbl;

	/* Family 0 in the request means "all families". */
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	read_lock(&neigh_tbl_lock);
	for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
		struct neigh_parms *p;

		if (tidx < tbl_skip || (family && tbl->family != family))
			continue;

		if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
				       cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
				       NLM_F_MULTI) <= 0)
			break;

		/* tbl->parms itself was dumped by neightbl_fill_info();
		 * here we walk only the per-device clones.
		 */
		for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
			if (!net_eq(neigh_parms_net(p), net))
				continue;

			if (nidx < neigh_skip)
				goto next;

			if (neightbl_fill_param_info(skb, tbl, p,
						     NETLINK_CB(cb->skb).portid,
						     cb->nlh->nlmsg_seq,
						     RTM_NEWNEIGHTBL,
						     NLM_F_MULTI) <= 0)
				goto out;
		next:
			nidx++;
		}

		/* Skip count only applies to the table we resumed in. */
		neigh_skip = 0;
	}
out:
	read_unlock(&neigh_tbl_lock);
	cb->args[0] = tidx;
	cb->args[1] = nidx;

	return skb->len;
}
2160
/* Fill one RTM_NEWNEIGH message describing @neigh into @skb.
 * Returns the message length or -EMSGSIZE if it does not fit.
 */
static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
			   u32 pid, u32 seq, int type, unsigned int flags)
{
	unsigned long now = jiffies;
	struct nda_cacheinfo ci;
	struct nlmsghdr *nlh;
	struct ndmsg *ndm;

	nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
	if (nlh == NULL)
		return -EMSGSIZE;

	ndm = nlmsg_data(nlh);
	ndm->ndm_family	 = neigh->ops->family;
	ndm->ndm_pad1    = 0;
	ndm->ndm_pad2    = 0;
	ndm->ndm_flags	 = neigh->flags;
	ndm->ndm_type	 = neigh->type;
	ndm->ndm_ifindex = neigh->dev->ifindex;

	if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
		goto nla_put_failure;

	/* neigh->lock guards nud_state, the hw address snapshot and the
	 * timestamps sampled below.
	 */
	read_lock_bh(&neigh->lock);
	ndm->ndm_state	 = neigh->nud_state;
	if (neigh->nud_state & NUD_VALID) {
		char haddr[MAX_ADDR_LEN];

		neigh_ha_snapshot(haddr, neigh, neigh->dev);
		if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
			/* Drop the lock before bailing out. */
			read_unlock_bh(&neigh->lock);
			goto nla_put_failure;
		}
	}

	/* Cache-info ages are reported relative to "now", in clock_t. */
	ci.ndm_used	 = jiffies_to_clock_t(now - neigh->used);
	ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
	ci.ndm_updated	 = jiffies_to_clock_t(now - neigh->updated);
	ci.ndm_refcnt	 = atomic_read(&neigh->refcnt) - 1;
	read_unlock_bh(&neigh->lock);

	if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
	    nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
		goto nla_put_failure;

	return nlmsg_end(skb, nlh);

nla_put_failure:
	nlmsg_cancel(skb, nlh);
	return -EMSGSIZE;
}
2212
2213 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2214                             u32 pid, u32 seq, int type, unsigned int flags,
2215                             struct neigh_table *tbl)
2216 {
2217         struct nlmsghdr *nlh;
2218         struct ndmsg *ndm;
2219
2220         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2221         if (nlh == NULL)
2222                 return -EMSGSIZE;
2223
2224         ndm = nlmsg_data(nlh);
2225         ndm->ndm_family  = tbl->family;
2226         ndm->ndm_pad1    = 0;
2227         ndm->ndm_pad2    = 0;
2228         ndm->ndm_flags   = pn->flags | NTF_PROXY;
2229         ndm->ndm_type    = NDA_DST;
2230         ndm->ndm_ifindex = pn->dev->ifindex;
2231         ndm->ndm_state   = NUD_NONE;
2232
2233         if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2234                 goto nla_put_failure;
2235
2236         return nlmsg_end(skb, nlh);
2237
2238 nla_put_failure:
2239         nlmsg_cancel(skb, nlh);
2240         return -EMSGSIZE;
2241 }
2242
/* Notify both in-kernel netevent subscribers and netlink listeners
 * that @neigh changed.
 */
static void neigh_update_notify(struct neighbour *neigh)
{
	call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
	__neigh_notify(neigh, RTM_NEWNEIGH, 0);
}
2248
/* Dump all neighbours of @tbl belonging to the dumping socket's netns
 * into @skb, walking the hash under RCU.  Resume state across dump
 * calls: cb->args[1] = hash bucket, cb->args[2] = index within bucket.
 * Returns skb->len on completion or -1 when @skb filled up.
 */
static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
			    struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	struct neighbour *n;
	int rc, h, s_h = cb->args[1];
	int idx, s_idx = idx = cb->args[2];
	struct neigh_hash_table *nht;

	rcu_read_lock_bh();
	nht = rcu_dereference_bh(tbl->nht);

	for (h = s_h; h < (1 << nht->hash_shift); h++) {
		/* The intra-bucket skip count only applies to the bucket
		 * we resumed in.
		 */
		if (h > s_h)
			s_idx = 0;
		for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
		     n != NULL;
		     n = rcu_dereference_bh(n->next)) {
			if (!net_eq(dev_net(n->dev), net))
				continue;
			if (idx < s_idx)
				goto next;
			if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
					    cb->nlh->nlmsg_seq,
					    RTM_NEWNEIGH,
					    NLM_F_MULTI) <= 0) {
				rc = -1;
				goto out;
			}
next:
			idx++;
		}
	}
	rc = skb->len;
out:
	rcu_read_unlock_bh();
	/* Save position so the next dump call can resume here. */
	cb->args[1] = h;
	cb->args[2] = idx;
	return rc;
}
2289
2290 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2291                              struct netlink_callback *cb)
2292 {
2293         struct pneigh_entry *n;
2294         struct net *net = sock_net(skb->sk);
2295         int rc, h, s_h = cb->args[3];
2296         int idx, s_idx = idx = cb->args[4];
2297
2298         read_lock_bh(&tbl->lock);
2299
2300         for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2301                 if (h > s_h)
2302                         s_idx = 0;
2303                 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2304                         if (dev_net(n->dev) != net)
2305                                 continue;
2306                         if (idx < s_idx)
2307                                 goto next;
2308                         if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2309                                             cb->nlh->nlmsg_seq,
2310                                             RTM_NEWNEIGH,
2311                                             NLM_F_MULTI, tbl) <= 0) {
2312                                 read_unlock_bh(&tbl->lock);
2313                                 rc = -1;
2314                                 goto out;
2315                         }
2316                 next:
2317                         idx++;
2318                 }
2319         }
2320
2321         read_unlock_bh(&tbl->lock);
2322         rc = skb->len;
2323 out:
2324         cb->args[3] = h;
2325         cb->args[4] = idx;
2326         return rc;
2327
2328 }
2329
/* RTM_GETNEIGH dump handler: walk every table of the requested family
 * and dump either the neighbour cache or, when the request carries
 * ndm_flags == NTF_PROXY, the proxy entries.  cb->args[0] tracks the
 * table index across dump calls; per-table resume state lives in
 * cb->args[1..4] and is cleared when moving to a new table.
 */
static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct neigh_table *tbl;
	int t, family, s_t;
	int proxy = 0;
	int err;

	read_lock(&neigh_tbl_lock);
	family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;

	/* check for full ndmsg structure presence, family member is
	 * the same for both structures
	 */
	if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
	    ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
		proxy = 1;

	s_t = cb->args[0];

	for (tbl = neigh_tables, t = 0; tbl;
	     tbl = tbl->next, t++) {
		if (t < s_t || (family && tbl->family != family))
			continue;
		/* Entering a new table: reset the per-table resume state
		 * (args[1..4]), keeping only the table index in args[0].
		 */
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args) -
						sizeof(cb->args[0]));
		if (proxy)
			err = pneigh_dump_table(tbl, skb, cb);
		else
			err = neigh_dump_table(tbl, skb, cb);
		if (err < 0)
			break;
	}
	read_unlock(&neigh_tbl_lock);

	cb->args[0] = t;
	return skb->len;
}
2368
2369 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2370 {
2371         int chain;
2372         struct neigh_hash_table *nht;
2373
2374         rcu_read_lock_bh();
2375         nht = rcu_dereference_bh(tbl->nht);
2376
2377         read_lock(&tbl->lock); /* avoid resizes */
2378         for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2379                 struct neighbour *n;
2380
2381                 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2382                      n != NULL;
2383                      n = rcu_dereference_bh(n->next))
2384                         cb(n, cookie);
2385         }
2386         read_unlock(&tbl->lock);
2387         rcu_read_unlock_bh();
2388 }
2389 EXPORT_SYMBOL(neigh_for_each);
2390
/* The tbl->lock must be held as a writer and BH disabled. */
/* Walk every neighbour of @tbl and unlink+release each entry for
 * which @cb returns non-zero.  Entries that stay keep their position.
 */
void __neigh_for_each_release(struct neigh_table *tbl,
			      int (*cb)(struct neighbour *))
{
	int chain;
	struct neigh_hash_table *nht;

	nht = rcu_dereference_protected(tbl->nht,
					lockdep_is_held(&tbl->lock));
	for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
		struct neighbour *n;
		struct neighbour __rcu **np;

		/* np points at the link to the current entry, so an
		 * unlinked entry can be spliced out in place.
		 */
		np = &nht->hash_buckets[chain];
		while ((n = rcu_dereference_protected(*np,
					lockdep_is_held(&tbl->lock))) != NULL) {
			int release;

			write_lock(&n->lock);
			release = cb(n);
			if (release) {
				/* Splice the entry out of the chain and
				 * mark it dead while its lock is held.
				 */
				rcu_assign_pointer(*np,
					rcu_dereference_protected(n->next,
						lockdep_is_held(&tbl->lock)));
				n->dead = 1;
			} else
				np = &n->next;
			write_unlock(&n->lock);
			/* Release outside n->lock. */
			if (release)
				neigh_cleanup_and_release(n);
		}
	}
}
EXPORT_SYMBOL(__neigh_for_each_release);
2425
2426 #ifdef CONFIG_PROC_FS
2427
/*
 * Return the first neighbour entry visible to this iterator: first
 * match in the lowest-numbered hash bucket that belongs to the
 * iterator's netns and passes the optional sub-iterator / NOARP
 * filters.  Records the bucket in state->bucket.  Runs under the
 * rcu_read_lock_bh() taken in neigh_seq_start().
 */
static struct neighbour *neigh_get_first(struct seq_file *seq)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;
	struct neighbour *n = NULL;
	int bucket = state->bucket;

	/* From here on we are walking the plain neighbour table. */
	state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
	for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
		n = rcu_dereference_bh(nht->hash_buckets[bucket]);

		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				loff_t fakep = 0;
				void *v;

				/* Protocol-specific filter: skip entries the
				 * sub-iterator declines to show. */
				v = state->neigh_sub_iter(state, n, &fakep);
				if (!v)
					goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;
			/* Caller asked to hide entries that are only NUD_NOARP. */
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;
	}
	state->bucket = bucket;

	return n;
}
2466
/*
 * Advance the iterator from @n to the next visible neighbour, spilling
 * into higher-numbered buckets as chains are exhausted.  When @pos is
 * non-NULL it is decremented once per entry returned, which lets
 * neigh_get_idx() count down to a target offset.  Runs under
 * rcu_read_lock_bh() (taken in neigh_seq_start()).
 */
static struct neighbour *neigh_get_next(struct seq_file *seq,
					struct neighbour *n,
					loff_t *pos)
{
	struct neigh_seq_state *state = seq->private;
	struct net *net = seq_file_net(seq);
	struct neigh_hash_table *nht = state->nht;

	if (state->neigh_sub_iter) {
		/* Sub-iterator may still have more to show for this entry. */
		void *v = state->neigh_sub_iter(state, n, pos);
		if (v)
			return n;
	}
	n = rcu_dereference_bh(n->next);

	while (1) {
		while (n) {
			if (!net_eq(dev_net(n->dev), net))
				goto next;
			if (state->neigh_sub_iter) {
				void *v = state->neigh_sub_iter(state, n, pos);
				if (v)
					return n;
				goto next;
			}
			if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
				break;

			/* Skip entries that are only NUD_NOARP when asked. */
			if (n->nud_state & ~NUD_NOARP)
				break;
next:
			n = rcu_dereference_bh(n->next);
		}

		if (n)
			break;

		if (++state->bucket >= (1 << nht->hash_shift))
			break;

		n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
	}

	if (n && pos)
		--(*pos);
	return n;
}
2514
2515 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2516 {
2517         struct neighbour *n = neigh_get_first(seq);
2518
2519         if (n) {
2520                 --(*pos);
2521                 while (*pos) {
2522                         n = neigh_get_next(seq, n, pos);
2523                         if (!n)
2524                                 break;
2525                 }
2526         }
2527         return *pos ? NULL : n;
2528 }
2529
2530 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2531 {
2532         struct neigh_seq_state *state = seq->private;
2533         struct net *net = seq_file_net(seq);
2534         struct neigh_table *tbl = state->tbl;
2535         struct pneigh_entry *pn = NULL;
2536         int bucket = state->bucket;
2537
2538         state->flags |= NEIGH_SEQ_IS_PNEIGH;
2539         for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2540                 pn = tbl->phash_buckets[bucket];
2541                 while (pn && !net_eq(pneigh_net(pn), net))
2542                         pn = pn->next;
2543                 if (pn)
2544                         break;
2545         }
2546         state->bucket = bucket;
2547
2548         return pn;
2549 }
2550
2551 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2552                                             struct pneigh_entry *pn,
2553                                             loff_t *pos)
2554 {
2555         struct neigh_seq_state *state = seq->private;
2556         struct net *net = seq_file_net(seq);
2557         struct neigh_table *tbl = state->tbl;
2558
2559         do {
2560                 pn = pn->next;
2561         } while (pn && !net_eq(pneigh_net(pn), net));
2562
2563         while (!pn) {
2564                 if (++state->bucket > PNEIGH_HASHMASK)
2565                         break;
2566                 pn = tbl->phash_buckets[state->bucket];
2567                 while (pn && !net_eq(pneigh_net(pn), net))
2568                         pn = pn->next;
2569                 if (pn)
2570                         break;
2571         }
2572
2573         if (pn && pos)
2574                 --(*pos);
2575
2576         return pn;
2577 }
2578
2579 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2580 {
2581         struct pneigh_entry *pn = pneigh_get_first(seq);
2582
2583         if (pn) {
2584                 --(*pos);
2585                 while (*pos) {
2586                         pn = pneigh_get_next(seq, pn, pos);
2587                         if (!pn)
2588                                 break;
2589                 }
2590         }
2591         return *pos ? NULL : pn;
2592 }
2593
2594 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2595 {
2596         struct neigh_seq_state *state = seq->private;
2597         void *rc;
2598         loff_t idxpos = *pos;
2599
2600         rc = neigh_get_idx(seq, &idxpos);
2601         if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2602                 rc = pneigh_get_idx(seq, &idxpos);
2603
2604         return rc;
2605 }
2606
/*
 * Common seq_file ->start for neighbour tables (/proc/net/arp and
 * friends).  Takes rcu_read_lock_bh() -- released in neigh_seq_stop() --
 * and caches the current hash table so the whole walk sees a single
 * generation even if the table is resized meanwhile.
 */
void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
	__acquires(rcu_bh)
{
	struct neigh_seq_state *state = seq->private;

	state->tbl = tbl;
	state->bucket = 0;
	/* IS_PNEIGH is iterator-internal state; callers must not pass it. */
	state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);

	rcu_read_lock_bh();
	state->nht = rcu_dereference_bh(tbl->nht);

	return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
}
EXPORT_SYMBOL(neigh_seq_start);
2622
/*
 * Common seq_file ->next: advance within the neighbour table, then fall
 * through to the proxy (pneigh) table unless NEIGH_SEQ_NEIGH_ONLY was
 * requested.  Which table @v belongs to is tracked via the
 * NEIGH_SEQ_IS_PNEIGH bit set by pneigh_get_first().
 */
void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct neigh_seq_state *state;
	void *rc;

	if (v == SEQ_START_TOKEN) {
		rc = neigh_get_first(seq);
		goto out;
	}

	state = seq->private;
	if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
		rc = neigh_get_next(seq, v, NULL);
		if (rc)
			goto out;
		/* Neighbour table exhausted: switch to the proxy table. */
		if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
			rc = pneigh_get_first(seq);
	} else {
		/* Already on pneigh entries, which NEIGH_ONLY forbids. */
		BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
		rc = pneigh_get_next(seq, v, NULL);
	}
out:
	++(*pos);
	return rc;
}
EXPORT_SYMBOL(neigh_seq_next);
2649
/* seq_file ->stop: drop the RCU-bh read lock taken in neigh_seq_start(). */
void neigh_seq_stop(struct seq_file *seq, void *v)
	__releases(rcu_bh)
{
	rcu_read_unlock_bh();
}
EXPORT_SYMBOL(neigh_seq_stop);
2656
2657 /* statistics via seq_file */
2658
/*
 * seq_file ->start for the per-table stats file.  Position 0 is the
 * header line (SEQ_START_TOKEN); position N resumes at the first
 * possible CPU id >= N-1, leaving *pos at cpu+1 so ->next continues
 * after it.
 */
static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
{
	struct neigh_table *tbl = seq->private;
	int cpu;

	if (*pos == 0)
		return SEQ_START_TOKEN;

	for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
		if (!cpu_possible(cpu))
			continue;
		*pos = cpu+1;
		return per_cpu_ptr(tbl->stats, cpu);
	}
	return NULL;
}
2675
2676 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2677 {
2678         struct neigh_table *tbl = seq->private;
2679         int cpu;
2680
2681         for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2682                 if (!cpu_possible(cpu))
2683                         continue;
2684                 *pos = cpu+1;
2685                 return per_cpu_ptr(tbl->stats, cpu);
2686         }
2687         return NULL;
2688 }
2689
/* seq_file ->stop for the stats file: ->start takes no locks, so there
 * is nothing to release here.
 */
static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
{

}
2694
/*
 * Emit one line of the stats file: the column header for the start
 * token, otherwise the per-CPU counter block handed over by
 * ->start/->next.  "entries" is table-global; every other column is
 * this CPU's counter.
 */
static int neigh_stat_seq_show(struct seq_file *seq, void *v)
{
	struct neigh_table *tbl = seq->private;
	struct neigh_statistics *st = v;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq, "entries  allocs destroys hash_grows  lookups hits  res_failed  rcv_probes_mcast rcv_probes_ucast  periodic_gc_runs forced_gc_runs unresolved_discards\n");
		return 0;
	}

	seq_printf(seq, "%08x  %08lx %08lx %08lx  %08lx %08lx  %08lx  "
			"%08lx %08lx  %08lx %08lx %08lx\n",
		   atomic_read(&tbl->entries),

		   st->allocs,
		   st->destroys,
		   st->hash_grows,

		   st->lookups,
		   st->hits,

		   st->res_failed,

		   st->rcv_probes_mcast,
		   st->rcv_probes_ucast,

		   st->periodic_gc_runs,
		   st->forced_gc_runs,
		   st->unres_discards
		   );

	return 0;
}
2728
/* seq_file operations for the per-table statistics file. */
static const struct seq_operations neigh_stat_seq_ops = {
	.start	= neigh_stat_seq_start,
	.next	= neigh_stat_seq_next,
	.stop	= neigh_stat_seq_stop,
	.show	= neigh_stat_seq_show,
};
2735
2736 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2737 {
2738         int ret = seq_open(file, &neigh_stat_seq_ops);
2739
2740         if (!ret) {
2741                 struct seq_file *sf = file->private_data;
2742                 sf->private = PDE_DATA(inode);
2743         }
2744         return ret;
2745 };
2746
/* proc file operations for the per-table statistics file. */
static const struct file_operations neigh_stat_seq_fops = {
	.owner	 = THIS_MODULE,
	.open	 = neigh_stat_seq_open,
	.read	 = seq_read,
	.llseek	 = seq_lseek,
	.release = seq_release,
};
2754
2755 #endif /* CONFIG_PROC_FS */
2756
/*
 * Worst-case netlink message size for one neighbour notification, used
 * to size the skb in __neigh_notify() before neigh_fill_info() runs.
 */
static inline size_t neigh_nlmsg_size(void)
{
	return NLMSG_ALIGN(sizeof(struct ndmsg))
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
	       + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
	       + nla_total_size(sizeof(struct nda_cacheinfo))
	       + nla_total_size(4); /* NDA_PROBES */
}
2765
/*
 * Broadcast a netlink message of @type about @n to RTNLGRP_NEIGH
 * listeners in the entry's netns.  Best effort: on allocation or fill
 * failure the error is recorded on the rtnl socket instead of being
 * returned.
 */
static void __neigh_notify(struct neighbour *n, int type, int flags)
{
	struct net *net = dev_net(n->dev);
	struct sk_buff *skb;
	int err = -ENOBUFS;

	skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
	if (skb == NULL)
		goto errout;

	err = neigh_fill_info(skb, n, 0, 0, type, flags);
	if (err < 0) {
		/* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
		WARN_ON(err == -EMSGSIZE);
		kfree_skb(skb);
		goto errout;
	}
	rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
	return;
errout:
	if (err < 0)
		rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
}
2789
/*
 * Send an RTM_GETNEIGH request to RTNLGRP_NEIGH listeners for @n --
 * presumably so a userspace resolver can answer (the app_probes
 * solicitation path); confirm against the protocol callers.
 */
void neigh_app_ns(struct neighbour *n)
{
	__neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
}
EXPORT_SYMBOL(neigh_app_ns);
2795
2796 #ifdef CONFIG_SYSCTL
/* Shared min/max bounds for the proc_dointvec_minmax handlers below. */
static int zero;
static int int_max = INT_MAX;
/* Largest packet count whose byte conversion still fits in an int. */
static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
2800
/*
 * sysctl handler for the legacy "unres_qlen" knob.  The backing storage
 * is QUEUE_LEN_BYTES, but userspace reads/writes a packet count, so
 * convert with SKB_TRUESIZE(ETH_FRAME_LEN) in both directions, clamped
 * to [0, unres_qlen_max].
 * NOTE(review): the read-side division truncates, so a value written
 * via unres_qlen_bytes may read back smaller here -- byte-granular
 * configuration should use the _bytes knob directly.
 */
static int proc_unres_qlen(struct ctl_table *ctl, int write,
			   void __user *buffer, size_t *lenp, loff_t *ppos)
{
	int size, ret;
	/* Work on a copy so tmp.data can point at the converted value. */
	struct ctl_table tmp = *ctl;

	tmp.extra1 = &zero;
	tmp.extra2 = &unres_qlen_max;
	tmp.data = &size;

	size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);

	if (write && !ret)
		*(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
	return ret;
}
2818
2819 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2820                                                    int family)
2821 {
2822         if (family == AF_INET)
2823                 return __in_dev_arp_parms_get_rcu(dev);
2824         return NULL;
2825 }
2826
/*
 * Propagate a freshly-written default parameter to every device in
 * @net that has not explicitly overridden it (tracked via the
 * data_state bitmap set in neigh_proc_update()).
 */
static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
				  int index)
{
	struct net_device *dev;
	int family = neigh_parms_family(p);

	rcu_read_lock();
	for_each_netdev_rcu(net, dev) {
		struct neigh_parms *dst_p =
				neigh_get_dev_parms_rcu(dev, family);

		/* Skip devices whose value was set explicitly. */
		if (dst_p && !test_bit(index, dst_p->data_state))
			dst_p->data[index] = p->data[index];
	}
	rcu_read_unlock();
}
2843
/*
 * Post-write hook shared by all neigh sysctl handlers.  Marks the
 * written entry as explicitly set (so future default writes leave it
 * alone) and, for writes to the "default" table, pushes the new value
 * to devices still using defaults.  ctl->extra1/extra2 are repurposed
 * by neigh_sysctl_register() to carry the device and parms.
 */
static void neigh_proc_update(struct ctl_table *ctl, int write)
{
	struct net_device *dev = ctl->extra1;
	struct neigh_parms *p = ctl->extra2;
	struct net *net = p->net;
	/* Which NEIGH_VAR_* slot was written, from the data pointer. */
	int index = (int *) ctl->data - p->data;

	if (!write)
		return;

	set_bit(index, p->data_state);
	if (!dev) /* NULL dev means this is default value */
		neigh_copy_dflt_parms(net, p, index);
}
2858
2859 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
2860                                            void __user *buffer,
2861                                            size_t *lenp, loff_t *ppos)
2862 {
2863         struct ctl_table tmp = *ctl;
2864         int ret;
2865
2866         tmp.extra1 = &zero;
2867         tmp.extra2 = &int_max;
2868
2869         ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2870         neigh_proc_update(ctl, write);
2871         return ret;
2872 }
2873
2874 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
2875                         void __user *buffer, size_t *lenp, loff_t *ppos)
2876 {
2877         int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2878
2879         neigh_proc_update(ctl, write);
2880         return ret;
2881 }
2882 EXPORT_SYMBOL(neigh_proc_dointvec);
2883
2884 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
2885                                 void __user *buffer,
2886                                 size_t *lenp, loff_t *ppos)
2887 {
2888         int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
2889
2890         neigh_proc_update(ctl, write);
2891         return ret;
2892 }
2893 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
2894
2895 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
2896                                               void __user *buffer,
2897                                               size_t *lenp, loff_t *ppos)
2898 {
2899         int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
2900
2901         neigh_proc_update(ctl, write);
2902         return ret;
2903 }
2904
2905 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
2906                                    void __user *buffer,
2907                                    size_t *lenp, loff_t *ppos)
2908 {
2909         int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
2910
2911         neigh_proc_update(ctl, write);
2912         return ret;
2913 }
2914 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
2915
2916 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
2917                                           void __user *buffer,
2918                                           size_t *lenp, loff_t *ppos)
2919 {
2920         int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
2921
2922         neigh_proc_update(ctl, write);
2923         return ret;
2924 }
2925
/*
 * Offset of data[index] within struct neigh_parms, expressed as a fake
 * pointer (offsetof-style null-pointer trick).  neigh_sysctl_register()
 * later adds the real parms address to turn these template offsets into
 * live .data pointers.
 */
#define NEIGH_PARMS_DATA_OFFSET(index)	\
	(&((struct neigh_parms *) 0)->data[index])

/* Build one ctl_table slot bound to the NEIGH_VAR_<data_attr> storage. */
#define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
	[NEIGH_VAR_ ## attr] = { \
		.procname	= name, \
		.data		= NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
		.maxlen		= sizeof(int), \
		.mode		= mval, \
		.proc_handler	= proc, \
	}

/* Integer entry clamped to [0, INT_MAX]. */
#define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)

/* Entry stored in jiffies, exposed in seconds. */
#define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)

/* Entry stored in jiffies, exposed in USER_HZ ticks. */
#define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)

/* Entry stored in jiffies, exposed in milliseconds. */
#define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* Millisecond view of another attribute's storage (aliased .data). */
#define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)

/* Legacy packet-count view of the byte-based queue length storage. */
#define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
	NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
2955
/*
 * Template for the per-device (and per-family "default") neigh sysctl
 * directory.  neigh_sysctl_register() kmemdup()s it and rebinds each
 * .data from a template offset to the live neigh_parms.  The trailing
 * gc_* entries have no parms backing: they are either zeroed out (real
 * device) or pointed at the table-global thresholds (default dir).
 */
static struct neigh_sysctl_table {
	struct ctl_table_header *sysctl_header;
	struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
} neigh_sysctl_template __read_mostly = {
	.neigh_vars = {
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
		NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
		NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
		NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
		NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
		NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
		[NEIGH_VAR_GC_INTERVAL] = {
			.procname	= "gc_interval",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.proc_handler	= proc_dointvec_jiffies,
		},
		[NEIGH_VAR_GC_THRESH1] = {
			.procname	= "gc_thresh1",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH2] = {
			.procname	= "gc_thresh2",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		[NEIGH_VAR_GC_THRESH3] = {
			.procname	= "gc_thresh3",
			.maxlen		= sizeof(int),
			.mode		= 0644,
			.extra1		= &zero,
			.extra2		= &int_max,
			.proc_handler	= proc_dointvec_minmax,
		},
		{},
	},
};
3009
/*
 * Create the net/<ipv4|ipv6>/neigh/<dev|default>/ sysctl directory for
 * @p.  @handler, when non-NULL, replaces the handlers of the four
 * *time knobs (protocols use this to refresh derived state such as
 * reachable_time on writes).  Returns 0 on success, -ENOBUFS on any
 * failure.
 */
int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
			  proc_handler *handler)
{
	int i;
	struct neigh_sysctl_table *t;
	const char *dev_name_source;
	char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
	char *p_name;

	t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
	if (!t)
		goto err;

	/* Turn the template's .data offsets into live pointers and stash
	 * dev/parms where neigh_proc_update() expects them. */
	for (i = 0; i < ARRAY_SIZE(t->neigh_vars); i++) {
		t->neigh_vars[i].data += (long) p;
		t->neigh_vars[i].extra1 = dev;
		t->neigh_vars[i].extra2 = p;
	}

	if (dev) {
		dev_name_source = dev->name;
		/* Terminate the table early */
		memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
		       sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
	} else {
		/* Default directory: gc_* knobs point at the four ints
		 * laid out directly behind the parms block.
		 * NOTE(review): relies on the caller's allocation layout
		 * -- confirm against the neigh_table definition. */
		dev_name_source = "default";
		t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
		t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
		t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
		t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
	}

	if (handler) {
		/* RetransTime */
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
		/* ReachableTime */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
		/* RetransTime (in milliseconds)*/
		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
		/* ReachableTime (in milliseconds) */
		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
	}

	/* Don't export sysctls to unprivileged users */
	if (neigh_parms_net(p)->user_ns != &init_user_ns)
		t->neigh_vars[0].procname = NULL;

	switch (neigh_parms_family(p)) {
	case AF_INET:
	      p_name = "ipv4";
	      break;
	case AF_INET6:
	      p_name = "ipv6";
	      break;
	default:
	      BUG();
	}

	snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
		p_name, dev_name_source);
	t->sysctl_header =
		register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
	if (!t->sysctl_header)
		goto free;

	p->sysctl_table = t;
	return 0;

free:
	kfree(t);
err:
	return -ENOBUFS;
}
EXPORT_SYMBOL(neigh_sysctl_register);
3084
3085 void neigh_sysctl_unregister(struct neigh_parms *p)
3086 {
3087         if (p->sysctl_table) {
3088                 struct neigh_sysctl_table *t = p->sysctl_table;
3089                 p->sysctl_table = NULL;
3090                 unregister_net_sysctl_table(t->sysctl_header);
3091                 kfree(t);
3092         }
3093 }
3094 EXPORT_SYMBOL(neigh_sysctl_unregister);
3095
3096 #endif  /* CONFIG_SYSCTL */
3097
/*
 * Register the family-agnostic (PF_UNSPEC) neighbour netlink handlers:
 * add/delete/dump for neighbour entries and dump/set for the tables.
 */
static int __init neigh_init(void)
{
	rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);

	rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
		      NULL);
	rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);

	return 0;
}

subsys_initcall(neigh_init);
3112