2 * Generic address resolution entity
5 * Pedro Roque <roque@di.fc.ul.pt>
6 * Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
14 * Vitaly E. Lavrov releasing NULL neighbor in neigh_add.
15 * Harald Welte Add neighbour cache statistics like rtstat
18 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
20 #include <linux/slab.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/module.h>
24 #include <linux/socket.h>
25 #include <linux/netdevice.h>
26 #include <linux/proc_fs.h>
28 #include <linux/sysctl.h>
30 #include <linux/times.h>
31 #include <net/net_namespace.h>
32 #include <net/neighbour.h>
35 #include <net/netevent.h>
36 #include <net/netlink.h>
37 #include <linux/rtnetlink.h>
38 #include <linux/random.h>
39 #include <linux/string.h>
40 #include <linux/log2.h>
41 #include <linux/inetdevice.h>
/* neigh_dbg(): emit pr_debug() output only when 'level' does not exceed
 * the compile-time NEIGH_DEBUG verbosity threshold.
 * NOTE(review): the enclosing do { } while (0) lines are not visible in
 * this chunk — confirm against the full file.
 */
45 #define neigh_dbg(level, fmt, ...) \
47 if (level <= NEIGH_DEBUG) \
48 pr_debug(fmt, ##__VA_ARGS__); \
/* Proxy-neighbour hash table has PNEIGH_HASHMASK + 1 (= 16) buckets. */
51 #define PNEIGH_HASHMASK 0xF
/* Forward declarations for routines defined later in this file. */
53 static void neigh_timer_handler(unsigned long arg);
54 static void __neigh_notify(struct neighbour *n, int type, int flags);
55 static void neigh_update_notify(struct neighbour *neigh);
56 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev);
/* Global list of registered neighbour tables (see neigh_tbl_lock below). */
58 static struct neigh_table *neigh_tables;
60 static const struct file_operations neigh_stat_seq_fops;
64 Neighbour hash table buckets are protected with rwlock tbl->lock.
66 - All the scans/updates to hash buckets MUST be made under this lock.
67 - NOTHING clever should be made under this lock: no callbacks
68 to protocol backends, no attempts to send something to network.
69 It will result in deadlocks, if backend/driver wants to use neighbour
71 - If the entry requires some non-trivial actions, increase
72 its reference count and release table lock.
74 Neighbour entries are protected:
75 - with reference count.
76 - with rwlock neigh->lock
78 Reference count prevents destruction.
80 neigh->lock mainly serializes ll address data and its validity state.
81 However, the same lock is used to protect another entry fields:
85 Again, nothing clever shall be made under neigh->lock,
86 the most complicated procedure, which we allow is dev->hard_header.
87 It is supposed, that dev->hard_header is simplistic and does
88 not make callbacks to neighbour tables.
90 The last lock is neigh_tbl_lock. It is pure SMP lock, protecting
91 list of neighbour tables. This list is used only in process context,
94 static DEFINE_RWLOCK(neigh_tbl_lock);
/* neigh_blackhole(): output handler installed on entries that must not
 * transmit any more (see neigh_flush_dev()).  Body not visible in this
 * chunk — presumably frees the skb and returns an error; verify.
 */
96 static int neigh_blackhole(struct neighbour *neigh, struct sk_buff *skb)
/* Run the per-parms cleanup hook (if any), broadcast an RTM_DELNEIGH
 * netlink notification, then drop the caller's reference.
 */
102 static void neigh_cleanup_and_release(struct neighbour *neigh)
104 if (neigh->parms->neigh_cleanup)
105 neigh->parms->neigh_cleanup(neigh);
107 __neigh_notify(neigh, RTM_DELNEIGH, 0);
108 neigh_release(neigh);
112 * It is random distribution in the interval (1/2)*base...(3/2)*base.
113 * It corresponds to default IPv6 settings and is not overridable,
114 * because it is really reasonable choice.
/* Returns 0 when base is 0; otherwise base/2 plus a random offset in
 * [0, base), i.e. a value in [base/2, 3*base/2).
 */
117 unsigned long neigh_rand_reach_time(unsigned long base)
119 return base ? (net_random() % base) + (base >> 1) : 0;
121 EXPORT_SYMBOL(neigh_rand_reach_time);
/* neigh_forced_gc(): synchronous garbage-collection pass, triggered by
 * neigh_alloc() when the table exceeds its gc thresholds.  Walks every
 * hash bucket under write-locked tbl->lock and unlinks entries that are
 * unreferenced (refcnt == 1) and not NUD_PERMANENT; unlinked entries go
 * through neigh_cleanup_and_release().  Records jiffies in last_flush.
 */
124 static int neigh_forced_gc(struct neigh_table *tbl)
128 struct neigh_hash_table *nht;
130 NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs);
132 write_lock_bh(&tbl->lock);
/* tbl->lock is held, so plain dereference of the RCU pointer is safe. */
133 nht = rcu_dereference_protected(tbl->nht,
134 lockdep_is_held(&tbl->lock));
135 for (i = 0; i < (1 << nht->hash_shift); i++) {
137 struct neighbour __rcu **np;
139 np = &nht->hash_buckets[i];
140 while ((n = rcu_dereference_protected(*np,
141 lockdep_is_held(&tbl->lock))) != NULL) {
142 /* Neighbour record may be discarded if:
143 * - nobody refers to it.
144 * - it is not permanent
146 write_lock(&n->lock);
147 if (atomic_read(&n->refcnt) == 1 &&
148 !(n->nud_state & NUD_PERMANENT)) {
/* Unlink from the bucket chain; RCU readers may still see 'n'. */
149 rcu_assign_pointer(*np,
150 rcu_dereference_protected(n->next,
151 lockdep_is_held(&tbl->lock)));
154 write_unlock(&n->lock);
155 neigh_cleanup_and_release(n);
158 write_unlock(&n->lock);
163 tbl->last_flush = jiffies;
165 write_unlock_bh(&tbl->lock);
/* Arm the entry's state-machine timer for 'when'.  mod_timer() returning
 * nonzero means the timer was already pending — that is a bug here and is
 * reported loudly.
 */
170 static void neigh_add_timer(struct neighbour *n, unsigned long when)
173 if (unlikely(mod_timer(&n->timer, when))) {
174 printk("NEIGH: BUG, double timer add, state is %x\n",
/* neigh_del_timer(): cancel a pending state-machine timer.  Returns
 * nonzero only when the entry was in a timer state AND del_timer()
 * actually removed a pending timer.
 */
180 static int neigh_del_timer(struct neighbour *n)
182 if ((n->nud_state & NUD_IN_TIMER) &&
183 del_timer(&n->timer)) {
/* Drain and free every skb still parked on a proxy queue. */
190 static void pneigh_queue_purge(struct sk_buff_head *list)
194 while ((skb = skb_dequeue(list)) != NULL) {
/* neigh_flush_dev(): detach every entry bound to 'dev' (or all entries
 * when dev == NULL) from the hash table.  Caller must hold tbl->lock
 * write-locked (see neigh_changeaddr()/neigh_ifdown()).  Entries still
 * referenced elsewhere are neutered in place: queue purged, output
 * redirected to neigh_blackhole, state forced to NOARP/NONE.
 */
200 static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev)
203 struct neigh_hash_table *nht;
205 nht = rcu_dereference_protected(tbl->nht,
206 lockdep_is_held(&tbl->lock));
208 for (i = 0; i < (1 << nht->hash_shift); i++) {
210 struct neighbour __rcu **np = &nht->hash_buckets[i];
212 while ((n = rcu_dereference_protected(*np,
213 lockdep_is_held(&tbl->lock))) != NULL) {
/* Skip entries belonging to other devices. */
214 if (dev && n->dev != dev) {
218 rcu_assign_pointer(*np,
219 rcu_dereference_protected(n->next,
220 lockdep_is_held(&tbl->lock)));
221 write_lock(&n->lock);
225 if (atomic_read(&n->refcnt) != 1) {
226 /* The most unpleasant situation.
227 We must destroy neighbour entry,
228 but someone still uses it.
230 The destroy will be delayed until
231 the last user releases us, but
232 we must kill timers etc. and move
235 __skb_queue_purge(&n->arp_queue);
236 n->arp_queue_len_bytes = 0;
237 n->output = neigh_blackhole;
238 if (n->nud_state & NUD_VALID)
239 n->nud_state = NUD_NOARP;
241 n->nud_state = NUD_NONE;
242 neigh_dbg(2, "neigh %p is stray\n", n);
244 write_unlock(&n->lock);
245 neigh_cleanup_and_release(n);
/* Flush all cached entries for 'dev' after its hardware address changed. */
250 void neigh_changeaddr(struct neigh_table *tbl, struct net_device *dev)
252 write_lock_bh(&tbl->lock);
253 neigh_flush_dev(tbl, dev);
254 write_unlock_bh(&tbl->lock);
256 EXPORT_SYMBOL(neigh_changeaddr);
/* Device is going down: flush its neighbour and proxy entries, stop the
 * proxy timer and drop any queued proxy skbs.
 */
258 int neigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
260 write_lock_bh(&tbl->lock);
261 neigh_flush_dev(tbl, dev);
262 pneigh_ifdown(tbl, dev);
263 write_unlock_bh(&tbl->lock);
265 del_timer_sync(&tbl->proxy_timer);
266 pneigh_queue_purge(&tbl->proxy_queue);
269 EXPORT_SYMBOL(neigh_ifdown);
/* neigh_alloc(): allocate and minimally initialise a neighbour entry.
 * Runs forced GC first when the table is over gc_thresh3, or over
 * gc_thresh2 with no flush in the last 5 seconds; allocation is refused
 * (entries counter rolled back) if GC cannot make room below thresh3.
 * Entry starts in NUD_NONE with output = neigh_blackhole and refcnt 1.
 */
271 static struct neighbour *neigh_alloc(struct neigh_table *tbl, struct net_device *dev)
273 struct neighbour *n = NULL;
274 unsigned long now = jiffies;
277 entries = atomic_inc_return(&tbl->entries) - 1;
278 if (entries >= tbl->gc_thresh3 ||
279 (entries >= tbl->gc_thresh2 &&
280 time_after(now, tbl->last_flush + 5 * HZ))) {
281 if (!neigh_forced_gc(tbl) &&
282 entries >= tbl->gc_thresh3)
/* Room for device-private data is appended after the entry proper. */
286 n = kzalloc(tbl->entry_size + dev->neigh_priv_len, GFP_ATOMIC);
290 __skb_queue_head_init(&n->arp_queue);
291 rwlock_init(&n->lock);
292 seqlock_init(&n->ha_lock);
293 n->updated = n->used = now;
294 n->nud_state = NUD_NONE;
295 n->output = neigh_blackhole;
296 seqlock_init(&n->hh.hh_lock);
297 n->parms = neigh_parms_clone(&tbl->parms);
298 setup_timer(&n->timer, neigh_timer_handler, (unsigned long)n);
300 NEIGH_CACHE_STAT_INC(tbl, allocs);
302 atomic_set(&n->refcnt, 1);
/* Failure path: undo the optimistic entries increment. */
308 atomic_dec(&tbl->entries);
/* Fill *x with random bytes used to salt the hash functions. */
312 static void neigh_get_hash_rnd(u32 *x)
314 get_random_bytes(x, sizeof(*x));
/* neigh_hash_alloc(): allocate a bucket array of 2^shift entries plus its
 * descriptor.  Small arrays come from kzalloc(); larger ones from whole
 * zeroed pages.  Each hash_rnd word is (re)seeded randomly.
 */
318 static struct neigh_hash_table *neigh_hash_alloc(unsigned int shift)
320 size_t size = (1 << shift) * sizeof(struct neighbour *);
321 struct neigh_hash_table *ret;
322 struct neighbour __rcu **buckets;
325 ret = kmalloc(sizeof(*ret), GFP_ATOMIC);
328 if (size <= PAGE_SIZE)
329 buckets = kzalloc(size, GFP_ATOMIC);
331 buckets = (struct neighbour __rcu **)
332 __get_free_pages(GFP_ATOMIC | __GFP_ZERO,
338 ret->hash_buckets = buckets;
339 ret->hash_shift = shift;
340 for (i = 0; i < NEIGH_NUM_HASH_RND; i++)
341 neigh_get_hash_rnd(&ret->hash_rnd[i]);
/* RCU callback: free the bucket array the same way it was allocated. */
345 static void neigh_hash_free_rcu(struct rcu_head *head)
347 struct neigh_hash_table *nht = container_of(head,
348 struct neigh_hash_table,
350 size_t size = (1 << nht->hash_shift) * sizeof(struct neighbour *);
351 struct neighbour __rcu **buckets = nht->hash_buckets;
353 if (size <= PAGE_SIZE)
356 free_pages((unsigned long)buckets, get_order(size));
/* neigh_hash_grow(): replace the hash table with a 2^new_shift one,
 * rehashing every entry.  Called with tbl->lock write-held; the old
 * table is released via call_rcu() so concurrent RCU readers finish
 * safely on the old buckets.
 */
360 static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl,
361 unsigned long new_shift)
363 unsigned int i, hash;
364 struct neigh_hash_table *new_nht, *old_nht;
366 NEIGH_CACHE_STAT_INC(tbl, hash_grows);
368 old_nht = rcu_dereference_protected(tbl->nht,
369 lockdep_is_held(&tbl->lock));
370 new_nht = neigh_hash_alloc(new_shift);
374 for (i = 0; i < (1 << old_nht->hash_shift); i++) {
375 struct neighbour *n, *next;
377 for (n = rcu_dereference_protected(old_nht->hash_buckets[i],
378 lockdep_is_held(&tbl->lock));
/* Recompute the bucket under the new table's shift/salt. */
381 hash = tbl->hash(n->primary_key, n->dev,
384 hash >>= (32 - new_nht->hash_shift);
385 next = rcu_dereference_protected(n->next,
386 lockdep_is_held(&tbl->lock));
/* Push onto the head of the new bucket chain. */
388 rcu_assign_pointer(n->next,
389 rcu_dereference_protected(
390 new_nht->hash_buckets[hash],
391 lockdep_is_held(&tbl->lock)));
392 rcu_assign_pointer(new_nht->hash_buckets[hash], n);
396 rcu_assign_pointer(tbl->nht, new_nht);
397 call_rcu(&old_nht->rcu, neigh_hash_free_rcu);
/* neigh_lookup(): find the entry for (pkey, dev) under RCU-BH.  On a hit
 * the reference count is taken with atomic_inc_not_zero() — an entry
 * whose refcount already dropped to zero is treated as a miss.
 */
401 struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
402 struct net_device *dev)
405 int key_len = tbl->key_len;
407 struct neigh_hash_table *nht;
409 NEIGH_CACHE_STAT_INC(tbl, lookups);
412 nht = rcu_dereference_bh(tbl->nht);
413 hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
415 for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
417 n = rcu_dereference_bh(n->next)) {
418 if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
419 if (!atomic_inc_not_zero(&n->refcnt))
421 NEIGH_CACHE_STAT_INC(tbl, hits);
426 rcu_read_unlock_bh();
429 EXPORT_SYMBOL(neigh_lookup);
/* Like neigh_lookup() but matches by key and network namespace only,
 * ignoring the device (hash computed with dev == NULL).
 */
431 struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net,
435 int key_len = tbl->key_len;
437 struct neigh_hash_table *nht;
439 NEIGH_CACHE_STAT_INC(tbl, lookups);
442 nht = rcu_dereference_bh(tbl->nht);
443 hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) >> (32 - nht->hash_shift);
445 for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]);
447 n = rcu_dereference_bh(n->next)) {
448 if (!memcmp(n->primary_key, pkey, key_len) &&
449 net_eq(dev_net(n->dev), net)) {
450 if (!atomic_inc_not_zero(&n->refcnt))
452 NEIGH_CACHE_STAT_INC(tbl, hits);
457 rcu_read_unlock_bh();
460 EXPORT_SYMBOL(neigh_lookup_nodev);
/* __neigh_create(): allocate and insert a new entry for (pkey, dev).
 * Runs the table constructor, the device's ndo_neigh_construct and the
 * parms neigh_setup hooks in that order; any failure releases the fresh
 * entry.  Under tbl->lock the hash may first be grown, then the bucket
 * chain is rescanned — if a concurrent creator won the race, the visible
 * chain-walk (line 522) finds the duplicate and this copy is discarded
 * (resolution lines not visible here).  'want_ref' presumably controls
 * whether the returned entry carries an extra reference — confirm.
 */
462 struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey,
463 struct net_device *dev, bool want_ref)
466 int key_len = tbl->key_len;
468 struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev);
469 struct neigh_hash_table *nht;
472 rc = ERR_PTR(-ENOBUFS);
476 memcpy(n->primary_key, pkey, key_len);
480 /* Protocol specific setup. */
481 if (tbl->constructor && (error = tbl->constructor(n)) < 0) {
483 goto out_neigh_release;
486 if (dev->netdev_ops->ndo_neigh_construct) {
487 error = dev->netdev_ops->ndo_neigh_construct(n);
490 goto out_neigh_release;
494 /* Device specific setup. */
495 if (n->parms->neigh_setup &&
496 (error = n->parms->neigh_setup(n)) < 0) {
498 goto out_neigh_release;
/* Backdate 'confirmed' so the entry is immediately considered stale. */
501 n->confirmed = jiffies - (NEIGH_VAR(n->parms, BASE_REACHABLE_TIME) << 1);
503 write_lock_bh(&tbl->lock);
504 nht = rcu_dereference_protected(tbl->nht,
505 lockdep_is_held(&tbl->lock));
507 if (atomic_read(&tbl->entries) > (1 << nht->hash_shift))
508 nht = neigh_hash_grow(tbl, nht->hash_shift + 1);
510 hash_val = tbl->hash(pkey, dev, nht->hash_rnd) >> (32 - nht->hash_shift);
/* parms were torn down while we were setting up — bail out. */
512 if (n->parms->dead) {
513 rc = ERR_PTR(-EINVAL);
517 for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val],
518 lockdep_is_held(&tbl->lock));
520 n1 = rcu_dereference_protected(n1->next,
521 lockdep_is_held(&tbl->lock))) {
522 if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) {
533 rcu_assign_pointer(n->next,
534 rcu_dereference_protected(nht->hash_buckets[hash_val],
535 lockdep_is_held(&tbl->lock)));
536 rcu_assign_pointer(nht->hash_buckets[hash_val], n);
537 write_unlock_bh(&tbl->lock);
538 neigh_dbg(2, "neigh %p is created\n", n);
543 write_unlock_bh(&tbl->lock);
548 EXPORT_SYMBOL(__neigh_create);
/* pneigh_hash(): fold the last 4 bytes of the key down to a 4-bit bucket
 * index (PNEIGH_HASHMASK).  Assumes key_len >= 4 — TODO confirm callers
 * guarantee this for every table.
 */
550 static u32 pneigh_hash(const void *pkey, int key_len)
552 u32 hash_val = *(u32 *)(pkey + key_len - 4);
553 hash_val ^= (hash_val >> 16);
554 hash_val ^= hash_val >> 8;
555 hash_val ^= hash_val >> 4;
556 hash_val &= PNEIGH_HASHMASK;
/* Walk one proxy bucket chain looking for (net, pkey, dev); an entry
 * with n->dev == NULL matches any device (wildcard proxy).
 */
560 static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n,
564 struct net_device *dev)
567 if (!memcmp(n->key, pkey, key_len) &&
568 net_eq(pneigh_net(n), net) &&
569 (n->dev == dev || !n->dev))
/* Lock-free proxy lookup; the caller is responsible for serialisation. */
576 struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl,
577 struct net *net, const void *pkey, struct net_device *dev)
579 int key_len = tbl->key_len;
580 u32 hash_val = pneigh_hash(pkey, key_len);
582 return __pneigh_lookup_1(tbl->phash_buckets[hash_val],
583 net, pkey, key_len, dev);
585 EXPORT_SYMBOL_GPL(__pneigh_lookup);
/* pneigh_lookup(): find a proxy entry; when 'creat' is set and nothing is
 * found, allocate one (GFP_KERNEL), run the table's pconstructor, and
 * link it into the bucket under tbl->lock.
 */
587 struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl,
588 struct net *net, const void *pkey,
589 struct net_device *dev, int creat)
591 struct pneigh_entry *n;
592 int key_len = tbl->key_len;
593 u32 hash_val = pneigh_hash(pkey, key_len);
595 read_lock_bh(&tbl->lock);
596 n = __pneigh_lookup_1(tbl->phash_buckets[hash_val],
597 net, pkey, key_len, dev);
598 read_unlock_bh(&tbl->lock);
/* Key bytes are stored inline after the entry struct. */
605 n = kmalloc(sizeof(*n) + key_len, GFP_KERNEL);
609 write_pnet(&n->net, hold_net(net));
610 memcpy(n->key, pkey, key_len);
615 if (tbl->pconstructor && tbl->pconstructor(n)) {
624 write_lock_bh(&tbl->lock);
625 n->next = tbl->phash_buckets[hash_val];
626 tbl->phash_buckets[hash_val] = n;
627 write_unlock_bh(&tbl->lock);
631 EXPORT_SYMBOL(pneigh_lookup);
/* pneigh_delete(): unlink and free the proxy entry matching (net, pkey,
 * dev) exactly (no wildcard-device match here, unlike lookup).  Runs the
 * pdestructor outside tbl->lock.
 */
634 int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey,
635 struct net_device *dev)
637 struct pneigh_entry *n, **np;
638 int key_len = tbl->key_len;
639 u32 hash_val = pneigh_hash(pkey, key_len);
641 write_lock_bh(&tbl->lock);
642 for (np = &tbl->phash_buckets[hash_val]; (n = *np) != NULL;
644 if (!memcmp(n->key, pkey, key_len) && n->dev == dev &&
645 net_eq(pneigh_net(n), net)) {
647 write_unlock_bh(&tbl->lock);
648 if (tbl->pdestructor)
652 release_net(pneigh_net(n));
/* Not found: drop the lock and report failure. */
657 write_unlock_bh(&tbl->lock);
/* pneigh_ifdown(): remove every proxy entry for 'dev' (all entries when
 * dev == NULL) from all buckets.  Caller holds tbl->lock (invoked from
 * neigh_ifdown() under write_lock_bh).
 */
661 static int pneigh_ifdown(struct neigh_table *tbl, struct net_device *dev)
663 struct pneigh_entry *n, **np;
666 for (h = 0; h <= PNEIGH_HASHMASK; h++) {
667 np = &tbl->phash_buckets[h];
668 while ((n = *np) != NULL) {
669 if (!dev || n->dev == dev) {
671 if (tbl->pdestructor)
675 release_net(pneigh_net(n));
685 static void neigh_parms_destroy(struct neigh_parms *parms);
/* Drop one reference on a parms block; destroy it on the last put. */
687 static inline void neigh_parms_put(struct neigh_parms *parms)
689 if (atomic_dec_and_test(&parms->refcnt))
690 neigh_parms_destroy(parms);
694 * neighbour must already be out of the table;
/* Final teardown once the last reference is gone: kill timers, purge the
 * arp_queue, run the device's ndo_neigh_destroy hook, drop the parms
 * reference and free the entry via RCU.
 */
697 void neigh_destroy(struct neighbour *neigh)
699 struct net_device *dev = neigh->dev;
701 NEIGH_CACHE_STAT_INC(neigh->tbl, destroys);
704 pr_warn("Destroying alive neighbour %p\n", neigh);
/* A pending timer at this point would mean a leaked reference. */
709 if (neigh_del_timer(neigh))
710 pr_warn("Impossible event\n");
712 write_lock_bh(&neigh->lock);
713 __skb_queue_purge(&neigh->arp_queue);
714 write_unlock_bh(&neigh->lock);
715 neigh->arp_queue_len_bytes = 0;
717 if (dev->netdev_ops->ndo_neigh_destroy)
718 dev->netdev_ops->ndo_neigh_destroy(neigh);
721 neigh_parms_put(neigh->parms);
723 neigh_dbg(2, "neigh %p is destroyed\n", neigh);
725 atomic_dec(&neigh->tbl->entries);
726 kfree_rcu(neigh, rcu);
728 EXPORT_SYMBOL(neigh_destroy);
730 /* Neighbour state is suspicious;
733 Called with write_locked neigh.
/* Route traffic through the slow (resolving) ops->output path. */
735 static void neigh_suspect(struct neighbour *neigh)
737 neigh_dbg(2, "neigh %p is suspected\n", neigh)
739 neigh->output = neigh->ops->output;
742 /* Neighbour state is OK;
745 Called with write_locked neigh.
/* Route traffic through the fast ops->connected_output path. */
747 static void neigh_connect(struct neighbour *neigh)
749 neigh_dbg(2, "neigh %p is connected\n", neigh);
751 neigh->output = neigh->ops->connected_output;
/* neigh_periodic_work(): deferred-work GC.  Skips entirely while under
 * gc_thresh1.  Every 300s it re-randomises each parms' reachable_time.
 * Then sweeps buckets, dropping unreferenced entries that are FAILED or
 * idle past GC_STALETIME; permanent and in-timer entries are left alone.
 * Reschedules itself every BASE_REACHABLE_TIME/2 ticks.
 */
754 static void neigh_periodic_work(struct work_struct *work)
756 struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work);
758 struct neighbour __rcu **np;
760 struct neigh_hash_table *nht;
762 NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs);
764 write_lock_bh(&tbl->lock);
765 nht = rcu_dereference_protected(tbl->nht,
766 lockdep_is_held(&tbl->lock));
768 if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
772 * periodically recompute ReachableTime from random function
775 if (time_after(jiffies, tbl->last_rand + 300 * HZ)) {
776 struct neigh_parms *p;
777 tbl->last_rand = jiffies;
778 for (p = &tbl->parms; p; p = p->next)
780 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
783 for (i = 0 ; i < (1 << nht->hash_shift); i++) {
784 np = &nht->hash_buckets[i];
786 while ((n = rcu_dereference_protected(*np,
787 lockdep_is_held(&tbl->lock))) != NULL) {
790 write_lock(&n->lock);
792 state = n->nud_state;
793 if (state & (NUD_PERMANENT | NUD_IN_TIMER)) {
794 write_unlock(&n->lock);
/* Keep 'used' monotone w.r.t. confirmations. */
798 if (time_before(n->used, n->confirmed))
799 n->used = n->confirmed;
801 if (atomic_read(&n->refcnt) == 1 &&
802 (state == NUD_FAILED ||
803 time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
806 write_unlock(&n->lock);
807 neigh_cleanup_and_release(n);
810 write_unlock(&n->lock);
816 * It's fine to release lock here, even if hash table
817 * grows while we are preempted.
819 write_unlock_bh(&tbl->lock);
821 write_lock_bh(&tbl->lock);
822 nht = rcu_dereference_protected(tbl->nht,
823 lockdep_is_held(&tbl->lock));
826 /* Cycle through all hash buckets every BASE_REACHABLE_TIME/2 ticks.
827 * ARP entry timeouts range from 1/2 BASE_REACHABLE_TIME to 3/2
828 * BASE_REACHABLE_TIME.
830 schedule_delayed_work(&tbl->gc_work,
831 NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME) >> 1);
832 write_unlock_bh(&tbl->lock);
/* Probe budget: in NUD_PROBE only unicast probes count; while resolving
 * (INCOMPLETE) unicast + application + multicast probes all count.
 */
835 static __inline__ int neigh_max_probes(struct neighbour *n)
837 struct neigh_parms *p = n->parms;
838 return (n->nud_state & NUD_PROBE) ?
839 NEIGH_VAR(p, UCAST_PROBES) :
840 NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
841 NEIGH_VAR(p, MCAST_PROBES);
/* neigh_invalidate(): resolution failed.  Report unreachable for each
 * queued skb, dropping/retaking neigh->lock around the callback (the
 * sparse annotations document that), then purge whatever remains.
 */
844 static void neigh_invalidate(struct neighbour *neigh)
845 __releases(neigh->lock)
846 __acquires(neigh->lock)
850 NEIGH_CACHE_STAT_INC(neigh->tbl, res_failed);
851 neigh_dbg(2, "neigh %p is failed\n", neigh);
852 neigh->updated = jiffies;
854 /* It is very thin place. report_unreachable is very complicated
855 routine. Particularly, it can hit the same neighbour entry!
857 So that, we try to be accurate and avoid dead loop. --ANK
859 while (neigh->nud_state == NUD_FAILED &&
860 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
861 write_unlock(&neigh->lock);
862 neigh->ops->error_report(neigh, skb);
863 write_lock(&neigh->lock);
865 __skb_queue_purge(&neigh->arp_queue);
866 neigh->arp_queue_len_bytes = 0;
/* neigh_probe(): send one solicitation.  A private copy of the newest
 * queued skb is used so the queue can be flushed concurrently; the entry
 * lock is dropped before calling into the protocol's solicit op.
 */
869 static void neigh_probe(struct neighbour *neigh)
870 __releases(neigh->lock)
872 struct sk_buff *skb = skb_peek_tail(&neigh->arp_queue);
873 /* keep skb alive even if arp_queue overflows */
875 skb = skb_copy(skb, GFP_ATOMIC);
876 write_unlock(&neigh->lock);
877 neigh->ops->solicit(neigh, skb);
878 atomic_inc(&neigh->probes);
882 /* Called when a timer expires for a neighbour entry. */
/* NUD state machine tick:
 *  REACHABLE -> (confirmed fresh) stay; (recently used) DELAY; else STALE.
 *  DELAY     -> (confirmed meanwhile) back to REACHABLE; else PROBE.
 *  PROBE/INCOMPLETE -> retransmit until neigh_max_probes() exceeded,
 *                      then FAILED + neigh_invalidate().
 * Re-arms the timer (clamped to >= jiffies + HZ/2) while still in a
 * timer state; fires a probe when INCOMPLETE/PROBE remains.
 */
884 static void neigh_timer_handler(unsigned long arg)
886 unsigned long now, next;
887 struct neighbour *neigh = (struct neighbour *)arg;
891 write_lock(&neigh->lock);
893 state = neigh->nud_state;
/* Timer may have been cancelled while we waited for the lock. */
897 if (!(state & NUD_IN_TIMER))
900 if (state & NUD_REACHABLE) {
901 if (time_before_eq(now,
902 neigh->confirmed + neigh->parms->reachable_time)) {
903 neigh_dbg(2, "neigh %p is still alive\n", neigh);
904 next = neigh->confirmed + neigh->parms->reachable_time;
905 } else if (time_before_eq(now,
907 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
908 neigh_dbg(2, "neigh %p is delayed\n", neigh);
909 neigh->nud_state = NUD_DELAY;
910 neigh->updated = jiffies;
911 neigh_suspect(neigh);
912 next = now + NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME);
914 neigh_dbg(2, "neigh %p is suspected\n", neigh);
915 neigh->nud_state = NUD_STALE;
916 neigh->updated = jiffies;
917 neigh_suspect(neigh);
920 } else if (state & NUD_DELAY) {
921 if (time_before_eq(now,
923 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME))) {
924 neigh_dbg(2, "neigh %p is now reachable\n", neigh);
925 neigh->nud_state = NUD_REACHABLE;
926 neigh->updated = jiffies;
927 neigh_connect(neigh);
929 next = neigh->confirmed + neigh->parms->reachable_time;
931 neigh_dbg(2, "neigh %p is probed\n", neigh);
932 neigh->nud_state = NUD_PROBE;
933 neigh->updated = jiffies;
934 atomic_set(&neigh->probes, 0);
935 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
938 /* NUD_PROBE|NUD_INCOMPLETE */
939 next = now + NEIGH_VAR(neigh->parms, RETRANS_TIME);
942 if ((neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) &&
943 atomic_read(&neigh->probes) >= neigh_max_probes(neigh)) {
944 neigh->nud_state = NUD_FAILED;
946 neigh_invalidate(neigh);
949 if (neigh->nud_state & NUD_IN_TIMER) {
/* Never re-arm closer than half a second out. */
950 if (time_before(next, jiffies + HZ/2))
951 next = jiffies + HZ/2;
952 if (!mod_timer(&neigh->timer, next))
955 if (neigh->nud_state & (NUD_INCOMPLETE | NUD_PROBE)) {
959 write_unlock(&neigh->lock);
963 neigh_update_notify(neigh);
965 neigh_release(neigh);
/* __neigh_event_send(): slow path of neigh_event_send().  Kicks an idle
 * entry into INCOMPLETE (arming the first retransmit timer and probe) or
 * STALE into DELAY.  With no probes configured the entry goes straight
 * to FAILED.  While INCOMPLETE the skb is queued on arp_queue, evicting
 * oldest packets beyond QUEUE_LEN_BYTES (counted as unres_discards).
 */
968 int __neigh_event_send(struct neighbour *neigh, struct sk_buff *skb)
971 bool immediate_probe = false;
973 write_lock_bh(&neigh->lock);
/* Already usable or already being verified — nothing to do. */
976 if (neigh->nud_state & (NUD_CONNECTED | NUD_DELAY | NUD_PROBE))
979 if (!(neigh->nud_state & (NUD_STALE | NUD_INCOMPLETE))) {
980 if (NEIGH_VAR(neigh->parms, MCAST_PROBES) +
981 NEIGH_VAR(neigh->parms, APP_PROBES)) {
982 unsigned long next, now = jiffies;
984 atomic_set(&neigh->probes,
985 NEIGH_VAR(neigh->parms, UCAST_PROBES));
986 neigh->nud_state = NUD_INCOMPLETE;
987 neigh->updated = now;
988 next = now + max(NEIGH_VAR(neigh->parms, RETRANS_TIME),
990 neigh_add_timer(neigh, next);
991 immediate_probe = true;
993 neigh->nud_state = NUD_FAILED;
994 neigh->updated = jiffies;
995 write_unlock_bh(&neigh->lock);
1000 } else if (neigh->nud_state & NUD_STALE) {
1001 neigh_dbg(2, "neigh %p is delayed\n", neigh);
1002 neigh->nud_state = NUD_DELAY;
1003 neigh->updated = jiffies;
1004 neigh_add_timer(neigh, jiffies +
1005 NEIGH_VAR(neigh->parms, DELAY_PROBE_TIME));
1008 if (neigh->nud_state == NUD_INCOMPLETE) {
/* Enforce the byte budget on the unresolved queue, oldest first. */
1010 while (neigh->arp_queue_len_bytes + skb->truesize >
1011 NEIGH_VAR(neigh->parms, QUEUE_LEN_BYTES)) {
1012 struct sk_buff *buff;
1014 buff = __skb_dequeue(&neigh->arp_queue);
1017 neigh->arp_queue_len_bytes -= buff->truesize;
1019 NEIGH_CACHE_STAT_INC(neigh->tbl, unres_discards);
1022 __skb_queue_tail(&neigh->arp_queue, skb);
1023 neigh->arp_queue_len_bytes += skb->truesize;
1028 if (immediate_probe)
1031 write_unlock(&neigh->lock);
1035 EXPORT_SYMBOL(__neigh_event_send);
/* Refresh the cached hardware header with the new lladdr via the
 * device's header_ops->cache_update, under the hh seqlock so readers in
 * the xmit fast path see a consistent header.
 */
1037 static void neigh_update_hhs(struct neighbour *neigh)
1039 struct hh_cache *hh;
1040 void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *)
1043 if (neigh->dev->header_ops)
1044 update = neigh->dev->header_ops->cache_update;
1049 write_seqlock_bh(&hh->hh_lock);
1050 update(hh, neigh->dev, neigh->ha);
1051 write_sequnlock_bh(&hh->hh_lock);
1058 /* Generic update routine.
1059 -- lladdr is new lladdr or NULL, if it is not supplied.
1060 -- new is new state.
1062 NEIGH_UPDATE_F_OVERRIDE allows to override existing lladdr,
1064 NEIGH_UPDATE_F_WEAK_OVERRIDE will suspect existing "connected"
1065 lladdr instead of overriding it
1067 It also allows to retain current state
1068 if lladdr is unchanged.
1069 NEIGH_UPDATE_F_ADMIN means that the change is administrative.
1071 NEIGH_UPDATE_F_OVERRIDE_ISROUTER allows to override existing
1073 NEIGH_UPDATE_F_ISROUTER indicates if the neighbour is known as
1076 Caller MUST hold reference count on the entry.
/* Core of the update: reconcile (lladdr, new state, flags) with the
 * cached entry under write-locked neigh->lock, swap output handlers via
 * neigh_connect()/neigh_suspect(), flush the arp_queue through the
 * freshly valid path, maintain NTF_ROUTER, and notify on change.
 */
1079 int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new,
1085 struct net_device *dev;
1086 int update_isrouter = 0;
1088 write_lock_bh(&neigh->lock);
1091 old = neigh->nud_state;
/* Non-admin updates may not touch NOARP/PERMANENT entries. */
1094 if (!(flags & NEIGH_UPDATE_F_ADMIN) &&
1095 (old & (NUD_NOARP | NUD_PERMANENT)))
1098 if (!(new & NUD_VALID)) {
1099 neigh_del_timer(neigh);
1100 if (old & NUD_CONNECTED)
1101 neigh_suspect(neigh);
1102 neigh->nud_state = new;
1104 notify = old & NUD_VALID;
1105 if ((old & (NUD_INCOMPLETE | NUD_PROBE)) &&
1106 (new & NUD_FAILED)) {
1107 neigh_invalidate(neigh);
1113 /* Compare new lladdr with cached one */
1114 if (!dev->addr_len) {
1115 /* First case: device needs no address. */
1117 } else if (lladdr) {
1118 /* The second case: if something is already cached
1119 and a new address is proposed:
1121 - if they are different, check override flag
1123 if ((old & NUD_VALID) &&
1124 !memcmp(lladdr, neigh->ha, dev->addr_len))
1127 /* No address is supplied; if we know something,
1128 use it, otherwise discard the request.
1131 if (!(old & NUD_VALID))
1136 if (new & NUD_CONNECTED)
1137 neigh->confirmed = jiffies;
1138 neigh->updated = jiffies;
1140 /* If entry was valid and address is not changed,
1141 do not change entry state, if new one is STALE.
1144 update_isrouter = flags & NEIGH_UPDATE_F_OVERRIDE_ISROUTER;
1145 if (old & NUD_VALID) {
1146 if (lladdr != neigh->ha && !(flags & NEIGH_UPDATE_F_OVERRIDE)) {
1147 update_isrouter = 0;
1148 if ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) &&
1149 (old & NUD_CONNECTED)) {
1155 if (lladdr == neigh->ha && new == NUD_STALE &&
1156 ((flags & NEIGH_UPDATE_F_WEAK_OVERRIDE) ||
1157 (old & NUD_CONNECTED))
1164 neigh_del_timer(neigh);
1165 if (new & NUD_IN_TIMER)
1166 neigh_add_timer(neigh, (jiffies +
1167 ((new & NUD_REACHABLE) ?
1168 neigh->parms->reachable_time :
1170 neigh->nud_state = new;
/* Install the new hardware address under the ha seqlock and refresh
 * the cached hardware headers.
 */
1173 if (lladdr != neigh->ha) {
1174 write_seqlock(&neigh->ha_lock);
1175 memcpy(&neigh->ha, lladdr, dev->addr_len);
1176 write_sequnlock(&neigh->ha_lock);
1177 neigh_update_hhs(neigh);
1178 if (!(new & NUD_CONNECTED))
1179 neigh->confirmed = jiffies -
1180 (NEIGH_VAR(neigh->parms, BASE_REACHABLE_TIME) << 1);
1185 if (new & NUD_CONNECTED)
1186 neigh_connect(neigh);
1188 neigh_suspect(neigh);
1189 if (!(old & NUD_VALID)) {
1190 struct sk_buff *skb;
1192 /* Again: avoid dead loop if something went wrong */
1194 while (neigh->nud_state & NUD_VALID &&
1195 (skb = __skb_dequeue(&neigh->arp_queue)) != NULL) {
1196 struct dst_entry *dst = skb_dst(skb);
1197 struct neighbour *n2, *n1 = neigh;
1198 write_unlock_bh(&neigh->lock);
1202 /* Why not just use 'neigh' as-is? The problem is that
1203 * things such as shaper, eql, and sch_teql can end up
1204 * using alternative, different, neigh objects to output
1205 * the packet in the output path. So what we need to do
1206 * here is re-lookup the top-level neigh in the path so
1207 * we can reinject the packet there.
1211 n2 = dst_neigh_lookup_skb(dst, skb);
1215 n1->output(n1, skb);
1220 write_lock_bh(&neigh->lock);
1222 __skb_queue_purge(&neigh->arp_queue);
1223 neigh->arp_queue_len_bytes = 0;
1226 if (update_isrouter) {
1227 neigh->flags = (flags & NEIGH_UPDATE_F_ISROUTER) ?
1228 (neigh->flags | NTF_ROUTER) :
1229 (neigh->flags & ~NTF_ROUTER);
1231 write_unlock_bh(&neigh->lock);
1234 neigh_update_notify(neigh);
1238 EXPORT_SYMBOL(neigh_update);
/* Handle an incoming neighbour solicitation: look up (creating only when
 * a lladdr is present or the device is addressless) and mark STALE with
 * override so the sender's current lladdr is recorded.
 */
1240 struct neighbour *neigh_event_ns(struct neigh_table *tbl,
1241 u8 *lladdr, void *saddr,
1242 struct net_device *dev)
1244 struct neighbour *neigh = __neigh_lookup(tbl, saddr, dev,
1245 lladdr || !dev->addr_len);
1247 neigh_update(neigh, lladdr, NUD_STALE,
1248 NEIGH_UPDATE_F_OVERRIDE);
1251 EXPORT_SYMBOL(neigh_event_ns);
1253 /* called with read_lock_bh(&n->lock); */
/* One-time population of the entry's cached hardware header via the
 * device's header_ops->cache hook, serialised by n->lock so only a
 * single initialiser runs.
 */
1254 static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst)
1256 struct net_device *dev = dst->dev;
1257 __be16 prot = dst->ops->protocol;
1258 struct hh_cache *hh = &n->hh;
1260 write_lock_bh(&n->lock);
1262 /* Only one thread can come in here and initialize the
1266 dev->header_ops->cache(n, hh, prot);
1268 write_unlock_bh(&n->lock);
1271 /* This function can be used in contexts, where only old dev_queue_xmit
1272 * worked, f.e. if you want to override normal output path (eql, shaper),
1273 * but resolution is not made yet.
/* Build the link-layer header without a resolved neighbour (falling back
 * to header_ops->rebuild) and hand the skb to the qdisc layer.
 */
1276 int neigh_compat_output(struct neighbour *neigh, struct sk_buff *skb)
1278 struct net_device *dev = skb->dev;
1280 __skb_pull(skb, skb_network_offset(skb));
1282 if (dev_hard_header(skb, dev, ntohs(skb->protocol), NULL, NULL,
1284 dev->header_ops->rebuild(skb))
1287 return dev_queue_xmit(skb);
1289 EXPORT_SYMBOL(neigh_compat_output);
1291 /* Slow and careful. */
1293 int neigh_resolve_output(struct neighbour *neigh, struct sk_buff *skb)
1295 struct dst_entry *dst = skb_dst(skb);
1301 if (!neigh_event_send(neigh, skb)) {
1303 struct net_device *dev = neigh->dev;
1306 if (dev->header_ops->cache && !neigh->hh.hh_len)
1307 neigh_hh_init(neigh, dst);
1310 __skb_pull(skb, skb_network_offset(skb));
1311 seq = read_seqbegin(&neigh->ha_lock);
1312 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1313 neigh->ha, NULL, skb->len);
1314 } while (read_seqretry(&neigh->ha_lock, seq));
1317 rc = dev_queue_xmit(skb);
1324 neigh_dbg(1, "%s: dst=%p neigh=%p\n", __func__, dst, neigh);
1330 EXPORT_SYMBOL(neigh_resolve_output);
1332 /* As fast as possible without hh cache */
/* Connected-state fast path: header built directly from neigh->ha under
 * the seqlock retry loop, no resolution step.
 */
1334 int neigh_connected_output(struct neighbour *neigh, struct sk_buff *skb)
1336 struct net_device *dev = neigh->dev;
1341 __skb_pull(skb, skb_network_offset(skb));
1342 seq = read_seqbegin(&neigh->ha_lock);
1343 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
1344 neigh->ha, NULL, skb->len);
1345 } while (read_seqretry(&neigh->ha_lock, seq));
1348 err = dev_queue_xmit(skb);
1355 EXPORT_SYMBOL(neigh_connected_output);
/* No header work at all — queue straight to the device. */
1357 int neigh_direct_output(struct neighbour *neigh, struct sk_buff *skb)
1359 return dev_queue_xmit(skb);
1361 EXPORT_SYMBOL(neigh_direct_output);
/* neigh_proxy_process(): proxy_timer handler.  Walk the delayed proxy
 * queue; replay due skbs through tbl->proxy_redo (if the device is still
 * up), and re-arm the timer for the earliest not-yet-due skb.
 */
1363 static void neigh_proxy_process(unsigned long arg)
1365 struct neigh_table *tbl = (struct neigh_table *)arg;
1366 long sched_next = 0;
1367 unsigned long now = jiffies;
1368 struct sk_buff *skb, *n;
1370 spin_lock(&tbl->proxy_queue.lock);
1372 skb_queue_walk_safe(&tbl->proxy_queue, skb, n) {
1373 long tdif = NEIGH_CB(skb)->sched_next - now;
1376 struct net_device *dev = skb->dev;
1378 __skb_unlink(skb, &tbl->proxy_queue);
1379 if (tbl->proxy_redo && netif_running(dev)) {
1381 tbl->proxy_redo(skb);
1388 } else if (!sched_next || tdif < sched_next)
1391 del_timer(&tbl->proxy_timer);
1393 mod_timer(&tbl->proxy_timer, jiffies + sched_next);
1394 spin_unlock(&tbl->proxy_queue.lock);
/* pneigh_enqueue(): delay a proxied request by a random amount up to
 * PROXY_DELAY.  Drops the skb when the queue exceeds PROXY_QLEN; pulls
 * the proxy timer forward if it would fire later than this skb's slot.
 */
1397 void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
1398 struct sk_buff *skb)
1400 unsigned long now = jiffies;
1401 unsigned long sched_next = now + (net_random() %
1402 NEIGH_VAR(p, PROXY_DELAY));
1404 if (tbl->proxy_queue.qlen > NEIGH_VAR(p, PROXY_QLEN)) {
1409 NEIGH_CB(skb)->sched_next = sched_next;
1410 NEIGH_CB(skb)->flags |= LOCALLY_ENQUEUED;
1412 spin_lock(&tbl->proxy_queue.lock);
1413 if (del_timer(&tbl->proxy_timer)) {
1414 if (time_before(tbl->proxy_timer.expires, sched_next))
1415 sched_next = tbl->proxy_timer.expires;
1419 __skb_queue_tail(&tbl->proxy_queue, skb);
1420 mod_timer(&tbl->proxy_timer, sched_next);
1421 spin_unlock(&tbl->proxy_queue.lock);
1423 EXPORT_SYMBOL(pneigh_enqueue);
/* Find the parms block for 'ifindex' in 'net'; ifindex 0 in init_net
 * selects the table's default parms.
 */
1425 static inline struct neigh_parms *lookup_neigh_parms(struct neigh_table *tbl,
1426 struct net *net, int ifindex)
1428 struct neigh_parms *p;
1430 for (p = &tbl->parms; p; p = p->next) {
1431 if ((p->dev && p->dev->ifindex == ifindex && net_eq(neigh_parms_net(p), net)) ||
1432 (!p->dev && !ifindex && net_eq(net, &init_net)))
/* neigh_parms_alloc(): clone the table defaults for a device, give the
 * driver a chance to adjust them via ndo_neigh_setup, and link the new
 * block into the table's parms list under tbl->lock.
 */
1439 struct neigh_parms *neigh_parms_alloc(struct net_device *dev,
1440 struct neigh_table *tbl)
1442 struct neigh_parms *p;
1443 struct net *net = dev_net(dev);
1444 const struct net_device_ops *ops = dev->netdev_ops;
1446 p = kmemdup(&tbl->parms, sizeof(*p), GFP_KERNEL);
1449 atomic_set(&p->refcnt, 1);
1451 neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
1454 write_pnet(&p->net, hold_net(net));
1455 p->sysctl_table = NULL;
1457 if (ops->ndo_neigh_setup && ops->ndo_neigh_setup(dev, p)) {
1464 write_lock_bh(&tbl->lock);
1465 p->next = tbl->parms.next;
1466 tbl->parms.next = p;
1467 write_unlock_bh(&tbl->lock);
1469 neigh_parms_data_state_cleanall(p);
1473 EXPORT_SYMBOL(neigh_parms_alloc);
/* RCU callback: drop the final reference on a neigh_parms after readers
 * traversing the parms list can no longer see it. */
1475 static void neigh_rcu_free_parms(struct rcu_head *head)
1477 struct neigh_parms *parms =
1478 container_of(head, struct neigh_parms, rcu_head);
1480 neigh_parms_put(parms);
/* Unlink @parms from @tbl's parms list under tbl->lock and schedule its
 * release via RCU.  The table's built-in default (&tbl->parms) is never
 * released.  Logs if the entry is not found on the list. */
1483 void neigh_parms_release(struct neigh_table *tbl, struct neigh_parms *parms)
1485 struct neigh_parms **p;
1487 if (!parms || parms == &tbl->parms)
1489 write_lock_bh(&tbl->lock);
1490 for (p = &tbl->parms.next; *p; p = &(*p)->next) {
1494 write_unlock_bh(&tbl->lock);
/* Drop the device ref now; the parms struct itself waits for RCU. */
1496 dev_put(parms->dev);
1497 call_rcu(&parms->rcu_head, neigh_rcu_free_parms);
1501 write_unlock_bh(&tbl->lock);
1502 neigh_dbg(1, "%s: not found\n", __func__);
1504 EXPORT_SYMBOL(neigh_parms_release);
/* Final destructor for a neigh_parms: drop its netns reference.
 * NOTE(review): the kfree of parms is elided in this excerpt — confirm. */
1506 static void neigh_parms_destroy(struct neigh_parms *parms)
1508 release_net(neigh_parms_net(parms));
/* Lockdep class so each table's proxy_queue lock gets its own class. */
1512 static struct lock_class_key neigh_table_proxy_queue_class;
/* One-time initialization of a neigh_table: default parms, percpu stats,
 * /proc stats entry, hash tables (neighbour + proxy), entry sizing, locks,
 * periodic GC work and the proxy timer.  Panics on allocation failure
 * since tables are created at boot/module init. */
1514 static void neigh_table_init_no_netlink(struct neigh_table *tbl)
1516 unsigned long now = jiffies;
1517 unsigned long phsize;
1519 write_pnet(&tbl->parms.net, &init_net);
1520 atomic_set(&tbl->parms.refcnt, 1);
1521 tbl->parms.reachable_time =
1522 neigh_rand_reach_time(NEIGH_VAR(&tbl->parms, BASE_REACHABLE_TIME));
1524 tbl->stats = alloc_percpu(struct neigh_statistics);
1526 panic("cannot create neighbour cache statistics");
1528 #ifdef CONFIG_PROC_FS
1529 if (!proc_create_data(tbl->id, 0, init_net.proc_net_stat,
1530 &neigh_stat_seq_fops, tbl))
1531 panic("cannot create neighbour proc dir entry");
/* Start with a small (1<<3 bucket) hash; it grows on demand. */
1534 RCU_INIT_POINTER(tbl->nht, neigh_hash_alloc(3));
1536 phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *);
1537 tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL);
1539 if (!tbl->nht || !tbl->phash_buckets)
1540 panic("cannot allocate neighbour cache hashes");
/* Protocols may pre-set entry_size; otherwise derive it from key_len
 * so per-entry private data stays NEIGH_PRIV_ALIGN-aligned. */
1542 if (!tbl->entry_size)
1543 tbl->entry_size = ALIGN(offsetof(struct neighbour, primary_key) +
1544 tbl->key_len, NEIGH_PRIV_ALIGN);
1546 WARN_ON(tbl->entry_size % NEIGH_PRIV_ALIGN);
1548 rwlock_init(&tbl->lock);
1549 INIT_DEFERRABLE_WORK(&tbl->gc_work, neigh_periodic_work);
1550 schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time);
1551 setup_timer(&tbl->proxy_timer, neigh_proxy_process, (unsigned long)tbl);
1552 skb_queue_head_init_class(&tbl->proxy_queue,
1553 &neigh_table_proxy_queue_class);
1555 tbl->last_flush = now;
1556 tbl->last_rand = now + tbl->parms.reachable_time * 20;
/* Initialize @tbl and register it on the global neigh_tables list under
 * neigh_tbl_lock.  Registering two tables for the same address family is
 * reported as an error (only the check result is logged; the first table
 * visibly remains linked in this excerpt). */
1559 void neigh_table_init(struct neigh_table *tbl)
1561 struct neigh_table *tmp;
1563 neigh_table_init_no_netlink(tbl);
1564 write_lock(&neigh_tbl_lock);
1565 for (tmp = neigh_tables; tmp; tmp = tmp->next) {
1566 if (tmp->family == tbl->family)
1569 tbl->next = neigh_tables;
1571 write_unlock(&neigh_tbl_lock);
1573 if (unlikely(tmp)) {
1574 pr_err("Registering multiple tables for family %d\n",
1579 EXPORT_SYMBOL(neigh_table_init);
/* Tear down @tbl: stop GC work and the proxy timer, purge queued proxy
 * skbs, drop all neighbours, unlink the table from neigh_tables, then
 * free the hash tables, /proc entry and percpu stats.  Warns if any
 * neighbour entries leaked past neigh_ifdown. */
1581 int neigh_table_clear(struct neigh_table *tbl)
1583 struct neigh_table **tp;
1585 /* It is not clean... Fix it to unload IPv6 module safely */
1586 cancel_delayed_work_sync(&tbl->gc_work);
1587 del_timer_sync(&tbl->proxy_timer);
1588 pneigh_queue_purge(&tbl->proxy_queue);
1589 neigh_ifdown(tbl, NULL);
1590 if (atomic_read(&tbl->entries))
1591 pr_crit("neighbour leakage\n");
1592 write_lock(&neigh_tbl_lock);
1593 for (tp = &neigh_tables; *tp; tp = &(*tp)->next) {
1599 write_unlock(&neigh_tbl_lock);
/* Safe: no lookups can reach the table any more, so rcu_dereference
 * with condition 1 is fine here. */
1601 call_rcu(&rcu_dereference_protected(tbl->nht, 1)->rcu,
1602 neigh_hash_free_rcu);
1605 kfree(tbl->phash_buckets);
1606 tbl->phash_buckets = NULL;
1608 remove_proc_entry(tbl->id, init_net.proc_net_stat);
1610 free_percpu(tbl->stats);
1615 EXPORT_SYMBOL(neigh_table_clear);
/* RTM_DELNEIGH handler: parse the ndmsg + NDA_DST attribute, locate the
 * table for the requested family, and delete either a proxy entry
 * (NTF_PROXY) or a regular neighbour (forced to NUD_FAILED via
 * neigh_update with ADMIN|OVERRIDE).  Returns -EAFNOSUPPORT if no table
 * matches the family. */
1617 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh)
1619 struct net *net = sock_net(skb->sk);
1621 struct nlattr *dst_attr;
1622 struct neigh_table *tbl;
1623 struct net_device *dev = NULL;
1627 if (nlmsg_len(nlh) < sizeof(*ndm))
1630 dst_attr = nlmsg_find_attr(nlh, sizeof(*ndm), NDA_DST);
1631 if (dst_attr == NULL)
1634 ndm = nlmsg_data(nlh);
1635 if (ndm->ndm_ifindex) {
1636 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1643 read_lock(&neigh_tbl_lock);
1644 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1645 struct neighbour *neigh;
1647 if (tbl->family != ndm->ndm_family)
/* Drop the table-list lock before touching the table itself;
 * the table cannot go away while the protocol is loaded. */
1649 read_unlock(&neigh_tbl_lock);
1651 if (nla_len(dst_attr) < tbl->key_len)
1654 if (ndm->ndm_flags & NTF_PROXY) {
1655 err = pneigh_delete(tbl, net, nla_data(dst_attr), dev);
1662 neigh = neigh_lookup(tbl, nla_data(dst_attr), dev);
1663 if (neigh == NULL) {
1668 err = neigh_update(neigh, NULL, NUD_FAILED,
1669 NEIGH_UPDATE_F_OVERRIDE |
1670 NEIGH_UPDATE_F_ADMIN);
1671 neigh_release(neigh);
1674 read_unlock(&neigh_tbl_lock);
1675 err = -EAFNOSUPPORT;
/* RTM_NEWNEIGH handler: parse attributes and create or update a neighbour
 * (or proxy entry when NTF_PROXY is set).  Honors NLM_F_CREATE,
 * NLM_F_EXCL and NLM_F_REPLACE semantics; NTF_USE just triggers an event
 * send instead of an lladdr/state update. */
1681 static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh)
1683 struct net *net = sock_net(skb->sk);
1685 struct nlattr *tb[NDA_MAX+1];
1686 struct neigh_table *tbl;
1687 struct net_device *dev = NULL;
1691 err = nlmsg_parse(nlh, sizeof(*ndm), tb, NDA_MAX, NULL);
1696 if (tb[NDA_DST] == NULL)
1699 ndm = nlmsg_data(nlh);
1700 if (ndm->ndm_ifindex) {
1701 dev = __dev_get_by_index(net, ndm->ndm_ifindex);
1707 if (tb[NDA_LLADDR] && nla_len(tb[NDA_LLADDR]) < dev->addr_len)
1711 read_lock(&neigh_tbl_lock);
1712 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1713 int flags = NEIGH_UPDATE_F_ADMIN | NEIGH_UPDATE_F_OVERRIDE;
1714 struct neighbour *neigh;
1717 if (tbl->family != ndm->ndm_family)
/* Table found for this family; safe to drop the list lock. */
1719 read_unlock(&neigh_tbl_lock);
1721 if (nla_len(tb[NDA_DST]) < tbl->key_len)
1723 dst = nla_data(tb[NDA_DST]);
1724 lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL;
1726 if (ndm->ndm_flags & NTF_PROXY) {
1727 struct pneigh_entry *pn;
/* creat=1: pneigh_lookup creates the entry if missing. */
1730 pn = pneigh_lookup(tbl, net, dst, dev, 1);
1732 pn->flags = ndm->ndm_flags;
1741 neigh = neigh_lookup(tbl, dst, dev);
1742 if (neigh == NULL) {
1743 if (!(nlh->nlmsg_flags & NLM_F_CREATE)) {
1748 neigh = __neigh_lookup_errno(tbl, dst, dev);
1749 if (IS_ERR(neigh)) {
1750 err = PTR_ERR(neigh);
1754 if (nlh->nlmsg_flags & NLM_F_EXCL) {
1756 neigh_release(neigh);
/* Without NLM_F_REPLACE an existing entry may only be refined,
 * not overridden. */
1760 if (!(nlh->nlmsg_flags & NLM_F_REPLACE))
1761 flags &= ~NEIGH_UPDATE_F_OVERRIDE;
1764 if (ndm->ndm_flags & NTF_USE) {
1765 neigh_event_send(neigh, NULL);
1768 err = neigh_update(neigh, lladdr, ndm->ndm_state, flags);
1769 neigh_release(neigh);
1773 read_unlock(&neigh_tbl_lock);
1774 err = -EAFNOSUPPORT;
/* Emit one NDTA_PARMS nested attribute describing @parms: ifindex (for
 * per-device parms), refcount, queue limits, probe counts and the various
 * timer intervals (as msecs).  Returns the nest end offset, or cancels
 * the nest and fails on overflow. */
1779 static int neightbl_fill_parms(struct sk_buff *skb, struct neigh_parms *parms)
1781 struct nlattr *nest;
1783 nest = nla_nest_start(skb, NDTA_PARMS);
1788 nla_put_u32(skb, NDTPA_IFINDEX, parms->dev->ifindex)) ||
1789 nla_put_u32(skb, NDTPA_REFCNT, atomic_read(&parms->refcnt)) ||
1790 nla_put_u32(skb, NDTPA_QUEUE_LENBYTES,
1791 NEIGH_VAR(parms, QUEUE_LEN_BYTES)) ||
1792 /* approximative value for deprecated QUEUE_LEN (in packets) */
1793 nla_put_u32(skb, NDTPA_QUEUE_LEN,
1794 NEIGH_VAR(parms, QUEUE_LEN_BYTES) / SKB_TRUESIZE(ETH_FRAME_LEN)) ||
1795 nla_put_u32(skb, NDTPA_PROXY_QLEN, NEIGH_VAR(parms, PROXY_QLEN)) ||
1796 nla_put_u32(skb, NDTPA_APP_PROBES, NEIGH_VAR(parms, APP_PROBES)) ||
1797 nla_put_u32(skb, NDTPA_UCAST_PROBES,
1798 NEIGH_VAR(parms, UCAST_PROBES)) ||
1799 nla_put_u32(skb, NDTPA_MCAST_PROBES,
1800 NEIGH_VAR(parms, MCAST_PROBES)) ||
1801 nla_put_msecs(skb, NDTPA_REACHABLE_TIME, parms->reachable_time) ||
1802 nla_put_msecs(skb, NDTPA_BASE_REACHABLE_TIME,
1803 NEIGH_VAR(parms, BASE_REACHABLE_TIME)) ||
1804 nla_put_msecs(skb, NDTPA_GC_STALETIME,
1805 NEIGH_VAR(parms, GC_STALETIME)) ||
1806 nla_put_msecs(skb, NDTPA_DELAY_PROBE_TIME,
1807 NEIGH_VAR(parms, DELAY_PROBE_TIME)) ||
1808 nla_put_msecs(skb, NDTPA_RETRANS_TIME,
1809 NEIGH_VAR(parms, RETRANS_TIME)) ||
1810 nla_put_msecs(skb, NDTPA_ANYCAST_DELAY,
1811 NEIGH_VAR(parms, ANYCAST_DELAY)) ||
1812 nla_put_msecs(skb, NDTPA_PROXY_DELAY,
1813 NEIGH_VAR(parms, PROXY_DELAY)) ||
1814 nla_put_msecs(skb, NDTPA_LOCKTIME,
1815 NEIGH_VAR(parms, LOCKTIME)))
1816 goto nla_put_failure;
1817 return nla_nest_end(skb, nest);
1820 nla_nest_cancel(skb, nest);
/* Build a full RTM_NEWNEIGHTBL message for @tbl: name, GC thresholds and
 * interval, an NDTA_CONFIG snapshot (hash geometry, entry counts, flush
 * timestamps), aggregated NDTA_STATS summed over all CPUs, and the
 * default parms.  tbl->lock is held for reading throughout so the dump
 * is internally consistent. */
1824 static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl,
1825 u32 pid, u32 seq, int type, int flags)
1827 struct nlmsghdr *nlh;
1828 struct ndtmsg *ndtmsg;
1830 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1834 ndtmsg = nlmsg_data(nlh);
1836 read_lock_bh(&tbl->lock);
1837 ndtmsg->ndtm_family = tbl->family;
1838 ndtmsg->ndtm_pad1 = 0;
1839 ndtmsg->ndtm_pad2 = 0;
1841 if (nla_put_string(skb, NDTA_NAME, tbl->id) ||
1842 nla_put_msecs(skb, NDTA_GC_INTERVAL, tbl->gc_interval) ||
1843 nla_put_u32(skb, NDTA_THRESH1, tbl->gc_thresh1) ||
1844 nla_put_u32(skb, NDTA_THRESH2, tbl->gc_thresh2) ||
1845 nla_put_u32(skb, NDTA_THRESH3, tbl->gc_thresh3))
1846 goto nla_put_failure;
1848 unsigned long now = jiffies;
1849 unsigned int flush_delta = now - tbl->last_flush;
1850 unsigned int rand_delta = now - tbl->last_rand;
1851 struct neigh_hash_table *nht;
1852 struct ndt_config ndc = {
1853 .ndtc_key_len = tbl->key_len,
1854 .ndtc_entry_size = tbl->entry_size,
1855 .ndtc_entries = atomic_read(&tbl->entries),
1856 .ndtc_last_flush = jiffies_to_msecs(flush_delta),
1857 .ndtc_last_rand = jiffies_to_msecs(rand_delta),
1858 .ndtc_proxy_qlen = tbl->proxy_queue.qlen,
/* Hash geometry is read under RCU since the table may be resized. */
1862 nht = rcu_dereference_bh(tbl->nht);
1863 ndc.ndtc_hash_rnd = nht->hash_rnd[0];
1864 ndc.ndtc_hash_mask = ((1 << nht->hash_shift) - 1);
1865 rcu_read_unlock_bh();
1867 if (nla_put(skb, NDTA_CONFIG, sizeof(ndc), &ndc))
1868 goto nla_put_failure;
1873 struct ndt_stats ndst;
1875 memset(&ndst, 0, sizeof(ndst));
/* Sum the percpu counters into one ndt_stats for userspace. */
1877 for_each_possible_cpu(cpu) {
1878 struct neigh_statistics *st;
1880 st = per_cpu_ptr(tbl->stats, cpu);
1881 ndst.ndts_allocs += st->allocs;
1882 ndst.ndts_destroys += st->destroys;
1883 ndst.ndts_hash_grows += st->hash_grows;
1884 ndst.ndts_res_failed += st->res_failed;
1885 ndst.ndts_lookups += st->lookups;
1886 ndst.ndts_hits += st->hits;
1887 ndst.ndts_rcv_probes_mcast += st->rcv_probes_mcast;
1888 ndst.ndts_rcv_probes_ucast += st->rcv_probes_ucast;
1889 ndst.ndts_periodic_gc_runs += st->periodic_gc_runs;
1890 ndst.ndts_forced_gc_runs += st->forced_gc_runs;
1893 if (nla_put(skb, NDTA_STATS, sizeof(ndst), &ndst))
1894 goto nla_put_failure;
/* The default parms entry must not be bound to a device. */
1897 BUG_ON(tbl->parms.dev);
1898 if (neightbl_fill_parms(skb, &tbl->parms) < 0)
1899 goto nla_put_failure;
1901 read_unlock_bh(&tbl->lock);
1902 return nlmsg_end(skb, nlh);
1905 read_unlock_bh(&tbl->lock);
1906 nlmsg_cancel(skb, nlh);
/* Build an RTM_NEWNEIGHTBL message for a single per-device parms entry:
 * just the table name plus one NDTA_PARMS nest.  tbl->lock is read-held
 * for a consistent snapshot. */
1910 static int neightbl_fill_param_info(struct sk_buff *skb,
1911 struct neigh_table *tbl,
1912 struct neigh_parms *parms,
1913 u32 pid, u32 seq, int type,
1916 struct ndtmsg *ndtmsg;
1917 struct nlmsghdr *nlh;
1919 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndtmsg), flags);
1923 ndtmsg = nlmsg_data(nlh);
1925 read_lock_bh(&tbl->lock);
1926 ndtmsg->ndtm_family = tbl->family;
1927 ndtmsg->ndtm_pad1 = 0;
1928 ndtmsg->ndtm_pad2 = 0;
1930 if (nla_put_string(skb, NDTA_NAME, tbl->id) < 0 ||
1931 neightbl_fill_parms(skb, parms) < 0)
1934 read_unlock_bh(&tbl->lock);
1935 return nlmsg_end(skb, nlh);
1937 read_unlock_bh(&tbl->lock);
1938 nlmsg_cancel(skb, nlh);
/* Netlink attribute policy for RTM_*NEIGHTBL table-level attributes. */
1942 static const struct nla_policy nl_neightbl_policy[NDTA_MAX+1] = {
1943 [NDTA_NAME] = { .type = NLA_STRING },
1944 [NDTA_THRESH1] = { .type = NLA_U32 },
1945 [NDTA_THRESH2] = { .type = NLA_U32 },
1946 [NDTA_THRESH3] = { .type = NLA_U32 },
1947 [NDTA_GC_INTERVAL] = { .type = NLA_U64 },
1948 [NDTA_PARMS] = { .type = NLA_NESTED },
/* Netlink attribute policy for the nested NDTA_PARMS attributes
 * (per-parms tunables; time values are u64 msecs). */
1951 static const struct nla_policy nl_ntbl_parm_policy[NDTPA_MAX+1] = {
1952 [NDTPA_IFINDEX] = { .type = NLA_U32 },
1953 [NDTPA_QUEUE_LEN] = { .type = NLA_U32 },
1954 [NDTPA_PROXY_QLEN] = { .type = NLA_U32 },
1955 [NDTPA_APP_PROBES] = { .type = NLA_U32 },
1956 [NDTPA_UCAST_PROBES] = { .type = NLA_U32 },
1957 [NDTPA_MCAST_PROBES] = { .type = NLA_U32 },
1958 [NDTPA_BASE_REACHABLE_TIME] = { .type = NLA_U64 },
1959 [NDTPA_GC_STALETIME] = { .type = NLA_U64 },
1960 [NDTPA_DELAY_PROBE_TIME] = { .type = NLA_U64 },
1961 [NDTPA_RETRANS_TIME] = { .type = NLA_U64 },
1962 [NDTPA_ANYCAST_DELAY] = { .type = NLA_U64 },
1963 [NDTPA_PROXY_DELAY] = { .type = NLA_U64 },
1964 [NDTPA_LOCKTIME] = { .type = NLA_U64 },
/* RTM_SETNEIGHTBL handler: find the table by NDTA_NAME (and family),
 * then, under tbl->lock, apply any NDTA_PARMS tunables to the parms
 * entry selected by NDTPA_IFINDEX, and apply table-level GC thresholds
 * and interval.  GC settings are restricted to init_net. */
1967 static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
1969 struct net *net = sock_net(skb->sk);
1970 struct neigh_table *tbl;
1971 struct ndtmsg *ndtmsg;
1972 struct nlattr *tb[NDTA_MAX+1];
1975 err = nlmsg_parse(nlh, sizeof(*ndtmsg), tb, NDTA_MAX,
1976 nl_neightbl_policy);
1980 if (tb[NDTA_NAME] == NULL) {
1985 ndtmsg = nlmsg_data(nlh);
1986 read_lock(&neigh_tbl_lock);
1987 for (tbl = neigh_tables; tbl; tbl = tbl->next) {
1988 if (ndtmsg->ndtm_family && tbl->family != ndtmsg->ndtm_family)
1991 if (nla_strcmp(tb[NDTA_NAME], tbl->id) == 0)
2001 * We acquire tbl->lock to be nice to the periodic timers and
2002 * make sure they always see a consistent set of values.
2004 write_lock_bh(&tbl->lock);
2006 if (tb[NDTA_PARMS]) {
2007 struct nlattr *tbp[NDTPA_MAX+1];
2008 struct neigh_parms *p;
2011 err = nla_parse_nested(tbp, NDTPA_MAX, tb[NDTA_PARMS],
2012 nl_ntbl_parm_policy);
2014 goto errout_tbl_lock;
2016 if (tbp[NDTPA_IFINDEX])
2017 ifindex = nla_get_u32(tbp[NDTPA_IFINDEX]);
2019 p = lookup_neigh_parms(tbl, net, ifindex);
2022 goto errout_tbl_lock;
/* Walk every supplied NDTPA_* attribute and apply it. */
2025 for (i = 1; i <= NDTPA_MAX; i++) {
2030 case NDTPA_QUEUE_LEN:
/* Deprecated packet count; convert to bytes. */
2031 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2032 nla_get_u32(tbp[i]) *
2033 SKB_TRUESIZE(ETH_FRAME_LEN));
2035 case NDTPA_QUEUE_LENBYTES:
2036 NEIGH_VAR_SET(p, QUEUE_LEN_BYTES,
2037 nla_get_u32(tbp[i]));
2039 case NDTPA_PROXY_QLEN:
2040 NEIGH_VAR_SET(p, PROXY_QLEN,
2041 nla_get_u32(tbp[i]));
2043 case NDTPA_APP_PROBES:
2044 NEIGH_VAR_SET(p, APP_PROBES,
2045 nla_get_u32(tbp[i]));
2047 case NDTPA_UCAST_PROBES:
2048 NEIGH_VAR_SET(p, UCAST_PROBES,
2049 nla_get_u32(tbp[i]));
2051 case NDTPA_MCAST_PROBES:
2052 NEIGH_VAR_SET(p, MCAST_PROBES,
2053 nla_get_u32(tbp[i]));
2055 case NDTPA_BASE_REACHABLE_TIME:
2056 NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
2057 nla_get_msecs(tbp[i]));
2059 case NDTPA_GC_STALETIME:
2060 NEIGH_VAR_SET(p, GC_STALETIME,
2061 nla_get_msecs(tbp[i]));
2063 case NDTPA_DELAY_PROBE_TIME:
2064 NEIGH_VAR_SET(p, DELAY_PROBE_TIME,
2065 nla_get_msecs(tbp[i]));
2067 case NDTPA_RETRANS_TIME:
2068 NEIGH_VAR_SET(p, RETRANS_TIME,
2069 nla_get_msecs(tbp[i]));
2071 case NDTPA_ANYCAST_DELAY:
2072 NEIGH_VAR_SET(p, ANYCAST_DELAY, nla_get_msecs(tbp[i]));
2074 case NDTPA_PROXY_DELAY:
2075 NEIGH_VAR_SET(p, PROXY_DELAY, nla_get_msecs(tbp[i]));
2077 case NDTPA_LOCKTIME:
2078 NEIGH_VAR_SET(p, LOCKTIME, nla_get_msecs(tbp[i]));
/* GC parameters are global to the table: only init_net may set them. */
2085 if ((tb[NDTA_THRESH1] || tb[NDTA_THRESH2] ||
2086 tb[NDTA_THRESH3] || tb[NDTA_GC_INTERVAL]) &&
2087 !net_eq(net, &init_net))
2088 goto errout_tbl_lock;
2090 if (tb[NDTA_THRESH1])
2091 tbl->gc_thresh1 = nla_get_u32(tb[NDTA_THRESH1]);
2093 if (tb[NDTA_THRESH2])
2094 tbl->gc_thresh2 = nla_get_u32(tb[NDTA_THRESH2]);
2096 if (tb[NDTA_THRESH3])
2097 tbl->gc_thresh3 = nla_get_u32(tb[NDTA_THRESH3]);
2099 if (tb[NDTA_GC_INTERVAL])
2100 tbl->gc_interval = nla_get_msecs(tb[NDTA_GC_INTERVAL]);
2105 write_unlock_bh(&tbl->lock);
2107 read_unlock(&neigh_tbl_lock);
/* Netlink dump callback for RTM_GETNEIGHTBL: for each table (optionally
 * filtered by family) emit the table info, then each per-device parms
 * entry belonging to the requester's netns.  cb->args[0]/[1] record the
 * table/parms resume positions for multi-part dumps. */
2112 static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2114 struct net *net = sock_net(skb->sk);
2115 int family, tidx, nidx = 0;
2116 int tbl_skip = cb->args[0];
2117 int neigh_skip = cb->args[1];
2118 struct neigh_table *tbl;
2120 family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2122 read_lock(&neigh_tbl_lock);
2123 for (tbl = neigh_tables, tidx = 0; tbl; tbl = tbl->next, tidx++) {
2124 struct neigh_parms *p;
2126 if (tidx < tbl_skip || (family && tbl->family != family))
2129 if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid,
2130 cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL,
/* Skip tbl->parms itself (index 0 is the default entry). */
2134 for (nidx = 0, p = tbl->parms.next; p; p = p->next) {
2135 if (!net_eq(neigh_parms_net(p), net))
2138 if (nidx < neigh_skip)
2141 if (neightbl_fill_param_info(skb, tbl, p,
2142 NETLINK_CB(cb->skb).portid,
2154 read_unlock(&neigh_tbl_lock);
/* Fill one RTM_*NEIGH message for @neigh: ndmsg header, NDA_DST key,
 * NDA_LLADDR (only when the entry is NUD_VALID, snapshotted under
 * neigh->lock), cache timing info and probe count. */
2161 static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh,
2162 u32 pid, u32 seq, int type, unsigned int flags)
2164 unsigned long now = jiffies;
2165 struct nda_cacheinfo ci;
2166 struct nlmsghdr *nlh;
2169 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2173 ndm = nlmsg_data(nlh);
2174 ndm->ndm_family = neigh->ops->family;
2177 ndm->ndm_flags = neigh->flags;
2178 ndm->ndm_type = neigh->type;
2179 ndm->ndm_ifindex = neigh->dev->ifindex;
2181 if (nla_put(skb, NDA_DST, neigh->tbl->key_len, neigh->primary_key))
2182 goto nla_put_failure;
/* State, lladdr and timestamps must be read consistently. */
2184 read_lock_bh(&neigh->lock);
2185 ndm->ndm_state = neigh->nud_state;
2186 if (neigh->nud_state & NUD_VALID) {
2187 char haddr[MAX_ADDR_LEN];
2189 neigh_ha_snapshot(haddr, neigh, neigh->dev);
2190 if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) {
2191 read_unlock_bh(&neigh->lock);
2192 goto nla_put_failure;
2196 ci.ndm_used = jiffies_to_clock_t(now - neigh->used);
2197 ci.ndm_confirmed = jiffies_to_clock_t(now - neigh->confirmed);
2198 ci.ndm_updated = jiffies_to_clock_t(now - neigh->updated);
/* Exclude the reference held by this dump itself. */
2199 ci.ndm_refcnt = atomic_read(&neigh->refcnt) - 1;
2200 read_unlock_bh(&neigh->lock);
2202 if (nla_put_u32(skb, NDA_PROBES, atomic_read(&neigh->probes)) ||
2203 nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci))
2204 goto nla_put_failure;
2206 return nlmsg_end(skb, nlh);
2209 nlmsg_cancel(skb, nlh);
/* Fill one RTM_*NEIGH message for a proxy entry: NTF_PROXY is forced into
 * ndm_flags and the state is NUD_NONE since proxies have no resolution
 * state machine. */
2213 static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn,
2214 u32 pid, u32 seq, int type, unsigned int flags,
2215 struct neigh_table *tbl)
2217 struct nlmsghdr *nlh;
2220 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags);
2224 ndm = nlmsg_data(nlh);
2225 ndm->ndm_family = tbl->family;
2228 ndm->ndm_flags = pn->flags | NTF_PROXY;
2229 ndm->ndm_type = NDA_DST;
2230 ndm->ndm_ifindex = pn->dev->ifindex;
2231 ndm->ndm_state = NUD_NONE;
2233 if (nla_put(skb, NDA_DST, tbl->key_len, pn->key))
2234 goto nla_put_failure;
2236 return nlmsg_end(skb, nlh);
2239 nlmsg_cancel(skb, nlh);
/* Notify kernel listeners (netevent chain) and userspace (RTM_NEWNEIGH)
 * that @neigh changed. */
2243 static void neigh_update_notify(struct neighbour *neigh)
2245 call_netevent_notifiers(NETEVENT_NEIGH_UPDATE, neigh);
2246 __neigh_notify(neigh, RTM_NEWNEIGH, 0);
/* Dump all neighbours of @tbl visible in the requester's netns, walking
 * the RCU-protected hash under rcu_read_lock_bh.  cb->args[1]/[2] hold
 * the bucket/index resume point for multi-part dumps. */
2249 static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2250 struct netlink_callback *cb)
2252 struct net *net = sock_net(skb->sk);
2253 struct neighbour *n;
2254 int rc, h, s_h = cb->args[1];
2255 int idx, s_idx = idx = cb->args[2];
2256 struct neigh_hash_table *nht;
2259 nht = rcu_dereference_bh(tbl->nht);
2261 for (h = s_h; h < (1 << nht->hash_shift); h++) {
2264 for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0;
2266 n = rcu_dereference_bh(n->next)) {
2267 if (!net_eq(dev_net(n->dev), net))
2271 if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
/* <= 0 means the skb is full; stop and resume later. */
2274 NLM_F_MULTI) <= 0) {
2284 rcu_read_unlock_bh();
/* Dump all proxy entries of @tbl for the requester's netns under
 * tbl->lock (read).  cb->args[3]/[4] hold the resume bucket/index.
 * NOTE(review): the netns test uses raw pointer inequality instead of
 * net_eq() as elsewhere — equivalent without CONFIG_NET_NS refs, but
 * worth normalizing. */
2290 static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
2291 struct netlink_callback *cb)
2293 struct pneigh_entry *n;
2294 struct net *net = sock_net(skb->sk);
2295 int rc, h, s_h = cb->args[3];
2296 int idx, s_idx = idx = cb->args[4];
2298 read_lock_bh(&tbl->lock);
2300 for (h = s_h; h <= PNEIGH_HASHMASK; h++) {
2303 for (n = tbl->phash_buckets[h], idx = 0; n; n = n->next) {
2304 if (dev_net(n->dev) != net)
2308 if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid,
2311 NLM_F_MULTI, tbl) <= 0) {
2312 read_unlock_bh(&tbl->lock);
2321 read_unlock_bh(&tbl->lock);
/* Top-level RTM_GETNEIGH dump: iterate the registered tables (filtered by
 * family), dispatching to pneigh_dump_table when the request carries a
 * full ndmsg with ndm_flags == NTF_PROXY, otherwise neigh_dump_table.
 * cb->args[0] is the table resume index; the per-table args are reset
 * before each new table. */
2330 static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb)
2332 struct neigh_table *tbl;
2337 read_lock(&neigh_tbl_lock);
2338 family = ((struct rtgenmsg *) nlmsg_data(cb->nlh))->rtgen_family;
2340 /* check for full ndmsg structure presence, family member is
2341 * the same for both structures
2343 if (nlmsg_len(cb->nlh) >= sizeof(struct ndmsg) &&
2344 ((struct ndmsg *) nlmsg_data(cb->nlh))->ndm_flags == NTF_PROXY)
2349 for (tbl = neigh_tables, t = 0; tbl;
2350 tbl = tbl->next, t++) {
2351 if (t < s_t || (family && tbl->family != family))
2354 memset(&cb->args[1], 0, sizeof(cb->args) -
2355 sizeof(cb->args[0]));
2357 err = pneigh_dump_table(tbl, skb, cb);
2359 err = neigh_dump_table(tbl, skb, cb);
2363 read_unlock(&neigh_tbl_lock);
/* Invoke @cb on every neighbour in @tbl.  Holds tbl->lock (read) to keep
 * the hash from being resized, and RCU-BH for safe bucket traversal. */
2369 void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie)
2372 struct neigh_hash_table *nht;
2375 nht = rcu_dereference_bh(tbl->nht);
2377 read_lock(&tbl->lock); /* avoid resizes */
2378 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2379 struct neighbour *n;
2381 for (n = rcu_dereference_bh(nht->hash_buckets[chain]);
2383 n = rcu_dereference_bh(n->next))
2386 read_unlock(&tbl->lock);
2387 rcu_read_unlock_bh();
2389 EXPORT_SYMBOL(neigh_for_each);
2391 /* The tbl->lock must be held as a writer and BH disabled. */
/* For every neighbour, call @cb under n->lock; entries for which it
 * returns nonzero are unlinked from the hash (the write lock on tbl
 * makes rcu_dereference_protected legitimate) and then cleaned up and
 * released outside n->lock. */
2392 void __neigh_for_each_release(struct neigh_table *tbl,
2393 int (*cb)(struct neighbour *))
2396 struct neigh_hash_table *nht;
2398 nht = rcu_dereference_protected(tbl->nht,
2399 lockdep_is_held(&tbl->lock));
2400 for (chain = 0; chain < (1 << nht->hash_shift); chain++) {
2401 struct neighbour *n;
2402 struct neighbour __rcu **np;
2404 np = &nht->hash_buckets[chain];
2405 while ((n = rcu_dereference_protected(*np,
2406 lockdep_is_held(&tbl->lock))) != NULL) {
2409 write_lock(&n->lock);
2412 rcu_assign_pointer(*np,
2413 rcu_dereference_protected(n->next,
2414 lockdep_is_held(&tbl->lock)));
2418 write_unlock(&n->lock);
2420 neigh_cleanup_and_release(n);
2424 EXPORT_SYMBOL(__neigh_for_each_release);
2426 #ifdef CONFIG_PROC_FS
/* seq_file helper: return the first neighbour visible to this iteration,
 * scanning buckets from state->bucket, filtering by netns, optional
 * protocol sub-iterator, and the NEIGH_SEQ_SKIP_NOARP flag. */
2428 static struct neighbour *neigh_get_first(struct seq_file *seq)
2430 struct neigh_seq_state *state = seq->private;
2431 struct net *net = seq_file_net(seq);
2432 struct neigh_hash_table *nht = state->nht;
2433 struct neighbour *n = NULL;
2434 int bucket = state->bucket;
2436 state->flags &= ~NEIGH_SEQ_IS_PNEIGH;
2437 for (bucket = 0; bucket < (1 << nht->hash_shift); bucket++) {
2438 n = rcu_dereference_bh(nht->hash_buckets[bucket]);
2441 if (!net_eq(dev_net(n->dev), net))
2443 if (state->neigh_sub_iter) {
2447 v = state->neigh_sub_iter(state, n, &fakep);
2451 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2453 if (n->nud_state & ~NUD_NOARP)
2456 n = rcu_dereference_bh(n->next);
2462 state->bucket = bucket;
/* seq_file helper: advance from @n to the next matching neighbour,
 * rolling over to the next hash bucket when the current chain ends.
 * Applies the same netns / sub-iterator / SKIP_NOARP filters as
 * neigh_get_first. */
2467 static struct neighbour *neigh_get_next(struct seq_file *seq,
2468 struct neighbour *n,
2471 struct neigh_seq_state *state = seq->private;
2472 struct net *net = seq_file_net(seq);
2473 struct neigh_hash_table *nht = state->nht;
2475 if (state->neigh_sub_iter) {
2476 void *v = state->neigh_sub_iter(state, n, pos);
2480 n = rcu_dereference_bh(n->next);
2484 if (!net_eq(dev_net(n->dev), net))
2486 if (state->neigh_sub_iter) {
2487 void *v = state->neigh_sub_iter(state, n, pos);
2492 if (!(state->flags & NEIGH_SEQ_SKIP_NOARP))
2495 if (n->nud_state & ~NUD_NOARP)
2498 n = rcu_dereference_bh(n->next);
2504 if (++state->bucket >= (1 << nht->hash_shift))
2507 n = rcu_dereference_bh(nht->hash_buckets[state->bucket]);
/* seq_file helper: return the neighbour at position *pos by stepping
 * from the first entry; NULL when *pos is past the end. */
2515 static struct neighbour *neigh_get_idx(struct seq_file *seq, loff_t *pos)
2517 struct neighbour *n = neigh_get_first(seq);
2522 n = neigh_get_next(seq, n, pos);
2527 return *pos ? NULL : n;
/* seq_file helper: first proxy entry in this netns, scanning the pneigh
 * hash buckets; sets NEIGH_SEQ_IS_PNEIGH so _next knows which phase the
 * iteration is in. */
2530 static struct pneigh_entry *pneigh_get_first(struct seq_file *seq)
2532 struct neigh_seq_state *state = seq->private;
2533 struct net *net = seq_file_net(seq);
2534 struct neigh_table *tbl = state->tbl;
2535 struct pneigh_entry *pn = NULL;
2536 int bucket = state->bucket;
2538 state->flags |= NEIGH_SEQ_IS_PNEIGH;
2539 for (bucket = 0; bucket <= PNEIGH_HASHMASK; bucket++) {
2540 pn = tbl->phash_buckets[bucket];
2541 while (pn && !net_eq(pneigh_net(pn), net))
2546 state->bucket = bucket;
/* seq_file helper: advance to the next proxy entry in this netns,
 * moving to the next bucket when the chain is exhausted. */
2551 static struct pneigh_entry *pneigh_get_next(struct seq_file *seq,
2552 struct pneigh_entry *pn,
2555 struct neigh_seq_state *state = seq->private;
2556 struct net *net = seq_file_net(seq);
2557 struct neigh_table *tbl = state->tbl;
2561 } while (pn && !net_eq(pneigh_net(pn), net));
2564 if (++state->bucket > PNEIGH_HASHMASK)
2566 pn = tbl->phash_buckets[state->bucket];
2567 while (pn && !net_eq(pneigh_net(pn), net))
/* seq_file helper: proxy entry at position *pos, mirroring
 * neigh_get_idx for the pneigh phase. */
2579 static struct pneigh_entry *pneigh_get_idx(struct seq_file *seq, loff_t *pos)
2581 struct pneigh_entry *pn = pneigh_get_first(seq);
2586 pn = pneigh_get_next(seq, pn, pos);
2591 return *pos ? NULL : pn;
/* seq_file helper: resolve position *pos across both phases — regular
 * neighbours first, then (unless NEIGH_SEQ_NEIGH_ONLY) proxy entries. */
2594 static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos)
2596 struct neigh_seq_state *state = seq->private;
2598 loff_t idxpos = *pos;
2600 rc = neigh_get_idx(seq, &idxpos);
2601 if (!rc && !(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2602 rc = pneigh_get_idx(seq, &idxpos);
/* seq_file ->start for protocol /proc tables (arp, ndisc): record the
 * table and flags, pin the hash under RCU-BH (released in
 * neigh_seq_stop), and return the entry at *pos or SEQ_START_TOKEN. */
2607 void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags)
2610 struct neigh_seq_state *state = seq->private;
2614 state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH);
2617 state->nht = rcu_dereference_bh(tbl->nht);
2619 return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN;
2621 EXPORT_SYMBOL(neigh_seq_start);
/* seq_file ->next: step the two-phase iteration — after the header token
 * comes the first neighbour; when the neighbour phase ends, fall through
 * to the proxy phase unless NEIGH_SEQ_NEIGH_ONLY was requested. */
2623 void *neigh_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2625 struct neigh_seq_state *state;
2628 if (v == SEQ_START_TOKEN) {
2629 rc = neigh_get_first(seq);
2633 state = seq->private;
2634 if (!(state->flags & NEIGH_SEQ_IS_PNEIGH)) {
2635 rc = neigh_get_next(seq, v, NULL);
2638 if (!(state->flags & NEIGH_SEQ_NEIGH_ONLY))
2639 rc = pneigh_get_first(seq);
/* Already in the pneigh phase: NEIGH_ONLY must not be set. */
2641 BUG_ON(state->flags & NEIGH_SEQ_NEIGH_ONLY);
2642 rc = pneigh_get_next(seq, v, NULL);
2648 EXPORT_SYMBOL(neigh_seq_next);
/* seq_file ->stop: drop the RCU-BH read section taken in neigh_seq_start. */
2650 void neigh_seq_stop(struct seq_file *seq, void *v)
2653 rcu_read_unlock_bh();
2655 EXPORT_SYMBOL(neigh_seq_stop);
2657 /* statistics via seq_file */
/* Stats seq_file ->start: position 0 is the header token; positions >= 1
 * map to possible CPUs (pos-1 based) whose percpu stats are shown. */
2659 static void *neigh_stat_seq_start(struct seq_file *seq, loff_t *pos)
2661 struct neigh_table *tbl = seq->private;
2665 return SEQ_START_TOKEN;
2667 for (cpu = *pos-1; cpu < nr_cpu_ids; ++cpu) {
2668 if (!cpu_possible(cpu))
2671 return per_cpu_ptr(tbl->stats, cpu);
/* Stats seq_file ->next: advance to the next possible CPU's stats. */
2676 static void *neigh_stat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2678 struct neigh_table *tbl = seq->private;
2681 for (cpu = *pos; cpu < nr_cpu_ids; ++cpu) {
2682 if (!cpu_possible(cpu))
2685 return per_cpu_ptr(tbl->stats, cpu);
2690 static void neigh_stat_seq_stop(struct seq_file *seq, void *v)
/* Stats seq_file ->show: print the column header for the start token,
 * otherwise one hex-formatted line of per-CPU counters. */
2695 static int neigh_stat_seq_show(struct seq_file *seq, void *v)
2697 struct neigh_table *tbl = seq->private;
2698 struct neigh_statistics *st = v;
2700 if (v == SEQ_START_TOKEN) {
2701 seq_printf(seq, "entries allocs destroys hash_grows lookups hits res_failed rcv_probes_mcast rcv_probes_ucast periodic_gc_runs forced_gc_runs unresolved_discards\n");
2705 seq_printf(seq, "%08x %08lx %08lx %08lx %08lx %08lx %08lx "
2706 "%08lx %08lx %08lx %08lx %08lx\n",
2707 atomic_read(&tbl->entries),
2718 st->rcv_probes_mcast,
2719 st->rcv_probes_ucast,
2721 st->periodic_gc_runs,
/* seq_operations wiring the per-CPU statistics iterator above. */
2729 static const struct seq_operations neigh_stat_seq_ops = {
2730 .start = neigh_stat_seq_start,
2731 .next = neigh_stat_seq_next,
2732 .stop = neigh_stat_seq_stop,
2733 .show = neigh_stat_seq_show,
/* /proc open: start the seq_file and stash the table (stored as proc
 * entry data) in seq->private for the iterator callbacks. */
2736 static int neigh_stat_seq_open(struct inode *inode, struct file *file)
2738 int ret = seq_open(file, &neigh_stat_seq_ops);
2741 struct seq_file *sf = file->private_data;
2742 sf->private = PDE_DATA(inode);
/* file_operations for /proc/net/stat/<tbl->id>. */
2747 static const struct file_operations neigh_stat_seq_fops = {
2748 .owner = THIS_MODULE,
2749 .open = neigh_stat_seq_open,
2751 .llseek = seq_lseek,
2752 .release = seq_release,
2755 #endif /* CONFIG_PROC_FS */
/* Worst-case payload size of one neighbour netlink message, used to size
 * notification skbs in __neigh_notify. */
2757 static inline size_t neigh_nlmsg_size(void)
2759 return NLMSG_ALIGN(sizeof(struct ndmsg))
2760 + nla_total_size(MAX_ADDR_LEN) /* NDA_DST */
2761 + nla_total_size(MAX_ADDR_LEN) /* NDA_LLADDR */
2762 + nla_total_size(sizeof(struct nda_cacheinfo))
2763 + nla_total_size(4); /* NDA_PROBES */
/* Broadcast a neighbour event to RTNLGRP_NEIGH listeners.  GFP_ATOMIC
 * because this can run from timer/softirq context; on failure the error
 * is recorded on the group via rtnl_set_sk_err. */
2766 static void __neigh_notify(struct neighbour *n, int type, int flags)
2768 struct net *net = dev_net(n->dev);
2769 struct sk_buff *skb;
2772 skb = nlmsg_new(neigh_nlmsg_size(), GFP_ATOMIC);
2776 err = neigh_fill_info(skb, n, 0, 0, type, flags);
2778 /* -EMSGSIZE implies BUG in neigh_nlmsg_size() */
2779 WARN_ON(err == -EMSGSIZE);
2783 rtnl_notify(skb, net, 0, RTNLGRP_NEIGH, NULL, GFP_ATOMIC);
2787 rtnl_set_sk_err(net, RTNLGRP_NEIGH, err);
/* Ask userspace resolvers (app probes) to resolve @n by sending an
 * RTM_GETNEIGH request-style notification. */
2790 void neigh_app_ns(struct neighbour *n)
2792 __neigh_notify(n, RTM_GETNEIGH, NLM_F_REQUEST);
2794 EXPORT_SYMBOL(neigh_app_ns);
2796 #ifdef CONFIG_SYSCTL
/* Upper bounds for the sysctl handlers below; unres_qlen is expressed in
 * packets, hence the division by the per-packet truesize. */
2798 static int int_max = INT_MAX;
2799 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
/* sysctl handler for the deprecated packet-count unres_qlen knob: expose
 * the byte-based QUEUE_LEN_BYTES value converted to packets, and convert
 * written packet counts back to bytes. */
2801 static int proc_unres_qlen(struct ctl_table *ctl, int write,
2802 void __user *buffer, size_t *lenp, loff_t *ppos)
2805 struct ctl_table tmp = *ctl;
2808 tmp.extra2 = &unres_qlen_max;
2811 size = *(int *)ctl->data / SKB_TRUESIZE(ETH_FRAME_LEN);
2812 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2815 *(int *)ctl->data = size * SKB_TRUESIZE(ETH_FRAME_LEN);
/* Look up @dev's per-device neigh_parms for @family under RCU.
 * NOTE(review): only the AF_INET branch is visible in this excerpt;
 * other families are presumably handled in elided lines. */
2819 static struct neigh_parms *neigh_get_dev_parms_rcu(struct net_device *dev,
2822 if (family == AF_INET)
2823 return __in_dev_arp_parms_get_rcu(dev);
/* Propagate a changed default parm value (data[@index]) to every device
 * in @net that has not explicitly overridden it (data_state bit clear). */
2827 static void neigh_copy_dflt_parms(struct net *net, struct neigh_parms *p,
2830 struct net_device *dev;
2831 int family = neigh_parms_family(p);
2834 for_each_netdev_rcu(net, dev) {
2835 struct neigh_parms *dst_p =
2836 neigh_get_dev_parms_rcu(dev, family);
2838 if (dst_p && !test_bit(index, dst_p->data_state))
2839 dst_p->data[index] = p->data[index];
/* Common post-write hook for neigh sysctl handlers: mark the written parm
 * as explicitly set; if this is the default (NULL dev) entry, copy the
 * new value to all devices still using the default. */
2844 static void neigh_proc_update(struct ctl_table *ctl, int write)
2846 struct net_device *dev = ctl->extra1;
2847 struct neigh_parms *p = ctl->extra2;
2848 struct net *net = p->net;
/* Recover the parm index from the data pointer's offset into p->data. */
2849 int index = (int *) ctl->data - p->data;
2854 set_bit(index, p->data_state);
2855 if (!dev) /* NULL dev means this is default value */
2856 neigh_copy_dflt_parms(net, p, index);
/* sysctl handler: clamped int in [0, INT_MAX], then run the common
 * propagation hook. */
2859 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
2860 void __user *buffer,
2861 size_t *lenp, loff_t *ppos)
2863 struct ctl_table tmp = *ctl;
2867 tmp.extra2 = &int_max;
2869 ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
2870 neigh_proc_update(ctl, write);
/* sysctl handler: plain int, plus the common propagation hook. */
2874 int neigh_proc_dointvec(struct ctl_table *ctl, int write,
2875 void __user *buffer, size_t *lenp, loff_t *ppos)
2877 int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
2879 neigh_proc_update(ctl, write);
2882 EXPORT_SYMBOL(neigh_proc_dointvec);
/* sysctl handler: value stored in jiffies, exposed in seconds. */
2884 int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
2885 void __user *buffer,
2886 size_t *lenp, loff_t *ppos)
2888 int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
2890 neigh_proc_update(ctl, write);
2893 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
/* sysctl handler: value stored in jiffies, exposed in USER_HZ ticks. */
2895 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
2896 void __user *buffer,
2897 size_t *lenp, loff_t *ppos)
2899 int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
2901 neigh_proc_update(ctl, write);
/* sysctl handler: value stored in jiffies, exposed in milliseconds. */
2905 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
2906 void __user *buffer,
2907 size_t *lenp, loff_t *ppos)
2909 int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
2911 neigh_proc_update(ctl, write);
2914 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
/* sysctl handler: deprecated packet-count unres_qlen (see
 * proc_unres_qlen), plus the common propagation hook. */
2916 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
2917 void __user *buffer,
2918 size_t *lenp, loff_t *ppos)
2920 int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
2922 neigh_proc_update(ctl, write);
/* Offset of parm @index inside neigh_parms->data, encoded as a fake
 * pointer; fixed up to a real address when the template is instantiated
 * per device. */
2926 #define NEIGH_PARMS_DATA_OFFSET(index) \
2927 (&((struct neigh_parms *) 0)->data[index])
/* Build one ctl_table slot for NEIGH_VAR_<attr> backed by
 * NEIGH_VAR_<data_attr>'s storage, with the given proc handler.  The
 * *_REUSED_ENTRY variants alias a second sysctl name (different unit or
 * deprecated spelling) onto the same storage. */
2929 #define NEIGH_SYSCTL_ENTRY(attr, data_attr, name, mval, proc) \
2930 [NEIGH_VAR_ ## attr] = { \
2932 .data = NEIGH_PARMS_DATA_OFFSET(NEIGH_VAR_ ## data_attr), \
2933 .maxlen = sizeof(int), \
2935 .proc_handler = proc, \
2938 #define NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(attr, name) \
2939 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_zero_intmax)
2941 #define NEIGH_SYSCTL_JIFFIES_ENTRY(attr, name) \
2942 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_jiffies)
2944 #define NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(attr, name) \
2945 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_userhz_jiffies)
2947 #define NEIGH_SYSCTL_MS_JIFFIES_ENTRY(attr, name) \
2948 NEIGH_SYSCTL_ENTRY(attr, attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
2950 #define NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(attr, data_attr, name) \
2951 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_ms_jiffies)
2953 #define NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(attr, data_attr, name) \
2954 NEIGH_SYSCTL_ENTRY(attr, data_attr, name, 0644, neigh_proc_dointvec_unres_qlen)
2956 static struct neigh_sysctl_table {
2957 struct ctl_table_header *sysctl_header;
2958 struct ctl_table neigh_vars[NEIGH_VAR_MAX + 1];
2959 } neigh_sysctl_template __read_mostly = {
2961 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(MCAST_PROBES, "mcast_solicit"),
2962 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(UCAST_PROBES, "ucast_solicit"),
2963 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(APP_PROBES, "app_solicit"),
2964 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(RETRANS_TIME, "retrans_time"),
2965 NEIGH_SYSCTL_JIFFIES_ENTRY(BASE_REACHABLE_TIME, "base_reachable_time"),
2966 NEIGH_SYSCTL_JIFFIES_ENTRY(DELAY_PROBE_TIME, "delay_first_probe_time"),
2967 NEIGH_SYSCTL_JIFFIES_ENTRY(GC_STALETIME, "gc_stale_time"),
2968 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(QUEUE_LEN_BYTES, "unres_qlen_bytes"),
2969 NEIGH_SYSCTL_ZERO_INTMAX_ENTRY(PROXY_QLEN, "proxy_qlen"),
2970 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(ANYCAST_DELAY, "anycast_delay"),
2971 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(PROXY_DELAY, "proxy_delay"),
2972 NEIGH_SYSCTL_USERHZ_JIFFIES_ENTRY(LOCKTIME, "locktime"),
2973 NEIGH_SYSCTL_UNRES_QLEN_REUSED_ENTRY(QUEUE_LEN, QUEUE_LEN_BYTES, "unres_qlen"),
2974 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(RETRANS_TIME_MS, RETRANS_TIME, "retrans_time_ms"),
2975 NEIGH_SYSCTL_MS_JIFFIES_REUSED_ENTRY(BASE_REACHABLE_TIME_MS, BASE_REACHABLE_TIME, "base_reachable_time_ms"),
2976 [NEIGH_VAR_GC_INTERVAL] = {
2977 .procname = "gc_interval",
2978 .maxlen = sizeof(int),
2980 .proc_handler = proc_dointvec_jiffies,
2982 [NEIGH_VAR_GC_THRESH1] = {
2983 .procname = "gc_thresh1",
2984 .maxlen = sizeof(int),
2988 .proc_handler = proc_dointvec_minmax,
2990 [NEIGH_VAR_GC_THRESH2] = {
2991 .procname = "gc_thresh2",
2992 .maxlen = sizeof(int),
2996 .proc_handler = proc_dointvec_minmax,
2998 [NEIGH_VAR_GC_THRESH3] = {
2999 .procname = "gc_thresh3",
3000 .maxlen = sizeof(int),
3004 .proc_handler = proc_dointvec_minmax,
3010 int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
3011 proc_handler *handler)
3014 struct neigh_sysctl_table *t;
3015 const char *dev_name_source;
3016 char neigh_path[ sizeof("net//neigh/") + IFNAMSIZ + IFNAMSIZ ];
3019 t = kmemdup(&neigh_sysctl_template, sizeof(*t), GFP_KERNEL);
3023 for (i = 0; i < ARRAY_SIZE(t->neigh_vars); i++) {
3024 t->neigh_vars[i].data += (long) p;
3025 t->neigh_vars[i].extra1 = dev;
3026 t->neigh_vars[i].extra2 = p;
3030 dev_name_source = dev->name;
3031 /* Terminate the table early */
3032 memset(&t->neigh_vars[NEIGH_VAR_GC_INTERVAL], 0,
3033 sizeof(t->neigh_vars[NEIGH_VAR_GC_INTERVAL]));
3035 dev_name_source = "default";
3036 t->neigh_vars[NEIGH_VAR_GC_INTERVAL].data = (int *)(p + 1);
3037 t->neigh_vars[NEIGH_VAR_GC_THRESH1].data = (int *)(p + 1) + 1;
3038 t->neigh_vars[NEIGH_VAR_GC_THRESH2].data = (int *)(p + 1) + 2;
3039 t->neigh_vars[NEIGH_VAR_GC_THRESH3].data = (int *)(p + 1) + 3;
3044 t->neigh_vars[NEIGH_VAR_RETRANS_TIME].proc_handler = handler;
3046 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler = handler;
3047 /* RetransTime (in milliseconds)*/
3048 t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
3049 /* ReachableTime (in milliseconds) */
3050 t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
3053 /* Don't export sysctls to unprivileged users */
3054 if (neigh_parms_net(p)->user_ns != &init_user_ns)
3055 t->neigh_vars[0].procname = NULL;
3057 switch (neigh_parms_family(p)) {
3068 snprintf(neigh_path, sizeof(neigh_path), "net/%s/neigh/%s",
3069 p_name, dev_name_source);
3071 register_net_sysctl(neigh_parms_net(p), neigh_path, t->neigh_vars);
3072 if (!t->sysctl_header)
3075 p->sysctl_table = t;
3083 EXPORT_SYMBOL(neigh_sysctl_register);
3085 void neigh_sysctl_unregister(struct neigh_parms *p)
3087 if (p->sysctl_table) {
3088 struct neigh_sysctl_table *t = p->sysctl_table;
3089 p->sysctl_table = NULL;
3090 unregister_net_sysctl_table(t->sysctl_header);
3094 EXPORT_SYMBOL(neigh_sysctl_unregister);
3096 #endif /* CONFIG_SYSCTL */
3098 static int __init neigh_init(void)
3100 rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL);
3101 rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL);
3102 rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL);
3104 rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info,
3106 rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL);
3111 subsys_initcall(neigh_init);