c9d90eb64046c518231f3fb20b4d380490857b47
[cascardo/linux.git] / net / netfilter / core.c
1 /* netfilter.c: look after the filters for various protocols.
2  * Heavily influenced by the old firewall.c by David Bonn and Alan Cox.
3  *
4  * Thanks to Rob `CmdrTaco' Malda for not influencing this code in any
5  * way.
6  *
7  * Rusty Russell (C)2000 -- This code is GPL.
8  * Patrick McHardy (c) 2006-2012
9  */
10 #include <linux/kernel.h>
11 #include <linux/netfilter.h>
12 #include <net/protocol.h>
13 #include <linux/init.h>
14 #include <linux/skbuff.h>
15 #include <linux/wait.h>
16 #include <linux/module.h>
17 #include <linux/interrupt.h>
18 #include <linux/if.h>
19 #include <linux/netdevice.h>
20 #include <linux/netfilter_ipv6.h>
21 #include <linux/inetdevice.h>
22 #include <linux/proc_fs.h>
23 #include <linux/mutex.h>
24 #include <linux/slab.h>
25 #include <linux/rcupdate.h>
26 #include <net/net_namespace.h>
27 #include <net/sock.h>
28
29 #include "nf_internals.h"
30
/* Serializes updates to nf_afinfo[]; readers use RCU. */
static DEFINE_MUTEX(afinfo_mutex);

/* Per address-family netfilter ops, indexed by NFPROTO_*; RCU-protected,
 * NULL until the family registers via nf_register_afinfo(). */
const struct nf_afinfo __rcu *nf_afinfo[NFPROTO_NUMPROTO] __read_mostly;
EXPORT_SYMBOL(nf_afinfo);
/* IPv6 helper ops; populated elsewhere (ipv6 side), NULL until then. */
const struct nf_ipv6_ops __rcu *nf_ipv6_ops __read_mostly;
EXPORT_SYMBOL_GPL(nf_ipv6_ops);

/* Per-CPU flag; NOTE(review): set/cleared by skb-duplication users, not
 * in this file — semantics defined at those call sites. */
DEFINE_PER_CPU(bool, nf_skb_duplicated);
EXPORT_SYMBOL_GPL(nf_skb_duplicated);
40
/* Publish @afinfo as the handler for its address family.
 * Always returns 0; the int return exists for API symmetry.
 */
int nf_register_afinfo(const struct nf_afinfo *afinfo)
{
        mutex_lock(&afinfo_mutex);
        RCU_INIT_POINTER(nf_afinfo[afinfo->family], afinfo);
        mutex_unlock(&afinfo_mutex);
        return 0;
}
EXPORT_SYMBOL_GPL(nf_register_afinfo);
49
/* Clear the nf_afinfo[] slot for @afinfo's family.  The final
 * synchronize_rcu() guarantees no RCU reader still sees the old pointer
 * once this returns, so the caller may free @afinfo afterwards.
 */
void nf_unregister_afinfo(const struct nf_afinfo *afinfo)
{
        mutex_lock(&afinfo_mutex);
        RCU_INIT_POINTER(nf_afinfo[afinfo->family], NULL);
        mutex_unlock(&afinfo_mutex);
        synchronize_rcu();
}
EXPORT_SYMBOL_GPL(nf_unregister_afinfo);
58
#ifdef HAVE_JUMP_LABEL
/* One static key per (pf, hooknum); bumped on hook (un)registration
 * below — presumably consulted by the nf_hook() fast path (declared in
 * linux/netfilter.h, not visible here) to skip empty hook points. */
struct static_key nf_hooks_needed[NFPROTO_NUMPROTO][NF_MAX_HOOKS];
EXPORT_SYMBOL(nf_hooks_needed);
#endif

/* Serializes all modifications of the hook chains. */
static DEFINE_MUTEX(nf_hook_mutex);
/* Dereference a chain pointer while holding nf_hook_mutex. */
#define nf_entry_dereference(e) \
        rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex))
67
68 static struct nf_hook_entry *nf_hook_entry_head(struct net *net,
69                                                 const struct nf_hook_ops *reg)
70 {
71         struct nf_hook_entry *hook_head = NULL;
72
73         if (reg->pf != NFPROTO_NETDEV)
74                 hook_head = nf_entry_dereference(net->nf.hooks[reg->pf]
75                                                  [reg->hooknum]);
76         else if (reg->hooknum == NF_NETDEV_INGRESS) {
77 #ifdef CONFIG_NETFILTER_INGRESS
78                 if (reg->dev && dev_net(reg->dev) == net)
79                         hook_head =
80                                 nf_entry_dereference(
81                                         reg->dev->nf_hooks_ingress);
82 #endif
83         }
84         return hook_head;
85 }
86
87 /* must hold nf_hook_mutex */
88 static void nf_set_hooks_head(struct net *net, const struct nf_hook_ops *reg,
89                               struct nf_hook_entry *entry)
90 {
91         switch (reg->pf) {
92         case NFPROTO_NETDEV:
93 #ifdef CONFIG_NETFILTER_INGRESS
94                 /* We already checked in nf_register_net_hook() that this is
95                  * used from ingress.
96                  */
97                 rcu_assign_pointer(reg->dev->nf_hooks_ingress, entry);
98 #endif
99                 break;
100         default:
101                 rcu_assign_pointer(net->nf.hooks[reg->pf][reg->hooknum],
102                                    entry);
103                 break;
104         }
105 }
106
107 int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg)
108 {
109         struct nf_hook_entry *hooks_entry;
110         struct nf_hook_entry *entry;
111
112         if (reg->pf == NFPROTO_NETDEV) {
113 #ifndef CONFIG_NETFILTER_INGRESS
114                 if (reg->hooknum == NF_NETDEV_INGRESS)
115                         return -EOPNOTSUPP;
116 #endif
117                 if (reg->hooknum != NF_NETDEV_INGRESS ||
118                     !reg->dev || dev_net(reg->dev) != net)
119                         return -EINVAL;
120         }
121
122         entry = kmalloc(sizeof(*entry), GFP_KERNEL);
123         if (!entry)
124                 return -ENOMEM;
125
126         entry->orig_ops = reg;
127         entry->ops      = *reg;
128         entry->next     = NULL;
129
130         mutex_lock(&nf_hook_mutex);
131         hooks_entry = nf_hook_entry_head(net, reg);
132
133         if (hooks_entry && hooks_entry->orig_ops->priority > reg->priority) {
134                 /* This is the case where we need to insert at the head */
135                 entry->next = hooks_entry;
136                 hooks_entry = NULL;
137         }
138
139         while (hooks_entry &&
140                 reg->priority >= hooks_entry->orig_ops->priority &&
141                 nf_entry_dereference(hooks_entry->next)) {
142                 hooks_entry = nf_entry_dereference(hooks_entry->next);
143         }
144
145         if (hooks_entry) {
146                 entry->next = nf_entry_dereference(hooks_entry->next);
147                 rcu_assign_pointer(hooks_entry->next, entry);
148         } else {
149                 nf_set_hooks_head(net, reg, entry);
150         }
151
152         mutex_unlock(&nf_hook_mutex);
153 #ifdef CONFIG_NETFILTER_INGRESS
154         if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
155                 net_inc_ingress_queue();
156 #endif
157 #ifdef HAVE_JUMP_LABEL
158         static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]);
159 #endif
160         return 0;
161 }
162 EXPORT_SYMBOL(nf_register_net_hook);
163
164 void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg)
165 {
166         struct nf_hook_entry *hooks_entry;
167
168         mutex_lock(&nf_hook_mutex);
169         hooks_entry = nf_hook_entry_head(net, reg);
170         if (hooks_entry && hooks_entry->orig_ops == reg) {
171                 nf_set_hooks_head(net, reg,
172                                   nf_entry_dereference(hooks_entry->next));
173                 goto unlock;
174         }
175         while (hooks_entry && nf_entry_dereference(hooks_entry->next)) {
176                 struct nf_hook_entry *next =
177                         nf_entry_dereference(hooks_entry->next);
178                 struct nf_hook_entry *nnext;
179
180                 if (next->orig_ops != reg) {
181                         hooks_entry = next;
182                         continue;
183                 }
184                 nnext = nf_entry_dereference(next->next);
185                 rcu_assign_pointer(hooks_entry->next, nnext);
186                 hooks_entry = next;
187                 break;
188         }
189
190 unlock:
191         mutex_unlock(&nf_hook_mutex);
192         if (!hooks_entry) {
193                 WARN(1, "nf_unregister_net_hook: hook not found!\n");
194                 return;
195         }
196 #ifdef CONFIG_NETFILTER_INGRESS
197         if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS)
198                 net_dec_ingress_queue();
199 #endif
200 #ifdef HAVE_JUMP_LABEL
201         static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]);
202 #endif
203         synchronize_net();
204         nf_queue_nf_hook_drop(net, hooks_entry);
205         /* other cpu might still process nfqueue verdict that used reg */
206         synchronize_net();
207         kfree(hooks_entry);
208 }
209 EXPORT_SYMBOL(nf_unregister_net_hook);
210
211 int nf_register_net_hooks(struct net *net, const struct nf_hook_ops *reg,
212                           unsigned int n)
213 {
214         unsigned int i;
215         int err = 0;
216
217         for (i = 0; i < n; i++) {
218                 err = nf_register_net_hook(net, &reg[i]);
219                 if (err)
220                         goto err;
221         }
222         return err;
223
224 err:
225         if (i > 0)
226                 nf_unregister_net_hooks(net, reg, i);
227         return err;
228 }
229 EXPORT_SYMBOL(nf_register_net_hooks);
230
231 void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg,
232                              unsigned int n)
233 {
234         while (n-- > 0)
235                 nf_unregister_net_hook(net, &reg[n]);
236 }
237 EXPORT_SYMBOL(nf_unregister_net_hooks);
238
/* Hooks registered for every netns (via nf_register_hook()); walked by
 * nf_register_hook_list() when a new namespace is created. */
static LIST_HEAD(nf_hook_list);
240
241 static int _nf_register_hook(struct nf_hook_ops *reg)
242 {
243         struct net *net, *last;
244         int ret;
245
246         for_each_net(net) {
247                 ret = nf_register_net_hook(net, reg);
248                 if (ret && ret != -ENOENT)
249                         goto rollback;
250         }
251         list_add_tail(&reg->list, &nf_hook_list);
252
253         return 0;
254 rollback:
255         last = net;
256         for_each_net(net) {
257                 if (net == last)
258                         break;
259                 nf_unregister_net_hook(net, reg);
260         }
261         return ret;
262 }
263
/* Public wrapper: register @reg in all namespaces under rtnl_lock. */
int nf_register_hook(struct nf_hook_ops *reg)
{
        int err;

        rtnl_lock();
        err = _nf_register_hook(reg);
        rtnl_unlock();

        return err;
}
EXPORT_SYMBOL(nf_register_hook);
275
276 static void _nf_unregister_hook(struct nf_hook_ops *reg)
277 {
278         struct net *net;
279
280         list_del(&reg->list);
281         for_each_net(net)
282                 nf_unregister_net_hook(net, reg);
283 }
284
/* Public wrapper: unregister @reg everywhere under rtnl_lock. */
void nf_unregister_hook(struct nf_hook_ops *reg)
{
        rtnl_lock();
        _nf_unregister_hook(reg);
        rtnl_unlock();
}
EXPORT_SYMBOL(nf_unregister_hook);
292
293 int nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
294 {
295         unsigned int i;
296         int err = 0;
297
298         for (i = 0; i < n; i++) {
299                 err = nf_register_hook(&reg[i]);
300                 if (err)
301                         goto err;
302         }
303         return err;
304
305 err:
306         if (i > 0)
307                 nf_unregister_hooks(reg, i);
308         return err;
309 }
310 EXPORT_SYMBOL(nf_register_hooks);
311
312 /* Caller MUST take rtnl_lock() */
313 int _nf_register_hooks(struct nf_hook_ops *reg, unsigned int n)
314 {
315         unsigned int i;
316         int err = 0;
317
318         for (i = 0; i < n; i++) {
319                 err = _nf_register_hook(&reg[i]);
320                 if (err)
321                         goto err;
322         }
323         return err;
324
325 err:
326         if (i > 0)
327                 _nf_unregister_hooks(reg, i);
328         return err;
329 }
330 EXPORT_SYMBOL(_nf_register_hooks);
331
332 void nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
333 {
334         while (n-- > 0)
335                 nf_unregister_hook(&reg[n]);
336 }
337 EXPORT_SYMBOL(nf_unregister_hooks);
338
339 /* Caller MUST take rtnl_lock */
340 void _nf_unregister_hooks(struct nf_hook_ops *reg, unsigned int n)
341 {
342         while (n-- > 0)
343                 _nf_unregister_hook(&reg[n]);
344 }
345 EXPORT_SYMBOL(_nf_unregister_hooks);
346
/* Run the hooks starting at *entryp against @skb, stopping at the first
 * verdict other than NF_ACCEPT.  *entryp is advanced as hooks are
 * consumed, so the caller (nf_hook_slow()) can resume iteration after a
 * queue verdict.  Returns the terminating verdict, or NF_ACCEPT when the
 * whole chain accepted the packet.
 */
unsigned int nf_iterate(struct sk_buff *skb,
                        struct nf_hook_state *state,
                        struct nf_hook_entry **entryp)
{
        unsigned int verdict;

        /*
         * The caller must not block between calls to this
         * function because of risk of continuing from deleted element.
         */
        while (*entryp) {
                /* Skip hooks below the caller's priority threshold. */
                if (state->thresh > (*entryp)->ops.priority) {
                        *entryp = rcu_dereference((*entryp)->next);
                        continue;
                }

                /* Optimization: we don't need to hold module
                   reference here, since function can't sleep. --RR */
repeat:
                verdict = (*entryp)->ops.hook((*entryp)->ops.priv, skb, state);
                if (verdict != NF_ACCEPT) {
#ifdef CONFIG_NETFILTER_DEBUG
                        /* Out-of-range verdict: log and skip this hook. */
                        if (unlikely((verdict & NF_VERDICT_MASK)
                                                        > NF_MAX_VERDICT)) {
                                NFDEBUG("Evil return from %p(%u).\n",
                                        (*entryp)->ops.hook, state->hook);
                                *entryp = rcu_dereference((*entryp)->next);
                                continue;
                        }
#endif
                        if (verdict != NF_REPEAT)
                                return verdict;
                        /* NF_REPEAT: re-invoke the same hook on this skb. */
                        goto repeat;
                }
                *entryp = rcu_dereference((*entryp)->next);
        }
        return NF_ACCEPT;
}
385
386
/* Returns 1 if okfn() needs to be executed by the caller,
 * -EPERM for NF_DROP, 0 otherwise.  Caller must hold rcu_read_lock. */
int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state)
{
        struct nf_hook_entry *entry;
        unsigned int verdict;
        int ret = 0;

        entry = rcu_dereference(state->hook_entries);
next_hook:
        verdict = nf_iterate(skb, state, &entry);
        if (verdict == NF_ACCEPT || verdict == NF_STOP) {
                ret = 1;
        } else if ((verdict & NF_VERDICT_MASK) == NF_DROP) {
                /* skb is consumed here; report the errno embedded in the
                 * verdict, defaulting to -EPERM. */
                kfree_skb(skb);
                ret = NF_DROP_GETERR(verdict);
                if (ret == 0)
                        ret = -EPERM;
        } else if ((verdict & NF_VERDICT_MASK) == NF_QUEUE) {
                int err;

                /* Remember where iteration stopped so the queue verdict
                 * path can resume from the next hook. */
                RCU_INIT_POINTER(state->hook_entries, entry);
                err = nf_queue(skb, state, verdict >> NF_VERDICT_QBITS);
                if (err < 0) {
                        /* -ESRCH: nothing bound to the queue — continue
                         * iterating if the hook asked for queue bypass;
                         * otherwise the packet is dropped. */
                        if (err == -ESRCH &&
                           (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS))
                                goto next_hook;
                        kfree_skb(skb);
                }
        }
        return ret;
}
EXPORT_SYMBOL(nf_hook_slow);
420
421
422 int skb_make_writable(struct sk_buff *skb, unsigned int writable_len)
423 {
424         if (writable_len > skb->len)
425                 return 0;
426
427         /* Not exclusive use of packet?  Must copy. */
428         if (!skb_cloned(skb)) {
429                 if (writable_len <= skb_headlen(skb))
430                         return 1;
431         } else if (skb_clone_writable(skb, writable_len))
432                 return 1;
433
434         if (writable_len <= skb_headlen(skb))
435                 writable_len = 0;
436         else
437                 writable_len -= skb_headlen(skb);
438
439         return !!__pskb_pull_tail(skb, writable_len);
440 }
441 EXPORT_SYMBOL(skb_make_writable);
442
/* This needs to be compiled in any case to avoid dependencies between the
 * nfnetlink_queue code and nf_conntrack.
 */
/* NULL until the providing module installs it — assigned elsewhere. */
struct nfnl_ct_hook __rcu *nfnl_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nfnl_ct_hook);
448
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
/* This does not belong here, but locally generated errors need it if connection
   tracking in use: without this, connection may not be in hash table, and hence
   manufactured ICMP or RST packets will not be associated with it. */
/* RCU function pointer; presumably installed by nf_conntrack when it
 * loads (not set in this file).  Consumed by nf_ct_attach() below. */
void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *)
                __rcu __read_mostly;
EXPORT_SYMBOL(ip_ct_attach);
456
457 void nf_ct_attach(struct sk_buff *new, const struct sk_buff *skb)
458 {
459         void (*attach)(struct sk_buff *, const struct sk_buff *);
460
461         if (skb->nfct) {
462                 rcu_read_lock();
463                 attach = rcu_dereference(ip_ct_attach);
464                 if (attach)
465                         attach(new, skb);
466                 rcu_read_unlock();
467         }
468 }
469 EXPORT_SYMBOL(nf_ct_attach);
470
/* Destructor for conntrack references; presumably installed by the
 * conntrack module.  nf_conntrack_destroy() below BUG()s if it is NULL. */
void (*nf_ct_destroy)(struct nf_conntrack *) __rcu __read_mostly;
EXPORT_SYMBOL(nf_ct_destroy);
473
474 void nf_conntrack_destroy(struct nf_conntrack *nfct)
475 {
476         void (*destroy)(struct nf_conntrack *);
477
478         rcu_read_lock();
479         destroy = rcu_dereference(nf_ct_destroy);
480         BUG_ON(destroy == NULL);
481         destroy(nfct);
482         rcu_read_unlock();
483 }
484 EXPORT_SYMBOL(nf_conntrack_destroy);
485
/* Built-in default zone used e.g. by modules. */
const struct nf_conntrack_zone nf_ct_zone_dflt = {
        .id     = NF_CT_DEFAULT_ZONE_ID,
        .dir    = NF_CT_DEFAULT_ZONE_DIR,
};
EXPORT_SYMBOL_GPL(nf_ct_zone_dflt);
#endif /* CONFIG_NF_CONNTRACK */
493
#ifdef CONFIG_NF_NAT_NEEDED
/* Set by the NAT core (not in this file); NOTE(review): callers/semantics
 * live outside this file — confirm against nf_nat usage before relying. */
void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *);
EXPORT_SYMBOL(nf_nat_decode_session_hook);
#endif
498
/* Attach every hook on the global nf_hook_list to @net (called for each
 * new namespace from netfilter_net_init()).  rtnl_lock keeps the list
 * stable against nf_register_hook()/nf_unregister_hook().  -ENOENT from
 * an individual registration is tolerated, matching _nf_register_hook().
 * On any other error, hooks registered so far are rolled back in reverse.
 */
static int nf_register_hook_list(struct net *net)
{
        struct nf_hook_ops *elem;
        int ret;

        rtnl_lock();
        list_for_each_entry(elem, &nf_hook_list, list) {
                ret = nf_register_net_hook(net, elem);
                if (ret && ret != -ENOENT)
                        goto out_undo;
        }
        rtnl_unlock();
        return 0;

out_undo:
        /* Walk back from the failing element, unregistering the rest. */
        list_for_each_entry_continue_reverse(elem, &nf_hook_list, list)
                nf_unregister_net_hook(net, elem);
        rtnl_unlock();
        return ret;
}
519
520 static void nf_unregister_hook_list(struct net *net)
521 {
522         struct nf_hook_ops *elem;
523
524         rtnl_lock();
525         list_for_each_entry(elem, &nf_hook_list, list)
526                 nf_unregister_net_hook(net, elem);
527         rtnl_unlock();
528 }
529
530 static int __net_init netfilter_net_init(struct net *net)
531 {
532         int i, h, ret;
533
534         for (i = 0; i < ARRAY_SIZE(net->nf.hooks); i++) {
535                 for (h = 0; h < NF_MAX_HOOKS; h++)
536                         RCU_INIT_POINTER(net->nf.hooks[i][h], NULL);
537         }
538
539 #ifdef CONFIG_PROC_FS
540         net->nf.proc_netfilter = proc_net_mkdir(net, "netfilter",
541                                                 net->proc_net);
542         if (!net->nf.proc_netfilter) {
543                 if (!net_eq(net, &init_net))
544                         pr_err("cannot create netfilter proc entry");
545
546                 return -ENOMEM;
547         }
548 #endif
549         ret = nf_register_hook_list(net);
550         if (ret)
551                 remove_proc_entry("netfilter", net->proc_net);
552
553         return ret;
554 }
555
/* Per-netns teardown: detach global hooks, then remove the proc dir. */
static void __net_exit netfilter_net_exit(struct net *net)
{
        nf_unregister_hook_list(net);
        remove_proc_entry("netfilter", net->proc_net);
}
561
/* Ties netfilter setup/teardown into network-namespace lifecycle. */
static struct pernet_operations netfilter_net_ops = {
        .init = netfilter_net_init,
        .exit = netfilter_net_exit,
};
566
567 int __init netfilter_init(void)
568 {
569         int ret;
570
571         ret = register_pernet_subsys(&netfilter_net_ops);
572         if (ret < 0)
573                 goto err;
574
575         ret = netfilter_log_init();
576         if (ret < 0)
577                 goto err_pernet;
578
579         return 0;
580 err_pernet:
581         unregister_pernet_subsys(&netfilter_net_ops);
582 err:
583         return ret;
584 }