1 /* Expectation handling for nf_conntrack. */
3 /* (C) 1999-2001 Paul `Rusty' Russell
4 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
12 #include <linux/types.h>
13 #include <linux/netfilter.h>
14 #include <linux/skbuff.h>
15 #include <linux/proc_fs.h>
16 #include <linux/seq_file.h>
17 #include <linux/stddef.h>
18 #include <linux/slab.h>
19 #include <linux/err.h>
20 #include <linux/percpu.h>
21 #include <linux/kernel.h>
22 #include <linux/jhash.h>
23 #include <linux/moduleparam.h>
24 #include <linux/export.h>
25 #include <net/net_namespace.h>
27 #include <net/netfilter/nf_conntrack.h>
28 #include <net/netfilter/nf_conntrack_core.h>
29 #include <net/netfilter/nf_conntrack_expect.h>
30 #include <net/netfilter/nf_conntrack_helper.h>
31 #include <net/netfilter/nf_conntrack_tuple.h>
32 #include <net/netfilter/nf_conntrack_zones.h>
/* Number of buckets in the expectation hash table (module parameter). */
34 unsigned int nf_ct_expect_hsize __read_mostly;
35 EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
/* Global cap on the number of expectations per net namespace. */
37 unsigned int nf_ct_expect_max __read_mostly;
/* Slab cache backing struct nf_conntrack_expect allocations. */
39 static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
41 /* nf_conntrack_expect helper functions */
/*
 * Unlink an expectation from the global hash and from its master
 * conntrack's helper list, emit an IPEXP_DESTROY event and drop the
 * hash-table reference.  The timeout timer must already be stopped.
 * NOTE(review): source appears truncated here (lines elided) — verify
 * against the full file.
 */
42 void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
45 struct nf_conn_help *master_help = nfct_help(exp->master);
46 struct net *net = nf_ct_exp_net(exp);
48 NF_CT_ASSERT(master_help);
49 NF_CT_ASSERT(!timer_pending(&exp->timeout));
/* Remove from the per-netns hash; RCU variant keeps lockless readers safe. */
51 hlist_del_rcu(&exp->hnode);
52 net->ct.expect_count--;
/* Remove from the master conntrack's per-helper expectation list. */
54 hlist_del(&exp->lnode);
55 master_help->expecting[exp->class]--;
/* Notify userspace listeners, then release the hash reference. */
57 nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report);
58 nf_ct_expect_put(exp);
60 NF_CT_STAT_INC(net, expect_delete);
62 EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report);
/* Timer callback: the expectation expired unfulfilled — unlink it under
 * the conntrack lock and drop the reference held by the timer. */
64 static void nf_ct_expectation_timed_out(unsigned long ul_expect)
66 struct nf_conntrack_expect *exp = (void *)ul_expect;
68 spin_lock_bh(&nf_conntrack_lock);
69 nf_ct_unlink_expect(exp);
70 spin_unlock_bh(&nf_conntrack_lock);
/* Release the reference taken for the timer at insert time. */
71 nf_ct_expect_put(exp);
/* Hash an expectation tuple by destination address, L3/L4 protocol and
 * destination port into a bucket index of the expectation hash table. */
74 static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
/* Lazily seed the shared conntrack hash random value on first use. */
78 if (unlikely(!nf_conntrack_hash_rnd)) {
79 init_nf_conntrack_hash_rnd();
82 hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
83 (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
84 (__force __u16)tuple->dst.u.all) ^ nf_conntrack_hash_rnd);
/* Scale the 32-bit hash into [0, nf_ct_expect_hsize) without a modulo. */
85 return ((u64)hash * nf_ct_expect_hsize) >> 32;
/* Look up an expectation matching @tuple in @zone.  RCU read-side
 * lookup; no reference is taken — caller must hold rcu_read_lock. */
88 struct nf_conntrack_expect *
89 __nf_ct_expect_find(struct net *net, u16 zone,
90 const struct nf_conntrack_tuple *tuple)
92 struct nf_conntrack_expect *i;
/* Fast path: empty table, nothing can match. */
95 if (!net->ct.expect_count)
98 h = nf_ct_expect_dst_hash(tuple);
99 hlist_for_each_entry_rcu(i, &net->ct.expect_hash[h], hnode) {
100 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
101 nf_ct_zone(i->master) == zone)
106 EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
108 /* Just find a expectation corresponding to a tuple. */
/* Reference-taking variant of __nf_ct_expect_find():
 * atomic_inc_not_zero() guards against racing with the final put. */
109 struct nf_conntrack_expect *
110 nf_ct_expect_find_get(struct net *net, u16 zone,
111 const struct nf_conntrack_tuple *tuple)
113 struct nf_conntrack_expect *i;
116 i = __nf_ct_expect_find(net, zone, tuple);
117 if (i && !atomic_inc_not_zero(&i->use))
123 EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
125 /* If an expectation for this connection is found, it gets delete from
126 * global list then returned. */
127 struct nf_conntrack_expect *
128 nf_ct_find_expectation(struct net *net, u16 zone,
129 const struct nf_conntrack_tuple *tuple)
131 struct nf_conntrack_expect *i, *exp = NULL;
/* Fast path: empty table, nothing can match. */
134 if (!net->ct.expect_count)
137 h = nf_ct_expect_dst_hash(tuple);
138 hlist_for_each_entry(i, &net->ct.expect_hash[h], hnode) {
/* Skip expectations that are explicitly marked inactive. */
139 if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
140 nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask) &&
141 nf_ct_zone(i->master) == zone) {
149 /* If master is not in hash table yet (ie. packet hasn't left
150 this machine yet), how can other end know about expected?
151 Hence these are not the droids you are looking for (if
152 master ct never got confirmed, we'd hold a reference to it
153 and weird things would happen to future packets). */
154 if (!nf_ct_is_confirmed(exp->master))
/* PERMANENT expectations stay in the table; just take a reference. */
157 if (exp->flags & NF_CT_EXPECT_PERMANENT) {
158 atomic_inc(&exp->use);
/* Otherwise unlink it — but only if we beat the timeout timer. */
160 } else if (del_timer(&exp->timeout)) {
161 nf_ct_unlink_expect(exp);
168 /* delete all expectations for this conntrack */
169 void nf_ct_remove_expectations(struct nf_conn *ct)
171 struct nf_conn_help *help = nfct_help(ct);
172 struct nf_conntrack_expect *exp;
173 struct hlist_node *next;
175 /* Optimization: most connection never expect any others. */
/* del_timer() succeeding means we own the timer's reference and may
 * unlink; otherwise the timeout handler is already tearing it down. */
179 hlist_for_each_entry_safe(exp, next, &help->expectations, lnode) {
180 if (del_timer(&exp->timeout)) {
181 nf_ct_unlink_expect(exp);
182 nf_ct_expect_put(exp);
186 EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
188 /* Would two expected things clash? */
189 static inline int expect_clash(const struct nf_conntrack_expect *a,
190 const struct nf_conntrack_expect *b)
192 /* Part covered by intersection of masks must be unequal,
193 otherwise they clash */
194 struct nf_conntrack_tuple_mask intersect_mask;
/* Intersect the L4 id (port) masks... */
197 intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
/* ...and each word of the L3 address masks. */
199 for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
200 intersect_mask.src.u3.all[count] =
201 a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
/* Non-zero return means the tuples agree under the common mask: clash. */
204 return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
/* Two expectations are identical iff master conntrack, class, tuple,
 * mask and conntrack zone all agree. */
207 static inline int expect_matches(const struct nf_conntrack_expect *a,
208 const struct nf_conntrack_expect *b)
210 return a->master == b->master && a->class == b->class &&
211 nf_ct_tuple_equal(&a->tuple, &b->tuple) &&
212 nf_ct_tuple_mask_equal(&a->mask, &b->mask) &&
213 nf_ct_zone(a->master) == nf_ct_zone(b->master);
216 /* Generally a bad idea to call this: could have matched already. */
217 void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
219 spin_lock_bh(&nf_conntrack_lock);
/* Only unlink if we stopped the timeout first; losing the race means
 * the timer callback does the teardown instead. */
220 if (del_timer(&exp->timeout)) {
221 nf_ct_unlink_expect(exp);
222 nf_ct_expect_put(exp);
224 spin_unlock_bh(&nf_conntrack_lock);
226 EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
228 /* We don't increase the master conntrack refcount for non-fulfilled
229 * conntracks. During the conntrack destruction, the expectations are
230 * always killed before the conntrack itself */
231 struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
233 struct nf_conntrack_expect *new;
/* GFP_ATOMIC: may be called from packet-processing (softirq) context. */
235 new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
/* The caller owns the single initial reference. */
240 atomic_set(&new->use, 1);
243 EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
/*
 * Fill in an allocated expectation: the tuple and mask are built from
 * the class, address family, addresses, L4 protocol and ports given.
 * A NULL @saddr or @src leaves the corresponding source field
 * wildcarded (zero mask).
 */
245 void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
247 const union nf_inet_addr *saddr,
248 const union nf_inet_addr *daddr,
249 u_int8_t proto, const __be16 *src, const __be16 *dst)
253 if (family == AF_INET)
260 exp->expectfn = NULL;
262 exp->tuple.src.l3num = family;
263 exp->tuple.dst.protonum = proto;
/* Copy the source address and pad the remainder of the union with
 * zeroes so nf_ct_tuple_equal() comparisons are well-defined. */
266 memcpy(&exp->tuple.src.u3, saddr, len);
267 if (sizeof(exp->tuple.src.u3) > len)
268 /* address needs to be cleared for nf_ct_tuple_equal */
269 memset((void *)&exp->tuple.src.u3 + len, 0x00,
270 sizeof(exp->tuple.src.u3) - len);
271 memset(&exp->mask.src.u3, 0xFF, len);
272 if (sizeof(exp->mask.src.u3) > len)
273 memset((void *)&exp->mask.src.u3 + len, 0x00,
274 sizeof(exp->mask.src.u3) - len);
/* No source address supplied: wildcard it (address and mask zeroed). */
276 memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
277 memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
/* Source port supplied: match it exactly. */
281 exp->tuple.src.u.all = *src;
282 exp->mask.src.u.all = htons(0xFFFF);
/* No source port supplied: wildcard it. */
284 exp->tuple.src.u.all = 0;
285 exp->mask.src.u.all = 0;
/* Destination address and port are always matched exactly. */
288 memcpy(&exp->tuple.dst.u3, daddr, len);
289 if (sizeof(exp->tuple.dst.u3) > len)
290 /* address needs to be cleared for nf_ct_tuple_equal */
291 memset((void *)&exp->tuple.dst.u3 + len, 0x00,
292 sizeof(exp->tuple.dst.u3) - len);
294 exp->tuple.dst.u.all = *dst;
296 EXPORT_SYMBOL_GPL(nf_ct_expect_init);
/* RCU callback: actually free the expectation once a grace period has
 * elapsed, so lockless hash walkers never touch freed memory. */
298 static void nf_ct_expect_free_rcu(struct rcu_head *head)
300 struct nf_conntrack_expect *exp;
302 exp = container_of(head, struct nf_conntrack_expect, rcu);
303 kmem_cache_free(nf_ct_expect_cachep, exp);
/* Drop one reference; schedule the RCU-deferred free when it was the
 * last one. */
306 void nf_ct_expect_put(struct nf_conntrack_expect *exp)
308 if (atomic_dec_and_test(&exp->use))
309 call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
311 EXPORT_SYMBOL_GPL(nf_ct_expect_put);
/* Link @exp into the hash table and its master's helper list and arm
 * the per-class timeout.  Caller holds the conntrack lock. */
313 static int nf_ct_expect_insert(struct nf_conntrack_expect *exp)
315 struct nf_conn_help *master_help = nfct_help(exp->master);
316 struct nf_conntrack_helper *helper;
317 struct net *net = nf_ct_exp_net(exp);
318 unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
320 /* two references : one for hash insert, one for the timer */
321 atomic_add(2, &exp->use);
323 hlist_add_head(&exp->lnode, &master_help->expectations);
324 master_help->expecting[exp->class]++;
326 hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
327 net->ct.expect_count++;
329 setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
/* Plain dereference is safe here: the conntrack lock is held. */
331 helper = rcu_dereference_protected(master_help->helper,
332 lockdep_is_held(&nf_conntrack_lock));
/* Timeout comes from the helper's per-class expectation policy. */
334 exp->timeout.expires = jiffies +
335 helper->expect_policy[exp->class].timeout * HZ;
337 add_timer(&exp->timeout);
339 NF_CT_STAT_INC(net, expect_create);
343 /* Race with expectations being used means we could have none to find; OK. */
/* Walk @master's expectation list and remove the last-listed (oldest)
 * expectation of the same class as @new, to make room for it. */
344 static void evict_oldest_expect(struct nf_conn *master,
345 struct nf_conntrack_expect *new)
347 struct nf_conn_help *master_help = nfct_help(master);
348 struct nf_conntrack_expect *exp, *last = NULL;
350 hlist_for_each_entry(exp, &master_help->expectations, lnode) {
351 if (exp->class == new->class)
/* Only evict if we beat the timeout timer to it. */
355 if (last && del_timer(&last->timeout)) {
356 nf_ct_unlink_expect(last);
357 nf_ct_expect_put(last);
/*
 * Validate @expect before insertion: replace an identical queued
 * expectation, detect clashes, enforce the helper's per-class limit
 * (evicting the oldest entry where possible) and the global per-netns
 * table limit.  Caller holds the conntrack lock.
 */
361 static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
363 const struct nf_conntrack_expect_policy *p;
364 struct nf_conntrack_expect *i;
365 struct nf_conn *master = expect->master;
366 struct nf_conn_help *master_help = nfct_help(master);
367 struct nf_conntrack_helper *helper;
368 struct net *net = nf_ct_exp_net(expect);
369 struct hlist_node *next;
377 h = nf_ct_expect_dst_hash(&expect->tuple);
378 hlist_for_each_entry_safe(i, next, &net->ct.expect_hash[h], hnode) {
/* An identical expectation is already queued: drop the old one. */
379 if (expect_matches(i, expect)) {
380 if (del_timer(&i->timeout)) {
381 nf_ct_unlink_expect(i);
385 } else if (expect_clash(i, expect)) {
390 /* Will be over limit? */
391 helper = rcu_dereference_protected(master_help->helper,
392 lockdep_is_held(&nf_conntrack_lock));
394 p = &helper->expect_policy[expect->class];
395 if (p->max_expected &&
396 master_help->expecting[expect->class] >= p->max_expected) {
397 evict_oldest_expect(master, expect);
/* Still over the limit after eviction: refuse. */
398 if (master_help->expecting[expect->class]
399 >= p->max_expected) {
/* Global cap on expectations for this namespace. */
406 if (net->ct.expect_count >= nf_ct_expect_max) {
407 net_warn_ratelimited("nf_conntrack: expectation table full\n");
/* Check and insert @expect under the conntrack lock; on success report
 * an IPEXP_NEW event to userspace listeners. */
414 int nf_ct_expect_related_report(struct nf_conntrack_expect *expect,
419 spin_lock_bh(&nf_conntrack_lock);
420 ret = __nf_ct_expect_check(expect);
424 ret = nf_ct_expect_insert(expect);
/* Event reporting happens after the lock is released. */
427 spin_unlock_bh(&nf_conntrack_lock);
428 nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
431 spin_unlock_bh(&nf_conntrack_lock);
434 EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
436 #ifdef CONFIG_NF_CONNTRACK_PROCFS
/* Iterator state for the /proc/net/nf_conntrack_expect seq_file. */
437 struct ct_expect_iter_state {
438 struct seq_net_private p;
/* Return the head of the first non-empty hash bucket for seq_file
 * iteration (RCU read side); records the bucket in the iterator. */
442 static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
444 struct net *net = seq_file_net(seq);
445 struct ct_expect_iter_state *st = seq->private;
446 struct hlist_node *n;
448 for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
449 n = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
/* Advance to the next expectation, moving on to later buckets when the
 * current chain is exhausted. */
456 static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
457 struct hlist_node *head)
459 struct net *net = seq_file_net(seq);
460 struct ct_expect_iter_state *st = seq->private;
462 head = rcu_dereference(hlist_next_rcu(head));
463 while (head == NULL) {
464 if (++st->bucket >= nf_ct_expect_hsize)
466 head = rcu_dereference(hlist_first_rcu(&net->ct.expect_hash[st->bucket]));
/* Skip forward to the @pos'th entry; NULL when @pos is past the end. */
471 static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
473 struct hlist_node *head = ct_expect_get_first(seq);
476 while (pos && (head = ct_expect_get_next(seq, head)))
478 return pos ? NULL : head;
/* seq_file .start: position the iterator at *pos. */
481 static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
485 return ct_expect_get_idx(seq, *pos);
/* seq_file .next: step the iterator forward one entry. */
488 static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
491 return ct_expect_get_next(seq, v);
/* seq_file .stop: end of an iteration pass. */
494 static void exp_seq_stop(struct seq_file *seq, void *v)
/* Format one expectation for /proc output: remaining timeout, L3/L4
 * protocols, tuple, flag names, helper name and policy name. */
500 static int exp_seq_show(struct seq_file *s, void *v)
502 struct nf_conntrack_expect *expect;
503 struct nf_conntrack_helper *helper;
504 struct hlist_node *n = v;
507 expect = hlist_entry(n, struct nf_conntrack_expect, hnode);
/* Remaining timeout in seconds; 0 once the timer has fired. */
509 if (expect->timeout.function)
510 seq_printf(s, "%ld ", timer_pending(&expect->timeout)
511 ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
514 seq_printf(s, "l3proto = %u proto=%u ",
515 expect->tuple.src.l3num,
516 expect->tuple.dst.protonum);
517 print_tuple(s, &expect->tuple,
518 __nf_ct_l3proto_find(expect->tuple.src.l3num),
519 __nf_ct_l4proto_find(expect->tuple.src.l3num,
520 expect->tuple.dst.protonum));
/* Print the flag names that are set. */
522 if (expect->flags & NF_CT_EXPECT_PERMANENT) {
523 seq_printf(s, "PERMANENT");
526 if (expect->flags & NF_CT_EXPECT_INACTIVE) {
527 seq_printf(s, "%sINACTIVE", delim);
530 if (expect->flags & NF_CT_EXPECT_USERSPACE)
531 seq_printf(s, "%sUSERSPACE", delim);
533 helper = rcu_dereference(nfct_help(expect->master)->helper);
535 seq_printf(s, "%s%s", expect->flags ? " " : "", helper->name);
536 if (helper->expect_policy[expect->class].name)
538 helper->expect_policy[expect->class].name);
541 return seq_putc(s, '\n');
/* seq_file operation table for the expectation /proc file. */
544 static const struct seq_operations exp_seq_ops = {
545 .start = exp_seq_start,
546 .next = exp_seq_next,
547 .stop = exp_seq_stop,
/* open() handler: set up a per-net seq_file with iterator state. */
551 static int exp_open(struct inode *inode, struct file *file)
553 return seq_open_net(inode, file, &exp_seq_ops,
554 sizeof(struct ct_expect_iter_state));
/* file_operations backing /proc/net/nf_conntrack_expect. */
557 static const struct file_operations exp_file_ops = {
558 .owner = THIS_MODULE,
562 .release = seq_release_net,
564 #endif /* CONFIG_NF_CONNTRACK_PROCFS */
/* Create the per-net /proc entry (compiled out without PROCFS). */
566 static int exp_proc_init(struct net *net)
568 #ifdef CONFIG_NF_CONNTRACK_PROCFS
569 struct proc_dir_entry *proc;
/* 0440: readable by owner and group only. */
571 proc = proc_create("nf_conntrack_expect", 0440, net->proc_net,
575 #endif /* CONFIG_NF_CONNTRACK_PROCFS */
/* Remove the per-net /proc entry (compiled out without PROCFS). */
579 static void exp_proc_remove(struct net *net)
581 #ifdef CONFIG_NF_CONNTRACK_PROCFS
582 remove_proc_entry("nf_conntrack_expect", net->proc_net);
583 #endif /* CONFIG_NF_CONNTRACK_PROCFS */
/* Hash size is settable at load time only (mode 0400: read-only). */
586 module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0400);
/* Per-namespace init: allocate the expectation hash table and create
 * the /proc entry. */
588 int nf_conntrack_expect_pernet_init(struct net *net)
592 net->ct.expect_count = 0;
593 net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize, 0);
594 if (net->ct.expect_hash == NULL)
597 err = exp_proc_init(net);
/* Error path: free the hash table allocated above. */
603 nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
/* Per-namespace teardown: remove the /proc entry, free the hash. */
608 void nf_conntrack_expect_pernet_fini(struct net *net)
610 exp_proc_remove(net);
611 nf_ct_free_hashtable(net->ct.expect_hash, nf_ct_expect_hsize);
/* Module init: derive default hash size and limits, create the slab
 * cache for expectations. */
614 int nf_conntrack_expect_init(void)
616 if (!nf_ct_expect_hsize) {
/* Default: 1/256th of the conntrack table size, minimum one bucket. */
617 nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
618 if (!nf_ct_expect_hsize)
619 nf_ct_expect_hsize = 1;
/* Global cap scales with the table size. */
621 nf_ct_expect_max = nf_ct_expect_hsize * 4;
622 nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
623 sizeof(struct nf_conntrack_expect),
625 if (!nf_ct_expect_cachep)
/* Module exit: wait out all pending RCU frees before destroying the
 * slab cache they free into. */
630 void nf_conntrack_expect_fini(void)
632 rcu_barrier(); /* Wait for call_rcu() before destroy */
633 kmem_cache_destroy(nf_ct_expect_cachep);