/*
 * netfilter module to limit the number of parallel tcp
 * connections per IP address.
 *   (c) 2000 Gerd Knorr <kraxel@bytesex.org>
 *   Nov 2002: Martin Bene <martin.bene@icomedias.com>:
 *		only ignore TIME_WAIT or gone connections
 *   (C) CC Computer Consultants GmbH, 2007
 *
 * based on ...
 *
 * Kernel module to match connection tracking information.
 * GPL (C) 1999  Rusty Russell (rusty@rustcorp.com.au).
 */
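/*
 * Typical use from userspace (illustrative only; the option names are
 * defined by the iptables connlimit extension, not by this file):
 *
 *   iptables -A INPUT -p tcp --syn --dport 23 \
 *            -m connlimit --connlimit-above 2 -j REJECT
 *
 * rejects new telnet connections once a source address already has more
 * than two established.
 */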
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
#include <linux/jhash.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/module.h>
#include <linux/random.h>
#include <linux/skbuff.h>
#include <linux/spinlock.h>
#include <linux/netfilter/nf_conntrack_tcp.h>
#include <linux/netfilter/x_tables.h>
#include <linux/netfilter/xt_connlimit.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_tuple.h>
#include <net/netfilter/nf_conntrack_zones.h>

#define CONNLIMIT_SLOTS		32
#define CONNLIMIT_LOCK_SLOTS	32
#define CONNLIMIT_GC_MAX_NODES	8

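/*
 * Data layout, as implemented below: each rule instance keeps
 * CONNLIMIT_SLOTS hash buckets per address family.  Each bucket is an
 * rb-tree keyed by the masked source (or destination) address, and each
 * tree node carries an hlist of conntrack tuples sharing that key.
 * Buckets share CONNLIMIT_LOCK_SLOTS spinlocks (connlimit_mt_init()
 * enforces SLOTS % LOCK_SLOTS == 0), and at most CONNLIMIT_GC_MAX_NODES
 * empty tree nodes are garbage-collected per lookup.
 */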
/* we will save the tuples of all connections we care about */
struct xt_connlimit_conn {
	struct hlist_node		node;
	struct nf_conntrack_tuple	tuple;
	union nf_inet_addr		addr;
};

struct xt_connlimit_rb {
	struct rb_node node;
	struct hlist_head hhead; /* connections/hosts in same subnet */
	union nf_inet_addr addr; /* search key */
};

struct xt_connlimit_data {
	struct rb_root climit_root4[CONNLIMIT_SLOTS];
	struct rb_root climit_root6[CONNLIMIT_SLOTS];
	spinlock_t		locks[CONNLIMIT_LOCK_SLOTS];
};

/* hash seed, lazily initialized on first rule load (connlimit_mt_check) */
static u_int32_t connlimit_rnd __read_mostly;
static struct kmem_cache *connlimit_rb_cachep __read_mostly;
static struct kmem_cache *connlimit_conn_cachep __read_mostly;

static inline unsigned int connlimit_iphash(__be32 addr)
{
	return jhash_1word((__force __u32)addr,
			    connlimit_rnd) % CONNLIMIT_SLOTS;
}

static inline unsigned int
connlimit_iphash6(const union nf_inet_addr *addr,
		  const union nf_inet_addr *mask)
{
	union nf_inet_addr res;
	unsigned int i;

	for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i)
		res.ip6[i] = addr->ip6[i] & mask->ip6[i];

	return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6),
		       connlimit_rnd) % CONNLIMIT_SLOTS;
}

static inline bool already_closed(const struct nf_conn *conn)
{
	if (nf_ct_protonum(conn) == IPPROTO_TCP)
		return conn->proto.tcp.state == TCP_CONNTRACK_TIME_WAIT ||
		       conn->proto.tcp.state == TCP_CONNTRACK_CLOSE;
	else
		return false;
}

static int
same_source_net(const union nf_inet_addr *addr,
		const union nf_inet_addr *mask,
		const union nf_inet_addr *u3, u_int8_t family)
{
	if (family == NFPROTO_IPV4) {
		u32 lh = ntohl(addr->ip & mask->ip);
		u32 rh = ntohl(u3->ip & mask->ip);

		/* compare explicitly instead of subtracting: the
		 * difference of two u32s can overflow the int return
		 * value and yield an inconsistent tree ordering
		 */
		return (lh < rh) ? -1 : (lh > rh) ? 1 : 0;
	} else {
		union nf_inet_addr lh, rh;
		unsigned int i;

		for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) {
			lh.ip6[i] = addr->ip6[i] & mask->ip6[i];
			rh.ip6[i] = u3->ip6[i] & mask->ip6[i];
		}

		return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6));
	}
}

static bool add_hlist(struct hlist_head *head,
		      const struct nf_conntrack_tuple *tuple,
		      const union nf_inet_addr *addr)
{
	struct xt_connlimit_conn *conn;

	conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
	if (conn == NULL)
		return false;
	conn->tuple = *tuple;
	conn->addr = *addr;
	hlist_add_head(&conn->node, head);
	return true;
}

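/*
 * Walk the saved tuples for one masked address: prune entries whose
 * conntrack entry has disappeared or is already closed, and return the
 * number still alive.  *addit is cleared when @tuple itself is found so
 * the caller does not add it twice.  Called with the bucket lock held.
 */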
static unsigned int check_hlist(struct net *net,
				struct hlist_head *head,
				const struct nf_conntrack_tuple *tuple,
				bool *addit)
{
	const struct nf_conntrack_tuple_hash *found;
	struct xt_connlimit_conn *conn;
	struct hlist_node *n;
	struct nf_conn *found_ct;
	unsigned int length = 0;

	*addit = true;
	rcu_read_lock();

	/* check the saved connections */
	hlist_for_each_entry_safe(conn, n, head, node) {
		found    = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE,
						 &conn->tuple);
		if (found == NULL) {
			hlist_del(&conn->node);
			kmem_cache_free(connlimit_conn_cachep, conn);
			continue;
		}

		found_ct = nf_ct_tuplehash_to_ctrack(found);

		if (nf_ct_tuple_equal(&conn->tuple, tuple)) {
			/*
			 * Just to be sure we have it only once in the list.
			 * We should not see tuples twice unless someone hooks
			 * this into a table without "-p tcp --syn".
			 */
			*addit = false;
		} else if (already_closed(found_ct)) {
			/*
			 * we do not care about connections which are
			 * closed already -> ditch it
			 */
			nf_ct_put(found_ct);
			hlist_del(&conn->node);
			kmem_cache_free(connlimit_conn_cachep, conn);
			continue;
		}

		nf_ct_put(found_ct);
		length++;
	}

	rcu_read_unlock();

	return length;
}

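/* free the empty nodes collected during a walk; caller holds the bucket lock */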
static void tree_nodes_free(struct rb_root *root,
			    struct xt_connlimit_rb *gc_nodes[],
			    unsigned int gc_count)
{
	struct xt_connlimit_rb *rbconn;

	while (gc_count) {
		rbconn = gc_nodes[--gc_count];
		rb_erase(&rbconn->node, root);
		kmem_cache_free(connlimit_rb_cachep, rbconn);
	}
}

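/*
 * Look up (or insert) the tree node for the masked address and return
 * the number of live connections for it, counting the new one.  While
 * descending, up to CONNLIMIT_GC_MAX_NODES nodes whose hlists turn out
 * to be empty are collected; if the key is not found, they are freed
 * and the walk restarts once before a new node is allocated.  Returns
 * 0 on allocation failure, which the match turns into a hotdrop.
 */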
static unsigned int
count_tree(struct net *net, struct rb_root *root,
	   const struct nf_conntrack_tuple *tuple,
	   const union nf_inet_addr *addr, const union nf_inet_addr *mask,
	   u8 family)
{
	struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES];
	struct rb_node **rbnode, *parent;
	struct xt_connlimit_rb *rbconn;
	struct xt_connlimit_conn *conn;
	unsigned int gc_count;
	bool no_gc = false;

 restart:
	gc_count = 0;
	parent = NULL;
	rbnode = &(root->rb_node);
	while (*rbnode) {
		int diff;
		bool addit;

		rbconn = container_of(*rbnode, struct xt_connlimit_rb, node);

		parent = *rbnode;
		diff = same_source_net(addr, mask, &rbconn->addr, family);
		if (diff < 0) {
			rbnode = &((*rbnode)->rb_left);
		} else if (diff > 0) {
			rbnode = &((*rbnode)->rb_right);
		} else {
			/* same source network -> be counted! */
			unsigned int count;
			count = check_hlist(net, &rbconn->hhead, tuple, &addit);

			tree_nodes_free(root, gc_nodes, gc_count);
			if (!addit)
				return count;

			if (!add_hlist(&rbconn->hhead, tuple, addr))
				return 0; /* hotdrop */

			return count + 1;
		}

		if (no_gc || gc_count >= ARRAY_SIZE(gc_nodes))
			continue;

		/* only used for GC on hhead, retval and 'addit' ignored */
		check_hlist(net, &rbconn->hhead, tuple, &addit);
		if (hlist_empty(&rbconn->hhead))
			gc_nodes[gc_count++] = rbconn;
	}

	if (gc_count) {
		no_gc = true;
		tree_nodes_free(root, gc_nodes, gc_count);
		/* tree_nodes_free before new allocation permits
		 * allocator to re-use newly free'd object.
		 *
		 * This is a rare event; in most cases we will find an
		 * existing node to re-use (or gc_count is 0).
		 */
		goto restart;
	}

	/* no match, need to insert new node */
	rbconn = kmem_cache_alloc(connlimit_rb_cachep, GFP_ATOMIC);
	if (rbconn == NULL)
		return 0;

	conn = kmem_cache_alloc(connlimit_conn_cachep, GFP_ATOMIC);
	if (conn == NULL) {
		kmem_cache_free(connlimit_rb_cachep, rbconn);
		return 0;
	}

	conn->tuple = *tuple;
	conn->addr = *addr;
	rbconn->addr = *addr;

	INIT_HLIST_HEAD(&rbconn->hhead);
	hlist_add_head(&conn->node, &rbconn->hhead);

	rb_link_node(&rbconn->node, parent, rbnode);
	rb_insert_color(&rbconn->node, root);
	return 1;
}

static int count_them(struct net *net,
		      struct xt_connlimit_data *data,
		      const struct nf_conntrack_tuple *tuple,
		      const union nf_inet_addr *addr,
		      const union nf_inet_addr *mask,
		      u_int8_t family)
{
	struct rb_root *root;
	int count;
	u32 hash;

	if (family == NFPROTO_IPV6) {
		hash = connlimit_iphash6(addr, mask);
		root = &data->climit_root6[hash];
	} else {
		hash = connlimit_iphash(addr->ip & mask->ip);
		root = &data->climit_root4[hash];
	}

	spin_lock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]);

	count = count_tree(net, root, tuple, addr, mask, family);

	spin_unlock_bh(&data->locks[hash % CONNLIMIT_LOCK_SLOTS]);

	return count;
}

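/*
 * Match function proper: take the original-direction tuple from the
 * packet's conntrack entry (or extract one from the packet if it is
 * untracked), pick the source or destination address as configured,
 * and compare the resulting connection count against the limit.
 */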
static bool
connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
	struct net *net = dev_net(par->in ? par->in : par->out);
	const struct xt_connlimit_info *info = par->matchinfo;
	union nf_inet_addr addr;
	struct nf_conntrack_tuple tuple;
	const struct nf_conntrack_tuple *tuple_ptr = &tuple;
	enum ip_conntrack_info ctinfo;
	const struct nf_conn *ct;
	unsigned int connections;

	ct = nf_ct_get(skb, &ctinfo);
	if (ct != NULL)
		tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
	else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
				    par->family, &tuple))
		goto hotdrop;

	if (par->family == NFPROTO_IPV6) {
		const struct ipv6hdr *iph = ipv6_hdr(skb);
		memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ?
		       &iph->daddr : &iph->saddr, sizeof(addr.ip6));
	} else {
		const struct iphdr *iph = ip_hdr(skb);
		addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ?
			  iph->daddr : iph->saddr;
	}

	connections = count_them(net, info->data, tuple_ptr, &addr,
				 &info->mask, par->family);
	if (connections == 0)
		/* kmalloc failed, drop it entirely */
		goto hotdrop;

	return (connections > info->limit) ^
	       !!(info->flags & XT_CONNLIMIT_INVERT);

 hotdrop:
	par->hotdrop = true;
	return false;
}

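/*
 * Rule setup: seed the hash on first use (cmpxchg so concurrent rule
 * loads agree on a single seed), pin the l3 conntrack module, and
 * allocate the per-rule tree roots and locks.
 */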
static int connlimit_mt_check(const struct xt_mtchk_param *par)
{
	struct xt_connlimit_info *info = par->matchinfo;
	unsigned int i;
	int ret;

	if (unlikely(!connlimit_rnd)) {
		u_int32_t rand;

		do {
			get_random_bytes(&rand, sizeof(rand));
		} while (!rand);
		cmpxchg(&connlimit_rnd, 0, rand);
	}
	ret = nf_ct_l3proto_try_module_get(par->family);
	if (ret < 0) {
		pr_info("cannot load conntrack support for address family %u\n",
			par->family);
		return ret;
	}

	/* init private data */
	info->data = kmalloc(sizeof(struct xt_connlimit_data), GFP_KERNEL);
	if (info->data == NULL) {
		nf_ct_l3proto_module_put(par->family);
		return -ENOMEM;
	}

	for (i = 0; i < ARRAY_SIZE(info->data->locks); ++i)
		spin_lock_init(&info->data->locks[i]);

	for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
		info->data->climit_root4[i] = RB_ROOT;
	for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
		info->data->climit_root6[i] = RB_ROOT;

	return 0;
}

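/*
 * Teardown helper: the saved tuples are plain copies, so no conntrack
 * references need dropping here; just free every node and list entry.
 */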
static void destroy_tree(struct rb_root *r)
{
	struct xt_connlimit_conn *conn;
	struct xt_connlimit_rb *rbconn;
	struct hlist_node *n;
	struct rb_node *node;

	while ((node = rb_first(r)) != NULL) {
		rbconn = container_of(node, struct xt_connlimit_rb, node);

		rb_erase(node, r);

		hlist_for_each_entry_safe(conn, n, &rbconn->hhead, node)
			kmem_cache_free(connlimit_conn_cachep, conn);

		kmem_cache_free(connlimit_rb_cachep, rbconn);
	}
}

static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
{
	const struct xt_connlimit_info *info = par->matchinfo;
	unsigned int i;

	nf_ct_l3proto_module_put(par->family);

	for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i)
		destroy_tree(&info->data->climit_root4[i]);
	for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i)
		destroy_tree(&info->data->climit_root6[i]);

	kfree(info->data);
}

static struct xt_match connlimit_mt_reg __read_mostly = {
	.name       = "connlimit",
	.revision   = 1,
	.family     = NFPROTO_UNSPEC,
	.checkentry = connlimit_mt_check,
	.match      = connlimit_mt,
	.matchsize  = sizeof(struct xt_connlimit_info),
	.destroy    = connlimit_mt_destroy,
	.me         = THIS_MODULE,
};

static int __init connlimit_mt_init(void)
{
	int ret;

	BUILD_BUG_ON(CONNLIMIT_LOCK_SLOTS > CONNLIMIT_SLOTS);
	BUILD_BUG_ON((CONNLIMIT_SLOTS % CONNLIMIT_LOCK_SLOTS) != 0);

	connlimit_conn_cachep = kmem_cache_create("xt_connlimit_conn",
					   sizeof(struct xt_connlimit_conn),
					   0, 0, NULL);
	if (!connlimit_conn_cachep)
		return -ENOMEM;

	connlimit_rb_cachep = kmem_cache_create("xt_connlimit_rb",
					   sizeof(struct xt_connlimit_rb),
					   0, 0, NULL);
	if (!connlimit_rb_cachep) {
		kmem_cache_destroy(connlimit_conn_cachep);
		return -ENOMEM;
	}
	ret = xt_register_match(&connlimit_mt_reg);
	if (ret != 0) {
		kmem_cache_destroy(connlimit_conn_cachep);
		kmem_cache_destroy(connlimit_rb_cachep);
	}
	return ret;
}

static void __exit connlimit_mt_exit(void)
{
	xt_unregister_match(&connlimit_mt_reg);
	kmem_cache_destroy(connlimit_conn_cachep);
	kmem_cache_destroy(connlimit_rb_cachep);
}

module_init(connlimit_mt_init);
module_exit(connlimit_mt_exit);
MODULE_AUTHOR("Jan Engelhardt <jengelh@medozas.de>");
MODULE_DESCRIPTION("Xtables: Number of connections matching");
MODULE_LICENSE("GPL");
MODULE_ALIAS("ipt_connlimit");
MODULE_ALIAS("ip6t_connlimit");