vfat: don't use ->d_time
[cascardo/linux.git] / net / xfrm / xfrm_policy.c
1 /*
2  * xfrm_policy.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      Kazunori MIYAZAWA @USAGI
10  *      YOSHIFUJI Hideaki
11  *              Split up af-specific portion
12  *      Derek Atkins <derek@ihtfp.com>          Add the post_input processor
13  *
14  */
15
16 #include <linux/err.h>
17 #include <linux/slab.h>
18 #include <linux/kmod.h>
19 #include <linux/list.h>
20 #include <linux/spinlock.h>
21 #include <linux/workqueue.h>
22 #include <linux/notifier.h>
23 #include <linux/netdevice.h>
24 #include <linux/netfilter.h>
25 #include <linux/module.h>
26 #include <linux/cache.h>
27 #include <linux/audit.h>
28 #include <net/dst.h>
29 #include <net/flow.h>
30 #include <net/xfrm.h>
31 #include <net/ip.h>
32 #ifdef CONFIG_XFRM_STATISTICS
33 #include <net/snmp.h>
34 #endif
35
36 #include "xfrm_hash.h"
37
/*
 * Hold-queue timeout bounds, expressed in jiffies: HZ/10 and 60*HZ.
 * XFRM_QUEUE_TMO_MIN is the value a queue's timeout is reset to when
 * packets are moved onto a replacement policy (see xfrm_policy_requeue()).
 */
#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10))
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
/* NOTE(review): presumably the cap on packets held per policy queue -- the
 * user is outside this part of the file; confirm.
 */
#define XFRM_MAX_QUEUE_LEN      100
41
/*
 * Small carrier pairing an original dst entry with a flags byte.
 * NOTE(review): the users of this struct are outside this part of the
 * file; presumably it is passed through the flow-cache resolver -- confirm.
 */
struct xfrm_flo {
        struct dst_entry *dst_orig;
        u8 flags;
};
46
/* NOTE(review): presumably serialises updates of xfrm_policy_afinfo[];
 * the writers are outside this part of the file -- confirm.
 */
static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
/* Per-address-family operations table, indexed by family and read under
 * rcu_read_lock() via rcu_dereference() in xfrm_policy_get_afinfo().
 */
static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
                                                __read_mostly;

static struct kmem_cache *xfrm_dst_cache __read_mostly;

/* Forward declarations for helpers defined later in the file. */
static void xfrm_init_pmtu(struct dst_entry *dst);
static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
static void xfrm_policy_queue_process(unsigned long arg);

static void __xfrm_policy_link(struct xfrm_policy *pol, int dir);
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
                                                int dir);
61
62 static inline bool
63 __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
64 {
65         const struct flowi4 *fl4 = &fl->u.ip4;
66
67         return  addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) &&
68                 addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) &&
69                 !((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
70                 !((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
71                 (fl4->flowi4_proto == sel->proto || !sel->proto) &&
72                 (fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
73 }
74
75 static inline bool
76 __xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
77 {
78         const struct flowi6 *fl6 = &fl->u.ip6;
79
80         return  addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) &&
81                 addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) &&
82                 !((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) &&
83                 !((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) &&
84                 (fl6->flowi6_proto == sel->proto || !sel->proto) &&
85                 (fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
86 }
87
88 bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
89                          unsigned short family)
90 {
91         switch (family) {
92         case AF_INET:
93                 return __xfrm4_selector_match(sel, fl);
94         case AF_INET6:
95                 return __xfrm6_selector_match(sel, fl);
96         }
97         return false;
98 }
99
100 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
101 {
102         struct xfrm_policy_afinfo *afinfo;
103
104         if (unlikely(family >= NPROTO))
105                 return NULL;
106         rcu_read_lock();
107         afinfo = rcu_dereference(xfrm_policy_afinfo[family]);
108         if (unlikely(!afinfo))
109                 rcu_read_unlock();
110         return afinfo;
111 }
112
/*
 * Counterpart of a successful xfrm_policy_get_afinfo(): drops the RCU read
 * lock that was left held.  @afinfo itself is not used.
 */
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
        rcu_read_unlock();
}
117
118 static inline struct dst_entry *__xfrm_dst_lookup(struct net *net,
119                                                   int tos, int oif,
120                                                   const xfrm_address_t *saddr,
121                                                   const xfrm_address_t *daddr,
122                                                   int family)
123 {
124         struct xfrm_policy_afinfo *afinfo;
125         struct dst_entry *dst;
126
127         afinfo = xfrm_policy_get_afinfo(family);
128         if (unlikely(afinfo == NULL))
129                 return ERR_PTR(-EAFNOSUPPORT);
130
131         dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr);
132
133         xfrm_policy_put_afinfo(afinfo);
134
135         return dst;
136 }
137
138 static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x,
139                                                 int tos, int oif,
140                                                 xfrm_address_t *prev_saddr,
141                                                 xfrm_address_t *prev_daddr,
142                                                 int family)
143 {
144         struct net *net = xs_net(x);
145         xfrm_address_t *saddr = &x->props.saddr;
146         xfrm_address_t *daddr = &x->id.daddr;
147         struct dst_entry *dst;
148
149         if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
150                 saddr = x->coaddr;
151                 daddr = prev_daddr;
152         }
153         if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
154                 saddr = prev_saddr;
155                 daddr = x->coaddr;
156         }
157
158         dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family);
159
160         if (!IS_ERR(dst)) {
161                 if (prev_saddr != saddr)
162                         memcpy(prev_saddr, saddr,  sizeof(*prev_saddr));
163                 if (prev_daddr != daddr)
164                         memcpy(prev_daddr, daddr,  sizeof(*prev_daddr));
165         }
166
167         return dst;
168 }
169
170 static inline unsigned long make_jiffies(long secs)
171 {
172         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
173                 return MAX_SCHEDULE_TIMEOUT-1;
174         else
175                 return secs*HZ;
176 }
177
/*
 * Lifetime timer callback for a policy.  @data is the struct xfrm_policy
 * pointer cast to unsigned long (see setup_timer() in xfrm_policy_alloc()).
 *
 * Evaluates the four configured lifetimes (hard/soft x add/use):
 *  - a hard lifetime that has run out deletes the policy and reports a
 *    hard expiry via km_policy_expired(xp, dir, 1, 0);
 *  - a soft lifetime that has run out reports a soft expiry and makes the
 *    timer fire again after XFRM_KM_TIMEOUT;
 *  - otherwise the timer is re-armed for the nearest remaining deadline.
 *
 * Every exit path drops one policy reference with xfrm_pol_put(); when the
 * timer is re-armed and mod_timer() returns 0, a new reference is taken
 * for it first.
 */
static void xfrm_policy_timer(unsigned long data)
{
        struct xfrm_policy *xp = (struct xfrm_policy *)data;
        unsigned long now = get_seconds();
        long next = LONG_MAX;   /* seconds until the nearest deadline */
        int warn = 0;           /* set when a soft lifetime has expired */
        int dir;

        read_lock(&xp->lock);

        /* Policy already killed: nothing to evaluate, just drop our ref. */
        if (unlikely(xp->walk.dead))
                goto out;

        dir = xfrm_policy_id2dir(xp->index);

        /* Hard limit counted from the time the policy was added. */
        if (xp->lft.hard_add_expires_seconds) {
                long tmo = xp->lft.hard_add_expires_seconds +
                        xp->curlft.add_time - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        /* Hard limit counted from first use; falls back to add_time while
         * use_time is still zero.
         */
        if (xp->lft.hard_use_expires_seconds) {
                long tmo = xp->lft.hard_use_expires_seconds +
                        (xp->curlft.use_time ? : xp->curlft.add_time) - now;
                if (tmo <= 0)
                        goto expired;
                if (tmo < next)
                        next = tmo;
        }
        /* Soft limits only warn; the timer is re-armed XFRM_KM_TIMEOUT
         * later instead of expiring the policy.
         */
        if (xp->lft.soft_add_expires_seconds) {
                long tmo = xp->lft.soft_add_expires_seconds +
                        xp->curlft.add_time - now;
                if (tmo <= 0) {
                        warn = 1;
                        tmo = XFRM_KM_TIMEOUT;
                }
                if (tmo < next)
                        next = tmo;
        }
        if (xp->lft.soft_use_expires_seconds) {
                long tmo = xp->lft.soft_use_expires_seconds +
                        (xp->curlft.use_time ? : xp->curlft.add_time) - now;
                if (tmo <= 0) {
                        warn = 1;
                        tmo = XFRM_KM_TIMEOUT;
                }
                if (tmo < next)
                        next = tmo;
        }

        if (warn)
                km_policy_expired(xp, dir, 0, 0);
        /* next stays LONG_MAX when no lifetime is configured: do not re-arm.
         * A zero return from mod_timer() means a new reference is needed
         * for the freshly armed timer.
         */
        if (next != LONG_MAX &&
            !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
                xfrm_pol_hold(xp);

out:
        read_unlock(&xp->lock);
        xfrm_pol_put(xp);
        return;

expired:
        /* The lock is dropped before the policy is deleted. */
        read_unlock(&xp->lock);
        if (!xfrm_policy_delete(xp, dir))
                km_policy_expired(xp, dir, 1, 0);
        xfrm_pol_put(xp);
}
247
248 static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
249 {
250         struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
251
252         if (unlikely(pol->walk.dead))
253                 flo = NULL;
254         else
255                 xfrm_pol_hold(pol);
256
257         return flo;
258 }
259
260 static int xfrm_policy_flo_check(struct flow_cache_object *flo)
261 {
262         struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
263
264         return !pol->walk.dead;
265 }
266
267 static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
268 {
269         xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
270 }
271
/* Flow-cache callbacks for policy objects; installed on every policy by
 * xfrm_policy_alloc() through policy->flo.ops.
 */
static const struct flow_cache_ops xfrm_policy_fc_ops = {
        .get = xfrm_policy_flo_get,
        .check = xfrm_policy_flo_check,
        .delete = xfrm_policy_flo_delete,
};
277
278 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
279  * SPD calls.
280  */
281
282 struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
283 {
284         struct xfrm_policy *policy;
285
286         policy = kzalloc(sizeof(struct xfrm_policy), gfp);
287
288         if (policy) {
289                 write_pnet(&policy->xp_net, net);
290                 INIT_LIST_HEAD(&policy->walk.all);
291                 INIT_HLIST_NODE(&policy->bydst);
292                 INIT_HLIST_NODE(&policy->byidx);
293                 rwlock_init(&policy->lock);
294                 atomic_set(&policy->refcnt, 1);
295                 skb_queue_head_init(&policy->polq.hold_queue);
296                 setup_timer(&policy->timer, xfrm_policy_timer,
297                                 (unsigned long)policy);
298                 setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
299                             (unsigned long)policy);
300                 policy->flo.ops = &xfrm_policy_fc_ops;
301         }
302         return policy;
303 }
304 EXPORT_SYMBOL(xfrm_policy_alloc);
305
306 static void xfrm_policy_destroy_rcu(struct rcu_head *head)
307 {
308         struct xfrm_policy *policy = container_of(head, struct xfrm_policy, rcu);
309
310         security_xfrm_policy_free(policy->security);
311         kfree(policy);
312 }
313
314 /* Destroy xfrm_policy: descendant resources must be released to this moment. */
315
316 void xfrm_policy_destroy(struct xfrm_policy *policy)
317 {
318         BUG_ON(!policy->walk.dead);
319
320         if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
321                 BUG();
322
323         call_rcu(&policy->rcu, xfrm_policy_destroy_rcu);
324 }
325 EXPORT_SYMBOL(xfrm_policy_destroy);
326
327 /* Rule must be locked. Release descentant resources, announce
328  * entry dead. The rule must be unlinked from lists to the moment.
329  */
330
331 static void xfrm_policy_kill(struct xfrm_policy *policy)
332 {
333         policy->walk.dead = 1;
334
335         atomic_inc(&policy->genid);
336
337         if (del_timer(&policy->polq.hold_timer))
338                 xfrm_pol_put(policy);
339         skb_queue_purge(&policy->polq.hold_queue);
340
341         if (del_timer(&policy->timer))
342                 xfrm_pol_put(policy);
343
344         xfrm_pol_put(policy);
345 }
346
/* Upper bound on the number of hash buckets: the *_should_resize() helpers
 * stop asking for growth once (hmask + 1) reaches this value.
 */
static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
348
349 static inline unsigned int idx_hash(struct net *net, u32 index)
350 {
351         return __idx_hash(index, net->xfrm.policy_idx_hmask);
352 }
353
354 /* calculate policy hash thresholds */
355 static void __get_hash_thresh(struct net *net,
356                               unsigned short family, int dir,
357                               u8 *dbits, u8 *sbits)
358 {
359         switch (family) {
360         case AF_INET:
361                 *dbits = net->xfrm.policy_bydst[dir].dbits4;
362                 *sbits = net->xfrm.policy_bydst[dir].sbits4;
363                 break;
364
365         case AF_INET6:
366                 *dbits = net->xfrm.policy_bydst[dir].dbits6;
367                 *sbits = net->xfrm.policy_bydst[dir].sbits6;
368                 break;
369
370         default:
371                 *dbits = 0;
372                 *sbits = 0;
373         }
374 }
375
376 static struct hlist_head *policy_hash_bysel(struct net *net,
377                                             const struct xfrm_selector *sel,
378                                             unsigned short family, int dir)
379 {
380         unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
381         unsigned int hash;
382         u8 dbits;
383         u8 sbits;
384
385         __get_hash_thresh(net, family, dir, &dbits, &sbits);
386         hash = __sel_hash(sel, family, hmask, dbits, sbits);
387
388         return (hash == hmask + 1 ?
389                 &net->xfrm.policy_inexact[dir] :
390                 net->xfrm.policy_bydst[dir].table + hash);
391 }
392
393 static struct hlist_head *policy_hash_direct(struct net *net,
394                                              const xfrm_address_t *daddr,
395                                              const xfrm_address_t *saddr,
396                                              unsigned short family, int dir)
397 {
398         unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
399         unsigned int hash;
400         u8 dbits;
401         u8 sbits;
402
403         __get_hash_thresh(net, family, dir, &dbits, &sbits);
404         hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits);
405
406         return net->xfrm.policy_bydst[dir].table + hash;
407 }
408
/*
 * Move every policy on the old bucket @list into the new table @ndsttable
 * (mask @nhashmask), keeping the relative order of policies that end up in
 * the same new bucket.
 *
 * Works in passes: the first remaining policy of @list is put at the head
 * of its new bucket (h0); every later policy of the pass that hashes to
 * the same bucket is appended right behind the previously moved one, and
 * policies hashing elsewhere are left on @list for a following pass.  The
 * passes repeat until @list is empty.
 */
static void xfrm_dst_hash_transfer(struct net *net,
                                   struct hlist_head *list,
                                   struct hlist_head *ndsttable,
                                   unsigned int nhashmask,
                                   int dir)
{
        struct hlist_node *tmp, *entry0 = NULL; /* entry0: last node moved in this pass */
        struct xfrm_policy *pol;
        unsigned int h0 = 0;                    /* new bucket served by this pass */
        u8 dbits;
        u8 sbits;

redo:
        hlist_for_each_entry_safe(pol, tmp, list, bydst) {
                unsigned int h;

                __get_hash_thresh(net, pol->family, dir, &dbits, &sbits);
                h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
                                pol->family, nhashmask, dbits, sbits);
                if (!entry0) {
                        /* First entry of the pass defines the target bucket. */
                        hlist_del(&pol->bydst);
                        hlist_add_head(&pol->bydst, ndsttable+h);
                        h0 = h;
                } else {
                        /* Different bucket: leave it for a later pass. */
                        if (h != h0)
                                continue;
                        hlist_del(&pol->bydst);
                        hlist_add_behind(&pol->bydst, entry0);
                }
                entry0 = &pol->bydst;
        }
        if (!hlist_empty(list)) {
                entry0 = NULL;
                goto redo;
        }
}
445
446 static void xfrm_idx_hash_transfer(struct hlist_head *list,
447                                    struct hlist_head *nidxtable,
448                                    unsigned int nhashmask)
449 {
450         struct hlist_node *tmp;
451         struct xfrm_policy *pol;
452
453         hlist_for_each_entry_safe(pol, tmp, list, byidx) {
454                 unsigned int h;
455
456                 h = __idx_hash(pol->index, nhashmask);
457                 hlist_add_head(&pol->byidx, nidxtable+h);
458         }
459 }
460
/* Mask of a table with twice as many buckets as one using @old_hmask. */
static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
        unsigned int doubled = (old_hmask + 1) << 1;

        return doubled - 1;
}
465
466 static void xfrm_bydst_resize(struct net *net, int dir)
467 {
468         unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
469         unsigned int nhashmask = xfrm_new_hash_mask(hmask);
470         unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
471         struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
472         struct hlist_head *ndst = xfrm_hash_alloc(nsize);
473         int i;
474
475         if (!ndst)
476                 return;
477
478         write_lock_bh(&net->xfrm.xfrm_policy_lock);
479
480         for (i = hmask; i >= 0; i--)
481                 xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir);
482
483         net->xfrm.policy_bydst[dir].table = ndst;
484         net->xfrm.policy_bydst[dir].hmask = nhashmask;
485
486         write_unlock_bh(&net->xfrm.xfrm_policy_lock);
487
488         xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
489 }
490
491 static void xfrm_byidx_resize(struct net *net, int total)
492 {
493         unsigned int hmask = net->xfrm.policy_idx_hmask;
494         unsigned int nhashmask = xfrm_new_hash_mask(hmask);
495         unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
496         struct hlist_head *oidx = net->xfrm.policy_byidx;
497         struct hlist_head *nidx = xfrm_hash_alloc(nsize);
498         int i;
499
500         if (!nidx)
501                 return;
502
503         write_lock_bh(&net->xfrm.xfrm_policy_lock);
504
505         for (i = hmask; i >= 0; i--)
506                 xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
507
508         net->xfrm.policy_byidx = nidx;
509         net->xfrm.policy_idx_hmask = nhashmask;
510
511         write_unlock_bh(&net->xfrm.xfrm_policy_lock);
512
513         xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
514 }
515
516 static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
517 {
518         unsigned int cnt = net->xfrm.policy_count[dir];
519         unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
520
521         if (total)
522                 *total += cnt;
523
524         if ((hmask + 1) < xfrm_policy_hashmax &&
525             cnt > hmask)
526                 return 1;
527
528         return 0;
529 }
530
531 static inline int xfrm_byidx_should_resize(struct net *net, int total)
532 {
533         unsigned int hmask = net->xfrm.policy_idx_hmask;
534
535         if ((hmask + 1) < xfrm_policy_hashmax &&
536             total > hmask)
537                 return 1;
538
539         return 0;
540 }
541
542 void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
543 {
544         read_lock_bh(&net->xfrm.xfrm_policy_lock);
545         si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
546         si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
547         si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
548         si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
549         si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
550         si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
551         si->spdhcnt = net->xfrm.policy_idx_hmask;
552         si->spdhmcnt = xfrm_policy_hashmax;
553         read_unlock_bh(&net->xfrm.xfrm_policy_lock);
554 }
555 EXPORT_SYMBOL(xfrm_spd_getinfo);
556
557 static DEFINE_MUTEX(hash_resize_mutex);
558 static void xfrm_hash_resize(struct work_struct *work)
559 {
560         struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
561         int dir, total;
562
563         mutex_lock(&hash_resize_mutex);
564
565         total = 0;
566         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
567                 if (xfrm_bydst_should_resize(net, dir, &total))
568                         xfrm_bydst_resize(net, dir);
569         }
570         if (xfrm_byidx_should_resize(net, total))
571                 xfrm_byidx_resize(net, total);
572
573         mutex_unlock(&hash_resize_mutex);
574 }
575
576 static void xfrm_hash_rebuild(struct work_struct *work)
577 {
578         struct net *net = container_of(work, struct net,
579                                        xfrm.policy_hthresh.work);
580         unsigned int hmask;
581         struct xfrm_policy *pol;
582         struct xfrm_policy *policy;
583         struct hlist_head *chain;
584         struct hlist_head *odst;
585         struct hlist_node *newpos;
586         int i;
587         int dir;
588         unsigned seq;
589         u8 lbits4, rbits4, lbits6, rbits6;
590
591         mutex_lock(&hash_resize_mutex);
592
593         /* read selector prefixlen thresholds */
594         do {
595                 seq = read_seqbegin(&net->xfrm.policy_hthresh.lock);
596
597                 lbits4 = net->xfrm.policy_hthresh.lbits4;
598                 rbits4 = net->xfrm.policy_hthresh.rbits4;
599                 lbits6 = net->xfrm.policy_hthresh.lbits6;
600                 rbits6 = net->xfrm.policy_hthresh.rbits6;
601         } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq));
602
603         write_lock_bh(&net->xfrm.xfrm_policy_lock);
604
605         /* reset the bydst and inexact table in all directions */
606         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
607                 INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
608                 hmask = net->xfrm.policy_bydst[dir].hmask;
609                 odst = net->xfrm.policy_bydst[dir].table;
610                 for (i = hmask; i >= 0; i--)
611                         INIT_HLIST_HEAD(odst + i);
612                 if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) {
613                         /* dir out => dst = remote, src = local */
614                         net->xfrm.policy_bydst[dir].dbits4 = rbits4;
615                         net->xfrm.policy_bydst[dir].sbits4 = lbits4;
616                         net->xfrm.policy_bydst[dir].dbits6 = rbits6;
617                         net->xfrm.policy_bydst[dir].sbits6 = lbits6;
618                 } else {
619                         /* dir in/fwd => dst = local, src = remote */
620                         net->xfrm.policy_bydst[dir].dbits4 = lbits4;
621                         net->xfrm.policy_bydst[dir].sbits4 = rbits4;
622                         net->xfrm.policy_bydst[dir].dbits6 = lbits6;
623                         net->xfrm.policy_bydst[dir].sbits6 = rbits6;
624                 }
625         }
626
627         /* re-insert all policies by order of creation */
628         list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) {
629                 newpos = NULL;
630                 chain = policy_hash_bysel(net, &policy->selector,
631                                           policy->family,
632                                           xfrm_policy_id2dir(policy->index));
633                 hlist_for_each_entry(pol, chain, bydst) {
634                         if (policy->priority >= pol->priority)
635                                 newpos = &pol->bydst;
636                         else
637                                 break;
638                 }
639                 if (newpos)
640                         hlist_add_behind(&policy->bydst, newpos);
641                 else
642                         hlist_add_head(&policy->bydst, chain);
643         }
644
645         write_unlock_bh(&net->xfrm.xfrm_policy_lock);
646
647         mutex_unlock(&hash_resize_mutex);
648 }
649
/* Queue the per-net threshold work item (net->xfrm.policy_hthresh.work,
 * the item xfrm_hash_rebuild() is written for) on the system workqueue.
 */
void xfrm_policy_hash_rebuild(struct net *net)
{
        schedule_work(&net->xfrm.policy_hthresh.work);
}
EXPORT_SYMBOL(xfrm_policy_hash_rebuild);
655
656 /* Generate new index... KAME seems to generate them ordered by cost
657  * of an absolute inpredictability of ordering of rules. This will not pass. */
658 static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
659 {
660         static u32 idx_generator;
661
662         for (;;) {
663                 struct hlist_head *list;
664                 struct xfrm_policy *p;
665                 u32 idx;
666                 int found;
667
668                 if (!index) {
669                         idx = (idx_generator | dir);
670                         idx_generator += 8;
671                 } else {
672                         idx = index;
673                         index = 0;
674                 }
675
676                 if (idx == 0)
677                         idx = 8;
678                 list = net->xfrm.policy_byidx + idx_hash(net, idx);
679                 found = 0;
680                 hlist_for_each_entry(p, list, byidx) {
681                         if (p->index == idx) {
682                                 found = 1;
683                                 break;
684                         }
685                 }
686                 if (!found)
687                         return idx;
688         }
689 }
690
691 static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
692 {
693         u32 *p1 = (u32 *) s1;
694         u32 *p2 = (u32 *) s2;
695         int len = sizeof(struct xfrm_selector) / sizeof(u32);
696         int i;
697
698         for (i = 0; i < len; i++) {
699                 if (p1[i] != p2[i])
700                         return 1;
701         }
702
703         return 0;
704 }
705
706 static void xfrm_policy_requeue(struct xfrm_policy *old,
707                                 struct xfrm_policy *new)
708 {
709         struct xfrm_policy_queue *pq = &old->polq;
710         struct sk_buff_head list;
711
712         if (skb_queue_empty(&pq->hold_queue))
713                 return;
714
715         __skb_queue_head_init(&list);
716
717         spin_lock_bh(&pq->hold_queue.lock);
718         skb_queue_splice_init(&pq->hold_queue, &list);
719         if (del_timer(&pq->hold_timer))
720                 xfrm_pol_put(old);
721         spin_unlock_bh(&pq->hold_queue.lock);
722
723         pq = &new->polq;
724
725         spin_lock_bh(&pq->hold_queue.lock);
726         skb_queue_splice(&list, &pq->hold_queue);
727         pq->timeout = XFRM_QUEUE_TMO_MIN;
728         if (!mod_timer(&pq->hold_timer, jiffies))
729                 xfrm_pol_hold(new);
730         spin_unlock_bh(&pq->hold_queue.lock);
731 }
732
733 static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
734                                    struct xfrm_policy *pol)
735 {
736         u32 mark = policy->mark.v & policy->mark.m;
737
738         if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m)
739                 return true;
740
741         if ((mark & pol->mark.m) == pol->mark.v &&
742             policy->priority == pol->priority)
743                 return true;
744
745         return false;
746 }
747
/*
 * Insert @policy into direction @dir of its namespace's policy database.
 *
 * An existing policy with the same type, selector, matching mark and
 * matching security context is treated as the one being replaced: with
 * @excl set the insert fails with -EEXIST, otherwise the old policy is
 * unlinked, its queued packets and its index are handed to @policy, and it
 * is killed once the lock is dropped.
 *
 * The new policy is placed in its chain behind the last entry whose
 * priority value is less than or equal to its own.  It gets an index, a
 * fresh add_time, and its lifetime timer armed one second (HZ) ahead.
 *
 * Returns 0 on success or -EEXIST as described above.
 */
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
        struct net *net = xp_net(policy);
        struct xfrm_policy *pol;
        struct xfrm_policy *delpol;     /* existing policy being replaced, if any */
        struct hlist_head *chain;
        struct hlist_node *newpos;      /* node to insert behind; NULL = chain head */

        write_lock_bh(&net->xfrm.xfrm_policy_lock);
        chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
        delpol = NULL;
        newpos = NULL;
        hlist_for_each_entry(pol, chain, bydst) {
                /* WARN_ON(delpol) fires if a second equivalent policy is
                 * found; that entry is then handled as a non-match.
                 */
                if (pol->type == policy->type &&
                    !selector_cmp(&pol->selector, &policy->selector) &&
                    xfrm_policy_mark_match(policy, pol) &&
                    xfrm_sec_ctx_match(pol->security, policy->security) &&
                    !WARN_ON(delpol)) {
                        if (excl) {
                                write_unlock_bh(&net->xfrm.xfrm_policy_lock);
                                return -EEXIST;
                        }
                        delpol = pol;
                        /* Keep scanning: the insert position may lie
                         * further down the chain.
                         */
                        if (policy->priority > pol->priority)
                                continue;
                } else if (policy->priority >= pol->priority) {
                        newpos = &pol->bydst;
                        continue;
                }
                /* Position is settled; stop once the replaced policy is
                 * known as well.
                 */
                if (delpol)
                        break;
        }
        if (newpos)
                hlist_add_behind(&policy->bydst, newpos);
        else
                hlist_add_head(&policy->bydst, chain);
        __xfrm_policy_link(policy, dir);
        atomic_inc(&net->xfrm.flow_cache_genid);

        /* After previous checking, family can either be AF_INET or AF_INET6 */
        if (policy->family == AF_INET)
                rt_genid_bump_ipv4(net);
        else
                rt_genid_bump_ipv6(net);

        if (delpol) {
                xfrm_policy_requeue(delpol, policy);
                __xfrm_policy_unlink(delpol, dir);
        }
        /* A replacement inherits the old index; otherwise generate one,
         * trying the index already set on @policy first.
         */
        policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index);
        hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
        policy->curlft.add_time = get_seconds();
        policy->curlft.use_time = 0;
        /* Take a reference for the timer when mod_timer() returns 0. */
        if (!mod_timer(&policy->timer, jiffies + HZ))
                xfrm_pol_hold(policy);
        write_unlock_bh(&net->xfrm.xfrm_policy_lock);

        /* Outside the lock: dispose of the replaced policy, or (when the
         * policy count grew) check whether the hash table should grow.
         */
        if (delpol)
                xfrm_policy_kill(delpol);
        else if (xfrm_bydst_should_resize(net, dir, NULL))
                schedule_work(&net->xfrm.policy_hash_work);

        return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);
813
814 struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
815                                           int dir, struct xfrm_selector *sel,
816                                           struct xfrm_sec_ctx *ctx, int delete,
817                                           int *err)
818 {
819         struct xfrm_policy *pol, *ret;
820         struct hlist_head *chain;
821
822         *err = 0;
823         write_lock_bh(&net->xfrm.xfrm_policy_lock);
824         chain = policy_hash_bysel(net, sel, sel->family, dir);
825         ret = NULL;
826         hlist_for_each_entry(pol, chain, bydst) {
827                 if (pol->type == type &&
828                     (mark & pol->mark.m) == pol->mark.v &&
829                     !selector_cmp(sel, &pol->selector) &&
830                     xfrm_sec_ctx_match(ctx, pol->security)) {
831                         xfrm_pol_hold(pol);
832                         if (delete) {
833                                 *err = security_xfrm_policy_delete(
834                                                                 pol->security);
835                                 if (*err) {
836                                         write_unlock_bh(&net->xfrm.xfrm_policy_lock);
837                                         return pol;
838                                 }
839                                 __xfrm_policy_unlink(pol, dir);
840                         }
841                         ret = pol;
842                         break;
843                 }
844         }
845         write_unlock_bh(&net->xfrm.xfrm_policy_lock);
846
847         if (ret && delete)
848                 xfrm_policy_kill(ret);
849         return ret;
850 }
851 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
852
853 struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
854                                      int dir, u32 id, int delete, int *err)
855 {
856         struct xfrm_policy *pol, *ret;
857         struct hlist_head *chain;
858
859         *err = -ENOENT;
860         if (xfrm_policy_id2dir(id) != dir)
861                 return NULL;
862
863         *err = 0;
864         write_lock_bh(&net->xfrm.xfrm_policy_lock);
865         chain = net->xfrm.policy_byidx + idx_hash(net, id);
866         ret = NULL;
867         hlist_for_each_entry(pol, chain, byidx) {
868                 if (pol->type == type && pol->index == id &&
869                     (mark & pol->mark.m) == pol->mark.v) {
870                         xfrm_pol_hold(pol);
871                         if (delete) {
872                                 *err = security_xfrm_policy_delete(
873                                                                 pol->security);
874                                 if (*err) {
875                                         write_unlock_bh(&net->xfrm.xfrm_policy_lock);
876                                         return pol;
877                                 }
878                                 __xfrm_policy_unlink(pol, dir);
879                         }
880                         ret = pol;
881                         break;
882                 }
883         }
884         write_unlock_bh(&net->xfrm.xfrm_policy_lock);
885
886         if (ret && delete)
887                 xfrm_policy_kill(ret);
888         return ret;
889 }
890 EXPORT_SYMBOL(xfrm_policy_byid);
891
892 #ifdef CONFIG_SECURITY_NETWORK_XFRM
893 static inline int
894 xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
895 {
896         int dir, err = 0;
897
898         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
899                 struct xfrm_policy *pol;
900                 int i;
901
902                 hlist_for_each_entry(pol,
903                                      &net->xfrm.policy_inexact[dir], bydst) {
904                         if (pol->type != type)
905                                 continue;
906                         err = security_xfrm_policy_delete(pol->security);
907                         if (err) {
908                                 xfrm_audit_policy_delete(pol, 0, task_valid);
909                                 return err;
910                         }
911                 }
912                 for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
913                         hlist_for_each_entry(pol,
914                                              net->xfrm.policy_bydst[dir].table + i,
915                                              bydst) {
916                                 if (pol->type != type)
917                                         continue;
918                                 err = security_xfrm_policy_delete(
919                                                                 pol->security);
920                                 if (err) {
921                                         xfrm_audit_policy_delete(pol, 0,
922                                                                  task_valid);
923                                         return err;
924                                 }
925                         }
926                 }
927         }
928         return err;
929 }
930 #else
931 static inline int
932 xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid)
933 {
934         return 0;
935 }
936 #endif
937
938 int xfrm_policy_flush(struct net *net, u8 type, bool task_valid)
939 {
940         int dir, err = 0, cnt = 0;
941
942         write_lock_bh(&net->xfrm.xfrm_policy_lock);
943
944         err = xfrm_policy_flush_secctx_check(net, type, task_valid);
945         if (err)
946                 goto out;
947
948         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
949                 struct xfrm_policy *pol;
950                 int i;
951
952         again1:
953                 hlist_for_each_entry(pol,
954                                      &net->xfrm.policy_inexact[dir], bydst) {
955                         if (pol->type != type)
956                                 continue;
957                         __xfrm_policy_unlink(pol, dir);
958                         write_unlock_bh(&net->xfrm.xfrm_policy_lock);
959                         cnt++;
960
961                         xfrm_audit_policy_delete(pol, 1, task_valid);
962
963                         xfrm_policy_kill(pol);
964
965                         write_lock_bh(&net->xfrm.xfrm_policy_lock);
966                         goto again1;
967                 }
968
969                 for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
970         again2:
971                         hlist_for_each_entry(pol,
972                                              net->xfrm.policy_bydst[dir].table + i,
973                                              bydst) {
974                                 if (pol->type != type)
975                                         continue;
976                                 __xfrm_policy_unlink(pol, dir);
977                                 write_unlock_bh(&net->xfrm.xfrm_policy_lock);
978                                 cnt++;
979
980                                 xfrm_audit_policy_delete(pol, 1, task_valid);
981                                 xfrm_policy_kill(pol);
982
983                                 write_lock_bh(&net->xfrm.xfrm_policy_lock);
984                                 goto again2;
985                         }
986                 }
987
988         }
989         if (!cnt)
990                 err = -ESRCH;
991 out:
992         write_unlock_bh(&net->xfrm.xfrm_policy_lock);
993         return err;
994 }
995 EXPORT_SYMBOL(xfrm_policy_flush);
996
997 int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
998                      int (*func)(struct xfrm_policy *, int, int, void*),
999                      void *data)
1000 {
1001         struct xfrm_policy *pol;
1002         struct xfrm_policy_walk_entry *x;
1003         int error = 0;
1004
1005         if (walk->type >= XFRM_POLICY_TYPE_MAX &&
1006             walk->type != XFRM_POLICY_TYPE_ANY)
1007                 return -EINVAL;
1008
1009         if (list_empty(&walk->walk.all) && walk->seq != 0)
1010                 return 0;
1011
1012         write_lock_bh(&net->xfrm.xfrm_policy_lock);
1013         if (list_empty(&walk->walk.all))
1014                 x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
1015         else
1016                 x = list_first_entry(&walk->walk.all,
1017                                      struct xfrm_policy_walk_entry, all);
1018
1019         list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
1020                 if (x->dead)
1021                         continue;
1022                 pol = container_of(x, struct xfrm_policy, walk);
1023                 if (walk->type != XFRM_POLICY_TYPE_ANY &&
1024                     walk->type != pol->type)
1025                         continue;
1026                 error = func(pol, xfrm_policy_id2dir(pol->index),
1027                              walk->seq, data);
1028                 if (error) {
1029                         list_move_tail(&walk->walk.all, &x->all);
1030                         goto out;
1031                 }
1032                 walk->seq++;
1033         }
1034         if (walk->seq == 0) {
1035                 error = -ENOENT;
1036                 goto out;
1037         }
1038         list_del_init(&walk->walk.all);
1039 out:
1040         write_unlock_bh(&net->xfrm.xfrm_policy_lock);
1041         return error;
1042 }
1043 EXPORT_SYMBOL(xfrm_policy_walk);
1044
1045 void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
1046 {
1047         INIT_LIST_HEAD(&walk->walk.all);
1048         walk->walk.dead = 1;
1049         walk->type = type;
1050         walk->seq = 0;
1051 }
1052 EXPORT_SYMBOL(xfrm_policy_walk_init);
1053
1054 void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net)
1055 {
1056         if (list_empty(&walk->walk.all))
1057                 return;
1058
1059         write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */
1060         list_del(&walk->walk.all);
1061         write_unlock_bh(&net->xfrm.xfrm_policy_lock);
1062 }
1063 EXPORT_SYMBOL(xfrm_policy_walk_done);
1064
1065 /*
1066  * Find policy to apply to this flow.
1067  *
1068  * Returns 0 if policy found, else an -errno.
1069  */
1070 static int xfrm_policy_match(const struct xfrm_policy *pol,
1071                              const struct flowi *fl,
1072                              u8 type, u16 family, int dir)
1073 {
1074         const struct xfrm_selector *sel = &pol->selector;
1075         int ret = -ESRCH;
1076         bool match;
1077
1078         if (pol->family != family ||
1079             (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
1080             pol->type != type)
1081                 return ret;
1082
1083         match = xfrm_selector_match(sel, fl, family);
1084         if (match)
1085                 ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
1086                                                   dir);
1087
1088         return ret;
1089 }
1090
1091 static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
1092                                                      const struct flowi *fl,
1093                                                      u16 family, u8 dir)
1094 {
1095         int err;
1096         struct xfrm_policy *pol, *ret;
1097         const xfrm_address_t *daddr, *saddr;
1098         struct hlist_head *chain;
1099         u32 priority = ~0U;
1100
1101         daddr = xfrm_flowi_daddr(fl, family);
1102         saddr = xfrm_flowi_saddr(fl, family);
1103         if (unlikely(!daddr || !saddr))
1104                 return NULL;
1105
1106         read_lock_bh(&net->xfrm.xfrm_policy_lock);
1107         chain = policy_hash_direct(net, daddr, saddr, family, dir);
1108         ret = NULL;
1109         hlist_for_each_entry(pol, chain, bydst) {
1110                 err = xfrm_policy_match(pol, fl, type, family, dir);
1111                 if (err) {
1112                         if (err == -ESRCH)
1113                                 continue;
1114                         else {
1115                                 ret = ERR_PTR(err);
1116                                 goto fail;
1117                         }
1118                 } else {
1119                         ret = pol;
1120                         priority = ret->priority;
1121                         break;
1122                 }
1123         }
1124         chain = &net->xfrm.policy_inexact[dir];
1125         hlist_for_each_entry(pol, chain, bydst) {
1126                 if ((pol->priority >= priority) && ret)
1127                         break;
1128
1129                 err = xfrm_policy_match(pol, fl, type, family, dir);
1130                 if (err) {
1131                         if (err == -ESRCH)
1132                                 continue;
1133                         else {
1134                                 ret = ERR_PTR(err);
1135                                 goto fail;
1136                         }
1137                 } else {
1138                         ret = pol;
1139                         break;
1140                 }
1141         }
1142
1143         xfrm_pol_hold(ret);
1144 fail:
1145         read_unlock_bh(&net->xfrm.xfrm_policy_lock);
1146
1147         return ret;
1148 }
1149
1150 static struct xfrm_policy *
1151 __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
1152 {
1153 #ifdef CONFIG_XFRM_SUB_POLICY
1154         struct xfrm_policy *pol;
1155
1156         pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
1157         if (pol != NULL)
1158                 return pol;
1159 #endif
1160         return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
1161 }
1162
1163 static int flow_to_policy_dir(int dir)
1164 {
1165         if (XFRM_POLICY_IN == FLOW_DIR_IN &&
1166             XFRM_POLICY_OUT == FLOW_DIR_OUT &&
1167             XFRM_POLICY_FWD == FLOW_DIR_FWD)
1168                 return dir;
1169
1170         switch (dir) {
1171         default:
1172         case FLOW_DIR_IN:
1173                 return XFRM_POLICY_IN;
1174         case FLOW_DIR_OUT:
1175                 return XFRM_POLICY_OUT;
1176         case FLOW_DIR_FWD:
1177                 return XFRM_POLICY_FWD;
1178         }
1179 }
1180
1181 static struct flow_cache_object *
1182 xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
1183                    u8 dir, struct flow_cache_object *old_obj, void *ctx)
1184 {
1185         struct xfrm_policy *pol;
1186
1187         if (old_obj)
1188                 xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));
1189
1190         pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
1191         if (IS_ERR_OR_NULL(pol))
1192                 return ERR_CAST(pol);
1193
1194         /* Resolver returns two references:
1195          * one for cache and one for caller of flow_cache_lookup() */
1196         xfrm_pol_hold(pol);
1197
1198         return &pol->flo;
1199 }
1200
1201 static inline int policy_to_flow_dir(int dir)
1202 {
1203         if (XFRM_POLICY_IN == FLOW_DIR_IN &&
1204             XFRM_POLICY_OUT == FLOW_DIR_OUT &&
1205             XFRM_POLICY_FWD == FLOW_DIR_FWD)
1206                 return dir;
1207         switch (dir) {
1208         default:
1209         case XFRM_POLICY_IN:
1210                 return FLOW_DIR_IN;
1211         case XFRM_POLICY_OUT:
1212                 return FLOW_DIR_OUT;
1213         case XFRM_POLICY_FWD:
1214                 return FLOW_DIR_FWD;
1215         }
1216 }
1217
1218 static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir,
1219                                                  const struct flowi *fl)
1220 {
1221         struct xfrm_policy *pol;
1222         struct net *net = sock_net(sk);
1223
1224         rcu_read_lock();
1225         read_lock_bh(&net->xfrm.xfrm_policy_lock);
1226         pol = rcu_dereference(sk->sk_policy[dir]);
1227         if (pol != NULL) {
1228                 bool match = xfrm_selector_match(&pol->selector, fl,
1229                                                  sk->sk_family);
1230                 int err = 0;
1231
1232                 if (match) {
1233                         if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
1234                                 pol = NULL;
1235                                 goto out;
1236                         }
1237                         err = security_xfrm_policy_lookup(pol->security,
1238                                                       fl->flowi_secid,
1239                                                       policy_to_flow_dir(dir));
1240                         if (!err)
1241                                 xfrm_pol_hold(pol);
1242                         else if (err == -ESRCH)
1243                                 pol = NULL;
1244                         else
1245                                 pol = ERR_PTR(err);
1246                 } else
1247                         pol = NULL;
1248         }
1249 out:
1250         read_unlock_bh(&net->xfrm.xfrm_policy_lock);
1251         rcu_read_unlock();
1252         return pol;
1253 }
1254
1255 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
1256 {
1257         struct net *net = xp_net(pol);
1258
1259         list_add(&pol->walk.all, &net->xfrm.policy_all);
1260         net->xfrm.policy_count[dir]++;
1261         xfrm_pol_hold(pol);
1262 }
1263
1264 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
1265                                                 int dir)
1266 {
1267         struct net *net = xp_net(pol);
1268
1269         if (list_empty(&pol->walk.all))
1270                 return NULL;
1271
1272         /* Socket policies are not hashed. */
1273         if (!hlist_unhashed(&pol->bydst)) {
1274                 hlist_del(&pol->bydst);
1275                 hlist_del(&pol->byidx);
1276         }
1277
1278         list_del_init(&pol->walk.all);
1279         net->xfrm.policy_count[dir]--;
1280
1281         return pol;
1282 }
1283
1284 static void xfrm_sk_policy_link(struct xfrm_policy *pol, int dir)
1285 {
1286         __xfrm_policy_link(pol, XFRM_POLICY_MAX + dir);
1287 }
1288
1289 static void xfrm_sk_policy_unlink(struct xfrm_policy *pol, int dir)
1290 {
1291         __xfrm_policy_unlink(pol, XFRM_POLICY_MAX + dir);
1292 }
1293
1294 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
1295 {
1296         struct net *net = xp_net(pol);
1297
1298         write_lock_bh(&net->xfrm.xfrm_policy_lock);
1299         pol = __xfrm_policy_unlink(pol, dir);
1300         write_unlock_bh(&net->xfrm.xfrm_policy_lock);
1301         if (pol) {
1302                 xfrm_policy_kill(pol);
1303                 return 0;
1304         }
1305         return -ENOENT;
1306 }
1307 EXPORT_SYMBOL(xfrm_policy_delete);
1308
1309 int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
1310 {
1311         struct net *net = xp_net(pol);
1312         struct xfrm_policy *old_pol;
1313
1314 #ifdef CONFIG_XFRM_SUB_POLICY
1315         if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
1316                 return -EINVAL;
1317 #endif
1318
1319         write_lock_bh(&net->xfrm.xfrm_policy_lock);
1320         old_pol = rcu_dereference_protected(sk->sk_policy[dir],
1321                                 lockdep_is_held(&net->xfrm.xfrm_policy_lock));
1322         if (pol) {
1323                 pol->curlft.add_time = get_seconds();
1324                 pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
1325                 xfrm_sk_policy_link(pol, dir);
1326         }
1327         rcu_assign_pointer(sk->sk_policy[dir], pol);
1328         if (old_pol) {
1329                 if (pol)
1330                         xfrm_policy_requeue(old_pol, pol);
1331
1332                 /* Unlinking succeeds always. This is the only function
1333                  * allowed to delete or replace socket policy.
1334                  */
1335                 xfrm_sk_policy_unlink(old_pol, dir);
1336         }
1337         write_unlock_bh(&net->xfrm.xfrm_policy_lock);
1338
1339         if (old_pol) {
1340                 xfrm_policy_kill(old_pol);
1341         }
1342         return 0;
1343 }
1344
1345 static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
1346 {
1347         struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
1348         struct net *net = xp_net(old);
1349
1350         if (newp) {
1351                 newp->selector = old->selector;
1352                 if (security_xfrm_policy_clone(old->security,
1353                                                &newp->security)) {
1354                         kfree(newp);
1355                         return NULL;  /* ENOMEM */
1356                 }
1357                 newp->lft = old->lft;
1358                 newp->curlft = old->curlft;
1359                 newp->mark = old->mark;
1360                 newp->action = old->action;
1361                 newp->flags = old->flags;
1362                 newp->xfrm_nr = old->xfrm_nr;
1363                 newp->index = old->index;
1364                 newp->type = old->type;
1365                 memcpy(newp->xfrm_vec, old->xfrm_vec,
1366                        newp->xfrm_nr*sizeof(struct xfrm_tmpl));
1367                 write_lock_bh(&net->xfrm.xfrm_policy_lock);
1368                 xfrm_sk_policy_link(newp, dir);
1369                 write_unlock_bh(&net->xfrm.xfrm_policy_lock);
1370                 xfrm_pol_put(newp);
1371         }
1372         return newp;
1373 }
1374
1375 int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk)
1376 {
1377         const struct xfrm_policy *p;
1378         struct xfrm_policy *np;
1379         int i, ret = 0;
1380
1381         rcu_read_lock();
1382         for (i = 0; i < 2; i++) {
1383                 p = rcu_dereference(osk->sk_policy[i]);
1384                 if (p) {
1385                         np = clone_policy(p, i);
1386                         if (unlikely(!np)) {
1387                                 ret = -ENOMEM;
1388                                 break;
1389                         }
1390                         rcu_assign_pointer(sk->sk_policy[i], np);
1391                 }
1392         }
1393         rcu_read_unlock();
1394         return ret;
1395 }
1396
1397 static int
1398 xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local,
1399                xfrm_address_t *remote, unsigned short family)
1400 {
1401         int err;
1402         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1403
1404         if (unlikely(afinfo == NULL))
1405                 return -EINVAL;
1406         err = afinfo->get_saddr(net, oif, local, remote);
1407         xfrm_policy_put_afinfo(afinfo);
1408         return err;
1409 }
1410
1411 /* Resolve list of templates for the flow, given policy. */
1412
1413 static int
1414 xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
1415                       struct xfrm_state **xfrm, unsigned short family)
1416 {
1417         struct net *net = xp_net(policy);
1418         int nx;
1419         int i, error;
1420         xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
1421         xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
1422         xfrm_address_t tmp;
1423
1424         for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
1425                 struct xfrm_state *x;
1426                 xfrm_address_t *remote = daddr;
1427                 xfrm_address_t *local  = saddr;
1428                 struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
1429
1430                 if (tmpl->mode == XFRM_MODE_TUNNEL ||
1431                     tmpl->mode == XFRM_MODE_BEET) {
1432                         remote = &tmpl->id.daddr;
1433                         local = &tmpl->saddr;
1434                         if (xfrm_addr_any(local, tmpl->encap_family)) {
1435                                 error = xfrm_get_saddr(net, fl->flowi_oif,
1436                                                        &tmp, remote,
1437                                                        tmpl->encap_family);
1438                                 if (error)
1439                                         goto fail;
1440                                 local = &tmp;
1441                         }
1442                 }
1443
1444                 x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
1445
1446                 if (x && x->km.state == XFRM_STATE_VALID) {
1447                         xfrm[nx++] = x;
1448                         daddr = remote;
1449                         saddr = local;
1450                         continue;
1451                 }
1452                 if (x) {
1453                         error = (x->km.state == XFRM_STATE_ERROR ?
1454                                  -EINVAL : -EAGAIN);
1455                         xfrm_state_put(x);
1456                 } else if (error == -ESRCH) {
1457                         error = -EAGAIN;
1458                 }
1459
1460                 if (!tmpl->optional)
1461                         goto fail;
1462         }
1463         return nx;
1464
1465 fail:
1466         for (nx--; nx >= 0; nx--)
1467                 xfrm_state_put(xfrm[nx]);
1468         return error;
1469 }
1470
1471 static int
1472 xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
1473                   struct xfrm_state **xfrm, unsigned short family)
1474 {
1475         struct xfrm_state *tp[XFRM_MAX_DEPTH];
1476         struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
1477         int cnx = 0;
1478         int error;
1479         int ret;
1480         int i;
1481
1482         for (i = 0; i < npols; i++) {
1483                 if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
1484                         error = -ENOBUFS;
1485                         goto fail;
1486                 }
1487
1488                 ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
1489                 if (ret < 0) {
1490                         error = ret;
1491                         goto fail;
1492                 } else
1493                         cnx += ret;
1494         }
1495
1496         /* found states are sorted for outbound processing */
1497         if (npols > 1)
1498                 xfrm_state_sort(xfrm, tpp, cnx, family);
1499
1500         return cnx;
1501
1502  fail:
1503         for (cnx--; cnx >= 0; cnx--)
1504                 xfrm_state_put(tpp[cnx]);
1505         return error;
1506
1507 }
1508
1509 /* Check that the bundle accepts the flow and its components are
1510  * still valid.
1511  */
1512
1513 static inline int xfrm_get_tos(const struct flowi *fl, int family)
1514 {
1515         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1516         int tos;
1517
1518         if (!afinfo)
1519                 return -EINVAL;
1520
1521         tos = afinfo->get_tos(fl);
1522
1523         xfrm_policy_put_afinfo(afinfo);
1524
1525         return tos;
1526 }
1527
1528 static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
1529 {
1530         struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1531         struct dst_entry *dst = &xdst->u.dst;
1532
1533         if (xdst->route == NULL) {
1534                 /* Dummy bundle - if it has xfrms we were not
1535                  * able to build bundle as template resolution failed.
1536                  * It means we need to try again resolving. */
1537                 if (xdst->num_xfrms > 0)
1538                         return NULL;
1539         } else if (dst->flags & DST_XFRM_QUEUE) {
1540                 return NULL;
1541         } else {
1542                 /* Real bundle */
1543                 if (stale_bundle(dst))
1544                         return NULL;
1545         }
1546
1547         dst_hold(dst);
1548         return flo;
1549 }
1550
1551 static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
1552 {
1553         struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1554         struct dst_entry *dst = &xdst->u.dst;
1555
1556         if (!xdst->route)
1557                 return 0;
1558         if (stale_bundle(dst))
1559                 return 0;
1560
1561         return 1;
1562 }
1563
1564 static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
1565 {
1566         struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1567         struct dst_entry *dst = &xdst->u.dst;
1568
1569         dst_free(dst);
1570 }
1571
1572 static const struct flow_cache_ops xfrm_bundle_fc_ops = {
1573         .get = xfrm_bundle_flo_get,
1574         .check = xfrm_bundle_flo_check,
1575         .delete = xfrm_bundle_flo_delete,
1576 };
1577
1578 static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
1579 {
1580         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1581         struct dst_ops *dst_ops;
1582         struct xfrm_dst *xdst;
1583
1584         if (!afinfo)
1585                 return ERR_PTR(-EINVAL);
1586
1587         switch (family) {
1588         case AF_INET:
1589                 dst_ops = &net->xfrm.xfrm4_dst_ops;
1590                 break;
1591 #if IS_ENABLED(CONFIG_IPV6)
1592         case AF_INET6:
1593                 dst_ops = &net->xfrm.xfrm6_dst_ops;
1594                 break;
1595 #endif
1596         default:
1597                 BUG();
1598         }
1599         xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0);
1600
1601         if (likely(xdst)) {
1602                 struct dst_entry *dst = &xdst->u.dst;
1603
1604                 memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
1605                 xdst->flo.ops = &xfrm_bundle_fc_ops;
1606         } else
1607                 xdst = ERR_PTR(-ENOBUFS);
1608
1609         xfrm_policy_put_afinfo(afinfo);
1610
1611         return xdst;
1612 }
1613
1614 static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
1615                                  int nfheader_len)
1616 {
1617         struct xfrm_policy_afinfo *afinfo =
1618                 xfrm_policy_get_afinfo(dst->ops->family);
1619         int err;
1620
1621         if (!afinfo)
1622                 return -EINVAL;
1623
1624         err = afinfo->init_path(path, dst, nfheader_len);
1625
1626         xfrm_policy_put_afinfo(afinfo);
1627
1628         return err;
1629 }
1630
1631 static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
1632                                 const struct flowi *fl)
1633 {
1634         struct xfrm_policy_afinfo *afinfo =
1635                 xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
1636         int err;
1637
1638         if (!afinfo)
1639                 return -EINVAL;
1640
1641         err = afinfo->fill_dst(xdst, dev, fl);
1642
1643         xfrm_policy_put_afinfo(afinfo);
1644
1645         return err;
1646 }
1647
1648
1649 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
1650  * all the metrics... Shortly, bundle a bundle.
1651  */
1652
1653 static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1654                                             struct xfrm_state **xfrm, int nx,
1655                                             const struct flowi *fl,
1656                                             struct dst_entry *dst)
1657 {
1658         struct net *net = xp_net(policy);
1659         unsigned long now = jiffies;
1660         struct net_device *dev;
1661         struct xfrm_mode *inner_mode;
1662         struct dst_entry *dst_prev = NULL;
1663         struct dst_entry *dst0 = NULL;
1664         int i = 0;
1665         int err;
1666         int header_len = 0;
1667         int nfheader_len = 0;
1668         int trailer_len = 0;
1669         int tos;
1670         int family = policy->selector.family;
1671         xfrm_address_t saddr, daddr;
1672
1673         xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
1674
1675         tos = xfrm_get_tos(fl, family);
1676         err = tos;
1677         if (tos < 0)
1678                 goto put_states;
1679
1680         dst_hold(dst);
1681
1682         for (; i < nx; i++) {
1683                 struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
1684                 struct dst_entry *dst1 = &xdst->u.dst;
1685
1686                 err = PTR_ERR(xdst);
1687                 if (IS_ERR(xdst)) {
1688                         dst_release(dst);
1689                         goto put_states;
1690                 }
1691
1692                 if (xfrm[i]->sel.family == AF_UNSPEC) {
1693                         inner_mode = xfrm_ip2inner_mode(xfrm[i],
1694                                                         xfrm_af2proto(family));
1695                         if (!inner_mode) {
1696                                 err = -EAFNOSUPPORT;
1697                                 dst_release(dst);
1698                                 goto put_states;
1699                         }
1700                 } else
1701                         inner_mode = xfrm[i]->inner_mode;
1702
1703                 if (!dst_prev)
1704                         dst0 = dst1;
1705                 else {
1706                         dst_prev->child = dst_clone(dst1);
1707                         dst1->flags |= DST_NOHASH;
1708                 }
1709
1710                 xdst->route = dst;
1711                 dst_copy_metrics(dst1, dst);
1712
1713                 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
1714                         family = xfrm[i]->props.family;
1715                         dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif,
1716                                               &saddr, &daddr, family);
1717                         err = PTR_ERR(dst);
1718                         if (IS_ERR(dst))
1719                                 goto put_states;
1720                 } else
1721                         dst_hold(dst);
1722
1723                 dst1->xfrm = xfrm[i];
1724                 xdst->xfrm_genid = xfrm[i]->genid;
1725
1726                 dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
1727                 dst1->flags |= DST_HOST;
1728                 dst1->lastuse = now;
1729
1730                 dst1->input = dst_discard;
1731                 dst1->output = inner_mode->afinfo->output;
1732
1733                 dst1->next = dst_prev;
1734                 dst_prev = dst1;
1735
1736                 header_len += xfrm[i]->props.header_len;
1737                 if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
1738                         nfheader_len += xfrm[i]->props.header_len;
1739                 trailer_len += xfrm[i]->props.trailer_len;
1740         }
1741
1742         dst_prev->child = dst;
1743         dst0->path = dst;
1744
1745         err = -ENODEV;
1746         dev = dst->dev;
1747         if (!dev)
1748                 goto free_dst;
1749
1750         xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1751         xfrm_init_pmtu(dst_prev);
1752
1753         for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
1754                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
1755
1756                 err = xfrm_fill_dst(xdst, dev, fl);
1757                 if (err)
1758                         goto free_dst;
1759
1760                 dst_prev->header_len = header_len;
1761                 dst_prev->trailer_len = trailer_len;
1762                 header_len -= xdst->u.dst.xfrm->props.header_len;
1763                 trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
1764         }
1765
1766 out:
1767         return dst0;
1768
1769 put_states:
1770         for (; i < nx; i++)
1771                 xfrm_state_put(xfrm[i]);
1772 free_dst:
1773         if (dst0)
1774                 dst_free(dst0);
1775         dst0 = ERR_PTR(err);
1776         goto out;
1777 }
1778
#ifdef CONFIG_XFRM_SUB_POLICY
/*
 * Copy @size bytes from @src into the buffer referenced by *@target.
 * If *@target is still NULL a buffer of @size bytes is first allocated
 * with kmalloc(GFP_ATOMIC) and stored in *@target.
 *
 * Returns 0 on success, -ENOMEM if the allocation fails.
 */
static int xfrm_dst_alloc_copy(void **target, const void *src, int size)
{
        void *buf = *target;

        if (buf == NULL) {
                buf = kmalloc(size, GFP_ATOMIC);
                if (buf == NULL)
                        return -ENOMEM;
                *target = buf;
        }

        memcpy(buf, src, size);
        return 0;
}
#endif
1792
/*
 * Record a copy of selector @sel in the ->partner slot of the bundle
 * that @dst belongs to.
 *
 * Without CONFIG_XFRM_SUB_POLICY this is a no-op returning 0; otherwise
 * the result of xfrm_dst_alloc_copy() is returned.
 */
static int xfrm_dst_update_parent(struct dst_entry *dst,
                                  const struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
        struct xfrm_dst *bundle = (struct xfrm_dst *)dst;
        void **slot = (void **)&(bundle->partner);

        return xfrm_dst_alloc_copy(slot, sel, sizeof(*sel));
#else
        return 0;
#endif
}
1804
/*
 * Record a copy of flow @fl in the ->origin slot of the bundle that
 * @dst belongs to.
 *
 * Without CONFIG_XFRM_SUB_POLICY this is a no-op returning 0; otherwise
 * the result of xfrm_dst_alloc_copy() is returned.
 */
static int xfrm_dst_update_origin(struct dst_entry *dst,
                                  const struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
        struct xfrm_dst *bundle = (struct xfrm_dst *)dst;
        void **slot = (void **)&(bundle->origin);

        return xfrm_dst_alloc_copy(slot, fl, sizeof(*fl));
#else
        return 0;
#endif
}
1815
1816 static int xfrm_expand_policies(const struct flowi *fl, u16 family,
1817                                 struct xfrm_policy **pols,
1818                                 int *num_pols, int *num_xfrms)
1819 {
1820         int i;
1821
1822         if (*num_pols == 0 || !pols[0]) {
1823                 *num_pols = 0;
1824                 *num_xfrms = 0;
1825                 return 0;
1826         }
1827         if (IS_ERR(pols[0]))
1828                 return PTR_ERR(pols[0]);
1829
1830         *num_xfrms = pols[0]->xfrm_nr;
1831
1832 #ifdef CONFIG_XFRM_SUB_POLICY
1833         if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
1834             pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1835                 pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
1836                                                     XFRM_POLICY_TYPE_MAIN,
1837                                                     fl, family,
1838                                                     XFRM_POLICY_OUT);
1839                 if (pols[1]) {
1840                         if (IS_ERR(pols[1])) {
1841                                 xfrm_pols_put(pols, *num_pols);
1842                                 return PTR_ERR(pols[1]);
1843                         }
1844                         (*num_pols)++;
1845                         (*num_xfrms) += pols[1]->xfrm_nr;
1846                 }
1847         }
1848 #endif
1849         for (i = 0; i < *num_pols; i++) {
1850                 if (pols[i]->action != XFRM_POLICY_ALLOW) {
1851                         *num_xfrms = -1;
1852                         break;
1853                 }
1854         }
1855
1856         return 0;
1857
1858 }
1859
1860 static struct xfrm_dst *
1861 xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
1862                                const struct flowi *fl, u16 family,
1863                                struct dst_entry *dst_orig)
1864 {
1865         struct net *net = xp_net(pols[0]);
1866         struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
1867         struct dst_entry *dst;
1868         struct xfrm_dst *xdst;
1869         int err;
1870
1871         /* Try to instantiate a bundle */
1872         err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
1873         if (err <= 0) {
1874                 if (err != 0 && err != -EAGAIN)
1875                         XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
1876                 return ERR_PTR(err);
1877         }
1878
1879         dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
1880         if (IS_ERR(dst)) {
1881                 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
1882                 return ERR_CAST(dst);
1883         }
1884
1885         xdst = (struct xfrm_dst *)dst;
1886         xdst->num_xfrms = err;
1887         if (num_pols > 1)
1888                 err = xfrm_dst_update_parent(dst, &pols[1]->selector);
1889         else
1890                 err = xfrm_dst_update_origin(dst, fl);
1891         if (unlikely(err)) {
1892                 dst_free(dst);
1893                 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1894                 return ERR_PTR(err);
1895         }
1896
1897         xdst->num_pols = num_pols;
1898         memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
1899         xdst->policy_genid = atomic_read(&pols[0]->genid);
1900
1901         return xdst;
1902 }
1903
1904 static void xfrm_policy_queue_process(unsigned long arg)
1905 {
1906         struct sk_buff *skb;
1907         struct sock *sk;
1908         struct dst_entry *dst;
1909         struct xfrm_policy *pol = (struct xfrm_policy *)arg;
1910         struct net *net = xp_net(pol);
1911         struct xfrm_policy_queue *pq = &pol->polq;
1912         struct flowi fl;
1913         struct sk_buff_head list;
1914
1915         spin_lock(&pq->hold_queue.lock);
1916         skb = skb_peek(&pq->hold_queue);
1917         if (!skb) {
1918                 spin_unlock(&pq->hold_queue.lock);
1919                 goto out;
1920         }
1921         dst = skb_dst(skb);
1922         sk = skb->sk;
1923         xfrm_decode_session(skb, &fl, dst->ops->family);
1924         spin_unlock(&pq->hold_queue.lock);
1925
1926         dst_hold(dst->path);
1927         dst = xfrm_lookup(net, dst->path, &fl, sk, 0);
1928         if (IS_ERR(dst))
1929                 goto purge_queue;
1930
1931         if (dst->flags & DST_XFRM_QUEUE) {
1932                 dst_release(dst);
1933
1934                 if (pq->timeout >= XFRM_QUEUE_TMO_MAX)
1935                         goto purge_queue;
1936
1937                 pq->timeout = pq->timeout << 1;
1938                 if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout))
1939                         xfrm_pol_hold(pol);
1940         goto out;
1941         }
1942
1943         dst_release(dst);
1944
1945         __skb_queue_head_init(&list);
1946
1947         spin_lock(&pq->hold_queue.lock);
1948         pq->timeout = 0;
1949         skb_queue_splice_init(&pq->hold_queue, &list);
1950         spin_unlock(&pq->hold_queue.lock);
1951
1952         while (!skb_queue_empty(&list)) {
1953                 skb = __skb_dequeue(&list);
1954
1955                 xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
1956                 dst_hold(skb_dst(skb)->path);
1957                 dst = xfrm_lookup(net, skb_dst(skb)->path, &fl, skb->sk, 0);
1958                 if (IS_ERR(dst)) {
1959                         kfree_skb(skb);
1960                         continue;
1961                 }
1962
1963                 nf_reset(skb);
1964                 skb_dst_drop(skb);
1965                 skb_dst_set(skb, dst);
1966
1967                 dst_output(net, skb->sk, skb);
1968         }
1969
1970 out:
1971         xfrm_pol_put(pol);
1972         return;
1973
1974 purge_queue:
1975         pq->timeout = 0;
1976         skb_queue_purge(&pq->hold_queue);
1977         xfrm_pol_put(pol);
1978 }
1979
1980 static int xdst_queue_output(struct net *net, struct sock *sk, struct sk_buff *skb)
1981 {
1982         unsigned long sched_next;
1983         struct dst_entry *dst = skb_dst(skb);
1984         struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
1985         struct xfrm_policy *pol = xdst->pols[0];
1986         struct xfrm_policy_queue *pq = &pol->polq;
1987
1988         if (unlikely(skb_fclone_busy(sk, skb))) {
1989                 kfree_skb(skb);
1990                 return 0;
1991         }
1992
1993         if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
1994                 kfree_skb(skb);
1995                 return -EAGAIN;
1996         }
1997
1998         skb_dst_force(skb);
1999
2000         spin_lock_bh(&pq->hold_queue.lock);
2001
2002         if (!pq->timeout)
2003                 pq->timeout = XFRM_QUEUE_TMO_MIN;
2004
2005         sched_next = jiffies + pq->timeout;
2006
2007         if (del_timer(&pq->hold_timer)) {
2008                 if (time_before(pq->hold_timer.expires, sched_next))
2009                         sched_next = pq->hold_timer.expires;
2010                 xfrm_pol_put(pol);
2011         }
2012
2013         __skb_queue_tail(&pq->hold_queue, skb);
2014         if (!mod_timer(&pq->hold_timer, sched_next))
2015                 xfrm_pol_hold(pol);
2016
2017         spin_unlock_bh(&pq->hold_queue.lock);
2018
2019         return 0;
2020 }
2021
2022 static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
2023                                                  struct xfrm_flo *xflo,
2024                                                  const struct flowi *fl,
2025                                                  int num_xfrms,
2026                                                  u16 family)
2027 {
2028         int err;
2029         struct net_device *dev;
2030         struct dst_entry *dst;
2031         struct dst_entry *dst1;
2032         struct xfrm_dst *xdst;
2033
2034         xdst = xfrm_alloc_dst(net, family);
2035         if (IS_ERR(xdst))
2036                 return xdst;
2037
2038         if (!(xflo->flags & XFRM_LOOKUP_QUEUE) ||
2039             net->xfrm.sysctl_larval_drop ||
2040             num_xfrms <= 0)
2041                 return xdst;
2042
2043         dst = xflo->dst_orig;
2044         dst1 = &xdst->u.dst;
2045         dst_hold(dst);
2046         xdst->route = dst;
2047
2048         dst_copy_metrics(dst1, dst);
2049
2050         dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
2051         dst1->flags |= DST_HOST | DST_XFRM_QUEUE;
2052         dst1->lastuse = jiffies;
2053
2054         dst1->input = dst_discard;
2055         dst1->output = xdst_queue_output;
2056
2057         dst_hold(dst);
2058         dst1->child = dst;
2059         dst1->path = dst;
2060
2061         xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);
2062
2063         err = -ENODEV;
2064         dev = dst->dev;
2065         if (!dev)
2066                 goto free_dst;
2067
2068         err = xfrm_fill_dst(xdst, dev, fl);
2069         if (err)
2070                 goto free_dst;
2071
2072 out:
2073         return xdst;
2074
2075 free_dst:
2076         dst_release(dst1);
2077         xdst = ERR_PTR(err);
2078         goto out;
2079 }
2080
2081 static struct flow_cache_object *
2082 xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
2083                    struct flow_cache_object *oldflo, void *ctx)
2084 {
2085         struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
2086         struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
2087         struct xfrm_dst *xdst, *new_xdst;
2088         int num_pols = 0, num_xfrms = 0, i, err, pol_dead;
2089
2090         /* Check if the policies from old bundle are usable */
2091         xdst = NULL;
2092         if (oldflo) {
2093                 xdst = container_of(oldflo, struct xfrm_dst, flo);
2094                 num_pols = xdst->num_pols;
2095                 num_xfrms = xdst->num_xfrms;
2096                 pol_dead = 0;
2097                 for (i = 0; i < num_pols; i++) {
2098                         pols[i] = xdst->pols[i];
2099                         pol_dead |= pols[i]->walk.dead;
2100                 }
2101                 if (pol_dead) {
2102                         dst_free(&xdst->u.dst);
2103                         xdst = NULL;
2104                         num_pols = 0;
2105                         num_xfrms = 0;
2106                         oldflo = NULL;
2107                 }
2108         }
2109
2110         /* Resolve policies to use if we couldn't get them from
2111          * previous cache entry */
2112         if (xdst == NULL) {
2113                 num_pols = 1;
2114                 pols[0] = __xfrm_policy_lookup(net, fl, family,
2115                                                flow_to_policy_dir(dir));
2116                 err = xfrm_expand_policies(fl, family, pols,
2117                                            &num_pols, &num_xfrms);
2118                 if (err < 0)
2119                         goto inc_error;
2120                 if (num_pols == 0)
2121                         return NULL;
2122                 if (num_xfrms <= 0)
2123                         goto make_dummy_bundle;
2124         }
2125
2126         new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
2127                                                   xflo->dst_orig);
2128         if (IS_ERR(new_xdst)) {
2129                 err = PTR_ERR(new_xdst);
2130                 if (err != -EAGAIN)
2131                         goto error;
2132                 if (oldflo == NULL)
2133                         goto make_dummy_bundle;
2134                 dst_hold(&xdst->u.dst);
2135                 return oldflo;
2136         } else if (new_xdst == NULL) {
2137                 num_xfrms = 0;
2138                 if (oldflo == NULL)
2139                         goto make_dummy_bundle;
2140                 xdst->num_xfrms = 0;
2141                 dst_hold(&xdst->u.dst);
2142                 return oldflo;
2143         }
2144
2145         /* Kill the previous bundle */
2146         if (xdst) {
2147                 /* The policies were stolen for newly generated bundle */
2148                 xdst->num_pols = 0;
2149                 dst_free(&xdst->u.dst);
2150         }
2151
2152         /* Flow cache does not have reference, it dst_free()'s,
2153          * but we do need to return one reference for original caller */
2154         dst_hold(&new_xdst->u.dst);
2155         return &new_xdst->flo;
2156
2157 make_dummy_bundle:
2158         /* We found policies, but there's no bundles to instantiate:
2159          * either because the policy blocks, has no transformations or
2160          * we could not build template (no xfrm_states).*/
2161         xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family);
2162         if (IS_ERR(xdst)) {
2163                 xfrm_pols_put(pols, num_pols);
2164                 return ERR_CAST(xdst);
2165         }
2166         xdst->num_pols = num_pols;
2167         xdst->num_xfrms = num_xfrms;
2168         memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
2169
2170         dst_hold(&xdst->u.dst);
2171         return &xdst->flo;
2172
2173 inc_error:
2174         XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
2175 error:
2176         if (xdst != NULL)
2177                 dst_free(&xdst->u.dst);
2178         else
2179                 xfrm_pols_put(pols, num_pols);
2180         return ERR_PTR(err);
2181 }
2182
2183 static struct dst_entry *make_blackhole(struct net *net, u16 family,
2184                                         struct dst_entry *dst_orig)
2185 {
2186         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
2187         struct dst_entry *ret;
2188
2189         if (!afinfo) {
2190                 dst_release(dst_orig);
2191                 return ERR_PTR(-EINVAL);
2192         } else {
2193                 ret = afinfo->blackhole_route(net, dst_orig);
2194         }
2195         xfrm_policy_put_afinfo(afinfo);
2196
2197         return ret;
2198 }
2199
2200 /* Main function: finds/creates a bundle for given flow.
2201  *
2202  * At the moment we eat a raw IP route. Mostly to speed up lookups
2203  * on interfaces with disabled IPsec.
2204  */
2205 struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
2206                               const struct flowi *fl,
2207                               const struct sock *sk, int flags)
2208 {
2209         struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
2210         struct flow_cache_object *flo;
2211         struct xfrm_dst *xdst;
2212         struct dst_entry *dst, *route;
2213         u16 family = dst_orig->ops->family;
2214         u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
2215         int i, err, num_pols, num_xfrms = 0, drop_pols = 0;
2216
2217         dst = NULL;
2218         xdst = NULL;
2219         route = NULL;
2220
2221         sk = sk_const_to_full_sk(sk);
2222         if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
2223                 num_pols = 1;
2224                 pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
2225                 err = xfrm_expand_policies(fl, family, pols,
2226                                            &num_pols, &num_xfrms);
2227                 if (err < 0)
2228                         goto dropdst;
2229
2230                 if (num_pols) {
2231                         if (num_xfrms <= 0) {
2232                                 drop_pols = num_pols;
2233                                 goto no_transform;
2234                         }
2235
2236                         xdst = xfrm_resolve_and_create_bundle(
2237                                         pols, num_pols, fl,
2238                                         family, dst_orig);
2239                         if (IS_ERR(xdst)) {
2240                                 xfrm_pols_put(pols, num_pols);
2241                                 err = PTR_ERR(xdst);
2242                                 goto dropdst;
2243                         } else if (xdst == NULL) {
2244                                 num_xfrms = 0;
2245                                 drop_pols = num_pols;
2246                                 goto no_transform;
2247                         }
2248
2249                         dst_hold(&xdst->u.dst);
2250                         xdst->u.dst.flags |= DST_NOCACHE;
2251                         route = xdst->route;
2252                 }
2253         }
2254
2255         if (xdst == NULL) {
2256                 struct xfrm_flo xflo;
2257
2258                 xflo.dst_orig = dst_orig;
2259                 xflo.flags = flags;
2260
2261                 /* To accelerate a bit...  */
2262                 if ((dst_orig->flags & DST_NOXFRM) ||
2263                     !net->xfrm.policy_count[XFRM_POLICY_OUT])
2264                         goto nopol;
2265
2266                 flo = flow_cache_lookup(net, fl, family, dir,
2267                                         xfrm_bundle_lookup, &xflo);
2268                 if (flo == NULL)
2269                         goto nopol;
2270                 if (IS_ERR(flo)) {
2271                         err = PTR_ERR(flo);
2272                         goto dropdst;
2273                 }
2274                 xdst = container_of(flo, struct xfrm_dst, flo);
2275
2276                 num_pols = xdst->num_pols;
2277                 num_xfrms = xdst->num_xfrms;
2278                 memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols);
2279                 route = xdst->route;
2280         }
2281
2282         dst = &xdst->u.dst;
2283         if (route == NULL && num_xfrms > 0) {
2284                 /* The only case when xfrm_bundle_lookup() returns a
2285                  * bundle with null route, is when the template could
2286                  * not be resolved. It means policies are there, but
2287                  * bundle could not be created, since we don't yet
2288                  * have the xfrm_state's. We need to wait for KM to
2289                  * negotiate new SA's or bail out with error.*/
2290                 if (net->xfrm.sysctl_larval_drop) {
2291                         XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
2292                         err = -EREMOTE;
2293                         goto error;
2294                 }
2295
2296                 err = -EAGAIN;
2297
2298                 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
2299                 goto error;
2300         }
2301
2302 no_transform:
2303         if (num_pols == 0)
2304                 goto nopol;
2305
2306         if ((flags & XFRM_LOOKUP_ICMP) &&
2307             !(pols[0]->flags & XFRM_POLICY_ICMP)) {
2308                 err = -ENOENT;
2309                 goto error;
2310         }
2311
2312         for (i = 0; i < num_pols; i++)
2313                 pols[i]->curlft.use_time = get_seconds();
2314
2315         if (num_xfrms < 0) {
2316                 /* Prohibit the flow */
2317                 XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
2318                 err = -EPERM;
2319                 goto error;
2320         } else if (num_xfrms > 0) {
2321                 /* Flow transformed */
2322                 dst_release(dst_orig);
2323         } else {
2324                 /* Flow passes untransformed */
2325                 dst_release(dst);
2326                 dst = dst_orig;
2327         }
2328 ok:
2329         xfrm_pols_put(pols, drop_pols);
2330         if (dst && dst->xfrm &&
2331             dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
2332                 dst->flags |= DST_XFRM_TUNNEL;
2333         return dst;
2334
2335 nopol:
2336         if (!(flags & XFRM_LOOKUP_ICMP)) {
2337                 dst = dst_orig;
2338                 goto ok;
2339         }
2340         err = -ENOENT;
2341 error:
2342         dst_release(dst);
2343 dropdst:
2344         if (!(flags & XFRM_LOOKUP_KEEP_DST_REF))
2345                 dst_release(dst_orig);
2346         xfrm_pols_put(pols, drop_pols);
2347         return ERR_PTR(err);
2348 }
2349 EXPORT_SYMBOL(xfrm_lookup);
2350
2351 /* Callers of xfrm_lookup_route() must ensure a call to dst_output().
2352  * Otherwise we may send out blackholed packets.
2353  */
2354 struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
2355                                     const struct flowi *fl,
2356                                     const struct sock *sk, int flags)
2357 {
2358         struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
2359                                             flags | XFRM_LOOKUP_QUEUE |
2360                                             XFRM_LOOKUP_KEEP_DST_REF);
2361
2362         if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
2363                 return make_blackhole(net, dst_orig->ops->family, dst_orig);
2364
2365         return dst;
2366 }
2367 EXPORT_SYMBOL(xfrm_lookup_route);
2368
2369 static inline int
2370 xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
2371 {
2372         struct xfrm_state *x;
2373
2374         if (!skb->sp || idx < 0 || idx >= skb->sp->len)
2375                 return 0;
2376         x = skb->sp->xvec[idx];
2377         if (!x->type->reject)
2378                 return 0;
2379         return x->type->reject(x, skb, fl);
2380 }
2381
/* When skb is transformed back to its "native" form, we have to
 * check policy restrictions. At the moment we do this in a maximally
 * stupid way. Shame on me. :-) Of course, connected sockets must
 * have their policy cached on them.
 */
2387
2388 static inline int
2389 xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
2390               unsigned short family)
2391 {
2392         if (xfrm_state_kern(x))
2393                 return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
2394         return  x->id.proto == tmpl->id.proto &&
2395                 (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
2396                 (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
2397                 x->props.mode == tmpl->mode &&
2398                 (tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
2399                  !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
2400                 !(x->props.mode != XFRM_MODE_TRANSPORT &&
2401                   xfrm_state_addr_cmp(tmpl, x, family));
2402 }
2403
2404 /*
2405  * 0 or more than 0 is returned when validation is succeeded (either bypass
2406  * because of optional transport mode, or next index of the mathced secpath
2407  * state with the template.
2408  * -1 is returned when no matching template is found.
2409  * Otherwise "-2 - errored_index" is returned.
2410  */
2411 static inline int
2412 xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
2413                unsigned short family)
2414 {
2415         int idx = start;
2416
2417         if (tmpl->optional) {
2418                 if (tmpl->mode == XFRM_MODE_TRANSPORT)
2419                         return start;
2420         } else
2421                 start = -1;
2422         for (; idx < sp->len; idx++) {
2423                 if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
2424                         return ++idx;
2425                 if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
2426                         if (start == -1)
2427                                 start = -2-idx;
2428                         break;
2429                 }
2430         }
2431         return start;
2432 }
2433
2434 int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
2435                           unsigned int family, int reverse)
2436 {
2437         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
2438         int err;
2439
2440         if (unlikely(afinfo == NULL))
2441                 return -EAFNOSUPPORT;
2442
2443         afinfo->decode_session(skb, fl, reverse);
2444         err = security_xfrm_decode_session(skb, &fl->flowi_secid);
2445         xfrm_policy_put_afinfo(afinfo);
2446         return err;
2447 }
2448 EXPORT_SYMBOL(__xfrm_decode_session);
2449
2450 static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)
2451 {
2452         for (; k < sp->len; k++) {
2453                 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
2454                         *idxp = k;
2455                         return 1;
2456                 }
2457         }
2458
2459         return 0;
2460 }
2461
2462 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
2463                         unsigned short family)
2464 {
2465         struct net *net = dev_net(skb->dev);
2466         struct xfrm_policy *pol;
2467         struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
2468         int npols = 0;
2469         int xfrm_nr;
2470         int pi;
2471         int reverse;
2472         struct flowi fl;
2473         u8 fl_dir;
2474         int xerr_idx = -1;
2475
2476         reverse = dir & ~XFRM_POLICY_MASK;
2477         dir &= XFRM_POLICY_MASK;
2478         fl_dir = policy_to_flow_dir(dir);
2479
2480         if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
2481                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
2482                 return 0;
2483         }
2484
2485         nf_nat_decode_session(skb, &fl, family);
2486
2487         /* First, check used SA against their selectors. */
2488         if (skb->sp) {
2489                 int i;
2490
2491                 for (i = skb->sp->len-1; i >= 0; i--) {
2492                         struct xfrm_state *x = skb->sp->xvec[i];
2493                         if (!xfrm_selector_match(&x->sel, &fl, family)) {
2494                                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
2495                                 return 0;
2496                         }
2497                 }
2498         }
2499
2500         pol = NULL;
2501         sk = sk_to_full_sk(sk);
2502         if (sk && sk->sk_policy[dir]) {
2503                 pol = xfrm_sk_policy_lookup(sk, dir, &fl);
2504                 if (IS_ERR(pol)) {
2505                         XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2506                         return 0;
2507                 }
2508         }
2509
2510         if (!pol) {
2511                 struct flow_cache_object *flo;
2512
2513                 flo = flow_cache_lookup(net, &fl, family, fl_dir,
2514                                         xfrm_policy_lookup, NULL);
2515                 if (IS_ERR_OR_NULL(flo))
2516                         pol = ERR_CAST(flo);
2517                 else
2518                         pol = container_of(flo, struct xfrm_policy, flo);
2519         }
2520
2521         if (IS_ERR(pol)) {
2522                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2523                 return 0;
2524         }
2525
2526         if (!pol) {
2527                 if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
2528                         xfrm_secpath_reject(xerr_idx, skb, &fl);
2529                         XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
2530                         return 0;
2531                 }
2532                 return 1;
2533         }
2534
2535         pol->curlft.use_time = get_seconds();
2536
2537         pols[0] = pol;
2538         npols++;
2539 #ifdef CONFIG_XFRM_SUB_POLICY
2540         if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
2541                 pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
2542                                                     &fl, family,
2543                                                     XFRM_POLICY_IN);
2544                 if (pols[1]) {
2545                         if (IS_ERR(pols[1])) {
2546                                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
2547                                 return 0;
2548                         }
2549                         pols[1]->curlft.use_time = get_seconds();
2550                         npols++;
2551                 }
2552         }
2553 #endif
2554
2555         if (pol->action == XFRM_POLICY_ALLOW) {
2556                 struct sec_path *sp;
2557                 static struct sec_path dummy;
2558                 struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
2559                 struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
2560                 struct xfrm_tmpl **tpp = tp;
2561                 int ti = 0;
2562                 int i, k;
2563
2564                 if ((sp = skb->sp) == NULL)
2565                         sp = &dummy;
2566
2567                 for (pi = 0; pi < npols; pi++) {
2568                         if (pols[pi] != pol &&
2569                             pols[pi]->action != XFRM_POLICY_ALLOW) {
2570                                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
2571                                 goto reject;
2572                         }
2573                         if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
2574                                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
2575                                 goto reject_error;
2576                         }
2577                         for (i = 0; i < pols[pi]->xfrm_nr; i++)
2578                                 tpp[ti++] = &pols[pi]->xfrm_vec[i];
2579                 }
2580                 xfrm_nr = ti;
2581                 if (npols > 1) {
2582                         xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net);
2583                         tpp = stp;
2584                 }
2585
2586                 /* For each tunnel xfrm, find the first matching tmpl.
2587                  * For each tmpl before that, find corresponding xfrm.
2588                  * Order is _important_. Later we will implement
2589                  * some barriers, but at the moment barriers
2590                  * are implied between each two transformations.
2591                  */
2592                 for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
2593                         k = xfrm_policy_ok(tpp[i], sp, k, family);
2594                         if (k < 0) {
2595                                 if (k < -1)
2596                                         /* "-2 - errored_index" returned */
2597                                         xerr_idx = -(2+k);
2598                                 XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
2599                                 goto reject;
2600                         }
2601                 }
2602
2603                 if (secpath_has_nontransport(sp, k, &xerr_idx)) {
2604                         XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
2605                         goto reject;
2606                 }
2607
2608                 xfrm_pols_put(pols, npols);
2609                 return 1;
2610         }
2611         XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
2612
2613 reject:
2614         xfrm_secpath_reject(xerr_idx, skb, &fl);
2615 reject_error:
2616         xfrm_pols_put(pols, npols);
2617         return 0;
2618 }
2619 EXPORT_SYMBOL(__xfrm_policy_check);
2620
2621 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
2622 {
2623         struct net *net = dev_net(skb->dev);
2624         struct flowi fl;
2625         struct dst_entry *dst;
2626         int res = 1;
2627
2628         if (xfrm_decode_session(skb, &fl, family) < 0) {
2629                 XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
2630                 return 0;
2631         }
2632
2633         skb_dst_force(skb);
2634
2635         dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE);
2636         if (IS_ERR(dst)) {
2637                 res = 0;
2638                 dst = NULL;
2639         }
2640         skb_dst_set(skb, dst);
2641         return res;
2642 }
2643 EXPORT_SYMBOL(__xfrm_route_forward);
2644
2645 /* Optimize later using cookies and generation ids. */
2646
2647 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
2648 {
2649         /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
2650          * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to
2651          * get validated by dst_ops->check on every use.  We do this
2652          * because when a normal route referenced by an XFRM dst is
2653          * obsoleted we do not go looking around for all parent
2654          * referencing XFRM dsts so that we can invalidate them.  It
2655          * is just too much work.  Instead we make the checks here on
2656          * every use.  For example:
2657          *
2658          *      XFRM dst A --> IPv4 dst X
2659          *
2660          * X is the "xdst->route" of A (X is also the "dst->path" of A
2661          * in this example).  If X is marked obsolete, "A" will not
2662          * notice.  That's what we are validating here via the
2663          * stale_bundle() check.
2664          *
2665          * When a policy's bundle is pruned, we dst_free() the XFRM
2666          * dst which causes it's ->obsolete field to be set to
2667          * DST_OBSOLETE_DEAD.  If an XFRM dst has been pruned like
2668          * this, we want to force a new route lookup.
2669          */
2670         if (dst->obsolete < 0 && !stale_bundle(dst))
2671                 return dst;
2672
2673         return NULL;
2674 }
2675
/* Returns non-zero when xfrm_bundle_ok() rejects the bundle headed by @dst. */
static int stale_bundle(struct dst_entry *dst)
{
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

	return !xfrm_bundle_ok(xdst);
}
2680
2681 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
2682 {
2683         while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
2684                 dst->dev = dev_net(dev)->loopback_dev;
2685                 dev_hold(dst->dev);
2686                 dev_put(dev);
2687         }
2688 }
2689 EXPORT_SYMBOL(xfrm_dst_ifdown);
2690
/*
 * link_failure handler for XFRM dsts; intentionally a no-op.
 */
static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Cannot happen: such a dst must be popped before the point of failure. */
}
2695
2696 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
2697 {
2698         if (dst) {
2699                 if (dst->obsolete) {
2700                         dst_release(dst);
2701                         dst = NULL;
2702                 }
2703         }
2704         return dst;
2705 }
2706
/* Flush the flow cache of @net via flow_cache_flush(). */
void xfrm_garbage_collect(struct net *net)
{
	flow_cache_flush(net);
}
EXPORT_SYMBOL(xfrm_garbage_collect);
2712
/*
 * Flush the flow cache of @net via flow_cache_flush_deferred().  Used as the
 * default afinfo->garbage_collect handler at registration time.
 */
static void xfrm_garbage_collect_deferred(struct net *net)
{
	flow_cache_flush_deferred(net);
}
2717
2718 static void xfrm_init_pmtu(struct dst_entry *dst)
2719 {
2720         do {
2721                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2722                 u32 pmtu, route_mtu_cached;
2723
2724                 pmtu = dst_mtu(dst->child);
2725                 xdst->child_mtu_cached = pmtu;
2726
2727                 pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
2728
2729                 route_mtu_cached = dst_mtu(xdst->route);
2730                 xdst->route_mtu_cached = route_mtu_cached;
2731
2732                 if (pmtu > route_mtu_cached)
2733                         pmtu = route_mtu_cached;
2734
2735                 dst_metric_set(dst, RTAX_MTU, pmtu);
2736         } while ((dst = dst->next));
2737 }
2738
2739 /* Check that the bundle accepts the flow and its components are
2740  * still valid.
2741  */
2742
2743 static int xfrm_bundle_ok(struct xfrm_dst *first)
2744 {
2745         struct dst_entry *dst = &first->u.dst;
2746         struct xfrm_dst *last;
2747         u32 mtu;
2748
2749         if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
2750             (dst->dev && !netif_running(dst->dev)))
2751                 return 0;
2752
2753         if (dst->flags & DST_XFRM_QUEUE)
2754                 return 1;
2755
2756         last = NULL;
2757
2758         do {
2759                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2760
2761                 if (dst->xfrm->km.state != XFRM_STATE_VALID)
2762                         return 0;
2763                 if (xdst->xfrm_genid != dst->xfrm->genid)
2764                         return 0;
2765                 if (xdst->num_pols > 0 &&
2766                     xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
2767                         return 0;
2768
2769                 mtu = dst_mtu(dst->child);
2770                 if (xdst->child_mtu_cached != mtu) {
2771                         last = xdst;
2772                         xdst->child_mtu_cached = mtu;
2773                 }
2774
2775                 if (!dst_check(xdst->route, xdst->route_cookie))
2776                         return 0;
2777                 mtu = dst_mtu(xdst->route);
2778                 if (xdst->route_mtu_cached != mtu) {
2779                         last = xdst;
2780                         xdst->route_mtu_cached = mtu;
2781                 }
2782
2783                 dst = dst->child;
2784         } while (dst->xfrm);
2785
2786         if (likely(!last))
2787                 return 1;
2788
2789         mtu = last->child_mtu_cached;
2790         for (;;) {
2791                 dst = &last->u.dst;
2792
2793                 mtu = xfrm_state_mtu(dst->xfrm, mtu);
2794                 if (mtu > last->route_mtu_cached)
2795                         mtu = last->route_mtu_cached;
2796                 dst_metric_set(dst, RTAX_MTU, mtu);
2797
2798                 if (last == first)
2799                         break;
2800
2801                 last = (struct xfrm_dst *)last->u.dst.next;
2802                 last->child_mtu_cached = mtu;
2803         }
2804
2805         return 1;
2806 }
2807
2808 static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
2809 {
2810         return dst_metric_advmss(dst->path);
2811 }
2812
2813 static unsigned int xfrm_mtu(const struct dst_entry *dst)
2814 {
2815         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2816
2817         return mtu ? : dst_mtu(dst->path);
2818 }
2819
2820 static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
2821                                            struct sk_buff *skb,
2822                                            const void *daddr)
2823 {
2824         return dst->path->ops->neigh_lookup(dst, skb, daddr);
2825 }
2826
2827 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
2828 {
2829         int err = 0;
2830         if (unlikely(afinfo == NULL))
2831                 return -EINVAL;
2832         if (unlikely(afinfo->family >= NPROTO))
2833                 return -EAFNOSUPPORT;
2834         spin_lock(&xfrm_policy_afinfo_lock);
2835         if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2836                 err = -EEXIST;
2837         else {
2838                 struct dst_ops *dst_ops = afinfo->dst_ops;
2839                 if (likely(dst_ops->kmem_cachep == NULL))
2840                         dst_ops->kmem_cachep = xfrm_dst_cache;
2841                 if (likely(dst_ops->check == NULL))
2842                         dst_ops->check = xfrm_dst_check;
2843                 if (likely(dst_ops->default_advmss == NULL))
2844                         dst_ops->default_advmss = xfrm_default_advmss;
2845                 if (likely(dst_ops->mtu == NULL))
2846                         dst_ops->mtu = xfrm_mtu;
2847                 if (likely(dst_ops->negative_advice == NULL))
2848                         dst_ops->negative_advice = xfrm_negative_advice;
2849                 if (likely(dst_ops->link_failure == NULL))
2850                         dst_ops->link_failure = xfrm_link_failure;
2851                 if (likely(dst_ops->neigh_lookup == NULL))
2852                         dst_ops->neigh_lookup = xfrm_neigh_lookup;
2853                 if (likely(afinfo->garbage_collect == NULL))
2854                         afinfo->garbage_collect = xfrm_garbage_collect_deferred;
2855                 rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo);
2856         }
2857         spin_unlock(&xfrm_policy_afinfo_lock);
2858
2859         return err;
2860 }
2861 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
2862
2863 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2864 {
2865         int err = 0;
2866         if (unlikely(afinfo == NULL))
2867                 return -EINVAL;
2868         if (unlikely(afinfo->family >= NPROTO))
2869                 return -EAFNOSUPPORT;
2870         spin_lock(&xfrm_policy_afinfo_lock);
2871         if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2872                 if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2873                         err = -EINVAL;
2874                 else
2875                         RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family],
2876                                          NULL);
2877         }
2878         spin_unlock(&xfrm_policy_afinfo_lock);
2879         if (!err) {
2880                 struct dst_ops *dst_ops = afinfo->dst_ops;
2881
2882                 synchronize_rcu();
2883
2884                 dst_ops->kmem_cachep = NULL;
2885                 dst_ops->check = NULL;
2886                 dst_ops->negative_advice = NULL;
2887                 dst_ops->link_failure = NULL;
2888                 afinfo->garbage_collect = NULL;
2889         }
2890         return err;
2891 }
2892 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2893
2894 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2895 {
2896         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2897
2898         switch (event) {
2899         case NETDEV_DOWN:
2900                 xfrm_garbage_collect(dev_net(dev));
2901         }
2902         return NOTIFY_DONE;
2903 }
2904
2905 static struct notifier_block xfrm_dev_notifier = {
2906         .notifier_call  = xfrm_dev_event,
2907 };
2908
2909 #ifdef CONFIG_XFRM_STATISTICS
2910 static int __net_init xfrm_statistics_init(struct net *net)
2911 {
2912         int rv;
2913         net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib);
2914         if (!net->mib.xfrm_statistics)
2915                 return -ENOMEM;
2916         rv = xfrm_proc_init(net);
2917         if (rv < 0)
2918                 free_percpu(net->mib.xfrm_statistics);
2919         return rv;
2920 }
2921
2922 static void xfrm_statistics_fini(struct net *net)
2923 {
2924         xfrm_proc_fini(net);
2925         free_percpu(net->mib.xfrm_statistics);
2926 }
2927 #else
2928 static int __net_init xfrm_statistics_init(struct net *net)
2929 {
2930         return 0;
2931 }
2932
/* Statistics support is compiled out: nothing to tear down. */
static void xfrm_statistics_fini(struct net *net)
{
}
2936 #endif
2937
2938 static int __net_init xfrm_policy_init(struct net *net)
2939 {
2940         unsigned int hmask, sz;
2941         int dir;
2942
2943         if (net_eq(net, &init_net))
2944                 xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
2945                                            sizeof(struct xfrm_dst),
2946                                            0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2947                                            NULL);
2948
2949         hmask = 8 - 1;
2950         sz = (hmask+1) * sizeof(struct hlist_head);
2951
2952         net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
2953         if (!net->xfrm.policy_byidx)
2954                 goto out_byidx;
2955         net->xfrm.policy_idx_hmask = hmask;
2956
2957         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
2958                 struct xfrm_policy_hash *htab;
2959
2960                 net->xfrm.policy_count[dir] = 0;
2961                 net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0;
2962                 INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);
2963
2964                 htab = &net->xfrm.policy_bydst[dir];
2965                 htab->table = xfrm_hash_alloc(sz);
2966                 if (!htab->table)
2967                         goto out_bydst;
2968                 htab->hmask = hmask;
2969                 htab->dbits4 = 32;
2970                 htab->sbits4 = 32;
2971                 htab->dbits6 = 128;
2972                 htab->sbits6 = 128;
2973         }
2974         net->xfrm.policy_hthresh.lbits4 = 32;
2975         net->xfrm.policy_hthresh.rbits4 = 32;
2976         net->xfrm.policy_hthresh.lbits6 = 128;
2977         net->xfrm.policy_hthresh.rbits6 = 128;
2978
2979         seqlock_init(&net->xfrm.policy_hthresh.lock);
2980
2981         INIT_LIST_HEAD(&net->xfrm.policy_all);
2982         INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
2983         INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild);
2984         if (net_eq(net, &init_net))
2985                 register_netdevice_notifier(&xfrm_dev_notifier);
2986         return 0;
2987
2988 out_bydst:
2989         for (dir--; dir >= 0; dir--) {
2990                 struct xfrm_policy_hash *htab;
2991
2992                 htab = &net->xfrm.policy_bydst[dir];
2993                 xfrm_hash_free(htab->table, sz);
2994         }
2995         xfrm_hash_free(net->xfrm.policy_byidx, sz);
2996 out_byidx:
2997         return -ENOMEM;
2998 }
2999
3000 static void xfrm_policy_fini(struct net *net)
3001 {
3002         unsigned int sz;
3003         int dir;
3004
3005         flush_work(&net->xfrm.policy_hash_work);
3006 #ifdef CONFIG_XFRM_SUB_POLICY
3007         xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false);
3008 #endif
3009         xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false);
3010
3011         WARN_ON(!list_empty(&net->xfrm.policy_all));
3012
3013         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
3014                 struct xfrm_policy_hash *htab;
3015
3016                 WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));
3017
3018                 htab = &net->xfrm.policy_bydst[dir];
3019                 sz = (htab->hmask + 1) * sizeof(struct hlist_head);
3020                 WARN_ON(!hlist_empty(htab->table));
3021                 xfrm_hash_free(htab->table, sz);
3022         }
3023
3024         sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
3025         WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
3026         xfrm_hash_free(net->xfrm.policy_byidx, sz);
3027 }
3028
3029 static int __net_init xfrm_net_init(struct net *net)
3030 {
3031         int rv;
3032
3033         rv = xfrm_statistics_init(net);
3034         if (rv < 0)
3035                 goto out_statistics;
3036         rv = xfrm_state_init(net);
3037         if (rv < 0)
3038                 goto out_state;
3039         rv = xfrm_policy_init(net);
3040         if (rv < 0)
3041                 goto out_policy;
3042         rv = xfrm_sysctl_init(net);
3043         if (rv < 0)
3044                 goto out_sysctl;
3045         rv = flow_cache_init(net);
3046         if (rv < 0)
3047                 goto out;
3048
3049         /* Initialize the per-net locks here */
3050         spin_lock_init(&net->xfrm.xfrm_state_lock);
3051         rwlock_init(&net->xfrm.xfrm_policy_lock);
3052         mutex_init(&net->xfrm.xfrm_cfg_mutex);
3053
3054         return 0;
3055
3056 out:
3057         xfrm_sysctl_fini(net);
3058 out_sysctl:
3059         xfrm_policy_fini(net);
3060 out_policy:
3061         xfrm_state_fini(net);
3062 out_state:
3063         xfrm_statistics_fini(net);
3064 out_statistics:
3065         return rv;
3066 }
3067
3068 static void __net_exit xfrm_net_exit(struct net *net)
3069 {
3070         flow_cache_fini(net);
3071         xfrm_sysctl_fini(net);
3072         xfrm_policy_fini(net);
3073         xfrm_state_fini(net);
3074         xfrm_statistics_fini(net);
3075 }
3076
3077 static struct pernet_operations __net_initdata xfrm_net_ops = {
3078         .init = xfrm_net_init,
3079         .exit = xfrm_net_exit,
3080 };
3081
3082 void __init xfrm_init(void)
3083 {
3084         register_pernet_subsys(&xfrm_net_ops);
3085         xfrm_input_init();
3086 }
3087
3088 #ifdef CONFIG_AUDITSYSCALL
3089 static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
3090                                          struct audit_buffer *audit_buf)
3091 {
3092         struct xfrm_sec_ctx *ctx = xp->security;
3093         struct xfrm_selector *sel = &xp->selector;
3094
3095         if (ctx)
3096                 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
3097                                  ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
3098
3099         switch (sel->family) {
3100         case AF_INET:
3101                 audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
3102                 if (sel->prefixlen_s != 32)
3103                         audit_log_format(audit_buf, " src_prefixlen=%d",
3104                                          sel->prefixlen_s);
3105                 audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
3106                 if (sel->prefixlen_d != 32)
3107                         audit_log_format(audit_buf, " dst_prefixlen=%d",
3108                                          sel->prefixlen_d);
3109                 break;
3110         case AF_INET6:
3111                 audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
3112                 if (sel->prefixlen_s != 128)
3113                         audit_log_format(audit_buf, " src_prefixlen=%d",
3114                                          sel->prefixlen_s);
3115                 audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
3116                 if (sel->prefixlen_d != 128)
3117                         audit_log_format(audit_buf, " dst_prefixlen=%d",
3118                                          sel->prefixlen_d);
3119                 break;
3120         }
3121 }
3122
3123 void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid)
3124 {
3125         struct audit_buffer *audit_buf;
3126
3127         audit_buf = xfrm_audit_start("SPD-add");
3128         if (audit_buf == NULL)
3129                 return;
3130         xfrm_audit_helper_usrinfo(task_valid, audit_buf);
3131         audit_log_format(audit_buf, " res=%u", result);
3132         xfrm_audit_common_policyinfo(xp, audit_buf);
3133         audit_log_end(audit_buf);
3134 }
3135 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
3136
3137 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
3138                               bool task_valid)
3139 {
3140         struct audit_buffer *audit_buf;
3141
3142         audit_buf = xfrm_audit_start("SPD-delete");
3143         if (audit_buf == NULL)
3144                 return;
3145         xfrm_audit_helper_usrinfo(task_valid, audit_buf);
3146         audit_log_format(audit_buf, " res=%u", result);
3147         xfrm_audit_common_policyinfo(xp, audit_buf);
3148         audit_log_end(audit_buf);
3149 }
3150 EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
3151 #endif
3152
3153 #ifdef CONFIG_XFRM_MIGRATE
3154 static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
3155                                         const struct xfrm_selector *sel_tgt)
3156 {
3157         if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
3158                 if (sel_tgt->family == sel_cmp->family &&
3159                     xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
3160                                     sel_cmp->family) &&
3161                     xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
3162                                     sel_cmp->family) &&
3163                     sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
3164                     sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
3165                         return true;
3166                 }
3167         } else {
3168                 if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
3169                         return true;
3170                 }
3171         }
3172         return false;
3173 }
3174
3175 static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
3176                                                     u8 dir, u8 type, struct net *net)
3177 {
3178         struct xfrm_policy *pol, *ret = NULL;
3179         struct hlist_head *chain;
3180         u32 priority = ~0U;
3181
3182         read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/
3183         chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
3184         hlist_for_each_entry(pol, chain, bydst) {
3185                 if (xfrm_migrate_selector_match(sel, &pol->selector) &&
3186                     pol->type == type) {
3187                         ret = pol;
3188                         priority = ret->priority;
3189                         break;
3190                 }
3191         }
3192         chain = &net->xfrm.policy_inexact[dir];
3193         hlist_for_each_entry(pol, chain, bydst) {
3194                 if ((pol->priority >= priority) && ret)
3195                         break;
3196
3197                 if (xfrm_migrate_selector_match(sel, &pol->selector) &&
3198                     pol->type == type) {
3199                         ret = pol;
3200                         break;
3201                 }
3202         }
3203
3204         xfrm_pol_hold(ret);
3205
3206         read_unlock_bh(&net->xfrm.xfrm_policy_lock);
3207
3208         return ret;
3209 }
3210
3211 static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
3212 {
3213         int match = 0;
3214
3215         if (t->mode == m->mode && t->id.proto == m->proto &&
3216             (m->reqid == 0 || t->reqid == m->reqid)) {
3217                 switch (t->mode) {
3218                 case XFRM_MODE_TUNNEL:
3219                 case XFRM_MODE_BEET:
3220                         if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
3221                                             m->old_family) &&
3222                             xfrm_addr_equal(&t->saddr, &m->old_saddr,
3223                                             m->old_family)) {
3224                                 match = 1;
3225                         }
3226                         break;
3227                 case XFRM_MODE_TRANSPORT:
3228                         /* in case of transport mode, template does not store
3229                            any IP addresses, hence we just compare mode and
3230                            protocol */
3231                         match = 1;
3232                         break;
3233                 default:
3234                         break;
3235                 }
3236         }
3237         return match;
3238 }
3239
3240 /* update endpoint address(es) of template(s) */
3241 static int xfrm_policy_migrate(struct xfrm_policy *pol,
3242                                struct xfrm_migrate *m, int num_migrate)
3243 {
3244         struct xfrm_migrate *mp;
3245         int i, j, n = 0;
3246
3247         write_lock_bh(&pol->lock);
3248         if (unlikely(pol->walk.dead)) {
3249                 /* target policy has been deleted */
3250                 write_unlock_bh(&pol->lock);
3251                 return -ENOENT;
3252         }
3253
3254         for (i = 0; i < pol->xfrm_nr; i++) {
3255                 for (j = 0, mp = m; j < num_migrate; j++, mp++) {
3256                         if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
3257                                 continue;
3258                         n++;
3259                         if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
3260                             pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
3261                                 continue;
3262                         /* update endpoints */
3263                         memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
3264                                sizeof(pol->xfrm_vec[i].id.daddr));
3265                         memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
3266                                sizeof(pol->xfrm_vec[i].saddr));
3267                         pol->xfrm_vec[i].encap_family = mp->new_family;
3268                         /* flush bundles */
3269                         atomic_inc(&pol->genid);
3270                 }
3271         }
3272
3273         write_unlock_bh(&pol->lock);
3274
3275         if (!n)
3276                 return -ENODATA;
3277
3278         return 0;
3279 }
3280
3281 static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
3282 {
3283         int i, j;
3284
3285         if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
3286                 return -EINVAL;
3287
3288         for (i = 0; i < num_migrate; i++) {
3289                 if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr,
3290                                     m[i].old_family) &&
3291                     xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr,
3292                                     m[i].old_family))
3293                         return -EINVAL;
3294                 if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
3295                     xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
3296                         return -EINVAL;
3297
3298                 /* check if there is any duplicated entry */
3299                 for (j = i + 1; j < num_migrate; j++) {
3300                         if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
3301                                     sizeof(m[i].old_daddr)) &&
3302                             !memcmp(&m[i].old_saddr, &m[j].old_saddr,
3303                                     sizeof(m[i].old_saddr)) &&
3304                             m[i].proto == m[j].proto &&
3305                             m[i].mode == m[j].mode &&
3306                             m[i].reqid == m[j].reqid &&
3307                             m[i].old_family == m[j].old_family)
3308                                 return -EINVAL;
3309                 }
3310         }
3311
3312         return 0;
3313 }
3314
3315 int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
3316                  struct xfrm_migrate *m, int num_migrate,
3317                  struct xfrm_kmaddress *k, struct net *net)
3318 {
3319         int i, err, nx_cur = 0, nx_new = 0;
3320         struct xfrm_policy *pol = NULL;
3321         struct xfrm_state *x, *xc;
3322         struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
3323         struct xfrm_state *x_new[XFRM_MAX_DEPTH];
3324         struct xfrm_migrate *mp;
3325
3326         if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
3327                 goto out;
3328
3329         /* Stage 1 - find policy */
3330         if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) {
3331                 err = -ENOENT;
3332                 goto out;
3333         }
3334
3335         /* Stage 2 - find and update state(s) */
3336         for (i = 0, mp = m; i < num_migrate; i++, mp++) {
3337                 if ((x = xfrm_migrate_state_find(mp, net))) {
3338                         x_cur[nx_cur] = x;
3339                         nx_cur++;
3340                         if ((xc = xfrm_state_migrate(x, mp))) {
3341                                 x_new[nx_new] = xc;
3342                                 nx_new++;
3343                         } else {
3344                                 err = -ENODATA;
3345                                 goto restore_state;
3346                         }
3347                 }
3348         }
3349
3350         /* Stage 3 - update policy */
3351         if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
3352                 goto restore_state;
3353
3354         /* Stage 4 - delete old state(s) */
3355         if (nx_cur) {
3356                 xfrm_states_put(x_cur, nx_cur);
3357                 xfrm_states_delete(x_cur, nx_cur);
3358         }
3359
3360         /* Stage 5 - announce */
3361         km_migrate(sel, dir, type, m, num_migrate, k);
3362
3363         xfrm_pol_put(pol);
3364
3365         return 0;
3366 out:
3367         return err;
3368
3369 restore_state:
3370         if (pol)
3371                 xfrm_pol_put(pol);
3372         if (nx_cur)
3373                 xfrm_states_put(x_cur, nx_cur);
3374         if (nx_new)
3375                 xfrm_states_delete(x_new, nx_new);
3376
3377         return err;
3378 }
3379 EXPORT_SYMBOL(xfrm_migrate);
3380 #endif