/*
 * IPv6 fragment reassembly for connection tracking
 *
 * Copyright (C)2004 USAGI/WIDE Project
 *
 * Author:
 *      Yasuyuki Kozakai @USAGI <yasuyuki.kozakai@toshiba.co.jp>
 *
 * Based on: net/ipv6/reassembly.c
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */

#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/jiffies.h>
#include <linux/net.h>
#include <linux/list.h>
#include <linux/netdevice.h>
#include <linux/in6.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <linux/jhash.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/protocol.h>
#include <net/transp_v6.h>
#include <net/rawv6.h>
#include <net/ndisc.h>
#include <net/addrconf.h>
#include <linux/sysctl.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>
#include <linux/kernel.h>
#include <linux/module.h>

#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif

#define NF_CT_FRAG6_HIGH_THRESH 262144 /* == 256*1024 */
#define NF_CT_FRAG6_LOW_THRESH 196608  /* == 192*1024 */
#define NF_CT_FRAG6_TIMEOUT IPV6_FRAG_TIMEOUT

unsigned int nf_ct_frag6_high_thresh = 256*1024;
unsigned int nf_ct_frag6_low_thresh = 192*1024;
unsigned long nf_ct_frag6_timeout = IPV6_FRAG_TIMEOUT;

struct nf_ct_frag6_skb_cb
{
        struct inet6_skb_parm   h;
        int                     offset;
        struct sk_buff          *orig;
};

#define NFCT_FRAG6_CB(skb)      ((struct nf_ct_frag6_skb_cb*)((skb)->cb))

struct nf_ct_frag6_queue
{
        struct nf_ct_frag6_queue        *next;
        struct list_head        lru_list;       /* lru list member      */

        __u32                   id;             /* fragment id          */
        struct in6_addr         saddr;
        struct in6_addr         daddr;

        spinlock_t              lock;
        atomic_t                refcnt;
        struct timer_list       timer;          /* expire timer         */
        struct sk_buff          *fragments;
        int                     len;
        int                     meat;
        struct timeval          stamp;
        unsigned int            csum;
        __u8                    last_in;        /* has first/last segment arrived? */
#define COMPLETE                4
#define FIRST_IN                2
#define LAST_IN                 1
        __u16                   nhoffset;
        struct nf_ct_frag6_queue        **pprev;
};

/* Hash table. */

#define FRAG6Q_HASHSZ   64

static struct nf_ct_frag6_queue *nf_ct_frag6_hash[FRAG6Q_HASHSZ];
static DEFINE_RWLOCK(nf_ct_frag6_lock);
static u32 nf_ct_frag6_hash_rnd;
static LIST_HEAD(nf_ct_frag6_lru_list);
int nf_ct_frag6_nqueues = 0;

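/* Unlink fq from its hash chain and from the LRU list.
 * The caller must hold nf_ct_frag6_lock for writing.
 */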
static __inline__ void __fq_unlink(struct nf_ct_frag6_queue *fq)
{
        if (fq->next)
                fq->next->pprev = fq->pprev;
        *fq->pprev = fq->next;
        list_del(&fq->lru_list);
        nf_ct_frag6_nqueues--;
}

static __inline__ void fq_unlink(struct nf_ct_frag6_queue *fq)
{
        write_lock(&nf_ct_frag6_lock);
        __fq_unlink(fq);
        write_unlock(&nf_ct_frag6_lock);
}

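/* Jenkins-style hash of (id, saddr, daddr), perturbed by the random
 * secret nf_ct_frag6_hash_rnd and folded into FRAG6Q_HASHSZ buckets.
 */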
static unsigned int ip6qhashfn(u32 id, struct in6_addr *saddr,
                               struct in6_addr *daddr)
{
        u32 a, b, c;

        a = saddr->s6_addr32[0];
        b = saddr->s6_addr32[1];
        c = saddr->s6_addr32[2];

        a += JHASH_GOLDEN_RATIO;
        b += JHASH_GOLDEN_RATIO;
        c += nf_ct_frag6_hash_rnd;
        __jhash_mix(a, b, c);

        a += saddr->s6_addr32[3];
        b += daddr->s6_addr32[0];
        c += daddr->s6_addr32[1];
        __jhash_mix(a, b, c);

        a += daddr->s6_addr32[2];
        b += daddr->s6_addr32[3];
        c += id;
        __jhash_mix(a, b, c);

        return c & (FRAG6Q_HASHSZ - 1);
}

static struct timer_list nf_ct_frag6_secret_timer;
int nf_ct_frag6_secret_interval = 10 * 60 * HZ;

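/* Timer callback: pick a fresh hash secret and move every queue to the
 * bucket it now hashes to, then re-arm the timer.
 */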
static void nf_ct_frag6_secret_rebuild(unsigned long dummy)
{
        unsigned long now = jiffies;
        int i;

        write_lock(&nf_ct_frag6_lock);
        get_random_bytes(&nf_ct_frag6_hash_rnd, sizeof(u32));
        for (i = 0; i < FRAG6Q_HASHSZ; i++) {
                struct nf_ct_frag6_queue *q;

                q = nf_ct_frag6_hash[i];
                while (q) {
                        struct nf_ct_frag6_queue *next = q->next;
                        unsigned int hval = ip6qhashfn(q->id,
                                                       &q->saddr,
                                                       &q->daddr);

                        if (hval != i) {
                                /* Unlink. */
                                if (q->next)
                                        q->next->pprev = q->pprev;
                                *q->pprev = q->next;

                                /* Relink to new hash chain. */
                                if ((q->next = nf_ct_frag6_hash[hval]) != NULL)
                                        q->next->pprev = &q->next;
                                nf_ct_frag6_hash[hval] = q;
                                q->pprev = &nf_ct_frag6_hash[hval];
                        }

                        q = next;
                }
        }
        write_unlock(&nf_ct_frag6_lock);

        mod_timer(&nf_ct_frag6_secret_timer, now + nf_ct_frag6_secret_interval);
}

atomic_t nf_ct_frag6_mem = ATOMIC_INIT(0);

/* Memory Tracking Functions. */
static inline void frag_kfree_skb(struct sk_buff *skb, unsigned int *work)
{
        if (work)
                *work -= skb->truesize;
        atomic_sub(skb->truesize, &nf_ct_frag6_mem);
        if (NFCT_FRAG6_CB(skb)->orig)
                kfree_skb(NFCT_FRAG6_CB(skb)->orig);

        kfree_skb(skb);
}

static inline void frag_free_queue(struct nf_ct_frag6_queue *fq,
                                   unsigned int *work)
{
        if (work)
                *work -= sizeof(struct nf_ct_frag6_queue);
        atomic_sub(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
        kfree(fq);
}

static inline struct nf_ct_frag6_queue *frag_alloc_queue(void)
{
        struct nf_ct_frag6_queue *fq = kmalloc(sizeof(struct nf_ct_frag6_queue), GFP_ATOMIC);

        if (!fq)
                return NULL;
        atomic_add(sizeof(struct nf_ct_frag6_queue), &nf_ct_frag6_mem);
        return fq;
}

/* Destruction primitives. */

/* Complete destruction of fq. */
static void nf_ct_frag6_destroy(struct nf_ct_frag6_queue *fq,
                                unsigned int *work)
{
        struct sk_buff *fp;

        BUG_TRAP(fq->last_in & COMPLETE);
        BUG_TRAP(del_timer(&fq->timer) == 0);

        /* Release all fragment data. */
        fp = fq->fragments;
        while (fp) {
                struct sk_buff *xp = fp->next;

                frag_kfree_skb(fp, work);
                fp = xp;
        }

        frag_free_queue(fq, work);
}

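/* Drop one reference; the final put destroys the queue. */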
static __inline__ void fq_put(struct nf_ct_frag6_queue *fq, unsigned int *work)
{
        if (atomic_dec_and_test(&fq->refcnt))
                nf_ct_frag6_destroy(fq, work);
}

/* Kill fq entry. It is not destroyed immediately,
 * because the caller (and possibly others) still holds a reference.
 */
static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq)
{
        if (del_timer(&fq->timer))
                atomic_dec(&fq->refcnt);

        if (!(fq->last_in & COMPLETE)) {
                fq_unlink(fq);
                atomic_dec(&fq->refcnt);
                fq->last_in |= COMPLETE;
        }
}

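/* Evict queues, least recently used first, until fragment memory drops
 * below the low threshold.
 */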
static void nf_ct_frag6_evictor(void)
{
        struct nf_ct_frag6_queue *fq;
        struct list_head *tmp;
        unsigned int work;

        work = atomic_read(&nf_ct_frag6_mem);
        if (work <= nf_ct_frag6_low_thresh)
                return;

        work -= nf_ct_frag6_low_thresh;
        while (work > 0) {
                read_lock(&nf_ct_frag6_lock);
                if (list_empty(&nf_ct_frag6_lru_list)) {
                        read_unlock(&nf_ct_frag6_lock);
                        return;
                }
                tmp = nf_ct_frag6_lru_list.next;
                BUG_ON(tmp == NULL);
                fq = list_entry(tmp, struct nf_ct_frag6_queue, lru_list);
                atomic_inc(&fq->refcnt);
                read_unlock(&nf_ct_frag6_lock);

                spin_lock(&fq->lock);
                if (!(fq->last_in & COMPLETE))
                        fq_kill(fq);
                spin_unlock(&fq->lock);

                fq_put(fq, &work);
        }
}

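/* Timer callback: the queue timed out before all fragments arrived. */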
static void nf_ct_frag6_expire(unsigned long data)
{
        struct nf_ct_frag6_queue *fq = (struct nf_ct_frag6_queue *) data;

        spin_lock(&fq->lock);

        if (fq->last_in & COMPLETE)
                goto out;

        fq_kill(fq);

out:
        spin_unlock(&fq->lock);
        fq_put(fq, NULL);
}

/* Creation primitives. */

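/* Insert a newly created queue into the hash table. On SMP another CPU
 * may have inserted an equal queue while we were allocating; if so, drop
 * ours and take a reference on the existing one instead.
 */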
static struct nf_ct_frag6_queue *nf_ct_frag6_intern(unsigned int hash,
                                          struct nf_ct_frag6_queue *fq_in)
{
        struct nf_ct_frag6_queue *fq;

        write_lock(&nf_ct_frag6_lock);
#ifdef CONFIG_SMP
        for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) {
                if (fq->id == fq_in->id &&
                    !ipv6_addr_cmp(&fq_in->saddr, &fq->saddr) &&
                    !ipv6_addr_cmp(&fq_in->daddr, &fq->daddr)) {
                        atomic_inc(&fq->refcnt);
                        write_unlock(&nf_ct_frag6_lock);
                        fq_in->last_in |= COMPLETE;
                        fq_put(fq_in, NULL);
                        return fq;
                }
        }
#endif
        fq = fq_in;

        if (!mod_timer(&fq->timer, jiffies + nf_ct_frag6_timeout))
                atomic_inc(&fq->refcnt);

        atomic_inc(&fq->refcnt);
        if ((fq->next = nf_ct_frag6_hash[hash]) != NULL)
                fq->next->pprev = &fq->next;
        nf_ct_frag6_hash[hash] = fq;
        fq->pprev = &nf_ct_frag6_hash[hash];
        INIT_LIST_HEAD(&fq->lru_list);
        list_add_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
        nf_ct_frag6_nqueues++;
        write_unlock(&nf_ct_frag6_lock);
        return fq;
}

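/* Allocate and initialize a new reassembly queue for (id, src, dst). */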
static struct nf_ct_frag6_queue *
nf_ct_frag6_create(unsigned int hash, u32 id, struct in6_addr *src,
                   struct in6_addr *dst)
{
        struct nf_ct_frag6_queue *fq;

        if ((fq = frag_alloc_queue()) == NULL) {
                DEBUGP("Can't alloc new queue\n");
                goto oom;
        }

        memset(fq, 0, sizeof(struct nf_ct_frag6_queue));

        fq->id = id;
        ipv6_addr_copy(&fq->saddr, src);
        ipv6_addr_copy(&fq->daddr, dst);

        init_timer(&fq->timer);
        fq->timer.function = nf_ct_frag6_expire;
        fq->timer.data = (long) fq;
        spin_lock_init(&fq->lock);
        atomic_set(&fq->refcnt, 1);

        return nf_ct_frag6_intern(hash, fq);

oom:
        return NULL;
}

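/* Find the queue matching (id, src, dst) and take a reference on it;
 * create a new queue if none exists yet.
 */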
static __inline__ struct nf_ct_frag6_queue *
fq_find(u32 id, struct in6_addr *src, struct in6_addr *dst)
{
        struct nf_ct_frag6_queue *fq;
        unsigned int hash = ip6qhashfn(id, src, dst);

        read_lock(&nf_ct_frag6_lock);
        for (fq = nf_ct_frag6_hash[hash]; fq; fq = fq->next) {
                if (fq->id == id &&
                    !ipv6_addr_cmp(src, &fq->saddr) &&
                    !ipv6_addr_cmp(dst, &fq->daddr)) {
                        atomic_inc(&fq->refcnt);
                        read_unlock(&nf_ct_frag6_lock);
                        return fq;
                }
        }
        read_unlock(&nf_ct_frag6_lock);

        return nf_ct_frag6_create(hash, id, src, dst);
}

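/* Add one fragment to the queue: validate its offset and length, trim
 * overlaps with already-queued fragments, and link the skb into the
 * offset-sorted fragment list. Returns 0 on success, -1 on error.
 */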
static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb,
                             struct frag_hdr *fhdr, int nhoff)
{
        struct sk_buff *prev, *next;
        int offset, end;

        if (fq->last_in & COMPLETE) {
                DEBUGP("Already completed\n");
                goto err;
        }

        offset = ntohs(fhdr->frag_off) & ~0x7;
        end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
                        ((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));

        if ((unsigned int)end > IPV6_MAXPLEN) {
                DEBUGP("offset is too large.\n");
                return -1;
        }

        if (skb->ip_summed == CHECKSUM_HW)
                skb->csum = csum_sub(skb->csum,
                                     csum_partial(skb->nh.raw,
                                                  (u8 *)(fhdr + 1) - skb->nh.raw,
                                                  0));

        /* Is this the final fragment? */
        if (!(fhdr->frag_off & htons(IP6_MF))) {
                /* If we already have some bits beyond end
                 * or have different end, the segment is corrupted.
                 */
                if (end < fq->len ||
                    ((fq->last_in & LAST_IN) && end != fq->len)) {
                        DEBUGP("already received last fragment\n");
                        goto err;
                }
                fq->last_in |= LAST_IN;
                fq->len = end;
        } else {
                /* Check if the fragment is rounded to 8 bytes.
                 * Required by the RFC.
                 */
                if (end & 0x7) {
                        /* RFC2460 says always send parameter problem in
                         * this case. -DaveM
                         */
                        DEBUGP("the end of this fragment is not rounded to 8 bytes.\n");
                        return -1;
                }
                if (end > fq->len) {
                        /* Some bits beyond end -> corruption. */
                        if (fq->last_in & LAST_IN) {
                                DEBUGP("last packet already reached.\n");
                                goto err;
                        }
                        fq->len = end;
                }
        }

        if (end == offset)
                goto err;

        /* Point into the IP datagram 'data' part. */
        if (!pskb_pull(skb, (u8 *) (fhdr + 1) - skb->data)) {
                DEBUGP("queue: message is too short.\n");
                goto err;
        }
        if (end - offset < skb->len) {
                if (pskb_trim(skb, end - offset)) {
                        DEBUGP("Can't trim\n");
                        goto err;
                }
                if (skb->ip_summed != CHECKSUM_UNNECESSARY)
                        skb->ip_summed = CHECKSUM_NONE;
        }

        /* Find out which fragments are in front and at the back of us
         * in the chain of fragments so far.  We must know where to put
         * this fragment, right?
         */
        prev = NULL;
        for (next = fq->fragments; next != NULL; next = next->next) {
                if (NFCT_FRAG6_CB(next)->offset >= offset)
                        break;  /* bingo! */
                prev = next;
        }

        /* We found where to put this one.  Check for overlap with
         * preceding fragment, and, if needed, align things so that
         * any overlaps are eliminated.
         */
        if (prev) {
                int i = (NFCT_FRAG6_CB(prev)->offset + prev->len) - offset;

                if (i > 0) {
                        offset += i;
                        if (end <= offset) {
                                DEBUGP("overlap\n");
                                goto err;
                        }
                        if (!pskb_pull(skb, i)) {
                                DEBUGP("Can't pull\n");
                                goto err;
                        }
                        if (skb->ip_summed != CHECKSUM_UNNECESSARY)
                                skb->ip_summed = CHECKSUM_NONE;
                }
        }

        /* Look for overlap with succeeding segments.
         * If we can merge fragments, do it.
         */
        while (next && NFCT_FRAG6_CB(next)->offset < end) {
                /* overlap is 'i' bytes */
                int i = end - NFCT_FRAG6_CB(next)->offset;

                if (i < next->len) {
                        /* Eat head of the next overlapped fragment
                         * and leave the loop. The next ones cannot overlap.
                         */
                        DEBUGP("Eat head of the overlapped parts: %d", i);
                        if (!pskb_pull(next, i))
                                goto err;

                        /* next fragment */
                        NFCT_FRAG6_CB(next)->offset += i;
                        fq->meat -= i;
                        if (next->ip_summed != CHECKSUM_UNNECESSARY)
                                next->ip_summed = CHECKSUM_NONE;
                        break;
                } else {
                        struct sk_buff *free_it = next;

                        /* The old fragment is completely overridden by
                         * the new one; drop it.
                         */
                        next = next->next;

                        if (prev)
                                prev->next = next;
                        else
                                fq->fragments = next;

                        fq->meat -= free_it->len;
                        frag_kfree_skb(free_it, NULL);
                }
        }

        NFCT_FRAG6_CB(skb)->offset = offset;

        /* Insert this fragment in the chain of fragments. */
        skb->next = next;
        if (prev)
                prev->next = skb;
        else
                fq->fragments = skb;

        skb->dev = NULL;
        skb_get_timestamp(skb, &fq->stamp);
        fq->meat += skb->len;
        atomic_add(skb->truesize, &nf_ct_frag6_mem);

        /* The first fragment.
         * nhoffset is obtained from the first fragment, of course.
         */
        if (offset == 0) {
                fq->nhoffset = nhoff;
                fq->last_in |= FIRST_IN;
        }
        write_lock(&nf_ct_frag6_lock);
        list_move_tail(&fq->lru_list, &nf_ct_frag6_lru_list);
        write_unlock(&nf_ct_frag6_lock);
        return 0;

err:
        return -1;
}

/*
 *      Check if this packet is complete.
 *      Returns NULL on failure for any reason, or the reassembled
 *      skb on success.
 *
 *      It is called with the fq locked, and the caller must check that
 *      the queue is eligible for reassembly, i.e. it is not COMPLETE,
 *      the last and the first frames have arrived, and all the bits are here.
 */
static struct sk_buff *
nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev)
{
        struct sk_buff *fp, *op, *head = fq->fragments;
        int    payload_len;

        fq_kill(fq);

        BUG_TRAP(head != NULL);
        BUG_TRAP(NFCT_FRAG6_CB(head)->offset == 0);

        /* Unfragmented part is taken from the first segment. */
        payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) +
                      fq->len - sizeof(struct frag_hdr);
        if (payload_len > IPV6_MAXPLEN) {
                DEBUGP("payload len is too large.\n");
                goto out_oversize;
        }

        /* Head of list must not be cloned. */
        if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC)) {
                DEBUGP("skb is cloned but can't expand head");
                goto out_oom;
        }

        /* If the first fragment is fragmented itself, we split
         * it to two chunks: the first with data and paged part
         * and the second, holding only fragments. */
        if (skb_shinfo(head)->frag_list) {
                struct sk_buff *clone;
                int i, plen = 0;

                if ((clone = alloc_skb(0, GFP_ATOMIC)) == NULL) {
                        DEBUGP("Can't alloc skb\n");
                        goto out_oom;
                }
                clone->next = head->next;
                head->next = clone;
                skb_shinfo(clone)->frag_list = skb_shinfo(head)->frag_list;
                skb_shinfo(head)->frag_list = NULL;
                for (i = 0; i < skb_shinfo(head)->nr_frags; i++)
                        plen += skb_shinfo(head)->frags[i].size;
                clone->len = clone->data_len = head->data_len - plen;
                head->data_len -= clone->len;
                head->len -= clone->len;
                clone->csum = 0;
                clone->ip_summed = head->ip_summed;

                NFCT_FRAG6_CB(clone)->orig = NULL;
                atomic_add(clone->truesize, &nf_ct_frag6_mem);
        }

        /* We have to remove the fragment header from the datagram and
         * relocate the header in order to calculate the ICV correctly. */
        head->nh.raw[fq->nhoffset] = head->h.raw[0];
        memmove(head->head + sizeof(struct frag_hdr), head->head,
                (head->data - head->head) - sizeof(struct frag_hdr));
        head->mac.raw += sizeof(struct frag_hdr);
        head->nh.raw += sizeof(struct frag_hdr);

        skb_shinfo(head)->frag_list = head->next;
        head->h.raw = head->data;
        skb_push(head, head->data - head->nh.raw);
        atomic_sub(head->truesize, &nf_ct_frag6_mem);

        for (fp = head->next; fp; fp = fp->next) {
                head->data_len += fp->len;
                head->len += fp->len;
                if (head->ip_summed != fp->ip_summed)
                        head->ip_summed = CHECKSUM_NONE;
                else if (head->ip_summed == CHECKSUM_HW)
                        head->csum = csum_add(head->csum, fp->csum);
                head->truesize += fp->truesize;
                atomic_sub(fp->truesize, &nf_ct_frag6_mem);
        }

        head->next = NULL;
        head->dev = dev;
        skb_set_timestamp(head, &fq->stamp);
        head->nh.ipv6h->payload_len = htons(payload_len);

        /* Yes, and fold redundant checksum back. 8) */
        if (head->ip_summed == CHECKSUM_HW)
                head->csum = csum_partial(head->nh.raw,
                                          head->h.raw - head->nh.raw,
                                          head->csum);

        fq->fragments = NULL;

        /* All original skbs are linked into the NFCT_FRAG6_CB(head).orig
         * chain. */
        fp = skb_shinfo(head)->frag_list;
        if (NFCT_FRAG6_CB(fp)->orig == NULL)
                /* The head skb was divided into two skbs above. */
                fp = fp->next;

        op = NFCT_FRAG6_CB(head)->orig;
        for (; fp; fp = fp->next) {
                struct sk_buff *orig = NFCT_FRAG6_CB(fp)->orig;

                op->next = orig;
                op = orig;
                NFCT_FRAG6_CB(fp)->orig = NULL;
        }

        return head;

out_oversize:
        if (net_ratelimit())
                printk(KERN_DEBUG "nf_ct_frag6_reasm: payload len = %d\n",
                       payload_len);
        goto out_fail;
out_oom:
        if (net_ratelimit())
                printk(KERN_DEBUG "nf_ct_frag6_reasm: no memory for reassembly\n");
out_fail:
        return NULL;
}

/*
 * Find the header just before the Fragment Header.
 *
 * On success, returns 0 and sets:
 * (*prevhdrp): the value of the "Next Header" field in the header
 *              just before the Fragment Header.
 * (*prevhoff): the offset of the "Next Header" field in the header
 *              just before the Fragment Header.
 * (*fhoff)   : the offset of the Fragment Header.
 *
 * Based on ipv6_skip_exthdr() in net/ipv6/exthdrs.c
 */
static int
find_prev_fhdr(struct sk_buff *skb, u8 *prevhdrp, int *prevhoff, int *fhoff)
{
        u8 nexthdr = skb->nh.ipv6h->nexthdr;
        u8 prev_nhoff = (u8 *)&skb->nh.ipv6h->nexthdr - skb->data;
        int start = (u8 *)(skb->nh.ipv6h + 1) - skb->data;
        int len = skb->len - start;
        u8 prevhdr = NEXTHDR_IPV6;

        while (nexthdr != NEXTHDR_FRAGMENT) {
                struct ipv6_opt_hdr hdr;
                int hdrlen;

                if (!ipv6_ext_hdr(nexthdr))
                        return -1;
                if (len < (int)sizeof(struct ipv6_opt_hdr)) {
                        DEBUGP("too short\n");
                        return -1;
                }
                if (nexthdr == NEXTHDR_NONE) {
                        DEBUGP("next header is none\n");
                        return -1;
                }
                if (skb_copy_bits(skb, start, &hdr, sizeof(hdr)))
                        BUG();
                if (nexthdr == NEXTHDR_AUTH)
                        hdrlen = (hdr.hdrlen + 2) << 2;
                else
                        hdrlen = ipv6_optlen(&hdr);

                prevhdr = nexthdr;
                prev_nhoff = start;

                nexthdr = hdr.nexthdr;
                len -= hdrlen;
                start += hdrlen;
        }

        if (len < 0)
                return -1;

        *prevhdrp = prevhdr;
        *prevhoff = prev_nhoff;
        *fhoff = start;

        return 0;
}

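/* Main entry point. Returns the original skb if the packet is not a
 * fragment, NULL while reassembly is still in progress (the clone has
 * been queued), or the reassembled skb once all fragments have arrived.
 */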
struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb)
{
        struct sk_buff *clone;
        struct net_device *dev = skb->dev;
        struct frag_hdr *fhdr;
        struct nf_ct_frag6_queue *fq;
        struct ipv6hdr *hdr;
        int fhoff, nhoff;
        u8 prevhdr;
        struct sk_buff *ret_skb = NULL;

        /* Jumbo payload inhibits frag. header */
        if (skb->nh.ipv6h->payload_len == 0) {
                DEBUGP("payload len = 0\n");
                return skb;
        }

        if (find_prev_fhdr(skb, &prevhdr, &nhoff, &fhoff) < 0)
                return skb;

        clone = skb_clone(skb, GFP_ATOMIC);
        if (clone == NULL) {
                DEBUGP("Can't clone skb\n");
                return skb;
        }

        NFCT_FRAG6_CB(clone)->orig = skb;

        if (!pskb_may_pull(clone, fhoff + sizeof(*fhdr))) {
                DEBUGP("message is too short.\n");
                goto ret_orig;
        }

        clone->h.raw = clone->data + fhoff;
        hdr = clone->nh.ipv6h;
        fhdr = (struct frag_hdr *)clone->h.raw;

        if (!(fhdr->frag_off & htons(0xFFF9))) {
                DEBUGP("Invalid fragment offset\n");
                /* It is not a fragmented frame */
                goto ret_orig;
        }

        if (atomic_read(&nf_ct_frag6_mem) > nf_ct_frag6_high_thresh)
                nf_ct_frag6_evictor();

        fq = fq_find(fhdr->identification, &hdr->saddr, &hdr->daddr);
        if (fq == NULL) {
                DEBUGP("Can't find and can't create new queue\n");
                goto ret_orig;
        }

        spin_lock(&fq->lock);

        if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) {
                spin_unlock(&fq->lock);
                DEBUGP("Can't insert skb to queue\n");
                fq_put(fq, NULL);
                goto ret_orig;
        }

        if (fq->last_in == (FIRST_IN|LAST_IN) && fq->meat == fq->len) {
                ret_skb = nf_ct_frag6_reasm(fq, dev);
                if (ret_skb == NULL)
                        DEBUGP("Can't reassemble fragmented packets\n");
        }
        spin_unlock(&fq->lock);

        fq_put(fq, NULL);
        return ret_skb;

ret_orig:
        kfree_skb(clone);
        return skb;
}

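/* Re-inject the original fragment skbs into the stack just past the
 * defrag hook, each pointing at the reassembled skb via nfct_reasm.
 */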
void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb,
                        struct net_device *in, struct net_device *out,
                        int (*okfn)(struct sk_buff *))
{
        struct sk_buff *s, *s2;

        for (s = NFCT_FRAG6_CB(skb)->orig; s;) {
                nf_conntrack_put_reasm(s->nfct_reasm);
                nf_conntrack_get_reasm(skb);
                s->nfct_reasm = skb;

                s2 = s->next;
                NF_HOOK_THRESH(PF_INET6, hooknum, s, in, out, okfn,
                               NF_IP6_PRI_CONNTRACK_DEFRAG + 1);
                s = s2;
        }
        nf_conntrack_put_reasm(skb);
}

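/* Free the original fragments chained from the reassembled skb, then the
 * reassembled skb itself.
 */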
int nf_ct_frag6_kfree_frags(struct sk_buff *skb)
{
        struct sk_buff *s, *s2;

        for (s = NFCT_FRAG6_CB(skb)->orig; s; s = s2) {
                s2 = s->next;
                kfree_skb(s);
        }

        kfree_skb(skb);

        return 0;
}

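/* Seed the hash secret and start the periodic rehash timer. */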
int nf_ct_frag6_init(void)
{
        nf_ct_frag6_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
                                      (jiffies ^ (jiffies >> 6)));

        init_timer(&nf_ct_frag6_secret_timer);
        nf_ct_frag6_secret_timer.function = nf_ct_frag6_secret_rebuild;
        nf_ct_frag6_secret_timer.expires = jiffies
                                           + nf_ct_frag6_secret_interval;
        add_timer(&nf_ct_frag6_secret_timer);

        return 0;
}

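/* Stop the rehash timer and flush every queued fragment by forcing the
 * eviction threshold down to zero.
 */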
void nf_ct_frag6_cleanup(void)
{
        del_timer(&nf_ct_frag6_secret_timer);
        nf_ct_frag6_low_thresh = 0;
        nf_ct_frag6_evictor();
}