xfs: introduce xfs_bulkstat_ichunk_ra
[cascardo/linux.git] / fs / xfs / xfs_itable.c
1 /*
2  * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_shared.h"
21 #include "xfs_format.h"
22 #include "xfs_log_format.h"
23 #include "xfs_trans_resv.h"
24 #include "xfs_inum.h"
25 #include "xfs_sb.h"
26 #include "xfs_ag.h"
27 #include "xfs_mount.h"
28 #include "xfs_inode.h"
29 #include "xfs_btree.h"
30 #include "xfs_ialloc.h"
31 #include "xfs_ialloc_btree.h"
32 #include "xfs_itable.h"
33 #include "xfs_error.h"
34 #include "xfs_trace.h"
35 #include "xfs_icache.h"
36 #include "xfs_dinode.h"
37
38 STATIC int
39 xfs_internal_inum(
40         xfs_mount_t     *mp,
41         xfs_ino_t       ino)
42 {
43         return (ino == mp->m_sb.sb_rbmino || ino == mp->m_sb.sb_rsumino ||
44                 (xfs_sb_version_hasquota(&mp->m_sb) &&
45                  xfs_is_quota_inode(&mp->m_sb, ino)));
46 }
47
48 /*
49  * Return stat information for one inode.
50  * Return 0 if ok, else errno.
51  */
52 int
53 xfs_bulkstat_one_int(
54         struct xfs_mount        *mp,            /* mount point for filesystem */
55         xfs_ino_t               ino,            /* inode to get data for */
56         void __user             *buffer,        /* buffer to place output in */
57         int                     ubsize,         /* size of buffer */
58         bulkstat_one_fmt_pf     formatter,      /* formatter, copy to user */
59         int                     *ubused,        /* bytes used by me */
60         int                     *stat)          /* BULKSTAT_RV_... */
61 {
62         struct xfs_icdinode     *dic;           /* dinode core info pointer */
63         struct xfs_inode        *ip;            /* incore inode pointer */
64         struct xfs_bstat        *buf;           /* return buffer */
65         int                     error = 0;      /* error value */
66
67         *stat = BULKSTAT_RV_NOTHING;
68
69         if (!buffer || xfs_internal_inum(mp, ino))
70                 return -EINVAL;
71
72         buf = kmem_alloc(sizeof(*buf), KM_SLEEP | KM_MAYFAIL);
73         if (!buf)
74                 return -ENOMEM;
75
76         error = xfs_iget(mp, NULL, ino,
77                          (XFS_IGET_DONTCACHE | XFS_IGET_UNTRUSTED),
78                          XFS_ILOCK_SHARED, &ip);
79         if (error)
80                 goto out_free;
81
82         ASSERT(ip != NULL);
83         ASSERT(ip->i_imap.im_blkno != 0);
84
85         dic = &ip->i_d;
86
87         /* xfs_iget returns the following without needing
88          * further change.
89          */
90         buf->bs_nlink = dic->di_nlink;
91         buf->bs_projid_lo = dic->di_projid_lo;
92         buf->bs_projid_hi = dic->di_projid_hi;
93         buf->bs_ino = ino;
94         buf->bs_mode = dic->di_mode;
95         buf->bs_uid = dic->di_uid;
96         buf->bs_gid = dic->di_gid;
97         buf->bs_size = dic->di_size;
98         buf->bs_atime.tv_sec = dic->di_atime.t_sec;
99         buf->bs_atime.tv_nsec = dic->di_atime.t_nsec;
100         buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
101         buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
102         buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
103         buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec;
104         buf->bs_xflags = xfs_ip2xflags(ip);
105         buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
106         buf->bs_extents = dic->di_nextents;
107         buf->bs_gen = dic->di_gen;
108         memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
109         buf->bs_dmevmask = dic->di_dmevmask;
110         buf->bs_dmstate = dic->di_dmstate;
111         buf->bs_aextents = dic->di_anextents;
112         buf->bs_forkoff = XFS_IFORK_BOFF(ip);
113
114         switch (dic->di_format) {
115         case XFS_DINODE_FMT_DEV:
116                 buf->bs_rdev = ip->i_df.if_u2.if_rdev;
117                 buf->bs_blksize = BLKDEV_IOSIZE;
118                 buf->bs_blocks = 0;
119                 break;
120         case XFS_DINODE_FMT_LOCAL:
121         case XFS_DINODE_FMT_UUID:
122                 buf->bs_rdev = 0;
123                 buf->bs_blksize = mp->m_sb.sb_blocksize;
124                 buf->bs_blocks = 0;
125                 break;
126         case XFS_DINODE_FMT_EXTENTS:
127         case XFS_DINODE_FMT_BTREE:
128                 buf->bs_rdev = 0;
129                 buf->bs_blksize = mp->m_sb.sb_blocksize;
130                 buf->bs_blocks = dic->di_nblocks + ip->i_delayed_blks;
131                 break;
132         }
133         xfs_iunlock(ip, XFS_ILOCK_SHARED);
134         IRELE(ip);
135
136         error = formatter(buffer, ubsize, ubused, buf);
137         if (!error)
138                 *stat = BULKSTAT_RV_DIDONE;
139
140  out_free:
141         kmem_free(buf);
142         return error;
143 }
144
145 /* Return 0 on success or positive error */
146 STATIC int
147 xfs_bulkstat_one_fmt(
148         void                    __user *ubuffer,
149         int                     ubsize,
150         int                     *ubused,
151         const xfs_bstat_t       *buffer)
152 {
153         if (ubsize < sizeof(*buffer))
154                 return -ENOMEM;
155         if (copy_to_user(ubuffer, buffer, sizeof(*buffer)))
156                 return -EFAULT;
157         if (ubused)
158                 *ubused = sizeof(*buffer);
159         return 0;
160 }
161
162 int
163 xfs_bulkstat_one(
164         xfs_mount_t     *mp,            /* mount point for filesystem */
165         xfs_ino_t       ino,            /* inode number to get data for */
166         void            __user *buffer, /* buffer to place output in */
167         int             ubsize,         /* size of buffer */
168         int             *ubused,        /* bytes used by me */
169         int             *stat)          /* BULKSTAT_RV_... */
170 {
171         return xfs_bulkstat_one_int(mp, ino, buffer, ubsize,
172                                     xfs_bulkstat_one_fmt, ubused, stat);
173 }
174
175 /*
176  * Loop over all clusters in a chunk for a given incore inode allocation btree
177  * record.  Do a readahead if there are any allocated inodes in that cluster.
178  */
179 STATIC void
180 xfs_bulkstat_ichunk_ra(
181         struct xfs_mount                *mp,
182         xfs_agnumber_t                  agno,
183         struct xfs_inobt_rec_incore     *irec)
184 {
185         xfs_agblock_t                   agbno;
186         struct blk_plug                 plug;
187         int                             blks_per_cluster;
188         int                             inodes_per_cluster;
189         int                             i;      /* inode chunk index */
190
191         agbno = XFS_AGINO_TO_AGBNO(mp, irec->ir_startino);
192         blks_per_cluster = xfs_icluster_size_fsb(mp);
193         inodes_per_cluster = blks_per_cluster << mp->m_sb.sb_inopblog;
194
195         blk_start_plug(&plug);
196         for (i = 0; i < XFS_INODES_PER_CHUNK;
197              i += inodes_per_cluster, agbno += blks_per_cluster) {
198                 if (xfs_inobt_maskn(i, inodes_per_cluster) & ~irec->ir_free) {
199                         xfs_btree_reada_bufs(mp, agno, agbno, blks_per_cluster,
200                                              &xfs_inode_buf_ops);
201                 }
202         }
203         blk_finish_plug(&plug);
204 }
205
206 #define XFS_BULKSTAT_UBLEFT(ubleft)     ((ubleft) >= statstruct_size)
207
208 /*
209  * Return stat information in bulk (by-inode) for the filesystem.
210  */
211 int                                     /* error status */
212 xfs_bulkstat(
213         xfs_mount_t             *mp,    /* mount point for filesystem */
214         xfs_ino_t               *lastinop, /* last inode returned */
215         int                     *ubcountp, /* size of buffer/count returned */
216         bulkstat_one_pf         formatter, /* func that'd fill a single buf */
217         size_t                  statstruct_size, /* sizeof struct filling */
218         char                    __user *ubuffer, /* buffer with inode stats */
219         int                     *done)  /* 1 if there are more stats to get */
220 {
221         xfs_buf_t               *agbp;  /* agi header buffer */
222         xfs_agi_t               *agi;   /* agi header data */
223         xfs_agino_t             agino;  /* inode # in allocation group */
224         xfs_agnumber_t          agno;   /* allocation group number */
225         int                     chunkidx; /* current index into inode chunk */
226         int                     clustidx; /* current index into inode cluster */
227         xfs_btree_cur_t         *cur;   /* btree cursor for ialloc btree */
228         int                     end_of_ag; /* set if we've seen the ag end */
229         int                     error;  /* error code */
230         int                     fmterror;/* bulkstat formatter result */
231         int                     i;      /* loop index */
232         int                     icount; /* count of inodes good in irbuf */
233         size_t                  irbsize; /* size of irec buffer in bytes */
234         xfs_ino_t               ino;    /* inode number (filesystem) */
235         xfs_inobt_rec_incore_t  *irbp;  /* current irec buffer pointer */
236         xfs_inobt_rec_incore_t  *irbuf; /* start of irec buffer */
237         xfs_inobt_rec_incore_t  *irbufend; /* end of good irec buffer entries */
238         xfs_ino_t               lastino; /* last inode number returned */
239         int                     nirbuf; /* size of irbuf */
240         int                     rval;   /* return value error code */
241         int                     tmp;    /* result value from btree calls */
242         int                     ubcount; /* size of user's buffer */
243         int                     ubleft; /* bytes left in user's buffer */
244         char                    __user *ubufp;  /* pointer into user's buffer */
245         int                     ubelem; /* spaces used in user's buffer */
246         int                     ubused; /* bytes used by formatter */
247
248         /*
249          * Get the last inode value, see if there's nothing to do.
250          */
251         ino = (xfs_ino_t)*lastinop;
252         lastino = ino;
253         agno = XFS_INO_TO_AGNO(mp, ino);
254         agino = XFS_INO_TO_AGINO(mp, ino);
255         if (agno >= mp->m_sb.sb_agcount ||
256             ino != XFS_AGINO_TO_INO(mp, agno, agino)) {
257                 *done = 1;
258                 *ubcountp = 0;
259                 return 0;
260         }
261
262         ubcount = *ubcountp; /* statstruct's */
263         ubleft = ubcount * statstruct_size; /* bytes */
264         *ubcountp = ubelem = 0;
265         *done = 0;
266         fmterror = 0;
267         ubufp = ubuffer;
268         irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4);
269         if (!irbuf)
270                 return -ENOMEM;
271
272         nirbuf = irbsize / sizeof(*irbuf);
273
274         /*
275          * Loop over the allocation groups, starting from the last
276          * inode returned; 0 means start of the allocation group.
277          */
278         rval = 0;
279         while (XFS_BULKSTAT_UBLEFT(ubleft) && agno < mp->m_sb.sb_agcount) {
280                 cond_resched();
281                 error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
282                 if (error)
283                         break;
284                 agi = XFS_BUF_TO_AGI(agbp);
285                 /*
286                  * Allocate and initialize a btree cursor for ialloc btree.
287                  */
288                 cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
289                                             XFS_BTNUM_INO);
290                 irbp = irbuf;
291                 irbufend = irbuf + nirbuf;
292                 end_of_ag = 0;
293                 /*
294                  * If we're returning in the middle of an allocation group,
295                  * we need to get the remainder of the chunk we're in.
296                  */
297                 if (agino > 0) {
298                         xfs_inobt_rec_incore_t r;
299
300                         /*
301                          * Lookup the inode chunk that this inode lives in.
302                          */
303                         error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE,
304                                                  &tmp);
305                         if (!error &&   /* no I/O error */
306                             tmp &&      /* lookup succeeded */
307                                         /* got the record, should always work */
308                             !(error = xfs_inobt_get_rec(cur, &r, &i)) &&
309                             i == 1 &&
310                                         /* this is the right chunk */
311                             agino < r.ir_startino + XFS_INODES_PER_CHUNK &&
312                                         /* lastino was not last in chunk */
313                             (chunkidx = agino - r.ir_startino + 1) <
314                                     XFS_INODES_PER_CHUNK &&
315                                         /* there are some left allocated */
316                             xfs_inobt_maskn(chunkidx,
317                                     XFS_INODES_PER_CHUNK - chunkidx) &
318                                     ~r.ir_free) {
319                                 /*
320                                  * Grab the chunk record.  Mark all the
321                                  * uninteresting inodes (because they're
322                                  * before our start point) free.
323                                  */
324                                 for (i = 0; i < chunkidx; i++) {
325                                         if (XFS_INOBT_MASK(i) & ~r.ir_free)
326                                                 r.ir_freecount++;
327                                 }
328                                 r.ir_free |= xfs_inobt_maskn(0, chunkidx);
329                                 irbp->ir_startino = r.ir_startino;
330                                 irbp->ir_freecount = r.ir_freecount;
331                                 irbp->ir_free = r.ir_free;
332                                 irbp++;
333                                 agino = r.ir_startino + XFS_INODES_PER_CHUNK;
334                                 icount = XFS_INODES_PER_CHUNK - r.ir_freecount;
335                         } else {
336                                 /*
337                                  * If any of those tests failed, bump the
338                                  * inode number (just in case).
339                                  */
340                                 agino++;
341                                 icount = 0;
342                         }
343                         /*
344                          * In any case, increment to the next record.
345                          */
346                         if (!error)
347                                 error = xfs_btree_increment(cur, 0, &tmp);
348                 } else {
349                         /*
350                          * Start of ag.  Lookup the first inode chunk.
351                          */
352                         error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &tmp);
353                         icount = 0;
354                 }
355                 if (error)
356                         break;
357
358                 /*
359                  * Loop through inode btree records in this ag,
360                  * until we run out of inodes or space in the buffer.
361                  */
362                 while (irbp < irbufend && icount < ubcount) {
363                         struct xfs_inobt_rec_incore     r;
364
365                         error = xfs_inobt_get_rec(cur, &r, &i);
366                         if (error || i == 0) {
367                                 end_of_ag = 1;
368                                 break;
369                         }
370
371                         /*
372                          * If this chunk has any allocated inodes, save it.
373                          * Also start read-ahead now for this chunk.
374                          */
375                         if (r.ir_freecount < XFS_INODES_PER_CHUNK) {
376                                 xfs_bulkstat_ichunk_ra(mp, agno, &r);
377                                 irbp->ir_startino = r.ir_startino;
378                                 irbp->ir_freecount = r.ir_freecount;
379                                 irbp->ir_free = r.ir_free;
380                                 irbp++;
381                                 icount += XFS_INODES_PER_CHUNK - r.ir_freecount;
382                         }
383                         /*
384                          * Set agino to after this chunk and bump the cursor.
385                          */
386                         agino = r.ir_startino + XFS_INODES_PER_CHUNK;
387                         error = xfs_btree_increment(cur, 0, &tmp);
388                         cond_resched();
389                 }
390                 /*
391                  * Drop the btree buffers and the agi buffer.
392                  * We can't hold any of the locks these represent
393                  * when calling iget.
394                  */
395                 xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
396                 xfs_buf_relse(agbp);
397                 /*
398                  * Now format all the good inodes into the user's buffer.
399                  */
400                 irbufend = irbp;
401                 for (irbp = irbuf;
402                      irbp < irbufend && XFS_BULKSTAT_UBLEFT(ubleft); irbp++) {
403                         /*
404                          * Now process this chunk of inodes.
405                          */
406                         for (agino = irbp->ir_startino, chunkidx = clustidx = 0;
407                              XFS_BULKSTAT_UBLEFT(ubleft) &&
408                                 irbp->ir_freecount < XFS_INODES_PER_CHUNK;
409                              chunkidx++, clustidx++, agino++) {
410                                 ASSERT(chunkidx < XFS_INODES_PER_CHUNK);
411
412                                 ino = XFS_AGINO_TO_INO(mp, agno, agino);
413                                 /*
414                                  * Skip if this inode is free.
415                                  */
416                                 if (XFS_INOBT_MASK(chunkidx) & irbp->ir_free) {
417                                         lastino = ino;
418                                         continue;
419                                 }
420                                 /*
421                                  * Count used inodes as free so we can tell
422                                  * when the chunk is used up.
423                                  */
424                                 irbp->ir_freecount++;
425
426                                 /*
427                                  * Get the inode and fill in a single buffer.
428                                  */
429                                 ubused = statstruct_size;
430                                 error = formatter(mp, ino, ubufp, ubleft,
431                                                   &ubused, &fmterror);
432                                 if (fmterror == BULKSTAT_RV_NOTHING) {
433                                         if (error && error != -ENOENT &&
434                                                 error != -EINVAL) {
435                                                 ubleft = 0;
436                                                 rval = error;
437                                                 break;
438                                         }
439                                         lastino = ino;
440                                         continue;
441                                 }
442                                 if (fmterror == BULKSTAT_RV_GIVEUP) {
443                                         ubleft = 0;
444                                         ASSERT(error);
445                                         rval = error;
446                                         break;
447                                 }
448                                 if (ubufp)
449                                         ubufp += ubused;
450                                 ubleft -= ubused;
451                                 ubelem++;
452                                 lastino = ino;
453                         }
454
455                         cond_resched();
456                 }
457                 /*
458                  * Set up for the next loop iteration.
459                  */
460                 if (XFS_BULKSTAT_UBLEFT(ubleft)) {
461                         if (end_of_ag) {
462                                 agno++;
463                                 agino = 0;
464                         } else
465                                 agino = XFS_INO_TO_AGINO(mp, lastino);
466                 } else
467                         break;
468         }
469         /*
470          * Done, we're either out of filesystem or space to put the data.
471          */
472         kmem_free(irbuf);
473         *ubcountp = ubelem;
474         /*
475          * Found some inodes, return them now and return the error next time.
476          */
477         if (ubelem)
478                 rval = 0;
479         if (agno >= mp->m_sb.sb_agcount) {
480                 /*
481                  * If we ran out of filesystem, mark lastino as off
482                  * the end of the filesystem, so the next call
483                  * will return immediately.
484                  */
485                 *lastinop = (xfs_ino_t)XFS_AGINO_TO_INO(mp, agno, 0);
486                 *done = 1;
487         } else
488                 *lastinop = (xfs_ino_t)lastino;
489
490         return rval;
491 }
492
493 int
494 xfs_inumbers_fmt(
495         void                    __user *ubuffer, /* buffer to write to */
496         const struct xfs_inogrp *buffer,        /* buffer to read from */
497         long                    count,          /* # of elements to read */
498         long                    *written)       /* # of bytes written */
499 {
500         if (copy_to_user(ubuffer, buffer, count * sizeof(*buffer)))
501                 return -EFAULT;
502         *written = count * sizeof(*buffer);
503         return 0;
504 }
505
506 /*
507  * Return inode number table for the filesystem.
508  */
509 int                                     /* error status */
510 xfs_inumbers(
511         struct xfs_mount        *mp,/* mount point for filesystem */
512         xfs_ino_t               *lastino,/* last inode returned */
513         int                     *count,/* size of buffer/count returned */
514         void                    __user *ubuffer,/* buffer with inode descriptions */
515         inumbers_fmt_pf         formatter)
516 {
517         xfs_agnumber_t          agno = XFS_INO_TO_AGNO(mp, *lastino);
518         xfs_agino_t             agino = XFS_INO_TO_AGINO(mp, *lastino);
519         struct xfs_btree_cur    *cur = NULL;
520         struct xfs_buf          *agbp = NULL;
521         struct xfs_inogrp       *buffer;
522         int                     bcount;
523         int                     left = *count;
524         int                     bufidx = 0;
525         int                     error = 0;
526
527         *count = 0;
528         if (agno >= mp->m_sb.sb_agcount ||
529             *lastino != XFS_AGINO_TO_INO(mp, agno, agino))
530                 return error;
531
532         bcount = MIN(left, (int)(PAGE_SIZE / sizeof(*buffer)));
533         buffer = kmem_alloc(bcount * sizeof(*buffer), KM_SLEEP);
534         do {
535                 struct xfs_inobt_rec_incore     r;
536                 int                             stat;
537
538                 if (!agbp) {
539                         error = xfs_ialloc_read_agi(mp, NULL, agno, &agbp);
540                         if (error)
541                                 break;
542
543                         cur = xfs_inobt_init_cursor(mp, NULL, agbp, agno,
544                                                     XFS_BTNUM_INO);
545                         error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_GE,
546                                                  &stat);
547                         if (error)
548                                 break;
549                         if (!stat)
550                                 goto next_ag;
551                 }
552
553                 error = xfs_inobt_get_rec(cur, &r, &stat);
554                 if (error)
555                         break;
556                 if (!stat)
557                         goto next_ag;
558
559                 agino = r.ir_startino + XFS_INODES_PER_CHUNK - 1;
560                 buffer[bufidx].xi_startino =
561                         XFS_AGINO_TO_INO(mp, agno, r.ir_startino);
562                 buffer[bufidx].xi_alloccount =
563                         XFS_INODES_PER_CHUNK - r.ir_freecount;
564                 buffer[bufidx].xi_allocmask = ~r.ir_free;
565                 if (++bufidx == bcount) {
566                         long    written;
567
568                         error = formatter(ubuffer, buffer, bufidx, &written);
569                         if (error)
570                                 break;
571                         ubuffer += written;
572                         *count += bufidx;
573                         bufidx = 0;
574                 }
575                 if (!--left)
576                         break;
577
578                 error = xfs_btree_increment(cur, 0, &stat);
579                 if (error)
580                         break;
581                 if (stat)
582                         continue;
583
584 next_ag:
585                 xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
586                 cur = NULL;
587                 xfs_buf_relse(agbp);
588                 agbp = NULL;
589                 agino = 0;
590         } while (++agno < mp->m_sb.sb_agcount);
591
592         if (!error) {
593                 if (bufidx) {
594                         long    written;
595
596                         error = formatter(ubuffer, buffer, bufidx, &written);
597                         if (!error)
598                                 *count += bufidx;
599                 }
600                 *lastino = XFS_AGINO_TO_INO(mp, agno, agino);
601         }
602
603         kmem_free(buffer);
604         if (cur)
605                 xfs_btree_del_cursor(cur, (error ? XFS_BTREE_ERROR :
606                                            XFS_BTREE_NOERROR));
607         if (agbp)
608                 xfs_buf_relse(agbp);
609
610         return error;
611 }