exofs: Write sbi->s_nextid as part of the Create command

author Boaz Harrosh <bharrosh@panasas.com>

Thu, 3 Feb 2011 15:53:25 +0000 (17:53 +0200)

committer Boaz Harrosh <bharrosh@panasas.com>

Tue, 15 Mar 2011 13:02:51 +0000 (15:02 +0200)
author Boaz Harrosh <bharrosh@panasas.com>
Thu, 3 Feb 2011 15:53:25 +0000 (17:53 +0200)
committer Boaz Harrosh <bharrosh@panasas.com>
Tue, 15 Mar 2011 13:02:51 +0000 (15:02 +0200)
diff --git a/fs/exofs/common.h b/fs/exofs/common.h

index f0d5203..5e74ad3 100644 (file)
--- a/fs/exofs/common.h
+++ b/fs/exofs/common.h
@@ -53,10 +53,14 @@
  #define EXOFS_ROOT_ID  0x10002 /* object ID for root directory */
  
  /* exofs Application specific page/attribute */
+/* Inode attrs */
  # define EXOFS_APAGE_FS_DATA   (OSD_APAGE_APP_DEFINED_FIRST + 3)
  # define EXOFS_ATTR_INODE_DATA 1
  # define EXOFS_ATTR_INODE_FILE_LAYOUT  2
  # define EXOFS_ATTR_INODE_DIR_LAYOUT   3
+/* Partition attrs */
+# define EXOFS_APAGE_SB_DATA   (0xF0000000U + 3)
+# define EXOFS_ATTR_SB_STATS   1
  
  /*
   * The maximum number of files we can have is limited by the size of the
@@ -86,8 +90,8 @@ enum {
   */
  enum {EXOFS_FSCB_VER = 1, EXOFS_DT_VER = 1};
  struct exofs_fscb {
-       __le64  s_nextid;       /* Highest object ID used */
-       __le64  s_numfiles;     /* Number of files on fs */
+       __le64  s_nextid;       /* Only used after mkfs */
+       __le64  s_numfiles;     /* Only used after mkfs */
         __le32  s_version;      /* == EXOFS_FSCB_VER */
         __le16  s_magic;        /* Magic signature */
         __le16  s_newfs;        /* Non-zero if this is a new fs */
@@ -97,6 +101,16 @@ struct exofs_fscb {
         __le64  s_dev_table_count; /* == 0 means no dev_table */
  } __packed;
  
+/*
+ * This struct is stored as an attribute of the FS partition, at
+ * [EXOFS_APAGE_SB_DATA, EXOFS_ATTR_SB_STATS]. It is written together with the
+ * create command, to atomically persist the sb's writeable information.
+ */
+struct exofs_sb_stats {
+       __le64  s_nextid;       /* Highest object ID used */
+       __le64  s_numfiles;     /* Number of files on fs */
+} __packed;
+
  /*
   * Describes the raid used in the FS. It is part of the device table.
   * This here is taken from the pNFS-objects definition. In exofs we
diff --git a/fs/exofs/exofs.h b/fs/exofs/exofs.h

index 99fcb91..c965806 100644 (file)
--- a/fs/exofs/exofs.h
+++ b/fs/exofs/exofs.h
@@ -77,7 +77,7 @@ struct exofs_layout {
   * our extension to the in-memory superblock
   */
  struct exofs_sb_info {
-       struct exofs_fscb s_fscb;               /* Written often, pre-allocate*/
+       struct exofs_sb_stats s_ess;            /* Written often, pre-allocate*/
         int             s_timeout;              /* timeout for OSD operations */
         uint64_t        s_nextid;               /* highest object ID used     */
         uint32_t        s_numfiles;             /* number of files on fs      */
@@ -281,7 +281,7 @@ int exofs_set_link(struct inode *, struct exofs_dir_entry *, struct page *,
                     struct inode *);
  
  /* super.c               */
-int exofs_sync_fs(struct super_block *sb, int wait);
+int exofs_sbi_write_stats(struct exofs_sb_info *sbi);
  
  /*********************
   * operation vectors *
diff --git a/fs/exofs/file.c b/fs/exofs/file.c

index 4c0d6ba..45ca323 100644 (file)
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -45,17 +45,8 @@ static int exofs_release_file(struct inode *inode, struct file *filp)
  static int exofs_file_fsync(struct file *filp, int datasync)
  {
         int ret;
-       struct inode *inode = filp->f_mapping->host;
-       struct super_block *sb;
-
-       ret = sync_inode_metadata(inode, 1);
-
-       /* This is a good place to write the sb */
-       /* TODO: Sechedule an sb-sync on create */
-       sb = inode->i_sb;
-       if (sb->s_dirt)
-               exofs_sync_fs(sb, 1);
  
+       ret = sync_inode_metadata(filp->f_mapping->host, 1);
         return ret;
  }
  
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c

index 681b3cb..0c713cf 100644 (file)
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -1102,6 +1102,7 @@ int __exofs_wait_obj_created(struct exofs_i_info *oi)
         }
         return unlikely(is_bad_inode(&oi->vfs_inode)) ? -EIO : 0;
  }
+
  /*
   * Callback function from exofs_new_inode().  The important thing is that we
   * set the obj_created flag so that other methods know that the object exists on
@@ -1160,7 +1161,6 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
         sbi = sb->s_fs_info;
  
         inode->i_mapping->backing_dev_info = sb->s_bdi;
-       sb->s_dirt = 1;
         inode_init_owner(inode, dir, mode);
         inode->i_ino = sbi->s_nextid++;
         inode->i_blkbits = EXOFS_BLKSHIFT;
@@ -1171,6 +1171,8 @@ struct inode *exofs_new_inode(struct inode *dir, int mode)
         spin_unlock(&sbi->s_next_gen_lock);
         insert_inode_hash(inode);
  
+       exofs_sbi_write_stats(sbi); /* Make sure new sbi->s_nextid is on disk */
+
         mark_inode_dirty(inode);
  
         ret = exofs_get_io_state(&sbi->layout, &ios);
diff --git a/fs/exofs/super.c b/fs/exofs/super.c

index 474989e..5eb0851 100644 (file)
--- a/fs/exofs/super.c
+++ b/fs/exofs/super.c
@@ -213,6 +213,101 @@ static void destroy_inodecache(void)
  static const struct super_operations exofs_sops;
  static const struct export_operations exofs_export_ops;
  
+static const struct osd_attr g_attr_sb_stats = ATTR_DEF(
+       EXOFS_APAGE_SB_DATA, /* attribute page (partition attrs) */
+       EXOFS_ATTR_SB_STATS, /* attribute number within that page */
+       sizeof(struct exofs_sb_stats)); /* expected value size */
+
+/*
+ * Read the exofs_sb_stats attribute and, if it is present, load
+ * sbi->s_nextid and sbi->s_numfiles from it.
+ *
+ * A zero-length attribute leaves the values already in @sbi untouched.
+ * A length other than sizeof(struct exofs_sb_stats) is logged and also
+ * leaves @sbi untouched; 0 is still returned in that case.
+ *
+ * Returns 0 on success, or the error from exofs_get_io_state(),
+ * exofs_sbi_read() or extract_attr_from_ios().
+ */
+static int __sbi_read_stats(struct exofs_sb_info *sbi)
+{
+       struct osd_attr attrs[] = {
+               [0] = g_attr_sb_stats,
+       };
+       struct exofs_io_state *ios;
+       int ret;
+
+       ret = exofs_get_io_state(&sbi->layout, &ios);
+       if (unlikely(ret)) {
+               EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+               return ret;
+       }
+
+       ios->cred = sbi->s_cred;
+
+       ios->in_attr = attrs;
+       ios->in_attr_len = ARRAY_SIZE(attrs);
+
+       ret = exofs_sbi_read(ios);
+       if (unlikely(ret)) {
+               EXOFS_ERR("Error reading super_block stats => %d\n", ret);
+               goto out;
+       }
+
+       ret = extract_attr_from_ios(ios, &attrs[0]);
+       if (ret) {
+               EXOFS_ERR("%s: extract_attr of sb_stats failed\n", __func__);
+               goto out;
+       }
+       if (attrs[0].len) {
+               struct exofs_sb_stats *ess;
+
+               if (unlikely(attrs[0].len != sizeof(*ess))) {
+                       EXOFS_ERR("%s: Wrong version of exofs_sb_stats "
+                                 "size(%d) != expected(%zd)\n",
+                                 __func__, attrs[0].len, sizeof(*ess));
+                       goto out;
+               }
+
+               ess = attrs[0].val_ptr;
+               sbi->s_nextid = le64_to_cpu(ess->s_nextid);
+               /* s_numfiles is __le64 in exofs_sb_stats and is written with
+                * cpu_to_le64(); decode all 64 bits and let the assignment
+                * narrow it. le32_to_cpu() decodes the wrong half on
+                * big-endian hosts.
+                */
+               sbi->s_numfiles = le64_to_cpu(ess->s_numfiles);
+       }
+
+out:
+       exofs_put_io_state(ios);
+       return ret;
+}
+
+static void stats_done(struct exofs_io_state *ios, void *p)
+{
+       exofs_put_io_state(ios);
+       /* Installed as ios->done by exofs_sbi_write_stats(). Nothing is
+        * checked here: releasing the io_state above is all that is left to
+        * do, and @p is not used.
+        */
+}
+
+/* Asynchronously write the stats attribute.
+ *
+ * Encodes sbi->s_nextid and sbi->s_numfiles as little-endian into the
+ * pre-allocated sbi->s_ess and submits that buffer as the g_attr_sb_stats
+ * out-attribute through exofs_sbi_write(). stats_done() is installed as
+ * ios->done and releases the io_state; if submission fails the io_state is
+ * released here instead.
+ *
+ * Returns the exofs_get_io_state() error, or the result of
+ * exofs_sbi_write().
+ *
+ * NOTE(review): attrs[] lives on this stack frame while the write is
+ * asynchronous - presumably exofs_sbi_write() is done with it before it
+ * returns; confirm.
+ */
+int exofs_sbi_write_stats(struct exofs_sb_info *sbi)
+{
+       struct osd_attr attrs[] = {
+               [0] = g_attr_sb_stats,
+       };
+       struct exofs_io_state *ios;
+       int ret;
+
+       ret = exofs_get_io_state(&sbi->layout, &ios);
+       if (unlikely(ret)) {
+               EXOFS_ERR("%s: exofs_get_io_state failed.\n", __func__);
+               return ret;
+       }
+
+       sbi->s_ess.s_nextid   = cpu_to_le64(sbi->s_nextid);
+       sbi->s_ess.s_numfiles = cpu_to_le64(sbi->s_numfiles);
+       attrs[0].val_ptr = &sbi->s_ess;
+
+       ios->cred = sbi->s_cred;
+       ios->done = stats_done;
+       ios->private = sbi;
+       ios->out_attr = attrs;
+       ios->out_attr_len = ARRAY_SIZE(attrs);
+
+       ret = exofs_sbi_write(ios);
+       if (unlikely(ret)) {
+               EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
+               exofs_put_io_state(ios);
+       }
+
+       return ret;
+}
+
  /*
   * Write the superblock to the OSD
   */
@@ -223,18 +318,25 @@ int exofs_sync_fs(struct super_block *sb, int wait)
         struct exofs_io_state *ios;
         int ret = -ENOMEM;
  
-       lock_super(sb);
+       fscb = kmalloc(sizeof(*fscb), GFP_KERNEL);
+       if (unlikely(!fscb))
+               return -ENOMEM;
+
         sbi = sb->s_fs_info;
-       fscb = &sbi->s_fscb;
  
+       /* NOTE: We no longer dirty the super_block anywhere in exofs. The
+        * reason we write the fscb here on unmount is so we can stay backwards
+        * compatible with fscb->s_version == 1. (What we are not compatible
+        * with is if a new version FS crashed and then we try to mount an old
+        * version). Otherwise the exofs_fscb is read-only from mkfs time. All
+        * the writeable info is set in exofs_sbi_write_stats() above.
+        */
         ret = exofs_get_io_state(&sbi->layout, &ios);
-       if (ret)
+       if (unlikely(ret))
                 goto out;
  
-       /* Note: We only write the changing part of the fscb. .i.e upto the
-        *       the fscb->s_dev_table_oid member. There is no read-modify-write
-        *       here.
-        */
+       lock_super(sb);
+
         ios->length = offsetof(struct exofs_fscb, s_dev_table_oid);
         memset(fscb, 0, ios->length);
         fscb->s_nextid = cpu_to_le64(sbi->s_nextid);
@@ -249,16 +351,17 @@ int exofs_sync_fs(struct super_block *sb, int wait)
         ios->cred = sbi->s_cred;
  
         ret = exofs_sbi_write(ios);
-       if (unlikely(ret)) {
+       if (unlikely(ret))
                 EXOFS_ERR("%s: exofs_sbi_write failed.\n", __func__);
-               goto out;
-       }
-       sb->s_dirt = 0;
+       else
+               sb->s_dirt = 0;
  
+
+       unlock_super(sb);
  out:
         EXOFS_DBGMSG("s_nextid=0x%llx ret=%d\n", _LLU(sbi->s_nextid), ret);
         exofs_put_io_state(ios);
-       unlock_super(sb);
+       kfree(fscb);
         return ret;
  }
  
@@ -302,9 +405,6 @@ static void exofs_put_super(struct super_block *sb)
         int num_pend;
         struct exofs_sb_info *sbi = sb->s_fs_info;
  
-       if (sb->s_dirt)
-               exofs_write_super(sb);
-
         /* make sure there are no pending commands */
         for (num_pend = atomic_read(&sbi->s_curr_pending); num_pend > 0;
              num_pend = atomic_read(&sbi->s_curr_pending)) {
@@ -629,6 +729,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
                 goto free_sbi;
  
         sb->s_magic = le16_to_cpu(fscb.s_magic);
+       /* NOTE: we read below to be backward compatible with old versions */
         sbi->s_nextid = le64_to_cpu(fscb.s_nextid);
         sbi->s_numfiles = le32_to_cpu(fscb.s_numfiles);
  
@@ -639,7 +740,7 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
                 ret = -EINVAL;
                 goto free_sbi;
         }
-       if (le32_to_cpu(fscb.s_version) != EXOFS_FSCB_VER) {
+       if (le32_to_cpu(fscb.s_version) > EXOFS_FSCB_VER) {
                 EXOFS_ERR("ERROR: Bad FSCB version expected-%d got-%d\n",
                           EXOFS_FSCB_VER, le32_to_cpu(fscb.s_version));
                 ret = -EINVAL;
@@ -657,6 +758,8 @@ static int exofs_fill_super(struct super_block *sb, void *data, int silent)
                         goto free_sbi;
         }
  
+       __sbi_read_stats(sbi);
+
         /* set up operation vectors */
         sbi->bdi.ra_pages = __ra_pages(&sbi->layout);
         sb->s_bdi = &sbi->bdi;
author	Boaz Harrosh <bharrosh@panasas.com>
	Thu, 3 Feb 2011 15:53:25 +0000 (17:53 +0200)
committer	Boaz Harrosh <bharrosh@panasas.com>
	Tue, 15 Mar 2011 13:02:51 +0000 (15:02 +0200)
fs/exofs/common.h		patch \| blob \| history
fs/exofs/exofs.h		patch \| blob \| history
fs/exofs/file.c		patch \| blob \| history
fs/exofs/inode.c		patch \| blob \| history
fs/exofs/super.c		patch \| blob \| history